#!/usr/bin/perl -w # # Syntax: extract_defs [-h] [-o latex_def_file] [-p max_pass] master_file # # Purpose: Creates a stand-alone definitions file for given latex file. # More specifically, all the commands defined in the \input # files in the preamble of master_file are extracted recursively. # The program uses the environment variable $TEXINPUTS to locate # master .bib files that are not in the current directory or # that cannot be found using a relative or absolute path name. # The default file name for the definitions file is the # base name of master_file with the extension .tex_defs # max_pass is the maximum number of passes that is made # over the definitions file. # The -h flag shows the help info for this command. # Created: Jul 29, 1999 by Bart De Schutter # Last revised: May 2, 2006 by Bart De Schutter # # See http://www.deschutter.info/util/scripts.html for the latest version of # this script. # Status: public # Category: latex # Redefine warning trap to prevent warning for recursions that # go too deep. BEGIN { $SIG{'__WARN__'} = sub { warn $_[0] if $_[0] !~ /^Deep recursion on subroutine/ } } use strict; use Inout; ############################################################################### # # MAIN PROGRAM. # ############################################################################### my ($master_file,$final_def_file,@def_files,$fil); my (%command_def, %environment_def, %unknown_def, @info_nr_def); my ($ndefs, %info_def_taken, $include_unknowns, $max_pass); init_globs(); ($master_file, $final_def_file, $max_pass) = process_input_arguments (); #print "Master file: $master_file\n"; #print "Definitions file: $final_def_file\n\n"; check_existence_final_def_file ($final_def_file); @def_files=get_def_files ($master_file); #for $fil (@def_files) #{ # print "D: $fil\n"; #} if ( $#def_files >= 0 ) { process_def_files (@def_files); process_latex_file ($master_file); write_final_def_file ($final_def_file, $max_pass); } else { print STDERR "\nWARNING: $master_file contains no \\input commands\n". " in its preamble.\n\n"; } exit; ############################################################################### # # FUNCTION DEFINITIONS. # ############################################################################### ############################################################################### # # Process the input arguments and options. # ############################################################################### sub process_input_arguments { my ($i, $n_arg, $latex_file, $def_file, $option, $max_pass); $n_arg = @ARGV; if ( $n_arg == 0 ) { syntax_error('extract_defs requires at least 1 input argument.'); } $def_file = ""; $max_pass = 10; $latex_file = ""; for ( $i = 0; $i < $n_arg; $i++ ) { $option = $ARGV[$i]; if ( $option eq "-o" ) { $i++; if ( $i < $n_arg ) { $def_file = $ARGV[$i]; } else { syntax_error('option -o requires an extra argument.'); } } elsif ( $option eq "-p" ) { $i++; if ( $i < $n_arg ) { $max_pass = $ARGV[$i]; if ( !( $max_pass >= 0 ) ) { syntax_error('The argument of option -p should be a '. 'nonnegative integer.'); } } else { syntax_error('option -p requires an extra argument.'); } } elsif ( $option eq "-h" ) { system("sed '/^\$/q;/^#!/d;s/^# //;s/^#//' $0 >&2"); exit 1; } elsif ( $option =~ /^-/ ) { syntax_error('Unknown option $option or syntax error.'); } else { if ( $latex_file eq "" ) { $latex_file = $option; } else { syntax_error('extract_defs requires 1 input argument.'); } } } if ( $latex_file eq "" ) { syntax_error('extract_defs requires one input argument.'); } elsif ( $latex_file !~ /\.tex$/ ) { $latex_file .= ".tex"; } if ( $def_file eq "" ) { $def_file = strip_path_and_extension($latex_file).".tex_defs"; } elsif ( $def_file !~ /\.\w+$/ ) { $def_file .= ".tex"; } return ($latex_file, $def_file, $max_pass); } ############################################################################### # # Checks whether the definitions file already exists and if so, # whether it has been created by extract_defs. # ############################################################################### sub check_existence_final_def_file { my ($def_file) = @_; my ($line, $df_file); if ( -f $def_file ) { open (DEF, "<$def_file") || die("Cannot open $def_file"); $df_file = strip_path ($def_file); if (( ( defined($line=.) ) && ( $line !~ /^%%\s%% Definitions file $df_file created by extract_defs on /s ) )) { error_exit("Definitions file $def_file already exists and has not ". "been\n created by extract_defs. Exiting to prevent ". "possible corruption of existing data."); } close (DEF); } } ############################################################################### # # Create the definitions file and make a repass if necessary. # ############################################################################### sub write_final_def_file { my ($def_file, $max_pass, $pass) = @_; my ($df_file, $newdefs, $i, $def); if ( !defined($pass) ) { $pass = 0; } open (DEF, ">$def_file") || die("Cannot open $def_file"); $df_file = strip_path ($def_file); print DEF "%%\n%% Definitions file $df_file created by extract_defs on ". `date +"%b %-d, %Y at %T."`; print DEF "%%\n"; for ( $i=0; $i<$ndefs; $i++) { $def = $info_nr_def[$i]; #print "DEF = >$def<\n"; #print "Taken = $info_def_taken{$def}\n\n"; if ( $info_def_taken{$def} == 1 ) { print DEF $def; } elsif ( $info_def_taken{$def} == -1 ) { print DEF "\n% The following definition might be redundant:\n"; print DEF $def; print DEF "%\n\n"; } } close (DEF); if ( $pass < $max_pass ) { print "Processing definitions file $df_file: pass ".++$pass.".\n"; $newdefs = process_latex_file ($def_file, "defs"); # print $newdefs."\n"; if ( $newdefs == 1 ) { write_final_def_file ($def_file, $max_pass, $pass); } } else { print "\nWARNING: maximum number of passes ($max_pass) reached.\n". " Use the -p option to augment this number.\n\n"; } } ############################################################################### # # Extract the list of \input files in the preamble of given LaTeX file. # ############################################################################### sub get_def_files { my ($latex_file) = @_; my (@def_files, $line); @def_files = (); open (FIL, "<$latex_file") || die ("Cannot open $latex_file"); while ( defined($line=) ) { last if ( $line =~ /^\s*\\begin{document}/ ); if ( $line =~ /^\s*\\input{([\w\.+\-\#,\$]*)}/ ) # \w = alphanum + _ { @def_files = (@def_files,split(/,/,$1)); } } close (FIL); return (@def_files); } ############################################################################### # # Process the definition files. # ############################################################################### sub process_def_files { my (@def_files) = @_; my ($buffer, $fil, $line); for $fil (@def_files) { $fil = get_full_filename ($fil); print "Reading definitions from $fil.\n"; open (FIL, "<$fil") || die ("Cannot open $fil"); $buffer = ""; while ( defined($line=) ) { if ( ( $line !~ /^\s*%/ ) && ( $line !~ /^\s*$/ ) ) { $buffer .= remove_comment($line); # print "BUFFER!\n"; if ( definition_complete($buffer) ) { add_to_defs ($buffer); $buffer = ""; } } } if ( definition_complete($buffer) ) { add_to_defs ($buffer); } else { error_exit ("Incomplete definition in chunk\n${buffer}in file $fil."); } close (FIL); } } ############################################################################### # # Remove the comment from a LaTeX line but keep the % sign. # ############################################################################### sub remove_comment { my ($line) = @_; my ($part1, $part2); if ( $line =~ /(.*)?%(.*)/s ) { $part1 = $1; $part2 = $2; if ( $part1 !~ /\\$/ ) # no \% { return ($part1."%\n"); } else { return ($part1."%".remove_comment($part2)); } } return ($line); } ############################################################################### # # Get the full file name of a file in the TEXINPUTS path. # ############################################################################### sub get_full_filename { my ($fil) = @_; my ($dir, $newfil); if ( $fil !~ /.*\..*$/ ) { $fil .= ".tex"; } if ( -f $fil ) { return ($fil); } foreach $dir ( split(/:/,$ENV{'TEXINPUTS'}) ) { if ( $dir ne "." ) { $newfil = $dir."/".$fil; if ( -f $newfil ) { return ($newfil); } } } error_exit ("File $fil not found in the LaTeX search path."); } ############################################################################### # # Check whether a definition is complete. # ############################################################################### sub definition_complete { my ($def) = @_; # print ">>$def<<\nOK = ".n_args_ok($def)."\n"; return ( ( brace_level($def) == 0 ) && ( if_level($def) == 0 ) && ( n_args_ok($def) ) ); } ############################################################################### # # Determine the brace level of the given line. # ############################################################################### sub brace_level { my ($line) = @_; my ($brace_level, $part1, $brace, $part2); # print ">>$line<<\n"; $brace_level = 0; if ( $line =~ /(.*?)(}|{)(.*)/s ) { $part1 = $1; $brace = $2; $part2 = $3; # print "Part1: $part1\n"; # print "Part2: $part2\n"; # print "Brace: $brace\n"; if ( $part1 !~ /\\$/ ) # no \{ or \} { if ( $brace eq "{" ) # { { $brace_level = 1; } else # } { $brace_level = -1; } } if ( $part2 ne "" ) { $brace_level += brace_level($part2); } } return ($brace_level); } ############################################################################### # # Determine the if level of the given line. # ############################################################################### sub if_level { my ($line) = @_; my ($if_level, $if_command, $part2); $if_level = 0; if ( $line =~ /.*?(\\if|\\fi)(.*)/s ) { $if_command = $1; $part2 = $2; if ( $if_command eq "\\if" ) # \if { $if_level = 1; } else # \fi { if ( $part2 !~ /^[a-zA-Z]/ ) # not part of longer command { $if_level = -1; } } if ( $part2 ne "" ) { $if_level += if_level($part2); } } return ($if_level); } ############################################################################### # # Add a definition to the list of definitions. # ############################################################################### sub add_to_defs { my ($def, $complete_def) = @_; my ($command); if ( $def eq "" ) { return; } if ( !defined($complete_def) ) { $complete_def = $def; } #print "Orig:CCCCCC>>>$complete_def<<<<<$def<\n\n"; } } ############################################################################### # # Checks whether the command on the given line has enough arguments. # ############################################################################### sub n_args_ok { my ($def) = @_; my ($args, $part1, $part2, $br_level); $def =~ s/\s+//g; $def =~ s/%//g; if ( $def =~ /^\\(newcommand|renewcommand|providecommand)(.*)/ ) { # print "ARG: $2\n"; $args = $2; if ( ( $args =~ /^{\\[a-zA-Z]+}{/ ) || ( $args =~ /^{\\[a-zA-Z]+}\[\d+\]{/ ) ) { return (1) } else { return (0); } } elsif ( $def =~ /^\\(newenvironment|renewenvironment)(.*)/ ) { # print "ARG: $1\n"; $args = $2; if ( $args =~ /^{.*?}(\[\d+\]|){(.*)/ ) { ($part1, $part2, $br_level) = split_on_closing_brace ($2, 1); #print "ARG1: $part1\n"; #print "ARG2: $part2\n"; # print "BR_L: $br_level\n"; if ( ( $br_level != 0 ) || ( $part2 !~ /^{/ ) ) { return (0); } ($part1, $part2, $br_level) = split_on_closing_brace ($part2, 0); #print "ARG1: $part1\n"; #print "ARG2: $part2\n"; #print "BR_L: $br_level\n"; return ( $br_level == 0 ); } else { return (0); } } return (1); } ############################################################################### # # Given a string $line and the current brace level $brace_level, returns # the final brace level and the parts of $line before and after the closing # brace, if any. # ############################################################################### sub split_on_closing_brace { my ($line, $brace_level) = @_; my ($pre_brace, $post_brace, $part1, $part2, $brace); $pre_brace = $line; $post_brace = ""; if ( $line =~ /(.*?)([{}])(.*)/ ) { $part1 = $1; $brace = $2; $part2 = $3; if ( $part1 !~ /\\$/ ) # no \{ or \} { if ( $brace eq "{" ) # { { $brace_level++; } else # } { if ( --$brace_level == 0 ) { $pre_brace = $part1; $post_brace = $part2; return ($pre_brace, $post_brace, $brace_level); } } } ($pre_brace, $post_brace, $brace_level) = split_on_closing_brace ($part2, $brace_level); $pre_brace = $part1.$brace.$pre_brace; } return ($pre_brace, $post_brace, $brace_level); } ############################################################################### # # Searches the given LaTeX file for commands and environments. # ############################################################################### sub process_latex_file { my ($fil, $level) = @_; my ($newdefs, $line, $subfil); if ( !defined($level) ) { $level = "master"; } $newdefs = 0; print "Processing LaTeX file $fil.\n" if ( $level ne "defs" ); open (FIL, "<$fil") || die ("Cannot open $fil"); if ( $level eq "master" ) { while ( defined ($line = ) ) { #print "11".$line."\n"; last if ( $line =~ /^\s*\\begin{document}/ ); } } while ( defined ($line = ) ) { #print "22".$line."\n"; chomp($line); if ( $line =~ /^\s*\\input{(.*)}/ ) { for $subfil ( split(/,/,$1) ) { $newdefs |= process_latex_file (get_full_file_name($subfil), "sub"); # Note: Using ||= in the previous line prevents evaluation of # the process_latex_line command if $newdefs already # equals 1. } } else { $newdefs |= process_latex_line ($line); # Note: Using ||= in the previous line prevents evaluation of # the process_latex_line command if $newdefs already # equals 1. } } close (FIL); return ($newdefs); } ############################################################################### # # Searches a given LaTeX line for commands and environments. # ############################################################################### sub process_latex_line { my ($line) = @_; my ($newdefs, $command, $remainder); $newdefs = 0; cut_off_at_percent ($line); # print $line."\n"; if ( $line =~ /.*?\\([a-zA-Z]+)\s*(.*)/ ) { # print "Comm = $1\nArg = $2\n\n"; $command = $1; $remainder = $2; if ( $command eq "begin" ) { if ( $remainder =~ /{([a-zA-Z*]*)}(.*)/ ) { $command = $1; $remainder = $2; # print "Env = $command\n"; $newdefs = mark_def ($command, "environment"); } } elsif ( $command ne "end" ) { $newdefs = mark_def ($command, "command"); } $newdefs |= process_latex_line($remainder); # Note: Using ||= in the previous line prevents evaluation of # the process_latex_line command if $newdefs already # equals 1. } return ($newdefs); } ############################################################################### # # Put a definition on the list of necessary definitions. # ############################################################################### sub mark_def { my ($command, $category) = @_; my ($newdefs, $def); if ( $category eq "environment" ) { if ( ( defined($def = $environment_def{$command}) ) && ( $info_def_taken{$def} != 1 ) ) { $info_def_taken{$def} = 1; $newdefs = 1; # print " Known environment = $command\n"; } } else { if ( ( defined($def = $command_def{$command}) ) && ( $info_def_taken{$def} != 1 ) ) { $info_def_taken{$def} = 1; $newdefs = 1; # print " Known command = $command\n"; } if ( ( defined($def = $unknown_def{$command}) ) && ( $info_def_taken{$def} != 1 ) ) { $info_def_taken{$def} = 1; $newdefs = 1; # print " Known unknown command = $command\n"; } } # if ( !$newdefs ) # { # print "UNKNOWN = $command\n"; # } return ($newdefs); } ############################################################################### # # Remove the comment from a LaTeX line. # ############################################################################### sub cut_off_at_percent { my ($line) = @_; my ($part1, $part2); if ( $line =~ /(.*?)%(.*)/ ) { $part1 = $1; $part2 = $2; if ( $part1 !~ /\\$/ ) # no \% { return ($part1); } else { return ($part1."%".cut_off_at_percent($part2)); } } return ($line); } ############################################################################### # # Initialize the global variables. # ############################################################################### sub init_globs { %command_def = (); %environment_def = (); %unknown_def = (); @info_nr_def =(); %info_def_taken = (); $ndefs = 0; }