#!/usr/bin/perl -w
#
# Syntax: bib_extract [-h] [-o bibfile] latex_file [latex_files]
#
# Purpose: Searches the given latex_files for \cite, \citep, \citet,
#          \citeauthor, ..., and \nocite commands
#          and extracts the corresponding bibtex entries.
#          The program uses the environment variable $BIBINPUTS to locate
#          master .bib files that are not in the current directory or
#          that cannot be found using a relative or absolute path name.
#          The default extension for the LaTeX files is .tex.
#          If bibfile is not specified, the bibtex filename is derived
#          from the first LaTeX file.
#          The -h flag shows the help info for this command.
#
# Note: The command does not yet handle cross-references or \cite commands
#       in the bibtex entries. Use \nocite commands to add the missing
#       references in the LaTeX files.
#       Also, the arguments of \input and \include commands are not
#       processed (workaround: specify them as extra input arguments
#       to bib_extract).

# Created: Jul 26, 1999 by Bart De Schutter
# Last revised: Jun 14, 2009 by Bart De Schutter
#
# See http://www.deschutter.info/util/scripts.html for the latest version of
# this script.

# Status: public
# Category: latex

use strict;
use Inout;

###############################################################################
#
# MAIN PROGRAM.
#
###############################################################################

#use vars qw(%cite_keys $bib_files $cat_bib_files);   # global variables
my (%cite_keys, $bib_files, $cite_all);               # global variables
my (@preamble_defs, @bib_entries, %string_def, %string_used);
my ($bib_file, @latex_files);

# Initialize the global variables.
#   %cite_keys     : cite key => 1 (requested) or -1 (entry already extracted)
#   @preamble_defs : @preamble records found in the master .bib files
#   @bib_entries   : extracted bibtex records
#   %string_def    : @string key => definition
#   %string_used   : @string key => 1 if referenced by an extracted record
#   $bib_files     : comma-terminated list of master .bib files
#   $cite_all      : set to 1 when a \cite{*} / \nocite{*} is encountered
%cite_keys     = ();
@preamble_defs = ();
@bib_entries   = ();
%string_def    = ();
%string_used   = ();
$bib_files     = "";
$cite_all      = 0;

($bib_file, @latex_files) = process_input_arguments();
process_latex_files (@latex_files);
extract_bib_records ();
write_bib_file ($bib_file);
#list_missing_keys ();

exit;

###############################################################################
#
# FUNCTION DEFINITIONS.
#
###############################################################################

###############################################################################
#
# Process the input arguments and options.
#
# Reads @ARGV and returns the list ($bib_file, @latex_files):
#   - "-o name" selects the output file (".bib" is appended if missing);
#   - "-h" prints the leading comment block of this script to STDERR and
#     exits with status 1;
#   - any other argument starting with "-" is reported via syntax_error;
#   - every remaining argument is a LaTeX file (".tex" is appended if
#     missing).
# If no -o option is given, the output name is derived from the first LaTeX
# file.  syntax_error and strip_path_and_extension are not defined in this
# file (presumably provided by the Inout module).
#
###############################################################################

sub process_input_arguments {
    my @args        = @ARGV;
    my @latex_files = ();
    my $bib_file    = "";

    if ( @args < 1 ) {
        syntax_error("bib_extract requires at least 1 input argument.");
    }

    while ( @args ) {
        my $option = shift(@args);

        if ( $option eq "-o" ) {
            if ( @args ) {
                $bib_file = shift(@args);
                if ( $bib_file !~ /\.bib$/ ) {
                    $bib_file .= ".bib";
                }
            }
            else {
                syntax_error('option -o requires an extra argument.');
            }
        }
        elsif ( $option eq "-h" ) {
            # Pure-Perl equivalent of
            #   sed '/^$/q;/^#!/d;s/^# //;s/^#//' $0 >&2
            # so that a script path containing spaces or shell
            # metacharacters cannot break (or abuse) a shell command line.
            open(my $script_fh, '<', $0) || die("Cannot open $0: $!");
            while ( defined(my $text = <$script_fh>) ) {
                if ( $text =~ /^$/ ) {       # first blank line ends the help
                    print STDERR $text;
                    last;
                }
                next if $text =~ /^#!/;      # skip the shebang line
                $text =~ s/^# //;
                $text =~ s/^#//;
                print STDERR $text;
            }
            close($script_fh);
            exit 1;
        }
        elsif ( $option =~ /^-/ ) {
            # Double quotes are required here so that the offending option
            # actually shows up in the message.
            syntax_error("Unknown option $option or syntax error.");
        }
        else {
            if ( $option !~ /\.tex$/ ) {
                $option .= ".tex";
            }
            push(@latex_files, $option);
        }
    }

    # Options alone are not enough: the output name may have to be derived
    # from the first LaTeX file and there would be nothing to scan.
    if ( !@latex_files ) {
        syntax_error("bib_extract requires at least 1 LaTeX file.");
    }

    if ( $bib_file eq "" ) {
        $bib_file = strip_path_and_extension($latex_files[0]).".bib";
    }

    return ($bib_file, @latex_files);
}

###############################################################################
#
# Process the LaTeX files.
#
# For every file: reads it line by line, strips LaTeX comments, collects the
# lines of each paragraph (paragraphs are separated by lines that are blank
# after comment removal) into one buffer, and hands that buffer to
# process_buffer together with the first and last line number of the
# paragraph (used in error messages only).
#
###############################################################################

sub process_latex_files {
    my (@latex_files) = @_;

    foreach my $file ( @latex_files ) {
        print "Processing $file.\n";
        open(my $tex_fh, '<', $file) || die("Cannot open $file: $!");

        # $line_no counts lines relative to the line before $par_start.
        my $line_no   = 0;
        my $par_start = 1;
        my $buffer    = "";

        while ( defined(my $line = <$tex_fh>) ) {
            $line_no++;
            chomp($line);
            $line = cut_off_at_percent($line);
            if ( $line =~ /^\s*$/ ) {
                # The blank line itself is not part of the paragraph.
                process_buffer($buffer, $file, $par_start,
                               $par_start+$line_no-2);
                $buffer     = "";
                $par_start += $line_no;
                $line_no    = 0;
            }
            else {
                $buffer .= $line;
            }
        }
        if ( $buffer ne "" ) {
            process_buffer($buffer, $file, $par_start,
                           $par_start+$line_no-1);
        }
        close($tex_fh);
    }
}

###############################################################################
#
# Search the input buffer for \bibliography, \cite and \nocite commands
# and process the arguments.
# We assume that the optional argument of a \cite command does not contain
# any ].
#
# Arguments: the paragraph text, the name of the file it came from, and the
# first and last line number of the paragraph (the last three are used for
# diagnostics only).
# The buffer is consumed iteratively: after each recognized command the scan
# continues with the text following its closing brace.  A malformed argument
# is reported through error_exit (not defined in this file).
#
###############################################################################

sub process_buffer {
    my ($buffer, $data_file, $par_start, $par_end) = @_;

    #
    # Check for \bibliography{...}, for \nocite{...}, or for
    # \cite{...}, \citet{...}, ... (possibly with an optional argument)
    #
    while ( $buffer =~
            /\\(bibliography|nocite|cite[a-zA-Z]*|cite[a-zA-Z]*\[.*?\])\{(.*)/ ) {
        my ($command, $remainder) = ($1, $2);

        if ( $command eq "bibliography" ) {
            #
            # bibliography
            #
            if ( $remainder =~ /^([\w\d\s,._\/()\[\]*@\-:]*)\}(.*)/ ) {
                my ($file_list, $rest) = ($1, $2);
                add_to_bib_files($file_list);
                $buffer = $rest;
            }
            else {
                error_exit("Wrong argument in \\bibliography command in " .
                           "paragraph ".
                           "$par_start-$par_end of file $data_file.");
                return;    # never loop on the same text if error_exit returns
            }
        }
        elsif ( $remainder =~ /^\*\}(.*)/ ) {
            #
            # \cite{*} or \nocite{*}: select everything, but keep scanning
            # the rest of the paragraph so that a \bibliography command
            # following it is not lost.
            #
            my $rest = $1;
            print STDERR ("\nWARNING: \\$command\{*\} command in " .
                          "paragraph $par_start-$par_end of file\n" .
                          " $data_file.\n\n");
            $cite_all = 1;
            $buffer   = $rest;
        }
        elsif ( $remainder =~ /^([\w\d\s,.\/_()\[\]*@\-:]*)\}(.*)/ ) {
            #
            # cite command
            #
            my ($key_list, $rest) = ($1, $2);
            add_to_cite_keys($key_list);
            $buffer = $rest;
        }
        else {
            error_exit("Wrong argument in \\$command command in paragraph ".
                       "$par_start-$par_end of file $data_file.");
            return;        # never loop on the same text if error_exit returns
        }
    }
}

###############################################################################
#
# Add a label to the list of cite keys.
#
# The argument is the comma-separated content of a \cite-like command.
# Whitespace inside a key is removed; empty items (e.g. from "a,,b") are
# ignored.  Keys already present in %cite_keys keep their current state.
#
###############################################################################

sub add_to_cite_keys {
    my ($key_string) = @_;
    my @keys = split(/,/, $key_string);

    foreach my $key ( @keys ) {
        $key =~ s/\s//g;
        next if $key eq "";
        if ( !defined($cite_keys{$key}) ) {
            $cite_keys{$key} = 1;
        }
    }
}

###############################################################################
#
# Add a file to the list of master .bib files.
#
# The argument is the comma-separated content of a \bibliography command.
# If the base name of the output file and a master file are a prefix of one
# another, the user is asked for confirmation because the master file could
# be overwritten.  New names are appended to the global $bib_files, which is
# a comma-terminated list.
#
###############################################################################

sub add_to_bib_files {
    my ($bibfile_string) = @_;
    my ($bib_file_base, $answer);

    $bib_file_base = $bib_file;
    $bib_file_base =~ s/\.bib$//;
    $bib_file_base =~ s/.*\///;

    my @files = split(/,/, $bibfile_string);
    foreach my $file ( @files ) {
        $file =~ s/\s//g;
        next if $file eq "";

        # \Q...\E: file names are data, not patterns ("." or "[" in a name
        # must be matched literally).
        if ( ( $bib_file_base =~ /^\Q$file\E/ ) ||
             ( $file =~ /^\Q$bib_file_base\E/ ) ) {
            print STDERR "\nWARNING: Included bibtex file \"$file\" matches ".
                         "output\n";
            print STDERR " file \"$bib_file\".\n\n";
            print STDERR " Danger of overwriting $file.bib\n\n";
            print STDERR "Continue? y/n [n] ";
            $answer = <STDIN>;
            $answer = "" if !defined($answer);   # EOF counts as "no"
            chomp($answer);
            if ( $answer !~ /^y/i ) {
                print "\nAborting.\n\n";
                exit 1;
            }
            else {
                print "\nContinuing.\n\n";
            }
        }
        if ( ( $bib_files !~ /,\Q$file\E,/ ) &&
             ( $bib_files !~ /^\Q$file\E,/ ) ) {
            $bib_files .= $file.",";
        }
    }
}

###############################################################################
#
# Remove the comment from a LaTeX line.
#
# Returns the part of the line in front of the first % that starts a
# comment.  A % preceded by an odd number of backslashes (\%) is a literal
# percent sign and is kept; a % preceded by an even number (e.g. \\%, a line
# break followed by a comment) does start a comment.
#
###############################################################################

sub cut_off_at_percent {
    my ($line) = @_;

    # Walk over the line in units of "one ordinary character" or
    # "backslash plus the character it escapes"; the first % reached that
    # way is unescaped.
    if ( $line =~ /^((?:[^\\%]|\\.)*)%/ ) {
        return ($1);
    }
    return ($line);
}

###############################################################################
#
# Concatenate all bib files.
#
###############################################################################

#sub cat_all_bib_files
#{
#  my ($outfile) = @_;
#  my ($bib_file);
#
#  open (OUT, ">$outfile") || die ("Cannot open $outfile");
#  foreach $bib_file ( split(/,/,$bib_files) )
#  {
#    $bib_file = get_full_path ($bib_file);
#    open (BIB, "<$bib_file") || die ("Cannot open $bib_file");
#    append_file (\*OUT, \*BIB);
#    close (BIB);
#  }
#  close (OUT);
#}

###############################################################################
#
# Write the bibtex file.
#
# Writes, in this order, to the file named by the argument: the collected
# @preamble records, one @string definition for every string key that was
# marked as used and has a known definition, and the extracted entries.
# The @string definitions are written in sorted key order so that the
# output is reproducible from run to run.
#
###############################################################################

sub write_bib_file {
    my ($outfile) = @_;

    open(my $out_fh, '>', $outfile) || die("Cannot open $outfile: $!");

    if ( @preamble_defs ) {
        print $out_fh @preamble_defs;
    }

    foreach my $str_key ( sort(keys(%string_used)) ) {
        next if !$string_used{$str_key};
        my $str_def = $string_def{$str_key};
        next if !defined($str_def);
        print $out_fh "\@string\{".$str_key."=\"".$str_def."\"\}\n";
    }

    if ( @bib_entries ) {
        write_bib_entries($out_fh, @bib_entries);
    }

    # Buffered write errors (e.g. disk full) only surface at close.
    close($out_fh) || die("Cannot close $outfile: $!");
}

###############################################################################
#
# Extract bib data records.
#
# Reads every master .bib file listed in the global $bib_files and splits
# it into records: a record starts at a line whose first non-blank
# character is @ and runs up to the next such line.  Lines starting with %
# are dropped.  Each record is passed to process_bib_record.
#
###############################################################################

sub extract_bib_records {
    foreach my $master ( split(/,/, $bib_files) ) {
        my $path = get_full_path($master);
        open(my $bib_fh, '<', $path) || die("Cannot open $path: $!");

        my $record = "";
        while ( defined(my $line = <$bib_fh>) ) {
            if ( $line =~ /^\s*\@/ ) {
                # A new record starts: flush the one collected so far.
                process_bib_record($record);
                $record = $line;
            }
            elsif ( $line !~ /^%/ ) {
                $record .= $line;
            }
        }
        process_bib_record($record);
        close($bib_fh);
    }
}

###############################################################################
#
# Append a file to another file.
#
# Copies all lines from the input file handle to the output file handle;
# every line written ends with a newline, also when the last input line
# does not have one.
#
###############################################################################

sub append_file {
    my ($fh_out, $fh_in) = @_;

    while ( defined(my $line = <$fh_in>) ) {
        chomp($line);
        print $fh_out "$line\n";
    }
}

###############################################################################
#
# Determine the full pathname of a master .bib file.
#
# Appends ".bib" if missing.  The name is returned unchanged when it exists
# as given; otherwise the directories in $BIBINPUTS (colon separated) are
# tried in order.  "." and empty components are skipped: the former was
# covered by the first test, the latter would make the lookup probe the
# file system root.  If nothing is found, error_exit (not defined in this
# file) is called.
#
###############################################################################

sub get_full_path {
    my ($fil) = @_;

    if ( $fil !~ /\.bib$/ ) {
        $fil .= ".bib";
    }
    if ( -f $fil ) {
        return ($fil);
    }

    # $BIBINPUTS may be unset; treat that as an empty search path.
    my $search_path = defined($ENV{'BIBINPUTS'}) ? $ENV{'BIBINPUTS'} : "";
    foreach my $dir ( split(/:/, $search_path) ) {
        next if ( $dir eq "" ) || ( $dir eq "." );
        my $candidate = $dir."/".$fil;
        if ( -f $candidate ) {
            return ($candidate);
        }
    }
    error_exit("File $fil not found in the bibtex search path.");
}

###############################################################################
#
# Process a record from a bibtex data file.
#
# The record is classified by its first line:
#   - @preamble records are collected in @preamble_defs;
#   - @string{key = "definition"} records are stored in %string_def (the
#     first definition of a key wins) and entered in %string_used as
#     "not used yet";
#   - any other @type{label, ...} record is kept when its label was cited
#     (or when everything was requested through \cite{*} / \nocite{*});
#     the string keys it references are then marked as used.
# Anything else (including the empty record) is ignored.
#
###############################################################################

sub process_bib_record {
    my ($record) = @_;

    if ( $record =~ /^\s*\@preamble/i ) {
        push(@preamble_defs, $record);
    }
    elsif ( $record =~ /^\s*\@string\{\s*([^\s=]+)\s*=\s*"(.*)"\s*\}\s*/si ) {
        # The key must not swallow the blanks in front of the "=":
        # references to the string are looked up under the bare key.
        my ($str_key, $str_def) = ($1, $2);
        if ( !defined($string_def{$str_key}) ) {
            $string_def{$str_key}  = $str_def;
            $string_used{$str_key} = 0;
        }
    }
    elsif ( $record =~ /^\s*\@[a-zA-Z]*\{\s*([\w\d\s,._()\[\]*@\-:\/]*),/ ) {
        my $label = $1;
        $label =~ s/\s+$//;     # "@article{key ," must still match "key"

        if ( $cite_all || defined($cite_keys{$label}) ) {
            if ( defined($cite_keys{$label}) &&
                 ( $cite_keys{$label} == -1 ) ) {
                print STDERR "\nWARNING: Duplicate entry in bibtex files ".
                             "for cite key $label.\n\n";
            }
            $cite_keys{$label} = -1;  # to indicate that it has been removed.
            push(@bib_entries, $record);
            mark_used_strings($record);
        }
    }
}

###############################################################################
#
# List the missing keys.
#
# Prints a warning on STDERR for every cite key that is still in the
# "requested" state (value 1), i.e. for which no entry was extracted.
# Keys are reported in sorted order.
#
###############################################################################

sub list_missing_keys {
    foreach my $label ( sort(keys(%cite_keys)) ) {
        if ( $cite_keys{$label} == 1 ) {
            print STDERR "\nWARNING: Key $label not defined in the ".
                         "given bibtex files.\n\n";
        }
    }
}

###############################################################################
#
# Writes a list of bib entries to a file, separated by 2 blank lines.
#
# Arguments: an output file handle followed by the records.  Two newlines
# are written in front of the records and one after them; the records
# themselves are written unchanged.
#
###############################################################################

sub write_bib_entries {
    my ($fh_out, @rec_list) = @_;

    print $fh_out "\n\n";
    print $fh_out @rec_list;
    print $fh_out "\n";
}

###############################################################################
#
# Extracts string keys from a record and marks them as used.
#
# The record is split into lines; the first line (entry type and label) and
# the last line (normally the closing brace) are skipped.  A line containing
# "=" starts a new field; lines without "=" are continuation lines and are
# appended to the current field.  Every complete field is handed to
# process_bib_line.
#
###############################################################################

sub mark_used_strings {
    my ($record) = @_;
    my @lines = split(/\n/, $record);
    my $field = "";

    foreach my $line ( @lines[1..$#lines-1] ) {
        if ( $line =~ /=/ ) {
            process_bib_line($field);
            $field = $line;
        }
        else {
            $field .= $line;
        }
    }
    process_bib_line($field);
}

###############################################################################
#
# Processes a bibtex line by extracting string keys and marking them as
# used.
#
# Everything that cannot be a string key is removed from the field value
# (escaped characters, {...} groups, "..." literals); each remaining
# blank-separated token is entered in %string_used.  Tokens such as "#" or
# numbers are marked as well; that is harmless because only keys that also
# have an @string definition are ever written out.
#
###############################################################################

sub process_bib_line {
    my ($bib_line) = @_;

    $bib_line =~ s/,\s*$//;       # Remove trailing ,
    $bib_line =~ s/^.*?=//;       # Remove bibtex keyword assignment.
    $bib_line =~ s/\\\\//g;       # Remove \\
    $bib_line =~ s/\\[{}]//g;     # Remove \{ and \}

    # Remove brace groups, innermost first.  The braces are escaped because
    # an unescaped literal { in a pattern is deprecated in recent perls.
    while ( $bib_line =~ /(.*)\{.*?\}(.*)/ ) {
        $bib_line = $1.$2;
    }

    # Quoted literals are text, not string keys; without this step every
    # word of a "..." value would be marked as a used @string.
    $bib_line =~ s/"[^"]*"/ /g;

    # split(' ', ...) splits on runs of whitespace and ignores leading
    # whitespace, so no empty key is recorded.
    foreach my $key ( split(' ', $bib_line) ) {
        $string_used{$key} = 1;
    }
}