#!/usr/bin/perl # # $Id: bibkeys,v 1.22 2002/08/31 20:00:39 potse Exp $ # # Adds cite keys to a .bib file, and removes duplicates (= same # authors, title, and journal). # # This is useful with .bib files generated by Reference Manager using # bibtex.os, which may not have useful cite keys. # # This program uses the Text::BibTeX Perl module # $usage = "\n USAGE: $0 infile outfile\n\n"; use Text::BibTeX; $fi = shift or die($usage); $fo = shift or die($usage); open FO, ">$fo" or die "cannot open output file"; $date = `date`; $Revision = "(version"; $noisiveR = ")"; print FO "% bibkey $Revision: 1.22 $noisiveR output; $date\n\n" or die "cannot open output file"; $infile = new Text::BibTeX::File "$fi"; # # # # either use this or use write_entry below # # # close FO; # $outfile = new Text::BibTeX::File ">$fo"; $totrecs = 0; while ($entry = new Text::BibTeX::Entry $infile){ $totrecs++; # # unpack the record and generate components for citekey # warn "error in input" unless $entry->parse_ok; ($author, $year) = $entry->get('author','year'); $old_key = $entry->key; # cannot get these with ->get $type = $entry->type; @authors = $entry->names('author'); @lparts = $authors[0]->part('last'); # last name of first author @vparts = $authors[0]->part('von'); # von name of first author $name = $lparts[0]; # first part of last name, used for key ($name, $rem) = split(' ', $name); # make sure there are no spaces in it $name =~ s/[{}]//g; # and remove braces $flname = join(' ', (@vparts, @lparts)); # print name # # remove trailing period from title field # $title = $entry->get('title'); if($title =~ /(.*)\.$/){ $entry->set('title', $1); } # # create a unique identification of each entry # $ident = ""; foreach $a (@authors){ $ident .= join '', $a->part('first'); $ident .= join '', $a->part('von'); $ident .= join '', $a->part('last'); $ident .= join '', $a->part('jr'); } $ident .= $title . $entry->get('journal'); $ident =~ s/\s//g; # ignore spaces # print "$ident\n"; if($Handled{$ident}){ # skip duplicates $dk = $Handled{$ident}; print " duplicate of [$dk]\n"; next; } # # determine the new citekey, and add bibkeys specific fields # if(length($name)>7){ $key = lc sprintf("%.4s%d", $name, $year); }else{ $key = lc sprintf("%s%d", $name, $year); } if($Keys{$key}){ $citekey = $key . chr($Keys{$key}+96); # make it unique }else{ $citekey = $key; } $Keys{$key}++; # $entry->set('bibkeys_nr', $totrecs, 'bibkeys_ok', $old_key); $entry->set_key($citekey); # cannot set this with ->set # # For duplicate recognition. Could store anything as a value; # only the key matters, but it may come in handy later. # $Handled{$ident} = $citekey; # # remove unneccessary fields # $entry->delete('note', 'keywords'); # don't need these $entry->delete('reftype'); # RefMan/bibtex.os debugging info # # insert/protect spaces in journal name # if($entry->exists('journal')){ $journal = $entry->get('journal'); $journal =~ s/\.([A-z])/\.\\ \1/g; # not at end! $journal =~ s/\s+/ /g; $entry->set('journal', $journal); } # # tell what we did # print sprintf "%3d. %-14s $Keys{$key} $type by $flname\n", $totrecs, "[$citekey]"; # # pack and write # # $entry->write($outfile); # Text::BibTeX method # write_entry($entry); } ############# end of main program ######################################## ## ## functions ## sub write_entry($entry) { $type = $entry->type; $key = $entry->key; print FO "\@$type" . "{$key,\n"; foreach $f ($entry->fieldlist){ write_field($f, $entry->get($f)); } print FO "}\n\n"; # finish entry } # # write a single field; special handling for some fields # # Variables used by the formats start with $fmt_ # sub write_field($field, $value) { my $field = shift; my $value = shift; $fmt_field = $field; # used by format FO->format_name("Bibfield"); # default format if(($field =~ "month") && ($value =~ /^...$/)){ $fmt_value = "$value,"; # no braces for simple month $fmt_value =~ tr/[A-Z]/[a-z]/; # make lowercase write FO; }elsif($field =~ /(author)|(editor)/){ # # special formatting for authors/editors: # a special format is used, employing an array (@fmt_aut) # that is shift'ed to force one author on each line # @authors = $entry->split ('author'); $tmp = join ' and;', @authors; # re-insert 'and' @fmt_aut = split ';', "{$tmp},"; $fmt_first_author = shift @fmt_aut; FO->format_name("Bibfield_author"); write FO; }elsif($field =~ /(abstract)|(institution)/){ # special formatting for $fmt_value = "{$value},"; # multi-paragraph fields @abslines = split "\n\n", $fmt_value; $fmt_firstline = shift @abslines; FO->format_name("Bibfield_abstract_first"); # first paragraph write FO; FO->format_name("Bibfield_abstract"); # other paragraphs foreach $fmt_line (@abslines){ write FO; } }else{ if(length($field) > 18){ FO->format_name("Bibfield_long"); } $fmt_value = "{$value},"; # attach braces and comma write FO; } } # # format definitions for the write's above: # format Bibfield = @<<<<<<<<<<<<<<<<= ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $fmt_field, $fmt_value ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $fmt_value . # # for fieldnames longer than 18 characters # format Bibfield_long = @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $fmt_field, = ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $fmt_value ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $fmt_value . # # A more compact format for abstracts and institutions; # need two formats and a write for each paragraph to force newlines. # Note that there's an empty line in the second format. Each `line' is in # fact a paragraph. # format Bibfield_abstract_first = @<<<<<<<<<<<<<<<<= ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $fmt_field, $fmt_firstline ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~ $fmt_firstline ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $fmt_firstline . format Bibfield_abstract = ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $fmt_line . # # format for authors # format Bibfield_author = @<<<<<<<<<<<<<<<<= ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $fmt_field, $fmt_first_author ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ $fmt_first_author ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~ shift @fmt_aut .