#!/usr/bin/perl
#
# MakeBackref
#
# by Mark Potse <m.potse@amc.uva.nl>
#
# Fixes bibliographic backreferences made by LaTeX with the
# backref package (part of the hyperref bundle).
#
# $Id: MakeBackref,v 1.1 2002/03/22 11:46:34 potse Exp potse $
#

# Message shown when the script is called with a wrong number of arguments.
$usage = "USAGE: MakeBackref infile.brf [outfile]\n";

#
# This script improves a .brf file, which contains bibliographic
# backreferences made by LaTeX with the backref package (part of
# the hyperref bundle). The backref macros write a line in the
# brf file for each \cite command. The line specifies one or more
# labels, a page number, a section number, and optional hyperref
# information. This script gathers the page numbers for each label,
# and writes a new brf file containing one line for each label,
# abusing the page information for a list of page numbers, complete
# with "page" or "pages", commas, "and", and a period in the right
# places. The resulting file can be used by the pbackref package. 
#
# The second input argument is optional; if it is omitted, the first
# specifies both input and output file. This can be done because
# input and output stages are separated in this program, and it writes
# a tag in the output file that prevents it from handling it again.
#

#
# Take the input file and the optional output file from the command
# line. With a wrong number of arguments there is nothing sensible to
# do, so show the usage message and stop instead of carrying on with
# an undefined file name.
#
($infile, $outfile) = @ARGV;
if(@ARGV < 1 || @ARGV > 2){
    print STDERR $usage;
    exit 1;
}
#
# No (or an empty) output name means: rewrite the input file in place.
# Test for definedness/emptiness rather than truth, so that a file
# that happens to be called "0" is still accepted.
#
if(!defined $outfile || $outfile eq ''){
    $outfile = $infile;
}

#
# First we slurp the entire brf file in a hash, with an entry
# for each label. The value of each element is a space-separated list
# of page numbers.
#
# As an extra service, we remove spaces from the cite commands,
# to prevent mis-matches later.
#
#
# Three-argument open: the file name is never interpreted as a mode,
# and a missing or unreadable file is reported instead of silently
# producing an empty result.
open(INFILE, '<', $infile)
    or die "MakeBackref: cannot open $infile for reading: $!\n";
while(<INFILE>){
    #
    # Files written by this script carry a tag; refuse to process
    # them a second time.
    #
    if(/%% Created by MakeBackref/){
        print "already handled this file!\n";
        exit;
    }
    s/\s+//g;                        # remove spaces (and the newline)
    next if $_ eq '';                # a blank line is not an error
    #
    # \backcite{labels}{{page}{section}{hyperref info}}
    # All literal braces are escaped: newer Perl versions warn about
    # or reject an unescaped "{" inside a pattern.
    #
    if(/\\backcite\{(.*)\}\{\{(.*)\}\{(.*)\}\{(.*)\}\}/){   # no space here anymore
        my $page = $2;               # save before anything can reset $2
        foreach $label (split ',', $1){
            $Refs{$label} .= "$page  ";     # page
        }
    }
    else{
        # The newline was stripped above, so add one to keep the
        # diagnostics on separate lines.
        print "match failed on: $_\n";
    }
}

#
# Close the input file before opening the output file, because
# they may be the same file. Add a tag that can be used to prevent
# re-using this file.
#
close INFILE;
# Three-argument open so that the output name is taken literally, and
# stop with a message if the file cannot be created: otherwise every
# later print would be lost without notice.
open(OUTFILE, '>', $outfile)
    or die "MakeBackref: cannot open $outfile for writing: $!\n";
# Tag line; the reading stage looks for it to avoid handling a file twice.
print OUTFILE "%% Created by MakeBackref from $infile\n\n";

$keys = keys %Refs;              # scalar context --> number of labels
print "$keys references... ";

#
# Now write a new \backcite command for each reference.
#
# Labels are written in sorted order so that the output file is the
# same from run to run.
foreach $label (sort keys %Refs){
    #
    # Drop a page that repeats the previous one (several citations of
    # the same label on one page). Compare as strings: page numbers
    # may be non-numeric (e.g. roman numerals), and those would all
    # look equal to a numeric comparison.
    #
    my @pages;
    my $prev;
    foreach my $page (split ' ', $Refs{$label}){
        if(!defined $prev or $page ne $prev){
            push @pages, $page;         # push if unequal to previous
        }
        $prev = $page;
    }
    #
    # Build the list of items to print, collating three or more
    # subsequent pages into "first--last". Only plain decimal page
    # numbers take part in a range.
    #
    my @items;
    for(my $n=0; $n <= $#pages; $n++){
        my $j = $n;
        while($j < $#pages
              and $pages[$j]   =~ /^\d+$/
              and $pages[$j+1] =~ /^\d+$/
              and $pages[$j+1] == $pages[$j]+1){ $j++; }
        if($j > $n+1){
            push @items, $pages[$n] . "--" . $pages[$j];
            $n = $j;                       # skip them
        }else{
            push @items, $pages[$n];
        }
    }
    print OUTFILE "\\backcite {$label}{{";
    #
    # one or more than one pages...
    #
    if(@pages == 0){
        print OUTFILE " ";
    }elsif(@pages == 1){
        print OUTFILE "page~";         # Cited on ...
    }else{
        print OUTFILE "pages ";        # Cited on ...
    }
    #
    # Punctuation is decided on the collated items, not on the raw
    # page indices, so a range at the end or the start of the list
    # still gets the right "and" and commas.
    #
    if(@items == 1){
        print OUTFILE $items[0], ".";
    }elsif(@items == 2){
        print OUTFILE $items[0], " and~", $items[1], ".";
    }elsif(@items > 2){
        my $last = pop @items;
        print OUTFILE join(", ", @items), ", and~", $last, ".";
    }
    # The section and hyperref fields are dummies; only the page field
    # carries information in the rewritten file.
    print OUTFILE "}{1}{foo}}\n";
}

# Buffered write errors (full disk, ...) only show up when the handle
# is closed, so check the close before claiming success.
close(OUTFILE)
    or die "MakeBackref: error while writing $outfile: $!\n";
print "done.\n";
