#!/bin/bash -f
# ADDBIB 0.2, Aug, 2007  K. Goldstein
VERSION=0.2
# 
#######################################################################
# 
#  ADDBIB 0.1, Feb, 2007  K. Goldstein
#  (based on FINDBIB 2.0 by F. Nesti).
#
#   Usage: findbib foo.tex
#
#  The script searches for new citations in a LaTeX AUX file and adds them to
#  a BibTeX database. If the LaTeX file is called foo.tex, it assumes the
#  BibTeX file is called foo.bib
#
#  Every \cite{...} of a reference in SPIRES-BibTeX standard form
#  (Author:YEARaa) or in the form of an arXiv reference
#  (arXiv-name/yymmnnn OR yymmnnn OR yymm.nnnn)
#  is searched for on WWW-SPIRES.SLAC.STANFORD.EDU, and all the BibTeX records
#  found are added to the file foo.bib
#
#  The value of the variable ARXIV sets the default arxiv to search if the
#  cite is of the form \cite{yymmxxx}
#
# To change the default arxiv change the value below:
ARXIV='hep-th/'
#ARXIV='hep-ph/'
#
####################################################################
#
# The script uses awk, sed, lynx, bibtex and LaTeX. You will have to install
# all of these for it to work.
#
####################################################################
# How the script works:
#
#
# Steps: 
#
# generates an AUX file with LaTeX,
# searches the AUX file for citation labels,
# compares them with the records in the file $1.bib,
# searches SPIRES for the new records,
# adds them to $1.bib,
# calls BibTeX on the file (!).
#
#######################################################################
#
#  ADDBIB 0.2, May, 2007  K. Goldstein
#
#  added support for the new arXiv format yymm.xxxx
#
#######################################################################
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation (version 2)
#    
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#    
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
########################################
#
# Useful functions

        #######################
        # randomly pick a mirror
        ######################

get_url() {
        # Pick one of the SPIRES mirrors at random and store the prefix of its
        # search URL (everything up to and including the "?") in the variable
        # whose name is given as $1.
        #
        # Globals:   BASE (written) - address of the mirror that was picked
        # Arguments: $1 - name of the variable that receives the URL prefix
        # Outputs:   "Querying <mirror>" on stdout
        local -a mirrors=(
                "http://www-library.desy.de"
                "http://www-spires.fnal.gov"
                "http://usparc.ihep.su"
                "http://www-spires.dur.ac.uk"
                "http://www.yukawa.kyoto-u.ac.jp"
                "http://www.slac.stanford.edu"
                "http://www.spires.lipi.go.id"
        )
        local index=$(( RANDOM % ${#mirrors[@]} ))

        BASE=${mirrors[index]}
        echo "Querying $BASE"

        # printf -v assigns by name without re-parsing the value as shell
        # code, so the "?" in the URL can never be globbed or evaluated.
        printf -v "$1" '%s' "${BASE}/spires/find/hep/www?"
}

get_labels() {
# Work out which citation labels of the AUX file are not yet in the BibTeX
# database.
#
# Globals:   FILE (read)           - the LaTeX .aux file
#            BIB (read)            - the BibTeX database (may not exist yet)
#            nothing_new (written) - 1 if every label is already known, else 0
# Files:     writes the new labels, one per line, to ./bib.labels (the file is
#            only created when there is at least one new label); uses
#            ./bib.labels_a and ./bib.labels_b as scratch files
# Outputs:   progress banners and the label lists on stdout
local label_a

# A bib.labels left behind by an interrupted run would be appended to, and
# would also make the "nothing new" test below give the wrong answer.
rm -f bib.labels

echo "---------- EXTRACTING LABELS FROM $FILE: -------------"

# split every line at "}" and keep what follows each "\citation{"
awk -F"}" '/\\citation\{/ {for(i=1;i<NF+1;i++) print $i}' "$FILE" \
	| awk '/\\citation\{/ {print substr($0,match($0,/\\citation\{/)+RLENGTH,length)}' \
	| sort -u | tee bib.labels_a

echo "---------- EXTRACTING RECORDS FROM $BIB: -------------"

if [[ -f $BIB ]]
then
	# the key is whatever follows the first "{" up to the first comma,
	# blank or "}"
	awk -F"{" '/Article/ {
		key = $2
		sub(/^[ \t]+/, "", key)
		sub(/[, \t\r}].*$/, "", key)
		print key
	}' "$BIB" | sort | tee bib.labels_b
else
	# no database yet: every label is new
	: > bib.labels_b
fi

echo "---------- FINDING NEW LABELS: -------------"

while read -r label_a
do
	# an empty label cannot be looked up
	[[ -n $label_a ]] || continue
	# -x -F: compare whole keys literally, so that "Foo:2007ab" is not
	# taken as present because of "Foo:2007abc", and the "." in
	# "0705.1234" does not act as a wildcard
	if ! grep -q -x -F -- "$label_a" bib.labels_b
	then
		printf '%s\n' "$label_a" | tee -a bib.labels
	fi
done < bib.labels_a

rm -f bib.labels_a bib.labels_b

nothing_new=0

if [[ ! -f bib.labels ]]
then
	nothing_new=1
fi
}

########################################################

# Process the input parameters

# Exactly one argument is expected: the name of an existing LaTeX file.
# Sets TEXF (name without ".tex"), FILE (the .aux file) and BIB (the .bib
# file), and runs LaTeX once so that the .aux file is up to date.
if [[ -z ${1:-} ]]
then
    echo "usage: findbib texfile.tex" >&2
    exit 1
fi

if [[ ! -f $1 ]]
then
    echo "No such file $1." >&2
    exit 1
fi

TEXF=${1%.tex}
FILE=$TEXF.aux
BIB=$TEXF.bib

# The exit status of latex is deliberately not checked: a failed or missing
# run may still leave a usable .aux file from an earlier run. What matters is
# whether the .aux file exists, which is tested next.
latex "$1"

if [[ ! -f $FILE ]]
then
    echo "No such file $FILE." >&2
    exit 1
fi

echo "$TEXF $FILE $BIB"

# extract the labels from the .aux file

# Main driver: find the new labels, look each of them up on a SPIRES mirror,
# append the records that are found to $BIB and finally run BibTeX.
get_labels

if [[ $nothing_new -eq 1 ]]
then
    echo "No new citations"
    exit
fi

# Remove the scratch files however the script ends; a bib.labels left behind
# by an interrupted run would confuse the next one.
trap 'rm -f bib.labels out' EXIT

# start getting the records from spires

echo "---------- REQUESTING RECORDS FROM spires.slac.stanford.edu:"

while read -r label
do
    echo "------------------------------------------------------------"
    flag=0
    echo "Searching for label:"
    ###########################################
    # is the record of the form Author:yyyyxx ?
    ###########################################
    if [[ $label = *:[0-9][0-9][0-9][0-9][a-z][a-z]* ]]
    then
        AUTHOR=${label%%:*}
        rest=${label#*:}
        # year: drop the trailing letters; key: drop the four leading characters
        DATE=$(printf '%s\n' "$rest" | sed -e 's=[a-z]*$==')
        KEY=$(printf '%s\n' "$rest" | sed -e 's=^....==')
        # add a space in two word names:
        AUTHOR_SPACE=$(printf '%s\n' "$AUTHOR" | sed -e 's/\([a-z]\)\([A-Z]\)/\1 \2/g')
        SEARCH="A+$AUTHOR_SPACE+AND+DATE+$DATE"
        echo "$AUTHOR_SPACE $DATE $KEY"
    ###########################################
    # is the record of the form *-*/yymmxxx ?
    ###########################################
    elif [[ $label = *-*/[0-9][0-9][0-9][0-9][0-9][0-9][0-9] ]]
    then
        SEARCH="EPRINT $label"
        printf '%s\n' "$label"
    ###########################################
    # is the record of the form yymmxxx ?
    ###########################################
    elif [[ $label = [0-9][0-9][0-9][0-9][0-9][0-9][0-9] ]]
    then
        SEARCH="EPRINT $ARXIV$label"
        printf '%s\n' "$label"
    ###########################################
    # is the record of the new form yymm.xxxx ?
    ###########################################
    elif [[ $label = [0-9][0-9][0-9][0-9]\.[0-9][0-9][0-9][0-9] ]]
    then
        SEARCH="EPRINT $label"
        printf '%s\n' "$label"
    ###########################################
    # is the record some other format ?
    ###########################################
    else
        echo "$label: PERSONAL LABEL"
        flag=1
    fi

    # i: number of results to skip on the next request; NN: total number of hits
    i=0
    NN=0

    # start a loop to search through the result pages

    while [[ $flag = 0 ]]
    do
        # download the webpage from spires:
        get_url URL
        # a URL must not contain raw blanks; "+" stands for a blank in a query
        QUERY=${SEARCH// /+}
        OPTIONS="rawcmd=$QUERY&skip=$i&FORMAT=wwwbriefbibtex"
        # stdin is redirected so lynx can never eat the list of labels the
        # enclosing loop is reading
        lynx -source "$URL$OPTIONS" < /dev/null | sed -e '/@Article{/ s/ //g' > out

        # increment our search - there are 25 results/page
        i=$((i + 25))

        # find out the number of hits for our search
        if [[ $NN -eq 0 ]]
        then
            if grep -q Paper out
            then
                NN=$(sed -n -e '/Paper/ s/^.*of <b>\([0-9][0-9]*\).*/\1/p' out \
                    | sort -u | head -n 1)
                # anything that is not a plain number would break the numeric
                # tests below and could keep this loop running forever
                [[ $NN =~ ^[0-9]+$ ]] || NN=0
                NN=$((10#$NN))
            fi
            echo "$NN record(s) found"
        fi
        if [[ $NN -gt 25 ]]
        then
            echo "Searching----->  $i"
        fi

        # check to see if there are no results:
        if [[ $NN -eq 0 ]]
        then
            flag=1
            echo " PAPER NOT FOUND."
        # check to see if the label we want is on the page
        # (literal match: a "." in the label is not a wildcard):
        elif grep -q -F -- "$label" out
        then
            flag=1
            echo "Found:"
            if [[ $SEARCH = EPRINT* ]]
            then
                if grep -q -F -- "<!-- START RESULTS -->" out
                then
                    # file the record under the label used in the \cite
                    echo "@Article{$label," >> "$BIB"
                    awk '/@Article\{/,/^}/' out | awk '!/@Article/' \
                        | tee -a "$BIB" \
                        | grep title | sed 's/  */ /g'
                else
                    echo "PAPER NOT FOUND"
                fi
            else
                # copy from the line containing the label up to the closing
                # "}" of that record; the label is passed through the
                # environment so that it is never parsed as a regex
                key="$label" awk '
                    index($0, ENVIRON["key"]) && !inside { inside = 1 }
                    inside { print; if (/^}/) inside = 0 }
                ' out \
                    | tee -a "$BIB" \
                    | grep title | sed 's/  */ /g'
            fi
        # check to see if there are papers left: once i results have been
        # skipped and i >= NN, the next page would be empty
        elif [[ $i -ge $NN ]]
        then
            flag=1
            echo " PAPER NOT FOUND."
        fi

    done

done < bib.labels

# clean up
rm -f bib.labels out

echo "---------- DONE. RECORDS WRITTEN TO $BIB."
echo "---------- NOW RUNNING BIBTEX:"

bibtex "$TEXF"