#! /usr/bin/env python3
# make_pubs_3.py
from __future__ import print_function # python3 style print

# Read ~/tx/resume/input/pubs.tex and generate html files

# Note: First part is almost identical to ~/tx/res/input/rev_pubs.py

import sys
import time
import re
# For regular expression handling, esp. re.sub

#===============================================
# Main routine
#===============================================

def main():
  """Read ~/tx/resume/input/pubs.tex and generate two HTML publication
  lists in the current directory: pub_list.html (public) and
  pub_list_local.html (local version that additionally shows paper ids,
  bibtex tags, and private links)."""

  global if_aip,fp,fl,paper_id

  # Read the whole LaTeX source; entries are separated by blank lines
  with open('/home/dhv/tx/resume/input/pubs.tex','r') as f:
    infile=f.read()

  # Remove spaces in otherwise blank lines
  infile=re.sub('\n +\n','\n\n',infile)
  # Two or more blank lines become just two
  infile=re.sub('\n\n+','\n\n',infile)

  # Convert string-file into list of pubs
  publist=infile.split('\n\n')
  # Strip final \n from last entry for consistency
  if publist[-1][-1]=='\n':
    publist[-1]=publist[-1][0:-1]
  num=len(publist)

  # Note that entry zero is really just the %%% comment block
  print(num-1,'entries found in ~/tx/resume/input/pubs.tex...')

  # Reverse the list (newest entry first), build the parallel index list
  # of original entry numbers, and restore a trailing \n on each entry
  ind=[]
  entries=[]
  for j in reversed(range(1,num)):
    entries+=[publist[j]+'\n']
    ind+=[j]
  n=len(entries)

  # Open two files to write
  fp=open('pub_list.html','w')
  fl=open('pub_list_local.html','w')

  # Print headers
  header=get_text('header')
  fp.write(header)
  fl.write(header)

  no_tag_list=[]   # entry numbers above 120 that lack a paper_id tag

  # Now step through the entries and parse each

  for j in range(n):
    # write anchor + numbered <li> header for new entry
    nnn='%d' % ind[j]
    nnt='   '+nnn
    entry_head='<a name="'+nnn+'">\n<p><li value="'+nnt[-3:]+'">\n'
    fp.write(entry_head)
    fl.write(entry_head)

    entry=entries[j]
    mtext=''        # accumulates LaTeX text until %break/%link flushes it
    if_aip=False    # set True by parse_link when an AIP journal is seen
    lines=entry.split('\n')

    # first line should be an id line: "%id NUMBER [paper_id [bib_tag]]"
    if lines[0].startswith('%id '):
      line=re.sub(' +',' ',lines[0])  # Remove dup spaces
      parts=line.split(' ')
      lenp=len(parts)
      if lenp < 2:
        print('*** id line has no number')
        print(lines[0])
        print('*** HALTING EXECUTION ***')
        sys.exit(1)
      if int(parts[1]) != ind[j]:
        print('*** id entry number does not match for entry')
        print(lines[0])
        print('*** HALTING EXECUTION ***')
        sys.exit(1)
      if lenp == 2:
        paper_id=''
        if ind[j]>120: no_tag_list+=[ind[j]]
      if lenp >= 3:
        paper_id=parts[2]
      if lenp >= 4:
        bib_tag=parts[3]
      lines=lines[1:]
    else:
      print('*** Error: entry does not start with id tag ***')
      print('\n'.join(lines))
      print('*** HALTING EXECUTION ***')
      sys.exit(1)

    # Process the remaining lines of the entry
    for line in lines:
      line=line+'\n'
      if line.startswith('%html '):
        line=line[6:]            # pass raw HTML through unchanged
      if line.startswith('%priv '):
        # mark private text; parse_latex routes it to the local file only
        line='LOC:<font color="green">'+line[6:-1]+'</font>:LOC\n'
      if line.startswith('%break'):
        mtext=mtext+'<br>\n'
        parse_latex(mtext)
        mtext=''
      elif line.startswith('%link '):
        parse_latex(mtext)
        mtext=''
        parse_links(line[6:])
      else:
        comment=line.startswith('%') or line.startswith('\\null')
        if len(line)>1 and not comment:
          mtext=mtext+line
    mtext=mtext+'\n'
    parse_latex(mtext)

    # local file additionally shows the paper_id and (if any) bibtex tag
    if paper_id!='':
      fl.write('&nbsp&nbsp<em><font color="green">'+paper_id+'</font></em>\n')
    if lenp >= 4:
      fl.write('&nbsp&nbsp<em><font color="magenta">'+bib_tag+'</font></em>\n')

  # Print footers and close the output files
  footer=get_text('footer')
  fp.write(footer)
  fl.write(footer)
  fp.close()
  fl.close()

  # Report entries above 120 that are missing tags, newest first
  nnn=len(no_tag_list)
  if nnn>0:
    tt='Later missing tags: '   # above 120
    for j in range(nnn):
      tt+='%4d' % no_tag_list[nnn-j-1]
      if j%15 == 14: tt+='\n'
    print(tt)

  # Announce operation
  print('Done.')

  return

#===============================================
# Parse latex and print
#===============================================
def parse_latex(text):
  """Convert one chunk of LaTeX-flavored publication text to HTML and
  write it to both output files (globals fp and fl).

  Text bracketed by LOC:...:LOC (produced from %priv lines) is written
  only to the local file fl.  If the chunk contains a title-opening ``
  quote, everything before it is treated as an author list and is
  abbreviated via parse_auth()."""
  global if_aip,fp,fl,paper_id
  if len(text)==0:
    return

  # accents and diacriticals
  text=text.replace('\\\'e','&eacute;')
  text=text.replace('\\`e','&egrave;')
  text=text.replace('\\\'a','&aacute;')
  text=text.replace('\\\'I','&Iacute;')
  text=text.replace('\\\'E','&Eacute;')
  text=text.replace('\\\'c','&cacute;')
  text=text.replace('\\~n','&ntilde;')
  text=text.replace('\\\'{\\i}','&iacute;')
  text=text.replace('\\"o','&ouml;')
  text=text.replace('\\"u','&uuml;')
  text=text.replace('\\"a','&auml;')

  # latex constructs that get replaced by a simple space (or dropped)
  text=text.replace('\\-','')
  text=text.replace('\\ ',' ')
  text=text.replace('\\break',' ')
  text=text.replace('\\thinspace',' ')
  text=text.replace('.~','. ')

  # special math constructs in their entirety
  text=text.replace('$^\\circ$',' degree')
  text=text.replace("$'$",'&prime;')
  text=text.replace("$''$",'&Prime;')    # suspicious ...
  text=text.replace('$\\beta$','&beta;')
  text=text.replace('$^{\\rm th}$','th')
  text=text.replace('$\\bar1$','[1-bar]')

  # now do replacements that require regular expression handling
  # hints:
  #   r'string' is a raw string (not parsed by Python before passing)
  #   ? makes .* the search 'not greedy' (as short as possible)

  text=re.sub(r'{\\bf (.*?)}',r'\1',text)
  text=re.sub(r'\$_([0-9,d,u-z])\$',r'<font size="-2">\1</font>',text)
  text=re.sub(r'\$_{(.*?)}\$',r'<font size="-2">\1</font>',text)
  text=re.sub(r'\$([A-Z]*?)\$',r'\1',text)

  # DOTALL means that . includes the possibility of a newline, so
  # \sl/\it constructs may span lines.
  # BUGFIX: re.DOTALL was previously passed positionally, where re.sub
  # interprets it as the 'count' argument (16), not as a flag -- the
  # flag was silently ignored and substitutions were capped at 16.
  text=re.sub(r'{\\sl (.*?)}',r'<em>\1</em>',text,flags=re.DOTALL)
  text=re.sub(r'{\\it (.*?)}',r'<em>\1</em>',text,flags=re.DOTALL)

  # clean up remaining math constructs
  text=text.replace('$Z_2$','Z<font size="-2">2</font>')
  text=text.replace('$g$','<em>g</em>')
  # warn if any are left
  if '$' in text:
    print('*** Note $ in:  '+text)

  # finally, do special processing on author names

  skip_authors=False      # Set True for debug and to compare old style
  if skip_authors:
    text=text.replace('``','"')
    text=text.replace("''",'"')
  else:

    [head,x,tail]=text.partition('``')
    #   If above fails, it leaves '' in 2nd and 3rd entries of tuple

    # call routine for parsing author names
    if x!='': text=parse_auth(head)+'"'+tail.replace("''",'"')

    text=text.replace('``','"')  # in case there was another open quote

  # Now see if there is '%priv' (local) text for special treatment:
  # strip LOC spans from the public copy, unwrap them for the local copy
  fp.write(re.sub(r'LOC:(.*?):LOC\n','',text))
  fl.write(re.sub(r'LOC:(.*?):LOC\n','\\1\n',text))

  return

#===============================================
# Parse author names and abbreviate
#===============================================
def parse_auth(text):
  """Abbreviate the author-list portion of an entry.

  First names (and recognized middle names) are reduced to initials;
  last names and a few particles ('De', 'Van', ...) are kept in full.
  The result is re-wrapped at 72 characters.  Text beginning with 'See'
  is passed through untouched."""
  if text.startswith('See'): return(text)

  # Normalize whitespace, then split the list on ', and ' / ' and ' / ','
  flat=re.sub(' +',' ',text.replace('\n',' ')).strip(', ')
  names=re.split(', and | and |,',flat)

  # Middle names that get abbreviated to an initial vs. kept whole
  abbrev_middles=('Seok','Jie','Wook','Nyung','Luigi','Nihat',
     'Young','Andrei','Lawrence','Joon','Anil','Hight','Gilad','Roy')
  kept_middles=('De','de','Van','Di','Banaszak','Cargill')

  total=len(names)   # number of names in the author list
  result=''
  line=''
  for idx,raw in enumerate(names):
    name=raw.strip(' ').replace("&Eacute;amonn","Eamonn")  # rare special case
    parts=name.split(' ')
    count=len(parts)   # number of parts to one author name
    if count <= 1:
      print('*** Author name too short ***')
      print(name, parts)
      sys.exit(1)
    elif count > 4:
      print('*** Author name too long ***')
      print(name, parts)
      sys.exit(1)
    last=count-1
    abbrev=''
    for pos,part in enumerate(parts):
      # re.match anchors at the start; an already-abbreviated piece
      # like "J." is kept as-is
      if re.match(r'[A-Z].*\.$',part):
        abbrev+=part
      elif pos==0:        # First name: each hyphenated piece -> initial
        abbrev+=''.join(piece[0]+'.' for piece in part.split('-'))
      elif pos==last:     # Last name
        abbrev+=' '+part
      elif part in kept_middles:   # Middle name(s) kept whole
        abbrev+=' '+part
      elif part in abbrev_middles:
        abbrev+=part[0]+'.'
      else:
        # issue warning if this middle name not recognized
        print('*** unrecognized middle name:  '+part)
        abbrev+=part[0]+'.'
    # abbrev is now the abbreviated author name (no commas etc.)
    if total > 1 and idx==total-1:
      abbrev='and '+abbrev
    if len(line)+len(abbrev) >= 72:   # wrap the output line
      result+=line[:-1]+'\n'
      line=''
    if total==2 and idx==0:
      line+=abbrev+' '     # two authors: no comma after the first
    else:
      line+=abbrev+', '
  if line != '':
    result+=line[:-1]+'\n'
  return(result)

#===============================================
# Parse links and print
#===============================================
def parse_links(in_text):
  """Split a %link line into comma-separated tokens, convert each to an
  HTML anchor via parse_link, and write the public links to fp and the
  local links to fl (each list wrapped in <em>(...)</em>)."""
  global if_aip,fp,fl,paper_id
  items=in_text.strip(' ()\n').split(',')
  if len(items)==0:
    return
  public_parts=[]
  local_parts=[]
  for raw in items:
    # parse_link may set if_aip, so keep the public/local call order
    token=raw.strip()
    public_parts.append(parse_link(True,token))
    local_parts.append(parse_link(False,token))
  text_p=''.join(public_parts).strip(', \n')
  text_l=''.join(local_parts).strip(', \n ')
  if len(text_p)>0:
    fp.write('<em>('+text_p+')</em>\n')
  if len(text_l)>0:
    fl.write('<em>('+text_l+')</em>\n')
  return

#===============================================
# Parse one link
#===============================================
def parse_link(if_public,text):
  global if_aip,fp,fl,paper_id
  # Convert one link token (from a %link line) into an HTML anchor string.
  # if_public=True :  For pub_list.html
  # if_public=False:  For pub_list_local.html
  # Tokens are wrapped as <t>s-BODY-<t>e where <t> is a one-letter tag
  # from taglist (eg, js-...-je); untagged tokens are returned unchanged.
  # Side effects: may set global if_aip; reads global paper_id.
  # Returns '' when the link is suppressed for this audience, otherwise
  # a finished '<a href=...>...</a>,\n' fragment.

  show_untagged=True      # NOTE: currently unused
  taglist = ['c','j','k','p','r','v','w','x','y','z']

  # Strip the tag wrapper and remember its letter in 'type'
  type=None
  for tag in taglist:
    tys=tag+'s-'      # eg, cs-
    tye='-'+tag+'e'   # eg, -ce
    if text == tys+tye[1:]:   # eg, cs-ce (empty body: use paper_id)
      text=tys+paper_id+tye   # eg, cs-paper_id-ce
    if text.startswith(tys) and text.endswith(tye):
      type=tag
      text=text[3:-3]
      break           # terminate for loop

  if type == None:  # Tag not found
    if if_public: print('Untagged:  '+text)   # Not necessarily bad
    return(text)

  # Now start tag processing; aa and zz become the anchor prefix/suffix
  aa=''
  zz=''
  if type == 'j' or type == 'k':
    # journal link: 'j' uses http, 'k' uses https
    if text.startswith('AP') or text.startswith('JA') or text.startswith('scitat'):
      if_aip=True     # AIP journal: affects type 'c' local-copy suffix
    if type == 'j':
      aa='<a href=\"http://'
    if type == 'k':
      aa='<a href=\"https://'
    zz='">journal link</a>'
    jtag=text[0:3]
    if jtag == 'PRL' or jtag == 'PRB' or jtag == 'RMP' or jtag == 'PRM':
      # APS journals given as JRN/vVOL/pPAGE: rebuild as a doi-style URL
      jref=text.split('/')
      if len(jref) != 3:
        print('Error: len(jref) =', len(jref))
        sys.exit(1)
      if jref[0] == 'PRB':
        jrn='PhysRevB'
      elif jref[0] == 'PRL':
        jrn='PhysRevLett'
      elif jref[0] == 'PRM':
        jrn='PhysRevMaterials'
      elif jref[0] == 'RMP':
        jrn='RevModPhys'
      else:
        print('Error jrn')
        sys.exit(1)
      if jref[1][0] != 'v':
        print('Error: should be volume number')
        sys.exit(1)
      page=jref[2]
      # page prefix: 'e'/'p' plain page number, 'l' -> Letter ('L' prefix)
      if page[0] == 'e' or page[0] == 'p':
        tpage=page[1:]
      elif page[0] == 'l':
        tpage='L'+page[1:]
      else:
        print('Error: should be page number')
        sys.exit(1)
      text='link.aps.org/doi/10.1103/'
      text=text+jrn+'.'+jref[1][1:]+'.'+tpage
    # older abbreviated forms expanded to full host/path prefixes
    text=re.sub('^PR','publish.aps.org/abstract/PR',text)
    text=re.sub('^RM','publish.aps.org/abstract/RM',text)
    text=re.sub('^AP','link.aip.org/link/?AP',text)
    text=re.sub('^JA','link.aip.org/link/?JA',text)
    text=re.sub('^DOI','dx.doi.org/',text)
  elif type == 'c':
    # local copy of the published article
    aa='<a href=\"local_copy/'
    if if_public and if_aip:
      zz='.html">local copy</a>'
    else:
      zz='.pdf">local copy</a>'
    if text == '':
      text=paper_id
  elif type == 'x':
    # cond-mat (arXiv) link; local list points at the archived pdf instead
    if if_public:
      aa='<a href="http://arxiv.org/abs/'
      zz='/index.html">cond-mat archive</a>'
    else:
      aa='<a href="archive/'
      zz='/pap.pdf">cond-mat archive</a>'
  elif type == 'w':
    # private submitted copy: suppressed in the public list
    if if_public:
      text=''
    else:
      aa='<a href="archive/'
      zz='/pap.pdf">private submitted copy</a>'
  elif type == 'p':
    aa='<a href="local_preprint/'
    zz='.html">local preprint</a>'
  elif type == 'r':
    # public readers get the request form; local list links the archive copy
    if if_public:
      text='<a href="tools/request.html">request article</a>'
    else:
      aa='<a href="archive/'
      zz='/pap.pdf">archive copy</a>'
    if text == '':
      text=paper_id
  elif type == 'z':
    # supplement stored under supp/<paper_id>/
    if text != 'index.html':
      text = 'supp.pdf'
    aa='<a href="supp/'+paper_id+'/'
    zz='">local copy of supplement</a>'
  elif type == 'y':
    # private supplement: suppressed in the public list
    if if_public:
      text=''
    else:
      if text != 'index.html':
        text = 'supp.pdf'
      aa='<a href="supp/'+paper_id+'/'
      zz='">private copy of supplement</a>'
  elif type == 'v':
    # private local copy: suppressed in the public list
    if if_public:
      text=''
    else:
      aa='<a href="local_copy/'
      zz='.pdf">private local copy</a>'
  if text != '': text=aa+text+zz+',\n'
  return(text)

#===============================================
# Provide header and footer text
#===============================================
def get_text(tag):
  """Return the boilerplate HTML for the page 'header' or 'footer'.

  The header includes a timestamp of when the list was generated."""

  if tag == 'header':
    pieces=[
      '<html>\n<body background="gifs/GrayDotted.gif">\n',
      '<TITLE>Vanderbilt Publication List</TITLE>\n',
      '<H3>Publication List for David Vanderbilt</H3>\n\n<p>\n',
      'A link to the electronic <em>journal copy</em> is given if possible.  Where\n',
      '<a href="tools/copyright.html">copyright policy</a> allows, a link to a\n',
      '<em>local copy</em> of the electronic journal article may also appear.  If\n',
      'these are unavailable, a link to a\n',
      '<em><a href="http://arxiv.org/archive/cond-mat">cond-mat archive</a>\n',
      'preprint</em> or to a\n',
      '<em>local preprint</em>\n',
      'may be given.\n\n<p>\n',
      'See also <a href="https://scholar.google.com/citations?user=uY1warIAAAAJ&hl=en&oi=ao">Google Scholar profile</a>.\n\n<p>\n',
      'Updated '+time.asctime(time.localtime(time.time()))+':\n\n<ol>\n',
    ]

  elif tag == 'footer':
    pieces=[
      '<a name="end">\n</ol>\n\n',
      '<p>My PhD Thesis, "A Theoretical Study of Defects in',
      ' Amorphous Semiconductors"\n(MIT, 1981) is available as a\n',
      '<a href="local_preprint/dv_thesis.pdf">Scanned PDF (7MB)</a>.\n',
      '</p>\n\n<p>\n\n',
      'In case you cannot download an article, try sending me\n',
      '<A HREF = "mailto:dhv@physics.rutgers.edu">email</a>; I might be\n',
      'able to send it to you.\n<p>\n',
      'Also please notify me of broken links above.\n\n',
      '</body>\n</html>\n',
    ]

  return(''.join(pieces))

#===============================================
# Script entry point: runs immediately at load time (no __main__ guard).
main()
#===============================================
