import os, sys, pathlib, re
import itertools
from lxml import html
import argparse
# NOTES:
#   The HTML entity &nbsp; (non-breaking space) appears as \xa0 in parsed text.
class equationRenumber:
def __init__(self, rootDir, buildType):
'''
ref, target: targets that require checking or a transformation
checkFiles: list of htmlFiles that need to be checked
eref, etarget: existing references and targets
Keep a list of files with each reference tag
eref[#tag] = [htmlFile,htmlFile,....]
etargets[#tag] = htmlFile location of the target
There should only be one unique target key amongst
any number of files.
fixanchor:
fixtarget:
fixcaption:
'''
if os.path.isdir(rootDir):
os.chdir(rootDir)
self.htmlFiles = []
self.parsedFiles = []
self.meta = {
'ref': {},
'target': {},
'targets': [],
'eref': {},
'etarget': {},
'fixdiv': {},
'checkFiles': [],
'fixanchor': {},
'fixtarget': {},
'fixcaption': {},
}
self.rootDir = os.getcwd()
self.curDir = os.getcwd()
self.buildType = buildType
self.breakpoint = False
self.verbose = False
self.updates = False
def getHtmlFiles(self):
    '''Add every *.html file found below self.rootDir to self.htmlFiles.'''
    for folder, _subdirs, fileNames in os.walk(self.rootDir):
        self.htmlFiles.extend(
            os.path.join(folder, fileName)
            for fileName in fileNames
            if pathlib.Path(fileName).suffix == ".html"
        )
    if self.verbose:
        print("Scanned %s (%d)" % (self.rootDir, len(self.htmlFiles)))
def walkDoc(self, tree):
    '''
    Follow the local .html links of tree depth-first and record every
    page reached in self.parsedFiles (in the order first visited).

    tree is a parsed document whose docinfo.URL names the file it came
    from.  Relative links are resolved against the directory of the page
    that contains them, which is done by changing the process working
    directory (tracked in self.curDir).

    The walk uses an explicit stack instead of recursion so that a long
    chain of pages cannot exhaust the interpreter's recursion limit.  The
    visiting order is the same as the recursive formulation: a newly
    discovered page is walked completely before the remaining links of
    the page that referenced it.
    '''
    linkQuery = '//a[@href!=""]'
    # Each entry is (page, iterator over the links of that page not yet seen).
    pending = [(tree, iter(tree.xpath(linkQuery)))]
    while pending:
        curTree, links = pending[-1]
        for node in links:
            # Make sure we are in the same directory as the page being
            # scanned so that relative paths resolve correctly.
            pageDir = os.path.dirname(curTree.docinfo.URL)
            if self.curDir != pageDir:
                self.curDir = pageDir
                os.chdir(self.curDir)
            # Drop any "#fragment" / "?query" part: "page.html#eq" still
            # names the file page.html.
            page = node.get('href').split('#', 1)[0].split('?', 1)[0]
            if pathlib.Path(page).suffix != ".html":
                continue
            htmlFile = pathlib.Path(page).resolve().as_posix()
            if not pathlib.Path(htmlFile).is_file():
                continue
            if htmlFile in self.parsedFiles:
                continue
            self.parsedFiles.append(htmlFile)
            newTree = html.parse(htmlFile)
            # Descend into the new page first; the current iterator keeps
            # its position and is resumed afterwards.
            pending.append((newTree, iter(newTree.xpath(linkQuery))))
            break
        else:
            # All links of this page have been handled.
            pending.pop()
def htmlWalk(self, start):
    '''
    Sort the scanned file list and walk the link tree from start.

    start is a path relative to self.rootDir.  Nothing is walked when
    the resulting path is not one of the scanned files.
    '''
    self.htmlFiles.sort()
    entryPage = os.path.join(self.rootDir, start)
    if entryPage not in self.htmlFiles:
        return
    self.parsedFiles.append(entryPage)
    self.walkDoc(html.parse(entryPage))
def showUnresolved(self):
    '''Print each scanned HTML file that the link walk never reached.'''
    reached = self.parsedFiles
    for orphan in (path for path in self.htmlFiles if path not in reached):
        print(" unresolved>", orphan)
def collectEquationLabels(self):
    '''
    Scan every page in self.parsedFiles and fill self.meta with the
    equation references and targets found there.

    For each page, in this order:
      1. eqref / eqref2 macros left in the text        -> meta['ref']
      2. doxygen formulas carrying a plain label{}     -> meta['fixtarget']
      3. doxygen anchors placed in front of a formula  -> meta['fixanchor']
      4. label{html:...} followed by notag             -> meta['target']
      5. existing sphinx equation links                -> meta['eref']
      6. existing sphinx math divs                     -> meta['targets'],
         meta['etarget'], meta['fixdiv']
      7. center targets written by an earlier run      -> meta['targets'],
         meta['etarget'] (a duplicate id ends the program)
      8. math inside figure captions is rewritten immediately and the
         page is saved when anything changed.
    '''
    for htmlFile in self.parsedFiles:
        tree = html.parse(htmlFile)
        self._scanEqrefs(tree, htmlFile)
        self._scanFormulaLabels(tree, htmlFile)
        self._scanLoneAnchors(tree, htmlFile)
        self._scanHtmlLabels(tree, htmlFile)
        self._scanSphinxReferences(tree, htmlFile)
        self._scanSphinxTargets(tree, htmlFile)
        self._scanCenterTargets(tree, htmlFile)

        # Captions are fixed on the fly since no targets or references
        # are involved.  fixCaptionMath() raises self.updates.
        self.updates = False
        self._rewriteCaptionMath(tree)
        if self.updates:
            with open(htmlFile, 'wb') as fo:
                fo.write(html.tostring(tree))


def _rememberOnce(self, table, key, value):
    '''Append value to the list table[key] unless it is already there.'''
    bucket = table.setdefault(key, [])
    if value not in bucket:
        bucket.append(value)


def _scanEqrefs(self, tree, htmlFile):
    '''Record the tags of eqref2{tag,text} and eqref{tag} macros (doxygen).'''
    nodes = tree.xpath("//*[text()]")
    ct = 0
    for node in nodes:
        fullText = "%s%s" % (node.text, node.tail)
        for m in re.finditer(r'\\eqref2\{(.*?)\}', fullText):
            if self.verbose:
                ct = ct + 1
                if ct == 1:
                    print(os.path.basename(htmlFile), len(nodes))
                print(' found eqref2>%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
            tag_string = m.groups()[0]
            fc = tag_string.find(',')
            # Only the "tag,text" form carries a usable tag.
            if fc >= 0:
                self._rememberOnce(self.meta['ref'], htmlFile, tag_string[0:fc])
        for m in re.finditer(r'\\eqref\{(.*?)\}', fullText):
            if self.verbose:
                ct = ct + 1
                if ct == 1:
                    print(os.path.basename(htmlFile), len(nodes))
                print(' found eqref>%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
            # Doxygen 1.8.13: tags are lower case with ':' and '_' as '-'.
            tag = m.groups()[0].lower()
            tag = tag.translate(str.maketrans(':_', '--'))
            self._rememberOnce(self.meta['ref'], htmlFile, tag)


def _scanFormulaLabels(self, tree, htmlFile):
    '''
    fixtarget: find doxygen formulas (p class="formulaDsp") that carry a
    plain label{} and still need a center target in front of them.
    '''
    nodes = tree.xpath("//p[@class='formulaDsp']")
    ct = 0
    for node in nodes:
        prevNode = node.getprevious()
        # NOTE(review): a formula without a previous sibling is skipped,
        # as before; confirm whether it should become a target too.
        if prevNode is None:
            continue
        # Skip formulas that already have a center target before them.
        if prevNode.tag == 'center':
            hasId = prevNode.get('id')
            hasClass = prevNode.get('class') or ''
            if hasId and hasClass.find('math') != -1:
                continue
        fullText = "%s%s" % (node.text, node.tail)
        for m in re.finditer(r'\\label\{(.*?)\}', fullText):
            tag = m.groups()[0]
            # label{html:...} targets are handled by _scanHtmlLabels().
            if tag.find('html:') == 0:
                break
            if self.verbose:
                ct = ct + 1
                if ct == 1:
                    print(os.path.basename(htmlFile), len(nodes), 'dox')
                print(' fixtarget>%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
            self._rememberOnce(self.meta['fixtarget'], tag, htmlFile)


def _scanLoneAnchors(self, tree, htmlFile):
    '''
    fixanchor: find doxygen anchors (a class="anchor" inside a p) whose
    paragraph is directly followed by a formula paragraph.

    The anchor id is stored unchanged because it is used later to find
    the node again.
    TODO: no full operational example exists yet; this is unfinished.
    '''
    nodes = tree.xpath("//p/a[@class='anchor']")
    ct = 0
    for node in nodes:
        tag = node.get('id')
        nextParentNode = node.getparent().getnext()
        # An anchor without id, or in the last paragraph, cannot mark a formula.
        if tag is None or nextParentNode is None:
            continue
        if nextParentNode.tag == 'p' and nextParentNode.get('class') == 'formulaDsp':
            if self.verbose:
                ct = ct + 1
                if ct == 1:
                    print(os.path.basename(htmlFile), len(nodes), 'dox')
                print(' fixanchor>%s' % (tag))
            self._rememberOnce(self.meta['fixanchor'], tag, htmlFile)


def _scanHtmlLabels(self, tree, htmlFile):
    '''Record label{html:tag} notag targets (doxygen/sphinx); first file wins.'''
    nodes = tree.xpath("//*[text()]")
    ct = 0
    for node in nodes:
        fullText = "%s%s" % (node.text, node.tail)
        for m in re.finditer(r'\\label\{(html:.*?)\}\\notag', fullText):
            if self.verbose:
                ct = ct + 1
                if ct == 1:
                    print(htmlFile, len(nodes))
                print('%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
            tag_string = m.groups()[0]
            # The pattern guarantees the "html:" prefix, so ':' is present.
            tag = tag_string[tag_string.find(":") + 1:]
            if tag not in self.meta['target']:
                self.meta['target'][tag] = htmlFile


def _scanSphinxReferences(self, tree, htmlFile):
    '''
    Record existing sphinx equation links; these only need renumbering.

    Some of these references will not match actual math targets because
    "equation-" is part of the tag.
    '''
    nodes = tree.xpath("//a[contains(@class,'reference') and contains(@href,'#equation')]")
    for node in nodes:
        href = node.get('href').split('#')
        if len(href) < 2:
            continue
        tag = href[1]
        if tag not in self.meta['eref']:
            self.meta['eref'][tag] = []
            if self.verbose:
                print("check eref>%s (%s)" % (tag, os.path.basename(htmlFile)))
        if htmlFile not in self.meta['eref'][tag]:
            self.meta['eref'][tag].append(htmlFile)
        if htmlFile not in self.meta['checkFiles']:
            self.meta['checkFiles'].append(htmlFile)


def _scanSphinxTargets(self, tree, htmlFile):
    '''
    Record sphinx math divs that have an id.

    A div whose first child is not a span with class "eqno" is queued in
    meta['fixdiv'] so the number and permalink can be added later.
    '''
    ct = 0
    for node in tree.xpath("//div[contains(@class,'math')]"):
        tag = node.get('id')
        if tag is None:
            continue
        children = node.getchildren()
        hasEqno = (
            len(children) > 0
            and children[0].tag == 'span'
            and children[0].get('class') == 'eqno'
        )
        fixDiv = not hasEqno
        if fixDiv:
            self._rememberOnce(self.meta['fixdiv'], tag, htmlFile)
        # Targets are always stored with the equation- prefix.
        if tag.find('equation-') != 0:
            tag = "equation-%s" % (tag)
        if tag not in self.meta['targets']:
            self.meta['targets'].append(tag)
        if tag not in self.meta['etarget']:
            ct = ct + 1
            if self.verbose:
                if ct == 1:
                    print(os.path.basename(htmlFile))
                print("target>%s %s" % (tag, fixDiv))
            self.meta['etarget'][tag] = htmlFile
        if htmlFile not in self.meta['checkFiles']:
            self.meta['checkFiles'].append(htmlFile)


def _scanCenterTargets(self, tree, htmlFile):
    '''
    Record center targets written by an earlier run (doxygen only) so the
    program can be re-run for renumbering.  A duplicate id is fatal.
    '''
    for node in tree.xpath("//center[contains(@class,'math')]"):
        tag = node.get('id')
        if tag is None:
            continue
        if tag not in self.meta['targets']:
            self.meta['targets'].append(tag)
        if tag in self.meta['etarget']:
            print("ERROR: Duplicate target found in %s (%s)" % (htmlFile, tag))
            sys.exit(1)
        self.meta['etarget'][tag] = htmlFile
        if htmlFile not in self.meta['checkFiles']:
            self.meta['checkFiles'].append(htmlFile)


def _rewriteCaptionMath(self, tree):
    '''
    Let MathJax render math found in figure captions.

    A caption using the doxygen delimiters (backslash-f-dollar) or plain
    dollar delimiters is handed to fixCaptionMath(), which rewrites each
    formula as a span holding the formula between backslash-parentheses.
    '''
    # (pattern locating a whole formula, pattern extracting its content)
    doxygenMath = (r'(\\f\$.*?\\f\$)', r'\\f\$(.*?)\\f\$')
    dollarMath = (r'(\$.*?\$)', r'\$(.*?)\$')

    nodes = []
    if self.buildType == 'doxygen':
        nodes = tree.xpath("//div[@class='caption']")
        if len(nodes) == 0:
            # Older doxygen
            nodes = tree.xpath("//span[@class='caption-text']")
    if self.buildType == 'sphinx':
        nodes = tree.xpath("//span[@class='caption-text']")

    for node in nodes:
        txt = "%s%s" % (node.text or "", node.tail or "")
        # The doxygen form is tried first; a caption is handled with the
        # first delimiter style that occurs in it.
        for refPattern in (doxygenMath, dollarMath):
            if re.search(refPattern[0], txt):
                self.fixCaptionMath(refPattern, node)
                break
def fixCaptionMath(self, refPattern, node):
    '''
    Rewrite every formula in a caption as a MathJax span.

    refPattern[0] locates a complete formula including its delimiters
    (its first group is the whole formula); refPattern[1] extracts the
    formula content from that match.  The text and tail of node and of
    each direct child of node are searched.  Every formula found becomes
        <span class="math notranslate nohighlight">\\(content\\)</span>
    placed exactly where the formula stood, so the surrounding text keeps
    its order.  self.updates is set when anything was rewritten.
    '''
    def newSpan(matched):
        # A fresh span per formula: reusing one element would move it and
        # overwrite the previous formula.
        inner = re.search(refPattern[1], matched).groups()[0]
        span = html.Element("span")
        span.set("class", "math notranslate nohighlight")
        span.text = "\\(%s\\)" % (inner)
        return span

    def convertTail(element):
        # Each new span takes over the text that followed the formula, so
        # the loop simply moves forward until no formula is left.
        while element.tail:
            m = re.search(refPattern[0], element.tail)
            parent = element.getparent()
            if m is None or parent is None:
                return
            txhead = element.tail[0:m.start()]
            txtail = element.tail[m.end():]
            span = newSpan(m.groups()[0])
            element.tail = txhead
            # Directly after element (and its shortened tail).
            parent.insert(parent.index(element) + 1, span)
            span.tail = txtail
            self.updates = True
            element = span

    def convertText(element):
        if not element.text:
            return
        m = re.search(refPattern[0], element.text)
        if m is None:
            return
        txhead = element.text[0:m.start()]
        txtail = element.text[m.end():]
        span = newSpan(m.groups()[0])
        element.text = txhead
        # The leading text comes before the first child, so that is where
        # the span belongs.
        element.insert(0, span)
        span.tail = txtail
        self.updates = True
        # More formulas may follow in the remainder of the text.
        convertTail(span)

    # Snapshot the children: spans added below are handled by convertTail().
    for element in [node] + list(node):
        # Comments / processing instructions cannot take children.
        if isinstance(element.tag, str):
            convertText(element)
        convertTail(element)
def insertRefNode(self, refPattern, node, aNode):
    '''
    Replace the first match of refPattern found in node (or any element
    below it) with the element aNode.

    Text inside an element and text following an element are stored
    differently, so two cases exist.

    Match in the text of element x:
        from  x.text = "Before \\eqref{aref} and after", no children
        to    x.text = "Before ", x[0] = aNode, aNode.tail = " and after"
      aNode always becomes the first child of x.

    Match in the tail of element x (x is a child of some parent):
        from  x.tail = " and after \\eqref{bref} iterations"
        to    x.tail = " and after ", aNode inserted right after x,
              aNode.tail = " iterations"

    Any tail aNode already carries is kept in front of the text that
    followed the match.  Only one replacement is made per call because
    aNode is a single element; callers rescan to find further matches.
    '''
    # Iterate over all text within the node no matter how deep.
    for x in node.iter():
        # Text portion (elements only: comments cannot take children).
        m = None
        if x.text and isinstance(x.tag, str):
            m = re.search(refPattern, x.text)
        if m:
            if self.verbose:
                print('%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
            txhead = x.text[0:m.start()]
            txtail = x.text[m.end():]
            if aNode.tail and len(aNode.tail) > 0:
                txtail = "%s%s" % (aNode.tail, txtail)
            x.text = txhead
            x.insert(0, aNode)
            # The remainder of the text now follows the inserted child.
            if len(txtail) > 0:
                aNode.tail = txtail
            return

        # Tail portion: the new element becomes the next sibling of x.
        m = None
        if x.tail:
            m = re.search(refPattern, x.tail)
        if m:
            parent = x.getparent()
            if parent is None:
                # Nowhere to put a sibling of the root element.
                continue
            if self.verbose:
                print('%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
            # Split the text between the two tails.
            txhead = x.tail[0:m.start()]
            txtail = x.tail[m.end():]
            if aNode.tail and len(aNode.tail) > 0:
                txtail = "%s%s" % (aNode.tail, txtail)
            x.tail = txhead
            parent.insert(parent.index(x) + 1, aNode)
            if len(txtail) > 0:
                aNode.tail = txtail
            return
def updateTarget(self, node, m, fn):
    '''
    Turn one label{html:tag} notag marker into a numbered target.

    node is the element whose text/tail contains the marker, m the regex
    match of the marker (group 1 is "html:tag") and fn the file being
    processed.  The marker is removed from the node, the tag is
    registered as "equation-<tag>" in self.meta, and self.updates is set.

    sphinx:  the node text is replaced by the equation number "(N)".
    doxygen: the tag is lower-cased with ':' and '_' turned into '-',
             and a center element holding the number and a permalink is
             inserted directly in front of the node.
    '''
    # We do not need to do crazy things here as this is typically
    # not nested within the same node.
    if self.verbose:
        print(' updateTarget>%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
    repl = m.group(0)
    # The real item to replace is the marker followed by " \\ ".
    replStr = "%s \\\\ " % (repl)

    def stripMarker(text):
        if text is None:
            return None
        if replStr in text:
            return text.replace(replStr, '')
        # Marker written without the trailing line break: remove the
        # marker itself so callers that rescan the node make progress.
        return text.replace(repl, '')

    node.text = stripMarker(node.text)
    node.tail = stripMarker(node.tail)
    self.updates = True

    if self.buildType not in ('sphinx', 'doxygen'):
        return

    tag_string = m.groups()[0]
    tag = "equation-%s" % (tag_string[tag_string.find(":") + 1:])
    if self.buildType == 'doxygen':
        # Doxygen 1.8.13 & to match sphinx syntax for tags
        tag = tag.lower()
        tag = tag.translate(str.maketrans(':_', '--'))

    if tag not in self.meta['targets']:
        self.meta['targets'].append(tag)
    self.meta['target'][tag] = fn
    # The tag was registered above, so the lookup cannot fail.
    eqno = self.meta['targets'].index(tag) + 1

    if self.buildType == 'sphinx':
        node.text = "(%d)" % (eqno)
        return

    # Doxygen: this is now fixed and counts as an existing target.
    if tag not in self.meta['etarget']:
        self.meta['etarget'][tag] = fn

    # The id goes on a new element in front of the formula, together with
    # the number and a permalink.  addprevious() also works when the
    # formula has no previous sibling.
    ele = html.Element("center")
    ele.set("id", tag)
    ele.set("class", 'math notranslate nohighlight')
    node.addprevious(ele)
    span = html.Element("span")
    span.set("class", "eqno")
    span.text = "(%d)" % (eqno)
    ele.insert(0, span)
    a = html.Element("a")
    a.set('class', 'headerlink')
    a.set('href', '#%s' % (tag))
    a.set('title', 'Permalink to this equation')
    a.text = '¶'
    span.insert(0, a)
def checkSphinxLinks(self, target):
    '''
    Renumber the existing sphinx links that point at target.

    Every file listed in self.meta['eref'][target] is parsed.  Each
    a class="reference internal" element whose href fragment equals
    target and whose text starts with "(" gets the current equation
    number of target as its text.  Links with other text (for example a
    title such as "Hydrostatic balance") are left alone.  A file is
    rewritten only when a number actually changed.
    '''
    try:
        eqno = self.meta['targets'].index(target) + 1
    except ValueError:
        # No known target: nothing can be renumbered.
        eqno = 0
    eqnoStr = "(%s)" % (eqno)

    for htmlFile in self.meta['eref'][target]:
        tree = html.parse(htmlFile)
        self.updates = False
        for node in tree.xpath("//a[@class='reference internal' and contains(@href,'#')]"):
            # Compare the fragment exactly so that "equation-a" does not
            # also select links to "equation-ab".
            if node.get('href').split('#')[1] != target:
                continue
            txt = node.text
            # NOTE: only text that starts with '(' is an equation number.
            if txt is None or txt.find('(') != 0:
                continue
            if eqno > 0 and eqnoStr != txt:
                node.text = eqnoStr
                self.updates = True
        if self.updates:
            # Write tree back out to file
            with open(htmlFile, 'wb') as fo:
                fo.write(html.tostring(tree))
def checkSphinxTargets(self, target):
    '''
    Renumber the existing sphinx target with id target.

    The file recorded in self.meta['etarget'][target] is parsed and the
    first span directly inside the div carrying that id gets the current
    equation number as its text.  The file is rewritten only when the
    number changed.

    NOTE(review): only div targets are handled here; center targets
    written for doxygen pages are not matched by this query - confirm
    whether they should be renumbered as well.
    '''
    htmlFile = self.meta['etarget'][target]
    tree = html.parse(htmlFile)
    self.updates = False

    # The id is passed as an XPath variable so quotes in it are harmless.
    found = tree.xpath("//div[@id=$tid]/span", tid=target)
    if found:
        node = found[0]
        try:
            eqno = self.meta['targets'].index(target) + 1
        except ValueError:
            eqno = 0
        if eqno > 0:
            eqnoStr = "(%s)" % (eqno)
            if eqnoStr != node.text:
                node.text = eqnoStr
                self.updates = True

    if self.updates:
        # Write tree back out to file
        with open(htmlFile, 'wb') as fo:
            fo.write(html.tostring(tree))
def fixEquationTargets(self):
    '''
    Repair the equation targets queued by collectEquationLabels().

    fixdiv:    math divs without a number get a span class="eqno" with
               the number and a permalink.
    fixtarget: doxygen formulas carrying a plain label{} get a center
               target (number + permalink) in front of them.
    fixanchor: doxygen anchors in front of a formula are converted into
               such a center target.

    Each affected file is rewritten after it has been changed.
    '''
    self._fixDivTargets()
    self._fixLabelTargets()
    self._fixAnchorTargets()


def _equationTag(self, tag):
    '''Return tag in target form: lower case, ':' and '_' as '-', equation- prefix.'''
    # Doxygen 1.8.13 & to match sphinx syntax for tags
    tag = tag.lower()
    tag = tag.translate(str.maketrans(':_', '--'))
    if tag.find('equation-') != 0:
        tag = "equation-%s" % (tag)
    return tag


def _equationNumber(self, tag):
    '''Return the 1-based position of tag in meta['targets'], or 0 if absent.'''
    try:
        return self.meta['targets'].index(tag) + 1
    except ValueError:
        return 0


def _registerFixedTarget(self, tag, htmlFile):
    '''Record a repaired target; an earlier registration of the tag is kept.'''
    if tag not in self.meta['targets']:
        self.meta['targets'].append(tag)
    if tag not in self.meta['target']:
        self.meta['target'][tag] = htmlFile
    # This is now fixed and counts as an existing target.
    if tag not in self.meta['etarget']:
        self.meta['etarget'][tag] = htmlFile


def _newEqnoSpan(self, tag, label):
    '''Build span class="eqno" showing label and holding a permalink to #tag.'''
    span = html.Element("span")
    span.set("class", "eqno")
    span.text = label
    a = html.Element("a")
    a.set('class', 'headerlink')
    a.set('href', '#%s' % (tag))
    a.set('title', 'Permalink to this equation')
    a.text = '¶'
    span.insert(0, a)
    return span


def _saveTree(self, tree, htmlFile):
    '''Write tree back out to htmlFile.'''
    with open(htmlFile, 'wb') as fo:
        fo.write(html.tostring(tree))


def _findLabelledFormula(self, tree, label):
    '''Return the first formula paragraph containing label{<label>}, else None.'''
    for node in tree.xpath("//p[@class='formulaDsp']"):
        fullText = "%s%s" % (node.text, node.tail)
        for m in re.finditer(r'\\label\{(.*?)\}', fullText):
            if m.groups()[0] == label:
                return node
    return None


def _fixDivTargets(self):
    '''fixdiv: add the number and permalink to math divs that lack them.'''
    if self.verbose:
        print("fixdiv")
    for target, htmlFiles in list(self.meta['fixdiv'].items()):
        ntag = target
        if ntag.find('equation-') != 0:
            ntag = "equation-%s" % (ntag)
        for htmlFile in htmlFiles:
            tree = html.parse(htmlFile)
            self.updates = False
            if self.verbose:
                print(" - %s fixdiv tag %s" % (os.path.basename(htmlFile), ntag))
            # The div is looked up by the id it has in the file.
            for node in tree.xpath("//div[@id=$tid]", tid=target):
                eqno = self._equationNumber(ntag)
                if eqno > 0:
                    label = "(%s)" % (eqno)
                else:
                    label = "(new)"
                    print("Warning: %s tag not found." % (ntag))
                node.insert(0, self._newEqnoSpan(ntag, label))
                self.updates = True
            if self.updates:
                self._saveTree(tree, htmlFile)


def _fixLabelTargets(self):
    '''fixtarget: put a center target in front of formulas with a lone label.'''
    if self.verbose:
        print("fixtarget")
    for target, htmlFiles in list(self.meta['fixtarget'].items()):
        for htmlFile in htmlFiles:
            if self.verbose:
                print(" fixtarget> %s %s" % (target, os.path.basename(htmlFile)))
            tree = html.parse(htmlFile)
            self.updates = False
            # We have to hunt for the formula with the label we need.  The
            # search result is per file, so nothing found in an earlier
            # file can be reused here by accident.
            node = self._findLabelledFormula(tree, target)
            if node is None:
                continue
            tag = self._equationTag(target)
            self._registerFixedTarget(tag, htmlFile)
            ele = html.Element("center")
            ele.set("id", tag)
            ele.set("class", 'math notranslate nohighlight')
            node.addprevious(ele)
            ele.insert(0, self._newEqnoSpan(tag, "(%d)" % (self._equationNumber(tag))))
            self.updates = True
            self._saveTree(tree, htmlFile)


def _fixAnchorTargets(self):
    '''
    fixanchor: convert lone anchors in front of formulas into center
    targets.  The tag name is fixed here (if needed).
    '''
    if self.verbose:
        print("fixanchor")
    for target, htmlFiles in list(self.meta['fixanchor'].items()):
        # target is the id as it appears in the files; tag is the id the
        # converted element receives.  Keeping them apart lets every file
        # be queried with the original id.
        tag = self._equationTag(target)
        for htmlFile in htmlFiles:
            if self.verbose:
                print(" fixanchor> %s %s" % (target, os.path.basename(htmlFile)))
            tree = html.parse(htmlFile)
            self.updates = False
            nodes = tree.xpath("//a[@class='anchor' and @id=$tid]", tid=target)
            self._registerFixedTarget(tag, htmlFile)
            for node in nodes:
                # Convert the anchor itself into the target element.
                node.tag = 'center'
                node.set("id", tag)
                node.set("class", 'math notranslate nohighlight')
                node.insert(0, self._newEqnoSpan(tag, "(%d)" % (self._equationNumber(tag))))
                self.updates = True
            if self.updates:
                self._saveTree(tree, htmlFile)
def updateEquationLinks(self):
    '''
    Number the targets and rewrite the references of all scanned pages.

      1. Pages holding label{html:...} markers: each marker is handed to
         updateTarget(); same-page sphinx equation links are renumbered.
      2. Existing sphinx targets (checkSphinxTargets) and existing sphinx
         links (checkSphinxLinks) are renumbered.
      3. Pages holding eqref / eqref2 macros: each macro is replaced by a
         link a class="reference internal" showing the equation number.

    Every page is written back only when it was changed.
    '''
    # Snapshot the keys: updateTarget() adds entries while we iterate.
    for target in list(self.meta['target'].keys()):
        self._updateLabelTargets(self.meta['target'][target])

    # Check Sphinx targets
    for target in list(self.meta['etarget'].keys()):
        self.checkSphinxTargets(target)

    # Check Sphinx references
    for ref in list(self.meta['eref'].keys()):
        self.checkSphinxLinks(ref)

    # Update links (doxygen & sphinx?)
    for fn in list(self.meta['ref'].keys()):
        self._resolveEqrefs(fn)


def _updateLabelTargets(self, fn):
    '''Process the label markers and same-page sphinx links of one file.'''
    self.updates = False
    tree = html.parse(fn)
    nodes = tree.xpath("//*[text()]")
    if len(nodes) > 0 and self.verbose:
        print(fn, len(nodes))
    for node in nodes:
        # Keep looping until there are no more updates to find
        scanText = True
        while scanText:
            scanText = False
            fullText = "%s%s" % (node.text, node.tail)
            m = re.search(r'\\label\{(html:.*?)\}\\notag', fullText)
            if m:
                self.updateTarget(node, m, fn)
                # Rescan only if the node really changed; otherwise the
                # same marker would be found again forever.
                scanText = ("%s%s" % (node.text, node.tail)) != fullText
            # Check sphinx links/references
            if node.tag == 'a' and node.get('class') == 'reference internal':
                tag = node.get('href') or ''
                if tag.find('#equation') == 0:
                    tag = tag[1:]
                    try:
                        eqno = self.meta['targets'].index(tag) + 1
                    except ValueError:
                        eqno = 0
                    # Only update references that have a formula target
                    if eqno > 0:
                        eqnoStr = "(%s)" % (eqno)
                        if eqnoStr != node.text:
                            node.text = eqnoStr
                            # Make sure the new number reaches the file.
                            self.updates = True
                            scanText = True
                    elif self.verbose:
                        print("WARNING: no target for %s" % (tag))
    # After all the updates are done, rewrite the html file if needed.
    if self.updates:
        with open(fn, 'wb') as fo:
            fo.write(html.tostring(tree))


def _refHref(self, targetFile, fn, fullTag):
    '''Return the href from page fn to the target fullTag inside targetFile.'''
    relDir = os.path.relpath(os.path.dirname(targetFile), os.path.dirname(fn))
    # If we are in the same directory, do not specify a path
    if relDir == ".":
        prefix = ""
    else:
        prefix = "%s/" % (relDir.replace(os.sep, '/'))
    return "%s%s#%s" % (prefix, os.path.basename(targetFile), fullTag)


def _newRefAnchor(self, href, eqno, tail):
    '''Build the link element that stands in for an eqref macro.'''
    aNode = html.Element('a')
    aNode.set('href', href)
    aNode.set('class', 'reference internal')
    aNode.text = "(%d)" % (eqno)
    aNode.tail = tail
    return aNode


def _noteCleanReference(self, fullTag, fn):
    '''Record in meta['eref'] that fn now holds a clean link to fullTag.'''
    if fullTag not in self.meta['eref']:
        self.meta['eref'][fullTag] = []
    if fn not in self.meta['eref'][fullTag]:
        self.meta['eref'][fullTag].append(fn)


def _findMacro(self, pattern, node):
    '''
    Return the first match of pattern in node.text, else in node.tail.

    Text and tail are searched separately (the order insertRefNode()
    uses) so that a match is always one insertRefNode() can replace.
    '''
    for text in (node.text, node.tail):
        if text:
            m = re.search(pattern, text)
            if m:
                return m
    return None


def _resolveEqrefs(self, fn):
    '''Replace the eqref / eqref2 macros of one file by numbered links.'''
    refPattern = r'\\eqref{(.*?)}'
    refPattern2 = r'\\eqref2{(.*?)}'
    self.updates = False
    tree = html.parse(fn)
    # Manipulation of the document is very hard.  We need to rescan the
    # entire document each time there is a change in structure to find
    # all the updates.  To seed this loop, we start with -1.
    updatesFound = -1
    npass = 0
    while updatesFound != 0:
        updatesFound = 0
        npass = npass + 1
        if self.buildType not in ('doxygen', 'sphinx'):
            continue
        nodes = tree.xpath("//*[text()]")
        if len(nodes) > 0 and self.verbose:
            print(fn, npass, len(nodes))
        for node in nodes:
            if node.text is None:
                continue

            # \eqref
            m = self._findMacro(refPattern, node)
            if m:
                if self.verbose:
                    print(' eqref>%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
                # Doxygen 1.8.13
                tag = m.groups()[0].lower()
                tag = tag.translate(str.maketrans(':_', '--'))
                fullTag = tag
                if tag.find('equation-') != 0:
                    fullTag = "equation-%s" % (tag)
                try:
                    href = self._refHref(self.meta['etarget'][fullTag], fn, fullTag)
                except (KeyError, ValueError):
                    print("WARNING: Target tag not found: %s" % (fullTag))
                    continue
                if fullTag not in self.meta['targets']:
                    self.meta['targets'].append(fullTag)
                eqno = self.meta['targets'].index(fullTag) + 1
                # Replace \eqref{} with a link that looks like sphinx
                aNode = self._newRefAnchor(href, eqno, '')
                self.insertRefNode(refPattern, node, aNode)
                updatesFound = updatesFound + 1
                self.updates = True
                self._noteCleanReference(fullTag, fn)
                # The node changed; the next pass looks at it again.
                continue

            # \eqref2
            m = self._findMacro(refPattern2, node)
            if m:
                if self.verbose:
                    print(' eqref2>%03d-%03d: %s' % (m.start(), m.end(), m.group(0)))
                tag_string = m.groups()[0]
                fc = tag_string.find(',')
                if fc < 0:
                    # \eqref2 needs "tag,text"; leave anything else alone.
                    print("WARNING: Malformed eqref2 reference: %s" % (tag_string))
                    continue
                tag = tag_string[0:fc]
                fullTag = "equation-%s" % (tag)
                try:
                    href = self._refHref(self.meta['target'][tag], fn, fullTag)
                except (KeyError, ValueError):
                    print("WARNING: Target tag not found: %s" % (tag))
                    continue
                try:
                    eqno = self.meta['targets'].index(fullTag) + 1
                except ValueError:
                    eqno = 0
                # \eqref2{} becomes something similar to: (7) - momentum
                aNode = self._newRefAnchor(href, eqno, " - %s" % (tag_string[fc+1:]))
                self.insertRefNode(refPattern2, node, aNode)
                updatesFound = updatesFound + 1
                self.updates = True
                self._noteCleanReference(fullTag, fn)
    if self.updates:
        # Write tree back out to file
        with open(fn, 'wb') as fo:
            fo.write(html.tostring(tree))
# Command line driver: scan a documentation tree, repair the equation
# targets and renumber targets and references.
parser = argparse.ArgumentParser()
parser.add_argument("-v", "--verbose", action="store_true", help="Turn on verbosity")
parser.add_argument("-d", "--dir", type=str, help="Root directory")
parser.add_argument("-p", "--project", type=str, help="Project directory")
parser.add_argument("-b", "--build", type=str, help="Build type")
parser.add_argument("-s", "--start", type=str, help="Start file")
parser.add_argument("-l", "--links", action="store_true", help="Show targets, links and references")
args = parser.parse_args()

# Options that were not given (or given empty) fall back to these defaults.
rootDirectory = args.dir or '/var/www/html'
verbose = bool(args.verbose)
showLinks = bool(args.links)
startFile = args.start or 'index.html'
projectDirectory = args.project or None
buildType = args.build or 'doxygen'

if not os.path.isdir(rootDirectory):
    print("ERROR: Root directory not found (%s). Exiting." % (rootDirectory))
    sys.exit(1)
# The project directory may be given relative to the root directory.
os.chdir(rootDirectory)

# os.path.isdir(None) raises, so a missing -p option is checked first.
if projectDirectory is None or not os.path.isdir(projectDirectory):
    print("ERROR: Project directory not found (%s). Exiting." % (projectDirectory))
    sys.exit(1)

mathProc = equationRenumber(projectDirectory, buildType)
mathProc.verbose = verbose
if mathProc.verbose:
    print("** getHtmlFiles")
mathProc.getHtmlFiles()
if mathProc.verbose:
    print("** htmlWalk")
mathProc.htmlWalk(startFile)
if mathProc.verbose:
    print("** collectEquationLabels")
mathProc.collectEquationLabels()
if mathProc.verbose:
    print("** fixEquationTargets")
mathProc.fixEquationTargets()
if mathProc.verbose:
    print("** updateEquationLinks")
mathProc.updateEquationLinks()

if showLinks:
    mathProc.meta['targets'].sort()
    print()
    msg = "Pages with formula targets and references"
    print(msg)
    print('-' * len(msg))
    for target in mathProc.meta['targets']:
        # Not all targets are formulas
        tag = target
        # There should be only one target per tag
        if tag in mathProc.meta['etarget'].keys():
            print("target>%s: %s" % (tag, mathProc.meta['etarget'][tag]))

        linkedPages = []
        # meta['ref'] maps a page to the tags it references; those tags
        # are stored without the equation- prefix.
        bareTag = tag
        if tag.find('equation-') == 0:
            bareTag = tag[len('equation-'):]
        for lpage, tags in mathProc.meta['ref'].items():
            if (tag in tags or bareTag in tags) and lpage not in linkedPages:
                linkedPages.append(lpage)
        # meta['eref'] maps a tag to the pages that link to it.
        for lpage in mathProc.meta['eref'].get(tag, []):
            if lpage not in linkedPages:
                linkedPages.append(lpage)
        for lPage in linkedPages:
            print(" %s" % (lPage))