can't appear inside
try:
if len(lines) == 1 and \
TARGET in ('html', 'xhtml') and \
re.match('^\s*
.*\s*$', lines[0]):
result = [lines[0]]
except: pass
return result
def verb(self):
    """Tag the held verbatim block and return the resulting lines.

    Verbatim lines are not masked, so there's no need to unmask.
    """
    tagged = [TAGS['blockVerbOpen']]
    for raw in self.hold():
        # When the block's 'mapped' property is 'table', macros are expanded
        if self.prop('mapped') == 'table':
            raw = MacroMaster().expand(raw)
        if not rules['verbblocknotescaped']:
            raw = doEscape(TARGET, raw)
        if rules['indentverbblock']:
            raw = ' ' + raw
        if rules['verbblockfinalescape']:
            raw = doFinalEscape(TARGET, raw)
        tagged.append(raw)
    # TODO maybe use if not TAGS['blockVerbClose']
    if TARGET == 'pm6':
        # pm6 gets no closing tag
        return tagged
    tagged.append(TAGS['blockVerbClose'])
    return tagged
def table(self):
    """Rewrite every table row's cells with the escaped held data, then dump.

    Each escaped line is split on SEPARATOR and stored as the 'cells' of the
    row at the same position in self.tableparser.rows.
    """
    position = 0
    for escaped in self._get_escaped_hold():
        self.tableparser.rows[position]['cells'] = escaped.split(SEPARATOR)
        position = position + 1
    return self.tableparser.dump()
def quote(self):
    """Tag the held quote block and return the resulting lines.

    Held items that are lists (subquotes) are appended untouched; string
    items are cleaned, optionally bar-tagged, escaped and prefixed.
    """
    strip_re = regex['quote']
    block_open = TAGS['blockQuoteOpen']       # block based
    block_close = TAGS['blockQuoteClose']
    line_mark = TAGS['blockQuoteLine']        # line based
    textindent = tagindent = '\t' * self.depth
    if rules['tagnotindentable']:
        tagindent = ''
    if not rules['keepquoteindent']:
        textindent = ''

    tagged = []
    if block_open:
        tagged.append(tagindent + block_open)
    for entry in self.hold():
        if type(entry) is type([]):
            # subquote: already a list of lines
            tagged.extend(entry)
            continue
        entry = strip_re.sub('', entry)       # del TABs
        if rules['barinsidequote']:
            entry = get_tagged_bar(entry)
        entry = self._last_escapes(entry)
        # one line mark per depth level, after the (optional) indent
        tagged.append(textindent + line_mark * self.depth + entry)
    if block_close:
        tagged.append(tagindent + block_close)
    return tagged
def deflist(self):
    "Tag the held block as a definition list (delegates to self.list)"
    kind = 'deflist'
    return self.list(kind)
def numlist(self):
    "Tag the held block as a numbered list (delegates to self.list)"
    kind = 'numlist'
    return self.list(kind)
def list(self, name='list'):
    """Tag the held list block and return the resulting lines.

    `name` selects the tag family looked up in TAGS: 'list' (default),
    'numlist' or 'deflist'.  The held items are modified in place while
    they are tagged (their first line is escaped and then removed).
    """
    tagged = []
    items = self.hold()
    textindent = self.prop('indent')
    tagindent = textindent
    listopen = TAGS.get(name + 'Open')
    listclose = TAGS.get(name + 'Close')
    listline = TAGS.get(name + 'ItemLine')
    if rules['tagnotindentable']:
        tagindent = ''
    if not rules['keeplistindent']:
        textindent = tagindent = ''

    # Definition lists have two parts per item (term + definition)
    if name == 'deflist':
        itemopen = TAGS[name + 'Item1Open']
        itemclose = TAGS[name + 'Item2Close']
        itemsep = TAGS[name + 'Item1Close'] + TAGS[name + 'Item2Open']
    else:
        itemopen = TAGS[name + 'ItemOpen']
        itemclose = TAGS[name + 'ItemClose']
        itemsep = ''

    # ItemLine: number of leading chars identifies list depth
    if listline:
        itemopen = listline * self.depth + itemopen

    # Adds trailing space on opening tags
    if name == 'list':
        spaced = rules['spacedlistitemopen']
    elif name == 'numlist':
        spaced = rules['spacednumlistitemopen']
    else:
        spaced = 0
    if spaced:
        itemopen = itemopen + ' '

    # Remove two-blanks from list ending mark (the original note says this
    # is done "to avoid" something; presumably a stray paragraph tag)
    items[-1] = self._remove_twoblanks(items[-1])

    # Open list (not nestable lists are only opened at mother)
    if listopen and not (rules['listnotnested'] and BLOCK.depth != 1):
        tagged.append(tagindent + listopen)

    # Tag each list item (multiline items)
    itemopen_template = itemopen
    count = 0
    for item in items:

        # Add "manual" item count for noautonum targets
        count = count + 1
        if name == 'numlist' and not rules['autonumberlist']:
            itemopen = regex['x'].sub(str(count), itemopen_template)

        # Tag the item's first line
        item[0] = self._last_escapes(item[0])
        if name == 'deflist':
            _lead, term, rest = item[0].split(SEPARATOR, 2)
            item[0] = rest
            if not item[0]:
                del item[0]       # drop the empty definition text
            tagged.append(tagindent + itemopen + term + itemsep)
        else:
            tagged.append(item[0].replace(SEPARATOR, tagindent + itemopen))
            del item[0]

        # Process next lines for this item (if any)
        for line in item:
            if type(line) is type([]):     # sublist inside
                tagged.extend(line)
                continue
            line = self._last_escapes(line)

            # Blank lines turn into an (empty) paragraph
            if not line and rules['parainsidelist']:
                line = (textindent +
                        TAGS['paragraphOpen'] +
                        TAGS['paragraphClose']).rstrip()

            # Some targets don't like indentation here (wiki)
            if not rules['keeplistindent'] or \
               (name == 'deflist' and rules['deflisttextstrip']):
                line = line.lstrip()

            # Maybe we have a line prefix to add? (wiki)
            if name == 'deflist' and TAGS['deflistItem2LinePrefix']:
                line = TAGS['deflistItem2LinePrefix'] + line

            tagged.append(line)

        # Close item (if needed)
        if itemclose:
            tagged.append(tagindent + itemclose)

    # Close list (not nestable lists are only closed at mother)
    if listclose and not (rules['listnotnested'] and BLOCK.depth != 1):
        tagged.append(tagindent + listclose)
    if rules['blankendmotherlist'] and BLOCK.depth == 1:
        tagged.append('')
    return tagged
##############################################################################
class MacroMaster:
    """Expands the macros (date, mtime, infile, outfile) found on a line.

    The macro regex comes from the global `regex` table (or getRegexes()),
    the default formats from MACROS, and the file names from the config.
    """

    def __init__(self, config=None):
        """Set up the expander; `config` falls back to the global CONF."""
        self.name = ''
        self.config = config or CONF
        self.infile = self.config['sourcefile']
        self.outfile = self.config['outfile']
        self.currdate = time.localtime(time.time())
        self.rgx = regex.get('macros') or getRegexes()['macros']
        # Cache of {'path', 'dir', 'name'} dicts, filled by set_file_info()
        self.fileinfo = {'infile': None, 'outfile': None}
        self.dft_fmt = MACROS

    def walk_file_format(self, fmt):
        "Walks the %%{in/out}file format string, expanding the % flags"
        pieces = []
        pos = 0
        size = len(fmt)
        while pos < size:                     # char by char
            char = fmt[pos]
            pos = pos + 1
            if char != '%':                   # common char
                pieces.append(char)
                continue
            if pos == size:                   # lone % at the end is kept
                pieces.append(char)
                break
            pieces.append(self.expand_file_flag(fmt[pos]))
            pos = pos + 1
        return ''.join(pieces)

    def expand_file_flag(self, flag):
        """Return the expansion of a single file-format flag.

        %f: filename          %F: filename (w/o extension)
        %d: dirname           %D: dirname (only parent dir)
        %p: file path         %e: extension
        %%: a literal %       anything else is returned as '%' + flag
        """
        info = self.fileinfo[self.name]       # get dict
        if flag == '%':
            return '%'                        # %% -> %
        if flag == 'f':
            return info['name']
        if flag == 'F':
            return re.sub(r'\.[^.]*$', '', info['name'])
        if flag == 'd':
            return info['dir']
        if flag == 'D':
            return os.path.split(info['dir'])[-1]
        if flag == 'p':
            return info['path']
        if flag == 'e':
            #TODO simpler way for %e ?
            found = re.search(r'.(\.([^.]+))?$', info['name'])
            return found.group(2) or ''
        return '%' + flag                     # false alarm

    def set_file_info(self, macroname):
        """Cache path/dir/name for `macroname` ('infile' or 'outfile')."""
        if self.fileinfo.get(macroname):
            return                            # already done
        # Look the file up by the requested macro name, not by whatever
        # self.name happens to hold (they only coincide inside expand())
        fname = getattr(self, macroname)      # self.infile / self.outfile
        if fname in (STDOUT, MODULEOUT):
            dirname = ''
            path = name = fname
        else:
            path = os.path.abspath(fname)
            dirname = os.path.dirname(path)
            name = os.path.basename(path)
        self.fileinfo[macroname] = {'path': path, 'dir': dirname, 'name': name}

    def expand(self, line=''):
        """Expand all macros found on the line and return the new line.

        The expanded text is spliced in literally.  Passing it to
        rgx.sub() as a replacement template would interpret backslashes
        in it (for example in a Windows file path) as escape sequences
        or group references.
        """
        while 1:
            m = self.rgx.search(line)
            if not m:
                break
            name = self.name = m.group('name').lower()
            fmt = m.group('fmt') or self.dft_fmt.get(name)
            if name == 'date':
                txt = time.strftime(fmt, self.currdate)
            elif name == 'mtime':
                if self.infile in (STDIN, MODULEIN):
                    fdate = self.currdate
                else:
                    mtime = os.path.getmtime(self.infile)
                    fdate = time.localtime(mtime)
                txt = time.strftime(fmt, fdate)
            elif name == 'infile' or name == 'outfile':
                self.set_file_info(name)
                txt = self.walk_file_format(fmt)
            else:
                # NOTE(review): Error() is assumed not to return here,
                # otherwise `txt` is unbound below -- confirm
                Error("Unknown macro name '%s'" % name)
            # Replace only this (leftmost) match, verbatim
            line = line[:m.start()] + txt + line[m.end():]
        return line
##############################################################################
def dumpConfig(source_raw, parsed_config):
    """Print the RAW config of every source, the parsed config and the filters.

    source_raw:    raw (target, key, value) config triples from the source
                   document; RC_RAW and CMDLINE_RAW supply the other two.
    parsed_config: the final config dictionary.
    """
    onoff = {1: _('ON'), 0: _('OFF')}
    sources = [
        (_('RC file'), RC_RAW),
        (_('source document'), source_raw),
        (_('command line'), CMDLINE_RAW),
    ]

    # First show all RAW data found
    for label, raw in sources:
        print(_('RAW config for %s') % label)
        for target, key, val in raw:
            print(' %-8s %s: %s' % (
                '(%s)' % target,
                dotted_spaces("%-14s" % key),
                val or _('ON')))
        print('')

    # Then the parsed results of all of them
    print(_('Full PARSED config'))
    names = list(parsed_config.keys())
    names.sort()
    for name in names:
        # Filters are the last
        if name in ('preproc', 'postproc'):
            continue
        val = parsed_config[name]
        # Flag beautifier
        if name in FLAGS.keys() or name in ACTIONS.keys():
            val = onoff.get(val) or val
        # List beautifier
        if type(val) is type([]):
            if name == 'options':
                sep = ' '
            else:
                sep = ', '
            val = sep.join(val)
        print("%25s: %s" % (dotted_spaces("%-14s" % name), val))
    print('')

    print(_('Active filters'))
    for kind in ['preproc', 'postproc']:
        for rule in parsed_config.get(kind) or []:
            print("%25s: %s -> %s" % (
                dotted_spaces("%-14s" % kind), rule[0], rule[1]))
def get_file_body(file):
    "Returns all the document BODY lines"
    # The body is item [1][2] of what process_source_file() returns
    parsed = process_source_file(file, noconf=1)
    return parsed[1][2]
def finish_him(outlist, config):
    """Write the output to screen or file (or hand it back to the caller).

    Returns the output list when the outfile is MODULEOUT, and the tuple
    (outlist, config) when writing to STDOUT under the GUI.  Otherwise the
    lines are printed or saved and nothing is returned.
    """
    outfile = config['outfile']
    outlist = expandLineBreaks(unmaskEscapeChar(outlist))

    # Apply PostProc filters
    if config['postproc']:
        filters = compile_filters(config['postproc'],
                                  _('Invalid PostProc filter regex'))
        errmsg = _('Invalid PostProc filter replacement')
        filtered = []
        for line in outlist:
            for rgx, repl in filters:
                # A bad user replacement is reported, not raised
                try:
                    line = rgx.sub(repl, line)
                except:
                    Error("%s: '%s'" % (errmsg, repl))
            filtered.append(line)
        outlist = filtered

    if outfile == MODULEOUT:
        return outlist
    if outfile == STDOUT:
        if GUI:
            return outlist, config
        for line in outlist:
            print(line)
    else:
        Savefile(outfile, addLineBreaks(outlist))
        if not GUI and not QUIET:
            print(_('%s wrote %s') % (my_name, outfile))

    # Optional split of the saved output through the sgml2html command
    if config['split']:
        if not QUIET:
            print("--- html...")
        sgml2html = 'sgml2html -s %s -l %s %s' % (
            config['split'], config['lang'] or lang, outfile)
        if not QUIET:
            print("Running system command: %s" % sgml2html)
        os.system(sgml2html)
def toc_inside_body(body, toc, config):
    """Replace each body line holding the TOC mark with the TOC contents.

    With AUTOTOC on, the body is returned untouched.  Marked lines are
    dropped when config['toc'] is off.
    """
    if AUTOTOC:
        return body                       # nothing to expand
    toc_mark = MaskMaster().tocmask
    expanded = []
    for line in body:
        if not line.count(toc_mark):      # common line
            expanded.append(line)
        elif config['toc']:               # mark found: include if --toc
            expanded.extend(toc)
        # else: mark found without --toc, the line is removed
    return expanded
def toc_tagger(toc, config):
    "Convert t2t-marked TOC (it is a list) to target-tagged TOC"
    target_toc_tag = None
    # Tag the TOC "by hand" if TOC-only, or if the target has no TOC tag
    by_hand = config['toc-only'] or (config['toc'] and not TAGS['TOC'])
    if by_hand:
        # Convert the TOC lines with a stripped-down copy of the config
        fakeconf = config.copy()
        fakeconf['headers'] = 0
        fakeconf['toc-only'] = 0
        fakeconf['mask-email'] = 0
        fakeconf['preproc'] = []
        fakeconf['postproc'] = []
        fakeconf['css-sugar'] = 0
        tagged, _ignored = convert(toc, fakeconf)
        set_global_config(config)         # restore config
        return tagged
    # Target TOC is a tag
    if config['toc']:
        target_toc_tag = TAGS['TOC']
    if target_toc_tag:
        return [target_toc_tag]
    return []
def toc_formatter(toc, config):
    """Formats TOC for automatic placement between headers and body.

    Returns `toc` itself when config['toc-only'] is set, [] when the TOC
    is disabled, and otherwise a NEW list with the open/close tags and the
    AUTOTOC decorations added.  The caller's list is no longer modified
    in place.
    """
    if config['toc-only']:
        return toc                        # no formatting needed
    if not config['toc']:
        return []                         # TOC disabled
    # Work on a copy: insert()/append() below must not alter the argument
    ret = list(toc)
    # TOC open/close tags (if any)
    if TAGS['tocOpen']:
        ret.insert(0, TAGS['tocOpen'])
    if TAGS['tocClose']:
        ret.append(TAGS['tocClose'])
    # Autotoc specific formatting
    if AUTOTOC:
        if rules['autotocwithbars']:      # TOC between bars
            para = TAGS['paragraphOpen'] + TAGS['paragraphClose']
            bar = regex['x'].sub('-' * 72, TAGS['bar1'])
            tocbar = [para, bar, para]
            ret = tocbar + ret + tocbar
        if rules['blankendautotoc']:      # blank line after TOC
            ret.append('')
        if rules['autotocnewpagebefore']: # page break before TOC
            ret.insert(0, TAGS['pageBreak'])
        if rules['autotocnewpageafter']:  # page break after TOC
            ret.append(TAGS['pageBreak'])
    return ret
def doHeader(headers, config):
    """Build the document header lines from the target's header template.

    headers: the three header lines (falsy means three empty lines).
    config:  the config dictionary; the keys read here are 'headers',
             'target', 'style', 'encoding', 'css-sugar', 'css-inside'
             and 'sourcefile'.
    Returns the header as a list of lines ([] when headers are disabled).
    """
    if not config['headers']:
        return []
    if not headers:
        headers = ['', '', '']
    target = config['target']
    if target not in HEADER_TEMPLATE:
        Error("doheader: Unknow target '%s'" % target)

    if target in ('html', 'xhtml') and config.get('css-sugar'):
        template = HEADER_TEMPLATE[target + 'css'].split('\n')
    else:
        template = HEADER_TEMPLATE[target].split('\n')

    head_data = {'STYLE': [], 'ENCODING': ''}
    for key in list(head_data.keys()):
        val = config.get(key.lower())
        # Remove .sty extension from each style filename (freaking tex)
        # XXX Can't handle --style foo.sty,bar.sty
        if target == 'tex' and key == 'STYLE':
            val = [re.sub(r'(?i)\.sty$', '', x) for x in val]
        if key == 'ENCODING':
            val = get_encoding_string(val, target)
        head_data[key] = val

    # Parse header contents
    for i in 0, 1, 2:
        # Expand macros
        contents = MacroMaster(config=config).expand(headers[i])
        # Escapes - on tex, just do it if any \tag{} present
        if target != 'tex' or \
           (target == 'tex' and re.search(r'\\\w+{', contents)):
            contents = doEscape(target, contents)
        if target == 'lout':
            contents = doFinalEscape(target, contents)
        head_data['HEADER%d' % (i + 1)] = contents

    css_inside = target in ('html', 'xhtml') and \
        config.get('css-inside') and config.get('style')

    # css-inside removes STYLE line
    #XXX In tex, this also removes the modules call (%!style:amsfonts)
    if css_inside:
        head_data['STYLE'] = []
    Debug("Header Data: %s" % head_data, 1)

    # Scan for empty dictionary keys
    # If found, scan template lines for that key reference
    # If found, remove the reference
    # If there isn't any other key reference on the same line, remove it
    #TODO loop by template line > key
    for key in list(head_data.keys()):
        if head_data.get(key):
            continue
        mark = '%%(%s)s' % key
        # Walk a snapshot: removing from the list being iterated would
        # make the loop skip the line that follows each removed one
        for line in template[:]:
            if line.count(mark):
                sline = line.replace(mark, '')
                if not re.search(r'%\([A-Z0-9]+\)s', sline):
                    template.remove(line)

    # Style is a multiple tag.
    # - If none or just one, use default template
    # - If two or more, insert extra lines in a loop (and remove original)
    styles = head_data['STYLE']
    if len(styles) == 1:
        head_data['STYLE'] = styles[0]
    elif len(styles) > 1:
        style_mark = '%(STYLE)s'
        # Pop from a copy: `styles` may be the very list stored in
        # config['style'], which must not be emptied as a side effect
        pending = styles[:]
        for i in range(len(template)):
            if template[i].count(style_mark):
                while pending:
                    template.insert(
                        i + 1,
                        template[i].replace(style_mark, pending.pop()))
                del template[i]
                break

    # Populate template with data (dict expansion)
    template = '\n'.join(template) % head_data

    # Adding CSS contents into template (for --css-inside)
    if css_inside:
        set_global_config(config)  # usually on convert(), needed here
        for cssfile in config['style']:
            if not os.path.isabs(cssfile):
                infile = config.get('sourcefile')
                cssfile = os.path.join(os.path.dirname(infile), cssfile)
            # The bare except is kept on purpose: any failure while reading
            # the file becomes a comment in the output.  NOTE(review):
            # presumably Readfile reports problems through Error(), which
            # may raise SystemExit -- confirm before narrowing this.
            try:
                contents = Readfile(cssfile, 1)
                css = "\n%s\n%s\n%s\n%s\n" % (
                    doCommentLine("Included %s" % cssfile),
                    TAGS['cssOpen'],
                    '\n'.join(contents),
                    TAGS['cssClose'])
                # Style now is content, needs escaping (tex)
                #css = maskEscapeChar(css)
            except:
                errmsg = "CSS include failed for %s" % cssfile
                css = "\n%s\n" % (doCommentLine(errmsg))
            # Insert this CSS file contents right before the closing HEAD
            # tag.  A function is used as replacement so backslashes inside
            # the CSS text are inserted verbatim instead of being parsed
            # as regex template escapes.
            template = re.sub(
                '(?i)(</HEAD>)',
                lambda m, css=css: css + m.group(1),
                template)
        # The last blank line to keep everything separated
        template = re.sub('(?i)(</HEAD>)', '\n' + r'\1', template)

    return template.split('\n')
def doCommentLine(txt):
    "Return txt inside the target's comment tag ('' if there is no tag)"
    txt = maskEscapeChar(txt)
    comment_tag = TAGS['comment']
    # The -- string ends a (h|sg|xht)ml comment :(
    # so every '-' that is followed by another '-' gets separated from it
    if comment_tag.count('--') and txt.count('--'):
        txt = re.sub('-(?=-)', r'-\\', txt)
    if not comment_tag:
        return ''
    return regex['x'].sub(txt, comment_tag)
def doFooter(config):
    """Return the document footer lines ([] when headers are disabled).

    The footer is a blank line, two comment lines (generator credits and
    the command line used) and the target's EOD tag.
    """
    if not config['headers']:
        return []
    target = config['target']
    if target == 'tex':
        typename = 'LaTeX2e'
    else:
        typename = target
    credits = '%s code generated by %s %s (%s)' % (
        typename, my_name, my_version, my_url)
    used_cmdline = 'cmdline: %s %s' % (
        my_name, ' '.join(config['realcmdline']))
    return [
        '',
        doCommentLine(credits),
        doCommentLine(used_cmdline),
        TAGS['EOD'],
    ]
def doEscape(target, txt):
    """Target-specific special escapes. Apply *before* insert any tag.

    target: the target name ('html', 'sgml', 'xhtml', 'pm6', 'mgp', 'man',
            'lout' or 'tex'); any other target leaves the text untouched.
    txt:    the text to escape.
    Returns the escaped text.
    """
    tmpmask = 'vvvvThisEscapingSuxvvvv'
    if target in ('html', 'sgml', 'xhtml'):
        # '&' goes first, otherwise the entities added next get re-escaped
        txt = txt.replace('&', '&amp;')
        txt = txt.replace('<', '&lt;')
        txt = txt.replace('>', '&gt;')
        if target == 'sgml':
            txt = txt.replace('\xff', '&yuml;')  # "+y
    elif target == 'pm6':
        txt = txt.replace('<', r'<\#60>')
    elif target == 'mgp':
        txt = re.sub('^%', ' %', txt)  # add leading blank to avoid parse
    elif target == 'man':
        txt = re.sub("^([.'])", '\\&\\1', txt)         # command ID
        txt = txt.replace(ESCCHAR, ESCCHAR + 'e')      # \e
    elif target == 'lout':
        # TIP: / moved to FinalEscape to avoid //italic//
        # TIP: these are also converted by lout:  ...  ---  --
        txt = txt.replace(ESCCHAR, tmpmask)                  # \
        txt = txt.replace('"', '"%s""' % ESCCHAR)            # "\""
        txt = re.sub('([|&{}@#^~])', '"\\1"', txt)           # "@"
        txt = txt.replace(tmpmask, '"%s"' % (ESCCHAR * 2))   # "\\"
    elif target == 'tex':
        # Mark literal \ to be changed to $\backslash$ later
        txt = txt.replace(ESCCHAR, tmpmask)
        txt = re.sub('([#$&%{}])', ESCCHAR + r'\1', txt)     # \%
        txt = re.sub('([~^])', ESCCHAR + r'\1{}', txt)       # \~{}
        txt = re.sub('([<|>])', r'$\1$', txt)                # $>$
        txt = txt.replace(tmpmask, maskEscapeChar(r'$\backslash$'))
        # TIP the _ is escaped at the end
    return txt
# TODO man: where - really needs to be escaped?
def doFinalEscape(target, txt):
    """Last escapes of each line.

    Applies the final, target-specific replacement to `txt` and returns
    it; targets not listed here get the text back unchanged.
    """
    if target == 'pm6':
        txt = txt.replace(ESCCHAR + '<', r'<\#92><')
    elif target == 'man':
        txt = txt.replace('-', r'\-')
    elif target == 'sgml':
        # The left square bracket goes out as its SGML entity
        txt = txt.replace('[', '&lsqb;')
    elif target == 'lout':
        txt = txt.replace('/', '"/"')
    elif target == 'tex':
        txt = txt.replace('_', r'\_')
        # This mask is turned back into a real underscore
        txt = txt.replace('vvvvTexUndervvvv', '_')  # shame!
    return txt
def EscapeCharHandler(action, data):
    """Mask/Unmask the Escape Char on the given string.

    action: 'mask' turns each backslash into ESCCHAR, 'unmask' does the
            reverse; anything else is reported through Error().
    Blank (or empty) data is returned as is, before the action is checked.
    """
    if not data.strip():
        return data
    valid_actions = ('mask', 'unmask')
    if action not in valid_actions:
        Error("EscapeCharHandler: Invalid action '%s'" % action)
    if action == 'mask':
        source, replacement = '\\', ESCCHAR
    else:
        source, replacement = ESCCHAR, '\\'
    return data.replace(source, replacement)
def maskEscapeChar(data):
    "Replace any Escape Char (backslash) with a text mask (Input: str or list)"
    if type(data) is not type([]):
        return EscapeCharHandler('mask', data)
    # A list is masked item by item
    return [EscapeCharHandler('mask', item) for item in data]
def unmaskEscapeChar(data):
    "Undo the Escape char (backslash) masking (Input: str or list)"
    if type(data) is not type([]):
        return EscapeCharHandler('unmask', data)
    # A list is unmasked item by item
    return [EscapeCharHandler('unmask', item) for item in data]
def addLineBreaks(mylist):
    """Return a new list where every line ends with LB.

    Embedded newline chars are converted to LB too, then the final line
    break is added (the original note: use LB to respect sys.platform).
    """
    return [line.replace('\n', LB) + LB for line in mylist]
# Convert ['foo\nbar'] to ['foo', 'bar']
def expandLineBreaks(mylist):
    "Split every item holding embedded newlines into separate list items"
    expanded = []
    for item in mylist:
        for piece in item.split('\n'):
            expanded.append(piece)
    return expanded
def compile_filters(filters, errmsg='Filter'):
    """Compile, in place, the pattern of every (pattern, replacement) pair.

    The same list object is returned, now holding (compiled regex,
    replacement) tuples.  A pattern that fails to compile is reported
    through Error(), prefixed by `errmsg`.  A falsy `filters` is returned
    untouched.
    """
    if not filters:
        return filters
    for pos in range(len(filters)):
        pattern, replacement = filters[pos]
        # Any compile failure of the user pattern is reported, not raised
        try:
            compiled = re.compile(pattern)
        except:
            Error("%s: '%s'" % (errmsg, pattern))
        filters[pos] = (compiled, replacement)
    return filters
def enclose_me(tagname, txt):
    "Return txt between the <tagname>Open and <tagname>Close tags from TAGS"
    opening = TAGS.get(tagname + 'Open')
    closing = TAGS.get(tagname + 'Close')
    return opening + txt + closing
def beautify_me(name, line):
    """Apply the named beautifier tags to the line.

    Where name is: bold, italic, underline or strike.
    """
    # Exception: Doesn't parse an horizontal bar as strike
    if name == 'strike' and regex['bar'].search(line):
        return line
    font = 'font%s' % name.capitalize()
    tag_open = TAGS['%sOpen' % font]
    tag_close = TAGS['%sClose' % font]
    # The marked contents (group 1) go between the open and close tags
    replacement = r'%s\1%s' % (tag_open, tag_close)
    return regex[font].sub(replacement, line)
def get_tagged_link(label, url):
    """Return the tagged version of a link.

    label: the link text ('' for a simple, unnamed link).
    url:   the link address (an URL or an e-mail).
    Targets, tags and rules are read from the CONF, TAGS and rules globals.
    """
    target = CONF['target']
    image_re = regex['img']

    # Set link type
    if regex['email'].match(url):
        linktype = 'email'
    else:
        linktype = 'url'

    # Escape specials from TEXT parts
    label = doEscape(target, label)

    # Escape specials from link URL
    if not rules['linkable'] or rules['escapeurl']:
        url = doEscape(target, url)

    # Adding protocol to guessed link
    guessurl = ''
    if linktype == 'url' and \
       re.match('(?i)' + regex['_urlskel']['guess'], url):
        if url[0] in 'Ww':
            guessurl = 'http://' + url
        else:
            guessurl = 'ftp://' + url
        # Not link aware targets -> protocol is useless
        if not rules['linkable']:
            guessurl = ''

    # Simple link (not guessed)
    if not label and not guessurl:
        if CONF['mask-email'] and linktype == 'email':
            # Do the email mask feature (no TAGs, just text)
            masked = url.replace('@', ' (a) ')
            masked = masked.replace('.', ' ')
            masked = "<%s>" % masked
            if rules['linkable']:
                masked = doEscape(target, masked)
            return masked
        # Just add link data to tag
        return regex['x'].sub(url, TAGS[linktype])

    # Named link or guessed simple link
    # Adjusts for guessed link
    if not label:
        label = url                # no protocol
    if guessurl:
        url = guessurl             # with protocol

    # Image inside link!
    if image_re.match(label):
        if rules['imglinkable']:   # get image tag
            label = parse_images(label)
        else:                      # img@link !supported
            label = "(%s)" % image_re.match(label).group(1)

    # Putting data on the right appearance order
    if rules['linkable']:
        ordered = [url, label]     # link before label
    else:
        ordered = [label, url]     # label before link

    # Add link data to tag (one placeholder replaced per piece of data)
    tagged = TAGS["%sMark" % linktype]
    for piece in ordered:
        tagged = regex['x'].sub(piece, tagged, 1)
    return tagged
def parse_deflist_term(line):
    "Extract and parse definition list term contents"
    image_re = regex['img']
    term = regex['deflist'].search(line).group(3)
    # Mask image inside term as (image.jpg), where not supported
    if not rules['imgasdefterm']:
        found = image_re.search(term)
        while found:
            term = image_re.sub('(%s)' % found.group(1), term, 1)
            found = image_re.search(term)
    #TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
    return term
def get_tagged_bar(line):
m = regex['bar'].search(line)
if not m: return line
txt = m.group(2)
# Map strong bar to pagebreak
if rules['mapbar2pagebreak'] and TAGS['pageBreak']:
TAGS['bar2'] = TAGS['pageBreak']
# Set bar type
if txt[0] == '=': bar = TAGS['bar2']
else : bar = TAGS['bar1']
# To avoid comment tag confusion like
if string.count(TAGS['comment'], '--'):
txt =