Skip to content
This repository was archived by the owner on Feb 5, 2021. It is now read-only.

Commit a7ac0aa

Browse files
authored
Release 1.5.0
1 parent 26e72ba commit a7ac0aa

File tree

1 file changed

+108
-33
lines changed

1 file changed

+108
-33
lines changed

tex2txt.py

+108-33
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class Aux: pass
8888
Simple('zB', 'z.~B. '),
8989
# Simple('zB', r'z.\\,B. '),
9090

91+
# see LAB:VERBATIM below
9192
# Macro('verb', 'A', '[verbatim]'),
9293
Macro('verb', 'A', r'\1'),
9394
# Macro(r'verb\*', 'A', '[verbatim*]'),
@@ -560,6 +561,7 @@ def EnvBegin(name, args='', repl=''):
560561
def re_code_args(args, repl, who, s, no_backslash=False):
561562
# return regular expression for 'OAA' code in args,
562563
# do some checks for replacement string repl
564+
# CROSS-CHECK with mark_internal_pre and mark_internal_post
563565
ret = ''
564566
for a in args:
565567
if a == 'A':
@@ -574,11 +576,11 @@ def err(e):
574576
fatal('error in replacement for ' + who + "('" + s + "', ...):\n" + e)
575577
if no_backslash and repl.count('\\'):
576578
err('no backslashs allowed')
577-
for m in re.finditer(r'\\(\d)', repl):
579+
for m in re.finditer(r'(?<!\\)(?:\\\\)*\\(\d)', repl):
578580
# avoid exceptions from re module
579581
n = int(m.group(1))
580582
if n < 1 or n > len(args):
581-
err('invalid "\\' + m.group(1) + '"')
583+
err('invalid reference "\\' + m.group(1) + '"')
582584
if re.search(r'(?<!\\\\)%', repl):
583585
# ensure that mark_linebreak and mark_deleted do work
584586
err(r"please use r'\\%' to insert escaped percent sign")
@@ -605,8 +607,9 @@ def err(e):
605607

606608
# these RE match beginning and end of arbitrary "standard" environments
607609
#
608-
re_begin_env = begin_lbr + r'[^\\{}]+\}'
609-
re_end_env = end_lbr + r'[^\\{}]+\}'
610+
environ_name = r'[^\\{}\n]+'
611+
re_begin_env = begin_lbr + environ_name + r'\}'
612+
re_end_env = end_lbr + environ_name + r'\}'
610613

611614
# UTF-8 characters;
612615
# name lookup, if char given e.g. from copy-and-paste:
@@ -662,15 +665,22 @@ def mysub(expr, repl, text, flags=0, extract=None):
662665
last = 0
663666
for m in re.finditer(expr, txt, flags=flags):
664667
t = m.group(0)
668+
if not t:
669+
continue
665670
if type(repl) is str:
666-
r = m.expand(repl)
667-
else: # repl is a callable
671+
r = myexpand(m, repl, text)
672+
else:
668673
r = repl(m)
674+
if type(r) is tuple:
675+
# replacement contains line number information
676+
nums2 = r[1]
677+
r = r[0]
678+
else:
679+
nums2 = None
669680
res += txt[last:m.start(0)]
670681
last = m.end(0)
671682
# lin: first line number of current replacement action
672683
lin = res.count('\n')
673-
res += r
674684
nt = t.count('\n')
675685
nr = r.count('\n')
676686
if extract:
@@ -679,17 +689,70 @@ def mysub(expr, repl, text, flags=0, extract=None):
679689
# ll: original line number of line lin
680690
ll = abs(numbers[lin])
681691
if nr > 0 or not r:
682-
numbers = numbers[:lin] + (-ll,) * nr + numbers[lin+nt:]
692+
if nums2:
693+
# replacement with line number information
694+
numbers = calc_numbers(res, r, numbers, lin, nt, nums2)
695+
else:
696+
numbers = numbers[:lin] + (-ll,) * nr + numbers[lin+nt:]
683697
else:
684698
# join to single line: keep correct line number
685699
numbers = numbers[:lin] + (-ll,) + numbers[lin+nt+1:]
700+
res += r
686701
return (res + txt[last:], numbers)
687702

703+
# helper function for mysub()
704+
#
705+
def calc_numbers(res, repl, numbers, lin, nt, nums2):
706+
t = text_combine((res, numbers[:lin+1]), ('', nums2))
707+
t = text_combine((repl, t[1]), ('', numbers[lin+nt:]))
708+
return t[1]
709+
710+
# combine (add) two text elements with line number information
711+
#
712+
def text_combine(text1, text2):
713+
space = (r'\A(' + mark_deleted + r'|' + re_begin_env
714+
+ r'|' + re_end_env + r'|\s)*\Z')
715+
(t1, n1) = text1
716+
(t2, n2) = text2
717+
i = t1.rfind('\n') + 1 # i == 0, if not found
718+
if re.search(space, t1[i:]):
719+
# only "space" after last line break in text1:
720+
# use first line number from text2 at junction
721+
n = n1[:-1] + n2
722+
else:
723+
# use last line number from text1 at junction
724+
n = n1[:-1] + (-abs(n1[-1]),) + n2[1:]
725+
return (t1 + t2, n)
726+
727+
# prepend and append plain strings to a text with line number information
728+
#
729+
def text_add_frame(pre, post, text):
730+
return (
731+
pre + text[0] + post,
732+
(-abs(text[1][0]),) * pre.count('\n')
733+
+ text[1]
734+
+ (-abs(text[1][-1]),) * post.count('\n')
735+
)
736+
737+
# extract text with line number information from a group of a match
738+
#
739+
def text_from_match(m ,grp, text):
740+
if m.string is not text[0]:
741+
fatal('text_from_match(): bad match object')
742+
beg = m.string[:m.start(grp)].count('\n')
743+
end = beg + m.group(grp).count('\n') + 1
744+
return (m.group(grp), text[1][beg:end])
745+
746+
# here, we could re-implement parsing of the repl string and provide
747+
# line number information if a capturing group referenced in repl spans multiple lines
748+
#
749+
def myexpand(m, repl, text):
750+
return m.expand(repl)
751+
688752
def mysearch(expr, text, flags=0):
689753
if type(text) is not tuple:
690754
fatal('wrong arg for mysearch()')
691755
return re.search(expr, text[0], flags=flags)
692-
693756
def text_get_txt(text):
694757
return text[0]
695758
def text_get_num(text):
@@ -755,12 +818,10 @@ def text_get_num(text):
755818
text = myopen(cmdline.file).read()
756819
else:
757820
text = sys.stdin.read()
758-
if not text or text[-1] != '\n':
759-
text += '\n'
760821

761822
# the initial list of line numbers: in fact "only" a tuple
762823
#
763-
numbers = tuple(range(1, text.count('\n') + 1))
824+
numbers = tuple(range(1, text.count('\n') + 2))
764825

765826
# for mysub():
766827
# text becomes a 2-tuple of text string and number list
@@ -779,6 +840,9 @@ def text_get_num(text):
779840
# \begin{verbatim(*)}...\end{verbatim(*)}
780841
# --> can be removed or replaced by fixed text with 'verbatim'
781842
# or r'verbatim\*' entry in parms.environments
843+
# --> complete removal without paragraph break:
844+
# \LTskip{\begin{verbatim}...\end{verbatim}}
845+
# or \LTalter{...}{replacement}
782846
# - expanded text of \verb(*) macro is enclosed in \verb(*){...}
783847
# --> can be removed or replaced with Macro('verb', 'A', ...)
784848
# or Macro(r'verb\*', 'A', ...) in parms.*_macros
@@ -788,17 +852,22 @@ def text_get_num(text):
788852
#
789853
def f(m):
790854
# enclose coded text in \begin{verbatim(*)}...\end{verbatim(*)},
791-
# enforce heading and trailing empty lines for environment content
792-
return (m.group(1) + '\n\n\\begin{verbatim' + verb_asterisk + '}'
793-
+ verbatim(m.group(3), mark_verbatim_tmp, verb_asterisk)
794-
+ '\\end{verbatim' + verb_asterisk + '}\n\n')
855+
# enforce leading and trailing empty lines for environment content,
856+
# even in case of single line like `A\begin{verbatim}X\end{verbatim}B'
857+
def g(m):
858+
return verbatim(m.group(0), mark_verbatim_tmp, verb_asterisk)
859+
t = text_from_match(m, 3, text)
860+
t = mysub(r'.|\n', g, t)
861+
return text_add_frame(
862+
m.group(1) + '\n\n\\begin{verbatim' + verb_asterisk + '}',
863+
'\\end{verbatim' + verb_asterisk + '}\n\n', t)
795864
verb_asterisk = '*'
796865
text = mysub(r'^(([^\n\\%]|\\.)*)' + begin_lbr + r'verbatim\*\}((.|\n)*?)'
797-
+ end_lbr + r'verbatim\*\}', f, text, flags=re.M)
866+
+ r'\\end\{verbatim\*\}', f, text, flags=re.M)
798867
# important: non-greedy repetition *?
799868
verb_asterisk = ''
800869
text = mysub(r'^(([^\n\\%]|\\.)*)' + begin_lbr + r'verbatim\}((.|\n)*?)'
801-
+ end_lbr + r'verbatim\}', f, text, flags=re.M)
870+
+ r'\\end\{verbatim\}', f, text, flags=re.M)
802871
# important: non-greedy repetition *?
803872

804873
def f(m):
@@ -936,7 +1005,7 @@ def f(m):
9361005
match = None
9371006
while flag:
9381007
# loop until no more replacements performed
939-
if cnt > 100:
1008+
if cnt > 2 * parms.max_depth_br:
9401009
fatal('infinite recursion in macro definition?',
9411010
match.group(0) if match else '')
9421011
cnt += 1
@@ -1190,26 +1259,26 @@ def split_sec(txt, first_on_line):
11901259
# parse the text of an equation environment
11911260
#
11921261
def parse_equ(equ):
1193-
# first resolve sub-environments (e.g. cases) in order
1194-
# to see interpunction
1195-
equ = re.sub(re_begin_env, '', equ)
1196-
equ = re.sub(re_end_env, '', equ)
1197-
# remove mark_deleted
1198-
equ = re.sub(mark_deleted, '', equ)
1262+
# first resolve sub-environments (e.g. cases) and mark_deleted
1263+
# in order to see interpunction
1264+
d = (r'((' + re_begin_env + r')|(' + re_end_env
1265+
+ r')|(' + mark_deleted + r'))')
1266+
equ = mysub(d, '', equ)
11991267

12001268
# then split into lines delimited by \\ alias mark_linebreak
12011269
# BUG (with warning for braced macro arguments):
12021270
# repl_line() and later repl_sec() may fail if \\ alias mark_linebreak
12031271
# or later & are argument of a macro
12041272
#
1205-
for f in re.finditer(braced, equ):
1273+
for f in re.finditer(braced, text_get_txt(equ)):
12061274
if re.search(mark_linebreak + r'|(?<!\\)&', f.group(1)):
12071275
warning('"\\\\" or "&" in {} braces (macro argument?):'
12081276
+ ' not properly handled',
1209-
re.sub(mark_linebreak, r'\\\\', equ))
1277+
re.sub(mark_linebreak, r'\\\\', text_get_txt(equ)))
12101278
break
1279+
12111280
# important: non-greedy *? repetition
1212-
line = r'((.|\n)*?)(' + mark_linebreak + r'|\Z)'
1281+
line = skip_space + r'((.|\n)*?)(' + mark_linebreak + r'|\Z)'
12131282
# return replacement for RE line
12141283
def repl_line(m):
12151284
# finally, split line into sections delimited by '&'
@@ -1224,9 +1293,11 @@ def repl_sec(m):
12241293
ret = split_sec(m.group(1), flag.first_on_line) + ' '
12251294
flag.first_on_line = False
12261295
return ret
1227-
return ' ' + re.sub(sec, repl_sec, m.group(1)) + '\n'
1296+
t = text_from_match(m, 1, equ)
1297+
t = mysub(sec, repl_sec, t)
1298+
return text_add_frame(' ', '\n', t)
12281299

1229-
return re.sub(line, repl_line, equ)
1300+
return mysub(line, repl_line, equ)
12301301

12311302
# replace equation environments listed above
12321303
#
@@ -1235,14 +1306,17 @@ def repl_sec(m):
12351306
re_args = re_code_args(args, replacement, 'EquEnv', name)
12361307
expr = re_nested_env(name, parms.max_depth_env, re_args)
12371308
def f(m):
1238-
return mark_begin_env + parse_equ(m.group('body')) + mark_end_env
1309+
t = text_from_match(m, 'body', text)
1310+
t = parse_equ(t)
1311+
return text_add_frame(mark_begin_env, mark_end_env, t)
12391312
text = mysub(expr, f, text)
12401313
continue
12411314
# environment with fixed replacement and added interpunction
12421315
env = re_nested_env(name, parms.max_depth_env, '')
12431316
re_code_args('', replacement, 'EquEnv', name, no_backslash=True)
12441317
def f(m):
1245-
txt = parse_equ(m.group('body')).strip()
1318+
txt = parse_equ(text_from_match(m, 'body', text))
1319+
txt = text_get_txt(txt).strip()
12461320
s = replacement
12471321
m = re.search(r'(' + parms.mathpunct + r')\Z', txt)
12481322
if m:
@@ -1274,7 +1348,8 @@ def f(m):
12741348
if m not in macsknown:
12751349
print('\\' + m)
12761350
envs = []
1277-
for m in re.finditer(begin_lbr + r'([^\\{}]+)\}', text_get_txt(text)):
1351+
for m in re.finditer(begin_lbr + r'(' + environ_name + r')\}',
1352+
text_get_txt(text)):
12781353
if m.group(1) not in envs:
12791354
envs += [m.group(1)]
12801355
envs.sort()

0 commit comments

Comments
 (0)