13
13
# along with this program. If not, see <https://www.gnu.org/licenses/>.
14
14
#
15
15
16
+ #######################################################################
17
+ #
16
18
# Python3:
17
19
# Extract raw text from LaTeX file, write result to standard output
18
20
#
19
- # . output suitable for check, e.g., with LanguageTool (LT)
20
- # . we make an effort to avoid creation of additional empty lines that
21
- # break sentences for LT; this keeps number of "false" LT warnings low
22
- # . line number changes caused by this approach can be compensated by
23
- # a small filter for LT messages using the file from option --nums
24
- # . punctuation in displayed equations can be checked to a certain extent
25
- #
26
- # - argument:
27
- # name of file with input text; read standard input if missing
28
- # - option --nums file: (file name)
29
- # file for storing original line numbers;
30
- # can be used later to correct line numbers in messages
31
- # - option --repl file: (file name)
32
- # file with replacements performed at the end, namely after
33
- # changing, e.g., inline maths to text and german hyphen "= to - ;
34
- # see LAB:SPELLING below for line syntax
35
- # - option --extr ma[,mb,...]: (list of macro names)
36
- # extract only first braced argument of these macros;
37
- # useful, e.g., for check of foreign-language text and footnotes
38
- # - option --lang xy: (language de or en, default: de)
39
- # used for adaptation of equation replacements, math operator names,
40
- # proof titles, and replacement of foreign-language text;
41
- # see LAB:LANGUAGE below
42
- # - option --unkn:
43
- # print list of "undeclared" macros and environments
44
- #
45
- # Some actions:
46
- # - \begin{...} and \end{...} of environments are deleted;
47
- # tailored behaviour for some environment types listed below
48
- # - text in heading macros as \section{...} is extracted
49
- # - placeholders for \ref, \eqref, \pageref, and \cite
50
- # - "undeclared" macros are silently ignored
51
- # - inline math $...$ is replaced with text from rotating collection
52
- # in variable parms.inline_math
53
- # - equation environments are resolved in a way suitable for check of
54
- # punctuation, argument of \text{...} is included in the output text;
55
- # \[ ... \] is same as environment equation*;
56
- # see LAB:EQUATIONS below for example and detailed description
57
- # - some treatment for \item[...] labels, see LAB:ITEMS
58
- # - rare LT warnings can be suppressed using \LTadd, \LTskip,
59
- # and \LTalter (see below) in the LaTeX text with suitable macro
60
- # definitions there, e.g. adding something for LT only:
61
- # \newcommand{\LTadd}[1]{}
21
+ # Usage and main operations:
22
+ # - see README
62
23
#
63
24
# Principle of operation:
64
25
# - read complete input text into a string, then make replacements
@@ -98,6 +59,7 @@ class Aux: pass
98
59
# args:
99
60
# - A: mandatory {...} argument
100
61
# - O: optional [...] argument
62
+ # - P: mandatory [...] argument, see for instance \cite
101
63
# repl:
102
64
# - replacement pattern, r'\d' (d: single digit) extracts text
103
65
# from position d in args (counting from 1);
@@ -162,9 +124,12 @@ class Aux: pass
162
124
# BUG: quite probably, some macro is missing here ;-)
163
125
#
164
126
parms .system_macros = lambda : (
127
+ Macro ('cite' , 'A' , '[1]' ),
128
+ Macro ('cite' , 'PA' , r'[1, \1]' ),
165
129
Macro ('color' , 'A' ),
166
130
Macro ('colorbox' , 'AA' , r'\2' ),
167
131
Macro ('documentclass' , 'OA' ),
132
+ Macro ('eqref' , 'A' , '(7)' ),
168
133
Macro ('fcolorbox' , 'AAA' , r'\3' ),
169
134
Macro ('footnote' , 'OA' , '5' ),
170
135
Macro ('footnotemark' , 'O' , '5' ),
@@ -175,6 +140,8 @@ class Aux: pass
175
140
Macro ('includegraphics' , 'OA' ),
176
141
Macro ('input' , 'A' ),
177
142
Macro ('newcommand' , 'AOA' ),
143
+ Macro ('pageref' , 'A' , '99' ),
144
+ Macro ('ref' , 'A' , '13' ),
178
145
Macro ('texorpdfstring' , 'AA' , r'\1' ),
179
146
Macro ('textcolor' , 'AA' , r'\2' ),
180
147
Macro ('usepackage' , 'OA' ),
@@ -207,20 +174,6 @@ class Aux: pass
207
174
r'subsubsection\*?' ,
208
175
)
209
176
210
- # theorem environments from package amsthm with optional argument [...]:
211
- # display a title and text in optional argument as (...) with final dot
212
- #
213
- parms .theorem_environments = lambda : (
214
- # (environment name, text title)
215
- ('Anmerkung' , 'Anmerkung' ),
216
- ('Beispiel' , 'Beispiel' ),
217
- ('Definition' , 'Definition' ),
218
- ('Korollar' , 'Korollar' ),
219
- ('Nachweis' , 'Nachweis' ),
220
- ('Proposition' , 'Proposition' ),
221
- ('Satz' , 'Satz' ),
222
- )
223
-
224
177
# equation environments, partly from LaTeX package amsmath;
225
178
# see comments at LAB:EQUATIONS below
226
179
#
@@ -257,15 +210,49 @@ class Aux: pass
257
210
)
258
211
259
212
# at the end, we delete all unknown "standard" environment frames;
260
- # these are environments with options / arguments at \begin{...}
213
+ # here are environments with options / arguments at \begin{...},
214
+ # or with a replacement text for \begin{...}
261
215
#
262
- # EnvBegArg (name, args)
216
+ # EnvBegin (name, args='', repl='' )
263
217
# - args: as for Macro()
218
+ # - repl: as for Macro()
219
+ #
220
# Declarations for \begin{...} of environments that take options or
# arguments, or that get a replacement text.
#
# The table is wrapped in a lambda: parms.proof_title and
# parms.theorem_environments are only looked up when the table is
# requested, not when this assignment is executed.
#
# Entry order is significant for environments declared twice: the variant
# with a mandatory [...] argument ('P') has to be listed before the variant
# without arguments, so that the option is tried first.
#
parms.environment_begins = lambda: (
    EnvBegin('figure', 'O'),
    EnvBegin('minipage', 'A'),
    EnvBegin('tabular', 'A'),

    # proof: variant with [...] option first, plain variant afterwards
    EnvBegin('proof', 'P', r'\1.'),
    EnvBegin('proof', '', parms.proof_title + '.'),
) + tuple(
    # theorems: the outer loop selects the variant, so that all entries
    # with [...] option precede all entries without it (same order as
    # for proof)
    EnvBegin(env_name, arg_code, heading + tail)
    for (arg_code, tail) in (('P', r' 1.2 (\1).'), ('', ' 1.2.'))
    for (env_name, heading) in parms.theorem_environments()
)
235
+
236
+ # theorem environments from package amsthm with optional argument [...]:
237
+ # display a title and text in optional argument as (...) with final dot
264
238
#
265
- parms .environments_with_args = lambda : (
266
- EnvBegArg ('figure' , 'O' ),
267
- EnvBegArg ('minipage' , 'A' ),
268
- EnvBegArg ('tabular' , 'A' ),
239
+ parms .theorem_environments = lambda : (
240
+ # (environment name, text title)
241
+ ('Anmerkung' , 'Anmerkung' ),
242
+ ('Beispiel' , 'Beispiel' ),
243
+ ('Definition' , 'Definition' ),
244
+ ('Korollar' , 'Korollar' ),
245
+ ('Nachweis' , 'Nachweis' ),
246
+ ('Proposition' , 'Proposition' ),
247
+ ('Satz' , 'Satz' ),
248
+
249
+ ('corollary' , 'Corollary' ),
250
+ ('definition' , 'Definition' ),
251
+ ('example' , 'Example' ),
252
+ ('lemma' , 'Lemma' ),
253
+ ('proposition' , 'Proposition' ),
254
+ ('remark' , 'Remark' ),
255
+ ('theorem' , 'Theorem' ),
269
256
)
270
257
271
258
# a list of 2-tuples for other things to be replaced
@@ -367,8 +354,6 @@ def set_language_en():
367
354
# further replacements performed below:
368
355
#
369
356
# - replacement of $...$ inline math
370
- # - proof environment
371
- # - macros for cross references
372
357
# - handling of displayed equations
373
358
# - some treatment of \item[...] labels
374
359
# - environments not listed above: \begin{...} and \end{...} deleted
@@ -470,8 +455,8 @@ def EquEnv(name, args='', repl=''):
470
455
return (name , args , repl )
471
456
def EnvRepl(name, repl=''):
    """Return the declaration record (name, repl) for an environment.

    name: environment name
    repl: second element of the record; the empty string by default
    """
    record = (name, repl)
    return record
473
- def EnvBegArg (name , args = '' ):
474
- return (name , args )
458
def EnvBegin(name, args='', repl=''):
    """Return the declaration record (name, args, repl) for an environment.

    name: environment name
    args: argument code string, as for Macro(); empty by default
    repl: replacement pattern, as for Macro(); empty by default
    """
    record = (name, args, repl)
    return record
475
460
def re_code_args (args , who , s ):
476
461
# return regular expression for 'OAA' code
477
462
ret = ''
@@ -480,9 +465,17 @@ def re_code_args(args, who, s):
480
465
ret += sp_braced
481
466
elif a == 'O' :
482
467
ret += r'(?:' + sp_bracketed + r')?'
468
+ elif a == 'P' :
469
+ ret += sp_bracketed
483
470
else :
484
471
fatal (who + "('" + s + "',...): bad argument code '" + args + "'" )
485
472
return ret
473
def check_repl_string(args, repl, who, s):
    r"""Check the \d references (d: single digit) in a replacement string.

    args: argument code string with one letter per argument (e.g. 'OA');
          its length is the number of positions that may be referenced
    repl: replacement pattern to be checked
    who:  name of the declaring function, only used in the error message
    s:    name of the declared macro or environment, only used in the
          error message

    Calls fatal() for every reference \d with d < 1 or d > len(args).
    An escaped backslash (two consecutive backslashes) is consumed as a
    unit, so that a literal backslash followed by a digit is not mistaken
    for a reference.
    NOTE(review): this assumes that repl is later expanded with the
    template rules of module re -- confirm against mysub().
    """
    # group 1 is only set for '\d'; for an escaped backslash it is None
    for m in re.finditer(r'\\(?:\\|(\d))', repl):
        ref = m.group(1)
        if ref is None:
            continue
        if not 1 <= int(ref) <= len(args):
            # show the reference exactly as written, e.g. "\3"
            fatal('invalid "\\' + ref + '" in replacement for '
                  + who + "('" + s + "', ...)")
486
479
487
480
# the expression r'\\to\b' does not work as expected on \to0
488
481
# --> use r'\\to' + end_mac
@@ -496,16 +489,9 @@ def re_code_args(args, who, s):
496
489
skip_space_macro = (r'(?:[ \t]*(?:\n(?=[ \t]*\S)(?![ \t]*\\begin'
497
490
+ end_mac + r'))?[ \t]*)' )
498
491
499
- # these RE match beginning and end of arbitrary "standard" environments,
500
- # and those with arguments at \begin as declared above
492
+ # these RE match beginning and end of arbitrary "standard" environments
501
493
#
502
- re_begin_env = op = ''
503
- for (name , args ) in parms .environments_with_args ():
504
- expr = begin_lbr + name + r'\}' + re_code_args (args , 'EnvBegArg' , name )
505
- re_begin_env += op + r'(?:' + expr + r')'
506
- op = r'|'
507
- re_begin_env += op + r'(?:' + begin_lbr + r'[^\\{}]+\})'
508
- re_begin_env = r'(?:' + re_begin_env + r')'
494
+ re_begin_env = begin_lbr + r'[^\\{}]+\}'
509
495
re_end_env = end_lbr + r'[^\\{}]+\}'
510
496
511
497
# UTF-8 characters;
@@ -722,15 +708,10 @@ def f(m):
722
708
f
723
709
)]
724
710
725
- for (s , t ) in parms .theorem_environments ():
726
- actions += [
727
- # first try with option ...
728
- (begin_lbr + s + r'\}' + sp_bracketed , t + r' 1.2 (\1).' ),
729
- # ... and then without
730
- (begin_lbr + s + r'\}' , t + r' 1.2.' ),
731
- # delete \end{...}
732
- (eat_eol (end_lbr + s + r'\}' ), eol2space ),
733
- ]
711
+ for (name , args , repl ) in parms .environment_begins ():
712
+ expr = begin_lbr + name + r'\}' + re_code_args (args , 'EnvBegin' , name )
713
+ check_repl_string (args , repl , 'EnvBegin' , name )
714
+ actions += [(expr , r'\\begin{%}' + repl )]
734
715
735
716
# replace $...$ by text from variable parms.inline_math
736
717
# BUG (with warning): fails e.g. on $x \text{ for $x>0$}$
@@ -743,27 +724,6 @@ def f(m):
743
724
return parms .inline_math [0 ]
744
725
actions += [(r'(?<!\\)\$((?:' + braced + r'|[^\\$]|\\.)*)\$' , f )]
745
726
746
- # proof environment with optional [...]:
747
- # extract text in [...] and append '.'
748
- #
749
- actions += [
750
- # first try version with option ...
751
- (begin_lbr + r'proof\}' + sp_bracketed , r'\1.' ),
752
- # ... then without
753
- (begin_lbr + r'proof\}' , parms .proof_title + '.' ),
754
- (eat_eol (end_lbr + r'proof\}' ), eol2space )
755
- ]
756
-
757
- # replace \cite, \eqref, \ref, \pageref
758
- #
759
- actions += [
760
- (r'\\cite' + sp_bracketed + sp_braced , r'[1, \1]' ),
761
- (r'\\cite' + sp_braced , '[1]' ),
762
- (r'\\eqref' + sp_braced , '(7)' ),
763
- (r'\\ref' + sp_braced , '13' ),
764
- (r'\\pageref' + sp_braced , '99' )
765
- ]
766
-
767
727
# now perform the collected replacement actions
768
728
#
769
729
for (expr , repl ) in actions :
@@ -786,12 +746,7 @@ def f(m):
786
746
expr = (r'(?:(?:' + expr + r'(?!' + skip_space + r'[[{])'
787
747
+ skip_space_macro + r')|(?:'
788
748
+ expr + re_code_args (args , 'Macro' , name ) + r'))' )
789
- for m in re .finditer (r'\\(\d)' , repl ):
790
- # make error messages more accessible (hopefully)
791
- n = int (m .group (1 ))
792
- if n < 1 or n > len (args ):
793
- fatal ('inavlid "\\ ' + m .group (1 ) + '" in replacement for "'
794
- + name + '"' )
749
+ check_repl_string (args , repl , 'Macro' , name )
795
750
while mysearch (expr , text ):
796
751
# macro might be nested
797
752
text = mysub (expr , mark_deleted + repl , text )
@@ -804,28 +759,24 @@ def f(m):
804
759
##################################################################
805
760
806
761
# example:
807
-
808
- """
809
- Thus,
810
- %
811
- \b egin{align}
812
- \mu &= f(x) \quad\t ext{for all } \mu\in\Omega, \n otag \\
813
- x &= \b egin{cases}
814
- 0 & \t ext{ for} \ y>0 \\
815
- 1 & \t ext{ in case} y\le 0.
816
- \end{cases} \label{lab}
817
- \end{align}
818
- """
819
-
762
+ #
763
+ # Thus,
764
+ # %
765
+ # \begin{align}
766
+ # \mu &= f(x) \quad\text{for all } \mu\in\Omega, \notag \\
767
+ # x &= \begin{cases}
768
+ # 0 & \text{ for} \ y>0 \\
769
+ # 1 & \text{ in case} y\le 0.
770
+ # \end{cases} \label{lab}
771
+ # \end{align}
772
+ #
820
773
# becomes with parms.change_repl_after_punct == True
821
774
# and --lang en:
822
-
823
- """
824
- Thus,
825
- U equal V for all W,
826
- X equal Y for Z
827
- Z in caseU.
828
- """
775
+ #
776
+ # Thus,
777
+ # U equal V for all W,
778
+ # X equal Y for Z
779
+ # Z in caseU.
829
780
830
781
# 1. split equation environment into 'lines' delimited by \\ alias \newline
831
782
# 2. split each 'line' into 'sections' delimited by &
@@ -1027,13 +978,12 @@ def f(m):
1027
978
if m not in macsknown :
1028
979
print ('\\ ' + m )
1029
980
envs = []
1030
- envsknown = ('%' ,) + tuple (e [0 ] for e in parms .environments_with_args ())
1031
981
for m in re .finditer (begin_lbr + r'([^\\{}]+)\}' , text_get_txt (text )):
1032
982
if m .group (1 ) not in envs :
1033
983
envs += [m .group (1 )]
1034
984
envs .sort ()
1035
985
for e in envs :
1036
- if e not in envsknown :
986
+ if e != '%' :
1037
987
print (r'\begin{' + e + '}' )
1038
988
exit ()
1039
989
0 commit comments