23
23
#
24
24
# Principle of operation:
25
25
# - read complete input text into a string, then make replacements
26
- # - replacements are performed via the wrapper mysub() in order
27
- # to observe deletion and inclusion of line breaks
26
+ # - replacements are performed via the "re-implementation" mysub() of
27
+ # re.sub() in order to observe deletion and inclusion of line breaks
28
28
# - in order to treat nested braces / brackets and some nested
29
29
# environments, we construct regular expressions by iteration;
30
30
# maximum recognized nesting depth (and thus length of these expressions)
@@ -63,8 +63,8 @@ class Aux: pass
63
63
# repl:
64
64
# - replacement pattern, r'\d' (d: single digit) extracts text
65
65
# from position d in args (counting from 1)
66
- # - escape rules: see replacement argument of re.sub() ;
67
- # include single backslash: repl=r'...\\...'
66
+ # - other escape rules: see escape handling at myexpand() below ;
67
+ # e.g., include a single backslash: repl=r'...\\...'
68
68
# - inclusion of % only as escaped version r'\\%' accepted, will be
69
69
# resolved to % at the end by resolve_escapes()
70
70
# - inclusion of double backslash \\ and replacement ending with \
@@ -641,7 +641,7 @@ def verbatim(s, mark, ast):
641
641
642
642
#######################################################################
643
643
#
644
- # This wrapper for re.sub() operates a small machinery for
644
+ # This "re-implementation" of re.sub() operates a small machinery for
645
645
# line number tracking.
646
646
# Argument text is a 2-tuple.
647
647
# text[0]: the text as string
@@ -668,14 +668,15 @@ def mysub(expr, repl, text, flags=0, extract=None):
668
668
if not t :
669
669
continue
670
670
if type (repl ) is str :
671
- r = myexpand (m , repl , text )
671
+ ex = myexpand (m , repl , text )
672
672
else :
673
- r = repl (m )
674
- if type (r ) is tuple :
673
+ ex = repl (m )
674
+ if type (ex ) is tuple :
675
675
# replacement contains line number information
676
- nums2 = r [ 1 ]
677
- r = r [ 0 ]
676
+ r = ex [ 0 ]
677
+ nums2 = ex [ 1 ]
678
678
else :
679
+ r = ex
679
680
nums2 = None
680
681
res += txt [last :m .start (0 )]
681
682
last = m .end (0 )
@@ -714,8 +715,8 @@ def text_combine(text1, text2):
714
715
+ r'|' + re_end_env + r'|\s)*\Z' )
715
716
(t1 , n1 ) = text1
716
717
(t2 , n2 ) = text2
717
- i = t1 .rfind ('\n ' ) + 1 # i == 0, if not found
718
- if re . search ( space , t1 [ i :]):
718
+ if n1 [ - 1 ] == n2 [ 0 ] or re . search ( space , t1 [ t1 .rfind ('\n ' )+ 1 :]):
719
+ # same line numbers at junction or
719
720
# only "space" after last line break in text1:
720
721
# use first line number from text2 at junction
721
722
n = n1 [:- 1 ] + n2
@@ -729,9 +730,8 @@ def text_combine(text1, text2):
729
730
def text_add_frame (pre , post , text ):
730
731
return (
731
732
pre + text [0 ] + post ,
732
- (- abs (text [1 ][0 ]),) * pre .count ('\n ' )
733
- + text [1 ]
734
- + (- abs (text [1 ][- 1 ]),) * post .count ('\n ' )
733
+ (text [1 ][0 ],) * pre .count ('\n ' ) + text [1 ]
734
+ + (text [1 ][- 1 ],) * post .count ('\n ' )
735
735
)
736
736
737
737
# extract text with line number information from a group of a match
@@ -743,11 +743,69 @@ def text_from_match(m ,grp, text):
743
743
end = beg + m .group (grp ).count ('\n ' ) + 1
744
744
return (m .group (grp ), text [1 ][beg :end ])
745
745
746
- # here, we could re-implement parsing of the repl string and provide
747
- # line number information, if a used capturing group spans multiple lines
746
+ # expansion of a match from replacement template repl:
747
+ # returned text element provides line number information,
748
+ # if repl contains a reference to a capturing group
748
749
#
749
750
def myexpand (m , repl , text ):
750
- return m .expand (repl )
751
+ # return m.expand(repl) # fail-safe version
752
+ if not repl :
753
+ return ''
754
+
755
+ # first parse repl: build list 'ops' of
756
+ # (strings) and (numbers of referenced capturing groups)
757
+ # - compare parse_template() in /usr/lib/python?.?/sre_parse.py
758
+ escapes = {
759
+ 'a' : '\a ' , 'b' : '\b ' , 'f' : '\f ' , 'n' : '\n ' ,
760
+ 'r' : '\r ' , 't' : '\t ' , 'v' : '\v ' , '\\ ' : '\\ '
761
+ }
762
+ ops = []
763
+ first = None
764
+ cur_str = ''
765
+ i = 0
766
+ while i < len (repl ):
767
+ c = repl [i ]
768
+ i += 1
769
+ if c != '\\ ' :
770
+ cur_str += c
771
+ continue
772
+ if i >= len (repl ):
773
+ cur_str += '\\ '
774
+ break
775
+ c = repl [i ]
776
+ i += 1
777
+ if c in escapes :
778
+ cur_str += escapes [c ]
779
+ elif c in '0g' :
780
+ fatal ('myexpand(): escape sequences \\ 0... and \\ g<...>'
781
+ + ' not implemented' )
782
+ elif c .isdecimal ():
783
+ if cur_str :
784
+ ops += [cur_str ]
785
+ cur_str = ''
786
+ if first is None :
787
+ first = len (ops )
788
+ ops += [int (c )]
789
+ else :
790
+ cur_str += '\\ ' + c
791
+ if cur_str :
792
+ ops += [cur_str ]
793
+
794
+ if first is None :
795
+ # no group reference found, repl == '' was excluded above
796
+ return ops [0 ]
797
+
798
+ # build replacement text with line number information
799
+ t = text_from_match (m , ops [first ], text )
800
+ if first > 0 :
801
+ t = text_add_frame (ops [0 ], '' , t )
802
+ for i in range (first + 1 , len (ops )):
803
+ if type (ops [i ]) is int :
804
+ t2 = text_from_match (m , ops [i ], text )
805
+ t = text_combine (t , t2 )
806
+ else :
807
+ t = text_add_frame ('' , ops [i ], t )
808
+ return t
751
809
752
810
def mysearch (expr , text , flags = 0 ):
753
811
if type (text ) is not tuple :
@@ -1451,15 +1509,16 @@ def f(m):
1451
1509
s = r'\s+'
1452
1510
if not t :
1453
1511
continue
1454
-
1455
1512
if t [0 ].isalpha ():
1456
1513
t = r'\b' + t # require word boundary
1457
1514
if t [- 1 ].isalpha ():
1458
1515
t = t + r'\b'
1459
- r = s = ''
1460
- for i in range (i + 1 , len (lin )):
1461
- r += s + lin [i ]
1462
- s = ' '
1516
+
1517
+ r = ' ' .join (lin [i + 1 :])
1518
+ if re .search (r'(?<!\\)%' , r ):
1519
+ fatal ('please use escaped \\ % for replacement in file "'
1520
+ + cmdline .repl + '"' , r )
1521
+ r = re .sub ('\\ \\ ' , '\\ \\ \\ \\ ' , r ) # \ ==> \\
1463
1522
text = mysub (t , r , text )
1464
1523
1465
1524
0 commit comments