12
12
//! This implementation also deviates from the Python version in not treating `\r` specially, which
13
13
//! I believe is more compliant.
14
14
//!
15
- //! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
16
- //! directly as a micro-optimization.
15
+ //! This is a string-friendly wrapper around the [bytes] module that works on the underlying byte
16
+ //! slices. The algorithms in this crate are oblivious to UTF-8 high bytes, so working directly
17
+ //! with bytes is a safe micro-optimization.
17
18
//!
18
19
//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
19
20
//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
@@ -33,122 +34,38 @@ pub mod bytes;
33
34
34
35
/// An iterator that takes an input string and splits it into the words using the same syntax as
35
36
/// the POSIX shell.
36
- pub struct Shlex < ' a > {
37
- in_iter : core:: str:: Bytes < ' a > ,
38
- /// The number of newlines read so far, plus one.
39
- pub line_no : usize ,
40
- /// An input string is erroneous if it ends while inside a quotation or right after an
41
- /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
42
- /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
43
- /// true; best to check it after you're done iterating.
44
- pub had_error : bool ,
45
- }
37
+ ///
38
+ /// See [`bytes::Shlex`], the byte-slice lexer that this newtype wraps and delegates to.
39
+ pub struct Shlex < ' a > ( bytes:: Shlex < ' a > ) ;
46
40
47
41
impl < ' a > Shlex < ' a > {
48
42
pub fn new ( in_str : & ' a str ) -> Self {
49
- Shlex {
50
- in_iter : in_str. bytes ( ) ,
51
- line_no : 1 ,
52
- had_error : false ,
53
- }
54
- }
55
-
56
- fn parse_word ( & mut self , mut ch : u8 ) -> Option < String > {
57
- let mut result: Vec < u8 > = Vec :: new ( ) ;
58
- loop {
59
- match ch as char {
60
- '"' => if let Err ( ( ) ) = self . parse_double ( & mut result) {
61
- self . had_error = true ;
62
- return None ;
63
- } ,
64
- '\'' => if let Err ( ( ) ) = self . parse_single ( & mut result) {
65
- self . had_error = true ;
66
- return None ;
67
- } ,
68
- '\\' => if let Some ( ch2) = self . next_char ( ) {
69
- if ch2 != '\n' as u8 { result. push ( ch2) ; }
70
- } else {
71
- self . had_error = true ;
72
- return None ;
73
- } ,
74
- ' ' | '\t' | '\n' => { break ; } ,
75
- _ => { result. push ( ch as u8 ) ; } ,
76
- }
77
- if let Some ( ch2) = self . next_char ( ) { ch = ch2; } else { break ; }
78
- }
79
- unsafe { Some ( String :: from_utf8_unchecked ( result) ) }
43
+ Self ( bytes:: Shlex :: new ( in_str. as_bytes ( ) ) ) // Wrap a byte lexer over the string's bytes; `Iterator::next` turns each word back into a `String`.
80
44
}
45
+ }
81
46
82
- fn parse_double ( & mut self , result : & mut Vec < u8 > ) -> Result < ( ) , ( ) > {
83
- loop {
84
- if let Some ( ch2) = self . next_char ( ) {
85
- match ch2 as char {
86
- '\\' => {
87
- if let Some ( ch3) = self . next_char ( ) {
88
- match ch3 as char {
89
- // \$ => $
90
- '$' | '`' | '"' | '\\' => { result. push ( ch3) ; } ,
91
- // \<newline> => nothing
92
- '\n' => { } ,
93
- // \x => =x
94
- _ => { result. push ( '\\' as u8 ) ; result. push ( ch3) ; }
95
- }
96
- } else {
97
- return Err ( ( ) ) ;
98
- }
99
- } ,
100
- '"' => { return Ok ( ( ) ) ; } ,
101
- _ => { result. push ( ch2) ; } ,
102
- }
103
- } else {
104
- return Err ( ( ) ) ;
105
- }
106
- }
47
+ impl < ' a > Iterator for Shlex < ' a > {
48
+ type Item = String ; // Each item is one word, converted from the byte vector the inner lexer yields.
49
+ fn next ( & mut self ) -> Option < String > {
50
+ self . 0 . next ( ) . map ( |byte_word| { // Delegate to the wrapped byte lexer; `None` passes through unchanged.
51
+ // SAFETY: `Shlex::new` only accepts `&str`, and this relies on bytes::Shlex returning valid UTF-8 whenever its input is valid UTF-8. NOTE(review): the `DerefMut` impl hands out `&mut bytes::Shlex`, so if `bytes::Shlex::new` is public, safe code could swap in a lexer over non-UTF-8 bytes and break this assumption - confirm.
52
+ unsafe { String :: from_utf8_unchecked ( byte_word) }
53
+ } )
107
54
}
55
+ }
108
56
109
- fn parse_single ( & mut self , result : & mut Vec < u8 > ) -> Result < ( ) , ( ) > {
110
- loop {
111
- if let Some ( ch2) = self . next_char ( ) {
112
- match ch2 as char {
113
- '\'' => { return Ok ( ( ) ) ; } ,
114
- _ => { result. push ( ch2) ; } ,
115
- }
116
- } else {
117
- return Err ( ( ) ) ;
118
- }
119
- }
120
- }
57
+ impl < ' a > core:: ops:: Deref for Shlex < ' a > { // Lets callers reach the wrapped byte lexer's members through this wrapper.
58
+ type Target = bytes:: Shlex < ' a > ;
121
59
122
- fn next_char ( & mut self ) -> Option < u8 > {
123
- let res = self . in_iter . next ( ) ;
124
- if res == Some ( '\n' as u8 ) { self . line_no += 1 ; }
125
- res
60
+ fn deref ( & self ) -> & Self :: Target {
61
+ & self . 0 // Shared borrow of the wrapped lexer.
126
62
}
127
63
}
128
64
129
- impl < ' a > Iterator for Shlex < ' a > {
130
- type Item = String ;
131
- fn next ( & mut self ) -> Option < String > {
132
- if let Some ( mut ch) = self . next_char ( ) {
133
- // skip initial whitespace
134
- loop {
135
- match ch as char {
136
- ' ' | '\t' | '\n' => { } ,
137
- '#' => {
138
- while let Some ( ch2) = self . next_char ( ) {
139
- if ch2 as char == '\n' { break ; }
140
- }
141
- } ,
142
- _ => { break ; }
143
- }
144
- if let Some ( ch2) = self . next_char ( ) { ch = ch2; } else { return None ; }
145
- }
146
- self . parse_word ( ch)
147
- } else { // no initial character
148
- None
149
- }
65
+ impl < ' a > core:: ops:: DerefMut for Shlex < ' a > { // Mutable counterpart of the `Deref` impl; target is `bytes::Shlex`.
66
+ fn deref_mut ( & mut self ) -> & mut Self :: Target {
67
+ & mut self . 0 // NOTE(review): a `&mut` to the wrapped lexer allows replacing it wholesale; `Iterator::next` assumes it only ever lexes UTF-8 - confirm this is acceptable.
150
68
}
151
-
152
69
}
153
70
154
71
/// Convenience function that consumes the whole string at once. Returns None if the input was
0 commit comments