@@ -7,21 +7,19 @@ import (
7
7
"strings"
8
8
)
9
9
10
- const (
11
- nestedNamePattern = `\(#(nested_[a-z0-9_]+)\)`
10
+ var (
11
+ fieldNameRegex = regexp .MustCompile ("[\\ *|-]\\ s+`([a-z0-9_\\ ./]+)`" ) // * `xxx`
12
+ nestedObjectRegex = regexp .MustCompile (`<a\s+name="([a-z0-9_]+)">` ) // <a name="xxx">
13
+ nestedHashTagRegex = regexp .MustCompile (`\(#(nested_[a-z0-9_]+)\)` ) // (#nested_xxx)
14
+ horizontalLineRegex = regexp .MustCompile ("- - -|-{10,}" ) // - - - or ------------
12
15
13
- itemNamePattern = "\\ * `([a-z0-9_\\ ./]+)`"
14
- nestedLinkPattern = `<a\s+name="([a-z0-9_]+)">`
15
-
16
- sectionSeparator = "## "
17
- nestedObjectSeparator = `<a name="nested_`
18
- listItemSeparator = "* `"
16
+ sectionSeparator = "## "
19
17
)
20
18
21
19
// DocumentParser parses *.html.markdown resource doc files.
22
20
type DocumentParser struct {
23
- argumentRoot * node
24
- attriibuteRoot * node
21
+ root * node
22
+ nestedBlock map [ string ] string
25
23
}
26
24
27
25
type node struct {
@@ -31,15 +29,17 @@ type node struct {
31
29
}
32
30
33
31
func NewParser () * DocumentParser {
34
- return & DocumentParser {}
32
+ return & DocumentParser {
33
+ nestedBlock : make (map [string ]string ),
34
+ }
35
35
}
36
36
37
- func (d * DocumentParser ) Arguments () []string {
37
+ func (d * DocumentParser ) FlattenFields () []string {
38
38
var paths []string
39
39
traverse (
40
40
& paths ,
41
41
"" ,
42
- d .argumentRoot ,
42
+ d .root ,
43
43
)
44
44
sort .Strings (paths )
45
45
return paths
@@ -63,17 +63,6 @@ func traverse(paths *[]string, path string, n *node) {
63
63
}
64
64
}
65
65
66
- func (d * DocumentParser ) Attributes () []string {
67
- var paths []string
68
- traverse (
69
- & paths ,
70
- "" ,
71
- d .attriibuteRoot ,
72
- )
73
- sort .Strings (paths )
74
- return paths
75
- }
76
-
77
66
// Parse parses the arguments and attributes sections of a resource document's markdown.
78
67
// The parsed file format is defined in mmv1/templates/terraform/resource.html.markdown.tmpl.
79
68
func (d * DocumentParser ) Parse (src []byte ) error {
@@ -86,51 +75,43 @@ func (d *DocumentParser) Parse(src []byte) error {
86
75
argument = p
87
76
}
88
77
}
89
- if len (argument ) != 0 {
90
- argumentParts := strings .Split (argument , "- - -" )
91
- for _ , part := range argumentParts {
92
- n , err := d .parseSection (part )
93
- if err != nil {
78
+ for _ , text := range []string {argument , attribute } {
79
+ if len (text ) != 0 {
80
+ sections := horizontalLineRegex .Split (text , - 1 )
81
+ var allTopLevelFieldSections string
82
+ for _ , part := range sections {
83
+ topLevelPropertySection , err := d .extractNestedObject (part )
84
+ if err != nil {
85
+ return err
86
+ }
87
+ allTopLevelFieldSections += topLevelPropertySection
88
+ }
89
+ root := & node {
90
+ text : allTopLevelFieldSections ,
91
+ }
92
+ if err := d .bfs (root , d .nestedBlock ); err != nil {
94
93
return err
95
94
}
96
- if d .argumentRoot == nil {
97
- d .argumentRoot = n
95
+ if d .root == nil {
96
+ d .root = root
98
97
} else {
99
- d .argumentRoot .children = append (d .argumentRoot .children , n .children ... )
98
+ d .root .children = append (d .root .children , root .children ... )
100
99
}
101
100
}
102
101
}
103
- if len (attribute ) != 0 {
104
- n , err := d .parseSection (attribute )
105
- if err != nil {
106
- return err
107
- }
108
- d .attriibuteRoot = n
109
- }
110
102
return nil
111
103
}
112
104
113
- func (d * DocumentParser ) parseSection (input string ) (* node , error ) {
114
- parts := strings .Split (input , "\n " + nestedObjectSeparator )
115
- nestedBlock := make (map [string ]string )
105
+ func (d * DocumentParser ) extractNestedObject (input string ) (string , error ) {
106
+ parts := splitWithRegexp (input , nestedObjectRegex )
116
107
for _ , p := range parts [1 :] {
117
- nestedName , err := findPattern (nestedObjectSeparator + p , nestedLinkPattern )
118
- if err != nil {
119
- return nil , err
120
- }
108
+ nestedName := findPattern (p , nestedObjectRegex )
121
109
if nestedName == "" {
122
- return nil , fmt .Errorf ("could not find nested object name in %s" , nestedObjectSeparator + p )
110
+ return "" , fmt .Errorf ("could not find nested object name in %s" , p )
123
111
}
124
- nestedBlock [nestedName ] = p
125
- }
126
- // bfs to traverse the first part without nested blocks.
127
- root := & node {
128
- text : parts [0 ],
112
+ d .nestedBlock [nestedName ] = p
129
113
}
130
- if err := d .bfs (root , nestedBlock ); err != nil {
131
- return nil , err
132
- }
133
- return root , nil
114
+ return parts [0 ], nil
134
115
}
135
116
136
117
func (d * DocumentParser ) bfs (root * node , nestedBlock map [string ]string ) error {
@@ -143,24 +124,22 @@ func (d *DocumentParser) bfs(root *node, nestedBlock map[string]string) error {
143
124
l := len (queue )
144
125
for _ , cur := range queue {
145
126
// the separator should always be at the beginning of the line
146
- items := strings . Split (cur .text , " \n " + listItemSeparator )
147
- for _ , item := range items [1 :] {
148
- text := listItemSeparator + item
149
- itemName , err := findItemName ( text )
150
- if err != nil {
151
- return err
127
+ parts := splitWithRegexp (cur .text , fieldNameRegex )
128
+ for _ , p := range parts [1 :] {
129
+ p = strings . ReplaceAll ( p , " \n " , "" )
130
+ fieldName := findPattern ( p , fieldNameRegex )
131
+ if fieldName == "" {
132
+ return fmt . Errorf ( "could not find field name in %s" , p )
152
133
}
153
134
// There is a special case in some handwritten resources, e.g. compute_instance, where attributes are written in a.0.b.0.c format.
154
- itemName = strings .ReplaceAll (itemName , ".0." , "." )
155
- nestedName , err := findNestedName (text )
156
- if err != nil {
157
- return err
158
- }
135
+ fieldName = strings .ReplaceAll (fieldName , ".0." , "." )
159
136
newNode := & node {
160
- name : itemName ,
137
+ name : fieldName ,
161
138
}
162
139
cur .children = append (cur .children , newNode )
163
- if text , ok := nestedBlock [nestedName ]; ok {
140
+
141
+ nestedHashTag := findPattern (p , nestedHashTagRegex )
142
+ if text , ok := nestedBlock [nestedHashTag ]; ok {
164
143
newNode .text = text
165
144
queue = append (queue , newNode )
166
145
}
@@ -172,31 +151,27 @@ func (d *DocumentParser) bfs(root *node, nestedBlock map[string]string) error {
172
151
return nil
173
152
}
174
153
175
- func findItemName (text string ) (name string , err error ) {
176
- name , err = findPattern (text , itemNamePattern )
177
- if err != nil {
178
- return "" , err
179
- }
180
- if name == "" {
181
- return "" , fmt .Errorf ("cannot find item name from %s" , text )
154
+ func findPattern (text string , re * regexp.Regexp ) string {
155
+ match := re .FindStringSubmatch (text )
156
+ if match != nil {
157
+ return match [1 ]
182
158
}
183
- return
159
+ return ""
184
160
}
185
161
186
- func findPattern (text string , pattern string ) ( string , error ) {
187
- re , err := regexp . Compile ( pattern )
188
- if err != nil {
189
- return "" , err
162
+ func splitWithRegexp (text string , re * regexp. Regexp ) [] string {
163
+ matches := re . FindAllStringIndex ( text , - 1 )
164
+ if len ( matches ) == 0 {
165
+ return [] string { text }
190
166
}
191
- match := re .FindStringSubmatch (text )
167
+ var parts []string
168
+ start := 0
169
+ for _ , match := range matches {
170
+ end := match [0 ]
192
171
193
- if match != nil {
194
- return match [ 1 ], nil
172
+ parts = append ( parts , text [ start : end ])
173
+ start = end
195
174
}
196
- return "" , nil
197
- }
198
-
199
- func findNestedName (text string ) (string , error ) {
200
- s := strings .ReplaceAll (text , "\n " , "" )
201
- return findPattern (s , nestedNamePattern )
175
+ parts = append (parts , text [start :])
176
+ return parts
202
177
}
0 commit comments