Skip to content

Commit efb4abe

Browse files
authored
v1.0.X-performance
Hardened (#159) - performance, sync, path filtering and whitelisting improvements
2 parents cbf3ef5 + 6a0a5b3 commit efb4abe

File tree

7 files changed

+327
-172
lines changed

7 files changed

+327
-172
lines changed

cmd/api.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ func processWithRegex(policy Policy, data []byte, rgPath string) error {
168168
}
169169
func executeAssureForAPI(policy Policy, rgPath, filePath string) (bool, error) {
170170
// Create a temporary file to store the search patterns
171-
searchPatternFile, err := createSearchPatternFile(policy.Regex)
171+
searchPatternFile, err := createSearchPatternFile(policy.Regex, NormalizeFilename(policy.ID))
172172
if err != nil {
173173
return false, fmt.Errorf("error creating search pattern file: %w", err)
174174
}

cmd/assure.go

+130-29
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"os"
77
"os/exec"
88
"path/filepath"
9+
"sync"
910
)
1011

1112
// ProcessAssureType handles the assurance process for policies of type "assure"
@@ -23,7 +24,7 @@ func ProcessAssureType(policy Policy, rgPath string, targetDir string, filePaths
2324

2425
func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure []string) error {
2526
// Create a temporary file to store the search patterns
26-
searchPatternFile, err := createSearchPatternFile(policy.Regex)
27+
searchPatternFile, err := createSearchPatternFile(policy.Regex, NormalizeFilename(policy.ID))
2728
if err != nil {
2829
log.Error().Err(err).Msg("error creating search pattern file")
2930
return fmt.Errorf("error creating search pattern file: %w", err)
@@ -49,6 +50,8 @@ func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure
4950
writer := bufio.NewWriter(jsonoutfile)
5051
defer writer.Flush()
5152

53+
processedIgnorePatterns := processIgnorePatterns(policyData.Config.Flags.Ignore)
54+
5255
codePatternAssureJSON := []string{
5356
"--pcre2",
5457
"--no-heading",
@@ -64,39 +67,24 @@ func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure
6467
return fmt.Errorf("no target directory defined")
6568
}
6669

67-
// Append the file targets
68-
if len(filesToAssure) > 0 {
69-
codePatternAssureJSON = append(codePatternAssureJSON, filesToAssure...)
70-
} else if policy.FilePattern == "" {
71-
codePatternAssureJSON = append(codePatternAssureJSON, targetDir)
70+
codePatternAssureJSON = append(codePatternAssureJSON, processedIgnorePatterns...)
71+
72+
matchesFound := true
73+
74+
// Parallel execution for large file sets
75+
if policy.FilePattern == "" {
76+
log.Warn().Str("policy", policy.ID).Msg("ASSURE Policy without a filepattern is suboptimal")
77+
}
78+
if len(filesToAssure) > 25 {
79+
matchesFound, err = executeParallelAssure(rgPath, codePatternAssureJSON, filesToAssure, writer)
7280
} else {
73-
return fmt.Errorf("no files matched policy pattern")
81+
matchesFound, err = executeSingleAssure(rgPath, codePatternAssureJSON, filesToAssure, targetDir, policy, writer)
7482
}
7583

76-
// Execute the ripgrep command for JSON output
77-
cmdJSON := exec.Command(rgPath, codePatternAssureJSON...)
78-
cmdJSON.Stdout = writer
79-
cmdJSON.Stderr = os.Stderr
84+
if err != nil {
8085

81-
log.Debug().Msgf("Creating JSON output for assure policy %s... ", policy.ID)
82-
err = cmdJSON.Run()
86+
log.Error().Err(err).Msg("error executing ripgrep batch")
8387

84-
// Check if ripgrep found any matches
85-
matchesFound := true
86-
if err != nil {
87-
if exitError, ok := err.(*exec.ExitError); ok {
88-
// Exit code 1 in ripgrep means "no matches found"
89-
if exitError.ExitCode() == 1 {
90-
matchesFound = false
91-
err = nil // Reset error as this is the expected outcome for assure
92-
} else {
93-
log.Error().Err(err).Msg("error executing ripgrep for JSON output")
94-
return fmt.Errorf("error executing ripgrep for JSON output: %w", err)
95-
}
96-
} else {
97-
log.Error().Err(err).Msg("error executing ripgrep for JSON output")
98-
return fmt.Errorf("error executing ripgrep for JSON output: %w", err)
99-
}
10088
}
10189

10290
// Patch the JSON output file
@@ -107,6 +95,7 @@ func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure
10795
}
10896

10997
log.Debug().Msgf("JSON output for assure policy %s written to: %s ", policy.ID, jsonOutputFile)
98+
log.Debug().Msgf("Scanned ~%d files for policy %s", len(filesToAssure), policy.ID)
11099

111100
// Determine the status based on whether matches were found
112101
status := "NOT FOUND"
@@ -143,3 +132,115 @@ func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure
143132

144133
return nil
145134
}
135+
136+
// executeParallelAssure runs ripgrep over filesToScan in fixed-size batches,
// one ripgrep process per batch, and appends every batch's stdout to writer.
//
// Parameters:
//   - rgPath:      path of the ripgrep executable to run.
//   - baseArgs:    arguments shared by every invocation; never modified.
//   - filesToScan: paths to scan; they are split into batches of batchSize.
//   - writer:      destination for the combined ripgrep output. Writes are
//     serialised, and the writer is flushed after each batch.
//
// Returns:
//   - bool:  true when at least one batch exited with status 0 (ripgrep found
//     a match). A batch that exits with status 1 ("no matches") does
//     not cancel a match reported by another batch. With no files to
//     scan the result is false.
//   - error: the first failure observed (ripgrep could not be started, exited
//     with a status other than 0 or 1, or the output could not be
//     written). The bool still reflects the batches that succeeded.
func executeParallelAssure(rgPath string, baseArgs []string, filesToScan []string, writer *bufio.Writer) (bool, error) {
	const (
		batchSize = 25
		// maxConcurrent caps the number of ripgrep processes alive at once so
		// that very large file sets do not exhaust processes or descriptors.
		maxConcurrent = 8
	)

	var (
		wg       sync.WaitGroup
		mu       sync.Mutex // guards writer, matched and firstErr
		matched  bool
		firstErr error
	)
	sem := make(chan struct{}, maxConcurrent)

	// fail records the first error only; later ones are dropped.
	fail := func(err error) {
		mu.Lock()
		defer mu.Unlock()
		if firstErr == nil {
			firstErr = err
		}
	}

	for start := 0; start < len(filesToScan); start += batchSize {
		end := start + batchSize
		if end > len(filesToScan) {
			end = len(filesToScan)
		}
		batch := filesToScan[start:end]

		wg.Add(1)
		sem <- struct{}{} // wait for a free slot before spawning
		go func(batch []string) {
			defer wg.Done()
			defer func() { <-sem }()

			// Build a private argument slice. append(baseArgs, batch...) could
			// write into spare capacity of the shared backing array, letting
			// concurrent batches overwrite each other's file names.
			args := make([]string, 0, len(baseArgs)+len(batch))
			args = append(args, baseArgs...)
			args = append(args, batch...)

			output, err := exec.Command(rgPath, args...).Output()

			batchMatched := true
			if err != nil {
				exitErr, ok := err.(*exec.ExitError)
				if !ok || exitErr.ExitCode() != 1 {
					// Start-up failures and ripgrep statuses other than 1 are
					// real errors; their output is not written.
					fail(fmt.Errorf("error executing ripgrep: %w", err))
					return
				}
				// Exit code 1 in ripgrep means "no matches found".
				batchMatched = false
			}

			mu.Lock()
			if batchMatched {
				matched = true
			}
			_, writeErr := writer.Write(output)
			if writeErr == nil {
				writeErr = writer.Flush()
			}
			mu.Unlock()

			if writeErr != nil {
				fail(fmt.Errorf("error writing output: %w", writeErr))
			}
		}(batch)
	}

	wg.Wait()

	return matched, firstErr
}
200+
201+
func executeSingleAssure(rgPath string, baseArgs []string, filesToScan []string, targetDir string, policy Policy, writer *bufio.Writer) (bool, error) {
202+
203+
if len(filesToScan) > 0 {
204+
baseArgs = append(baseArgs, filesToScan...)
205+
} else {
206+
log.Error().Str("policy", policy.ID).Msgf("no files matched policy pattern on target : %s", targetDir)
207+
}
208+
209+
matched := true
210+
211+
// log.Debug().Msgf("RGS: %v", baseArgs)
212+
213+
cmdJSON := exec.Command(rgPath, baseArgs...)
214+
cmdJSON.Stdout = writer
215+
cmdJSON.Stderr = os.Stderr
216+
217+
log.Debug().Msgf("Creating JSON output for policy %s... ", policy.ID)
218+
err := cmdJSON.Run()
219+
if err != nil {
220+
if exitError, ok := err.(*exec.ExitError); ok {
221+
222+
if exitError.ExitCode() == 1 {
223+
matched = false
224+
err = nil // Reset error as this is the expected outcome for assure
225+
}
226+
227+
if exitError.ExitCode() == 2 {
228+
log.Warn().Msgf("RG exited with code 2")
229+
log.Debug().Msgf("RG Error Args: %v", baseArgs)
230+
if len(exitError.Stderr) > 0 {
231+
log.Debug().Msgf("RG exited with code 2 stderr: %s", string(exitError.Stderr))
232+
}
233+
}
234+
if exitError.ExitCode() != 1 {
235+
log.Error().Err(err).Msg("error executing ripgrep for JSON output")
236+
return matched, fmt.Errorf("error executing ripgrep for JSON output: %w", err)
237+
}
238+
239+
} else {
240+
log.Error().Err(err).Msg("error executing ripgrep for JSON output")
241+
return matched, fmt.Errorf("error executing ripgrep for JSON output: %w", err)
242+
}
243+
}
244+
245+
return matched, nil
246+
}

cmd/audit.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,11 @@ func processPolicy(policy Policy, allFileInfos []FileInfo, rgPath string) {
209209

210210
if policy.Type == "json" || policy.Type == "yaml" || policy.Type == "ini" || policy.Type == "scan" || policy.Type == "assure" {
211211

212-
log.Debug().Msgf(" Processing files for policy %s: ", policy.ID)
213-
for _, file := range filesToProcess {
214-
log.Debug().Msgf(" %s: %s ", file.Path, file.Hash)
212+
log.Debug().Str("policy", policy.ID).Msgf(" Processing files for policy %s ", policy.ID)
213+
if len(filesToProcess) < 15 {
214+
for _, file := range filesToProcess {
215+
log.Debug().Str("policy", policy.ID).Msgf(" %s: %s ", file.Path, file.Hash)
216+
}
215217
}
216218

217219
normalizedID := NormalizeFilename(policy.ID)

cmd/aux.go

+30-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package cmd
22

33
import (
4+
"bufio"
45
"crypto/sha256"
56
"encoding/hex"
67
"encoding/json"
@@ -22,29 +23,40 @@ import (
2223
"gopkg.in/yaml.v3"
2324
)
2425

25-
// patchJSONOutputFile reads the ripgrep JSON output, patches it to create valid JSON, and writes it back to the file
2626
func patchJSONOutputFile(filePath string) error {
27-
// Read the contents of the file
28-
content, err := os.ReadFile(filePath)
27+
// Open the file
28+
file, err := os.Open(filePath)
2929
if err != nil {
30-
return fmt.Errorf("error reading JSON file: %v", err)
30+
return fmt.Errorf("error opening JSON file: %v", err)
3131
}
32+
defer file.Close()
3233

33-
// Split the input into separate JSON objects
34-
objects := strings.Split(string(content), "}\n{")
34+
var validObjects []map[string]interface{}
3535

36-
// Add the missing brackets to create a JSON array
37-
jsonArray := "[" + strings.Join(objects, "},\n{") + "]"
36+
// Read the file line by line
37+
scanner := bufio.NewScanner(file)
38+
for scanner.Scan() {
39+
line := strings.TrimSpace(scanner.Text())
40+
if line == "" {
41+
continue
42+
}
3843

39-
// Parse the JSON array to validate it
40-
var parsed []interface{}
41-
err = json.Unmarshal([]byte(jsonArray), &parsed)
42-
if err != nil {
43-
return fmt.Errorf("error parsing JSON: %v", err)
44+
// Try to parse each line as a separate JSON object
45+
var obj map[string]interface{}
46+
if err := json.Unmarshal([]byte(line), &obj); err == nil {
47+
validObjects = append(validObjects, obj)
48+
} else {
49+
// Log the error and skip this line
50+
fmt.Printf("Skipping invalid JSON line: %s\n", line)
51+
}
52+
}
53+
54+
if err := scanner.Err(); err != nil {
55+
return fmt.Errorf("error reading file: %v", err)
4456
}
4557

46-
// Re-marshal the parsed data to get a properly formatted JSON string
47-
validJSON, err := json.MarshalIndent(parsed, "", " ")
58+
// Marshal the array of objects into a properly formatted JSON string
59+
validJSON, err := json.MarshalIndent(validObjects, "", " ")
4860
if err != nil {
4961
return fmt.Errorf("error marshaling JSON: %v", err)
5062
}
@@ -334,7 +346,7 @@ func createOutputDirectories(isObserve bool) error {
334346
for _, dir := range dirs {
335347
if outputDir != "" {
336348
dir = filepath.Join(outputDir, dir)
337-
log.Debug().Msgf("Creating directory: %s", dir)
349+
// log.Debug().Msgf("Creating directory: %s", dir)
338350

339351
}
340352
if err := os.MkdirAll(dir, 0755); err != nil {
@@ -350,7 +362,7 @@ func cleanupOutputDirectories() error {
350362
if outputDir != "" {
351363
for i, dir := range dirsToClean {
352364
dirsToClean[i] = filepath.Join(outputDir, dir)
353-
log.Debug().Msgf("Cleaning up directories: %v", dirsToClean)
365+
// log.Debug().Msgf("Cleaning up directories: %v", dirsToClean)
354366
}
355367
}
356368
var wg sync.WaitGroup
@@ -367,7 +379,7 @@ func cleanupOutputDirectories() error {
367379
return
368380
}
369381

370-
log.Debug().Msgf("Cleaned up directory: %s ", d)
382+
// log.Debug().Msgf("Cleaned up directory: %s ", d)
371383
}(dir)
372384
}
373385

0 commit comments

Comments
 (0)