Skip to content

Commit 7a3260c

Browse files
committed
encoding/jsonschema: fetch the test suite GitHub repo via a zip
Rather than `git clone` followed by `git checkout`, which is slower and requires the VCS tool to be installed, we can directly download a ZIP archive from GitHub via the documented API: https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#download-a-repository-archive-zip Note that this API is specific to GitHub, but practically every VCS hosting service provides an extremely similar API. As a bonus, fetching a ZIP means we can open it as an io/fs.FS directly as long as we hold it all in memory, which is easy at the current size of about 470KiB. The runtime of this `go generate` step drops from about 1.5s to about 1.2s on my laptop. As measured by the logger, which now includes precise timestamps, fetching the archive now takes about 250ms, comparable to Go compilation at ~200ms and loading CUE at ~500ms. While here, I noticed and fixed an unchecked error and a confusing error return at the end of the function. Add a log line at the end too, to measure how long the entire program took to do its job, as well as the filename as the log prefix, so that the source is obvious in `go generate`. I verified that it all works as expected by deleting the files and re-generating and updating them from scratch. The only changes are to stale skip strings, which Roger is fixing in a parallel chain of CLs. Signed-off-by: Daniel Martí <[email protected]> Change-Id: Ia780ade3ca1ce27ef755a026f7545b173dafb17e Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1200925 TryBot-Result: CUEcueckoo <[email protected]> Reviewed-by: Roger Peppe <[email protected]>
1 parent 0aeef9f commit 7a3260c

File tree

1 file changed

+32
-25
lines changed

1 file changed

+32
-25
lines changed

encoding/jsonschema/vendor_external.go

+32-25
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,16 @@
2121
package main
2222

2323
import (
24+
"archive/zip"
25+
"bytes"
2426
"errors"
2527
"flag"
2628
"fmt"
29+
"io"
2730
"io/fs"
2831
"log"
32+
"net/http"
2933
"os"
30-
"os/exec"
3134
"path"
3235
"path/filepath"
3336
"strings"
@@ -45,6 +48,7 @@ func main() {
4548
fmt.Fprintf(os.Stderr, "usage: vendor-external commit\n")
4649
os.Exit(2)
4750
}
51+
log.SetFlags(log.Lshortfile | log.Ltime | log.Lmicroseconds)
4852
flag.Parse()
4953
if flag.NArg() != 1 {
5054
flag.Usage()
@@ -55,31 +59,42 @@ func main() {
5559
}
5660

5761
func doVendor(commit string) error {
58-
tmpDir, err := os.MkdirTemp("", "")
62+
// Fetch a commit from GitHub via their archive ZIP endpoint, which is a lot faster
63+
// than git cloning just to retrieve a single commit's files.
64+
// See: https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#download-a-repository-archive-zip
65+
zipURL := fmt.Sprintf("https://github.com/json-schema-org/JSON-Schema-Test-Suite/archive/%s.zip", commit)
66+
log.Printf("fetching %s", zipURL)
67+
resp, err := http.Get(zipURL)
5968
if err != nil {
6069
return err
6170
}
62-
defer os.RemoveAll(tmpDir)
63-
logf("cloning %s", testRepo)
64-
if err := runCmd(tmpDir, "git", "clone", "-q", testRepo, "."); err != nil {
65-
return err
66-
}
67-
logf("checking out commit %s", commit)
68-
if err := runCmd(tmpDir, "git", "checkout", "-q", commit); err != nil {
71+
defer resp.Body.Close()
72+
zipBytes, err := io.ReadAll(resp.Body)
73+
if err != nil {
6974
return err
7075
}
71-
logf("reading old test data")
76+
77+
log.Printf("reading old test data")
7278
oldTests, err := externaltest.ReadTestDir(testDir)
7379
if err != nil && !errors.Is(err, externaltest.ErrNotFound) {
7480
return err
7581
}
76-
logf("copying files to %s", testDir)
7782

83+
log.Printf("copying files to %s", testDir)
7884
testSubdir := filepath.Join(testDir, "tests")
7985
if err := os.RemoveAll(testSubdir); err != nil {
8086
return err
8187
}
82-
fsys := os.DirFS(filepath.Join(tmpDir, "tests"))
88+
zipr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
89+
if err != nil {
90+
return err
91+
}
92+
// Note that GitHub produces archives with a top-level directory representing
93+
// the name of the repository and the version which was retrieved.
94+
fsys, err := fs.Sub(zipr, fmt.Sprintf("JSON-Schema-Test-Suite-%s/tests", commit))
95+
if err != nil {
96+
return err
97+
}
8398
err = fs.WalkDir(fsys, ".", func(filename string, d fs.DirEntry, err error) error {
8499
if err != nil {
85100
return err
@@ -107,6 +122,9 @@ func doVendor(commit string) error {
107122
}
108123
return nil
109124
})
125+
if err != nil {
126+
return err
127+
}
110128

111129
// Read the test data back that we've just written and attempt
112130
// to populate skip data from the original test data.
@@ -152,23 +170,12 @@ func doVendor(commit string) error {
152170
if err := externaltest.WriteTestDir(testDir, newTests); err != nil {
153171
return err
154172
}
155-
return err
173+
log.Printf("finished")
174+
return nil
156175
}
157176

158177
type skipKey struct {
159178
filename string
160179
schema string
161180
test string
162181
}
163-
164-
func runCmd(dir string, name string, args ...string) error {
165-
c := exec.Command(name, args...)
166-
c.Dir = dir
167-
c.Stdout = os.Stdout
168-
c.Stderr = os.Stderr
169-
return c.Run()
170-
}
171-
172-
func logf(f string, a ...any) {
173-
fmt.Fprintf(os.Stderr, "%s\n", fmt.Sprintf(f, a...))
174-
}

0 commit comments

Comments
 (0)