Skip to content

Commit 51e1025

Browse files
authored
zip: Merge upstream (#631)
* zip: Merge upstream Add Go 1.19 improvements.
1 parent 9bbb415 commit 51e1025

File tree

10 files changed

+494
-64
lines changed

10 files changed

+494
-64
lines changed

zip/fuzz_test.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//go:build go1.18
2+
// +build go1.18
3+
4+
// Copyright 2021 The Go Authors. All rights reserved.
5+
// Use of this source code is governed by a BSD-style
6+
// license that can be found in the LICENSE file.
7+
8+
package zip
9+
10+
import (
11+
"bytes"
12+
"io"
13+
"os"
14+
"path/filepath"
15+
"testing"
16+
)
17+
18+
func FuzzReader(f *testing.F) {
19+
testdata, err := os.ReadDir("testdata")
20+
if err != nil {
21+
f.Fatalf("failed to read testdata directory: %s", err)
22+
}
23+
for _, de := range testdata {
24+
if de.IsDir() {
25+
continue
26+
}
27+
b, err := os.ReadFile(filepath.Join("testdata", de.Name()))
28+
if err != nil {
29+
f.Fatalf("failed to read testdata: %s", err)
30+
}
31+
f.Add(b)
32+
}
33+
34+
f.Fuzz(func(t *testing.T, b []byte) {
35+
r, err := NewReader(bytes.NewReader(b), int64(len(b)))
36+
if err != nil {
37+
return
38+
}
39+
40+
type file struct {
41+
header *FileHeader
42+
content []byte
43+
}
44+
files := []file{}
45+
46+
for _, f := range r.File {
47+
fr, err := f.Open()
48+
if err != nil {
49+
continue
50+
}
51+
content, err := io.ReadAll(fr)
52+
if err != nil {
53+
continue
54+
}
55+
files = append(files, file{header: &f.FileHeader, content: content})
56+
if _, err := r.Open(f.Name); err != nil {
57+
continue
58+
}
59+
}
60+
61+
// If we were unable to read anything out of the archive don't
62+
// bother trying to roundtrip it.
63+
if len(files) == 0 {
64+
return
65+
}
66+
67+
w := NewWriter(io.Discard)
68+
for _, f := range files {
69+
ww, err := w.CreateHeader(f.header)
70+
if err != nil {
71+
t.Fatalf("unable to write previously parsed header: %s", err)
72+
}
73+
if _, err := ww.Write(f.content); err != nil {
74+
t.Fatalf("unable to write previously parsed content: %s", err)
75+
}
76+
}
77+
78+
if err := w.Close(); err != nil {
79+
t.Fatalf("Unable to write archive: %s", err)
80+
}
81+
82+
// TODO: We may want to check if the archive roundtrips.
83+
})
84+
}

zip/reader.go

Lines changed: 92 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ type Reader struct {
3333
Comment string
3434
decompressors map[uint16]Decompressor
3535

36+
// Some JAR files are zip files with a prefix that is a bash script.
37+
// The baseOffset field is the start of the zip file proper.
38+
baseOffset int64
39+
3640
// fileList is a list of files sorted by ename,
3741
// for use by the Open method.
3842
fileListOnce sync.Once
@@ -52,9 +56,8 @@ type File struct {
5256
FileHeader
5357
zip *Reader
5458
zipr io.ReaderAt
55-
headerOffset int64
59+
headerOffset int64 // includes overall ZIP archive baseOffset
5660
zip64 bool // zip64 extended information extra field presence
57-
descErr error // error reading the data descriptor during init
5861
}
5962

6063
// OpenReader will open the Zip file specified by name and return a ReadCloser.
@@ -91,23 +94,24 @@ func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
9194
}
9295

9396
func (z *Reader) init(r io.ReaderAt, size int64) error {
94-
end, err := readDirectoryEnd(r, size)
97+
end, baseOffset, err := readDirectoryEnd(r, size)
9598
if err != nil {
9699
return err
97100
}
98101
z.r = r
102+
z.baseOffset = baseOffset
99103
// Since the number of directory records is not validated, it is not
100104
// safe to preallocate z.File without first checking that the specified
101105
// number of files is reasonable, since a malformed archive may
102106
// indicate it contains up to 1 << 128 - 1 files. Since each file has a
103107
// header which will be _at least_ 30 bytes we can safely preallocate
104108
// if (data size / 30) >= end.directoryRecords.
105-
if (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
109+
if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
106110
z.File = make([]*File, 0, end.directoryRecords)
107111
}
108112
z.Comment = end.comment
109113
rs := io.NewSectionReader(r, 0, size)
110-
if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
114+
if _, err = rs.Seek(z.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil {
111115
return err
112116
}
113117
buf := bufio.NewReader(rs)
@@ -119,12 +123,27 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
119123
for {
120124
f := &File{zip: z, zipr: r}
121125
err = readDirectoryHeader(f, buf)
126+
127+
// For compatibility with other zip programs,
128+
// if we have a non-zero base offset and can't read
129+
// the first directory header, try again with a zero
130+
// base offset.
131+
if err == ErrFormat && z.baseOffset != 0 && len(z.File) == 0 {
132+
z.baseOffset = 0
133+
if _, err = rs.Seek(int64(end.directoryOffset), io.SeekStart); err != nil {
134+
return err
135+
}
136+
buf.Reset(rs)
137+
continue
138+
}
139+
122140
if err == ErrFormat || err == io.ErrUnexpectedEOF {
123141
break
124142
}
125143
if err != nil {
126144
return err
127145
}
146+
f.headerOffset += z.baseOffset
128147
z.File = append(z.File, f)
129148
}
130149
if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
@@ -229,6 +248,9 @@ func (r *checksumReader) Read(b []byte) (n int, err error) {
229248
n, err = r.rc.Read(b)
230249
r.hash.Write(b[:n])
231250
r.nread += uint64(n)
251+
if r.nread > r.f.UncompressedSize64 {
252+
return 0, ErrFormat
253+
}
232254
if err == nil {
233255
return
234256
}
@@ -492,7 +514,7 @@ func readDataDescriptor(r io.Reader, f *File) error {
492514
return nil
493515
}
494516

495-
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
517+
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) {
496518
// look for directoryEndSignature in the last 1k, then in the last 65k
497519
var buf []byte
498520
var directoryEndOffset int64
@@ -502,15 +524,15 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error)
502524
}
503525
buf = make([]byte, int(bLen))
504526
if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
505-
return nil, err
527+
return nil, 0, err
506528
}
507529
if p := findSignatureInBlock(buf); p >= 0 {
508530
buf = buf[p:]
509531
directoryEndOffset = size - bLen + int64(p)
510532
break
511533
}
512534
if i == 1 || bLen == size {
513-
return nil, ErrFormat
535+
return nil, 0, ErrFormat
514536
}
515537
}
516538

@@ -527,25 +549,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error)
527549
}
528550
l := int(d.commentLen)
529551
if l > len(b) {
530-
return nil, errors.New("zip: invalid comment length")
552+
return nil, 0, errors.New("zip: invalid comment length")
531553
}
532554
d.comment = string(b[:l])
533555

534556
// These values mean that the file can be a zip64 file
535557
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
536558
p, err := findDirectory64End(r, directoryEndOffset)
537559
if err == nil && p >= 0 {
560+
directoryEndOffset = p
538561
err = readDirectory64End(r, p, d)
539562
}
540563
if err != nil {
541-
return nil, err
564+
return nil, 0, err
542565
}
543566
}
567+
568+
baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset)
569+
544570
// Make sure directoryOffset points to somewhere in our file.
545-
if o := int64(d.directoryOffset); o < 0 || o >= size {
546-
return nil, ErrFormat
571+
if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size {
572+
return nil, 0, ErrFormat
547573
}
548-
return d, nil
574+
return d, baseOffset, nil
549575
}
550576

551577
// findDirectory64End tries to read the zip64 locator just before the
@@ -650,18 +676,22 @@ type fileListEntry struct {
650676
name string
651677
file *File
652678
isDir bool
679+
isDup bool
653680
}
654681

655682
type fileInfoDirEntry interface {
656683
fs.FileInfo
657684
fs.DirEntry
658685
}
659686

660-
func (e *fileListEntry) stat() fileInfoDirEntry {
687+
func (e *fileListEntry) stat() (fileInfoDirEntry, error) {
688+
if e.isDup {
689+
return nil, errors.New(e.name + ": duplicate entries in zip file")
690+
}
661691
if !e.isDir {
662-
return headerFileInfo{&e.file.FileHeader}
692+
return headerFileInfo{&e.file.FileHeader}, nil
663693
}
664-
return e
694+
return e, nil
665695
}
666696

667697
// Only used for directories.
@@ -696,32 +726,61 @@ func toValidName(name string) string {
696726

697727
func (r *Reader) initFileList() {
698728
r.fileListOnce.Do(func() {
729+
// files and knownDirs map from a file/directory name
730+
// to an index into the r.fileList entry that we are
731+
// building. They are used to mark duplicate entries.
732+
files := make(map[string]int)
733+
knownDirs := make(map[string]int)
734+
735+
// dirs[name] is true if name is known to be a directory,
736+
// because it appears as a prefix in a path.
699737
dirs := make(map[string]bool)
700-
knownDirs := make(map[string]bool)
738+
701739
for _, file := range r.File {
702740
isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
703741
name := toValidName(file.Name)
742+
if name == "" {
743+
continue
744+
}
745+
746+
if idx, ok := files[name]; ok {
747+
r.fileList[idx].isDup = true
748+
continue
749+
}
750+
if idx, ok := knownDirs[name]; ok {
751+
r.fileList[idx].isDup = true
752+
continue
753+
}
754+
704755
for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
705756
dirs[dir] = true
706757
}
758+
759+
idx := len(r.fileList)
707760
entry := fileListEntry{
708761
name: name,
709762
file: file,
710763
isDir: isDir,
711764
}
712765
r.fileList = append(r.fileList, entry)
713766
if isDir {
714-
knownDirs[name] = true
767+
knownDirs[name] = idx
768+
} else {
769+
files[name] = idx
715770
}
716771
}
717772
for dir := range dirs {
718-
if !knownDirs[dir] {
719-
entry := fileListEntry{
720-
name: dir,
721-
file: nil,
722-
isDir: true,
773+
if _, ok := knownDirs[dir]; !ok {
774+
if idx, ok := files[dir]; ok {
775+
r.fileList[idx].isDup = true
776+
} else {
777+
entry := fileListEntry{
778+
name: dir,
779+
file: nil,
780+
isDir: true,
781+
}
782+
r.fileList = append(r.fileList, entry)
723783
}
724-
r.fileList = append(r.fileList, entry)
725784
}
726785
}
727786

@@ -740,12 +799,11 @@ func fileEntryLess(x, y string) bool {
740799
// paths are always slash separated, with no
741800
// leading / or ../ elements.
742801
func (r *Reader) Open(name string) (fs.File, error) {
802+
r.initFileList()
803+
743804
if !fs.ValidPath(name) {
744805
return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
745806
}
746-
747-
r.initFileList()
748-
749807
e := r.openLookup(name)
750808
if e == nil {
751809
return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
@@ -761,7 +819,7 @@ func (r *Reader) Open(name string) (fs.File, error) {
761819
}
762820

763821
func split(name string) (dir, elem string, isDir bool) {
764-
if name[len(name)-1] == '/' {
822+
if len(name) > 0 && name[len(name)-1] == '/' {
765823
isDir = true
766824
name = name[:len(name)-1]
767825
}
@@ -817,7 +875,7 @@ type openDir struct {
817875
}
818876

819877
func (d *openDir) Close() error { return nil }
820-
func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat(), nil }
878+
func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() }
821879

822880
func (d *openDir) Read([]byte) (int, error) {
823881
return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
@@ -836,7 +894,11 @@ func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
836894
}
837895
list := make([]fs.DirEntry, n)
838896
for i := range list {
839-
list[i] = d.files[d.offset+i].stat()
897+
s, err := d.files[d.offset+i].stat()
898+
if err != nil {
899+
return nil, err
900+
}
901+
list[i] = s
840902
}
841903
d.offset += n
842904
return list, nil

0 commit comments

Comments
 (0)