@@ -73,30 +73,31 @@ func idct(src *block) {
73
73
// Horizontal 1-D IDCT.
74
74
for y := 0 ; y < 8 ; y ++ {
75
75
y8 := y * 8
76
+ s := src [y8 : y8 + 8 : y8 + 8 ] // Small cap improves performance, see https://golang.org/issue/27857
76
77
// If all the AC components are zero, then the IDCT is trivial.
77
- if src [ y8 + 1 ] == 0 && src [ y8 + 2 ] == 0 && src [ y8 + 3 ] == 0 &&
78
- src [ y8 + 4 ] == 0 && src [ y8 + 5 ] == 0 && src [ y8 + 6 ] == 0 && src [ y8 + 7 ] == 0 {
79
- dc := src [ y8 + 0 ] << 3
80
- src [ y8 + 0 ] = dc
81
- src [ y8 + 1 ] = dc
82
- src [ y8 + 2 ] = dc
83
- src [ y8 + 3 ] = dc
84
- src [ y8 + 4 ] = dc
85
- src [ y8 + 5 ] = dc
86
- src [ y8 + 6 ] = dc
87
- src [ y8 + 7 ] = dc
78
+ if s [ 1 ] == 0 && s [ 2 ] == 0 && s [ 3 ] == 0 &&
79
+ s [ 4 ] == 0 && s [ 5 ] == 0 && s [ 6 ] == 0 && s [ 7 ] == 0 {
80
+ dc := s [ 0 ] << 3
81
+ s [ 0 ] = dc
82
+ s [ 1 ] = dc
83
+ s [ 2 ] = dc
84
+ s [ 3 ] = dc
85
+ s [ 4 ] = dc
86
+ s [ 5 ] = dc
87
+ s [ 6 ] = dc
88
+ s [ 7 ] = dc
88
89
continue
89
90
}
90
91
91
92
// Prescale.
92
- x0 := (src [ y8 + 0 ] << 11 ) + 128
93
- x1 := src [ y8 + 4 ] << 11
94
- x2 := src [ y8 + 6 ]
95
- x3 := src [ y8 + 2 ]
96
- x4 := src [ y8 + 1 ]
97
- x5 := src [ y8 + 7 ]
98
- x6 := src [ y8 + 5 ]
99
- x7 := src [ y8 + 3 ]
93
+ x0 := (s [ 0 ] << 11 ) + 128
94
+ x1 := s [ 4 ] << 11
95
+ x2 := s [ 6 ]
96
+ x3 := s [ 2 ]
97
+ x4 := s [ 1 ]
98
+ x5 := s [ 7 ]
99
+ x6 := s [ 5 ]
100
+ x7 := s [ 3 ]
100
101
101
102
// Stage 1.
102
103
x8 := w7 * (x4 + x5 )
@@ -126,31 +127,32 @@ func idct(src *block) {
126
127
x4 = (r2 * (x4 - x5 ) + 128 ) >> 8
127
128
128
129
// Stage 4.
129
- src [ y8 + 0 ] = (x7 + x1 ) >> 8
130
- src [ y8 + 1 ] = (x3 + x2 ) >> 8
131
- src [ y8 + 2 ] = (x0 + x4 ) >> 8
132
- src [ y8 + 3 ] = (x8 + x6 ) >> 8
133
- src [ y8 + 4 ] = (x8 - x6 ) >> 8
134
- src [ y8 + 5 ] = (x0 - x4 ) >> 8
135
- src [ y8 + 6 ] = (x3 - x2 ) >> 8
136
- src [ y8 + 7 ] = (x7 - x1 ) >> 8
130
+ s [ 0 ] = (x7 + x1 ) >> 8
131
+ s [ 1 ] = (x3 + x2 ) >> 8
132
+ s [ 2 ] = (x0 + x4 ) >> 8
133
+ s [ 3 ] = (x8 + x6 ) >> 8
134
+ s [ 4 ] = (x8 - x6 ) >> 8
135
+ s [ 5 ] = (x0 - x4 ) >> 8
136
+ s [ 6 ] = (x3 - x2 ) >> 8
137
+ s [ 7 ] = (x7 - x1 ) >> 8
137
138
}
138
139
139
140
// Vertical 1-D IDCT.
140
141
for x := 0 ; x < 8 ; x ++ {
141
142
// Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
142
143
// However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
143
144
// we do not bother to check for the all-zero case.
145
+ s := src [x : x + 57 : x + 57 ] // Small cap improves performance, see https://golang.org/issue/27857
144
146
145
147
// Prescale.
146
- y0 := (src [8 * 0 + x ] << 8 ) + 8192
147
- y1 := src [8 * 4 + x ] << 8
148
- y2 := src [8 * 6 + x ]
149
- y3 := src [8 * 2 + x ]
150
- y4 := src [8 * 1 + x ]
151
- y5 := src [8 * 7 + x ]
152
- y6 := src [8 * 5 + x ]
153
- y7 := src [8 * 3 + x ]
148
+ y0 := (s [8 * 0 ] << 8 ) + 8192
149
+ y1 := s [8 * 4 ] << 8
150
+ y2 := s [8 * 6 ]
151
+ y3 := s [8 * 2 ]
152
+ y4 := s [8 * 1 ]
153
+ y5 := s [8 * 7 ]
154
+ y6 := s [8 * 5 ]
155
+ y7 := s [8 * 3 ]
154
156
155
157
// Stage 1.
156
158
y8 := w7 * (y4 + y5 ) + 4
@@ -180,13 +182,13 @@ func idct(src *block) {
180
182
y4 = (r2 * (y4 - y5 ) + 128 ) >> 8
181
183
182
184
// Stage 4.
183
- src [8 * 0 + x ] = (y7 + y1 ) >> 14
184
- src [8 * 1 + x ] = (y3 + y2 ) >> 14
185
- src [8 * 2 + x ] = (y0 + y4 ) >> 14
186
- src [8 * 3 + x ] = (y8 + y6 ) >> 14
187
- src [8 * 4 + x ] = (y8 - y6 ) >> 14
188
- src [8 * 5 + x ] = (y0 - y4 ) >> 14
189
- src [8 * 6 + x ] = (y3 - y2 ) >> 14
190
- src [8 * 7 + x ] = (y7 - y1 ) >> 14
185
+ s [8 * 0 ] = (y7 + y1 ) >> 14
186
+ s [8 * 1 ] = (y3 + y2 ) >> 14
187
+ s [8 * 2 ] = (y0 + y4 ) >> 14
188
+ s [8 * 3 ] = (y8 + y6 ) >> 14
189
+ s [8 * 4 ] = (y8 - y6 ) >> 14
190
+ s [8 * 5 ] = (y0 - y4 ) >> 14
191
+ s [8 * 6 ] = (y3 - y2 ) >> 14
192
+ s [8 * 7 ] = (y7 - y1 ) >> 14
191
193
}
192
194
}
0 commit comments