Skip to content

Commit 24ff3f4

Browse files
statistics: avoid fmsketch calculation for single-column index (#41931)
ref #41930
1 parent a498911 commit 24ff3f4

File tree

3 files changed

+50
-10
lines changed

3 files changed

+50
-10
lines changed

statistics/handle/handletest/handle_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1070,7 +1070,7 @@ partition by range (a) (
10701070
tk.MustQuery("select distinct_count, null_count, tot_col_size, correlation=0 from mysql.stats_histograms where is_index=0 order by table_id asc").Check(
10711071
testkit.Rows("15 1 17 1", "6 1 7 0", "9 0 10 0"))
10721072
tk.MustQuery("select distinct_count, null_count, tot_col_size, correlation=0 from mysql.stats_histograms where is_index=1 order by table_id asc").Check(
1073-
testkit.Rows("15 1 0 1", "6 1 6 1", "9 0 10 1"))
1073+
testkit.Rows("15 1 0 1", "6 1 7 1", "9 0 10 1"))
10741074

10751075
tk.MustQuery("show stats_buckets where is_index=0").Check(
10761076
// db table partition col is_idx bucket_id count repeats lower upper ndv

statistics/integration_test.go

+24
Original file line numberDiff line numberDiff line change
@@ -669,3 +669,27 @@ func TestShowHistogramsLoadStatus(t *testing.T) {
669669
}
670670
}
671671
}
672+
673+
// TestSingleColumnIndexNDV analyzes a table in which each of the four columns
// is covered by its own single-column index, then checks that every index
// reports the same distinct_count and null_count as the column it covers.
func TestSingleColumnIndexNDV(t *testing.T) {
674+
store, dom := testkit.CreateMockStoreAndDomain(t)
675+
tk := testkit.NewTestKit(t, store)
676+
h := dom.StatsHandle()
677+
tk.MustExec("use test")
678+
tk.MustExec("create table t(a int, b int, c varchar(20), d varchar(20), index idx_a(a), index idx_b(b), index idx_c(c), index idx_d(d))")
679+
// Receive one event from the stats handle's DDL channel and handle it
// (presumably the event produced by the CREATE TABLE above — confirm).
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
680+
// Seed three rows: a has 2 distinct values, b has 3, c has 2 plus one NULL,
// and d has a single value.
tk.MustExec("insert into t values (1, 1, 'xxx', 'zzz'), (2, 2, 'yyy', 'zzz'), (1, 3, null, 'zzz')")
681+
// Double the table five times: 3 * 2^5 = 96 rows, so the single NULL in c
// becomes 32 NULLs while the distinct counts stay unchanged.
for i := 0; i < 5; i++ {
682+
tk.MustExec("insert into t select * from t")
683+
}
684+
tk.MustExec("analyze table t")
685+
rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Sort().Rows()
686+
// Each entry is {column_name, distinct_count, null_count}. The order relies
// on Sort() placing the columns a..d ahead of the indexes idx_a..idx_d.
expectedResults := [][]string{
687+
{"a", "2", "0"}, {"b", "3", "0"}, {"c", "2", "32"}, {"d", "1", "0"},
688+
{"idx_a", "2", "0"}, {"idx_b", "3", "0"}, {"idx_c", "2", "32"}, {"idx_d", "1", "0"},
689+
}
690+
// NOTE(review): the loop ranges over rows, not expectedResults. If fewer
// rows come back than expected, the missing entries are never checked; if
// more come back, expectedResults[i] indexes out of range. Consider
// asserting the row count before the loop.
for i, row := range rows {
691+
require.Equal(t, expectedResults[i][0], row[3]) // column_name
692+
require.Equal(t, expectedResults[i][1], row[6]) // distinct_count
693+
require.Equal(t, expectedResults[i][2], row[7]) // null_count
694+
}
695+
}

statistics/row_sampler.go

+25-9
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ func (s *RowSampleBuilder) Collect() (RowSampleCollector, error) {
172172
return nil, err
173173
}
174174
if chk.NumRows() == 0 {
175-
return collector, nil
175+
break
176176
}
177177
collector.Base().Count += int64(chk.NumRows())
178178
for row := it.Begin(); row != it.End(); row = it.Next() {
@@ -212,6 +212,20 @@ func (s *RowSampleBuilder) Collect() (RowSampleCollector, error) {
212212
collector.sampleRow(newCols, s.Rng)
213213
}
214214
}
215+
for i, group := range s.ColGroups {
216+
if len(group) != 1 {
217+
continue
218+
}
219+
// For the single-column group, its FMSketch is the same as that of the corresponding column. Hence, we don't
220+
// maintain its FMSketch in collectColumnGroups. We just copy the corresponding column's FMSketch after
221+
// iterating all rows. Also, we can directly copy TotalSize and NullCount.
222+
colIdx := group[0]
223+
colGroupIdx := len(s.ColsFieldType) + i
224+
collector.Base().FMSketches[colGroupIdx] = collector.Base().FMSketches[colIdx].Copy()
225+
collector.Base().NullCount[colGroupIdx] = collector.Base().NullCount[colIdx]
226+
collector.Base().TotalSizes[colGroupIdx] = collector.Base().TotalSizes[colIdx]
227+
}
228+
return collector, nil
215229
}
216230

217231
func (s *baseCollector) collectColumns(sc *stmtctx.StatementContext, cols []types.Datum, sizes []int64) error {
@@ -234,17 +248,19 @@ func (s *baseCollector) collectColumnGroups(sc *stmtctx.StatementContext, cols [
234248
colLen := len(cols)
235249
datumBuffer := make([]types.Datum, 0, len(cols))
236250
for i, group := range colGroups {
251+
if len(group) == 1 {
252+
// For the single-column group, its FMSketch is the same as that of the corresponding column. Hence, we
253+
// don't need to maintain its FMSketch. We just copy the corresponding column's FMSketch after iterating
254+
// all rows. Also, we can directly copy TotalSize and NullCount.
255+
continue
256+
}
257+
// We don't maintain the null counts information for the multi-column group.
237258
datumBuffer = datumBuffer[:0]
238-
hasNull := true
239259
for _, c := range group {
240260
datumBuffer = append(datumBuffer, cols[c])
241-
hasNull = hasNull && cols[c].IsNull()
242-
s.TotalSizes[colLen+i] += sizes[c] - 1
243-
}
244-
// We don't maintain the null counts information for the multi-column group
245-
if hasNull && len(group) == 1 {
246-
s.NullCount[colLen+i]++
247-
continue
261+
if !cols[c].IsNull() {
262+
s.TotalSizes[colLen+i] += sizes[c] - 1
263+
}
248264
}
249265
err := s.FMSketches[colLen+i].InsertRowValue(sc, datumBuffer)
250266
if err != nil {

0 commit comments

Comments
 (0)