Skip to content

Commit 229160a

Browse files
bharath-techiedk2k
authored and committed
[Star tree] Add date field rounding support in star tree (opensearch-project#15249)
--------- Signed-off-by: Bharathwaj G <[email protected]>
1 parent 594e8cd commit 229160a

27 files changed

+1710
-259
lines changed

server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java

Lines changed: 190 additions & 31 deletions
Large diffs are not rendered by default.

server/src/main/java/org/opensearch/common/Rounding.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ public enum DateTimeUnit {
9595
WEEK_OF_WEEKYEAR((byte) 1, "week", IsoFields.WEEK_OF_WEEK_BASED_YEAR, true, TimeUnit.DAYS.toMillis(7)) {
9696
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(7);
9797

98-
long roundFloor(long utcMillis) {
98+
public long roundFloor(long utcMillis) {
9999
return DateUtils.roundWeekOfWeekYear(utcMillis);
100100
}
101101

@@ -107,7 +107,7 @@ long extraLocalOffsetLookup() {
107107
YEAR_OF_CENTURY((byte) 2, "year", ChronoField.YEAR_OF_ERA, false, 12) {
108108
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(366);
109109

110-
long roundFloor(long utcMillis) {
110+
public long roundFloor(long utcMillis) {
111111
return DateUtils.roundYear(utcMillis);
112112
}
113113

@@ -118,7 +118,7 @@ long extraLocalOffsetLookup() {
118118
QUARTER_OF_YEAR((byte) 3, "quarter", IsoFields.QUARTER_OF_YEAR, false, 3) {
119119
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(92);
120120

121-
long roundFloor(long utcMillis) {
121+
public long roundFloor(long utcMillis) {
122122
return DateUtils.roundQuarterOfYear(utcMillis);
123123
}
124124

@@ -129,7 +129,7 @@ long extraLocalOffsetLookup() {
129129
MONTH_OF_YEAR((byte) 4, "month", ChronoField.MONTH_OF_YEAR, false, 1) {
130130
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(31);
131131

132-
long roundFloor(long utcMillis) {
132+
public long roundFloor(long utcMillis) {
133133
return DateUtils.roundMonthOfYear(utcMillis);
134134
}
135135

@@ -138,7 +138,7 @@ long extraLocalOffsetLookup() {
138138
}
139139
},
140140
DAY_OF_MONTH((byte) 5, "day", ChronoField.DAY_OF_MONTH, true, ChronoField.DAY_OF_MONTH.getBaseUnit().getDuration().toMillis()) {
141-
long roundFloor(long utcMillis) {
141+
public long roundFloor(long utcMillis) {
142142
return DateUtils.roundFloor(utcMillis, this.ratio);
143143
}
144144

@@ -147,7 +147,7 @@ long extraLocalOffsetLookup() {
147147
}
148148
},
149149
HOUR_OF_DAY((byte) 6, "hour", ChronoField.HOUR_OF_DAY, true, ChronoField.HOUR_OF_DAY.getBaseUnit().getDuration().toMillis()) {
150-
long roundFloor(long utcMillis) {
150+
public long roundFloor(long utcMillis) {
151151
return DateUtils.roundFloor(utcMillis, ratio);
152152
}
153153

@@ -162,7 +162,7 @@ long extraLocalOffsetLookup() {
162162
true,
163163
ChronoField.MINUTE_OF_HOUR.getBaseUnit().getDuration().toMillis()
164164
) {
165-
long roundFloor(long utcMillis) {
165+
public long roundFloor(long utcMillis) {
166166
return DateUtils.roundFloor(utcMillis, ratio);
167167
}
168168

@@ -177,7 +177,7 @@ long extraLocalOffsetLookup() {
177177
true,
178178
ChronoField.SECOND_OF_MINUTE.getBaseUnit().getDuration().toMillis()
179179
) {
180-
long roundFloor(long utcMillis) {
180+
public long roundFloor(long utcMillis) {
181181
return DateUtils.roundFloor(utcMillis, ratio);
182182
}
183183

@@ -210,7 +210,7 @@ public long extraLocalOffsetLookup() {
210210
* @param utcMillis the milliseconds since the epoch
211211
* @return the rounded down milliseconds since the epoch
212212
*/
213-
abstract long roundFloor(long utcMillis);
213+
public abstract long roundFloor(long utcMillis);
214214

215215
/**
216216
* When looking up {@link LocalTimeOffset} go this many milliseconds
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.compositeindex.datacube;
10+
11+
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
12+
13+
import java.util.HashMap;
14+
import java.util.Map;
15+
import java.util.concurrent.TimeUnit;
16+
17+
import static java.util.Collections.unmodifiableMap;
18+
19+
/**
20+
* Enum representing the extended date time units supported for star tree index as part of index mapping.
21+
* The enum values are:
22+
* <ul>
23+
* <li>HALF_HOUR_OF_DAY: Represents half hour of day rounding</li>
24+
* <li>QUARTER_HOUR_OF_DAY: Represents quarter hour of day rounding</li>
25+
* </ul>
26+
* <p>
27+
* The enum also provides a static map of date field units to their corresponding ExtendedDateTimeUnit instances.
28+
*
29+
* @see org.opensearch.common.Rounding.DateTimeUnit for more information on the dateTimeUnit enum and rounding logic.
30+
*
31+
* @opensearch.experimental
32+
*/
33+
public enum DataCubeDateTimeUnit implements DateTimeUnitRounding {
34+
HALF_HOUR_OF_DAY("half-hour") {
35+
@Override
36+
public long roundFloor(long utcMillis) {
37+
return utcMillis - (utcMillis % TimeUnit.MINUTES.toMillis(30));
38+
}
39+
},
40+
QUARTER_HOUR_OF_DAY("quarter-hour") {
41+
@Override
42+
public long roundFloor(long utcMillis) {
43+
return utcMillis - (utcMillis % TimeUnit.MINUTES.toMillis(15));
44+
}
45+
};
46+
47+
public static final Map<String, DataCubeDateTimeUnit> DATE_FIELD_UNITS;
48+
static {
49+
Map<String, DataCubeDateTimeUnit> dateFieldUnits = new HashMap<>();
50+
dateFieldUnits.put("30m", DataCubeDateTimeUnit.HALF_HOUR_OF_DAY);
51+
dateFieldUnits.put("half-hour", DataCubeDateTimeUnit.HALF_HOUR_OF_DAY);
52+
dateFieldUnits.put("15m", DataCubeDateTimeUnit.QUARTER_HOUR_OF_DAY);
53+
dateFieldUnits.put("quarter-hour", DataCubeDateTimeUnit.QUARTER_HOUR_OF_DAY);
54+
DATE_FIELD_UNITS = unmodifiableMap(dateFieldUnits);
55+
}
56+
57+
private final String shortName;
58+
59+
DataCubeDateTimeUnit(String shortName) {
60+
this.shortName = shortName;
61+
}
62+
63+
/**
64+
* This rounds down the supplied milliseconds since the epoch down to the next unit. In order to retain performance this method
65+
* should be as fast as possible and not try to convert dates to java-time objects if possible
66+
*
67+
* @param utcMillis the milliseconds since the epoch
68+
* @return the rounded down milliseconds since the epoch
69+
*/
70+
@Override
71+
public abstract long roundFloor(long utcMillis);
72+
73+
@Override
74+
public String shortName() {
75+
return shortName;
76+
}
77+
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/DateDimension.java

Lines changed: 105 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,21 @@
1010

1111
import org.opensearch.common.Rounding;
1212
import org.opensearch.common.annotation.ExperimentalApi;
13+
import org.opensearch.common.time.DateUtils;
1314
import org.opensearch.core.xcontent.XContentBuilder;
15+
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
1416
import org.opensearch.index.mapper.CompositeDataCubeFieldType;
17+
import org.opensearch.index.mapper.DateFieldMapper;
1518

1619
import java.io.IOException;
20+
import java.util.ArrayList;
21+
import java.util.Comparator;
22+
import java.util.HashMap;
1723
import java.util.List;
24+
import java.util.Map;
1825
import java.util.Objects;
26+
import java.util.function.Consumer;
27+
import java.util.stream.Collectors;
1928

2029
/**
2130
* Date dimension class
@@ -24,27 +33,78 @@
2433
*/
2534
@ExperimentalApi
2635
public class DateDimension implements Dimension {
27-
private final List<Rounding.DateTimeUnit> calendarIntervals;
36+
private final List<DateTimeUnitRounding> calendarIntervals;
2837
public static final String CALENDAR_INTERVALS = "calendar_intervals";
2938
public static final String DATE = "date";
3039
private final String field;
40+
private final List<DateTimeUnitRounding> sortedCalendarIntervals;
41+
private final DateFieldMapper.Resolution resolution;
3142

32-
public DateDimension(String field, List<Rounding.DateTimeUnit> calendarIntervals) {
43+
/**
 * Creates a date dimension.
 *
 * @param field             the field of this dimension
 * @param calendarIntervals the intervals of this dimension; the list is stored as given
 * @param resolution        the resolution of the date field; {@code null} falls back to
 *                          {@link DateFieldMapper.Resolution#MILLISECONDS}
 */
public DateDimension(String field, List<DateTimeUnitRounding> calendarIntervals, DateFieldMapper.Resolution resolution) {
    this.field = field;
    this.calendarIntervals = calendarIntervals;
    // A separate copy, ordered from the lowest unit to the highest unit
    this.sortedCalendarIntervals = getSortedDateTimeUnits(calendarIntervals);
    this.resolution = resolution != null ? resolution : DateFieldMapper.Resolution.MILLISECONDS;
}
3654

37-
public List<Rounding.DateTimeUnit> getIntervals() {
55+
/**
 * Returns the calendar intervals list that was passed to the constructor (not the sorted copy).
 */
public List<DateTimeUnitRounding> getIntervals() {
3856
return calendarIntervals;
3957
}
4058

59+
/**
 * Returns the calendar intervals sorted at construction time via getSortedDateTimeUnits.
 */
public List<DateTimeUnitRounding> getSortedCalendarIntervals() {
60+
return sortedCalendarIntervals;
61+
}
62+
63+
/**
64+
* Sets the dimension values in sorted order in the provided array starting from the given index.
65+
*
66+
* @param val The value to be set
67+
* @param dimSetter Consumer which sets the dimensions
68+
*/
69+
@Override
70+
public void setDimensionValues(final Long val, final Consumer<Long> dimSetter) {
71+
for (DateTimeUnitRounding dateTimeUnit : sortedCalendarIntervals) {
72+
if (val == null) {
73+
dimSetter.accept(null);
74+
} else {
75+
Long roundedValue = dateTimeUnit.roundFloor(storedDurationSinceEpoch(val));
76+
dimSetter.accept(roundedValue);
77+
}
78+
}
79+
}
80+
81+
/**
82+
* Converts nanoseconds to milliseconds based on the resolution of the field
83+
*/
84+
private long storedDurationSinceEpoch(long nanoSecondsSinceEpoch) {
85+
if (resolution.equals(DateFieldMapper.Resolution.NANOSECONDS)) return DateUtils.toMilliSeconds(nanoSecondsSinceEpoch);
86+
return nanoSecondsSinceEpoch;
87+
}
88+
89+
/**
90+
* Returns the list of fields that represent the dimension
91+
*/
92+
@Override
93+
public List<String> getSubDimensionNames() {
94+
List<String> fields = new ArrayList<>(calendarIntervals.size());
95+
for (DateTimeUnitRounding interval : sortedCalendarIntervals) {
96+
fields.add(field + "_" + interval.shortName());
97+
}
98+
return fields;
99+
}
100+
41101
@Override
42102
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
43-
builder.startObject();
103+
builder.startObject("date_dimension");
44104
builder.field(CompositeDataCubeFieldType.NAME, this.getField());
45105
builder.field(CompositeDataCubeFieldType.TYPE, DATE);
46106
builder.startArray(CALENDAR_INTERVALS);
47-
for (Rounding.DateTimeUnit interval : calendarIntervals) {
107+
for (DateTimeUnitRounding interval : calendarIntervals) {
48108
builder.value(interval.shortName());
49109
}
50110
builder.endArray();
@@ -69,4 +129,44 @@ public int hashCode() {
69129
public String getField() {
70130
return field;
71131
}
132+
133+
@Override
134+
public int getNumSubDimensions() {
135+
return calendarIntervals.size();
136+
}
137+
138+
/**
139+
* DateTimeUnit Comparator which tracks dateTimeUnits in sorted order from second unit to year unit
140+
*/
141+
public static class DateTimeUnitComparator implements Comparator<DateTimeUnitRounding> {
142+
public static final Map<String, Integer> ORDERED_DATE_TIME_UNIT = new HashMap<>();
143+
144+
static {
145+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.SECOND_OF_MINUTE.shortName(), 1);
146+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.MINUTES_OF_HOUR.shortName(), 2);
147+
ORDERED_DATE_TIME_UNIT.put(DataCubeDateTimeUnit.QUARTER_HOUR_OF_DAY.shortName(), 3);
148+
ORDERED_DATE_TIME_UNIT.put(DataCubeDateTimeUnit.HALF_HOUR_OF_DAY.shortName(), 4);
149+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.HOUR_OF_DAY.shortName(), 5);
150+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.DAY_OF_MONTH.shortName(), 6);
151+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.WEEK_OF_WEEKYEAR.shortName(), 7);
152+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.MONTH_OF_YEAR.shortName(), 8);
153+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.QUARTER_OF_YEAR.shortName(), 9);
154+
ORDERED_DATE_TIME_UNIT.put(Rounding.DateTimeUnit.YEAR_OF_CENTURY.shortName(), 10);
155+
}
156+
157+
@Override
158+
public int compare(DateTimeUnitRounding unit1, DateTimeUnitRounding unit2) {
159+
return Integer.compare(
160+
ORDERED_DATE_TIME_UNIT.getOrDefault(unit1.shortName(), Integer.MAX_VALUE),
161+
ORDERED_DATE_TIME_UNIT.getOrDefault(unit2.shortName(), Integer.MAX_VALUE)
162+
);
163+
}
164+
}
165+
166+
/**
167+
* Returns a sorted list of dateTimeUnits based on the DateTimeUnitComparator
168+
*/
169+
public static List<DateTimeUnitRounding> getSortedDateTimeUnits(List<DateTimeUnitRounding> dateTimeUnits) {
170+
return dateTimeUnits.stream().sorted(new DateTimeUnitComparator()).collect(Collectors.toList());
171+
}
72172
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/Dimension.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,35 @@
1111
import org.opensearch.common.annotation.ExperimentalApi;
1212
import org.opensearch.core.xcontent.ToXContent;
1313

14+
import java.util.List;
15+
import java.util.function.Consumer;
16+
1417
/**
1518
* Base interface for data-cube dimensions
1619
*
1720
* @opensearch.experimental
1821
*/
1922
@ExperimentalApi
2023
public interface Dimension extends ToXContent {
24+
2125
String getField();
26+
27+
/**
28+
* Returns the number of dimension values that gets added to star tree document
29+
* as part of this dimension
30+
*/
31+
int getNumSubDimensions();
32+
33+
/**
34+
* Sets the dimension values with the consumer
35+
*
36+
* @param value The value to be set
37+
* @param dimSetter Consumer which sets the dimensions
38+
*/
39+
void setDimensionValues(final Long value, final Consumer<Long> dimSetter);
40+
41+
/**
42+
* Returns the list of dimension fields that represent the dimension
43+
*/
44+
List<String> getSubDimensionNames();
2245
}

0 commit comments

Comments
 (0)