@@ -34,20 +34,6 @@ MlBreakEngine::~MlBreakEngine() {}
34
34
35
35
namespace {
36
36
const char16_t INVALID = u' |' ;
37
- const int32_t MAX_FEATURE = 13 ;
38
- const int32_t MAX_FEATURE_LENGTH = 11 ;
39
-
40
- void concatChar (const char16_t *str, const UChar32 *arr, int32_t length, char16_t *feature, UErrorCode &status) {
41
- if (U_FAILURE (status)) {
42
- return ;
43
- }
44
- UnicodeString result (str);
45
- for (int i = 0 ; i < length; i++) {
46
- result.append (arr[i]);
47
- }
48
- U_ASSERT (result.length () < MAX_FEATURE_LENGTH);
49
- result.extract (feature, MAX_FEATURE_LENGTH, status); // NUL-terminates
50
- }
51
37
}
52
38
53
39
int32_t MlBreakEngine::divideUpRange (UText *inText, int32_t rangeStart, int32_t rangeEnd,
@@ -144,96 +130,68 @@ int32_t MlBreakEngine::divideUpRange(UText *inText, int32_t rangeStart, int32_t
144
130
145
131
void MlBreakEngine::evaluateBreakpoint (UChar32* elementList, int32_t index, int32_t &numBreaks,
146
132
UVector32 &boundary, UErrorCode &status) const {
147
- char16_t featureList[MAX_FEATURE][MAX_FEATURE_LENGTH];
148
133
if (U_FAILURE (status)) {
149
134
return ;
150
135
}
151
136
152
- UChar32 arr[4 ] = {-1 , -1 , -1 , -1 };
153
- int32_t length = 0 , listLength = 0 ;
154
-
155
- const UChar32 w1 = elementList[0 ];
156
- const UChar32 w2 = elementList[1 ];
157
- const UChar32 w3 = elementList[2 ];
158
- const UChar32 w4 = elementList[3 ];
159
- const UChar32 w5 = elementList[4 ];
160
- const UChar32 w6 = elementList[5 ];
137
+ UnicodeString feature;
138
+ int32_t score = fNegativeSum ;
161
139
162
- length = 1 ;
163
- if (w1 != INVALID) {
164
- arr[ 0 ] = w1;
165
- concatChar ( u" UW1:" , arr, length, featureList[listLength++], status );
140
+ if (elementList[ 0 ] != INVALID) {
141
+ // When the key doesn't exist, Hashtable.geti(key) returns 0 and 2 * 0 = 0.
142
+ // So, we can skip checking whether fModel contains the feature key before calling geti().
143
+ score += ( 2 * fModel . geti (feature. setTo ( u" UW1:" , 4 ). append (elementList[ 0 ])) );
166
144
}
167
- if (w2 != INVALID) {
168
- arr[0 ] = w2;
169
- concatChar (u" UW2:" , arr, length, featureList[listLength++], status);
145
+ if (elementList[1 ] != INVALID) {
146
+ score += (2 * fModel .geti (feature.setTo (u" UW2:" , 4 ).append (elementList[1 ])));
170
147
}
171
- if (w3 != INVALID) {
172
- arr[0 ] = w3;
173
- concatChar (u" UW3:" , arr, length, featureList[listLength++], status);
148
+ if (elementList[2 ] != INVALID) {
149
+ score += (2 * fModel .geti (feature.setTo (u" UW3:" , 4 ).append (elementList[2 ])));
174
150
}
175
- if (w4 != INVALID) {
176
- arr[0 ] = w4;
177
- concatChar (u" UW4:" , arr, length, featureList[listLength++], status);
151
+ if (elementList[3 ] != INVALID) {
152
+ score += (2 * fModel .geti (feature.setTo (u" UW4:" , 4 ).append (elementList[3 ])));
178
153
}
179
- if (w5 != INVALID) {
180
- arr[0 ] = w5;
181
- concatChar (u" UW5:" , arr, length, featureList[listLength++], status);
154
+ if (elementList[4 ] != INVALID) {
155
+ score += (2 * fModel .geti (feature.setTo (u" UW5:" , 4 ).append (elementList[4 ])));
182
156
}
183
- if (w6 != INVALID) {
184
- arr[0 ] = w6;
185
- concatChar (u" UW6:" , arr, length, featureList[listLength++], status);
157
+ if (elementList[5 ] != INVALID) {
158
+ score += (2 * fModel .geti (feature.setTo (u" UW6:" , 4 ).append (elementList[5 ])));
186
159
}
187
- length = 2 ;
188
- if (w2 != INVALID && w3 != INVALID) {
189
- arr[0 ] = w2;
190
- arr[1 ] = w3;
191
- concatChar (u" BW1:" , arr, length, featureList[listLength++], status);
160
+ if (elementList[1 ] != INVALID && elementList[2 ] != INVALID) {
161
+ score += (2 * fModel .geti (
162
+ feature.setTo (u" BW1:" , 4 ).append (elementList[1 ]).append (elementList[2 ])));
192
163
}
193
- if (w3 != INVALID && w4 != INVALID) {
194
- arr[0 ] = w3;
195
- arr[1 ] = w4;
196
- concatChar (u" BW2:" , arr, length, featureList[listLength++], status);
164
+ if (elementList[2 ] != INVALID && elementList[3 ] != INVALID) {
165
+ score += (2 * fModel .geti (
166
+ feature.setTo (u" BW2:" , 4 ).append (elementList[2 ]).append (elementList[3 ])));
197
167
}
198
- if (w4 != INVALID && w5 != INVALID) {
199
- arr[0 ] = w4;
200
- arr[1 ] = w5;
201
- concatChar (u" BW3:" , arr, length, featureList[listLength++], status);
168
+ if (elementList[3 ] != INVALID && elementList[4 ] != INVALID) {
169
+ score += (2 * fModel .geti (
170
+ feature.setTo (u" BW3:" , 4 ).append (elementList[3 ]).append (elementList[4 ])));
202
171
}
203
- length = 3 ;
204
- if (w1 != INVALID && w2 != INVALID && w3 != INVALID) {
205
- arr[0 ] = w1;
206
- arr[1 ] = w2;
207
- arr[2 ] = w3;
208
- concatChar (u" TW1:" , arr, length, featureList[listLength++], status);
172
+ if (elementList[0 ] != INVALID && elementList[1 ] != INVALID && elementList[2 ] != INVALID) {
173
+ score += (2 * fModel .geti (feature.setTo (u" TW1:" , 4 )
174
+ .append (elementList[0 ])
175
+ .append (elementList[1 ])
176
+ .append (elementList[2 ])));
209
177
}
210
- if (w2 != INVALID && w3 != INVALID && w4 != INVALID) {
211
- arr[ 0 ] = w2;
212
- arr [1 ] = w3;
213
- arr [2 ] = w4;
214
- concatChar ( u" TW2: " , arr, length, featureList[listLength++], status );
178
+ if (elementList[ 1 ] != INVALID && elementList[ 2 ] != INVALID && elementList[ 3 ] != INVALID) {
179
+ score += ( 2 * fModel . geti (feature. setTo ( u" TW2: " , 4 )
180
+ . append (elementList [1 ])
181
+ . append (elementList [2 ])
182
+ . append (elementList[ 3 ])) );
215
183
}
216
- if (w3 != INVALID && w4 != INVALID && w5 != INVALID) {
217
- arr[ 0 ] = w3;
218
- arr[ 1 ] = w4;
219
- arr[ 2 ] = w5;
220
- concatChar ( u" TW3: " , arr, length, featureList[listLength++], status );
184
+ if (elementList[ 2 ] != INVALID && elementList[ 3 ] != INVALID && elementList[ 4 ] != INVALID) {
185
+ score += ( 2 * fModel . geti (feature. setTo ( u" TW3: " , 4 )
186
+ . append (elementList[ 2 ])
187
+ . append (elementList[ 3 ])
188
+ . append (elementList[ 4 ])) );
221
189
}
222
- if (w4 != INVALID && w5 != INVALID && w6 != INVALID) {
223
- arr[0 ] = w4;
224
- arr[1 ] = w5;
225
- arr[2 ] = w6;
226
- concatChar (u" TW4:" , arr, length, featureList[listLength++], status);
227
- }
228
- if (U_FAILURE (status)) {
229
- return ;
230
- }
231
- int32_t score = fNegativeSum ;
232
- for (int32_t j = 0 ; j < listLength; j++) {
233
- UnicodeString key (featureList[j]);
234
- if (fModel .containsKey (key)) {
235
- score += (2 * fModel .geti (key));
236
- }
190
+ if (elementList[3 ] != INVALID && elementList[4 ] != INVALID && elementList[5 ] != INVALID) {
191
+ score += (2 * fModel .geti (feature.setTo (u" TW4:" , 4 )
192
+ .append (elementList[3 ])
193
+ .append (elementList[4 ])
194
+ .append (elementList[5 ])));
237
195
}
238
196
if (score > 0 ) {
239
197
boundary.addElement (index , status);
0 commit comments