@@ -51,6 +51,20 @@ static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level,
51
51
}
52
52
}
53
53
54
///
/// Build the value of an ALTO `ID` attribute from an element prefix,
/// the page number and a per-element counter.
///
/// @param prefix       Element-kind prefix (the callers visible in this file
///                     pass e.g. "cblock", "block", "line", "string").
///                     A null pointer is treated as an empty prefix.
/// @param page_number  Page index; 0 selects the short "prefix_counter" form.
/// @param counter      Running number of the element.
/// @return "prefix_counter" when page_number is 0, otherwise
///         "prefix_page_counter".
///
static std::string GetID(const char *prefix, int page_number, int counter) {
  // Constructing std::string from a null pointer is undefined behaviour,
  // so fall back to an empty prefix instead.
  std::string id(prefix != nullptr ? prefix : "");

  // IDs will only have the counter for the first page to keep them consistent
  // with the IDs assigned before this change was made.
  // From the second page on, IDs will also contain the page number to make
  // them unique.
  //
  // std::to_string is used instead of a std::stringstream on purpose: a
  // default-constructed stream formats integers with the global C++ locale,
  // which may insert digit-group separators (e.g. "1,234") into the ID.
  if (page_number != 0) {
    id += '_';
    id += std::to_string(page_number);
  }
  id += '_';
  id += std::to_string(counter);

  return id;
}
67
+
54
68
// /
55
69
// / Append the ALTO XML for the beginning of the document
56
70
// /
@@ -168,7 +182,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
168
182
case PT_PULLOUT_IMAGE: {
169
183
// Handle all kinds of images.
170
184
// TODO: optionally add TYPE, for example TYPE="photo".
171
- alto_str << " \t\t\t\t <Illustration ID=\" cblock_ " << page_number << " _ " << bcnt++ << " \" " ;
185
+ alto_str << " \t\t\t\t <Illustration ID=\" " << GetID ( " cblock " , page_number, bcnt++) << " \" " ;
172
186
AddBoxToAlto (res_it.get (), RIL_BLOCK, alto_str);
173
187
alto_str << " </Illustration>\n " ;
174
188
res_it->Next (RIL_BLOCK);
@@ -177,7 +191,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
177
191
case PT_HORZ_LINE:
178
192
case PT_VERT_LINE:
179
193
// Handle horizontal and vertical lines.
180
- alto_str << " \t\t\t\t <GraphicalElement ID=\" cblock_ " << page_number << " _ " << bcnt++ << " \" " ;
194
+ alto_str << " \t\t\t\t <GraphicalElement ID=\" " << GetID ( " cblock " , page_number, bcnt++) << " \" " ;
181
195
AddBoxToAlto (res_it.get (), RIL_BLOCK, alto_str);
182
196
alto_str << " </GraphicalElement >\n " ;
183
197
res_it->Next (RIL_BLOCK);
@@ -190,24 +204,24 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
190
204
}
191
205
192
206
if (res_it->IsAtBeginningOf (RIL_BLOCK)) {
193
- alto_str << " \t\t\t\t <ComposedBlock ID=\" cblock_ " << page_number << " _ " << bcnt << " \" " ;
207
+ alto_str << " \t\t\t\t <ComposedBlock ID=\" " << GetID ( " cblock " , page_number, bcnt) << " \" " ;
194
208
AddBoxToAlto (res_it.get (), RIL_BLOCK, alto_str);
195
209
alto_str << " \n " ;
196
210
}
197
211
198
212
if (res_it->IsAtBeginningOf (RIL_PARA)) {
199
- alto_str << " \t\t\t\t\t <TextBlock ID=\" block_ " << page_number << " _ " << tcnt << " \" " ;
213
+ alto_str << " \t\t\t\t\t <TextBlock ID=\" " << GetID ( " block " , page_number, tcnt) << " \" " ;
200
214
AddBoxToAlto (res_it.get (), RIL_PARA, alto_str);
201
215
alto_str << " \n " ;
202
216
}
203
217
204
218
if (res_it->IsAtBeginningOf (RIL_TEXTLINE)) {
205
- alto_str << " \t\t\t\t\t\t <TextLine ID=\" line_ " << page_number << " _ " << lcnt << " \" " ;
219
+ alto_str << " \t\t\t\t\t\t <TextLine ID=\" " << GetID ( " line " , page_number, lcnt) << " \" " ;
206
220
AddBoxToAlto (res_it.get (), RIL_TEXTLINE, alto_str);
207
221
alto_str << " \n " ;
208
222
}
209
223
210
- alto_str << " \t\t\t\t\t\t\t <String ID=\" string_ " << page_number << " _ " << wcnt << " \" " ;
224
+ alto_str << " \t\t\t\t\t\t\t <String ID=\" " << GetID ( " string " , page_number, wcnt) << " \" " ;
211
225
AddBoxToAlto (res_it.get (), RIL_WORD, alto_str);
212
226
alto_str << " CONTENT=\" " ;
213
227
0 commit comments