|
200 | 200 | ("frr", "North_Frisian"),
|
201 | 201 | ("nd", "North_Ndebele"),
|
202 | 202 | ("sme", "North_Sami"),
|
203 |
| - ("se", "Northern_Sami"), |
204 | 203 | ("nso", "Northern_Sotho"),
|
205 | 204 | ("nb", "Norwegian_Bokmaal"),
|
206 | 205 | ("nn", "Norwegian_Nynorsk"),
|
|
346 | 345 | ("xh", "xho"),
|
347 | 346 | ("yo", "yor"),
|
348 | 347 | ("zu", "zul"),
|
| 348 | + |
| 349 | + # this is a weird case where a 2 letter code was available, |
| 350 | + # but UD used the 3 letter code instead |
| 351 | + ("se", "sme"), |
349 | 352 | )
|
350 | 353 |
|
351 | 354 | for two, three in two_to_three_letters_raw:
|
352 |
| - assert two in lcode2lang |
353 |
| - assert three not in lcode2lang |
354 |
| - assert three not in lang2lcode |
355 |
| - lang2lcode[three] = two |
356 |
| - lcode2lang[three] = lcode2lang[two] |
| 355 | + if two in lcode2lang: |
| 356 | + assert two in lcode2lang |
| 357 | + assert three not in lcode2lang |
| 358 | + assert three not in lang2lcode |
| 359 | + lang2lcode[three] = two |
| 360 | + lcode2lang[three] = lcode2lang[two] |
| 361 | + elif three in lcode2lang: |
| 362 | + assert three in lcode2lang |
| 363 | + assert two not in lcode2lang |
| 364 | + assert two not in lang2lcode |
| 365 | + lang2lcode[two] = three |
| 366 | + lcode2lang[two] = lcode2lang[three] |
| 367 | + else: |
| 368 | + raise AssertionError("Found a proposed alias %s -> %s when neither code was already known" % (two, three)) |
357 | 369 |
|
358 | 370 | two_to_three_letters = {
|
359 | 371 | two: three for two, three in two_to_three_letters_raw
|
360 | 372 | }
|
361 | 373 |
|
| 374 | +three_to_two_letters = { |
| 375 | + three: two for two, three in two_to_three_letters_raw |
| 376 | +} |
| 377 | + |
362 | 378 | assert len(two_to_three_letters) == len(two_to_three_letters_raw)
|
| 379 | +assert len(three_to_two_letters) == len(two_to_three_letters_raw) |
363 | 380 |
|
364 | 381 | # additional useful code to language mapping
|
365 | 382 | # added after dict invert to avoid conflict
|
|
0 commit comments