Skip to content

Commit 570a58c

Browse files
committed
Use an alias for se / sme, as per #1279
1 parent 5b3c8b3 commit 570a58c

File tree

2 files changed

+20
-6
lines changed

2 files changed

+20
-6
lines changed

stanza/models/common/constant.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,6 @@
200200
("frr", "North_Frisian"),
201201
("nd", "North_Ndebele"),
202202
("sme", "North_Sami"),
203-
("se", "Northern_Sami"),
204203
("nso", "Northern_Sotho"),
205204
("nb", "Norwegian_Bokmaal"),
206205
("nn", "Norwegian_Nynorsk"),
@@ -346,14 +345,27 @@
346345
("xh", "xho"),
347346
("yo", "yor"),
348347
("zu", "zul"),
348+
349+
# this is a weird case where a 2 letter code was available,
350+
# but UD used the 3 letter code instead
351+
("se", "sme"),
349352
)
350353

351354
for two, three in two_to_three_letters_raw:
352-
assert two in lcode2lang
353-
assert three not in lcode2lang
354-
assert three not in lang2lcode
355-
lang2lcode[three] = two
356-
lcode2lang[three] = lcode2lang[two]
355+
if two in lcode2lang:
356+
assert two in lcode2lang
357+
assert three not in lcode2lang
358+
assert three not in lang2lcode
359+
lang2lcode[three] = two
360+
lcode2lang[three] = lcode2lang[two]
361+
elif three in lcode2lang:
362+
assert three in lcode2lang
363+
assert two not in lcode2lang
364+
assert two not in lang2lcode
365+
lang2lcode[two] = three
366+
lcode2lang[two] = lcode2lang[three]
367+
else:
368+
raise AssertionError("Found a proposed alias %s -> %s when neither code was already known" % (two, three))
357369

358370
two_to_three_letters = {
359371
two: three for two, three in two_to_three_letters_raw

stanza/resources/prepare_resources.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -482,6 +482,8 @@ def process_lcode(args):
482482
resources_new[lang_name.lower()] = {'alias': lang.lower()}
483483
if lang.lower() in two_to_three_letters:
484484
resources_new[two_to_three_letters[lang.lower()]] = {'alias': lang.lower()}
485+
elif lang.lower() in two_to_three_letters.values():
486+
resources_new[lang.lower()] = {'alias': two_to_three_letters[lang.lower()]}
485487
print("Processed lcode aliases. Writing resources.json")
486488
json.dump(resources_new, open(os.path.join(args.output_dir, 'resources.json'), 'w'), indent=2)
487489

0 commit comments

Comments
 (0)