# opus-2021-02-24.yml
# Release metadata for the zho-ukr "opus" transformer model package.
# NOTE(review): block nesting was reconstructed from a copy whose indentation
# had been stripped; keys and values are unchanged. Confirm the nesting against
# whatever tool consumes this file.
release: zho-ukr/opus-2021-02-24.zip
# Plain ISO date: some YAML loaders return a date object here, not a string.
release-date: 2021-02-24
dataset-name: opus
modeltype: transformer
pre-processing: normalization + SentencePiece (spm32k,spm4k)

# Subword segmentation label for each side of the model.
subwords:
  source: spm32k
  target: spm4k

# SentencePiece model file names for each side.
subword-models:
  source: source.spm
  target: target.spm

# Language codes covered on the source side.
source-languages:
  - cmn
  - nan
  - yue

# Language codes covered on the target side.
target-languages:
  - ukr

# Training corpora per language pair.
# The number in parentheses is presumably the size of that pair's training
# set -- TODO confirm the unit.
training-data:
  cmn-ukr: Tatoeba-train (13589)
  cmn_Hans-ukr: Tatoeba-train (217169)
  cmn_Hant-ukr: Tatoeba-train (204840)
  nan-ukr: Tatoeba-train (59)
  yue_Hans-ukr: Tatoeba-train (27)
  yue_Hant-ukr: Tatoeba-train (5123)

# Development data per language pair, as "<set name>, <count>".
# NOTE(review): total-size-shuffled and devset-selected are placed under
# validation-data because they describe the dev set; the flattened copy does
# not show whether they were nested or top-level -- confirm.
validation-data:
  cmn-ukr: Tatoeba-dev, 31
  cmn_Hans-ukr: Tatoeba-dev, 487
  cmn_Hant-ukr: Tatoeba-dev, 466
  ukr-yue_Hant: Tatoeba-dev, 14
  total-size-shuffled: 997
  devset-selected: top 997 lines of Tatoeba-dev.src.shuffled!

# Test sets as "<a>/<b>"; presumably sentence count / word count -- TODO
# confirm. The same keys are used in BLEU-scores and chr-F-scores below.
test-data:
  Tatoeba-test.cmn_Hans-ukr: 853/5212
  Tatoeba-test.cmn_Hant-ukr: 530/2804
  Tatoeba-test.cmn-ukr: 8/34
  Tatoeba-test.zho-ukr: 1575/9193
  Tatoeba-test.yue_Hans-ukr: 82/521
  Tatoeba-test.yue_Hant-ukr: 102/574

# BLEU per test set.
# NOTE(review): cmn-ukr has by far the smallest test-data entry (8/34), so its
# comparatively high score is likely not comparable to the others.
BLEU-scores:
  Tatoeba-test.cmn_Hans-ukr: 8.9
  Tatoeba-test.cmn_Hant-ukr: 13.2
  Tatoeba-test.cmn-ukr: 63.1
  Tatoeba-test.zho-ukr: 10.3
  Tatoeba-test.yue_Hans-ukr: 5.7
  Tatoeba-test.yue_Hant-ukr: 8.8

# chr-F per test set.
chr-F-scores:
  Tatoeba-test.cmn_Hans-ukr: 0.276
  Tatoeba-test.cmn_Hant-ukr: 0.296
  Tatoeba-test.cmn-ukr: 0.540
  Tatoeba-test.zho-ukr: 0.277
  Tatoeba-test.yue_Hans-ukr: 0.236
  Tatoeba-test.yue_Hant-ukr: 0.229