# opus2m-2020-08-01.yml
---
# Release metadata for the eng-trk/opus2m-2020-08-01.zip model archive.
release: eng-trk/opus2m-2020-08-01.zip
# Left unquoted so the parsed value is unchanged for existing consumers;
# note that YAML 1.1 loaders read this as a date, not a string.
release-date: 2020-08-01
dataset-name: opus2m
modeltype: transformer
pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword vocabulary identifier for each side of the model.
subwords:
  source: spm32k
  target: spm32k

# Subword model files for each side.
# NOTE(review): presumably paths inside the release archive - confirm.
subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - eng

target-languages:
  - aze
  - bak
  - chv
  - crh
  - kaz
  - kir
  - kjh
  - kum
  - ota
  - sah
  - tat
  - tuk
  - tur
  - tyv
  - uig
  - uzb

# Target-language label tokens; quoted because a plain scalar may not
# start with ">".
# NOTE(review): presumably prepended to the input to select the output
# language/script - confirm against the model documentation.
use-target-labels:
  - ">>aze_Latn<<"
  - ">>bak<<"
  - ">>chv<<"
  - ">>crh<<"
  - ">>crh_Latn<<"
  - ">>kaz_Cyrl<<"
  - ">>kaz_Latn<<"
  - ">>kir_Cyrl<<"
  - ">>kjh<<"
  - ">>kum<<"
  - ">>ota_Arab<<"
  - ">>ota_Latn<<"
  - ">>sah<<"
  - ">>tat<<"
  - ">>tat_Arab<<"
  - ">>tat_Latn<<"
  - ">>tuk<<"
  - ">>tuk_Latn<<"
  - ">>tur<<"
  - ">>tyv<<"
  - ">>uig_Arab<<"
  - ">>uig_Cyrl<<"
  - ">>uzb_Cyrl<<"
  - ">>uzb_Latn<<"

# Size of each test set, keyed by test-set name.
# NOTE(review): the "A/B" values look like sentence/word counts - confirm.
test-data:
  Tatoeba-test.eng-aze: 2659/10046
  Tatoeba-test.eng-bak: 39/140
  Tatoeba-test.eng-chv: 333/1358
  Tatoeba-test.eng-crh: 22/81
  Tatoeba-test.eng-kaz: 397/1668
  Tatoeba-test.eng-kir: 118/428
  Tatoeba-test.eng-kjh: 17/48
  Tatoeba-test.eng-kum: 8/25
  Tatoeba-test.eng-multi: 10000/46183
  Tatoeba-test.eng-ota: 678/3328
  Tatoeba-test.eng-sah: 39/131
  Tatoeba-test.eng-tat: 1451/6996
  Tatoeba-test.eng-tuk: 2500/12809
  Tatoeba-test.eng-tur: 10000/49076
  Tatoeba-test.eng-tyv: 5/19
  Tatoeba-test.eng-uig: 3024/13084
  Tatoeba-test.eng-uzb: 457/1514
  newsdev2016-entr.eng-tur: 1001/14044
  newstest2016-entr.eng-tur: 3000/44195
  newstest2017-entr.eng-tur: 3007/45049
  newstest2018-entr.eng-tur: 3000/45944

# BLEU score per test set (same keys as test-data).
BLEU-scores:
  Tatoeba-test.eng-aze: 26.8
  Tatoeba-test.eng-bak: 7.6
  Tatoeba-test.eng-chv: 4.3
  Tatoeba-test.eng-crh: 8.1
  Tatoeba-test.eng-kaz: 11.1
  Tatoeba-test.eng-kir: 28.6
  Tatoeba-test.eng-kjh: 1.0
  Tatoeba-test.eng-kum: 2.2
  Tatoeba-test.eng-multi: 19.9
  Tatoeba-test.eng-ota: 0.5
  Tatoeba-test.eng-sah: 0.7
  Tatoeba-test.eng-tat: 9.7
  Tatoeba-test.eng-tuk: 5.9
  Tatoeba-test.eng-tur: 34.6
  Tatoeba-test.eng-tyv: 5.4
  Tatoeba-test.eng-uig: 0.1
  Tatoeba-test.eng-uzb: 3.4
  newsdev2016-entr.eng-tur: 10.1
  newstest2016-entr.eng-tur: 9.2
  newstest2017-entr.eng-tur: 9.0
  newstest2018-entr.eng-tur: 9.2

# chr-F score per test set (same keys as test-data).
chr-F-scores:
  Tatoeba-test.eng-aze: 0.577
  Tatoeba-test.eng-bak: 0.308
  Tatoeba-test.eng-chv: 0.270
  Tatoeba-test.eng-crh: 0.330
  Tatoeba-test.eng-kaz: 0.359
  Tatoeba-test.eng-kir: 0.524
  Tatoeba-test.eng-kjh: 0.041
  Tatoeba-test.eng-kum: 0.075
  Tatoeba-test.eng-multi: 0.455
  Tatoeba-test.eng-ota: 0.065
  Tatoeba-test.eng-sah: 0.030
  Tatoeba-test.eng-tat: 0.316
  Tatoeba-test.eng-tuk: 0.317
  Tatoeba-test.eng-tur: 0.623
  Tatoeba-test.eng-tyv: 0.210
  Tatoeba-test.eng-uig: 0.155
  Tatoeba-test.eng-uzb: 0.275
  newsdev2016-entr.eng-tur: 0.437
  newstest2016-entr.eng-tur: 0.410
  newstest2017-entr.eng-tur: 0.410
  newstest2018-entr.eng-tur: 0.413