# opus-2021-02-23.yml
---
# Release metadata for this model package.
release: 'zls-zls/opus-2021-02-23.zip'
# Deliberately left unquoted: quoting would change the loaded type from a
# date to a string under YAML 1.1 loaders.
release-date: 2021-02-23
dataset-name: 'opus'
modeltype: 'transformer'
# Quoted because the value contains '+', parentheses and a comma.
pre-processing: 'normalization + SentencePiece (spm32k,spm32k)'
# Subword setting for each side of the model. The source/target entries are
# nested under their parent key so that the two sections do not both define
# top-level `source` and `target` keys.
subwords:
  source: spm32k
  target: spm32k
# Subword model file name for each side.
subword-models:
  source: source.spm
  target: target.spm
# Target-label tokens listed for this model. Each one must stay quoted:
# a plain scalar may not begin with the '>' indicator.
use-target-labels:
  - '>>bos_Cyrl<<'
  - '>>bos_Latn<<'
  - '>>bul<<'
  - '>>hbs<<'
  - '>>hbs_Cyrl<<'
  - '>>hrv<<'
  - '>>mkd<<'
  - '>>srp_Cyrl<<'
  - '>>srp_Latn<<'
# Language codes on the source side.
source-languages:
  - bos
  - bul
  - hbs
  - hrv
  - mkd
  - srp
# Language codes on the target side (the same six codes as the source side).
target-languages:
  - bos
  - bul
  - hbs
  - hrv
  - mkd
  - srp
# Training sets keyed by language pair. Each value names the corpus followed
# by a number in parentheses.
# NOTE(review): the number is presumably the count of training examples for
# that pair - confirm against the data-preparation tooling.
# The entries are nested here so they stay separate from the identically
# named pairs under validation-data.
training-data:
  bos_Cyrl-bul: Tatoeba-train (71)
  bos_Cyrl-eng: Tatoeba-train (225)
  bos_Cyrl-mkd: Tatoeba-train (1)
  bos_Latn-bul: Tatoeba-train (1000000)
  bos_Latn-eng: Tatoeba-train (1000000)
  bos_Latn-mkd: Tatoeba-train (1000000)
  bul-bos_Cyrl: Tatoeba-train (71)
  bul-bos_Latn: Tatoeba-train (1000000)
  bul-eng: Tatoeba-train (1000000)
  bul-hbs: Tatoeba-train (11168)
  bul-hbs_Cyrl: Tatoeba-train (412)
  bul-hrv: Tatoeba-train (1000000)
  bul-mkd: Tatoeba-train (1000000)
  bul-srp_Cyrl: Tatoeba-train (1000000)
  bul-srp_Latn: Tatoeba-train (1000000)
  cnr-eng: Tatoeba-train (13)
  cnr_Latn-eng: Tatoeba-train (56214)
  eng-bos_Cyrl: Tatoeba-train (225)
  eng-bos_Latn: Tatoeba-train (1000000)
  eng-bul: Tatoeba-train (1000000)
  eng-cnr: Tatoeba-train (13)
  eng-cnr_Latn: Tatoeba-train (56214)
  eng-hbs: Tatoeba-train (12895)
  eng-hbs_Cyrl: Tatoeba-train (334)
  eng-hrv: Tatoeba-train (1000000)
  eng-mkd: Tatoeba-train (1000000)
  eng-slv: Tatoeba-train (1000000)
  eng-srp_Cyrl: Tatoeba-train (1000000)
  eng-srp_Latn: Tatoeba-train (1000000)
  hbs-bul: Tatoeba-train (11168)
  hbs-eng: Tatoeba-train (12895)
  hbs-mkd: Tatoeba-train (1000000)
  hbs_Cyrl-bul: Tatoeba-train (412)
  hbs_Cyrl-eng: Tatoeba-train (334)
  hbs_Cyrl-mkd: Tatoeba-train (1)
  hrv-bul: Tatoeba-train (1000000)
  hrv-eng: Tatoeba-train (1000000)
  hrv-mkd: Tatoeba-train (1000000)
  mkd-bos_Cyrl: Tatoeba-train (1)
  mkd-bos_Latn: Tatoeba-train (1000000)
  mkd-bul: Tatoeba-train (1000000)
  mkd-eng: Tatoeba-train (1000000)
  mkd-hbs: Tatoeba-train (1000000)
  mkd-hbs_Cyrl: Tatoeba-train (1)
  mkd-hrv: Tatoeba-train (1000000)
  mkd-srp_Cyrl: Tatoeba-train (555715)
  mkd-srp_Latn: Tatoeba-train (1000000)
  slv-eng: Tatoeba-train (1000000)
  srp_Cyrl-bul: Tatoeba-train (1000000)
  srp_Cyrl-eng: Tatoeba-train (1000000)
  srp_Cyrl-mkd: Tatoeba-train (555715)
  srp_Latn-bul: Tatoeba-train (1000000)
  srp_Latn-eng: Tatoeba-train (1000000)
  srp_Latn-mkd: Tatoeba-train (1000000)
# Validation sets keyed by language pair; each value is the corpus name
# followed by a number.
# Every pair was previously listed twice with an identical value. Duplicate
# mapping keys are invalid YAML, so each pair now appears exactly once and
# no value has changed.
# NOTE(review): total-size-shuffled and devset-selected are placed inside
# this mapping on the assumption that they describe the validation set -
# confirm against the consumer of this file.
validation-data:
  bos_Latn-bul: Tatoeba-dev, 141
  bos_Latn-eng: Tatoeba-dev, 199
  bos_Latn-mkd: Tatoeba-dev, 204
  bul-eng: Tatoeba-dev, 7753
  bul-hbs: Tatoeba-dev, 1000
  bul-hrv: Tatoeba-dev, 375
  bul-mkd: Tatoeba-dev, 1000
  bul-srp_Cyrl: Tatoeba-dev, 43
  bul-srp_Latn: Tatoeba-dev, 414
  eng-hbs: Tatoeba-dev, 6431
  eng-hrv: Tatoeba-dev, 947
  eng-mkd: Tatoeba-dev, 70284
  eng-slv: Tatoeba-dev, 1000
  eng-srp_Cyrl: Tatoeba-dev, 1049
  eng-srp_Latn: Tatoeba-dev, 4236
  hbs-mkd: Tatoeba-dev, 1000
  hrv-mkd: Tatoeba-dev, 421
  mkd-srp_Cyrl: Tatoeba-dev, 87
  mkd-srp_Latn: Tatoeba-dev, 287
  total-size-shuffled: 185232
  devset-selected: top 5000 lines of Tatoeba-dev.src.shuffled!
# Test sets keyed by "<corpus>.<pair>"; each value is a pair of counts
# written as "a/b".
# NOTE(review): presumably sentences/words - confirm. Several sets have a
# first count of 1, so scores reported on them rest on very little data.
# The entries are nested so they stay separate from the identically named
# keys under BLEU-scores and chr-F-scores.
test-data:
  Tatoeba-test.bul-hbs: 1/6
  Tatoeba-test.bul-mkd: 3/17
  Tatoeba-test.hbs-bul: 1/6
  Tatoeba-test.hbs-mkd: 1/6
  Tatoeba-test.mkd-bul: 3/17
  Tatoeba-test.mkd-hbs: 1/6
  Tatoeba-test.multi-multi: 14/82
# BLEU score per test set. The entries are nested so they stay separate
# from the identically named keys under test-data and chr-F-scores.
BLEU-scores:
  Tatoeba-test.bul-hbs: 53.7
  Tatoeba-test.bul-mkd: 37.5
  Tatoeba-test.hbs-bul: 100.0
  Tatoeba-test.hbs-mkd: 38.0
  Tatoeba-test.mkd-bul: 54.2
  Tatoeba-test.mkd-hbs: 53.7
  Tatoeba-test.multi-multi: 48.3
# chr-F score per test set. The entries are nested so they stay separate
# from the identically named keys under test-data and BLEU-scores.
chr-F-scores:
  Tatoeba-test.bul-hbs: 0.871
  Tatoeba-test.bul-mkd: 0.688
  Tatoeba-test.hbs-bul: 1.000
  Tatoeba-test.hbs-mkd: 0.718
  Tatoeba-test.mkd-bul: 0.788
  Tatoeba-test.mkd-hbs: 0.871
  Tatoeba-test.multi-multi: 0.687