# opus1m-2021-05-15.yml (97 lines, 2.59 KB)
# Release metadata for the art-por "opus1m" model package.
# Nested keys are indented by two spaces so each section's entries belong to
# that section; at column 0 the repeated `source`/`target` and
# `Tatoeba-test.*` keys would collide as duplicate top-level keys.
release: art-por/opus1m-2021-05-15.zip
release-date: 2021-05-15
dataset-name: opus1m
modeltype: transformer-align

# Vocabulary files; source and target point to the same file.
vocabulary:
  source: opus1m.spm32k-spm32k.vocab.yml
  target: opus1m.spm32k-spm32k.vocab.yml

pre-processing: normalization + SentencePiece (spm32k,spm32k)

# Subword segmentation scheme per side.
subwords:
  source: spm32k
  target: spm32k

# Subword model files per side.
subword-models:
  source: source.spm
  target: target.spm

source-languages:
  - avk
  - bzt
  - epo
  - ido
  - ile
  - ina
  - jbo
  - lfn
  - nov

target-languages:
  - pob
  - por

# Target-language labels; quoted because a plain scalar may not start with `>`.
use-target-labels:
  - ">>pob<<"
  - ">>por<<"

# Training sets per language pair.
# NOTE(review): the number in parentheses is presumably the example count.
training-data:
  epo-pob: Tatoeba-train (42197)
  epo-por: Tatoeba-train (207266)
  ido-por: Tatoeba-train (19)
  ido_Latn-por: Tatoeba-train (22307)
  ina_Latn-por: Tatoeba-train (19169)

# Validation sets per language pair.
# NOTE(review): `total-size-shuffled` and `devset-selected` are placed under
# `validation-data` as a reconstruction of the lost nesting; confirm against
# the consumer of this file.
validation-data:
  epo-por: Tatoeba-dev, 45065
  ido-por: Tatoeba-dev, 1
  ido_Latn-por: Tatoeba-dev, 999
  ile_Latn-por: Tatoeba-dev, 96
  ina_Latn-por: Tatoeba-dev, 7080
  jbo_Latn-por: Tatoeba-dev, 134
  nov_Latn-por: Tatoeba-dev, 3
  total-size-shuffled: 2935
  devset-selected: top 2935 lines of Tatoeba-dev.src.shuffled

# Test sets and their sizes.
# NOTE(review): the `a/b` values are presumably two size counts (e.g.
# sentences/words); they are plain strings, not fractions.
test-data:
  Tatoeba-test.avk-por: 1/6
  Tatoeba-test.bzt-por: 14/71
  Tatoeba-test.epo-por: 10000/89903
  Tatoeba-test.ido-por: 20/98
  Tatoeba-test.ile-por: 57/335
  Tatoeba-test.ina-por: 2500/23444
  Tatoeba-test.jbo-por: 2/9
  Tatoeba-test.lfn_Cyrl-por: 220/1927
  Tatoeba-test.lfn_Latn-por: 1725/22989
  Tatoeba-test.lfn-por: 1945/24916
  Tatoeba-test.nov-por: 3/15
  Tatoeba-test.multi-por: 10000/95398
  Tatoeba-test.sjn-por: 2/9
  Tatoeba-test.tlh-por: 22/125
  Tatoeba-test.tzl-por: 15/85
  Tatoeba-test.vol-por: 15/80

# BLEU score per test set (same keys as `test-data`).
BLEU-scores:
  Tatoeba-test.avk-por: 0.0
  Tatoeba-test.bzt-por: 1.5
  Tatoeba-test.epo-por: 17.6
  Tatoeba-test.ido-por: 5.4
  Tatoeba-test.ile-por: 1.6
  Tatoeba-test.ina-por: 2.4
  Tatoeba-test.jbo-por: 0.0
  Tatoeba-test.lfn_Cyrl-por: 0.2
  Tatoeba-test.lfn_Latn-por: 1.7
  Tatoeba-test.lfn-por: 1.5
  Tatoeba-test.nov-por: 5.5
  Tatoeba-test.multi-por: 12.3
  Tatoeba-test.sjn-por: 0.0
  Tatoeba-test.tlh-por: 1.0
  Tatoeba-test.tzl-por: 1.2
  Tatoeba-test.vol-por: 1.5

# chr-F score per test set (same keys as `test-data`).
chr-F-scores:
  Tatoeba-test.avk-por: 0.061
  Tatoeba-test.bzt-por: 0.098
  Tatoeba-test.epo-por: 0.393
  Tatoeba-test.ido-por: 0.256
  Tatoeba-test.ile-por: 0.160
  Tatoeba-test.ina-por: 0.208
  Tatoeba-test.jbo-por: 0.078
  Tatoeba-test.lfn_Cyrl-por: 0.114
  Tatoeba-test.lfn_Latn-por: 0.182
  Tatoeba-test.lfn-por: 0.176
  Tatoeba-test.nov-por: 0.151
  Tatoeba-test.multi-por: 0.318
  Tatoeba-test.sjn-por: 0.070
  Tatoeba-test.tlh-por: 0.080
  Tatoeba-test.tzl-por: 0.121
  Tatoeba-test.vol-por: 0.111