Skip to content

Commit c6c80fb

Browse files
Titousensei and facebook-github-bot
authored and committed
remove LMTask_Deprecated and LMLSTM_Deprecated (facebookresearch#882)
Summary: Pull Request resolved: facebookresearch#882. Remove LMTask_Deprecated and LMLSTM_Deprecated, and fix the corresponding config adapters. The language model data handler is no longer needed because it has been replaced by tensorizers. Differential Revision: D16695501. fbshipit-source-id: 1b8e2e3823b455f52cdb20ccd76ccbce59175298
1 parent 69cc0c6 commit c6c80fb

File tree

10 files changed

+51
-455
lines changed

10 files changed

+51
-455
lines changed

demo/configs/multitask_sst_lm.json

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
{
2+
"version": 14,
23
"config": {
34
"task": {
45
"DisjointMultitask": {
@@ -43,25 +44,38 @@
4344
},
4445
"LM": {
4546
"LMTask": {
46-
"features": {
47-
"shared_module_key": "SHARED_EMBEDDING",
48-
"word_feat": {
49-
"vocab_file": "base_dir/SST-2/sst_vocab.txt",
50-
"vocab_size": 15000,
51-
"vocab_from_train_data": false
52-
}
53-
},
54-
"data_handler": {
55-
"LanguageModelDataHandler": {
56-
"train_path": "base_dir/wikitext2/train.txt",
57-
"eval_path": "base_dir/wikitext2/valid.txt",
58-
"test_path": "base_dir/wikitext2/test.txt",
59-
"train_batch_size": 16,
60-
"eval_batch_size": 16,
61-
"test_batch_size": 16
47+
"data": {
48+
"source": {
49+
"TSVDataSource": {
50+
"field_names": ["text"],
51+
"train_filename": "base_dir/wikitext2/train.txt",
52+
"test_filename": "base_dir/wikitext2/test.txt",
53+
"eval_filename": "base_dir/wikitext2/valid.txt"
54+
}
55+
},
56+
"batcher": {
57+
"PoolingBatcher": {
58+
"eval_batch_size": 16,
59+
"test_batch_size": 16,
60+
"train_batch_size": 16
61+
}
6262
}
6363
},
6464
"model": {
65+
"embedding": {
66+
"shared_module_key": "SHARED_EMBEDDING"
67+
},
68+
"inputs": {
69+
"tokens": {
70+
"vocab": {
71+
"build_from_data": false,
72+
"size_from_data": 15000,
73+
"vocab_files": [
74+
"base_dir/SST-2/sst_vocab.txt"
75+
]
76+
}
77+
}
78+
},
6579
"representation": {
6680
"shared_module_key": "SHARED_LSTM"
6781
},

pytext/builtin_task.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
EnsembleTask,
2020
JointTextTask_Deprecated,
2121
LMTask,
22-
LMTask_Deprecated,
2322
MaskedLMTask,
2423
NewBertClassificationTask,
2524
NewBertPairClassificationTask,
@@ -79,7 +78,6 @@ def register_builtin_tasks():
7978
EnsembleTask,
8079
JointTextTask_Deprecated,
8180
LMTask,
82-
LMTask_Deprecated,
8381
MaskedLMTask,
8482
NewBertClassificationTask,
8583
NewBertPairClassificationTask,

pytext/config/config_adapter.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,6 @@ def old_tasks_deprecated(json_config):
287287
deprecate(json_config, "I18NJointTextTask")
288288
deprecate(json_config, "JointTextTask")
289289
deprecate(json_config, "KDDocClassificationTask")
290-
deprecate(json_config, "LMTask")
291290
deprecate(json_config, "NLGSeq2SeqTask")
292291
deprecate(json_config, "PairClassificationTask")
293292
deprecate(json_config, "PairwiseAttentionClassificationTask")
@@ -359,9 +358,9 @@ def v6_to_v7(json_config):
359358
@register_adapter(from_version=7)
360359
def lm_model_deprecated(json_config):
361360
"""
362-
Rename LM model to _Deprecated (LMTask is already deprecated in v5)
361+
Was: Rename LM model to _Deprecated (LMTask is already deprecated in v5)
362+
Now: LMLSTM_Deprecated is removed, nothing else to do.
363363
"""
364-
deprecate(json_config, "LMLSTM")
365364
return json_config
366365

367366

pytext/config/test/json_config/v8.json

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
{
33
"original": {
44
"task": {
5-
"LMTask_Deprecated": {
6-
"data_handler": {
7-
"LanguageModelDataHandler": {
8-
"train_path": "tests/data/train_data_tiny.tsv",
9-
"eval_path": "tests/data/test_data_tiny.tsv",
10-
"test_path": "tests/data/test_data_tiny.tsv"
5+
"LMTask": {
6+
"data": {
7+
"source": {
8+
"TSVDataSource": {
9+
"field_names": ["text"],
10+
"train_filename": "pytext/tests/data/alarm_lm_tiny.tsv",
11+
"test_filename": "pytext/tests/data/alarm_lm_tiny.tsv",
12+
"eval_filename": "pytext/tests/data/alarm_lm_tiny.tsv"
13+
}
1114
}
1215
}
1316
}
@@ -16,12 +19,15 @@
1619
},
1720
"adapted": {
1821
"task": {
19-
"LMTask_Deprecated": {
20-
"data_handler": {
21-
"LanguageModelDataHandler": {
22-
"train_path": "tests/data/train_data_tiny.tsv",
23-
"eval_path": "tests/data/test_data_tiny.tsv",
24-
"test_path": "tests/data/test_data_tiny.tsv"
22+
"LMTask": {
23+
"data": {
24+
"source": {
25+
"TSVDataSource": {
26+
"field_names": ["text"],
27+
"train_filename": "pytext/tests/data/alarm_lm_tiny.tsv",
28+
"test_filename": "pytext/tests/data/alarm_lm_tiny.tsv",
29+
"eval_filename": "pytext/tests/data/alarm_lm_tiny.tsv"
30+
}
2531
}
2632
}
2733
}

pytext/data/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from .disjoint_multitask_data_handler import DisjointMultitaskDataHandler
1717
from .doc_classification_data_handler import DocClassificationDataHandler, RawData
1818
from .joint_data_handler import JointModelDataHandler
19-
from .language_model_data_handler import LanguageModelDataHandler
2019
from .query_document_pairwise_ranking_data_handler import (
2120
QueryDocumentPairwiseRankingDataHandler,
2221
)
@@ -40,7 +39,6 @@
4039
"EvalBatchSampler",
4140
"generator_iterator",
4241
"JointModelDataHandler",
43-
"LanguageModelDataHandler",
4442
"PoolingBatcher",
4543
"RandomizedBatchSampler",
4644
"QueryDocumentPairwiseRankingDataHandler",

pytext/data/language_model_data_handler.py

Lines changed: 0 additions & 155 deletions
This file was deleted.

0 commit comments

Comments
 (0)