Skip to content

Commit 627ec44

Browse files
Titousensei and facebook-github-bot
authored and committed
remove LMTask_Deprecated and LMLSTM_Deprecated (facebookresearch#882)
Summary: Pull Request resolved: facebookresearch#882. Remove LMTask_Deprecated and LMLSTM_Deprecated, and fix the corresponding config adapters. The language model data handler is no longer needed because it has been replaced by tensorizers. Differential Revision: D16695501. fbshipit-source-id: 87001512b2ac8bf403deb65cb33d090a033d286e
1 parent 4ad9805 commit 627ec44

File tree

10 files changed

+160
-425
lines changed

10 files changed

+160
-425
lines changed

pytext/builtin_task.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
EnsembleTask,
2121
JointTextTask_Deprecated,
2222
LMTask,
23-
LMTask_Deprecated,
2423
MaskedLMTask,
2524
NewBertClassificationTask,
2625
NewBertPairClassificationTask,
@@ -79,7 +78,6 @@ def register_builtin_tasks():
7978
EnsembleTask,
8079
JointTextTask_Deprecated,
8180
LMTask,
82-
LMTask_Deprecated,
8381
MaskedLMTask,
8482
NewBertClassificationTask,
8583
NewBertPairClassificationTask,

pytext/config/config_adapter.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,12 +544,79 @@ def update_model_config(model_config):
544544
return json_config
545545

546546

547+
@register_adapter(from_version=14)
def remove_lmtask_deprecated(json_config):
    """Rewrite every ``LMTask_Deprecated`` section of a config as ``LMTask``.

    For each dict in ``json_config`` that holds an ``LMTask_Deprecated`` key,
    the task config is popped, its ``data_handler`` / ``features`` settings
    are moved to their ``data.*`` and ``model.*`` locations, the leftover
    ``data_handler``, ``features`` and ``featurizer`` entries are deleted,
    and the result is stored back in the same dict under ``LMTask``.

    Args:
        json_config: The config being upgraded; it is modified in place.

    Returns:
        The same ``json_config`` object.
    """
    # File paths move from the data handler to the data source.
    source_renames = (
        ("data_handler.eval_path", "data.source.eval_filename"),
        ("data_handler.test_path", "data.source.test_filename"),
        ("data_handler.train_path", "data.source.train_filename"),
    )
    # BOS/EOS flags move to the token inputs; embedding settings move from
    # ``features`` to ``model.embedding``.
    token_and_embedding_renames = (
        ("data_handler.append_bos", "model.inputs.tokens.add_bos_token"),
        ("data_handler.append_eos", "model.inputs.tokens.add_eos_token"),
        ("features.shared_module_key", "model.embedding.shared_module_key"),
        ("features.word_feat.embed_dim", "model.embedding.embed_dim"),
    )
    # Batch sizes move to the batcher; vocab settings move to the token inputs.
    batcher_and_vocab_renames = (
        ("data_handler.eval_batch_size", "data.batcher.eval_batch_size"),
        ("data_handler.test_batch_size", "data.batcher.test_batch_size"),
        ("data_handler.train_batch_size", "data.batcher.train_batch_size"),
        ("features.word_feat.vocab_size", "model.inputs.tokens.vocab.size_from_data"),
        (
            "features.word_feat.vocab_from_train_data",
            "model.inputs.tokens.vocab.build_from_data",
        ),
    )
    obsolete_keys = ("data_handler", "features", "featurizer")

    for section in find_dicts_containing_key(json_config, "LMTask_Deprecated"):
        task = section.pop("LMTask_Deprecated")

        # The destination container is created before values are moved in.
        create_parameter(task, "data.source", {"TSVDataSource": {}})
        for old_path, new_path in source_renames:
            rename_parameter(task, old_path, new_path)
        create_parameter(task, "data.source.field_names", ["text"])

        for old_path, new_path in token_and_embedding_renames:
            rename_parameter(task, old_path, new_path)

        create_parameter(task, "data.batcher", {"PoolingBatcher": {}})
        for old_path, new_path in batcher_and_vocab_renames:
            rename_parameter(task, old_path, new_path)

        # The single vocab file path becomes a one-element list of
        # ``{"filepath": ...}`` entries (presumably the fourth argument is a
        # transform applied to the moved value -- confirm against
        # rename_parameter).
        rename_parameter(
            task,
            "features.word_feat.vocab_file",
            "model.inputs.tokens.vocab.vocab_files",
            lambda path: [{"filepath": path}],
        )

        for key in obsolete_keys:
            delete_parameter(task, key)

        section["LMTask"] = task

    return json_config
608+
609+
547610
def upgrade_one_version(json_config):
    """Upgrade ``json_config`` by a single version step.

    The current version is read from the ``"version"`` key (0 when absent)
    and used to look up an adapter in ``ADAPTERS``. The adapter is applied,
    a warning is emitted through ``eprint``, and the result's ``"version"``
    is set to the current version plus one.

    Args:
        json_config: Config dict to upgrade.

    Returns:
        The adapter's result, with its ``"version"`` key bumped by one.

    Raises:
        Exception: If ``ADAPTERS`` has no (truthy) entry for the current
            version.
    """
    current_version = json_config.get("version", 0)
    adapter = ADAPTERS.get(current_version)
    if adapter:
        upgraded = adapter(json_config)
        # The warning is emitted only after the adapter has returned.
        eprint(
            f"WARNING - Applying old config adapter for version={current_version}. "
            "Please consider migrating your old configs to the latest version."
        )
        upgraded["version"] = current_version + 1
        return upgraded
    raise Exception(f"no adapter found for version {current_version}")
555622

pytext/config/pytext_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,4 +141,4 @@ class TestConfig(ConfigBase):
141141
test_out_path: str = ""
142142

143143

144-
LATEST_VERSION = 14
144+
LATEST_VERSION = 15
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
[
2+
{
3+
"original": {
4+
"task": {
5+
"LMTask_Deprecated": {
6+
"features": {
7+
"shared_module_key": "SHARED_EMBEDDING",
8+
"word_feat": {
9+
"vocab_file": "base_dir/SST-2/sst_vocab.txt",
10+
"vocab_size": 15000,
11+
"vocab_from_train_data": false
12+
}
13+
},
14+
"data_handler": {
15+
"LanguageModelDataHandler": {
16+
"train_path": "base_dir/wikitext2/train.txt",
17+
"eval_path": "base_dir/wikitext2/valid.txt",
18+
"test_path": "base_dir/wikitext2/test.txt",
19+
"train_batch_size": 16,
20+
"eval_batch_size": 16,
21+
"test_batch_size": 16
22+
}
23+
},
24+
"model": {
25+
"representation": {
26+
"shared_module_key": "SHARED_LSTM"
27+
},
28+
"decoder": {
29+
"hidden_dims": [
30+
10
31+
]
32+
}
33+
}
34+
}
35+
},
36+
"version": 14
37+
},
38+
"adapted": {
39+
"task": {
40+
"LMTask": {
41+
"data": {
42+
"source": {
43+
"TSVDataSource": {
44+
"field_names": ["text"],
45+
"train_filename": "base_dir/wikitext2/train.txt",
46+
"test_filename": "base_dir/wikitext2/test.txt",
47+
"eval_filename": "base_dir/wikitext2/valid.txt"
48+
}
49+
},
50+
"batcher": {
51+
"PoolingBatcher": {
52+
"eval_batch_size": 16,
53+
"test_batch_size": 16,
54+
"train_batch_size": 16
55+
}
56+
}
57+
},
58+
"model": {
59+
"embedding": {
60+
"shared_module_key": "SHARED_EMBEDDING"
61+
},
62+
"inputs": {
63+
"tokens": {
64+
"vocab": {
65+
"build_from_data": false,
66+
"size_from_data": 15000,
67+
"vocab_files": [
68+
{
69+
"filepath": "base_dir/SST-2/sst_vocab.txt"
70+
}
71+
]
72+
}
73+
}
74+
},
75+
"representation": {
76+
"shared_module_key": "SHARED_LSTM"
77+
},
78+
"decoder": {
79+
"hidden_dims": [
80+
10
81+
]
82+
}
83+
}
84+
}
85+
},
86+
"version": 15
87+
}
88+
}
89+
]
90+
91+

pytext/data/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from .disjoint_multitask_data_handler import DisjointMultitaskDataHandler
1717
from .doc_classification_data_handler import DocClassificationDataHandler, RawData
1818
from .joint_data_handler import JointModelDataHandler
19-
from .language_model_data_handler import LanguageModelDataHandler
2019
from .query_document_pairwise_ranking_data_handler import (
2120
QueryDocumentPairwiseRankingDataHandler,
2221
)
@@ -40,7 +39,6 @@
4039
"EvalBatchSampler",
4140
"generator_iterator",
4241
"JointModelDataHandler",
43-
"LanguageModelDataHandler",
4442
"PoolingBatcher",
4543
"RandomizedBatchSampler",
4644
"QueryDocumentPairwiseRankingDataHandler",

pytext/data/language_model_data_handler.py

Lines changed: 0 additions & 155 deletions
This file was deleted.

0 commit comments

Comments
 (0)