Skip to content
This repository was archived by the owner on Nov 22, 2022. It is now read-only.

Commit 1e66ac4

Browse files
Titousensei and facebook-github-bot
authored and committed
remove LMTask_Deprecated and LMLSTM_Deprecated (#882)
Summary: Pull Request resolved: #882. Remove LMTask_Deprecated and LMLSTM_Deprecated and fix the corresponding config adapters. The language model data handler is no longer needed because it has been replaced by tensorizers. Reviewed By: chenyangyu1988. Differential Revision: D16695501. fbshipit-source-id: 3cd41f4971f81a66f2f05dc9d05447335240a29b
1 parent a170dd4 commit 1e66ac4

File tree

10 files changed

+156
-425
lines changed

10 files changed

+156
-425
lines changed

pytext/builtin_task.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
EnsembleTask,
2020
IntentSlotTask,
2121
LMTask,
22-
LMTask_Deprecated,
2322
MaskedLMTask,
2423
NewBertClassificationTask,
2524
NewBertPairClassificationTask,
@@ -77,7 +76,6 @@ def register_builtin_tasks():
7776
EnsembleTask,
7877
IntentSlotTask,
7978
LMTask,
80-
LMTask_Deprecated,
8179
MaskedLMTask,
8280
NewBertClassificationTask,
8381
NewBertPairClassificationTask,

pytext/config/config_adapter.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,69 @@ def flatten_deprecated_ensemble_config(json_config):
552552
return json_config
553553

554554

555+
@register_adapter(from_version=15)
def remove_lmtask_deprecated(json_config):
    """Convert every ``LMTask_Deprecated`` section of a config into ``LMTask``.

    For each dict yielded by ``find_dicts_containing_key`` for the key
    ``"LMTask_Deprecated"``, the task config is popped, its ``data_handler``
    and ``features`` settings are moved to their ``data.*`` / ``model.*``
    locations, the ``data_handler``, ``features`` and ``featurizer`` entries
    are deleted, and the result is stored back under the ``"LMTask"`` key.

    Registered as the adapter for configs at version 15.

    Args:
        json_config: the config to adapt; it is modified in place.

    Returns:
        The same ``json_config`` object that was passed in.
    """
    # (old path, new path) pairs, grouped by the section they populate.
    # The order inside each group matches the order the renames are applied.
    source_file_moves = (
        ("data_handler.eval_path", "data.source.eval_filename"),
        ("data_handler.test_path", "data.source.test_filename"),
        ("data_handler.train_path", "data.source.train_filename"),
    )
    model_moves = (
        ("data_handler.append_bos", "model.inputs.tokens.add_bos_token"),
        ("data_handler.append_eos", "model.inputs.tokens.add_eos_token"),
        ("features.shared_module_key", "model.embedding.shared_module_key"),
        ("features.word_feat.embed_dim", "model.embedding.embed_dim"),
    )
    batch_size_moves = (
        ("data_handler.eval_batch_size", "data.batcher.eval_batch_size"),
        ("data_handler.test_batch_size", "data.batcher.test_batch_size"),
        ("data_handler.train_batch_size", "data.batcher.train_batch_size"),
    )
    vocab_moves = (
        (
            "features.word_feat.vocab_size",
            "model.inputs.tokens.vocab.size_from_data",
        ),
        (
            "features.word_feat.vocab_from_train_data",
            "model.inputs.tokens.vocab.build_from_data",
        ),
    )
    obsolete_keys = ("data_handler", "features", "featurizer")

    for holder in find_dicts_containing_key(json_config, "LMTask_Deprecated"):
        lm_task = holder.pop("LMTask_Deprecated")

        # Data source: create the TSVDataSource first, then move the file
        # paths into it and declare its single "text" column.
        create_parameter(lm_task, "data.source", {"TSVDataSource": {}})
        for old_path, new_path in source_file_moves:
            rename_parameter(lm_task, old_path, new_path)
        create_parameter(lm_task, "data.source.field_names", ["text"])

        # BOS/EOS flags and embedding settings now live under the model.
        for old_path, new_path in model_moves:
            rename_parameter(lm_task, old_path, new_path)

        # Batch sizes move from the data handler to a PoolingBatcher.
        create_parameter(lm_task, "data.batcher", {"PoolingBatcher": {}})
        for old_path, new_path in batch_size_moves:
            rename_parameter(lm_task, old_path, new_path)

        # Vocabulary settings move under the token tensorizer's vocab config.
        for old_path, new_path in vocab_moves:
            rename_parameter(lm_task, old_path, new_path)

        # The single vocab file path becomes a one-element list of
        # {"filepath": ...} entries.
        rename_parameter(
            lm_task,
            "features.word_feat.vocab_file",
            "model.inputs.tokens.vocab.vocab_files",
            lambda path: [{"filepath": path}],
        )

        # Drop whatever is left of the deprecated sections.
        for stale_key in obsolete_keys:
            delete_parameter(lm_task, stale_key)

        holder["LMTask"] = lm_task

    return json_config
616+
617+
555618
def upgrade_one_version(json_config):
556619
current_version = json_config.get("version", 0)
557620
adapter = ADAPTERS.get(current_version)

pytext/config/pytext_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,4 +141,4 @@ class TestConfig(ConfigBase):
141141
test_out_path: str = ""
142142

143143

144-
LATEST_VERSION = 15
144+
LATEST_VERSION = 16
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
[
2+
{
3+
"original": {
4+
"task": {
5+
"LMTask_Deprecated": {
6+
"features": {
7+
"shared_module_key": "SHARED_EMBEDDING",
8+
"word_feat": {
9+
"vocab_file": "base_dir/SST-2/sst_vocab.txt",
10+
"vocab_size": 15000,
11+
"vocab_from_train_data": false
12+
}
13+
},
14+
"data_handler": {
15+
"LanguageModelDataHandler": {
16+
"train_path": "base_dir/wikitext2/train.txt",
17+
"eval_path": "base_dir/wikitext2/valid.txt",
18+
"test_path": "base_dir/wikitext2/test.txt",
19+
"train_batch_size": 16,
20+
"eval_batch_size": 16,
21+
"test_batch_size": 16
22+
}
23+
},
24+
"model": {
25+
"representation": {
26+
"shared_module_key": "SHARED_LSTM"
27+
},
28+
"decoder": {
29+
"hidden_dims": [
30+
10
31+
]
32+
}
33+
}
34+
}
35+
},
36+
"version": 15
37+
},
38+
"adapted": {
39+
"task": {
40+
"LMTask": {
41+
"data": {
42+
"source": {
43+
"TSVDataSource": {
44+
"field_names": ["text"],
45+
"train_filename": "base_dir/wikitext2/train.txt",
46+
"test_filename": "base_dir/wikitext2/test.txt",
47+
"eval_filename": "base_dir/wikitext2/valid.txt"
48+
}
49+
},
50+
"batcher": {
51+
"PoolingBatcher": {
52+
"eval_batch_size": 16,
53+
"test_batch_size": 16,
54+
"train_batch_size": 16
55+
}
56+
}
57+
},
58+
"model": {
59+
"embedding": {
60+
"shared_module_key": "SHARED_EMBEDDING"
61+
},
62+
"inputs": {
63+
"tokens": {
64+
"vocab": {
65+
"build_from_data": false,
66+
"size_from_data": 15000,
67+
"vocab_files": [
68+
{
69+
"filepath": "base_dir/SST-2/sst_vocab.txt"
70+
}
71+
]
72+
}
73+
}
74+
},
75+
"representation": {
76+
"shared_module_key": "SHARED_LSTM"
77+
},
78+
"decoder": {
79+
"hidden_dims": [
80+
10
81+
]
82+
}
83+
}
84+
}
85+
},
86+
"version": 16
87+
}
88+
}
89+
]
90+
91+

pytext/data/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from .disjoint_multitask_data_handler import DisjointMultitaskDataHandler
1616
from .doc_classification_data_handler import DocClassificationDataHandler, RawData
1717
from .joint_data_handler import JointModelDataHandler
18-
from .language_model_data_handler import LanguageModelDataHandler
1918
from .query_document_pairwise_ranking_data_handler import (
2019
QueryDocumentPairwiseRankingDataHandler,
2120
)
@@ -38,7 +37,6 @@
3837
"EvalBatchSampler",
3938
"generator_iterator",
4039
"JointModelDataHandler",
41-
"LanguageModelDataHandler",
4240
"PoolingBatcher",
4341
"RandomizedBatchSampler",
4442
"QueryDocumentPairwiseRankingDataHandler",

pytext/data/language_model_data_handler.py

Lines changed: 0 additions & 155 deletions
This file was deleted.

0 commit comments

Comments
 (0)