Skip to content
This repository was archived by the owner on Nov 22, 2022. It is now read-only.

Commit c80a724

Browse files
geof90 authored and facebook-github-bot committed
Migrate Wit workflows to new data design, delete WordTaggingTask_Deprecated and WordTaggingModel
Summary: as title. Differential Revision: D16827032. fbshipit-source-id: 7d937a2af43d1e49e1a64bf3e4253e52fdca43da
1 parent 49067b7 commit c80a724

File tree

6 files changed

+50
-198
lines changed

6 files changed

+50
-198
lines changed

pytext/builtin_task.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@
3232
SeqNNTask_Deprecated,
3333
SquadQATask,
3434
WordTaggingTask,
35-
WordTaggingTask_Deprecated,
3635
)
3736

3837

@@ -90,7 +89,6 @@ def register_builtin_tasks():
9089
SeqNNTask,
9190
SeqNNTask_Deprecated,
9291
SquadQATask,
93-
WordTaggingTask,
94-
WordTaggingTask_Deprecated,
92+
WordTaggingTask
9593
)
9694
)

pytext/config/config_adapter.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -315,7 +315,6 @@ def old_tasks_deprecated(json_config):
315315
deprecate(json_config, "ShallowTaggingTask")
316316
deprecate(json_config, "SpanClassificationTask")
317317
deprecate(json_config, "TreeParserTask")
318-
deprecate(json_config, "WordTaggingTask")
319318

320319
return json_config
321320

@@ -385,7 +384,6 @@ def new_tasks_rename(json_config):
385384
"QueryDocumentPairwiseRankingModel",
386385
"QueryDocumentPairwiseRankingModel_Deprecated",
387386
)
388-
rename(json_config, "WordTaggingModel", "WordTaggingModel_Deprecated")
389387
# New
390388
rename(json_config, "NewDocModel", "DocModel")
391389
rename(json_config, "NewDocRegressionModel", "DocRegressionModel")

pytext/config/test/json_config/v6.json

Lines changed: 0 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -25,85 +25,6 @@
2525
"version": 6
2626
}
2727
},
28-
{
29-
"original": {
30-
"task": {
31-
"WordTaggingTask": {
32-
"data_handler": {
33-
"train_path": "tests/data/train_data_tiny.tsv",
34-
"eval_path": "tests/data/test_data_tiny.tsv",
35-
"test_path": "tests/data/test_data_tiny.tsv"
36-
}
37-
}
38-
},
39-
"version": 5
40-
},
41-
"adapted": {
42-
"task": {
43-
"WordTaggingTask_Deprecated": {
44-
"data_handler": {
45-
"train_path": "tests/data/train_data_tiny.tsv",
46-
"eval_path": "tests/data/test_data_tiny.tsv",
47-
"test_path": "tests/data/test_data_tiny.tsv"
48-
}
49-
}
50-
},
51-
"version": 6
52-
}
53-
},
54-
{
55-
"original": {
56-
"task": {
57-
"WordTaggingTask": {
58-
"data_handler": {
59-
"train_path": "tests/data/train_data_tiny.tsv",
60-
"eval_path": "tests/data/test_data_tiny.tsv",
61-
"test_path": "tests/data/test_data_tiny.tsv"
62-
}
63-
}
64-
},
65-
"version": 5
66-
},
67-
"adapted": {
68-
"task": {
69-
"WordTaggingTask_Deprecated": {
70-
"data_handler": {
71-
"train_path": "tests/data/train_data_tiny.tsv",
72-
"eval_path": "tests/data/test_data_tiny.tsv",
73-
"test_path": "tests/data/test_data_tiny.tsv"
74-
}
75-
}
76-
},
77-
"version": 6
78-
}
79-
},
80-
{
81-
"original": {
82-
"task": {
83-
"WordTaggingTask": {
84-
"data_handler": {
85-
"train_path": "tests/data/train_data_tiny.tsv",
86-
"eval_path": "tests/data/test_data_tiny.tsv",
87-
"test_path": "tests/data/test_data_tiny.tsv"
88-
}
89-
}
90-
},
91-
"version": 5
92-
},
93-
"adapted": {
94-
"task": {
95-
"WordTaggingTask_Deprecated": {
96-
"data_handler": {
97-
"train_path": "tests/data/train_data_tiny.tsv",
98-
"eval_path": "tests/data/test_data_tiny.tsv",
99-
"test_path": "tests/data/test_data_tiny.tsv"
100-
}
101-
}
102-
},
103-
"version": 6
104-
}
105-
},
106-
10728
{
10829
"original": {
10930
"task": {

pytext/exporters/test/text_model_exporter_test.py

Lines changed: 27 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
DocClassificationTask_Deprecated,
1818
IntentSlotTask,
1919
SeqNNTask_Deprecated,
20-
WordTaggingTask_Deprecated,
20+
WordTaggingTask,
2121
)
2222
from pytext.common.constants import DatasetFieldName
2323
from pytext.config import config_from_json
@@ -225,23 +225,7 @@
225225
"output_layer": {
226226
"WordTaggingOutputLayer": {}
227227
}
228-
},
229-
"features": {
230-
"dict_feat": {
231-
"embed_dim": 10
232-
},
233-
"char_feat": {
234-
"embed_dim": 5,
235-
"cnn": {
236-
"kernel_num": 2,
237-
"kernel_sizes": [2, 3]
238-
}
239-
}
240-
},
241-
"featurizer": {
242-
"SimpleFeaturizer": {}
243-
},
244-
"exporter": {}
228+
}
245229
}
246230
""",
247231
"""
@@ -258,17 +242,7 @@
258242
"output_layer": {
259243
"CRFOutputLayer": {}
260244
}
261-
},
262-
"features": {
263-
"word_feat": {},
264-
"dict_feat": {
265-
"embed_dim": 10
266-
}
267-
},
268-
"featurizer": {
269-
"SimpleFeaturizer": {}
270-
},
271-
"exporter": {}
245+
}
272246
}
273247
""",
274248
]
@@ -490,11 +464,21 @@ def test_wordblstm_export_to_caffe2(
490464
test_num_chars,
491465
):
492466
for WORD_CONFIG in WORD_CONFIGS:
493-
config = self._get_config(WordTaggingTask_Deprecated.Config, WORD_CONFIG)
494-
metadata = self._get_metadata(0, num_word_classes)
495-
py_model = create_model(config.model, config.features, metadata)
496-
exporter = create_exporter(
497-
config.exporter, config.features, config.labels, metadata
467+
config = self._get_config(WordTaggingTask.Config, WORD_CONFIG)
468+
tensorizers, data = _NewTask._init_tensorizers(config)
469+
word_labels = ["__UNKNOWN__", "NoLabel", "person"]
470+
tensorizers["labels"].vocab = Vocabulary(word_labels)
471+
tensorizers["tokens"].vocab = Vocabulary(WORD_VOCAB)
472+
py_model = _NewTask._init_model(config.model, tensorizers)
473+
dummy_test_input = self._get_rand_input_intent_slot(
474+
BATCH_SIZE, W_VOCAB_SIZE, test_num_words
475+
)
476+
exporter = ModelExporter(
477+
ModelExporter.Config(),
478+
py_model.get_export_input_names(tensorizers),
479+
dummy_test_input,
480+
py_model.vocab_to_export(tensorizers),
481+
py_model.get_export_output_names(tensorizers),
498482
)
499483
with tempfile.NamedTemporaryFile(
500484
delete=False, suffix=".{}".format(".predictor")
@@ -503,15 +487,8 @@ def test_wordblstm_export_to_caffe2(
503487
workspace.ResetWorkspace()
504488
pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)
505489
for _i in range(num_predictions):
506-
test_inputs = self._get_rand_input(
507-
config.features,
508-
BATCH_SIZE,
509-
W_VOCAB_SIZE,
510-
DICT_VOCAB_SIZE,
511-
CHAR_VOCAB_SIZE,
512-
test_num_words,
513-
test_num_dict_feat,
514-
test_num_chars,
490+
test_inputs = self._get_rand_input_intent_slot(
491+
BATCH_SIZE, W_VOCAB_SIZE, test_num_words
515492
)
516493
self._feed_c2_input(
517494
workspace,
@@ -520,16 +497,21 @@ def test_wordblstm_export_to_caffe2(
520497
metadata.feature_itos_map,
521498
)
522499
workspace.RunNetOnce(pred_net)
523-
c2_out = [list(workspace.FetchBlob(o_name)) for o_name in output_names]
500+
word_output_names = [
501+
"{}:{}".format("word_scores", class_name) for class_name in word_labels
502+
]
524503
py_model.eval()
525504
py_outs = py_model(*test_inputs)
526505
context = {SEQ_LENS: test_inputs[-1]}
527506
target = None
528507
pred, score = py_model.get_pred(py_outs, target, context)
508+
c2_word_out = []
509+
for o_name in word_output_names:
510+
c2_word_out.extend(list(workspace.FetchBlob(o_name)))
529511

530512
np.testing.assert_array_almost_equal(
531513
torch.transpose(score, 1, 2).contiguous().view(-1).detach().numpy(),
532-
np.array(c2_out).flatten(),
514+
np.array(c2_word_out).flatten(),
533515
)
534516

535517
def _get_rand_input_intent_slot(

pytext/models/word_model.py

Lines changed: 2 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from pytext.models.representations.pass_through import PassThroughRepresentation
2222

2323

24-
class WordTaggingModel_Deprecated(Model):
24+
class WordTaggingModel(Model):
2525
"""
2626
Word tagging model. It can be used for any task that requires predicting the
2727
tag for a word/token. For example, the following tasks can be modeled as word
@@ -33,31 +33,6 @@ class WordTaggingModel_Deprecated(Model):
3333
It can be instantiated just like any other :class:`~Model`.
3434
"""
3535

36-
class Config(Model.Config):
37-
representation: Union[
38-
BiLSTMSlotAttention.Config,
39-
BSeqCNNRepresentation.Config,
40-
PassThroughRepresentation.Config,
41-
DeepCNNRepresentation.Config,
42-
] = BiLSTMSlotAttention.Config()
43-
output_layer: Union[
44-
WordTaggingOutputLayer.Config, CRFOutputLayer.Config
45-
] = WordTaggingOutputLayer.Config()
46-
decoder: MLPDecoder.Config = MLPDecoder.Config()
47-
48-
def __init__(self, *args, **kwargs):
49-
super().__init__(*args, **kwargs)
50-
# CRF module has parameters and it's forward function is not called in
51-
# model's forward function because of ONNX compatibility issue. This will
52-
# not work with DDP, thus setting find_unused_parameters to False to work
53-
# around, can be removed once DDP support params not used in model forward
54-
# function
55-
if isinstance(self.output_layer, CRFOutputLayer):
56-
self.find_unused_parameters = False
57-
58-
59-
class WordTaggingModel(Model):
60-
6136
__EXPANSIBLE__ = True
6237

6338
class Config(Model.Config):
@@ -72,6 +47,7 @@ class ModelInput(Model.Config.ModelInput):
7247
BiLSTMSlotAttention.Config, # TODO: make default when sorting solved
7348
BSeqCNNRepresentation.Config,
7449
PassThroughRepresentation.Config,
50+
DeepCNNRepresentation.Config,
7551
] = PassThroughRepresentation.Config()
7652
output_layer: Union[
7753
WordTaggingOutputLayer.Config, CRFOutputLayer.Config

0 commit comments

Comments
 (0)