Skip to content
This repository was archived by the owner on Nov 22, 2022. It is now read-only.

Commit 8aded25

Browse files
hudeven authored and facebook-github-bot committed
Back out "[PyText] remove SeqNNTask_Deprecated" (#947)
Summary: Pull Request resolved: #947 Original commit changeset: 04ec5a6458a8 Reviewed By: liaimi Differential Revision: D17176512 fbshipit-source-id: cd2c0ccd6eee6b245eb5725ee65f608ecd65e49f
1 parent 202762c commit 8aded25

File tree

8 files changed

+222
-5
lines changed

8 files changed

+222
-5
lines changed

pytext/builtin_task.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
SemanticParsingTask,
2828
SemanticParsingTask_Deprecated,
2929
SeqNNTask,
30+
SeqNNTask_Deprecated,
3031
SquadQATask,
3132
WordTaggingTask,
3233
WordTaggingTask_Deprecated,
@@ -83,6 +84,7 @@ def register_builtin_tasks():
8384
SemanticParsingTask,
8485
SemanticParsingTask_Deprecated,
8586
SeqNNTask,
87+
SeqNNTask_Deprecated,
8688
SquadQATask,
8789
WordTaggingTask,
8890
WordTaggingTask_Deprecated,

pytext/config/test/json_config/v6.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,31 @@
102102
},
103103
"version": 6
104104
}
105+
},
106+
{
107+
"original": {
108+
"task": {
109+
"SeqNNTask": {
110+
"data_handler": {
111+
"train_path": "tests/data/train_data_tiny.tsv",
112+
"eval_path": "tests/data/test_data_tiny.tsv",
113+
"test_path": "tests/data/test_data_tiny.tsv"
114+
}
115+
}
116+
},
117+
"version": 5
118+
},
119+
"adapted": {
120+
"task": {
121+
"SeqNNTask_Deprecated": {
122+
"data_handler": {
123+
"train_path": "tests/data/train_data_tiny.tsv",
124+
"eval_path": "tests/data/test_data_tiny.tsv",
125+
"test_path": "tests/data/test_data_tiny.tsv"
126+
}
127+
}
128+
},
129+
"version": 6
130+
}
105131
}
106132
]

pytext/data/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .disjoint_multitask_data_handler import DisjointMultitaskDataHandler
1616
from .doc_classification_data_handler import DocClassificationDataHandler, RawData
1717
from .joint_data_handler import JointModelDataHandler
18+
from .seq_data_handler import SeqModelDataHandler
1819
from .tensorizers import Tensorizer
1920

2021

@@ -37,5 +38,6 @@
3738
"RandomizedBatchSampler",
3839
"RawData",
3940
"RoundRobinBatchSampler",
41+
"SeqModelDataHandler",
4042
"Tensorizer",
4143
]

pytext/data/seq_data_handler.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3+
4+
from typing import Any, Dict, List
5+
6+
from pytext.common.constants import DatasetFieldName, DFColumn
7+
from pytext.config import ConfigBase
8+
from pytext.config.doc_classification import ModelInput
9+
from pytext.config.field_config import DocLabelConfig, FeatureConfig
10+
from pytext.data.featurizer import InputRecord
11+
from pytext.fields import (
12+
DocLabelField,
13+
Field,
14+
FloatVectorField,
15+
RawField,
16+
SeqFeatureField,
17+
)
18+
from pytext.utils import data
19+
20+
from .joint_data_handler import JointModelDataHandler
21+
22+
23+
SEQ_LENS = "seq_lens"
24+
25+
26+
class SeqModelDataHandler(JointModelDataHandler):
    """Data handler for rows whose utterance column holds a JSON array.

    Each element of the array is featurized on its own, so the word feature
    of a row is a list of token lists (one per utterance in the sequence).
    """

    class Config(JointModelDataHandler.Config):
        # Raw columns pulled from every input row.
        columns_to_read: List[str] = [DFColumn.DOC_LABEL, DFColumn.UTTERANCE]
        pretrained_embeds_file: str = ""

    FULL_FEATURES = [DatasetFieldName.TEXT_FIELD]

    @classmethod
    def from_config(
        cls,
        config: Config,
        feature_config: FeatureConfig,
        label_config: DocLabelConfig,
        **kwargs
    ):
        """Build a handler from its config objects.

        Args:
            config: handler settings (columns, paths, batch sizes, shuffle).
            feature_config: supplies ``word_feat`` and the optional
                ``dense_feat`` settings used to create the feature fields.
            label_config: accepted for interface compatibility; not read here.
            **kwargs: forwarded unchanged to the class constructor.

        Returns:
            A new instance of ``cls``.
        """
        word_cfg = feature_config.word_feat
        dense_cfg = feature_config.dense_feat

        word_field = SeqFeatureField(
            pretrained_embeddings_path=word_cfg.pretrained_embeddings_path,
            embed_dim=word_cfg.embed_dim,
            embedding_init_strategy=word_cfg.embedding_init_strategy,
            vocab_file=word_cfg.vocab_file,
            vocab_size=word_cfg.vocab_size,
            vocab_from_train_data=word_cfg.vocab_from_train_data,
        )
        feature_fields: Dict[str, Field] = {ModelInput.WORD_FEAT: word_field}

        # The dense feature field is only created when it is configured.
        if dense_cfg:
            feature_fields[ModelInput.DENSE_FEAT] = FloatVectorField(
                dim=dense_cfg.dim, dim_error_check=dense_cfg.dim_error_check
            )

        label_fields: Dict[str, Field] = {DocLabelConfig._name: DocLabelField()}
        raw_fields: Dict[str, Field] = {
            DatasetFieldName.UTTERANCE_FIELD: RawField()
        }

        return cls(
            raw_columns=config.columns_to_read,
            labels=label_fields,
            features=feature_fields,
            extra_fields=raw_fields,
            shuffle=config.shuffle,
            train_path=config.train_path,
            eval_path=config.eval_path,
            test_path=config.test_path,
            train_batch_size=config.train_batch_size,
            eval_batch_size=config.eval_batch_size,
            test_batch_size=config.test_batch_size,
            **kwargs
        )

    def preprocess_row(self, row_data: Dict[str, Any]) -> Dict[str, Any]:
        """Turn one raw row into the field values consumed downstream.

        Args:
            row_data: raw row; must contain the utterance column (a JSON
                array string) and the doc label column. The dense feature
                column is optional.

        Returns:
            A dict with the per-utterance token lists, the doc label, the
            untouched raw utterance string and, when present in the row,
            the dense feature value.
        """
        utterances = data.parse_json_array(row_data[DFColumn.UTTERANCE])

        featurized = [
            self.featurizer.featurize(InputRecord(raw_text=raw_text))
            for raw_text in utterances
        ]

        processed: Dict[str, Any] = {}
        # features
        processed[ModelInput.WORD_FEAT] = [item.tokens for item in featurized]
        # labels
        processed[DatasetFieldName.DOC_LABEL_FIELD] = row_data[DFColumn.DOC_LABEL]
        processed[DatasetFieldName.UTTERANCE_FIELD] = row_data[DFColumn.UTTERANCE]

        if DFColumn.DENSE_FEAT in row_data:
            processed[ModelInput.DENSE_FEAT] = row_data[DFColumn.DENSE_FEAT]
        return processed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3+
4+
import unittest
5+
6+
import numpy as np
7+
from pytext.common.constants import DFColumn
8+
from pytext.config.field_config import DocLabelConfig, FeatureConfig
9+
from pytext.data import SeqModelDataHandler
10+
from pytext.data.featurizer import SimpleFeaturizer
11+
12+
13+
class SeqModelDataHandlerTest(unittest.TestCase):
    """Unit tests for SeqModelDataHandler using small in-memory datasets."""

    def setUp(self):
        """Create tiny train/eval/test row sets and a default-config handler."""
        # Each utterance value is a JSON array string holding the sequence.
        self.train_data = [
            {
                DFColumn.DOC_LABEL: "cu:discuss_where",
                DFColumn.UTTERANCE: '["where do you wanna meet?", "MPK"]',
            }
        ]

        self.eval_data = [
            {
                DFColumn.DOC_LABEL: "cu:discuss_where",
                DFColumn.UTTERANCE: '["how about SF?", "sounds good"]',
            },
            {DFColumn.DOC_LABEL: "cu:other", DFColumn.UTTERANCE: '["lol"]'},
        ]

        self.test_data = [
            {
                DFColumn.DOC_LABEL: "cu:discuss_where",
                DFColumn.UTTERANCE: '["MPK sounds good to me"]',
            },
            {
                DFColumn.DOC_LABEL: "cu:other",
                DFColumn.UTTERANCE: '["great", "awesome"]',
            },
        ]

        self.dh = SeqModelDataHandler.from_config(
            SeqModelDataHandler.Config(),
            FeatureConfig(),
            DocLabelConfig(),
            featurizer=SimpleFeaturizer.from_config(
                SimpleFeaturizer.Config(), FeatureConfig()
            ),
        )

    def test_intermediate_result(self):
        """The word feature of an example is one token list per utterance."""
        data = self.dh.gen_dataset(self.train_data)
        self.assertListEqual(
            data.examples[0].word_feat,
            [["where", "do", "you", "wanna", "meet?"], ["mpk"]],
        )

    def test_process_data(self):
        """Batches from the train iterator carry the expected tensors."""
        self.dh.init_metadata_from_raw_data(
            self.train_data, self.eval_data, self.test_data
        )
        train_iter = self.dh.get_train_iter_from_raw_data(self.train_data, 1)

        # Count batches so the test cannot pass vacuously when the iterator
        # yields nothing; `inputs` also avoids shadowing the builtin `input`.
        batches_checked = 0
        for inputs, target, _ in train_iter:
            np.testing.assert_array_almost_equal(
                inputs[0][0].numpy(), [[6, 2, 7, 5, 3], [4, 1, 1, 1, 1]]
            )
            np.testing.assert_array_almost_equal(inputs[1].numpy(), [2])
            np.testing.assert_array_almost_equal(target[0].numpy(), [0])
            batches_checked += 1

        self.assertGreater(
            batches_checked, 0, "train iterator yielded no batches to check"
        )

pytext/exporters/test/text_model_exporter_test.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from pytext.builtin_task import (
1717
DocClassificationTask_Deprecated,
1818
IntentSlotTask,
19+
SeqNNTask_Deprecated,
1920
WordTaggingTask_Deprecated,
2021
)
2122
from pytext.common.constants import DatasetFieldName
@@ -663,8 +664,7 @@ def test_joint_export_to_caffe2(
663664
test_num_chars=st.integers(1, 7),
664665
test_num_seq=st.integers(1, 7),
665666
)
666-
# TODO: migrate to SeqNNTask T48066205
667-
def DISABLED_test_seq_nn_export_to_caffe2(
667+
def test_seq_nn_export_to_caffe2(
668668
self,
669669
export_num_words,
670670
export_num_dict_feat,
@@ -676,8 +676,6 @@ def DISABLED_test_seq_nn_export_to_caffe2(
676676
test_num_chars,
677677
test_num_seq,
678678
):
679-
# SeqNNTask_Deprecated is removed, set to None to fix lint
680-
SeqNNTask_Deprecated = None
681679
config = self._get_config(SeqNNTask_Deprecated.Config, SEQ_NN_CONFIG)
682680
metadata = self._get_seq_metadata(num_doc_classes, 0)
683681
py_model = create_model(config.model, config.features, metadata)

pytext/models/seq_models/seqnn.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@
1010
from pytext.models.representations.seq_rep import SeqRepresentation
1111

1212

13+
class SeqNNModel_Deprecated(Model):
    """
    Classifier whose input is a sequence of utterances.

    A docnn model (CNN or LSTM) produces a vector representation for each
    sequence; an LSTM or BLSTM then captures the dynamics and produces
    labels for each sequence.

    DEPRECATED: Use SeqNNModel
    """

    class Config(ConfigBase):
        # Sub-component configs, each defaulting to that component's own
        # default Config.
        representation: SeqRepresentation.Config = SeqRepresentation.Config()
        output_layer: ClassificationOutputLayer.Config = (
            ClassificationOutputLayer.Config()
        )
        decoder: MLPDecoder.Config = MLPDecoder.Config()
27+
28+
1329
class SeqNNModel(DocModel):
1430
"""
1531
Classification model with sequence of utterances as input.

pytext/task/tasks.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
CompositionalDataHandler,
1111
DocClassificationDataHandler,
1212
JointModelDataHandler,
13+
SeqModelDataHandler,
1314
)
1415
from pytext.data.bert_tensorizer import BERTTensorizer
1516
from pytext.data.data import Data
@@ -54,7 +55,7 @@
5455
RNNGParser_Deprecated,
5556
)
5657
from pytext.models.seq_models.contextual_intent_slot import ContextualIntentSlotModel
57-
from pytext.models.seq_models.seqnn import SeqNNModel
58+
from pytext.models.seq_models.seqnn import SeqNNModel, SeqNNModel_Deprecated
5859
from pytext.models.word_model import WordTaggingModel, WordTaggingModel_Deprecated
5960
from pytext.task import Task_Deprecated
6061
from pytext.task.new_task import NewTask
@@ -251,6 +252,18 @@ class Config(NewTask.Config):
251252
)
252253

253254

255+
class SeqNNTask_Deprecated(Task_Deprecated):
    """Task wiring SeqNNModel_Deprecated to SeqModelDataHandler."""

    class Config(Task_Deprecated.Config):
        # Every component defaults to its own default Config; the exporter
        # is optional and unset by default.
        model: SeqNNModel_Deprecated.Config = SeqNNModel_Deprecated.Config()
        trainer: Trainer.Config = Trainer.Config()
        labels: DocLabelConfig = DocLabelConfig()
        data_handler: SeqModelDataHandler.Config = SeqModelDataHandler.Config()
        metric_reporter: ClassificationMetricReporter.Config = ClassificationMetricReporter.Config()  # noqa: E501
        exporter: Optional[DenseFeatureExporter.Config] = None
265+
266+
254267
class SeqNNTask(NewTask):
255268
class Config(NewTask.Config):
256269
model: SeqNNModel.Config = SeqNNModel.Config()

0 commit comments

Comments
 (0)