Migrate Wit workflows to new data design, delete WordTaggingTask_Deprecated and WordTaggingModel

geof90 · facebook-github-bot · commit c80a7247f1c8 · 2019-08-15T10:52:13.000-07:00
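Summary: As title. Removes `WordTaggingTask_Deprecated` from the built-in task imports and registration, drops the config-adapter `deprecate`/`rename` entries for `WordTaggingTask` and `WordTaggingModel` together with their v6 adapter test cases, deletes the `WordTaggingModel_Deprecated` class (its docstring moves onto `WordTaggingModel`, which also gains `DeepCNNRepresentation.Config` as a representation option), and ports the word-tagging Caffe2 export test to `WordTaggingTask`.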

Differential Revision: D16827032

fbshipit-source-id: 7d937a2af43d1e49e1a64bf3e4253e52fdca43da
diff --git a/pytext/builtin_task.py b/pytext/builtin_task.py
@@ -32,7 +32,6 @@
     SeqNNTask_Deprecated,
     SquadQATask,
     WordTaggingTask,
-    WordTaggingTask_Deprecated,
 )
 
 
@@ -90,7 +89,6 @@ def register_builtin_tasks():
             SeqNNTask,
             SeqNNTask_Deprecated,
             SquadQATask,
-            WordTaggingTask,
-            WordTaggingTask_Deprecated,
+            WordTaggingTask
         )
     )
diff --git a/pytext/config/config_adapter.py b/pytext/config/config_adapter.py
@@ -315,7 +315,6 @@ def old_tasks_deprecated(json_config):
     deprecate(json_config, "ShallowTaggingTask")
     deprecate(json_config, "SpanClassificationTask")
     deprecate(json_config, "TreeParserTask")
-    deprecate(json_config, "WordTaggingTask")
 
     return json_config
 
@@ -385,7 +384,6 @@ def new_tasks_rename(json_config):
         "QueryDocumentPairwiseRankingModel",
         "QueryDocumentPairwiseRankingModel_Deprecated",
     )
-    rename(json_config, "WordTaggingModel", "WordTaggingModel_Deprecated")
     # New
     rename(json_config, "NewDocModel", "DocModel")
     rename(json_config, "NewDocRegressionModel", "DocRegressionModel")
diff --git a/pytext/config/test/json_config/v6.json b/pytext/config/test/json_config/v6.json
@@ -25,85 +25,6 @@
       "version": 6
     }
   },
-  {
-    "original": {
-      "task": {
-        "WordTaggingTask": {
-          "data_handler": {
-            "train_path": "tests/data/train_data_tiny.tsv",
-            "eval_path": "tests/data/test_data_tiny.tsv",
-            "test_path": "tests/data/test_data_tiny.tsv"
-          }
-        }
-      },
-      "version": 5
-    },
-    "adapted": {
-      "task": {
-        "WordTaggingTask_Deprecated": {
-          "data_handler": {
-            "train_path": "tests/data/train_data_tiny.tsv",
-            "eval_path": "tests/data/test_data_tiny.tsv",
-            "test_path": "tests/data/test_data_tiny.tsv"
-          }
-        }
-      },
-      "version": 6
-    }
-  },
-  {
-    "original": {
-      "task": {
-        "WordTaggingTask": {
-          "data_handler": {
-            "train_path": "tests/data/train_data_tiny.tsv",
-            "eval_path": "tests/data/test_data_tiny.tsv",
-            "test_path": "tests/data/test_data_tiny.tsv"
-          }
-        }
-      },
-      "version": 5
-    },
-    "adapted": {
-      "task": {
-        "WordTaggingTask_Deprecated": {
-          "data_handler": {
-            "train_path": "tests/data/train_data_tiny.tsv",
-            "eval_path": "tests/data/test_data_tiny.tsv",
-            "test_path": "tests/data/test_data_tiny.tsv"
-          }
-        }
-      },
-      "version": 6
-    }
-  },
-  {
-    "original": {
-      "task": {
-        "WordTaggingTask": {
-          "data_handler": {
-            "train_path": "tests/data/train_data_tiny.tsv",
-            "eval_path": "tests/data/test_data_tiny.tsv",
-            "test_path": "tests/data/test_data_tiny.tsv"
-          }
-        }
-      },
-      "version": 5
-    },
-    "adapted": {
-      "task": {
-        "WordTaggingTask_Deprecated": {
-          "data_handler": {
-            "train_path": "tests/data/train_data_tiny.tsv",
-            "eval_path": "tests/data/test_data_tiny.tsv",
-            "test_path": "tests/data/test_data_tiny.tsv"
-          }
-        }
-      },
-      "version": 6
-    }
-  },
-
   {
     "original": {
       "task": {
diff --git a/pytext/exporters/test/text_model_exporter_test.py b/pytext/exporters/test/text_model_exporter_test.py
@@ -17,7 +17,7 @@
     DocClassificationTask_Deprecated,
     IntentSlotTask,
     SeqNNTask_Deprecated,
-    WordTaggingTask_Deprecated,
+    WordTaggingTask,
 )
 from pytext.common.constants import DatasetFieldName
 from pytext.config import config_from_json
@@ -225,23 +225,7 @@
     "output_layer": {
       "WordTaggingOutputLayer": {}
     }
-  },
-  "features": {
-    "dict_feat": {
-      "embed_dim": 10
-    },
-    "char_feat": {
-      "embed_dim": 5,
-      "cnn": {
-        "kernel_num": 2,
-        "kernel_sizes": [2, 3]
-      }
-    }
-  },
-  "featurizer": {
-    "SimpleFeaturizer": {}
-  },
-  "exporter": {}
+  }
 }
 """,
     """
@@ -258,17 +242,7 @@
     "output_layer": {
       "CRFOutputLayer": {}
     }
-  },
-  "features": {
-    "word_feat": {},
-    "dict_feat": {
-      "embed_dim": 10
-    }
-  },
-  "featurizer": {
-    "SimpleFeaturizer": {}
-  },
-  "exporter": {}
+  }
 }
 """,
 ]
@@ -490,11 +464,21 @@ def test_wordblstm_export_to_caffe2(
         test_num_chars,
     ):
         for WORD_CONFIG in WORD_CONFIGS:
-            config = self._get_config(WordTaggingTask_Deprecated.Config, WORD_CONFIG)
-            metadata = self._get_metadata(0, num_word_classes)
-            py_model = create_model(config.model, config.features, metadata)
-            exporter = create_exporter(
-                config.exporter, config.features, config.labels, metadata
+            config = self._get_config(WordTaggingTask.Config, WORD_CONFIG)
+            tensorizers, data = _NewTask._init_tensorizers(config)
+            word_labels = ["__UNKNOWN__", "NoLabel", "person"]
+            tensorizers["labels"].vocab = Vocabulary(word_labels)
+            tensorizers["tokens"].vocab = Vocabulary(WORD_VOCAB)
+            py_model = _NewTask._init_model(config.model, tensorizers)
+            dummy_test_input = self._get_rand_input_intent_slot(
+                BATCH_SIZE, W_VOCAB_SIZE, test_num_words
+            )
+            exporter = ModelExporter(
+                ModelExporter.Config(),
+                py_model.get_export_input_names(tensorizers),
+                dummy_test_input,
+                py_model.vocab_to_export(tensorizers),
+                py_model.get_export_output_names(tensorizers),
             )
             with tempfile.NamedTemporaryFile(
                 delete=False, suffix=".{}".format(".predictor")
@@ -503,15 +487,8 @@ def test_wordblstm_export_to_caffe2(
                 workspace.ResetWorkspace()
             pred_net = pe.prepare_prediction_net(pred_file.name, CAFFE2_DB_TYPE)
             for _i in range(num_predictions):
-                test_inputs = self._get_rand_input(
-                    config.features,
-                    BATCH_SIZE,
-                    W_VOCAB_SIZE,
-                    DICT_VOCAB_SIZE,
-                    CHAR_VOCAB_SIZE,
-                    test_num_words,
-                    test_num_dict_feat,
-                    test_num_chars,
+                test_inputs = self._get_rand_input_intent_slot(
+                    BATCH_SIZE, W_VOCAB_SIZE, test_num_words
                 )
                 self._feed_c2_input(
                     workspace,
@@ -520,16 +497,21 @@ def test_wordblstm_export_to_caffe2(
                     metadata.feature_itos_map,
                 )
                 workspace.RunNetOnce(pred_net)
-                c2_out = [list(workspace.FetchBlob(o_name)) for o_name in output_names]
+                word_output_names = [
+                    "{}:{}".format("word_scores", class_name) for class_name in word_labels
+                ]
                 py_model.eval()
                 py_outs = py_model(*test_inputs)
                 context = {SEQ_LENS: test_inputs[-1]}
                 target = None
                 pred, score = py_model.get_pred(py_outs, target, context)
+                c2_word_out = []
+                for o_name in word_output_names:
+                    c2_word_out.extend(list(workspace.FetchBlob(o_name)))
 
                 np.testing.assert_array_almost_equal(
                     torch.transpose(score, 1, 2).contiguous().view(-1).detach().numpy(),
-                    np.array(c2_out).flatten(),
+                    np.array(c2_word_out).flatten(),
                 )
 
     def _get_rand_input_intent_slot(
diff --git a/pytext/models/word_model.py b/pytext/models/word_model.py
@@ -21,7 +21,7 @@
 from pytext.models.representations.pass_through import PassThroughRepresentation
 
 
-class WordTaggingModel_Deprecated(Model):
+class WordTaggingModel(Model):
     """
     Word tagging model. It can be used for any task that requires predicting the
     tag for a word/token. For example, the following tasks can be modeled as word
@@ -33,31 +33,6 @@ class WordTaggingModel_Deprecated(Model):
     It can be instantiated just like any other :class:`~Model`.
     """
 
-    class Config(Model.Config):
-        representation: Union[
-            BiLSTMSlotAttention.Config,
-            BSeqCNNRepresentation.Config,
-            PassThroughRepresentation.Config,
-            DeepCNNRepresentation.Config,
-        ] = BiLSTMSlotAttention.Config()
-        output_layer: Union[
-            WordTaggingOutputLayer.Config, CRFOutputLayer.Config
-        ] = WordTaggingOutputLayer.Config()
-        decoder: MLPDecoder.Config = MLPDecoder.Config()
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # CRF module has parameters and it's forward function is not called in
-        # model's forward function because of ONNX compatibility issue. This will
-        # not work with DDP, thus setting find_unused_parameters to False to work
-        # around, can be removed once DDP support params not used in model forward
-        # function
-        if isinstance(self.output_layer, CRFOutputLayer):
-            self.find_unused_parameters = False
-
-
-class WordTaggingModel(Model):
-
     __EXPANSIBLE__ = True
 
     class Config(Model.Config):
@@ -72,6 +47,7 @@ class ModelInput(Model.Config.ModelInput):
             BiLSTMSlotAttention.Config,  # TODO: make default when sorting solved
             BSeqCNNRepresentation.Config,
             PassThroughRepresentation.Config,
+            DeepCNNRepresentation.Config,
         ] = PassThroughRepresentation.Config()
         output_layer: Union[
             WordTaggingOutputLayer.Config, CRFOutputLayer.Config
diff --git a/pytext/task/tasks.py b/pytext/task/tasks.py

Original file line number	Diff line number	Diff line change
`@@ -32,7 +32,6 @@`
`32`	`32`	`SeqNNTask_Deprecated,`
`33`	`33`	`SquadQATask,`
`34`	`34`	`WordTaggingTask,`
`35`		`- WordTaggingTask_Deprecated,`
`36`	`35`	`)`
`37`	`36`
`38`	`37`
`@@ -90,7 +89,6 @@ def register_builtin_tasks():`
`90`	`89`	`SeqNNTask,`
`91`	`90`	`SeqNNTask_Deprecated,`
`92`	`91`	`SquadQATask,`
`93`		`- WordTaggingTask,`
`94`		`- WordTaggingTask_Deprecated,`
	`92`	`+ WordTaggingTask`
`95`	`93`	`)`
`96`	`94`	`)`