@@ -8,6 +8,22 @@
 from pytext.models.representations.representation_base import RepresentationBase
 
 
+class Trim1d(nn.Module):
+    """
+    Trims a 1d convolutional output. Used to implement history-padding
+    by removing excess padding from the right.
+
+    """
+
+    def __init__(self, trim):
+        super(Trim1d, self).__init__()
+
+        self.trim = trim
+
+    def forward(self, x):
+        return x[:, :, : -self.trim].contiguous()
+
+
 class DeepCNNRepresentation(RepresentationBase):
     """
     `DeepCNNRepresentation` implements CNN representation layer
@@ -32,8 +48,10 @@ def __init__(self, config: Config, embed_dim: int) -> None:
         kernel_sizes = config.cnn.kernel_sizes
         weight_norm = config.cnn.weight_norm
         dilated = config.cnn.dilated
+        causal = config.cnn.causal
 
         conv_layers = []
+        trim_layers = []
         linear_layers = []
         in_channels = embed_dim
 
@@ -48,7 +66,7 @@ def __init__(self, config: Config, embed_dim: int) -> None:
             linear_layers.append(proj)
 
             dilation = 2 ** i if dilated else 1
-            padding = (k - 1) // 2
+            padding = (k - 1) * dilation if causal else ((k - 1) // 2) * dilation
 
             single_conv = nn.Conv1d(
                 in_channels, 2 * out_channels, k, padding=padding, dilation=dilation
@@ -58,6 +76,10 @@ def __init__(self, config: Config, embed_dim: int) -> None:
             )
             conv_layers.append(single_conv)
 
+            trim = Trim1d(padding) if causal else None
+            trim_layers.append(trim)
+
             in_channels = out_channels
 
         self.convs = nn.ModuleList(conv_layers)
+        self.trims = nn.ModuleList(trim_layers)
@@ -71,13 +93,15 @@ def forward(self, inputs: torch.Tensor, *args) -> torch.Tensor:
         inputs = self.dropout(inputs)
         # bsz * seq_len * embed_dim -> bsz * embed_dim * seq_len
         words = inputs.permute(0, 2, 1)
-        for conv, proj in zip(self.convs, self.projections):
-            if proj is None:
-                residual = words
-            else:
+        for conv, trim, proj in zip(self.convs, self.trims, self.projections):
+            if proj:
                 tranposed = words.permute(0, 2, 1)
                 residual = proj(tranposed).permute(0, 2, 1)
+            else:
+                residual = words
             words = conv(words)
+            if trim:
+                words = trim(words)
             words = self.glu(words)
             words = (words + residual) * math.sqrt(0.5)
         return words.permute(0, 2, 1)
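A note on the padding arithmetic above: PyTorch's `nn.Conv1d` pads both ends of the sequence by `padding`, so the causal branch pads by the full receptive-field overhang `(k - 1) * dilation` and relies on `Trim1d` to cut that overhang back off the right end. The net effect is equivalent to left-padding only: every output step depends on the current and earlier inputs alone, and sequence length is preserved. The non-causal branch, `((k - 1) // 2) * dilation`, is ordinary centered "same" padding for odd kernel sizes. A minimal standalone sketch (not part of the commit; sizes are arbitrary) checking the causal case for `k = 3`, `dilation = 4`:

```python
import torch
import torch.nn as nn

k, dilation = 3, 4
padding = (k - 1) * dilation           # causal branch: 8, applied to both ends
conv = nn.Conv1d(16, 16, k, padding=padding, dilation=dilation)

x = torch.randn(2, 16, 50)             # bsz * channels * seq_len
y = conv(x)                            # length grows to 50 + padding
y = y[:, :, :-padding].contiguous()    # what Trim1d(padding) does
assert y.shape == x.shape              # length-preserving and causal
```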
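The rewritten `forward` loop is a stack of gated-linear-unit residual blocks in the convolutional seq2seq style: each convolution emits `2 * out_channels`, `nn.GLU(dim=1)` gates that back down to `out_channels`, the optional trim restores causal alignment, and the residual sum is scaled by `math.sqrt(0.5)` to keep activation variance roughly constant across layers. One loop iteration unrolled as a sketch, with hypothetical sizes and the projection omitted (channels already match, so `residual = words`):

```python
import math
import torch
import torch.nn as nn

glu = nn.GLU(dim=1)                           # halves the channel dimension
conv = nn.Conv1d(16, 32, 3, padding=2)        # 2 * out_channels feeds the GLU
words = torch.randn(4, 16, 30)                # bsz * channels * seq_len

residual = words                              # proj is None: channels already match
words = conv(words)                           # -> (4, 32, 32); excess on the right
words = words[:, :, :-2].contiguous()         # what Trim1d(2) does
words = glu(words)                            # -> (4, 16, 30)
words = (words + residual) * math.sqrt(0.5)   # variance-preserving residual sum
```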
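Assuming the surrounding config plumbing follows pytext's existing pattern (i.e. `CNNParams` gains the `causal` field that `__init__` reads as `config.cnn.causal`), end-to-end usage could look like the following hypothetical sketch; everything outside this diff is an assumption, not part of the commit:

```python
import torch
from pytext.config.module_config import CNNParams
from pytext.models.representations.deepcnn import DeepCNNRepresentation

# `causal=True` is the new flag; `dilated=True` doubles the dilation per layer.
config = DeepCNNRepresentation.Config(cnn=CNNParams(causal=True, dilated=True))
rep = DeepCNNRepresentation(config, embed_dim=64)

tokens = torch.randn(8, 20, 64)   # bsz * seq_len * embed_dim, as forward() expects
out = rep(tokens)                 # bsz * seq_len * out_channels, same layout back
```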