
Add StableDiffusion3 #1820


Merged
merged 62 commits into from Sep 25, 2024

62 commits
b681ea3
Add VGG16 backbone (#1737)
divyashreepathihalli Aug 8, 2024
33854c6
Add `ResNetBackbone` and `ResNetImageClassifier` (#1765)
james77777778 Aug 12, 2024
dd8df26
Add CSP DarkNet backbone and classifier (#1774)
sachinprasadhs Aug 15, 2024
7381b6d
Add `FeaturePyramidBackbone` and port weights from `timm` for `ResNet…
james77777778 Aug 15, 2024
b76317e
Add DenseNet (#1775)
sachinprasadhs Aug 16, 2024
c822f3a
Add ViTDetBackbone (#1776)
divyashreepathihalli Aug 20, 2024
bc69acd
Add Mix transformer (#1780)
sachinprasadhs Aug 20, 2024
7e180c2
update input_image_shape -> image_shape (#1785)
divyashreepathihalli Aug 21, 2024
12a4ccc
Create __init__.py (#1788)
sachinprasadhs Aug 22, 2024
3e9aaba
Hack package build script to rename to keras-hub (#1793)
mattdangerw Aug 26, 2024
741f98d
Add CLIP and T5XXL for StableDiffusionV3 (#1790)
james77777778 Aug 26, 2024
352e0b7
Add Bounding Box Utils (#1791)
sineeli Aug 28, 2024
1a81234
mobilenet_v3 added in keras-nlp (#1782)
ushareng Aug 28, 2024
894226d
Pkgoogle/efficient net migration (#1778)
pkgoogle Aug 28, 2024
6104520
Add the ResNet_vd backbone (#1766)
gowthamkpr Aug 28, 2024
faffd86
Add `VAEImageDecoder` for StableDiffusionV3 (#1796)
james77777778 Aug 28, 2024
bdf0d7f
Replace `Backbone` with `keras.Model` in `CLIPTextEncoder` and `T5XXL…
james77777778 Aug 28, 2024
85daf81
Add pyramid output for densenet, cspDarknet (#1801)
sachinprasadhs Sep 3, 2024
321b757
Add `MMDiT` for StableDiffusionV3 (#1806)
james77777778 Sep 4, 2024
dfb7123
Add remaining bbox utils (#1804)
sineeli Sep 4, 2024
753047d
Fix timm conversion for rersnet (#1814)
sachinprasadhs Sep 5, 2024
3c239ca
Add `StableDiffusion3`
james77777778 Sep 11, 2024
53e90f1
Fix `_normalize_inputs`
james77777778 Sep 11, 2024
6503f66
Separate CLIP encoders from SD3 backbone.
james77777778 Sep 12, 2024
86f98bb
Simplify `text_to_image` function.
james77777778 Sep 12, 2024
4ad8d47
Address comments
james77777778 Sep 13, 2024
0d6d6a1
Minor update and add docstrings.
james77777778 Sep 13, 2024
edc74fa
Add VGG16 backbone (#1737)
divyashreepathihalli Aug 8, 2024
f305f64
Add `ResNetBackbone` and `ResNetImageClassifier` (#1765)
james77777778 Aug 12, 2024
d090b98
Add CSP DarkNet backbone and classifier (#1774)
sachinprasadhs Aug 15, 2024
4b20620
Add `FeaturePyramidBackbone` and port weights from `timm` for `ResNet…
james77777778 Aug 15, 2024
689a8db
Add DenseNet (#1775)
sachinprasadhs Aug 16, 2024
3f6c936
Add ViTDetBackbone (#1776)
divyashreepathihalli Aug 20, 2024
76d704f
Add Mix transformer (#1780)
sachinprasadhs Aug 20, 2024
d91b61a
update input_image_shape -> image_shape (#1785)
divyashreepathihalli Aug 21, 2024
ab7efdd
Create __init__.py (#1788)
sachinprasadhs Aug 22, 2024
2a11cf1
Hack package build script to rename to keras-hub (#1793)
mattdangerw Aug 26, 2024
40b5153
Add CLIP and T5XXL for StableDiffusionV3 (#1790)
james77777778 Aug 26, 2024
f597523
Add Bounding Box Utils (#1791)
sineeli Aug 28, 2024
1e97bae
mobilenet_v3 added in keras-nlp (#1782)
ushareng Aug 28, 2024
639d983
Pkgoogle/efficient net migration (#1778)
pkgoogle Aug 28, 2024
b10c410
Add the ResNet_vd backbone (#1766)
gowthamkpr Aug 28, 2024
9feb2d8
Add `VAEImageDecoder` for StableDiffusionV3 (#1796)
james77777778 Aug 28, 2024
30cc165
Replace `Backbone` with `keras.Model` in `CLIPTextEncoder` and `T5XXL…
james77777778 Aug 28, 2024
ae3d558
Add pyramid output for densenet, cspDarknet (#1801)
sachinprasadhs Sep 3, 2024
76e8fb6
Add `MMDiT` for StableDiffusionV3 (#1806)
james77777778 Sep 4, 2024
759905e
Add remaining bbox utils (#1804)
sineeli Sep 4, 2024
a5e5d8f
Fix timm conversion for rersnet (#1814)
sachinprasadhs Sep 5, 2024
f76b689
Merge remote-tracking branch 'upstream/keras-hub' into add-sd3
james77777778 Sep 15, 2024
ec7f53d
Fix
james77777778 Sep 15, 2024
ab10eaa
Update
james77777778 Sep 15, 2024
79f3a01
Rename to diffuser and decoder
james77777778 Sep 15, 2024
fb73693
Define functional model
james77777778 Sep 16, 2024
aa42194
Merge remote-tracking branch 'upstream/master' into add-sd3
james77777778 Sep 23, 2024
403e4b8
Merge from upstream/master
james77777778 Sep 23, 2024
1363024
Delete old SD3
james77777778 Sep 23, 2024
51ca7ea
Fix copyright
james77777778 Sep 23, 2024
0f72e58
Rename to keras_hub
james77777778 Sep 23, 2024
ef425d0
Address comments
james77777778 Sep 24, 2024
74a4b07
Update
james77777778 Sep 24, 2024
2fb4953
Fix CI
james77777778 Sep 24, 2024
6e086cf
Fix bugs occurred in keras3.1
james77777778 Sep 25, 2024
13 changes: 13 additions & 0 deletions keras_hub/api/models/__init__.py
@@ -66,6 +66,8 @@
from keras_hub.src.models.bloom.bloom_tokenizer import BloomTokenizer
from keras_hub.src.models.causal_lm import CausalLM
from keras_hub.src.models.causal_lm_preprocessor import CausalLMPreprocessor
from keras_hub.src.models.clip.clip_preprocessor import CLIPPreprocessor
from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer
from keras_hub.src.models.csp_darknet.csp_darknet_backbone import (
CSPDarkNetBackbone,
)
@@ -257,14 +259,25 @@
from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer
from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM
from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor
from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (
StableDiffusion3Backbone,
)
from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image import (
StableDiffusion3TextToImage,
)
from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image_preprocessor import (
StableDiffusion3TextToImagePreprocessor,
)
from keras_hub.src.models.t5.t5_backbone import T5Backbone
from keras_hub.src.models.t5.t5_preprocessor import T5Preprocessor
from keras_hub.src.models.t5.t5_tokenizer import T5Tokenizer
from keras_hub.src.models.task import Task
from keras_hub.src.models.text_classifier import TextClassifier
from keras_hub.src.models.text_classifier import TextClassifier as Classifier
from keras_hub.src.models.text_classifier_preprocessor import (
TextClassifierPreprocessor,
)
from keras_hub.src.models.text_to_image import TextToImage
from keras_hub.src.models.vgg.vgg_backbone import VGGBackbone
from keras_hub.src.models.vgg.vgg_image_classifier import VGGImageClassifier
from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone
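Among the re-exports above, `TextClassifier` is also exposed under the legacy name `Classifier`. The pattern is plain attribute aliasing; a minimal sketch with a stand-in class (hypothetical — the real `TextClassifier` is a full Keras task model):

```python
# Stand-in for keras_hub.src.models.text_classifier.TextClassifier
# (hypothetical minimal class used only to show the aliasing pattern).
class TextClassifier:
    pass


# Legacy alias, mirroring the re-export above: both names resolve to the
# same class object, so isinstance checks and subclasses are unaffected.
Classifier = TextClassifier

print(Classifier is TextClassifier)  # True
```

Because the alias is the same object rather than a subclass, existing code that imports `Classifier` keeps working with zero runtime cost.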
1 change: 1 addition & 0 deletions keras_hub/api/tokenizers/__init__.py
@@ -21,6 +21,7 @@
from keras_hub.src.models.bart.bart_tokenizer import BartTokenizer
from keras_hub.src.models.bert.bert_tokenizer import BertTokenizer
from keras_hub.src.models.bloom.bloom_tokenizer import BloomTokenizer
from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer
from keras_hub.src.models.deberta_v3.deberta_v3_tokenizer import (
DebertaV3Tokenizer,
)
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from keras import dtype_policies
from keras import layers
from keras import ops

@@ -43,7 +44,7 @@ def __init__(
intermediate_activation = quick_gelu

self.layer_norm_1 = layers.LayerNormalization(
epsilon=0.00001, dtype=self.dtype_policy, name="layer_norm_1"
epsilon=1e-5, dtype="float32", name="layer_norm_1"
)
self.attention = layers.MultiHeadAttention(
num_heads,
@@ -52,7 +53,7 @@ def __init__(
name="attention",
)
self.layer_norm_2 = layers.LayerNormalization(
epsilon=0.00001, dtype=self.dtype_policy, name="layer_norm_2"
epsilon=1e-5, dtype="float32", name="layer_norm_2"
)
self.dense_1 = layers.Dense(
self.intermediate_dim, dtype=self.dtype_policy, name="dense_1"
@@ -67,6 +68,11 @@ def __init__(
def build(self, input_shape):
self.layer_norm_1.build(input_shape)
self.attention.build(input_shape, input_shape, input_shape)
# Before Keras 3.2, there was no setter for `dtype_policy`. Directly
# assign a `DTypePolicy` instead.
self.attention._softmax.dtype_policy = dtype_policies.DTypePolicy(
"float32"
)
self.layer_norm_2.build(input_shape)
self.dense_1.build(input_shape)
input_shape = self.dense_1.compute_output_shape(input_shape)
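The float32 pinning in the hunks above (the layer norms' `dtype="float32"` and the `DTypePolicy` assigned to the attention softmax) guards against half-precision overflow: `exp` exceeds the float16 range for fairly small logits, which breaks an unstabilized softmax. A small NumPy illustration of the failure mode (illustrative only — not the model code):

```python
import numpy as np

# float16 tops out around 65504, so exp(12) ~ 162755 already overflows.
print(np.exp(np.float16(12.0)))   # inf
print(np.exp(np.float32(12.0)))   # finite, ~162754.79


def naive_softmax(x):
    # No max-subtraction stabilization, like a raw softmax kernel.
    e = np.exp(x)
    return e / e.sum()


logits = np.array([10.0, 11.0, 12.0])
print(naive_softmax(logits.astype(np.float16)))  # contains nan from inf/inf
print(naive_softmax(logits.astype(np.float32)))  # a valid distribution
```

Running the attention probabilities in float32 (while keeping the matmuls in the compute dtype) is a common mixed-precision compromise; the build-time assignment here is a workaround for Keras versions before 3.2, which lacked a `dtype_policy` setter.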
147 changes: 147 additions & 0 deletions keras_hub/src/models/clip/clip_preprocessor.py
@@ -0,0 +1,147 @@
# Copyright 2024 The KerasHub Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.layers.preprocessing.start_end_packer import StartEndPacker
from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer
from keras_hub.src.models.preprocessor import Preprocessor
from keras_hub.src.utils.tensor_utils import preprocessing_function

try:
import tensorflow as tf
except ImportError:
tf = None


@keras_hub_export("keras_hub.models.CLIPPreprocessor")
class CLIPPreprocessor(Preprocessor):
"""CLIP preprocessing layer which tokenizes and packs inputs.

This preprocessing layer will do two things:

- Tokenize the inputs using the `tokenizer`.
- Construct a dictionary with keys `"token_ids"`, `"padding_mask"`.

This layer can be used directly with `tf.data.Dataset.map` to preprocess
string data in the `(x, y, sample_weight)` format used by
`keras.Model.fit`.

The call method of this layer accepts three arguments, `x`, `y`, and
`sample_weight`. `x` can be a python string or tensor representing a single
segment, a list of python strings representing a batch of single segments,
or a list of tensors representing multiple segments to be packed together.
`y` and `sample_weight` are both optional, can have any format, and will be
passed through unaltered.

`CLIPPreprocessor` forces the input to have only one segment, as CLIP
encodes a single piece of text (such as an image caption) at a time. For
tasks with multi-segment inputs like "glue/mnli", please use a model
designed for classification, such as BERT or RoBERTa.

Args:
tokenizer: A `keras_hub.models.CLIPTokenizer` instance.
sequence_length: The length of the packed inputs.
add_start_token: If `True`, the preprocessor will prepend the tokenizer
start token to each input sequence.
add_end_token: If `True`, the preprocessor will append the tokenizer
end token to each input sequence.
to_lower: bool. Whether to lowercase the inputs before tokenizing.

Call arguments:
x: A string, `tf.Tensor` or list of python strings.
y: Any label data. Will be passed through unaltered.
sample_weight: Any label weight data. Will be passed through unaltered.
sequence_length: Pass to override the configured `sequence_length` of
the layer.
"""

# TODO: Add example once we have a CLIP model.

tokenizer_cls = CLIPTokenizer

def __init__(
self,
tokenizer,
sequence_length=77,
add_start_token=True,
add_end_token=True,
to_lower=True,
**kwargs,
):
super().__init__(**kwargs)
self.tokenizer = tokenizer
self.packer = None
self.sequence_length = sequence_length
self.add_start_token = add_start_token
self.add_end_token = add_end_token
self.to_lower = to_lower

def build(self, input_shape):
# Defer packer creation to `build()` so that we can be sure tokenizer
# assets have loaded when restoring a saved model.
self.packer = StartEndPacker(
start_value=self.tokenizer.start_token_id,
end_value=self.tokenizer.end_token_id,
pad_value=self.tokenizer.end_token_id,
sequence_length=self.sequence_length,
return_padding_mask=True,
)
self.built = True

@preprocessing_function
def call(
self,
x,
y=None,
sample_weight=None,
sequence_length=None,
):
sequence_length = sequence_length or self.sequence_length
if self.to_lower:
x = tf.strings.lower(x)
token_ids, padding_mask = self.packer(
self.tokenizer(x),
sequence_length=sequence_length,
add_start_value=self.add_start_token,
add_end_value=self.add_end_token,
)
x = {
"token_ids": token_ids,
"padding_mask": padding_mask,
}
return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)

def get_config(self):
config = super().get_config()
config.update(
{
"sequence_length": self.sequence_length,
"add_start_token": self.add_start_token,
"add_end_token": self.add_end_token,
"to_lower": self.to_lower,
}
)
return config

@property
def sequence_length(self):
"""The padded length of model input sequences."""
return self._sequence_length

@sequence_length.setter
def sequence_length(self, value):
self._sequence_length = value
if self.packer is not None:
self.packer.sequence_length = value
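A rough pure-Python sketch of what the `StartEndPacker` configured in `build()` does — prepend the start token, truncate so the end token always fits, append it, then pad with the end token id. This is a simplification for intuition, not the real implementation; the toy ids assume the test vocabulary used later in this PR (start id 5, end id 4):

```python
def pack(token_ids, sequence_length, start_id, end_id):
    # Prepend the start token, then truncate so the end token always fits.
    ids = ([start_id] + list(token_ids))[: sequence_length - 1] + [end_id]
    padding_mask = [1] * len(ids)
    # Pad with the end token id (matching pad_value=end_token_id above).
    n_pad = sequence_length - len(ids)
    return ids + [end_id] * n_pad, padding_mask + [0] * n_pad


# " airplane airport" tokenizes to [1, 2, 1, 3] under the toy vocabulary.
print(pack([1, 2, 1, 3], 8, 5, 4))
# → ([5, 1, 2, 1, 3, 4, 4, 4], [1, 1, 1, 1, 1, 1, 0, 0])
print(pack([1, 2, 1, 3], 5, 5, 4))  # call-time sequence_length override
# → ([5, 1, 2, 1, 4], [1, 1, 1, 1, 1])
```

Note that the padding mask covers the start and end tokens but not the end-token padding, which is exactly what the preprocessor tests below assert.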
Original file line number Diff line number Diff line change
@@ -13,12 +13,8 @@
# limitations under the License.
import pytest

from keras_hub.src.models.stable_diffusion_v3.clip_preprocessor import (
CLIPPreprocessor,
)
from keras_hub.src.models.stable_diffusion_v3.clip_tokenizer import (
CLIPTokenizer,
)
from keras_hub.src.models.clip.clip_preprocessor import CLIPPreprocessor
from keras_hub.src.models.clip.clip_tokenizer import CLIPTokenizer
from keras_hub.src.tests.test_case import TestCase


@@ -43,7 +39,7 @@ def test_preprocessor_basics(self):
input_data=self.input_data,
expected_output={
"token_ids": [[5, 1, 2, 1, 3, 4, 4, 4]],
"padding_mask": [[1, 1, 1, 1, 1, 0, 0, 0]],
"padding_mask": [[1, 1, 1, 1, 1, 1, 0, 0]],
},
)

@@ -54,17 +50,16 @@ def test_no_start_end_token(self):
sequence_length=8,
add_start_token=False,
add_end_token=False,
pad_with_end_token=False,
)
x = preprocessor(input_data)
self.assertAllEqual(x["token_ids"], [[1, 2, 1, 3, 0, 0, 0, 0]] * 4)
self.assertAllEqual(x["token_ids"], [[1, 2, 1, 3, 4, 4, 4, 4]] * 4)
self.assertAllEqual(x["padding_mask"], [[1, 1, 1, 1, 0, 0, 0, 0]] * 4)

def test_sequence_length_override(self):
input_data = " airplane airport"
preprocessor = CLIPPreprocessor(**self.init_kwargs)
x = preprocessor(input_data, sequence_length=4)
self.assertAllEqual(x["token_ids"], [5, 1, 2, 1])
x = preprocessor(input_data, sequence_length=5)
self.assertAllEqual(x["token_ids"], [5, 1, 2, 1, 4])

@pytest.mark.kaggle_key_required
@pytest.mark.extra_large