Add DeepLabV3Plus segmentation #1799

Closed

Changes from all commits

Commits
36 commits
a089a8b
Add VGG16 backbone (#1737)
divyashreepathihalli Aug 8, 2024
73b7bad
Add `ResNetBackbone` and `ResNetImageClassifier` (#1765)
james77777778 Aug 12, 2024
26afc7e
Add CSP DarkNet backbone and classifier (#1774)
sachinprasadhs Aug 15, 2024
00ab4d5
Add `FeaturePyramidBackbone` and port weights from `timm` for `ResNet…
james77777778 Aug 15, 2024
9860756
Add DenseNet (#1775)
sachinprasadhs Aug 16, 2024
ececd14
Merge remote-tracking branch 'upstream/master' into keras-hub
divyashreepathihalli Aug 16, 2024
fd6f977
Add ViTDetBackbone (#1776)
divyashreepathihalli Aug 20, 2024
fc485d6
Add Mix transformer (#1780)
sachinprasadhs Aug 20, 2024
2797851
update input_image_shape -> image_shape (#1785)
divyashreepathihalli Aug 21, 2024
18f8880
Create __init__.py (#1788)
sachinprasadhs Aug 22, 2024
2ee893c
Hack package build script to rename to keras-hub (#1793)
mattdangerw Aug 26, 2024
fdf6b6b
Add CLIP and T5XXL for StableDiffusionV3 (#1790)
james77777778 Aug 26, 2024
18dddf4
Add DeepLabV3Plus segmentation
sachinprasadhs Aug 26, 2024
744b233
init file
sachinprasadhs Aug 26, 2024
98c0811
api gen
sachinprasadhs Aug 26, 2024
b40617c
Add Segmentation base class
sachinprasadhs Aug 26, 2024
7470b84
format fix
sachinprasadhs Aug 27, 2024
68a5a62
add dependency package
sachinprasadhs Aug 27, 2024
8473170
nit
sachinprasadhs Aug 28, 2024
beae2f4
Add Bounding Box Utils (#1791)
sineeli Aug 28, 2024
9289ab7
mobilenet_v3 added in keras-nlp (#1782)
ushareng Aug 28, 2024
09f470f
Pkgoogle/efficient net migration (#1778)
pkgoogle Aug 28, 2024
be8888d
Add the ResNet_vd backbone (#1766)
gowthamkpr Aug 28, 2024
536474a
Add `VAEImageDecoder` for StableDiffusionV3 (#1796)
james77777778 Aug 28, 2024
0fbd84b
Replace `Backbone` with `keras.Model` in `CLIPTextEncoder` and `T5XXL…
james77777778 Aug 28, 2024
9143468
Add pyramid output for densenet, cspDarknet (#1801)
sachinprasadhs Sep 3, 2024
791d7f6
Add `MMDiT` for StableDiffusionV3 (#1806)
james77777778 Sep 4, 2024
339669f
Add remaining bbox utils (#1804)
sineeli Sep 4, 2024
0a978d2
Merge remote-tracking branch 'upstream/keras-hub' into segmentation
sachinprasadhs Sep 4, 2024
f31ad9c
Add Deeplabv3 and v3plus in the same backbone and segmenter
sachinprasadhs Sep 17, 2024
2d82550
Merge 'upstream/keras-hub' into segmentation
sachinprasadhs Sep 17, 2024
fc1a3a5
fix imports
sachinprasadhs Sep 17, 2024
c172031
nit
sachinprasadhs Sep 17, 2024
3b6c045
testcase changes
sachinprasadhs Sep 17, 2024
704d119
Segmeter >> ImageSegmenter
sachinprasadhs Sep 17, 2024
64050d5
resolve conflict
sachinprasadhs Sep 17, 2024
7 changes: 7 additions & 0 deletions keras_nlp/api/models/__init__.py
@@ -96,6 +96,12 @@
from keras_nlp.src.models.deberta_v3.deberta_v3_tokenizer import (
DebertaV3Tokenizer,
)
from keras_nlp.src.models.deeplab_v3.deeplab_v3_backbone import (
DeepLabV3Backbone,
)
from keras_nlp.src.models.deeplab_v3.deeplab_v3_segmenter import (
DeepLabV3ImageSegmenter,
)
from keras_nlp.src.models.densenet.densenet_backbone import DenseNetBackbone
from keras_nlp.src.models.densenet.densenet_image_classifier import (
DenseNetImageClassifier,
@@ -172,6 +178,7 @@
)
from keras_nlp.src.models.gpt_neo_x.gpt_neo_x_tokenizer import GPTNeoXTokenizer
from keras_nlp.src.models.image_classifier import ImageClassifier
from keras_nlp.src.models.image_segmenter import ImageSegmenter
from keras_nlp.src.models.llama3.llama3_backbone import Llama3Backbone
from keras_nlp.src.models.llama3.llama3_causal_lm import Llama3CausalLM
from keras_nlp.src.models.llama3.llama3_causal_lm_preprocessor import (
13 changes: 13 additions & 0 deletions keras_nlp/src/models/deeplab_v3/__init__.py
@@ -0,0 +1,13 @@
# Copyright 2024 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
235 changes: 235 additions & 0 deletions keras_nlp/src/models/deeplab_v3/deeplab_v3_backbone.py
@@ -0,0 +1,235 @@
# Copyright 2024 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import keras

from keras_nlp.src.api_export import keras_nlp_export
from keras_nlp.src.models.backbone import Backbone
from keras_nlp.src.models.deeplab_v3.deeplab_v3_layers import (
SpatialPyramidPooling,
)


@keras_nlp_export("keras_nlp.models.DeepLabV3Backbone")
class DeepLabV3Backbone(Backbone):
"""DeepLabV3 & DeepLabV3Plus architecture for semantic segmentation.

This class implements the DeepLabV3 and DeepLabV3Plus architectures as
described in [Encoder-Decoder with Atrous Separable Convolution for Semantic
Image Segmentation](https://arxiv.org/abs/1802.02611) (ECCV 2018)
and [Rethinking Atrous Convolution for Semantic Image Segmentation](
https://arxiv.org/abs/1706.05587) (2017).

Args:
image_encoder: `keras.Model`. The backbone network for the model that is
Member: indent args

used as a feature extractor for the Encoder. Should
either be a `keras_nlp.models.backbones.backbone.Backbone` or a
`keras.Model` that implements the `pyramid_outputs`
property with keys "P2", "P3", etc. as values. A
sensible backbone to use in many cases is
`keras_nlp.models.ResNetBackbone.from_preset("resnet_v2_50")`.
projection_filters: int. The number of filters in the convolution layer
projecting the low-level features from the `image_encoder`.
spatial_pyramid_pooling_key: str. The layer level to extract and perform
`spatial_pyramid_pooling` on, one of the keys from the `image_encoder`'s
`pyramid_outputs` property, such as "P4", "P5", etc.
upsampling_size: int, or tuple of 2 integers. The upsampling factors for
the rows and columns of the `spatial_pyramid_pooling` layer. If
`low_level_feature_key` is given, the `spatial_pyramid_pooling` layer's
resolution should match the `low_level_feature` layer's resolution so
that the two can be concatenated into the combined encoder outputs.
dilation_rates: list of ints. Dilation rates for the parallel dilated
convolutions. Applied only when the default `SpatialPyramidPooling` is
used. A common choice of rates is `[6, 12, 18]`.
low_level_feature_key: Optional str. The layer level to extract the
decoder-block features from, one of the keys from the `image_encoder`'s
`pyramid_outputs` property, such as "P2", "P3", etc. Required only when
the DeepLabV3Plus architecture is used.
spatial_pyramid_pooling: Optional `keras.layers.Layer`. Also known as
Atrous Spatial Pyramid Pooling (ASPP). Performs spatial pooling on
different spatial levels in the pyramid, with dilation. If provided,
the feature map from the backbone is passed to it inside the DeepLabV3
Encoder; otherwise a default `SpatialPyramidPooling` layer is used.
segmentation_head: Optional `keras.layers.Layer`. If provided, the
outputs of the DeepLabV3 encoder are passed to this layer, which is
treated as the last layer before the final segmentation layer;
otherwise a default DeepLabV3 convolutional head is used.

Example:
```python
image_encoder = keras_nlp.models.ResNetBackbone.from_preset("resnet_v2_50")

model = keras_nlp.models.DeepLabV3Backbone(
image_encoder=image_encoder,
projection_filters=48,
low_level_feature_key="P2",
spatial_pyramid_pooling_key="P5",
upsampling_size=8,
dilation_rates=[6, 12, 18],
)
```
"""

def __init__(
self,
image_encoder,
spatial_pyramid_pooling_key,
upsampling_size,
dilation_rates,
low_level_feature_key=None,
projection_filters=48,
spatial_pyramid_pooling=None,
segmentation_head=None,
**kwargs,
):
if not isinstance(image_encoder, keras.Model):
raise ValueError(
"Argument `image_encoder` must be a `keras.Model` instance. Received instead "
f"backbone={image_encoder} (of type {type(image_encoder)})."
Member: remove backbone=, since in this case we don't even know what got passed in.

)
data_format = keras.config.image_data_format()
channel_axis = -1 if data_format == "channels_last" else 1
# === Functional Model ===
inputs = keras.layers.Input((None, None, 3))
Member: I think we are taking in image_size generally, and defaulting it to (None, None, 3), primarily so that users can pass a different number of channels.
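For illustration, a minimal sketch of what that suggestion might look like (the `image_shape` parameter and `build_inputs` helper below are hypothetical, not part of this diff):

```python
import keras


def build_inputs(image_shape=(None, None, 3)):
    # Hypothetical constructor-style argument: the default keeps the current
    # behaviour, while letting callers pass a different channel count.
    return keras.layers.Input(shape=image_shape)


inputs = build_inputs()                       # equivalent to the hard-coded (None, None, 3)
rgba_inputs = build_inputs((None, None, 4))   # e.g. four-channel imagery
```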


fpn_model = keras.Model(
image_encoder.inputs, image_encoder.pyramid_outputs
)

fpn_outputs = fpn_model(inputs)

if spatial_pyramid_pooling is None:
Member: Are there particular reasons to do different pooling for DeepLabV3 models in particular? If not, let's leave this argument out for now, and see if anyone asks for it.

Collaborator (Author): Atrous pooling is what makes the main difference in the DeepLab architecture and is the standard way of doing it. The only reason users might provide their own pooling is to build variations of ASPP.
We can make it the default and remove it from the config.
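As a sketch of the only override case mentioned above (an ASPP variant with non-default rates), assuming an `image_encoder` such as the one in the class docstring example and the constructor arguments shown in this diff:

```python
from keras_nlp.src.models.deeplab_v3.deeplab_v3_backbone import (
    DeepLabV3Backbone,
)
from keras_nlp.src.models.deeplab_v3.deeplab_v3_layers import (
    SpatialPyramidPooling,
)

# An ASPP variant with non-default dilation rates, passed in explicitly;
# `image_encoder` is assumed to be defined, e.g. as in the docstring example.
custom_aspp = SpatialPyramidPooling(dilation_rates=[2, 4, 8])

backbone = DeepLabV3Backbone(
    image_encoder=image_encoder,
    spatial_pyramid_pooling_key="P5",
    upsampling_size=8,
    dilation_rates=[6, 12, 18],            # only used when no custom layer is given
    spatial_pyramid_pooling=custom_aspp,   # replaces the default SpatialPyramidPooling
)
```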

spatial_pyramid_pooling = SpatialPyramidPooling(
dilation_rates=dilation_rates
)
spatial_backbone_features = fpn_outputs[spatial_pyramid_pooling_key]
spp_outputs = spatial_pyramid_pooling(spatial_backbone_features)

encoder_outputs = keras.layers.UpSampling2D(
size=upsampling_size,
interpolation="bilinear",
name="encoder_output_upsampling",
data_format=data_format,
)(spp_outputs)

if low_level_feature_key:
decoder_feature = fpn_outputs[low_level_feature_key]
low_level_projected_features = apply_low_level_feature_network(
decoder_feature, projection_filters, channel_axis
)

encoder_outputs = keras.layers.Concatenate(axis=channel_axis)(
[encoder_outputs, low_level_projected_features]
)
# upsampling to the original image size
upsampling = (2 ** int(spatial_pyramid_pooling_key[-1])) // (
int(upsampling_size[0])
if isinstance(upsampling_size, tuple)
else upsampling_size
)
if segmentation_head is None:
Member: I was thinking that the segmentation head is exactly what lives in the DeepLabV3ImageSegmenter class. That way we can kill this argument. If you want a custom head, just instantiate the backbone and add your own with the functional API. WDYT?

I guess one sub-question here: if you were training from pretrained weights, but with a different set of segmentation classes than the pretrained model, would you generally want these head weights randomly initialized, or is starting with the pretrained weights important?

Member: If we do choose to leave this in the backbone, I would probably leave it unconfigurable and see if anyone asks. We'd probably be better off calling it an "upscaler" as it currently stands, but the simplest option is just killing the arg.

Collaborator (Author): If we move it to DeepLabV3ImageSegmenter, the user has to provide segmentation head weights as well if num_classes is provided, which I think would make it more complicated.

> or is starting with the pretrained weights important?

It is better to have the pretrained weights.

I think, as you suggested, it is better to get rid of the argument.
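For reference, a rough sketch of the "custom head via the functional API" alternative discussed above (hypothetical; `DeepLabV3Backbone`, `image_encoder`, and `num_classes` are assumed to be in scope, and the constructor arguments mirror this diff):

```python
import keras

backbone = DeepLabV3Backbone(
    image_encoder=image_encoder,
    spatial_pyramid_pooling_key="P5",
    upsampling_size=8,
    dilation_rates=[6, 12, 18],
    low_level_feature_key="P2",
)

# Attach a user-defined head on top of the backbone's functional graph.
x = keras.layers.Conv2D(
    filters=num_classes, kernel_size=1, activation="softmax"
)(backbone.output)
custom_segmenter = keras.Model(backbone.input, x)
```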

x = keras.layers.Conv2D(
name="segmentation_head_conv",
filters=256,
kernel_size=1,
padding="same",
use_bias=False,
data_format=data_format,
)(encoder_outputs)
x = keras.layers.BatchNormalization(
name="segmentation_head_norm", axis=channel_axis
)(x)
x = keras.layers.ReLU(name="segmentation_head_relu")(x)
x = keras.layers.UpSampling2D(
size=upsampling,
interpolation="bilinear",
data_format=data_format,
)(x)
else:
x = segmentation_head(encoder_outputs)

super().__init__(inputs=inputs, outputs=x, **kwargs)

# === Config ===
self.image_encoder = image_encoder
self.spatial_pyramid_pooling = spatial_pyramid_pooling
self.projection_filters = projection_filters
self.upsampling_size = upsampling_size
self.segmentation_head = segmentation_head
self.dilation_rates = dilation_rates
self.low_level_feature_key = low_level_feature_key
self.spatial_pyramid_pooling_key = spatial_pyramid_pooling_key

def get_config(self):
return {
"image_encoder": keras.saving.serialize_keras_object(
self.image_encoder
),
"spatial_pyramid_pooling": keras.saving.serialize_keras_object(
self.spatial_pyramid_pooling
),
"projection_filters": self.projection_filters,
"segmentation_head": keras.saving.serialize_keras_object(
self.segmentation_head
),
"dilation_rates": self.dilation_rates,
"upsampling_size": self.upsampling_size,
"low_level_feature_key": self.low_level_feature_key,
"spatial_pyramid_pooling_key": self.spatial_pyramid_pooling_key,
}

@classmethod
def from_config(cls, config):
if "image_encoder" in config and isinstance(
config["image_encoder"], dict
):
config["image_encoder"] = keras.layers.deserialize(
config["image_encoder"]
)
if "spatial_pyramid_pooling" in config and isinstance(
config["spatial_pyramid_pooling"], dict
):
config["spatial_pyramid_pooling"] = keras.layers.deserialize(
config["spatial_pyramid_pooling"]
)
if "segmentation_head" in config and isinstance(
config["segmentation_head"], dict
):
config["segmentation_head"] = keras.layers.deserialize(
config["segmentation_head"]
)
return super().from_config(config)


def apply_low_level_feature_network(
input_tensor, projection_filters, channel_axis
):
data_format = keras.config.image_data_format()
x = keras.layers.Conv2D(
name="low_level_feature_conv",
filters=projection_filters,
kernel_size=1,
padding="same",
use_bias=False,
data_format=data_format,
)(input_tensor)

x = keras.layers.BatchNormalization(
name="low_level_feature_norm", axis=channel_axis
)(x)
x = keras.layers.ReLU(name="low_level_feature_relu")(x)
return x
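A short worked example of the `upsampling` factor computed in the constructor above, using the values from the class docstring ("P5" features, `upsampling_size=8`); this is plain arithmetic, no model is built:

```python
# The "P5" features sit at stride 2**5 = 32. After the encoder upsamples by 8,
# the default head still needs a factor of 32 // 8 = 4 to reach input resolution.
spatial_pyramid_pooling_key = "P5"
upsampling_size = 8

backbone_stride = 2 ** int(spatial_pyramid_pooling_key[-1])  # 32
head_upsampling = backbone_stride // upsampling_size         # 4
print(head_upsampling)  # 4
```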
63 changes: 63 additions & 0 deletions keras_nlp/src/models/deeplab_v3/deeplab_v3_backbone_test.py
@@ -0,0 +1,63 @@
# Copyright 2024 The KerasNLP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import pytest

from keras_nlp.src.models.deeplab_v3.deeplab_v3_backbone import (
DeepLabV3Backbone,
)
from keras_nlp.src.models.resnet.resnet_backbone import ResNetBackbone
from keras_nlp.src.tests.test_case import TestCase


class DeepLabV3Test(TestCase):
def setUp(self):
self.resnet_kwargs = {
"input_conv_filters": [64],
"input_conv_kernel_sizes": [7],
"stackwise_num_filters": [64, 64, 64],
"stackwise_num_blocks": [2, 2, 2],
"stackwise_num_strides": [1, 2, 2],
"pooling": "avg",
"block_type": "basic_block",
"use_pre_activation": False,
}
self.image_encoder = ResNetBackbone(**self.resnet_kwargs)
self.init_kwargs = {
"image_encoder": self.image_encoder,
"low_level_feature_key": "P2",
"spatial_pyramid_pooling_key": "P4",
"dilation_rates": [6, 12, 18],
"upsampling_size": 4,
}
self.input_data = np.ones((2, 96, 96, 3), dtype="float32")

def test_segmentation_basics(self):
self.run_vision_backbone_test(
cls=DeepLabV3Backbone,
init_kwargs=self.init_kwargs,
input_data=self.input_data,
expected_output_shape=(2, 96, 96, 256),
run_mixed_precision_check=False,
run_quantization_check=False,
)

@pytest.mark.large
def test_saved_model(self):
self.run_model_saving_test(
cls=DeepLabV3Backbone,
init_kwargs=self.init_kwargs,
input_data=self.input_data,
)
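A quick sanity check (my own arithmetic, following the backbone code above) of why `test_segmentation_basics` expects an output shape of `(2, 96, 96, 256)`:

```python
# "P4" features are at stride 2**4 = 16, so for 96x96 inputs they are 6x6.
# The encoder upsamples by 4 to 24x24 (matching the "P2" decoder features),
# and the default head upsamples by (2**4) // 4 = 4, back to 96x96 with the
# head's 256 filters.
input_size = 96
p4_size = input_size // (2 ** 4)        # 6
encoder_size = p4_size * 4              # 24, matches P2 (stride 4)
head_upsampling = (2 ** 4) // 4         # 4, as computed in the backbone
print(encoder_size * head_upsampling)   # 96
```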