From 716ae6496699701a2e783b2d20c7a7a7bcaf77f9 Mon Sep 17 00:00:00 2001 From: David Landup Date: Thu, 26 Sep 2024 22:04:09 +0900 Subject: [PATCH 01/66] initial commit - tf-based, kcv --- keras_hub/src/models/segformer/segformer.py | 209 +++++++++++++++ .../src/models/segformer/segformer_aliases.py | 251 ++++++++++++++++++ .../src/models/segformer/segformer_presets.py | 95 +++++++ .../src/models/segformer/segformer_tests.py | 142 ++++++++++ 4 files changed, 697 insertions(+) create mode 100644 keras_hub/src/models/segformer/segformer.py create mode 100644 keras_hub/src/models/segformer/segformer_aliases.py create mode 100644 keras_hub/src/models/segformer/segformer_presets.py create mode 100644 keras_hub/src/models/segformer/segformer_tests.py diff --git a/keras_hub/src/models/segformer/segformer.py b/keras_hub/src/models/segformer/segformer.py new file mode 100644 index 0000000000..71d7241a71 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer.py @@ -0,0 +1,209 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy + +from keras_cv.src.api_export import keras_cv_export +from keras_cv.src.backend import keras +from keras_cv.src.models import MiTBackbone +from keras_cv.src.models.segmentation.segformer.segformer_presets import ( # noqa: E501 + presets, +) +from keras_cv.src.models.segmentation.segformer.segformer_presets import ( # noqa: E501 + presets_with_weights, +) +from keras_cv.src.models.task import Task +from keras_cv.src.utils.python_utils import classproperty +from keras_cv.src.utils.train import get_feature_extractor + + +@keras_cv_export( + ["keras_cv.models.SegFormer", "keras_cv.models.segmentation.SegFormer"] +) +class SegFormer(Task): + """A Keras model implementing the SegFormer architecture for semantic + segmentation. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501 + - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer) # noqa: E501 + + Args: + backbone: `keras.Model`. The backbone network for the model that is + used as a feature extractor for the SegFormer encoder. + It is *intended* to be used only with the MiT backbone model which + was created specifically for SegFormers. It should either be a + `keras_cv.models.backbones.backbone.Backbone` or a `tf.keras.Model` + that implements the `pyramid_level_inputs` property with keys + "P2", "P3", "P4", and "P5" and layer names as + values. + num_classes: int, the number of classes for the detection model, + including the background class. + projection_filters: int, number of filters in the + convolution layer projecting the concatenated features into + a segmentation map. Defaults to 256`. 
+ + Example: + + Using the class with a `backbone`: + + ```python + import tensorflow as tf + import keras_cv + + images = np.ones(shape=(1, 96, 96, 3)) + labels = np.zeros(shape=(1, 96, 96, 1)) + backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") + model = keras_cv.models.segmentation.SegFormer( + num_classes=1, backbone=backbone, + ) + + # Evaluate model + model(images) + + # Train model + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(from_logits=False), + metrics=["accuracy"], + ) + model.fit(images, labels, epochs=3) + ``` + """ + + def __init__( + self, + backbone, + num_classes, + projection_filters=256, + **kwargs, + ): + if not isinstance(backbone, keras.layers.Layer) or not isinstance( + backbone, keras.Model + ): + raise ValueError( + "Argument `backbone` must be a `keras.layers.Layer` instance " + f" or `keras.Model`. Received instead " + f"backbone={backbone} (of type {type(backbone)})." + ) + + inputs = backbone.input + + feature_extractor = get_feature_extractor( + backbone, list(backbone.pyramid_level_inputs.values()) + ) + # Multi-level dictionary + features = list(feature_extractor(inputs).values()) + + # Get H and W of level one output + _, H, W, _ = features[0].shape + # Project all multi-level outputs onto the same dimensionality + # and feature map shape + multi_layer_outs = [] + for feature_dim, feature in zip(backbone.embedding_dims, features): + out = keras.layers.Dense( + projection_filters, name=f"linear_{feature_dim}" + )(feature) + out = keras.layers.Resizing(H, W, interpolation="bilinear")(out) + multi_layer_outs.append(out) + + # Concat now-equal feature maps + concatenated_outs = keras.layers.Concatenate(axis=3)( + multi_layer_outs[::-1] + ) + + # Fuse concatenated features into a segmentation map + seg = keras.Sequential( + [ + keras.layers.Conv2D( + filters=projection_filters, kernel_size=1, use_bias=False + ), + keras.layers.BatchNormalization(), + keras.layers.Activation("relu"), + ] 
+ )(concatenated_outs) + + seg = keras.layers.Dropout(0.1)(seg) + seg = keras.layers.Conv2D( + filters=num_classes, kernel_size=1, activation="softmax" + )(seg) + + output = keras.layers.Resizing( + height=inputs.shape[1], + width=inputs.shape[2], + interpolation="bilinear", + )(seg) + + super().__init__( + inputs=inputs, + outputs=output, + **kwargs, + ) + + self.num_classes = num_classes + self.projection_filters = projection_filters + self.backbone = backbone + + def get_config(self): + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "projection_filters": self.projection_filters, + "backbone": keras.saving.serialize_keras_object(self.backbone), + } + ) + return config + + @classmethod + def from_preset( + cls, + preset, + num_classes, + load_weights=None, + input_shape=None, + **kwargs, + ): + aliases = { + "segformer_b0": "mit_b0", + "segformer_b1": "mit_b1", + "segformer_b2": "mit_b2", + "segformer_b3": "mit_b3", + "segformer_b4": "mit_b4", + "segformer_b5": "mit_b5", + } + if preset in aliases: + preset = aliases[preset] + return super().from_preset( + preset, + load_weights=load_weights, + num_classes=num_classes, + input_shape=input_shape, + **kwargs, + ) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return copy.deepcopy(presets) + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return copy.deepcopy(presets_with_weights) + + @classproperty + def backbone_presets(cls): + return copy.deepcopy(MiTBackbone.presets) \ No newline at end of file diff --git a/keras_hub/src/models/segformer/segformer_aliases.py b/keras_hub/src/models/segformer/segformer_aliases.py new file mode 100644 index 0000000000..00a5c32a71 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_aliases.py @@ -0,0 +1,251 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +from keras_cv.src.api_export import keras_cv_export +from keras_cv.src.models.segmentation.segformer.segformer import SegFormer +from keras_cv.src.models.segmentation.segformer.segformer_presets import presets +from keras_cv.src.utils.python_utils import classproperty + +ALIAS_DOCSTRING = """SegFormer model. + + For transfer learning use cases, make sure to read the + [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). + + Args: + backbone: a KerasCV backbone for feature extraction. + num_classes: the number of classes for segmentation, including the background class. 
+ + Example: + ```python + input_data = tf.ones(shape=(8, 224, 224, 3)) + + # Randomly initialized backbone + backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") + segformer = keras_cv.models.SegFormer(backbone=backbone, num_classes=19) + output = model(input_data) + ``` +""" # noqa: E501 + + +@keras_cv_export("keras_cv.models.SegFormerB0") +class SegFormerB0(SegFormer): + def __new__( + cls, + num_classes, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "num_classes": num_classes, + } + ) + return SegFormer.from_preset("segformer_b0", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "segformer_b0": copy.deepcopy(presets["segformer_b0"]), + } + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +@keras_cv_export("keras_cv.models.SegFormerB1") +class SegFormerB1(SegFormer): + def __new__( + cls, + num_classes, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "num_classes": num_classes, + } + ) + return SegFormer.from_preset("segformer_b1", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "segformer_b1": copy.deepcopy(presets["segformer_b1"]), + } + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +@keras_cv_export("keras_cv.models.SegFormerB2") +class SegFormerB2(SegFormer): + def __new__( + cls, + num_classes, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "num_classes": num_classes, + } + ) + return SegFormer.from_preset("segformer_b2", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "segformer_b2": copy.deepcopy(presets["segformer_b2"]), + } + + @classproperty + def presets_with_weights(cls): + 
"""Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +@keras_cv_export("keras_cv.models.SegFormerB3") +class SegFormerB3(SegFormer): + def __new__( + cls, + num_classes, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "num_classes": num_classes, + } + ) + return SegFormer.from_preset("segformer_b3", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "segformer_b3": copy.deepcopy(presets["segformer_b3"]), + } + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +@keras_cv_export("keras_cv.models.SegFormerB4") +class SegFormerB4(SegFormer): + def __new__( + cls, + num_classes, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "num_classes": num_classes, + } + ) + return SegFormer.from_preset("segformer_b4", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "segformer_b4": copy.deepcopy(presets["segformer_b4"]), + } + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +@keras_cv_export("keras_cv.models.SegFormerB5") +class SegFormerB5(SegFormer): + def __new__( + cls, + num_classes, + **kwargs, + ): + # Pack args in kwargs + kwargs.update( + { + "num_classes": num_classes, + } + ) + return SegFormer.from_preset("segformer_b5", **kwargs) + + @classproperty + def presets(cls): + """Dictionary of preset names and configurations.""" + return { + "segformer_b5": copy.deepcopy(presets["segformer_b5"]), + } + + @classproperty + def presets_with_weights(cls): + """Dictionary of preset names and configurations that include + weights.""" + return cls.presets + + +setattr( + SegFormerB0, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB0"), +) + +setattr( + 
SegFormerB1, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB1"), +) + +setattr( + SegFormerB2, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB2"), +) + +setattr( + SegFormerB3, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB3"), +) + +setattr( + SegFormerB4, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB4"), +) + +setattr( + SegFormerB5, + "__doc__", + ALIAS_DOCSTRING.format(name="SegFormerB5"), +) \ No newline at end of file diff --git a/keras_hub/src/models/segformer/segformer_presets.py b/keras_hub/src/models/segformer/segformer_presets.py new file mode 100644 index 0000000000..d5bbcd22a4 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_presets.py @@ -0,0 +1,95 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""SegFormer model preset configurations.""" + +from keras_cv.src.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 + backbone_presets, +) + +presets_no_weights = { + "segformer_b0": { + "metadata": { + "description": ("SegFormer model with MiTB0 backbone."), + "params": 3719027, + "official_name": "SegFormerB0", + "path": "segformer_b0", + }, + "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b0/2", + }, + "segformer_b1": { + "metadata": { + "description": ("SegFormer model with MiTB1 backbone."), + "params": 13682643, + "official_name": "SegFormerB1", + "path": "segformer_b1", + }, + "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b1/2", + }, + "segformer_b2": { + "metadata": { + "description": ("SegFormer model with MiTB2 backbone."), + "params": 24727507, + "official_name": "SegFormerB2", + "path": "segformer_b2", + }, + "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b2/2", + }, + "segformer_b3": { + "metadata": { + "description": ("SegFormer model with MiTB3 backbone."), + "params": 44603347, + "official_name": "SegFormerB3", + "path": "segformer_b3", + }, + "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b3/2", + }, + "segformer_b4": { + "metadata": { + "description": ("SegFormer model with MiTB4 backbone."), + "params": 61373907, + "official_name": "SegFormerB4", + "path": "segformer_b4", + }, + "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b4/2", + }, + "segformer_b5": { + "metadata": { + "description": ("SegFormer model with MiTB5 backbone."), + "params": 81974227, + "official_name": "SegFormerB5", + "path": "segformer_b5", + }, + "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b5/2", + }, +} + +presets_with_weights = { + "segformer_b0_imagenet": { + "metadata": { + "description": ( + "SegFormer model with a pretrained MiTB0 backbone." 
+ ), + "params": 3719027, + "official_name": "SegFormerB0", + "path": "segformer_b0", + }, + "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b0_imagenet/2", # noqa: E501 + }, +} + +presets = { + **backbone_presets, # Add MiTBackbone presets + **presets_no_weights, + **presets_with_weights, +} \ No newline at end of file diff --git a/keras_hub/src/models/segformer/segformer_tests.py b/keras_hub/src/models/segformer/segformer_tests.py new file mode 100644 index 0000000000..f481c10c40 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_tests.py @@ -0,0 +1,142 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import numpy as np +import pytest +import tensorflow as tf + +from keras_cv.src.backend import keras +from keras_cv.src.backend import ops +from keras_cv.src.backend.config import keras_3 +from keras_cv.src.models import MiTBackbone +from keras_cv.src.models import SegFormer +from keras_cv.src.tests.test_case import TestCase + + +class SegFormerTest(TestCase): + def test_segformer_construction(self): + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=2) + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(), + metrics=["accuracy"], + ) + + def test_segformer_preset_construction(self): + model = SegFormer.from_preset( + "segformer_b0", num_classes=2, input_shape=[512, 512, 3] + ) + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(), + metrics=["accuracy"], + ) + + def test_segformer_preset_error(self): + with self.assertRaises(TypeError): + _ = SegFormer.from_preset("segformer_b0") + + @pytest.mark.large + def DISABLED_test_segformer_call(self): + # TODO: Test of output comparison Fails + backbone = MiTBackbone.from_preset("mit_b0") + mit_model = SegFormer(backbone=backbone, num_classes=2) + + images = np.random.uniform(size=(2, 224, 224, 3)) + mit_output = mit_model(images) + mit_pred = mit_model.predict(images) + + seg_model = SegFormer.from_preset("segformer_b0", num_classes=2) + seg_output = seg_model(images) + seg_pred = seg_model.predict(images) + + self.assertAllClose(mit_output, seg_output) + self.assertAllClose(mit_pred, seg_pred) + + @pytest.mark.large + def test_weights_change(self): + target_size = [512, 512, 2] + + images = tf.ones(shape=[1] + [512, 512, 3]) + labels = tf.zeros(shape=[1] + target_size) + ds = tf.data.Dataset.from_tensor_slices((images, labels)) + ds = ds.repeat(2) + ds = ds.batch(2) + + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, 
num_classes=2) + + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(), + metrics=["accuracy"], + ) + + original_weights = model.get_weights() + model.fit(ds, epochs=1) + updated_weights = model.get_weights() + + for w1, w2 in zip(original_weights, updated_weights): + self.assertNotAllEqual(w1, w2) + self.assertFalse(ops.any(ops.isnan(w2))) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_saved_model(self): + target_size = [512, 512, 3] + + backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) + model = SegFormer(backbone=backbone, num_classes=2) + + input_batch = np.ones(shape=[2] + target_size) + model_output = model(input_batch) + + save_path = os.path.join(self.get_temp_dir(), "model.keras") + if keras_3(): + model.save(save_path) + else: + model.save(save_path, save_format="keras_v3") + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, SegFormer) + + # Check that output matches. + restored_output = restored_model(input_batch) + self.assertAllClose(model_output, restored_output) + + @pytest.mark.large # Saving is slow, so mark these large. + def test_preset_saved_model(self): + target_size = [224, 224, 3] + + model = SegFormer.from_preset("segformer_b0", num_classes=2) + + input_batch = np.ones(shape=[2] + target_size) + model_output = model(input_batch) + + save_path = os.path.join(self.get_temp_dir(), "model.keras") + if keras_3(): + model.save(save_path) + else: + model.save(save_path, save_format="keras_v3") + restored_model = keras.models.load_model(save_path) + + # Check we got the real object back. + self.assertIsInstance(restored_model, SegFormer) + + # Check that output matches. 
+ restored_output = restored_model(input_batch) + self.assertAllClose(model_output, restored_output) \ No newline at end of file From 71bd40bc3dddbb78b0860c1d4521884569b29db5 Mon Sep 17 00:00:00 2001 From: David Landup Date: Fri, 27 Sep 2024 17:35:14 +0900 Subject: [PATCH 02/66] porting to keras_hub structure - removing aliases, presets, etc. --- keras_hub/src/models/image_segmenter.py | 106 ++++++++ keras_hub/src/models/segformer/__init__.py | 20 ++ .../src/models/segformer/segformer_aliases.py | 251 ------------------ .../{segformer.py => segformer_backbone.py} | 148 +++++------ .../segformer/segformer_image_segmenter.py | 147 ++++++++++ .../src/models/segformer/segformer_presets.py | 2 +- .../src/models/segformer/segformer_tests.py | 19 +- 7 files changed, 348 insertions(+), 345 deletions(-) create mode 100644 keras_hub/src/models/image_segmenter.py create mode 100644 keras_hub/src/models/segformer/__init__.py delete mode 100644 keras_hub/src/models/segformer/segformer_aliases.py rename keras_hub/src/models/segformer/{segformer.py => segformer_backbone.py} (63%) create mode 100644 keras_hub/src/models/segformer/segformer_image_segmenter.py diff --git a/keras_hub/src/models/image_segmenter.py b/keras_hub/src/models/image_segmenter.py new file mode 100644 index 0000000000..c75776cb71 --- /dev/null +++ b/keras_hub/src/models/image_segmenter.py @@ -0,0 +1,106 @@ +# Copyright 2024 The KerasHub Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import keras + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.task import Task + + +@keras_hub_export("keras_hub.models.ImageSegmenter") +class ImageSegmenter(Task): + """Base class for all image segmentation tasks. + + `ImageSegmenter` tasks wrap a `keras_hub.models.Task` and + a `keras_hub.models.Preprocessor` to create a model that can be used for + image segmentation. + + All `ImageSegmenter` tasks include a `from_preset()` constructor which can + be used to load a pre-trained config and weights. + `ImageSegmenter` tasks take an additional + `num_classes` argument, the number of segmentation classes. + + To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` + labels where `x` is a image and `y` is a label from `[0, num_classes)`. + + Example: + ```python + model = keras_hub.models.ImageSegmenter.from_preset( + "deeplab_resnet", + num_classes=2, + ) + images = np.ones(shape=(1, 288, 288, 3)) + labels = np.zeros(shape=(1, 288, 288, 1)) + + output = model(images) + pred_labels = output[0] + + model.fit(images, labels, epochs=3) + ``` + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Default compilation. + self.compile() + + def compile( + self, + optimizer="auto", + loss="auto", + *, + metrics="auto", + **kwargs, + ): + """Configures the `ImageSegmenter` task for training. + + The `ImageSegmenter` task extends the default compilation signature of + `keras.Model.compile` with defaults for `optimizer`, `loss`, and + `metrics`. To override these defaults, pass any value + to these arguments during compilation. + + Args: + optimizer: `"auto"`, an optimizer name, or a `keras.Optimizer` + instance. Defaults to `"auto"`, which uses the default optimizer + for the given model and task. See `keras.Model.compile` and + `keras.optimizers` for more info on possible `optimizer` values. + loss: `"auto"`, a loss name, or a `keras.losses.Loss` instance. 
+ Defaults to `"auto"`, where a + `keras.losses.SparseCategoricalCrossentropy` loss will be + applied for the classification task. See + `keras.Model.compile` and `keras.losses` for more info on + possible `loss` values. + metrics: `"auto"`, or a list of metrics to be evaluated by + the model during training and testing. Defaults to `"auto"`, + where a `keras.metrics.SparseCategoricalAccuracy` will be + applied to track the accuracy of the model during training. + See `keras.Model.compile` and `keras.metrics` for + more info on possible `metrics` values. + **kwargs: See `keras.Model.compile` for a full list of arguments + supported by the compile method. + """ + if optimizer == "auto": + optimizer = keras.optimizers.Adam(5e-5) + if loss == "auto": + activation = getattr(self, "activation", None) + activation = keras.activations.get(activation) + from_logits = activation != keras.activations.softmax + loss = keras.losses.CategoricalCrossentropy(from_logits=from_logits) + if metrics == "auto": + metrics = [keras.metrics.CategoricalAccuracy()] + super().compile( + optimizer=optimizer, + loss=loss, + metrics=metrics, + **kwargs, + ) diff --git a/keras_hub/src/models/segformer/__init__.py b/keras_hub/src/models/segformer/__init__.py new file mode 100644 index 0000000000..98a7037950 --- /dev/null +++ b/keras_hub/src/models/segformer/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2024 The KerasHub Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from keras_hub.src.models.segformer.segformer_image_segmenter import ( + SegFormerImageSegmenter, +) +from keras_hub.src.models.segformer.segformer_presets import presets +from keras_hub.src.utils.preset_utils import register_presets + +register_presets(presets, SegFormerImageSegmenter) diff --git a/keras_hub/src/models/segformer/segformer_aliases.py b/keras_hub/src/models/segformer/segformer_aliases.py deleted file mode 100644 index 00a5c32a71..0000000000 --- a/keras_hub/src/models/segformer/segformer_aliases.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2023 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import copy - -from keras_cv.src.api_export import keras_cv_export -from keras_cv.src.models.segmentation.segformer.segformer import SegFormer -from keras_cv.src.models.segmentation.segformer.segformer_presets import presets -from keras_cv.src.utils.python_utils import classproperty - -ALIAS_DOCSTRING = """SegFormer model. - - For transfer learning use cases, make sure to read the - [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). - - Args: - backbone: a KerasCV backbone for feature extraction. - num_classes: the number of classes for segmentation, including the background class. 
- - Example: - ```python - input_data = tf.ones(shape=(8, 224, 224, 3)) - - # Randomly initialized backbone - backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") - segformer = keras_cv.models.SegFormer(backbone=backbone, num_classes=19) - output = model(input_data) - ``` -""" # noqa: E501 - - -@keras_cv_export("keras_cv.models.SegFormerB0") -class SegFormerB0(SegFormer): - def __new__( - cls, - num_classes, - **kwargs, - ): - # Pack args in kwargs - kwargs.update( - { - "num_classes": num_classes, - } - ) - return SegFormer.from_preset("segformer_b0", **kwargs) - - @classproperty - def presets(cls): - """Dictionary of preset names and configurations.""" - return { - "segformer_b0": copy.deepcopy(presets["segformer_b0"]), - } - - @classproperty - def presets_with_weights(cls): - """Dictionary of preset names and configurations that include - weights.""" - return cls.presets - - -@keras_cv_export("keras_cv.models.SegFormerB1") -class SegFormerB1(SegFormer): - def __new__( - cls, - num_classes, - **kwargs, - ): - # Pack args in kwargs - kwargs.update( - { - "num_classes": num_classes, - } - ) - return SegFormer.from_preset("segformer_b1", **kwargs) - - @classproperty - def presets(cls): - """Dictionary of preset names and configurations.""" - return { - "segformer_b1": copy.deepcopy(presets["segformer_b1"]), - } - - @classproperty - def presets_with_weights(cls): - """Dictionary of preset names and configurations that include - weights.""" - return cls.presets - - -@keras_cv_export("keras_cv.models.SegFormerB2") -class SegFormerB2(SegFormer): - def __new__( - cls, - num_classes, - **kwargs, - ): - # Pack args in kwargs - kwargs.update( - { - "num_classes": num_classes, - } - ) - return SegFormer.from_preset("segformer_b2", **kwargs) - - @classproperty - def presets(cls): - """Dictionary of preset names and configurations.""" - return { - "segformer_b2": copy.deepcopy(presets["segformer_b2"]), - } - - @classproperty - def presets_with_weights(cls): - 
"""Dictionary of preset names and configurations that include - weights.""" - return cls.presets - - -@keras_cv_export("keras_cv.models.SegFormerB3") -class SegFormerB3(SegFormer): - def __new__( - cls, - num_classes, - **kwargs, - ): - # Pack args in kwargs - kwargs.update( - { - "num_classes": num_classes, - } - ) - return SegFormer.from_preset("segformer_b3", **kwargs) - - @classproperty - def presets(cls): - """Dictionary of preset names and configurations.""" - return { - "segformer_b3": copy.deepcopy(presets["segformer_b3"]), - } - - @classproperty - def presets_with_weights(cls): - """Dictionary of preset names and configurations that include - weights.""" - return cls.presets - - -@keras_cv_export("keras_cv.models.SegFormerB4") -class SegFormerB4(SegFormer): - def __new__( - cls, - num_classes, - **kwargs, - ): - # Pack args in kwargs - kwargs.update( - { - "num_classes": num_classes, - } - ) - return SegFormer.from_preset("segformer_b4", **kwargs) - - @classproperty - def presets(cls): - """Dictionary of preset names and configurations.""" - return { - "segformer_b4": copy.deepcopy(presets["segformer_b4"]), - } - - @classproperty - def presets_with_weights(cls): - """Dictionary of preset names and configurations that include - weights.""" - return cls.presets - - -@keras_cv_export("keras_cv.models.SegFormerB5") -class SegFormerB5(SegFormer): - def __new__( - cls, - num_classes, - **kwargs, - ): - # Pack args in kwargs - kwargs.update( - { - "num_classes": num_classes, - } - ) - return SegFormer.from_preset("segformer_b5", **kwargs) - - @classproperty - def presets(cls): - """Dictionary of preset names and configurations.""" - return { - "segformer_b5": copy.deepcopy(presets["segformer_b5"]), - } - - @classproperty - def presets_with_weights(cls): - """Dictionary of preset names and configurations that include - weights.""" - return cls.presets - - -setattr( - SegFormerB0, - "__doc__", - ALIAS_DOCSTRING.format(name="SegFormerB0"), -) - -setattr( - 
SegFormerB1, - "__doc__", - ALIAS_DOCSTRING.format(name="SegFormerB1"), -) - -setattr( - SegFormerB2, - "__doc__", - ALIAS_DOCSTRING.format(name="SegFormerB2"), -) - -setattr( - SegFormerB3, - "__doc__", - ALIAS_DOCSTRING.format(name="SegFormerB3"), -) - -setattr( - SegFormerB4, - "__doc__", - ALIAS_DOCSTRING.format(name="SegFormerB4"), -) - -setattr( - SegFormerB5, - "__doc__", - ALIAS_DOCSTRING.format(name="SegFormerB5"), -) \ No newline at end of file diff --git a/keras_hub/src/models/segformer/segformer.py b/keras_hub/src/models/segformer/segformer_backbone.py similarity index 63% rename from keras_hub/src/models/segformer/segformer.py rename to keras_hub/src/models/segformer/segformer_backbone.py index 71d7241a71..f8627c5442 100644 --- a/keras_hub/src/models/segformer/segformer.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -1,4 +1,4 @@ -# Copyright 2023 The KerasCV Authors +# Copyright 2024 The KerasHub Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,27 +11,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
-import copy +import keras -from keras_cv.src.api_export import keras_cv_export -from keras_cv.src.backend import keras -from keras_cv.src.models import MiTBackbone -from keras_cv.src.models.segmentation.segformer.segformer_presets import ( # noqa: E501 - presets, -) -from keras_cv.src.models.segmentation.segformer.segformer_presets import ( # noqa: E501 - presets_with_weights, +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.backbone import Backbone +from keras_hub.src.models.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, ) -from keras_cv.src.models.task import Task -from keras_cv.src.utils.python_utils import classproperty -from keras_cv.src.utils.train import get_feature_extractor +from keras_hub.src.models.segformer.segformer_presets import presets + +# from keras_cv.src.utils.python_utils import classproperty +# from keras_cv.src.utils.train import get_feature_extractor -@keras_cv_export( - ["keras_cv.models.SegFormer", "keras_cv.models.segmentation.SegFormer"] +@keras_hub_export( + [ + "keras_hub_export.models.SegFormer", + "keras_hub_export.models.segmentation.SegFormer", + ] ) -class SegFormer(Task): +class SegFormerBackbone(Backbone): """A Keras model implementing the SegFormer architecture for semantic segmentation. 
@@ -82,6 +93,8 @@ class SegFormer(Task): ``` """ + backbone_cls = MiTBackbone + def __init__( self, backbone, @@ -100,6 +113,30 @@ def __init__( inputs = backbone.input + # === Layers === + + self.mlp_blocks = [] + + for feature_dim, feature in zip(backbone.embedding_dims, features): + self.mlp_blocks.append( + keras.layers.Dense( + projection_filters, name=f"linear_{feature_dim}" + ) + ) + + self.resizing = keras.layers.Resizing(H, W, interpolation="bilinear") + self.concat = keras.layers.Concatenate(axis=3) + self.segmentation = keras.Sequential( + [ + keras.layers.Conv2D( + filters=projection_filters, kernel_size=1, use_bias=False + ), + keras.layers.BatchNormalization(), + keras.layers.Activation("relu"), + ] + ) + + # === Functional Model === feature_extractor = get_feature_extractor( backbone, list(backbone.pyramid_level_inputs.values()) ) @@ -111,43 +148,22 @@ def __init__( # Project all multi-level outputs onto the same dimensionality # and feature map shape multi_layer_outs = [] - for feature_dim, feature in zip(backbone.embedding_dims, features): - out = keras.layers.Dense( - projection_filters, name=f"linear_{feature_dim}" - )(feature) - out = keras.layers.Resizing(H, W, interpolation="bilinear")(out) + for index, (feature_dim, feature) in enumerate( + zip(backbone.embedding_dims, features) + ): + out = self.mlp_blocks[index](feature) + out = self.resizing(out) multi_layer_outs.append(out) # Concat now-equal feature maps - concatenated_outs = keras.layers.Concatenate(axis=3)( - multi_layer_outs[::-1] - ) + concatenated_outs = self.concat(multi_layer_outs[::-1]) # Fuse concatenated features into a segmentation map - seg = keras.Sequential( - [ - keras.layers.Conv2D( - filters=projection_filters, kernel_size=1, use_bias=False - ), - keras.layers.BatchNormalization(), - keras.layers.Activation("relu"), - ] - )(concatenated_outs) - - seg = keras.layers.Dropout(0.1)(seg) - seg = keras.layers.Conv2D( - filters=num_classes, kernel_size=1, activation="softmax" - 
)(seg) - - output = keras.layers.Resizing( - height=inputs.shape[1], - width=inputs.shape[2], - interpolation="bilinear", - )(seg) + seg = self.segmentation(concatenated_outs) super().__init__( inputs=inputs, - outputs=output, + outputs=seg, **kwargs, ) @@ -165,45 +181,3 @@ def get_config(self): } ) return config - - @classmethod - def from_preset( - cls, - preset, - num_classes, - load_weights=None, - input_shape=None, - **kwargs, - ): - aliases = { - "segformer_b0": "mit_b0", - "segformer_b1": "mit_b1", - "segformer_b2": "mit_b2", - "segformer_b3": "mit_b3", - "segformer_b4": "mit_b4", - "segformer_b5": "mit_b5", - } - if preset in aliases: - preset = aliases[preset] - return super().from_preset( - preset, - load_weights=load_weights, - num_classes=num_classes, - input_shape=input_shape, - **kwargs, - ) - - @classproperty - def presets(cls): - """Dictionary of preset names and configurations.""" - return copy.deepcopy(presets) - - @classproperty - def presets_with_weights(cls): - """Dictionary of preset names and configurations that include - weights.""" - return copy.deepcopy(presets_with_weights) - - @classproperty - def backbone_presets(cls): - return copy.deepcopy(MiTBackbone.presets) \ No newline at end of file diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py new file mode 100644 index 0000000000..e8e2586fb5 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -0,0 +1,147 @@ +# Copyright 2024 The KerasHub Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import keras + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.image_segmenter import ImageSegmenter +from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone + + +@keras_hub_export( + [ + "keras_hub_export.models.SegFormer", + "keras_hub_export.models.segmentation.SegFormer", + ] +) +class SegFormerImageSegmenter(ImageSegmenter): + """A Keras model implementing the SegFormer architecture for semantic + segmentation. + + References: + - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501 + - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer) # noqa: E501 + + Args: + backbone: `keras.Model`. The backbone network for the model that is + used as a feature extractor for the SegFormer encoder. 
+ It is *intended* to be used only with the MiT backbone model which + was created specifically for SegFormers. It should either be a + `keras_cv.models.backbones.backbone.Backbone` or a `tf.keras.Model` + that implements the `pyramid_level_inputs` property with keys + "P2", "P3", "P4", and "P5" and layer names as + values. + num_classes: int, the number of classes for the detection model, + including the background class. + projection_filters: int, number of filters in the + convolution layer projecting the concatenated features into + a segmentation map. Defaults to 256`. + + Example: + + Using the class with a `backbone`: + + ```python + import tensorflow as tf + import keras_cv + + images = np.ones(shape=(1, 96, 96, 3)) + labels = np.zeros(shape=(1, 96, 96, 1)) + backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") + model = keras_cv.models.segmentation.SegFormer( + num_classes=1, backbone=backbone, + ) + + # Evaluate model + model(images) + + # Train model + model.compile( + optimizer="adam", + loss=keras.losses.BinaryCrossentropy(from_logits=False), + metrics=["accuracy"], + ) + model.fit(images, labels, epochs=3) + ``` + """ + + backbone_cls = SegFormerBackbone + + def __init__( + self, + backbone, + num_classes, + projection_filters=256, + **kwargs, + ): + if not isinstance(backbone, keras.layers.Layer) or not isinstance( + backbone, keras.Model + ): + raise ValueError( + "Argument `backbone` must be a `keras.layers.Layer` instance " + f" or `keras.Model`. Received instead " + f"backbone={backbone} (of type {type(backbone)})." 
+ ) + + # === Layers === + self.backbone = backbone + self.dropout = keras.layers.Dropout(0.1) + self.output_segmentation = keras.layers.Conv2D( + filters=num_classes, kernel_size=1, activation="softmax" + ) + self.resizing = keras.layers.Resizing( + height=inputs.shape[1], + width=inputs.shape[2], + interpolation="bilinear", + ) + + # === Functional Model === + inputs = self.backbone.input + x = self.backbone(inputs) + x = self.dropout(x) + x = self.output_segmentation(x) + output = self.resizing(x) + + super().__init__( + inputs=inputs, + outputs=output, + **kwargs, + ) + + # === Config === + self.num_classes = num_classes + self.projection_filters = projection_filters + self.backbone = backbone + + def get_config(self): + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "projection_filters": self.projection_filters, + "backbone": keras.saving.serialize_keras_object(self.backbone), + } + ) + return config diff --git a/keras_hub/src/models/segformer/segformer_presets.py b/keras_hub/src/models/segformer/segformer_presets.py index d5bbcd22a4..0a055eaf25 100644 --- a/keras_hub/src/models/segformer/segformer_presets.py +++ b/keras_hub/src/models/segformer/segformer_presets.py @@ -92,4 +92,4 @@ **backbone_presets, # Add MiTBackbone presets **presets_no_weights, **presets_with_weights, -} \ No newline at end of file +} diff --git a/keras_hub/src/models/segformer/segformer_tests.py b/keras_hub/src/models/segformer/segformer_tests.py index f481c10c40..169fec6c7f 100644 --- a/keras_hub/src/models/segformer/segformer_tests.py +++ b/keras_hub/src/models/segformer/segformer_tests.py @@ -1,4 +1,4 @@ -# Copyright 2023 The KerasCV Authors +# Copyright 2024 The KerasHub Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,16 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import numpy as np import pytest import tensorflow as tf - -from keras_cv.src.backend import keras -from keras_cv.src.backend import ops -from keras_cv.src.backend.config import keras_3 from keras_cv.src.models import MiTBackbone from keras_cv.src.models import SegFormer from keras_cv.src.tests.test_case import TestCase @@ -139,4 +146,4 @@ def test_preset_saved_model(self): # Check that output matches. 
restored_output = restored_model(input_batch) - self.assertAllClose(model_output, restored_output) \ No newline at end of file + self.assertAllClose(model_output, restored_output) From 8894a86af5ff0a9a3a9427f04ec9bac2fed16abd Mon Sep 17 00:00:00 2001 From: David Landup Date: Fri, 27 Sep 2024 18:23:54 +0900 Subject: [PATCH 03/66] enable instantiation of segformer backbone with custom MiT backbone --- keras_hub/src/models/segformer/__init__.py | 1 + .../models/segformer/segformer_backbone.py | 61 ++++++++----------- .../segformer/segformer_image_segmenter.py | 7 ++- .../src/models/segformer/segformer_presets.py | 5 -- 4 files changed, 32 insertions(+), 42 deletions(-) diff --git a/keras_hub/src/models/segformer/__init__.py b/keras_hub/src/models/segformer/__init__.py index 98a7037950..2555ca71fc 100644 --- a/keras_hub/src/models/segformer/__init__.py +++ b/keras_hub/src/models/segformer/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone from keras_hub.src.models.segformer.segformer_image_segmenter import ( SegFormerImageSegmenter, ) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index f8627c5442..f1d5d65241 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -32,14 +32,11 @@ ) from keras_hub.src.models.segformer.segformer_presets import presets -# from keras_cv.src.utils.python_utils import classproperty -# from keras_cv.src.utils.train import get_feature_extractor - @keras_hub_export( [ - "keras_hub_export.models.SegFormer", - "keras_hub_export.models.segmentation.SegFormer", + "keras_hub.models.SegFormerBackbone", + "keras_hub.models.segmentation.SegFormerBackbone", ] ) class SegFormerBackbone(Backbone): @@ -67,29 +64,25 @@ class SegFormerBackbone(Backbone): Example: - Using the class with a `backbone`: + Using the class with a custom `backbone`: ```python import tensorflow as tf - import keras_cv - - images = np.ones(shape=(1, 96, 96, 3)) - labels = np.zeros(shape=(1, 96, 96, 1)) - backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") - model = keras_cv.models.segmentation.SegFormer( - num_classes=1, backbone=backbone, + import keras_hub + + backbone = keras_hub.models.MiTBackbone( + depths=[2, 2, 2, 2], + image_shape=(16, 16, 3), + hidden_dims=[4, 8], + num_layers=2, + blockwise_num_heads=[1, 2], + blockwise_sr_ratios=[8, 4], + end_value=0.1, + patch_sizes=[7, 3], + strides=[4, 2], ) - # Evaluate model - model(images) - - # Train model - model.compile( - optimizer="adam", - loss=keras.losses.BinaryCrossentropy(from_logits=False), - metrics=["accuracy"], - ) - model.fit(images, labels, epochs=3) + model = SegFormerBackbone(backbone=backbone, num_classes=4) ``` """ @@ -111,13 +104,20 @@ def __init__( f"backbone={backbone} (of type {type(backbone)})." 
) - inputs = backbone.input + self.feature_extractor = keras.Model( + backbone.outputs, backbone.pyramid_outputs + ) + + inputs = backbone.output + features = self.feature_extractor(inputs) + # Get H and W of level one output + _, H, W, _ = features["P1"].shape # === Layers === self.mlp_blocks = [] - for feature_dim, feature in zip(backbone.embedding_dims, features): + for feature_dim, feature in zip(backbone.hidden_dims, features): self.mlp_blocks.append( keras.layers.Dense( projection_filters, name=f"linear_{feature_dim}" @@ -137,21 +137,14 @@ def __init__( ) # === Functional Model === - feature_extractor = get_feature_extractor( - backbone, list(backbone.pyramid_level_inputs.values()) - ) - # Multi-level dictionary - features = list(feature_extractor(inputs).values()) - # Get H and W of level one output - _, H, W, _ = features[0].shape # Project all multi-level outputs onto the same dimensionality # and feature map shape multi_layer_outs = [] for index, (feature_dim, feature) in enumerate( - zip(backbone.embedding_dims, features) + zip(backbone.hidden_dims, features) ): - out = self.mlp_blocks[index](feature) + out = self.mlp_blocks[index](features[feature]) out = self.resizing(out) multi_layer_outs.append(out) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index e8e2586fb5..db5760a606 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -32,8 +32,8 @@ @keras_hub_export( [ - "keras_hub_export.models.SegFormer", - "keras_hub_export.models.segmentation.SegFormer", + "keras_hub.models.SegFormerImageSegmenter", + "keras_hub.models.segmentation.SegFormerImageSegmenter", ] ) class SegFormerImageSegmenter(ImageSegmenter): @@ -105,6 +105,8 @@ def __init__( f"backbone={backbone} (of type {type(backbone)})." 
) + inputs = self.backbone.input + # === Layers === self.backbone = backbone self.dropout = keras.layers.Dropout(0.1) @@ -118,7 +120,6 @@ def __init__( ) # === Functional Model === - inputs = self.backbone.input x = self.backbone(inputs) x = self.dropout(x) x = self.output_segmentation(x) diff --git a/keras_hub/src/models/segformer/segformer_presets.py b/keras_hub/src/models/segformer/segformer_presets.py index 0a055eaf25..12ac38160f 100644 --- a/keras_hub/src/models/segformer/segformer_presets.py +++ b/keras_hub/src/models/segformer/segformer_presets.py @@ -13,10 +13,6 @@ # limitations under the License. """SegFormer model preset configurations.""" -from keras_cv.src.models.backbones.mix_transformer.mix_transformer_backbone_presets import ( # noqa: E501 - backbone_presets, -) - presets_no_weights = { "segformer_b0": { "metadata": { @@ -89,7 +85,6 @@ } presets = { - **backbone_presets, # Add MiTBackbone presets **presets_no_weights, **presets_with_weights, } From b66c65925934b19219593a44bc24e62a55be2466 Mon Sep 17 00:00:00 2001 From: David Landup Date: Fri, 27 Sep 2024 18:25:29 +0900 Subject: [PATCH 04/66] remove num_classes from backbone --- keras_hub/src/models/segformer/segformer_backbone.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index f1d5d65241..f2b997f504 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -91,7 +91,6 @@ class SegFormerBackbone(Backbone): def __init__( self, backbone, - num_classes, projection_filters=256, **kwargs, ): @@ -160,7 +159,6 @@ def __init__( **kwargs, ) - self.num_classes = num_classes self.projection_filters = projection_filters self.backbone = backbone @@ -168,7 +166,6 @@ def get_config(self): config = super().get_config() config.update( { - "num_classes": self.num_classes, "projection_filters": self.projection_filters, "backbone": 
keras.saving.serialize_keras_object(self.backbone), } From 392ec36c8637754fe3d8bbe78f75a2233dcc04c7 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 18:02:10 +0900 Subject: [PATCH 05/66] fix input --- keras_hub/src/models/segformer/segformer_backbone.py | 9 +++++---- .../src/models/segformer/segformer_image_segmenter.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index f2b997f504..fbd57388bc 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -104,10 +104,11 @@ def __init__( ) self.feature_extractor = keras.Model( - backbone.outputs, backbone.pyramid_outputs + backbone.inputs, backbone.pyramid_outputs ) - inputs = backbone.output + inputs = keras.layers.Input(shape=backbone.input.shape[1:]) + features = self.feature_extractor(inputs) # Get H and W of level one output _, H, W, _ = features["P1"].shape @@ -125,7 +126,7 @@ def __init__( self.resizing = keras.layers.Resizing(H, W, interpolation="bilinear") self.concat = keras.layers.Concatenate(axis=3) - self.segmentation = keras.Sequential( + self.linear_fuse = keras.Sequential( [ keras.layers.Conv2D( filters=projection_filters, kernel_size=1, use_bias=False @@ -151,7 +152,7 @@ def __init__( concatenated_outs = self.concat(multi_layer_outs[::-1]) # Fuse concatenated features into a segmentation map - seg = self.segmentation(concatenated_outs) + seg = self.linear_fuse(concatenated_outs) super().__init__( inputs=inputs, diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index db5760a606..88cc32743a 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -105,7 +105,7 @@ def __init__( f"backbone={backbone} (of type 
{type(backbone)})." ) - inputs = self.backbone.input + inputs = backbone.input # === Layers === self.backbone = backbone From d80d8d05c85e54ae399035c61fed1b4d5fb34616 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 18:27:38 +0900 Subject: [PATCH 06/66] add imports to __init__ --- keras_hub/api/models/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 400284e487..b08e1b7655 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -255,6 +255,10 @@ RobertaTextClassifierPreprocessor as RobertaPreprocessor, ) from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer +from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone +from keras_hub.src.models.segformer.segformer_image_segmenter import ( + SegFormerImageSegmenter, +) from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor from keras_hub.src.models.t5.t5_backbone import T5Backbone From 1571677d110b4d0adda22c2d29637c216240510a Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 18:54:14 +0900 Subject: [PATCH 07/66] update preset --- keras_hub/src/models/segformer/__init__.py | 11 +++++++++++ keras_hub/src/models/segformer/segformer_presets.py | 13 ++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/keras_hub/src/models/segformer/__init__.py b/keras_hub/src/models/segformer/__init__.py index 2555ca71fc..a282445aec 100644 --- a/keras_hub/src/models/segformer/__init__.py +++ b/keras_hub/src/models/segformer/__init__.py @@ -11,6 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone from keras_hub.src.models.segformer.segformer_image_segmenter import ( SegFormerImageSegmenter, diff --git a/keras_hub/src/models/segformer/segformer_presets.py b/keras_hub/src/models/segformer/segformer_presets.py index 12ac38160f..07fa1e9bac 100644 --- a/keras_hub/src/models/segformer/segformer_presets.py +++ b/keras_hub/src/models/segformer/segformer_presets.py @@ -1,4 +1,4 @@ -# Copyright 2023 The KerasCV Authors +# Copyright 2024 The KerasHub Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""SegFormer model preset configurations.""" presets_no_weights = { From 4b82a16a43d9e4d1b3b4d2b663d301820c75e3ab Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 18:55:46 +0900 Subject: [PATCH 08/66] update docstrings --- .../src/models/segformer/segformer_backbone.py | 17 +++++++++-------- .../segformer/segformer_image_segmenter.py | 17 ++++++++++++++--- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index fbd57388bc..1b5df71b00 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -72,17 +72,18 @@ class SegFormerBackbone(Backbone): backbone = keras_hub.models.MiTBackbone( depths=[2, 2, 2, 2], - image_shape=(16, 16, 3), - hidden_dims=[4, 8], - num_layers=2, - blockwise_num_heads=[1, 2], - blockwise_sr_ratios=[8, 4], + image_shape=(224, 224, 3), + hidden_dims=[32, 64, 160, 256], + num_layers=4, + blockwise_num_heads=[1, 2, 5, 8], + blockwise_sr_ratios=[8, 4, 2, 1], end_value=0.1, - patch_sizes=[7, 3], - strides=[4, 2], + patch_sizes=[7, 3, 3, 3], + strides=[4, 2, 2, 2], ) - model = SegFormerBackbone(backbone=backbone, num_classes=4) + + segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=backbone) ``` """ diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 88cc32743a..5c543acb9e 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -69,10 +69,21 @@ class SegFormerImageSegmenter(ImageSegmenter): images = np.ones(shape=(1, 96, 96, 3)) labels = np.zeros(shape=(1, 96, 96, 1)) - backbone = keras_cv.models.MiTBackbone.from_preset("mit_b0_imagenet") - model = keras_cv.models.segmentation.SegFormer( - num_classes=1, backbone=backbone, + + encoder = 
keras_hub.models.MiTBackbone( + depths=[2, 2, 2, 2], + image_shape=(224, 224, 3), + hidden_dims=[32, 64, 160, 256], + num_layers=4, + blockwise_num_heads=[1, 2, 5, 8], + blockwise_sr_ratios=[8, 4, 2, 1], + end_value=0.1, + patch_sizes=[7, 3, 3, 3], + strides=[4, 2, 2, 2], ) + + segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=encoder) + segformer = SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) # Evaluate model model(images) From 9b260e760fe76b69e5e547ec6ea4f0595aa814df Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 19:09:25 +0900 Subject: [PATCH 09/66] add basic tests --- keras_hub/api/models/__init__.py | 4 +- .../segformer/segformer_image_segmenter.py | 2 +- .../src/models/segformer/segformer_tests.py | 159 ++++++------------ 3 files changed, 55 insertions(+), 110 deletions(-) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 47722fddbf..99ee936859 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -248,12 +248,12 @@ RobertaTextClassifierPreprocessor as RobertaPreprocessor, ) from keras_hub.src.models.roberta.roberta_tokenizer import RobertaTokenizer +from keras_hub.src.models.sam.sam_backbone import SAMBackbone +from keras_hub.src.models.sam.sam_image_segmenter import SAMImageSegmenter from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone from keras_hub.src.models.segformer.segformer_image_segmenter import ( SegFormerImageSegmenter, ) -from keras_hub.src.models.sam.sam_backbone import SAMBackbone -from keras_hub.src.models.sam.sam_image_segmenter import SAMImageSegmenter from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import ( diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py 
b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 5c543acb9e..0e2d9bd494 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -81,7 +81,7 @@ class SegFormerImageSegmenter(ImageSegmenter): patch_sizes=[7, 3, 3, 3], strides=[4, 2, 2, 2], ) - + segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=encoder) segformer = SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) diff --git a/keras_hub/src/models/segformer/segformer_tests.py b/keras_hub/src/models/segformer/segformer_tests.py index 169fec6c7f..74fb808024 100644 --- a/keras_hub/src/models/segformer/segformer_tests.py +++ b/keras_hub/src/models/segformer/segformer_tests.py @@ -25,125 +25,70 @@ import os +import keras import numpy as np import pytest -import tensorflow as tf -from keras_cv.src.models import MiTBackbone -from keras_cv.src.models import SegFormer -from keras_cv.src.tests.test_case import TestCase + +from keras_hub.api.models import MiTBackbone +from keras_hub.api.models import SegFormerBackbone +from keras_hub.api.models import SegFormerImageSegmenter +from keras_hub.src.tests.test_case import TestCase class SegFormerTest(TestCase): - def test_segformer_construction(self): - backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) - model = SegFormer(backbone=backbone, num_classes=2) - model.compile( - optimizer="adam", - loss=keras.losses.BinaryCrossentropy(), - metrics=["accuracy"], + def test_segformer_backbone_construction(self): + backbone = MiTBackbone( + depths=[2, 2], + image_shape=(224, 224, 3), + hidden_dims=[32, 64], + num_layers=2, + blockwise_num_heads=[1, 2], + blockwise_sr_ratios=[8, 4], + end_value=0.1, + patch_sizes=[7, 3], + strides=[4, 2], ) - - def test_segformer_preset_construction(self): - model = SegFormer.from_preset( - "segformer_b0", num_classes=2, input_shape=[512, 512, 3] + SegFormerBackbone(backbone=backbone) + + def 
test_segformer_segmenter_construction(self): + backbone = MiTBackbone( + depths=[2, 2], + image_shape=(224, 224, 3), + hidden_dims=[32, 64], + num_layers=2, + blockwise_num_heads=[1, 2], + blockwise_sr_ratios=[8, 4], + end_value=0.1, + patch_sizes=[7, 3], + strides=[4, 2], ) - model.compile( - optimizer="adam", - loss=keras.losses.BinaryCrossentropy(), - metrics=["accuracy"], + segformer_backbone = SegFormerBackbone(backbone=backbone) + segformer = SegFormerImageSegmenter( + backbone=segformer_backbone, num_classes=4 ) - def test_segformer_preset_error(self): - with self.assertRaises(TypeError): - _ = SegFormer.from_preset("segformer_b0") - @pytest.mark.large def DISABLED_test_segformer_call(self): # TODO: Test of output comparison Fails - backbone = MiTBackbone.from_preset("mit_b0") - mit_model = SegFormer(backbone=backbone, num_classes=2) - - images = np.random.uniform(size=(2, 224, 224, 3)) - mit_output = mit_model(images) - mit_pred = mit_model.predict(images) - - seg_model = SegFormer.from_preset("segformer_b0", num_classes=2) - seg_output = seg_model(images) - seg_pred = seg_model.predict(images) - - self.assertAllClose(mit_output, seg_output) - self.assertAllClose(mit_pred, seg_pred) - - @pytest.mark.large - def test_weights_change(self): - target_size = [512, 512, 2] - - images = tf.ones(shape=[1] + [512, 512, 3]) - labels = tf.zeros(shape=[1] + target_size) - ds = tf.data.Dataset.from_tensor_slices((images, labels)) - ds = ds.repeat(2) - ds = ds.batch(2) - - backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) - model = SegFormer(backbone=backbone, num_classes=2) - - model.compile( - optimizer="adam", - loss=keras.losses.BinaryCrossentropy(), - metrics=["accuracy"], + backbone = MiTBackbone( + depths=[2, 2], + image_shape=(224, 224, 3), + hidden_dims=[32, 64], + num_layers=2, + blockwise_num_heads=[1, 2], + blockwise_sr_ratios=[8, 4], + end_value=0.1, + patch_sizes=[7, 3], + strides=[4, 2], + ) + model = 
SegFormerBackbone(backbone=backbone) + segformer = SegFormerImageSegmenter( + backbone=segformer_backbone, num_classes=4 ) - original_weights = model.get_weights() - model.fit(ds, epochs=1) - updated_weights = model.get_weights() - - for w1, w2 in zip(original_weights, updated_weights): - self.assertNotAllEqual(w1, w2) - self.assertFalse(ops.any(ops.isnan(w2))) - - @pytest.mark.large # Saving is slow, so mark these large. - def test_saved_model(self): - target_size = [512, 512, 3] - - backbone = MiTBackbone.from_preset("mit_b0", input_shape=[512, 512, 3]) - model = SegFormer(backbone=backbone, num_classes=2) - - input_batch = np.ones(shape=[2] + target_size) - model_output = model(input_batch) - - save_path = os.path.join(self.get_temp_dir(), "model.keras") - if keras_3(): - model.save(save_path) - else: - model.save(save_path, save_format="keras_v3") - restored_model = keras.models.load_model(save_path) - - # Check we got the real object back. - self.assertIsInstance(restored_model, SegFormer) - - # Check that output matches. - restored_output = restored_model(input_batch) - self.assertAllClose(model_output, restored_output) - - @pytest.mark.large # Saving is slow, so mark these large. - def test_preset_saved_model(self): - target_size = [224, 224, 3] - - model = SegFormer.from_preset("segformer_b0", num_classes=2) - - input_batch = np.ones(shape=[2] + target_size) - model_output = model(input_batch) - - save_path = os.path.join(self.get_temp_dir(), "model.keras") - if keras_3(): - model.save(save_path) - else: - model.save(save_path, save_format="keras_v3") - restored_model = keras.models.load_model(save_path) - - # Check we got the real object back. - self.assertIsInstance(restored_model, SegFormer) + images = np.random.uniform(size=(2, 224, 224, 3)) + segformer_output = segformer(images) + segformer_predict = segformer.predict(images) - # Check that output matches. 
- restored_output = restored_model(input_batch) - self.assertAllClose(model_output, restored_output) + assert segformer_output.shape == images.shape + assert segformer_predict.shape == images.shape From b93954f239b3a3a74b4d5cf4942c9701d2467864 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 19:14:25 +0900 Subject: [PATCH 10/66] remove redundant imports --- keras_hub/src/models/segformer/segformer_tests.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_tests.py b/keras_hub/src/models/segformer/segformer_tests.py index 74fb808024..e73f3f2ecc 100644 --- a/keras_hub/src/models/segformer/segformer_tests.py +++ b/keras_hub/src/models/segformer/segformer_tests.py @@ -23,9 +23,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os - -import keras import numpy as np import pytest @@ -63,9 +60,7 @@ def test_segformer_segmenter_construction(self): strides=[4, 2], ) segformer_backbone = SegFormerBackbone(backbone=backbone) - segformer = SegFormerImageSegmenter( - backbone=segformer_backbone, num_classes=4 - ) + SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) @pytest.mark.large def DISABLED_test_segformer_call(self): @@ -81,7 +76,7 @@ def DISABLED_test_segformer_call(self): patch_sizes=[7, 3], strides=[4, 2], ) - model = SegFormerBackbone(backbone=backbone) + segformer_backbone = SegFormerBackbone(backbone=backbone) segformer = SegFormerImageSegmenter( backbone=segformer_backbone, num_classes=4 ) From 159dca5be2a88072f6c8a0d55c8cea2881a9bd20 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 19:20:00 +0900 Subject: [PATCH 11/66] update docstrings --- .../models/segformer/segformer_backbone.py | 2 -- .../segformer/segformer_image_segmenter.py | 20 ++++++------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py 
b/keras_hub/src/models/segformer/segformer_backbone.py index 1b5df71b00..32bb22ae06 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -67,7 +67,6 @@ class SegFormerBackbone(Backbone): Using the class with a custom `backbone`: ```python - import tensorflow as tf import keras_hub backbone = keras_hub.models.MiTBackbone( @@ -82,7 +81,6 @@ class SegFormerBackbone(Backbone): strides=[4, 2, 2, 2], ) - segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=backbone) ``` """ diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 0e2d9bd494..7e4a820333 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -64,15 +64,16 @@ class SegFormerImageSegmenter(ImageSegmenter): Using the class with a `backbone`: ```python - import tensorflow as tf - import keras_cv + import keras + import keras_hub + import numpy as np images = np.ones(shape=(1, 96, 96, 3)) labels = np.zeros(shape=(1, 96, 96, 1)) encoder = keras_hub.models.MiTBackbone( depths=[2, 2, 2, 2], - image_shape=(224, 224, 3), + image_shape=(96, 96, 3), hidden_dims=[32, 64, 160, 256], num_layers=4, blockwise_num_heads=[1, 2, 5, 8], @@ -83,18 +84,9 @@ class SegFormerImageSegmenter(ImageSegmenter): ) segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=encoder) - segformer = SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) + segformer = keras_hub.models.SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) - # Evaluate model - model(images) - - # Train model - model.compile( - optimizer="adam", - loss=keras.losses.BinaryCrossentropy(from_logits=False), - metrics=["accuracy"], - ) - model.fit(images, labels, epochs=3) + segformer(images) ``` """ From 3ec02dd8651e62e8968c76c06bd0eae681a4ca2a Mon Sep 17 00:00:00 2001 From: David 
Landup Date: Sun, 29 Sep 2024 19:30:00 +0900 Subject: [PATCH 12/66] remove unused import --- keras_hub/src/models/segformer/segformer_backbone.py | 1 - 1 file changed, 1 deletion(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index 32bb22ae06..a91b68faad 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -30,7 +30,6 @@ from keras_hub.src.models.mix_transformer.mix_transformer_backbone import ( MiTBackbone, ) -from keras_hub.src.models.segformer.segformer_presets import presets @keras_hub_export( From 7b6286e7b1aad347a2ac51f9021f9a05224e6b72 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 19:41:34 +0900 Subject: [PATCH 13/66] running api_gen.py --- keras_hub/api/models/__init__.py | 1 + keras_hub/api/models/segmentation/__init__.py | 10 ++++++++++ .../models/mix_transformer/mix_transformer_backbone.py | 8 +++++--- 3 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 keras_hub/api/models/segmentation/__init__.py diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 99ee936859..ad0a018df1 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -4,6 +4,7 @@ since your modifications would be overwritten. """ +from keras_hub.api.models import segmentation from keras_hub.src.models.albert.albert_backbone import AlbertBackbone from keras_hub.src.models.albert.albert_masked_lm import AlbertMaskedLM from keras_hub.src.models.albert.albert_masked_lm_preprocessor import ( diff --git a/keras_hub/api/models/segmentation/__init__.py b/keras_hub/api/models/segmentation/__init__.py new file mode 100644 index 0000000000..51d6245308 --- /dev/null +++ b/keras_hub/api/models/segmentation/__init__.py @@ -0,0 +1,10 @@ +"""DO NOT EDIT. + +This file was autogenerated. Do not edit it by hand, +since your modifications would be overwritten. 
+""" + +from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone +from keras_hub.src.models.segformer.segformer_image_segmenter import ( + SegFormerImageSegmenter, +) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py index 838d7b2692..e9e80cb508 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py @@ -20,7 +20,7 @@ def __init__( num_layers, blockwise_num_heads, blockwise_sr_ratios, - end_value, + dropout_rate, patch_sizes, strides, image_shape=(None, None, 3), @@ -45,7 +45,8 @@ def __init__( ratio to perform for each layer on the sequence before key and value projections. If set to > 1, a `Conv2D` layer is used to reduce the length of the sequence. - end_value: The end value of the sequence. + dropout_rate: The final dropout rate applied at the end of the + sequence of DropPath layers. image_shape: optional shape tuple, defaults to (None, None, 3). hidden_dims: the embedding dims per hierarchical layer, used as the levels of the feature pyramid. 
@@ -73,7 +74,8 @@ def __init__( model.fit(images, labels, epochs=3) ``` """ - dpr = [x for x in np.linspace(0.0, end_value, sum(depths))] + # DropPath Rates - used at different levels of the model's depth + dpr = [x for x in np.linspace(0.0, dropout_rate, sum(depths))] # === Layers === cur = 0 From c40fdcdb08d87c21bda3e81b43a3fee68c2d1e1f Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 19:42:44 +0900 Subject: [PATCH 14/66] undo refactor of mit --- .../mix_transformer_backbone.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py index e9e80cb508..6986be7c45 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py @@ -1,3 +1,16 @@ +# Copyright 2024 The KerasHub Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import keras import numpy as np from keras import ops @@ -20,7 +33,7 @@ def __init__( num_layers, blockwise_num_heads, blockwise_sr_ratios, - dropout_rate, + end_value, patch_sizes, strides, image_shape=(None, None, 3), @@ -45,8 +58,7 @@ def __init__( ratio to perform for each layer on the sequence before key and value projections. If set to > 1, a `Conv2D` layer is used to reduce the length of the sequence. 
- dropout_rate: The final dropout rate applied at the end of the - sequence of DropPath layers. + end_value: The end value of the sequence. image_shape: optional shape tuple, defaults to (None, None, 3). hidden_dims: the embedding dims per hierarchical layer, used as the levels of the feature pyramid. @@ -74,8 +86,7 @@ def __init__( model.fit(images, labels, epochs=3) ``` """ - # DropPath Rates - used at different levels of the model's depth - dpr = [x for x in np.linspace(0.0, dropout_rate, sum(depths))] + dpr = [x for x in np.linspace(0.0, end_value, sum(depths))] # === Layers === cur = 0 From 9a135443585aae62b4c73231cb92c80861836f73 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 21:16:22 +0900 Subject: [PATCH 15/66] update docstrings --- keras_hub/src/models/segformer/segformer_backbone.py | 8 ++++---- .../src/models/segformer/segformer_image_segmenter.py | 5 +---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index a91b68faad..019cdc1a48 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -51,10 +51,10 @@ class SegFormerBackbone(Backbone): used as a feature extractor for the SegFormer encoder. It is *intended* to be used only with the MiT backbone model which was created specifically for SegFormers. It should either be a - `keras_cv.models.backbones.backbone.Backbone` or a `tf.keras.Model` - that implements the `pyramid_level_inputs` property with keys - "P2", "P3", "P4", and "P5" and layer names as - values. + `keras_hub.src.models.backbone.Backbone` a model subclassing + `from keras_hub.src.models.feature_pyramid_backbone.FeaturePyramidBackbone`, + or a `keras.Model` that has a `pyramid_outputs` property which is + a dictionary with keys "P2", "P3", "P4", and "P5" and layer names as values. 
num_classes: int, the number of classes for the detection model, including the background class. projection_filters: int, number of filters in the diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 7e4a820333..20d6eab209 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -49,10 +49,7 @@ class SegFormerImageSegmenter(ImageSegmenter): used as a feature extractor for the SegFormer encoder. It is *intended* to be used only with the MiT backbone model which was created specifically for SegFormers. It should either be a - `keras_cv.models.backbones.backbone.Backbone` or a `tf.keras.Model` - that implements the `pyramid_level_inputs` property with keys - "P2", "P3", "P4", and "P5" and layer names as - values. + `keras_hub.src.models.backbone.Backbone` or a `keras.Model`. num_classes: int, the number of classes for the detection model, including the background class. projection_filters: int, number of filters in the From 4dc3fff487f6b802252ca934293797fe71cc7601 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 21:23:33 +0900 Subject: [PATCH 16/66] add presets for mit --- .../mix_transformer_presets.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 keras_hub/src/models/mix_transformer/mix_transformer_presets.py diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py new file mode 100644 index 0000000000..aaf00d9df9 --- /dev/null +++ b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py @@ -0,0 +1,102 @@ +# Copyright 2024 The KerasHub Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""MiT model preset configurations.""" + +backbone_presets_no_weights = { + "mit_b0": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks." + ), + "params": 3321962, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://keras/mit/keras/mit_b0/2", + }, + "mit_b1": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks." + ), + "params": 13156554, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://keras/mit/keras/mit_b1/2", + }, + "mit_b2": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 16 transformer blocks." + ), + "params": 24201418, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://keras/mit/keras/mit_b2/2", + }, + "mit_b3": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 28 transformer blocks." + ), + "params": 44077258, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://keras/mit/keras/mit_b3/2", + }, + "mit_b4": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 41 transformer blocks." + ), + "params": 60847818, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://keras/mit/keras/mit_b4/2", + }, + "mit_b5": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 52 transformer blocks." 
+ ), + "params": 81448138, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://keras/mit/keras/mit_b5/2", + }, +} + +backbone_presets_with_weights = { + "mit_b0_imagenet": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks. Pre-trained on ImageNet-1K and scores 69% top-1 accuracy on the validation set." # noqa: E501 + ), + "params": 3321962, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://keras/mit/keras/mit_b0_imagenet/2", + }, +} + +backbone_presets = { + **backbone_presets_no_weights, + **backbone_presets_with_weights, +} From 191656ce2fcd095375e95d39c7ac2682e369fe99 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 29 Sep 2024 21:42:12 +0900 Subject: [PATCH 17/66] add standin paths --- .../mix_transformer/mix_transformer_presets.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py index aaf00d9df9..5e5e504cee 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py @@ -23,7 +23,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://keras/mit/keras/mit_b0/2", + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b0", # Not uploaded yet }, "mit_b1": { "metadata": { @@ -34,7 +34,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://keras/mit/keras/mit_b1/2", + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b1", # Not uploaded yet }, "mit_b2": { "metadata": { @@ -45,7 +45,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://keras/mit/keras/mit_b2/2", + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b2", # Not uploaded yet }, "mit_b3": { "metadata": { @@ -56,7 +56,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": 
"kaggle://keras/mit/keras/mit_b3/2", + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b3", # Not uploaded yet }, "mit_b4": { "metadata": { @@ -67,7 +67,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://keras/mit/keras/mit_b4/2", + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b4", # Not uploaded yet }, "mit_b5": { "metadata": { @@ -78,7 +78,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://keras/mit/keras/mit_b5/2", + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b5", # Not uploaded yet }, } @@ -92,7 +92,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://keras/mit/keras/mit_b0_imagenet/2", + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b0", # Not uploaded yet }, } From 9e47564f3cd8f24b5261b6e4e763690a79db1e52 Mon Sep 17 00:00:00 2001 From: David Landup Date: Mon, 30 Sep 2024 09:36:57 +0900 Subject: [PATCH 18/66] add presets for segformer backbone --- keras_hub/src/models/segformer/__init__.py | 4 + .../models/segformer/segformer_backbone.py | 10 +++ .../segformer/segformer_backbone_presets.py | 85 +++++++++++++++++++ .../segformer/segformer_image_segmenter.py | 12 +++ 4 files changed, 111 insertions(+) create mode 100644 keras_hub/src/models/segformer/segformer_backbone_presets.py diff --git a/keras_hub/src/models/segformer/__init__.py b/keras_hub/src/models/segformer/__init__.py index a282445aec..3b54c0785a 100644 --- a/keras_hub/src/models/segformer/__init__.py +++ b/keras_hub/src/models/segformer/__init__.py @@ -23,6 +23,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone +from keras_hub.src.models.segformer.segformer_backbone_presets import ( + presets as backbone_presets, +) from keras_hub.src.models.segformer.segformer_image_segmenter import ( SegFormerImageSegmenter, ) @@ -30,3 +33,4 @@ from keras_hub.src.utils.preset_utils import register_presets register_presets(presets, SegFormerImageSegmenter) +register_presets(backbone_presets, SegFormerBackbone) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index 019cdc1a48..2ce97f2639 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -82,6 +82,16 @@ class SegFormerBackbone(Backbone): segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=backbone) ``` + + Using the class with a preset `backbone`: + + ```python + import keras_hub + + backbone = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") + segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=backbone) + ``` + """ backbone_cls = MiTBackbone diff --git a/keras_hub/src/models/segformer/segformer_backbone_presets.py b/keras_hub/src/models/segformer/segformer_backbone_presets.py new file mode 100644 index 0000000000..bd5d992f86 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_backbone_presets.py @@ -0,0 +1,85 @@ +# Copyright 2024 The KerasHub Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""SegFormerBackbone model preset configurations.""" + +presets_no_weights = { + "segformer_b0_backbone": { + "metadata": { + "description": ("SegFormerBackbone model with MiTB0 encoder."), + "params": 3719027, + "official_name": "SegFormerB0Backbone", + "path": "segformer_b0_backbone", + }, + "kaggle_handle": "kaggle://TBA", + }, + "segformer_b1_backbone": { + "metadata": { + "description": ("SegFormerBackbone model with MiTB1 encoder."), + "params": 13682643, + "official_name": "SegFormerB1Backbone", + "path": "segformer_b1_backbone", + }, + "kaggle_handle": "kaggle://TBA", + }, + "segformer_b2_backbone": { + "metadata": { + "description": ("SegFormerBackbone model with MiTB2 encoder."), + "params": 24727507, + "official_name": "SegFormerB2Backbone", + "path": "segformer_b2_backbone", + }, + "kaggle_handle": "kaggle://TBA", + }, + "segformer_b3_backbone": { + "metadata": { + "description": ("SegFormerBackbone model with MiTB3 encoder."), + "params": 44603347, + "official_name": "SegFormerB3Backbone", + "path": "segformer_b3_backbone", + }, + "kaggle_handle": "kaggle://TBA", + }, + "segformer_b4_backbone": { + "metadata": { + "description": ("SegFormerBackbone model with MiTB4 encoder."), + "params": 61373907, + "official_name": "SegFormerB4Backbone", + "path": "segformer_b4_backbone", + }, + "kaggle_handle": 
"kaggle://TBA", + }, + "segformer_b5_backbone": { + "metadata": { + "description": ("SegFormerBackbone model with MiTB5 encoder."), + "params": 81974227, + "official_name": "SegFormerB5Backbone", + "path": "segformer_b5_backbone", + }, + "kaggle_handle": "kaggle://TBA", + }, +} + + +presets = {**presets_no_weights} diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 20d6eab209..31f290e995 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -84,6 +84,18 @@ class SegFormerImageSegmenter(ImageSegmenter): segformer = keras_hub.models.SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) segformer(images) + ``` + + Using the class with a preset backbone: + + ```python + import keras_hub + + encoder = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") + segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=encoder) + + segformer = keras_hub.models.SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) + ``` """ From 98bb69dabaf5bf54ff7f47afccf5d7f372a5545f Mon Sep 17 00:00:00 2001 From: David Landup Date: Mon, 30 Sep 2024 09:40:16 +0900 Subject: [PATCH 19/66] register presets in __init__.py --- keras_hub/src/models/mix_transformer/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/keras_hub/src/models/mix_transformer/__init__.py b/keras_hub/src/models/mix_transformer/__init__.py index e69de29bb2..f2292a35ad 100644 --- a/keras_hub/src/models/mix_transformer/__init__.py +++ b/keras_hub/src/models/mix_transformer/__init__.py @@ -0,0 +1,12 @@ +from keras_hub.src.models.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_hub.src.models.mix_transformer.mix_transformer_classifier import ( + MiTImageClassifier, +) +from keras_hub.src.models.mix_transformer.mix_transformer_presets import ( + 
backbone_presets, +) +from keras_hub.src.utils.preset_utils import register_presets + +register_presets(backbone_presets, MiTBackbone) From 21ed1672c9c18a77e2aae0b1b4adb3f9ba544bce Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 1 Oct 2024 20:23:02 +0900 Subject: [PATCH 20/66] addressing comments --- keras_hub/src/models/segformer/__init__.py | 2 - .../models/segformer/segformer_backbone.py | 49 ++++++++++--------- .../segformer/segformer_backbone_presets.py | 2 - .../segformer/segformer_image_segmenter.py | 2 - .../src/models/segformer/segformer_presets.py | 2 - .../src/models/segformer/segformer_tests.py | 2 - 6 files changed, 27 insertions(+), 32 deletions(-) diff --git a/keras_hub/src/models/segformer/__init__.py b/keras_hub/src/models/segformer/__init__.py index 3b54c0785a..248924cc59 100644 --- a/keras_hub/src/models/segformer/__init__.py +++ b/keras_hub/src/models/segformer/__init__.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index 2ce97f2639..01289a6aad 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,19 +33,24 @@ @keras_hub_export( [ "keras_hub.models.SegFormerBackbone", - "keras_hub.models.segmentation.SegFormerBackbone", ] ) class SegFormerBackbone(Backbone): - """A Keras model implementing the SegFormer architecture for semantic - segmentation. + """A Keras model implementing the SegFormer architecture for semantic segmentation. 
+ + This class implements the majority of the SegFormer architecture described in + [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers] + (https://arxiv.org/abs/2105.15203) and [based on the TensorFlow implementation from DeepVision] + (https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer). - References: - - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501 - - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer) # noqa: E501 + SegFormers are meant to be used with the MixTransformer (MiT) encoder family, and + use a very lightweight all-MLP decoder head. + + The MiT encoder uses a hierarchical transformer which outputs features at multiple scales, + similar to that of the hierarchical outputs typically associated with CNNs. Args: - backbone: `keras.Model`. The backbone network for the model that is + image_encoder: `keras.Model`. The backbone network for the model that is used as a feature extractor for the SegFormer encoder. It is *intended* to be used only with the MiT backbone model which was created specifically for SegFormers.
It should either be a @@ -80,7 +83,7 @@ class SegFormerBackbone(Backbone): strides=[4, 2, 2, 2], ) - segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=backbone) + segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone) ``` Using the class with a preset `backbone`: @@ -89,7 +92,7 @@ class SegFormerBackbone(Backbone): import keras_hub backbone = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") - segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=backbone) + segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone) ``` """ @@ -98,24 +101,24 @@ class SegFormerBackbone(Backbone): def __init__( self, - backbone, + image_encoder, projection_filters=256, **kwargs, ): - if not isinstance(backbone, keras.layers.Layer) or not isinstance( - backbone, keras.Model + if not isinstance(image_encoder, keras.layers.Layer) or not isinstance( + image_encoder, keras.Model ): raise ValueError( - "Argument `backbone` must be a `keras.layers.Layer` instance " + "Argument `image_encoder` must be a `keras.layers.Layer` instance " f" or `keras.Model`. Received instead " - f"backbone={backbone} (of type {type(backbone)})." + f"image_encoder={image_encoder} (of type {type(image_encoder)})." 
) self.feature_extractor = keras.Model( - backbone.inputs, backbone.pyramid_outputs + image_encoder.inputs, image_encoder.pyramid_outputs ) - inputs = keras.layers.Input(shape=backbone.input.shape[1:]) + inputs = keras.layers.Input(shape=image_encoder.input.shape[1:]) features = self.feature_extractor(inputs) # Get H and W of level one output @@ -125,7 +128,7 @@ def __init__( self.mlp_blocks = [] - for feature_dim, feature in zip(backbone.hidden_dims, features): + for feature_dim, feature in zip(image_encoder.hidden_dims, features): self.mlp_blocks.append( keras.layers.Dense( projection_filters, name=f"linear_{feature_dim}" @@ -150,7 +153,7 @@ def __init__( # and feature map shape multi_layer_outs = [] for index, (feature_dim, feature) in enumerate( - zip(backbone.hidden_dims, features) + zip(image_encoder.hidden_dims, features) ): out = self.mlp_blocks[index](features[feature]) out = self.resizing(out) @@ -169,14 +172,16 @@ def __init__( ) self.projection_filters = projection_filters - self.backbone = backbone + self.image_encoder = image_encoder def get_config(self): config = super().get_config() config.update( { "projection_filters": self.projection_filters, - "backbone": keras.saving.serialize_keras_object(self.backbone), + "image_encoder": keras.saving.serialize_keras_object( + self.image_encoder + ), } ) return config diff --git a/keras_hub/src/models/segformer/segformer_backbone_presets.py b/keras_hub/src/models/segformer/segformer_backbone_presets.py index bd5d992f86..2cd1243de0 100644 --- a/keras_hub/src/models/segformer/segformer_backbone_presets.py +++ b/keras_hub/src/models/segformer/segformer_backbone_presets.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 31f290e995..db43783589 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/keras_hub/src/models/segformer/segformer_presets.py b/keras_hub/src/models/segformer/segformer_presets.py index 07fa1e9bac..fcbfb4f9c3 100644 --- a/keras_hub/src/models/segformer/segformer_presets.py +++ b/keras_hub/src/models/segformer/segformer_presets.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/keras_hub/src/models/segformer/segformer_tests.py b/keras_hub/src/models/segformer/segformer_tests.py index e73f3f2ecc..202d080f5b 100644 --- a/keras_hub/src/models/segformer/segformer_tests.py +++ b/keras_hub/src/models/segformer/segformer_tests.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at From f6720ac81da6d13b8996672e035559e556844e54 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 1 Oct 2024 20:30:14 +0900 Subject: [PATCH 21/66] addressing comments --- .../models/segformer/segformer_backbone.py | 28 +++++++++---------- .../segformer/segformer_image_segmenter.py | 10 +++---- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index 01289a6aad..4c3ae28925 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -52,11 +52,12 @@ class SegFormerBackbone(Backbone): Args: image_encoder: `keras.Model`. The backbone network for the model that is used as a feature extractor for the SegFormer encoder. - It is *intended* to be used only with the MiT backbone model which - was created specifically for SegFormers. It should either be a - `keras_hub.src.models.backbone.Backbone` a model subclassing - `from keras_hub.src.models.feature_pyramid_backbone.FeaturePyramidBackbone`, - or a `keras.Model` that has a `pyramid_outputs` property which is + It is *intended* to be used only with the MiT backbone model + (`keras_hub.models.MiTBackbone`) which was created + specifically for SegFormers. + Alternatively, can be a `keras_hub.models.Backbone` a model subclassing + `keras_hub.models.FeaturePyramidBackbone`, or a `keras.Model` + that has a `pyramid_outputs` property which is a dictionary with keys "P2", "P3", "P4", and "P5" and layer names as values. num_classes: int, the number of classes for the detection model, including the background class. @@ -114,17 +115,15 @@ def __init__( f"image_encoder={image_encoder} (of type {type(image_encoder)})." 
) + # === Layers === self.feature_extractor = keras.Model( image_encoder.inputs, image_encoder.pyramid_outputs ) - inputs = keras.layers.Input(shape=image_encoder.input.shape[1:]) features = self.feature_extractor(inputs) - # Get H and W of level one output - _, H, W, _ = features["P1"].shape - - # === Layers === + # Get height and width of level one output + _, height, width, _ = features["P1"].shape self.mlp_blocks = [] @@ -135,7 +134,9 @@ def __init__( ) ) - self.resizing = keras.layers.Resizing(H, W, interpolation="bilinear") + self.resizing = keras.layers.Resizing( + height, width, interpolation="bilinear" + ) self.concat = keras.layers.Concatenate(axis=3) self.linear_fuse = keras.Sequential( [ @@ -148,9 +149,8 @@ def __init__( ) # === Functional Model === - - # Project all multi-level outputs onto the same dimensionality - # and feature map shape + # Project all multi-level outputs onto + # the same dimensionality and feature map shape multi_layer_outs = [] for index, (feature_dim, feature) in enumerate( zip(image_encoder.hidden_dims, features) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index db43783589..c3497002ac 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -78,8 +78,8 @@ class SegFormerImageSegmenter(ImageSegmenter): strides=[4, 2, 2, 2], ) - segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=encoder) - segformer = keras_hub.models.SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) + backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder) + segformer = keras_hub.models.SegFormerImageSegmenter(backbone=backbone, num_classes=4) segformer(images) ``` @@ -89,10 +89,10 @@ class SegFormerImageSegmenter(ImageSegmenter): ```python import keras_hub - encoder = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") - 
segformer_backbone = keras_hub.models.SegFormerBackbone(backbone=encoder) + image_encoder = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") + backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder) - segformer = keras_hub.models.SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) + segformer = keras_hub.models.SegFormerImageSegmenter(backbone=backbone, num_classes=4) ``` """ From b0806f2eccefb0b3623e65225dae6ab1731b3e48 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 1 Oct 2024 20:36:54 +0900 Subject: [PATCH 22/66] addressing comments --- .../models/segformer/segformer_backbone.py | 16 +++---- .../segformer/segformer_image_segmenter.py | 45 ++++++++++--------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index 4c3ae28925..a475620f65 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -30,11 +30,7 @@ ) -@keras_hub_export( - [ - "keras_hub.models.SegFormerBackbone", - ] -) +@keras_hub_export("keras_hub.models.SegFormerBackbone") class SegFormerBackbone(Backbone): """A Keras model implementing the SegFormer architecture for semantic segmentation. 
@@ -84,7 +80,7 @@ class SegFormerBackbone(Backbone): strides=[4, 2, 2, 2], ) - segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone) + segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone, projection_filters=256) ``` Using the class with a preset `backbone`: @@ -93,7 +89,7 @@ class SegFormerBackbone(Backbone): import keras_hub backbone = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") - segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone) + segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone, projection_filters=256) ``` """ @@ -103,7 +99,7 @@ class SegFormerBackbone(Backbone): def __init__( self, image_encoder, - projection_filters=256, + projection_filters, **kwargs, ): if not isinstance(image_encoder, keras.layers.Layer) or not isinstance( @@ -116,10 +112,11 @@ def __init__( ) # === Layers === + inputs = keras.layers.Input(shape=image_encoder.input.shape[1:]) + self.feature_extractor = keras.Model( image_encoder.inputs, image_encoder.pyramid_outputs ) - inputs = keras.layers.Input(shape=image_encoder.input.shape[1:]) features = self.feature_extractor(inputs) # Get height and width of level one output @@ -171,6 +168,7 @@ def __init__( **kwargs, ) + # === Config === self.projection_filters = projection_filters self.image_encoder = image_encoder diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index c3497002ac..779429c2b0 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -28,32 +28,38 @@ from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone -@keras_hub_export( - [ - "keras_hub.models.SegFormerImageSegmenter", - "keras_hub.models.segmentation.SegFormerImageSegmenter", - ] -) +@keras_hub_export("keras_hub.models.SegFormerImageSegmenter") class 
SegFormerImageSegmenter(ImageSegmenter): - """A Keras model implementing the SegFormer architecture for semantic - segmentation. + """A Keras model implementing the SegFormer architecture for semantic segmentation. - References: - - [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) # noqa: E501 - - [Based on the TensorFlow implementation from DeepVision](https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer) # noqa: E501 + This class implements the segmentation head of the SegFormer architecture described in + [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers] + (https://arxiv.org/abs/2105.15203) and [based on the TensorFlow implementation from DeepVision] + (https://github.com/DavidLandup0/deepvision/tree/main/deepvision/models/segmentation/segformer). + + SegFormers are meant to be used with the MixTransformer (MiT) encoder family, and + and use a very lightweight all-MLP decoder head. + + The MiT encoder uses a hierarchical transformer which outputs features at multiple scales, + similar to that of the hierarchical outputs typically associated with CNNs. Args: - backbone: `keras.Model`. The backbone network for the model that is + image_encoder: `keras.Model`. The backbone network for the model that is used as a feature extractor for the SegFormer encoder. - It is *intended* to be used only with the MiT backbone model which - was created specifically for SegFormers. It should either be a - `keras_hub.src.models.backbone.Backbone` or a `keras.Model`. + It is *intended* to be used only with the MiT backbone model + (`keras_hub.models.MiTBackbone`) which was created + specifically for SegFormers. 
+ Alternatively, can be a `keras_hub.models.Backbone` a model subclassing + `keras_hub.models.FeaturePyramidBackbone`, or a `keras.Model` + that has a `pyramid_outputs` property which is + a dictionary with keys "P2", "P3", "P4", and "P5" and layer names as values. num_classes: int, the number of classes for the detection model, including the background class. projection_filters: int, number of filters in the convolution layer projecting the concatenated features into a segmentation map. Defaults to 256`. + Example: Using the class with a `backbone`: @@ -78,7 +84,7 @@ class SegFormerImageSegmenter(ImageSegmenter): strides=[4, 2, 2, 2], ) - backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder) + backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder, projection_filters=256) segformer = keras_hub.models.SegFormerImageSegmenter(backbone=backbone, num_classes=4) segformer(images) @@ -90,7 +96,7 @@ class SegFormerImageSegmenter(ImageSegmenter): import keras_hub image_encoder = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") - backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder) + backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder, projection_filters=256) segformer = keras_hub.models.SegFormerImageSegmenter(backbone=backbone, num_classes=4) @@ -103,7 +109,6 @@ def __init__( self, backbone, num_classes, - projection_filters=256, **kwargs, ): if not isinstance(backbone, keras.layers.Layer) or not isinstance( @@ -115,9 +120,9 @@ def __init__( f"backbone={backbone} (of type {type(backbone)})." 
) + # === Layers === inputs = backbone.input - # === Layers === self.backbone = backbone self.dropout = keras.layers.Dropout(0.1) self.output_segmentation = keras.layers.Conv2D( @@ -143,7 +148,6 @@ def __init__( # === Config === self.num_classes = num_classes - self.projection_filters = projection_filters self.backbone = backbone def get_config(self): @@ -151,7 +155,6 @@ def get_config(self): config.update( { "num_classes": self.num_classes, - "projection_filters": self.projection_filters, "backbone": keras.saving.serialize_keras_object(self.backbone), } ) From 0549be71bf7529e2c55190cee9c8b4a8351367e7 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 1 Oct 2024 21:11:55 +0900 Subject: [PATCH 23/66] update most tests --- .../models/segformer/segformer_backbone.py | 2 +- .../segformer/segformer_backbone_tests.py | 99 +++++++++++++++++++ .../segformer/segformer_image_segmenter.py | 2 +- ....py => segformer_image_segmenter_tests.py} | 48 +++------ 4 files changed, 117 insertions(+), 34 deletions(-) create mode 100644 keras_hub/src/models/segformer/segformer_backbone_tests.py rename keras_hub/src/models/segformer/{segformer_tests.py => segformer_image_segmenter_tests.py} (63%) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index a475620f65..c6f4bac7c3 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -75,7 +75,7 @@ class SegFormerBackbone(Backbone): num_layers=4, blockwise_num_heads=[1, 2, 5, 8], blockwise_sr_ratios=[8, 4, 2, 1], - end_value=0.1, + max_drop_path_rate=0.1, patch_sizes=[7, 3, 3, 3], strides=[4, 2, 2, 2], ) diff --git a/keras_hub/src/models/segformer/segformer_backbone_tests.py b/keras_hub/src/models/segformer/segformer_backbone_tests.py new file mode 100644 index 0000000000..441a144111 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_backbone_tests.py @@ -0,0 +1,99 @@ +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import pytest +from keras import ops + +from keras_hub.api.models import MiTBackbone +from keras_hub.api.models import SegFormerBackbone +from keras_hub.src.tests.test_case import TestCase + + +class SegFormerTest(TestCase): + def setUp(self): + image_encoder = MiTBackbone( + depths=[2, 2], + image_shape=(224, 224, 3), + hidden_dims=[32, 64], + num_layers=2, + blockwise_num_heads=[1, 2], + blockwise_sr_ratios=[8, 4], + max_drop_path_rate=0.1, + patch_sizes=[7, 3], + strides=[4, 2], + ) + projection_filters = 256 + self.input_size = 224 + self.input_data = ops.ones((2, self.input_size, self.input_size, 3)) + + self.init_kwargs = { + "projection_filters": projection_filters, + "image_encoder": image_encoder, + } + + def test_segformer_backbone_construction(self): + + SegFormerBackbone( + image_encoder=self.init_kwargs["image_encoder"], + projection_filters=self.init_kwargs["projection_filters"], + ) + + @pytest.mark.large + def test_segformer_call(self): + segformer_backbone = SegFormerBackbone( + image_encoder=self.init_kwargs["image_encoder"], + projection_filters=self.init_kwargs["projection_filters"], + ) + + images = np.random.uniform(size=(2, 224, 224, 3)) + segformer_output = segformer_backbone(images) + segformer_predict = segformer_backbone.predict(images) + + assert segformer_output.shape == images.shape + assert segformer_predict.shape == images.shape + + def test_backbone_basics(self): + + self.run_vision_backbone_test( + cls=SegFormerBackbone, + init_kwargs={**self.init_kwargs}, + input_data=self.input_data, + expected_output_shape=(2, 56, 56, 256), + ) + + def test_task(self): + self.run_task_test( + cls=SegFormerBackbone, + init_kwargs={**self.init_kwargs}, + train_data=self.input_data, + expected_output_shape=(2, 224, 224), + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=SegFormerBackbone, + init_kwargs={**self.init_kwargs}, + input_data=self.input_data, + ) diff --git 
a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 779429c2b0..8732108c74 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -79,7 +79,7 @@ class SegFormerImageSegmenter(ImageSegmenter): num_layers=4, blockwise_num_heads=[1, 2, 5, 8], blockwise_sr_ratios=[8, 4, 2, 1], - end_value=0.1, + max_drop_path_rate=0.1, patch_sizes=[7, 3, 3, 3], strides=[4, 2, 2, 2], ) diff --git a/keras_hub/src/models/segformer/segformer_tests.py b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py similarity index 63% rename from keras_hub/src/models/segformer/segformer_tests.py rename to keras_hub/src/models/segformer/segformer_image_segmenter_tests.py index 202d080f5b..28b840866e 100644 --- a/keras_hub/src/models/segformer/segformer_tests.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py @@ -23,6 +23,7 @@ import numpy as np import pytest +from keras import ops from keras_hub.api.models import MiTBackbone from keras_hub.api.models import SegFormerBackbone @@ -31,52 +32,35 @@ class SegFormerTest(TestCase): - def test_segformer_backbone_construction(self): - backbone = MiTBackbone( + def setUp(self): + image_encoder = MiTBackbone( depths=[2, 2], image_shape=(224, 224, 3), hidden_dims=[32, 64], num_layers=2, blockwise_num_heads=[1, 2], blockwise_sr_ratios=[8, 4], - end_value=0.1, + max_drop_path_rate=0.1, patch_sizes=[7, 3], strides=[4, 2], ) - SegFormerBackbone(backbone=backbone) + projection_filters = 256 + self.backbone = SegFormerBackbone( + image_encoder=image_encoder, projection_filters=projection_filters + ) + + self.input_data = ops.ones((2, self.input_size, self.input_size, 3)) + + self.init_kwargs = {"projection_filters": projection_filters} def test_segformer_segmenter_construction(self): - backbone = MiTBackbone( - depths=[2, 2], - image_shape=(224, 224, 3), - 
hidden_dims=[32, 64], - num_layers=2, - blockwise_num_heads=[1, 2], - blockwise_sr_ratios=[8, 4], - end_value=0.1, - patch_sizes=[7, 3], - strides=[4, 2], - ) - segformer_backbone = SegFormerBackbone(backbone=backbone) - SegFormerImageSegmenter(backbone=segformer_backbone, num_classes=4) + SegFormerImageSegmenter(backbone=self.segformer_backbone, num_classes=4) @pytest.mark.large - def DISABLED_test_segformer_call(self): - # TODO: Test of output comparison Fails - backbone = MiTBackbone( - depths=[2, 2], - image_shape=(224, 224, 3), - hidden_dims=[32, 64], - num_layers=2, - blockwise_num_heads=[1, 2], - blockwise_sr_ratios=[8, 4], - end_value=0.1, - patch_sizes=[7, 3], - strides=[4, 2], - ) - segformer_backbone = SegFormerBackbone(backbone=backbone) + def test_segformer_call(self): + segformer = SegFormerImageSegmenter( - backbone=segformer_backbone, num_classes=4 + backbone=self.backbone, num_classes=4 ) images = np.random.uniform(size=(2, 224, 224, 3)) From 4f66776fa608d1a381a62e2a03174b84c350d963 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 2 Oct 2024 17:04:48 +0900 Subject: [PATCH 24/66] add remaining tests --- .../segformer/segformer_backbone_tests.py | 4 ++-- .../segformer_image_segmenter_tests.py | 21 +++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone_tests.py b/keras_hub/src/models/segformer/segformer_backbone_tests.py index 441a144111..f79695c26a 100644 --- a/keras_hub/src/models/segformer/segformer_backbone_tests.py +++ b/keras_hub/src/models/segformer/segformer_backbone_tests.py @@ -66,7 +66,7 @@ def test_segformer_call(self): projection_filters=self.init_kwargs["projection_filters"], ) - images = np.random.uniform(size=(2, 224, 224, 3)) + images = np.random.uniform(size=(2, 56, 56, 256)) segformer_output = segformer_backbone(images) segformer_predict = segformer_backbone.predict(images) @@ -87,7 +87,7 @@ def test_task(self): cls=SegFormerBackbone, 
init_kwargs={**self.init_kwargs}, train_data=self.input_data, - expected_output_shape=(2, 224, 224), + expected_output_shape=(2, 56, 56, 256), ) @pytest.mark.large diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py index 28b840866e..80fa618a97 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py @@ -49,12 +49,13 @@ def setUp(self): image_encoder=image_encoder, projection_filters=projection_filters ) + self.input_size = 224 self.input_data = ops.ones((2, self.input_size, self.input_size, 3)) - self.init_kwargs = {"projection_filters": projection_filters} + self.init_kwargs = {"backbone": self.backbone, "num_classes": 4} def test_segformer_segmenter_construction(self): - SegFormerImageSegmenter(backbone=self.segformer_backbone, num_classes=4) + SegFormerImageSegmenter(backbone=self.backbone, num_classes=4) @pytest.mark.large def test_segformer_call(self): @@ -69,3 +70,19 @@ def test_segformer_call(self): assert segformer_output.shape == images.shape assert segformer_predict.shape == images.shape + + def test_task(self): + self.run_task_test( + cls=SegFormerImageSegmenter, + init_kwargs={**self.init_kwargs}, + train_data=self.input_data, + expected_output_shape=(2, 224, 224), + ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=SegFormerImageSegmenter, + init_kwargs={**self.init_kwargs}, + input_data=self.input_data, + ) From f0b3e5665497476f4eb4b94418239d94eec49990 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 2 Oct 2024 17:14:05 +0900 Subject: [PATCH 25/66] remove copyright --- .../src/models/mix_transformer/mix_transformer_backbone.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py index 
bc089b689f..bb60fd850a 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at From 8c36b6e97664c453c67f94295ee5de6502a0183a Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 2 Oct 2024 17:21:47 +0900 Subject: [PATCH 26/66] fix test --- keras_hub/src/models/segformer/segformer_backbone_tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone_tests.py b/keras_hub/src/models/segformer/segformer_backbone_tests.py index f79695c26a..75f1a8d1c4 100644 --- a/keras_hub/src/models/segformer/segformer_backbone_tests.py +++ b/keras_hub/src/models/segformer/segformer_backbone_tests.py @@ -66,12 +66,12 @@ def test_segformer_call(self): projection_filters=self.init_kwargs["projection_filters"], ) - images = np.random.uniform(size=(2, 56, 56, 256)) + images = np.random.uniform(size=(2, 224, 224, 3)) segformer_output = segformer_backbone(images) segformer_predict = segformer_backbone.predict(images) - assert segformer_output.shape == images.shape - assert segformer_predict.shape == images.shape + assert segformer_output.shape == (2, 56, 56, 256) + assert segformer_predict.shape == (2, 56, 56, 256) def test_backbone_basics(self): From 9e1a9d6e43f312667c70d25161070f70668d8338 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 2 Oct 2024 17:31:37 +0900 Subject: [PATCH 27/66] override from_config --- keras_hub/src/models/segformer/segformer_backbone.py | 10 ++++++++++ .../src/models/segformer/segformer_image_segmenter.py | 10 ++++++++++ .../segformer/segformer_image_segmenter_tests.py | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git 
a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index c6f4bac7c3..dd45da8648 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -183,3 +183,13 @@ def get_config(self): } ) return config + + @classmethod + def from_config(cls, config): + if "image_encoder" in config and isinstance( + config["image_encoder"], dict + ): + config["image_encoder"] = keras.layers.deserialize( + config["image_encoder"] + ) + return super().from_config(config) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 8732108c74..c260b3b9c1 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -159,3 +159,13 @@ def get_config(self): } ) return config + + @classmethod + def from_config(cls, config): + if "image_encoder" in config and isinstance( + config["image_encoder"], dict + ): + config["image_encoder"] = keras.layers.deserialize( + config["image_encoder"] + ) + return super().from_config(config) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py index 80fa618a97..bc6dc4265e 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py @@ -64,7 +64,7 @@ def test_segformer_call(self): backbone=self.backbone, num_classes=4 ) - images = np.random.uniform(size=(2, 224, 224, 3)) + images = np.random.uniform(size=(2, 224, 224, 4)) segformer_output = segformer(images) segformer_predict = segformer.predict(images) From 0c92729bf888a400e621ecab575e53246235abb5 Mon Sep 17 00:00:00 2001 From: David Landup Date: Mon, 7 Oct 2024 15:45:12 +0900 Subject: [PATCH 28/66] fix op in overlapping patching and embedding, 
start adding conversion utils --- .../mix_transformer/mix_transformer_layers.py | 4 +- .../convert_mix_transformer.py | 128 ++++++++++++++++++ 2 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 tools/checkpoint_conversion/convert_mix_transformer.py diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py index 42402da7ea..1446e0e382 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py @@ -38,9 +38,7 @@ def __init__(self, project_dim=32, patch_size=7, stride=4, **kwargs): def call(self, x): x = self.proj(x) - # B, H, W, C - shape = x.shape - x = ops.reshape(x, (-1, shape[1] * shape[2], shape[3])) + x = ops.reshape(x, (-1, x.shape[1] * x.shape[2], x.shape[3])) x = self.norm(x) return x diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py new file mode 100644 index 0000000000..0c9120fbb0 --- /dev/null +++ b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -0,0 +1,128 @@ +from transformers import SegformerForSemanticSegmentation + +import keras_hub + +DOWNLOAD_URL = "nvidia/segformer-b0-finetuned-ade-512-512" + + +def set_conv_weights(conv_layer, state_dict): + conv_weights = state_dict["weight"].numpy().transpose(2, 3, 1, 0) + conv_bias = state_dict["bias"].numpy() + conv_layer.set_weights([conv_weights, conv_bias]) + + +def set_dwconv_weights(conv_layer, state_dict): + conv_weights = state_dict["dwconv.weight"].numpy().transpose(2, 3, 0, 1) + conv_bias = state_dict["dwconv.bias"].numpy() + conv_layer.set_weights([conv_weights, conv_bias]) + + +def set_layer_norm_weights(layer_norm, state_dict): + gamma = state_dict["weight"].numpy() + beta = state_dict["bias"].numpy() + layer_norm.set_weights([gamma, beta]) + + +def set_dense_weights(dense_layer, state_dict): + weight = 
state_dict["weight"].numpy().T + bias = state_dict["bias"].numpy() + dense_layer.set_weights([weight, bias]) + + +def set_hierarchical_encoder_weights(keras_layer, pytorch_layer, key): + + set_layer_norm_weights( + keras_layer.norm1, pytorch_layer.layer_norm_1.state_dict() + ) + + set_dense_weights( + keras_layer.attn.q, pytorch_layer.attention.self.query.state_dict() + ) + set_dense_weights( + keras_layer.attn.k, pytorch_layer.attention.self.key.state_dict() + ) + set_dense_weights( + keras_layer.attn.v, pytorch_layer.attention.self.value.state_dict() + ) + set_dense_weights( + keras_layer.attn.proj, pytorch_layer.attention.output.dense.state_dict() + ) + + if keras_layer.attn.sr_ratio > 1: + set_conv_weights( + keras_layer.attn.sr, pytorch_layer.attention.self.sr.state_dict() + ) + set_layer_norm_weights( + keras_layer.attn.norm, + pytorch_layer.attention.self.layer_norm.state_dict(), + ) + + set_layer_norm_weights( + keras_layer.norm2, pytorch_layer.layer_norm_2.state_dict() + ) + + set_dense_weights( + keras_layer.mlp.fc1, pytorch_layer.mlp.dense1.state_dict() + ) + set_dwconv_weights( + keras_layer.mlp.dwconv, pytorch_layer.mlp.dwconv.state_dict() + ) + set_dense_weights( + keras_layer.mlp.fc2, pytorch_layer.mlp.dense2.state_dict() + ) + + +def main(): + model = SegformerForSemanticSegmentation.from_pretrained(DOWNLOAD_URL) + original_mit = original_mit = model.segformer.encoder + + keras_mit = keras_hub.models.MiTBackbone( + depths=[2, 2, 2, 2], + image_shape=(224, 224, 3), + hidden_dims=[32, 64, 160, 256], + num_layers=4, + blockwise_num_heads=[1, 2, 5, 8], + blockwise_sr_ratios=[8, 4, 2, 1], + max_drop_path_rate=0.1, + patch_sizes=[7, 3, 3, 3], + strides=[4, 2, 2, 2], + ) + + # Indices for the different patch embeddings and layer norms + proj_indices = [1, 6, 11, 16] + layer_norm_indices = [4, 9, 14, 19] + hierarchical_encoder_indices = [ + (2, 0, 0), + (3, 0, 1), + (7, 1, 0), + (8, 1, 1), + (12, 2, 0), + (13, 2, 1), + (17, 3, 0), + (18, 3, 1), + ] + + # 
Loop through the indices to set convolutional and normalization weights + for i, idx in enumerate(proj_indices): + set_conv_weights( + keras_mit.layers[idx].proj, + original_mit.patch_embeddings[i].proj.state_dict(), + ) + set_layer_norm_weights( + keras_mit.layers[idx].norm, + original_mit.patch_embeddings[i].layer_norm.state_dict(), + ) + + # Set layer normalization weights + for i, idx in enumerate(layer_norm_indices): + set_layer_norm_weights( + keras_mit.layers[idx], original_mit.layer_norm[i].state_dict() + ) + + # Set hierarchical encoder weights + for layer_idx, block_idx, key in hierarchical_encoder_indices: + set_hierarchical_encoder_weights( + keras_mit.layers[layer_idx], + original_mit.block[block_idx][int(key)], + key=key, + ) From 6638cb1c16dc133cd1c63b7e4b1997e049838314 Mon Sep 17 00:00:00 2001 From: David Landup Date: Mon, 7 Oct 2024 15:55:58 +0900 Subject: [PATCH 29/66] style --- keras_hub/src/models/mix_transformer/mix_transformer_layers.py | 2 +- tools/checkpoint_conversion/convert_mix_transformer.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py index 1446e0e382..1586bb2629 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py @@ -34,7 +34,7 @@ def __init__(self, project_dim=32, patch_size=7, stride=4, **kwargs): strides=stride, padding="same", ) - self.norm = keras.layers.LayerNormalization() + self.norm = keras.layers.LayerNormalization(epsilon=1e-5) def call(self, x): x = self.proj(x) diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index 0c9120fbb0..97ccb0e1b5 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -126,3 +126,5 @@ def main(): 
original_mit.block[block_idx][int(key)], key=key, ) + + keras_mit.save("mit.keras") From 9a3f82d52940c48b038b5c2d06c18ad11f0130c5 Mon Sep 17 00:00:00 2001 From: David Landup Date: Mon, 7 Oct 2024 20:03:46 +0900 Subject: [PATCH 30/66] add padding to MiT patchingandembedding --- .../src/models/mix_transformer/mix_transformer_layers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py index 1586bb2629..fc5180ca90 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py @@ -28,15 +28,21 @@ def __init__(self, project_dim=32, patch_size=7, stride=4, **kwargs): self.patch_size = patch_size self.stride = stride + padding_size = self.patch_size // 2 + + self.padding = keras.layers.ZeroPadding2D( + padding=(padding_size, padding_size) + ) self.proj = keras.layers.Conv2D( filters=project_dim, kernel_size=patch_size, strides=stride, - padding="same", + padding="valid", ) self.norm = keras.layers.LayerNormalization(epsilon=1e-5) def call(self, x): + x = self.padding(x) x = self.proj(x) x = ops.reshape(x, (-1, x.shape[1] * x.shape[2], x.shape[3])) x = self.norm(x) From 76a6dd2e6f8d2f23f7b199ee73b71b62ceef259e Mon Sep 17 00:00:00 2001 From: David Landup Date: Mon, 7 Oct 2024 20:22:23 +0900 Subject: [PATCH 31/66] update to support other presets --- .../convert_mix_transformer.py | 81 +++++++++++++++---- 1 file changed, 64 insertions(+), 17 deletions(-) diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index 97ccb0e1b5..4b0c2ebe9b 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -1,8 +1,62 @@ +from absl import flags from transformers import SegformerForSemanticSegmentation import keras_hub 
-DOWNLOAD_URL = "nvidia/segformer-b0-finetuned-ade-512-512" +FLAGS = flags.FLAGS + + +DOWNLOAD_URLS = { + "B0": "nvidia/segformer-b0-finetuned-ade-512-512", + "B1": "nvidia/segformer-b1-finetuned-ade-512-512", + "B2": "nvidia/segformer-b2-finetuned-ade-512-512", + "B3": "nvidia/segformer-b3-finetuned-ade-512-512", + "B4": "nvidia/segformer-b4-finetuned-ade-512-512", + "B5": "nvidia/segformer-b5-finetuned-ade-512-512", +} + + +MODEL_CONFIGS = { + "B0": {"hidden_dims": [32, 64, 160, 256], "depths": [2, 2, 2, 2]}, + "B1": {"hidden_dims": [64, 128, 320, 512], "depths": [2, 2, 2, 2]}, + "B2": {"hidden_dims": [64, 128, 320, 512], "depths": [3, 4, 6, 3]}, + "B3": {"hidden_dims": [64, 128, 320, 512], "depths": [3, 4, 18, 3]}, + "B4": {"hidden_dims": [64, 128, 320, 512], "depths": [3, 8, 27, 3]}, + "B5": {"hidden_dims": [64, 128, 320, 512], "depths": [3, 6, 40, 3]}, +} + +flags.DEFINE_string( + "preset", None, f'Must be one of {",".join(DOWNLOAD_URLS.keys())}' +) + + +# Function to dynamically generate indices based on the preset depth +def get_indices_from_depths(depths): + proj_indices = [] + layer_norm_indices = [] + hierarchical_encoder_indices = [] + + current_index = 1 # This will track the layer index for keras_mit.layers + + # Loop through the depth of each stage (depths of block layers for each stage) + for stage_idx, depth in enumerate(depths): + # Patch embedding for each stage + proj_indices.append(current_index) + layer_norm_indices.append( + current_index + 3 + ) # LayerNorm appears 3 layers after the proj layer + + # Hierarchical encoder blocks + for block_idx in range(depth): + hierarchical_encoder_indices.append( + (current_index + 1 + block_idx * 5, stage_idx, block_idx) + ) + + current_index += ( + 5 * depth + ) # Each block takes 5 layers in Keras implementation + + return proj_indices, layer_norm_indices, hierarchical_encoder_indices def set_conv_weights(conv_layer, state_dict): @@ -73,13 +127,15 @@ def set_hierarchical_encoder_weights(keras_layer, 
pytorch_layer, key): def main(): - model = SegformerForSemanticSegmentation.from_pretrained(DOWNLOAD_URL) + model = SegformerForSemanticSegmentation.from_pretrained( + DOWNLOAD_URLS[FLAGS.preset] + ) original_mit = original_mit = model.segformer.encoder keras_mit = keras_hub.models.MiTBackbone( - depths=[2, 2, 2, 2], + depths=MODEL_CONFIGS[FLAGS.preset]["depths"], image_shape=(224, 224, 3), - hidden_dims=[32, 64, 160, 256], + hidden_dims=MODEL_CONFIGS[FLAGS.preset]["hidden_dims"], num_layers=4, blockwise_num_heads=[1, 2, 5, 8], blockwise_sr_ratios=[8, 4, 2, 1], @@ -89,18 +145,9 @@ def main(): ) # Indices for the different patch embeddings and layer norms - proj_indices = [1, 6, 11, 16] - layer_norm_indices = [4, 9, 14, 19] - hierarchical_encoder_indices = [ - (2, 0, 0), - (3, 0, 1), - (7, 1, 0), - (8, 1, 1), - (12, 2, 0), - (13, 2, 1), - (17, 3, 0), - (18, 3, 1), - ] + proj_indices, layer_norm_indices, hierarchical_encoder_indices = ( + get_indices_from_depths(MODEL_CONFIGS[FLAGS.preset]["depths"]) + ) # Loop through the indices to set convolutional and normalization weights for i, idx in enumerate(proj_indices): @@ -127,4 +174,4 @@ def main(): key=key, ) - keras_mit.save("mit.keras") + keras_mit.save(f"mit_{FLAGS.preset}.keras") From 7b06c7968ab6fa4c61532e855d7184b1ebe462ea Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 09:44:13 +0900 Subject: [PATCH 32/66] update conversin script --- .../convert_mix_transformer.py | 48 ++++++++++++------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index 4b0c2ebe9b..b946526166 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -1,3 +1,7 @@ +# Usage example +# python tools/checkpoint_conversion/convert_mix_transformer.py --preset "B0" + +from absl import app from absl import flags from transformers 
import SegformerForSemanticSegmentation @@ -30,33 +34,31 @@ ) -# Function to dynamically generate indices based on the preset depth def get_indices_from_depths(depths): proj_indices = [] - layer_norm_indices = [] + norm_indices = [] hierarchical_encoder_indices = [] - current_index = 1 # This will track the layer index for keras_mit.layers + current_layer_idx = 1 - # Loop through the depth of each stage (depths of block layers for each stage) - for stage_idx, depth in enumerate(depths): - # Patch embedding for each stage - proj_indices.append(current_index) - layer_norm_indices.append( - current_index + 3 - ) # LayerNorm appears 3 layers after the proj layer + for layer_idx, depth in enumerate(depths): + # Add projection index (before the hierarchical encoders) + proj_indices.append(current_layer_idx) - # Hierarchical encoder blocks + # Hierarchical encoder block indices for block_idx in range(depth): hierarchical_encoder_indices.append( - (current_index + 1 + block_idx * 5, stage_idx, block_idx) + (current_layer_idx + 1, layer_idx, block_idx) ) + current_layer_idx += 1 + + # Add normalization index (after the hierarchical encoders) + norm_indices.append(current_layer_idx + 1) - current_index += ( - 5 * depth - ) # Each block takes 5 layers in Keras implementation + # Skip to the next layer after output_level + current_layer_idx += 3 - return proj_indices, layer_norm_indices, hierarchical_encoder_indices + return proj_indices, norm_indices, hierarchical_encoder_indices def set_conv_weights(conv_layer, state_dict): @@ -126,12 +128,14 @@ def set_hierarchical_encoder_weights(keras_layer, pytorch_layer, key): ) -def main(): +def main(_): + print("\n-> Loading HuggingFace model") model = SegformerForSemanticSegmentation.from_pretrained( DOWNLOAD_URLS[FLAGS.preset] ) original_mit = original_mit = model.segformer.encoder + print("\n-> Instantiating KerasHub Model") keras_mit = keras_hub.models.MiTBackbone( depths=MODEL_CONFIGS[FLAGS.preset]["depths"], image_shape=(224, 
224, 3), @@ -149,6 +153,7 @@ def main(): get_indices_from_depths(MODEL_CONFIGS[FLAGS.preset]["depths"]) ) + print("\n-> Converting weights...") # Loop through the indices to set convolutional and normalization weights for i, idx in enumerate(proj_indices): set_conv_weights( @@ -174,4 +179,11 @@ def main(): key=key, ) - keras_mit.save(f"mit_{FLAGS.preset}.keras") + save_filepath = f"mit_{FLAGS.preset}.keras" + print(f"\n-> Saving converted KerasHub model in {save_filepath}") + keras_mit.save(save_filepath) + + +if __name__ == "__main__": + flags.mark_flag_as_required("preset") + app.run(main) From 6e9728fe83e0e8e10f0a67e9cadb745bac811f3e Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 09:47:44 +0900 Subject: [PATCH 33/66] fix link for b5 --- tools/checkpoint_conversion/convert_mix_transformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index b946526166..2982bdabc8 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -16,7 +16,7 @@ "B2": "nvidia/segformer-b2-finetuned-ade-512-512", "B3": "nvidia/segformer-b3-finetuned-ade-512-512", "B4": "nvidia/segformer-b4-finetuned-ade-512-512", - "B5": "nvidia/segformer-b5-finetuned-ade-512-512", + "B5": "nvidia/segformer-b5-finetuned-ade-640-640", } From 71190226a92e38ecb8233dc8874dc0a6af8a8e86 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 09:52:43 +0900 Subject: [PATCH 34/66] add cityscapes weights --- .../convert_mix_transformer.py | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index 2982bdabc8..754663d44f 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ 
b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -1,5 +1,5 @@ # Usage example -# python tools/checkpoint_conversion/convert_mix_transformer.py --preset "B0" +# python tools/checkpoint_conversion/convert_mix_transformer.py --preset "B0_ade_512" from absl import app from absl import flags @@ -11,12 +11,18 @@ DOWNLOAD_URLS = { - "B0": "nvidia/segformer-b0-finetuned-ade-512-512", - "B1": "nvidia/segformer-b1-finetuned-ade-512-512", - "B2": "nvidia/segformer-b2-finetuned-ade-512-512", - "B3": "nvidia/segformer-b3-finetuned-ade-512-512", - "B4": "nvidia/segformer-b4-finetuned-ade-512-512", - "B5": "nvidia/segformer-b5-finetuned-ade-640-640", + "B0_ade_512": "nvidia/segformer-b0-finetuned-ade-512-512", + "B1_ade_512": "nvidia/segformer-b1-finetuned-ade-512-512", + "B2_ade_512": "nvidia/segformer-b2-finetuned-ade-512-512", + "B3_ade_512": "nvidia/segformer-b3-finetuned-ade-512-512", + "B4_ade_512": "nvidia/segformer-b4-finetuned-ade-512-512", + "B5_ade_640": "nvidia/segformer-b5-finetuned-ade-640-640", + "B0_cityscapes_1024": "nvidia/segformer-b0-finetuned-cityscapes-1024-1024", + "B1_cityscapes_1024": "nvidia/segformer-b1-finetuned-cityscapes-1024-1024", + "B2_cityscapes_1024": "nvidia/segformer-b2-finetuned-cityscapes-1024-1024", + "B3_cityscapes_1024": "nvidia/segformer-b3-finetuned-cityscapes-1024-1024", + "B4_cityscapes_1024": "nvidia/segformer-b4-finetuned-cityscapes-1024-1024", + "B5_cityscapes_1024": "nvidia/segformer-b5-finetuned-cityscapes-1024-1024", } @@ -135,11 +141,12 @@ def main(_): ) original_mit = original_mit = model.segformer.encoder + model_type = FLAGS.preset.split("_")[0] print("\n-> Instantiating KerasHub Model") keras_mit = keras_hub.models.MiTBackbone( - depths=MODEL_CONFIGS[FLAGS.preset]["depths"], + depths=MODEL_CONFIGS[model_type]["depths"], image_shape=(224, 224, 3), - hidden_dims=MODEL_CONFIGS[FLAGS.preset]["hidden_dims"], + hidden_dims=MODEL_CONFIGS[model_type]["hidden_dims"], num_layers=4, blockwise_num_heads=[1, 2, 5, 8], 
blockwise_sr_ratios=[8, 4, 2, 1], @@ -150,7 +157,7 @@ def main(_): # Indices for the different patch embeddings and layer norms proj_indices, layer_norm_indices, hierarchical_encoder_indices = ( - get_indices_from_depths(MODEL_CONFIGS[FLAGS.preset]["depths"]) + get_indices_from_depths(MODEL_CONFIGS[model_type]["depths"]) ) print("\n-> Converting weights...") From 8ea5f63a6ae6270861742569c21396924491adab Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 09:54:51 +0900 Subject: [PATCH 35/66] update presets --- .../mix_transformer_presets.py | 77 +++++++++++++++---- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py index 5e5e504cee..96eab04088 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py @@ -13,8 +13,8 @@ # limitations under the License. """MiT model preset configurations.""" -backbone_presets_no_weights = { - "mit_b0": { +backbone_presets_with_weights = { + "mit_b0_ade20k_512": { "metadata": { "description": ( "MiT (MixTransformer) model with 8 transformer blocks." @@ -25,7 +25,7 @@ }, "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b0", # Not uploaded yet }, - "mit_b1": { + "mit_b1_ade20k_512": { "metadata": { "description": ( "MiT (MixTransformer) model with 8 transformer blocks." @@ -36,7 +36,7 @@ }, "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b1", # Not uploaded yet }, - "mit_b2": { + "mit_b2_ade20k_512": { "metadata": { "description": ( "MiT (MixTransformer) model with 16 transformer blocks." @@ -47,7 +47,7 @@ }, "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b2", # Not uploaded yet }, - "mit_b3": { + "mit_b3_ade20k_512": { "metadata": { "description": ( "MiT (MixTransformer) model with 28 transformer blocks." 
@@ -58,7 +58,7 @@ }, "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b3", # Not uploaded yet }, - "mit_b4": { + "mit_b4_ade20k_512": { "metadata": { "description": ( "MiT (MixTransformer) model with 41 transformer blocks." @@ -69,7 +69,7 @@ }, "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b4", # Not uploaded yet }, - "mit_b5": { + "mit_b5_ade20k_512": { "metadata": { "description": ( "MiT (MixTransformer) model with 52 transformer blocks." @@ -80,13 +80,10 @@ }, "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b5", # Not uploaded yet }, -} - -backbone_presets_with_weights = { - "mit_b0_imagenet": { + "mit_b0_cityscapes_1024": { "metadata": { "description": ( - "MiT (MixTransformer) model with 8 transformer blocks. Pre-trained on ImageNet-1K and scores 69% top-1 accuracy on the validation set." # noqa: E501 + "MiT (MixTransformer) model with 8 transformer blocks." ), "params": 3321962, "official_name": "MiT", @@ -94,9 +91,63 @@ }, "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b0", # Not uploaded yet }, + "mit_b1_cityscapes_1024": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 8 transformer blocks." + ), + "params": 13156554, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b1", # Not uploaded yet + }, + "mit_b2_cityscapes_1024": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 16 transformer blocks." + ), + "params": 24201418, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b2", # Not uploaded yet + }, + "mit_b3_cityscapes_1024": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 28 transformer blocks." + ), + "params": 44077258, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b3", # Not uploaded yet + }, + "mit_b4_cityscapes_1024": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 41 transformer blocks." 
+ ), + "params": 60847818, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b4", # Not uploaded yet + }, + "mit_b5_cityscapes_1024": { + "metadata": { + "description": ( + "MiT (MixTransformer) model with 52 transformer blocks." + ), + "params": 81448138, + "official_name": "MiT", + "path": "mit", + }, + "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b5", # Not uploaded yet + }, } backbone_presets = { - **backbone_presets_no_weights, **backbone_presets_with_weights, } From 0705748fc80d2b68a953a5039ea3972ec815bdf1 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 10:25:40 +0900 Subject: [PATCH 36/66] update presets --- .../mix_transformer_presets.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py index 96eab04088..63daf19094 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py @@ -23,7 +23,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b0", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b0_ade_512", }, "mit_b1_ade20k_512": { "metadata": { @@ -34,7 +34,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b1", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b1_ade_512", }, "mit_b2_ade20k_512": { "metadata": { @@ -45,7 +45,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b2", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b2_ade_512", }, "mit_b3_ade20k_512": { "metadata": { @@ -56,7 +56,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b3", # 
Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b3_ade_512", }, "mit_b4_ade20k_512": { "metadata": { @@ -67,9 +67,9 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b4", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b4_ade_512", }, - "mit_b5_ade20k_512": { + "mit_b5_ade20k_640": { "metadata": { "description": ( "MiT (MixTransformer) model with 52 transformer blocks." @@ -78,7 +78,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b5", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b5_ade_512", }, "mit_b0_cityscapes_1024": { "metadata": { @@ -89,7 +89,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b0", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b0_cityscapes_1024", }, "mit_b1_cityscapes_1024": { "metadata": { @@ -100,7 +100,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b1", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b1_cityscapes_1024", }, "mit_b2_cityscapes_1024": { "metadata": { @@ -111,7 +111,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b2", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b2_cityscapes_1024", }, "mit_b3_cityscapes_1024": { "metadata": { @@ -122,7 +122,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b3", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b3_cityscapes_1024", }, "mit_b4_cityscapes_1024": { "metadata": { @@ -133,7 +133,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b4", # Not uploaded yet + "kaggle_handle": 
"kaggle://kerashub/mix-transformer/keras/mit_b4_cityscapes_1024", }, "mit_b5_cityscapes_1024": { "metadata": { @@ -144,7 +144,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mit/keras/mit_b5", # Not uploaded yet + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b5_cityscapes_1024", }, } From eb1c2361b6363f0880a0c6e36f4d76ed032e3b6e Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 11:01:33 +0900 Subject: [PATCH 37/66] update conversion script to make directories --- .../mix_transformer/mix_transformer_presets.py | 12 ++++++------ .../checkpoint_conversion/convert_mix_transformer.py | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py index 63daf19094..3e3db15117 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py @@ -23,7 +23,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b0_ade_512", + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b0_ade20k_512", }, "mit_b1_ade20k_512": { "metadata": { @@ -34,7 +34,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b1_ade_512", + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b1_ade20k_512", }, "mit_b2_ade20k_512": { "metadata": { @@ -45,7 +45,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b2_ade_512", + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b2_ade20k_512", }, "mit_b3_ade20k_512": { "metadata": { @@ -56,7 +56,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b3_ade_512", + "kaggle_handle": 
"kaggle://kerashub/mix-transformer/keras/mit_b3_ade20k_512", }, "mit_b4_ade20k_512": { "metadata": { @@ -67,7 +67,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b4_ade_512", + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b4_ade20k_512", }, "mit_b5_ade20k_640": { "metadata": { @@ -78,7 +78,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b5_ade_512", + "kaggle_handle": "kaggle://kerashub/mix-transformer/keras/mit_b5_ade20k_512", }, "mit_b0_cityscapes_1024": { "metadata": { diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index 754663d44f..f8cbc3a9cf 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -1,6 +1,8 @@ # Usage example # python tools/checkpoint_conversion/convert_mix_transformer.py --preset "B0_ade_512" +import os + from absl import app from absl import flags from transformers import SegformerForSemanticSegmentation @@ -186,7 +188,10 @@ def main(_): key=key, ) - save_filepath = f"mit_{FLAGS.preset}.keras" + directory = f"MiT_{model_type}" + os.makedirs(directory, exist_ok=True) + + save_filepath = os.path.join(directory, f"model.weights.h5") print(f"\n-> Saving converted KerasHub model in {save_filepath}") keras_mit.save(save_filepath) From fc4259827941eacf85902fe9ee889d6991beb9fa Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 17:56:43 +0900 Subject: [PATCH 38/66] use save_preset --- tools/checkpoint_conversion/convert_mix_transformer.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index f8cbc3a9cf..3cd2bf9236 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ 
b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -1,8 +1,6 @@ # Usage example # python tools/checkpoint_conversion/convert_mix_transformer.py --preset "B0_ade_512" -import os - from absl import app from absl import flags from transformers import SegformerForSemanticSegmentation @@ -189,11 +187,8 @@ def main(_): ) directory = f"MiT_{model_type}" - os.makedirs(directory, exist_ok=True) - - save_filepath = os.path.join(directory, f"model.weights.h5") - print(f"\n-> Saving converted KerasHub model in {save_filepath}") - keras_mit.save(save_filepath) + print(f"\n-> Saving converted KerasHub model in {directory}") + keras_mit.save_to_preset(directory) if __name__ == "__main__": From 4274c60b16568909467d99aeb269ff8d5c3bb63a Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 18:17:14 +0900 Subject: [PATCH 39/66] change name of output dir --- keras_hub/src/utils/preset_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/keras_hub/src/utils/preset_utils.py b/keras_hub/src/utils/preset_utils.py index 65af19df7f..30d48a2834 100644 --- a/keras_hub/src/utils/preset_utils.py +++ b/keras_hub/src/utils/preset_utils.py @@ -155,7 +155,9 @@ def get_file(preset, path): f"version). 
Received: preset={preset}" ) try: - return kagglehub.model_download(kaggle_handle, path) + return kagglehub.model_download( + kaggle_handle, path, force_download=True + ) except KaggleApiHTTPError as e: message = str(e) if message.find("403 Client Error"): From dc72ea7873fc86797d20b253b3d42f8ec9b1b427 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 18:32:16 +0900 Subject: [PATCH 40/66] add preprocessor flow --- .../mix_transformer_classifier_preprocessor.py | 16 ++++++++++++++++ .../mix_transformer_image_converter.py | 10 ++++++++++ .../convert_mix_transformer.py | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 keras_hub/src/models/mix_transformer/mix_transformer_classifier_preprocessor.py create mode 100644 keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_classifier_preprocessor.py b/keras_hub/src/models/mix_transformer/mix_transformer_classifier_preprocessor.py new file mode 100644 index 0000000000..61c994c5fb --- /dev/null +++ b/keras_hub/src/models/mix_transformer/mix_transformer_classifier_preprocessor.py @@ -0,0 +1,16 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.image_classifier_preprocessor import ( + ImageClassifierPreprocessor, +) +from keras_hub.src.models.mix_transformer.mix_transformer_backbone import ( + MiTBackbone, +) +from keras_hub.src.models.mix_transformer.mix_transformer_image_converter import ( + MiTImageConverter, +) + + +@keras_hub_export("keras_hub.models.MiTImageClassifierPreprocessor") +class MiTImageClassifierPreprocessor(ImageClassifierPreprocessor): + backbone_cls = MiTBackbone + image_converter_cls = MiTImageConverter diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py b/keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py new file mode 100644 index 0000000000..d5c8f1f70d --- /dev/null +++ 
b/keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py @@ -0,0 +1,10 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.layers.preprocessing.resizing_image_converter import ( + ResizingImageConverter, +) +from keras_hub.src.models.mix_transformer import MiTBackbone + + +@keras_hub_export("keras_hub.layers.MiTImageConverter") +class MiTImageConverter(ResizingImageConverter): + backbone_cls = MiTBackbone diff --git a/tools/checkpoint_conversion/convert_mix_transformer.py b/tools/checkpoint_conversion/convert_mix_transformer.py index 3cd2bf9236..6419cc405e 100644 --- a/tools/checkpoint_conversion/convert_mix_transformer.py +++ b/tools/checkpoint_conversion/convert_mix_transformer.py @@ -186,7 +186,7 @@ def main(_): key=key, ) - directory = f"MiT_{model_type}" + directory = f"MiT_{FLAGS.preset}" print(f"\n-> Saving converted KerasHub model in {directory}") keras_mit.save_to_preset(directory) From 65f1822c1cb46c1d26b6df4ba8b35f16bb218c48 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 18:37:17 +0900 Subject: [PATCH 41/66] api gen and add preprocessor to mits --- keras_hub/api/layers/__init__.py | 3 +++ keras_hub/api/models/__init__.py | 3 +++ .../mix_transformer/mix_transformer_classifier.py | 14 +++++++++----- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py index a287bcca88..8c7b3fcfe1 100644 --- a/keras_hub/api/layers/__init__.py +++ b/keras_hub/api/layers/__init__.py @@ -40,6 +40,9 @@ from keras_hub.src.models.densenet.densenet_image_converter import ( DenseNetImageConverter, ) +from keras_hub.src.models.mix_transformer.mix_transformer_image_converter import ( + MiTImageConverter, +) from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import ( PaliGemmaImageConverter, ) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 5e7cbb1c9d..e375454fd6 100644 --- 
a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -197,6 +197,9 @@ from keras_hub.src.models.mix_transformer.mix_transformer_classifier import ( MiTImageClassifier, ) +from keras_hub.src.models.mix_transformer.mix_transformer_classifier_preprocessor import ( + MiTImageClassifierPreprocessor, +) from keras_hub.src.models.mobilenet.mobilenet_backbone import MobileNetBackbone from keras_hub.src.models.mobilenet.mobilenet_image_classifier import ( MobileNetImageClassifier, diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py index 5fea71f417..ff83a0588f 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py @@ -5,6 +5,9 @@ from keras_hub.src.models.mix_transformer.mix_transformer_backbone import ( MiTBackbone, ) +from keras_hub.src.models.mix_transformer.mix_transformer_classifier_preprocessor import ( + MiTImageClassifierPreprocessor, +) @keras_hub_export("keras_hub.models.MiTImageClassifier") @@ -30,7 +33,7 @@ class MiTImageClassifier(ImageClassifier): # Load preset and train images = np.ones((2, 224, 224, 3), dtype="float32") classifier = keras_hub.models.MiTImageClassifier.from_preset( - "mit_b0_imagenet") + "mit_b0_ade20k_512") classifier.predict(images) ``` @@ -40,14 +43,14 @@ class MiTImageClassifier(ImageClassifier): images = np.ones((2, 224, 224, 3), dtype="float32") labels = [0, 3] classifier = keras_hub.models.MixTransformerImageClassifier.from_preset( - "mit_b0_imagenet") + "mit_b0_ade20k_512") classifier.fit(x=images, y=labels, batch_size=2) ``` Call `fit()` with custom loss, optimizer and backbone. 
```python classifier = keras_hub.models.MiTImageClassifier.from_preset( - "mit_b0_imagenet") + "mit_b0_ade20k_512") classifier.compile( loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=keras.optimizers.Adam(5e-5), @@ -75,18 +78,19 @@ class MiTImageClassifier(ImageClassifier): """ backbone_cls = MiTBackbone + preprocessor_cls = MiTImageClassifierPreprocessor def __init__( self, backbone, num_classes, activation="softmax", - preprocessor=None, # adding this dummy arg for saved model test - # TODO: once preprocessor flow is figured out, this needs to be updated + preprocessor=None, **kwargs, ): # === Layers === self.backbone = backbone + self.preprocessor = preprocessor self.output_dense = keras.layers.Dense( num_classes, activation=activation, From 000d7d06e4f10965b21c1b09648e4da6d4777535 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 18:51:32 +0900 Subject: [PATCH 42/66] conform to new image classifier style --- .../mix_transformer_classifier.py | 110 ------------------ 1 file changed, 110 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py index ff83a0588f..05c80b53f8 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py @@ -1,5 +1,3 @@ -import keras - from keras_hub.src.api_export import keras_hub_export from keras_hub.src.models.image_classifier import ImageClassifier from keras_hub.src.models.mix_transformer.mix_transformer_backbone import ( @@ -9,115 +7,7 @@ MiTImageClassifierPreprocessor, ) - @keras_hub_export("keras_hub.models.MiTImageClassifier") class MiTImageClassifier(ImageClassifier): - """MiTImageClassifier image classifier model. - - Args: - backbone: A `keras_hub.models.MiTBackbone` instance. - num_classes: int. The number of classes to predict. - activation: `None`, str or callable. 
The activation function to use on - the `Dense` layer. Set `activation=None` to return the output - logits. Defaults to `"softmax"`. - - To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` - where `x` is a tensor and `y` is a integer from `[0, num_classes)`. - All `ImageClassifier` tasks include a `from_preset()` constructor which can - be used to load a pre-trained config and weights. - - Examples: - - Call `predict()` to run inference. - ```python - # Load preset and train - images = np.ones((2, 224, 224, 3), dtype="float32") - classifier = keras_hub.models.MiTImageClassifier.from_preset( - "mit_b0_ade20k_512") - classifier.predict(images) - ``` - - Call `fit()` on a single batch. - ```python - # Load preset and train - images = np.ones((2, 224, 224, 3), dtype="float32") - labels = [0, 3] - classifier = keras_hub.models.MixTransformerImageClassifier.from_preset( - "mit_b0_ade20k_512") - classifier.fit(x=images, y=labels, batch_size=2) - ``` - - Call `fit()` with custom loss, optimizer and backbone. - ```python - classifier = keras_hub.models.MiTImageClassifier.from_preset( - "mit_b0_ade20k_512") - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - optimizer=keras.optimizers.Adam(5e-5), - ) - classifier.backbone.trainable = False - classifier.fit(x=images, y=labels, batch_size=2) - ``` - - Custom backbone. 
- ```python - images = np.ones((2, 224, 224, 3), dtype="float32") - labels = [0, 3] - backbone = keras_hub.models.MiTBackbone( - stackwise_num_filters=[128, 256, 512, 1024], - stackwise_depth=[3, 9, 9, 3], - block_type="basic_block", - image_shape = (224, 224, 3), - ) - classifier = keras_hub.models.MiTImageClassifier( - backbone=backbone, - num_classes=4, - ) - classifier.fit(x=images, y=labels, batch_size=2) - ``` - """ - backbone_cls = MiTBackbone preprocessor_cls = MiTImageClassifierPreprocessor - - def __init__( - self, - backbone, - num_classes, - activation="softmax", - preprocessor=None, - **kwargs, - ): - # === Layers === - self.backbone = backbone - self.preprocessor = preprocessor - self.output_dense = keras.layers.Dense( - num_classes, - activation=activation, - name="predictions", - ) - - # === Functional Model === - inputs = self.backbone.input - x = self.backbone(inputs) - outputs = self.output_dense(x) - super().__init__( - inputs=inputs, - outputs=outputs, - **kwargs, - ) - - # === Config === - self.num_classes = num_classes - self.activation = activation - - def get_config(self): - # Backbone serialized in `super` - config = super().get_config() - config.update( - { - "num_classes": self.num_classes, - "activation": self.activation, - } - ) - return config From fa34f9e24f762eea0593efb9d220b5ddd12d766c Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 18:54:30 +0900 Subject: [PATCH 43/66] format --- .../src/models/mix_transformer/mix_transformer_classifier.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py index 05c80b53f8..beab6646ba 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_classifier.py @@ -7,6 +7,7 @@ MiTImageClassifierPreprocessor, ) + @keras_hub_export("keras_hub.models.MiTImageClassifier") class 
MiTImageClassifier(ImageClassifier): backbone_cls = MiTBackbone From e3c6dc6cdaf372ff602730ecb3de54afb1f2beb3 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 18:56:59 +0900 Subject: [PATCH 44/66] resizing image converter -> ImageConverter --- .../mix_transformer/mix_transformer_image_converter.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py b/keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py index d5c8f1f70d..e59ea26b66 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_image_converter.py @@ -1,10 +1,8 @@ from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.layers.preprocessing.resizing_image_converter import ( - ResizingImageConverter, -) +from keras_hub.src.layers.preprocessing.image_converter import ImageConverter from keras_hub.src.models.mix_transformer import MiTBackbone @keras_hub_export("keras_hub.layers.MiTImageConverter") -class MiTImageConverter(ResizingImageConverter): +class MiTImageConverter(ImageConverter): backbone_cls = MiTBackbone From 38dadef63d50e2dff8d44e766cd84426e2830955 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 19:15:03 +0900 Subject: [PATCH 45/66] merge mit branch into segformer branch --- keras_hub/src/models/segformer/segformer_backbone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index dd45da8648..de47d5c697 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -88,7 +88,7 @@ class SegFormerBackbone(Backbone): ```python import keras_hub - backbone = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") + backbone = 
keras_hub.models.MiTBackbone.from_preset("mit_b0_ade20k_512") segformer_backbone = keras_hub.models.SegFormerBackbone(image_encoder=backbone, projection_filters=256) ``` From 983642ceb643c598b7b769578acc755aa2a61823 Mon Sep 17 00:00:00 2001 From: David Landup Date: Tue, 8 Oct 2024 20:45:27 +0900 Subject: [PATCH 46/66] add preprocessor and converter --- keras_hub/api/layers/__init__.py | 3 +++ keras_hub/api/models/__init__.py | 3 +++ .../segformer/segformer_image_converter.py | 8 +++++++ .../segformer_image_segmenter_preprocessor.py | 24 +++++++++++++++++++ 4 files changed, 38 insertions(+) create mode 100644 keras_hub/src/models/segformer/segformer_image_converter.py create mode 100644 keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py index c216befdb9..66d26a6cf7 100644 --- a/keras_hub/api/layers/__init__.py +++ b/keras_hub/api/layers/__init__.py @@ -52,6 +52,9 @@ from keras_hub.src.models.sam.sam_image_converter import SAMImageConverter from keras_hub.src.models.sam.sam_mask_decoder import SAMMaskDecoder from keras_hub.src.models.sam.sam_prompt_encoder import SAMPromptEncoder +from keras_hub.src.models.segformer.segformer_image_converter import ( + SegFormerImageConverter, +) from keras_hub.src.models.whisper.whisper_audio_converter import ( WhisperAudioConverter, ) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index b1e347201f..88e9b68e05 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -272,6 +272,9 @@ from keras_hub.src.models.segformer.segformer_image_segmenter import ( SegFormerImageSegmenter, ) +from keras_hub.src.models.segformer.segformer_image_segmenter_preprocessor import ( + SegFormerImageSegmenterPreprocessor, +) from keras_hub.src.models.seq_2_seq_lm import Seq2SeqLM from keras_hub.src.models.seq_2_seq_lm_preprocessor import Seq2SeqLMPreprocessor from 
keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import ( diff --git a/keras_hub/src/models/segformer/segformer_image_converter.py b/keras_hub/src/models/segformer/segformer_image_converter.py new file mode 100644 index 0000000000..44febd6833 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_image_converter.py @@ -0,0 +1,8 @@ +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.layers.preprocessing.image_converter import ImageConverter +from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone + + +@keras_hub_export("keras_hub.layers.SegFormerImageConverter") +class SegFormerImageConverter(ImageConverter): + backbone_cls = SegFormerBackbone diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py b/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py new file mode 100644 index 0000000000..6b52c78b55 --- /dev/null +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py @@ -0,0 +1,24 @@ +import keras + +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.models.image_segmenter_preprocessor import ( + ImageSegmenterPreprocessor, +) +from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone +from keras_hub.src.models.segformer.segformer_image_converter import ( + SegFormerImageConverter, +) +from keras_hub.src.utils.tensor_utils import preprocessing_function + + +@keras_hub_export("keras_hub.models.SegFormerImageSegmenterPreprocessor") +class SegFormerImageSegmenterPreprocessor(ImageSegmenterPreprocessor): + backbone_cls = SegFormerBackbone + image_converter_cls = SegFormerImageConverter + + @preprocessing_function + def call(self, x, y=None, sample_weight=None): + images = x["images"] + if self.image_converter: + x["images"] = self.image_converter(images) + return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) From 2a8ffcb6e8b13dcbd281a71c09c416d14566e2c1 Mon Sep 17 
00:00:00 2001 From: David Landup Date: Wed, 9 Oct 2024 16:41:03 +0900 Subject: [PATCH 47/66] address comments --- .../src/models/mix_transformer/mix_transformer_presets.py | 2 -- keras_hub/src/utils/preset_utils.py | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py index 3e3db15117..840fa9e1d3 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py @@ -1,5 +1,3 @@ -# Copyright 2024 The KerasHub Authors -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/keras_hub/src/utils/preset_utils.py b/keras_hub/src/utils/preset_utils.py index 30d48a2834..65af19df7f 100644 --- a/keras_hub/src/utils/preset_utils.py +++ b/keras_hub/src/utils/preset_utils.py @@ -155,9 +155,7 @@ def get_file(preset, path): f"version). 
Received: preset={preset}" ) try: - return kagglehub.model_download( - kaggle_handle, path, force_download=True - ) + return kagglehub.model_download(kaggle_handle, path) except KaggleApiHTTPError as e: message = str(e) if message.find("403 Client Error"): From 5b5eb93b955369eeefb0a344a655c89061c337bc Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 9 Oct 2024 16:44:32 +0900 Subject: [PATCH 48/66] clarify backbone usage --- keras_hub/src/models/segformer/segformer_backbone.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index de47d5c697..273368c325 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -48,13 +48,9 @@ class SegFormerBackbone(Backbone): Args: image_encoder: `keras.Model`. The backbone network for the model that is used as a feature extractor for the SegFormer encoder. - It is *intended* to be used only with the MiT backbone model + Should be used with the MiT backbone model (`keras_hub.models.MiTBackbone`) which was created specifically for SegFormers. - Alternatively, can be a `keras_hub.models.Backbone` a model subclassing - `keras_hub.models.FeaturePyramidBackbone`, or a `keras.Model` - that has a `pyramid_outputs` property which is - a dictionary with keys "P2", "P3", "P4", and "P5" and layer names as values. num_classes: int, the number of classes for the detection model, including the background class. 
projection_filters: int, number of filters in the From fcdadb34dc29e0d59837278e373bfc1178778adf Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 9 Oct 2024 21:25:19 +0900 Subject: [PATCH 49/66] add conversion script --- .../models/segformer/segformer_backbone.py | 2 +- .../segformer/segformer_image_segmenter.py | 2 +- .../convert_segformer_checkpoints.py | 85 +++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 tools/checkpoint_conversion/convert_segformer_checkpoints.py diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index 273368c325..f54c8aee42 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -136,7 +136,7 @@ def __init__( keras.layers.Conv2D( filters=projection_filters, kernel_size=1, use_bias=False ), - keras.layers.BatchNormalization(), + keras.layers.BatchNormalization(epsilon=1e-5, momentum=0.1), keras.layers.Activation("relu"), ] ) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index c260b3b9c1..913dad8098 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -126,7 +126,7 @@ def __init__( self.backbone = backbone self.dropout = keras.layers.Dropout(0.1) self.output_segmentation = keras.layers.Conv2D( - filters=num_classes, kernel_size=1, activation="softmax" + filters=num_classes, kernel_size=1 ) self.resizing = keras.layers.Resizing( height=inputs.shape[1], diff --git a/tools/checkpoint_conversion/convert_segformer_checkpoints.py b/tools/checkpoint_conversion/convert_segformer_checkpoints.py new file mode 100644 index 0000000000..31f35a7b48 --- /dev/null +++ b/tools/checkpoint_conversion/convert_segformer_checkpoints.py @@ -0,0 +1,85 @@ +# Usage example +# python 
tools/checkpoint_conversion/convert_mix_transformer.py --preset "B0_ade_512" + +from absl import app +from absl import flags +from transformers import SegformerForSemanticSegmentation + +import keras_hub + +FLAGS = flags.FLAGS + + +DOWNLOAD_URLS = { + "b0_ade20k_512": "nvidia/segformer-b0-finetuned-ade-512-512", + "b1_ade20k_512": "nvidia/segformer-b1-finetuned-ade-512-512", + "b2_ade20k_512": "nvidia/segformer-b2-finetuned-ade-512-512", + "b3_ade20k_512": "nvidia/segformer-b3-finetuned-ade-512-512", + "b4_ade20k_512": "nvidia/segformer-b4-finetuned-ade-512-512", + "b5_ade20k_640": "nvidia/segformer-b5-finetuned-ade-640-640", + "b0_cityscapes_1024": "nvidia/segformer-b0-finetuned-cityscapes-1024-1024", + "b1_cityscapes_1024": "nvidia/segformer-b1-finetuned-cityscapes-1024-1024", + "b2_cityscapes_1024": "nvidia/segformer-b2-finetuned-cityscapes-1024-1024", + "b3_cityscapes_1024": "nvidia/segformer-b3-finetuned-cityscapes-1024-1024", + "b4_cityscapes_1024": "nvidia/segformer-b4-finetuned-cityscapes-1024-1024", + "b5_cityscapes_1024": "nvidia/segformer-b5-finetuned-cityscapes-1024-1024", +} + +flags.DEFINE_string( + "preset", None, f'Must be one of {",".join(DOWNLOAD_URLS.keys())}' +) + +def set_conv_weights(conv_layer, state_dict): + conv_weights = state_dict["weight"].numpy().transpose(2, 3, 1, 0) + bias = None + if "bias" in state_dict.keys(): + bias = state_dict["bias"].numpy() + conv_layer.set_weights([conv_weights, bias]) + else: + conv_layer.set_weights([conv_weights]) + +def set_dense_weights(dense_layer, state_dict): + weight = state_dict["weight"].numpy().T + bias = state_dict["bias"].numpy() + dense_layer.set_weights([weight, bias]) + +def set_batchnorm_weights(bn_layer, state_dict): + gamma = state_dict["weight"].numpy() + beta = state_dict["bias"].numpy() + running_mean = state_dict["running_mean"].numpy() + running_var = state_dict["running_var"].numpy() + + bn_layer.set_weights([gamma, beta, running_mean, running_var]) + + +def main(_): + print("\n-> 
Loading HuggingFace model") + model = SegformerForSemanticSegmentation.from_pretrained( + DOWNLOAD_URLS[FLAGS.preset] + ) + original_segformer = model.segformer + + print("\n-> Instantiating KerasHub Model") + encoder = keras_hub.models.MiTBackbone.from_preset(FLAGS.preset) + + set_dense_weights(encoder.layers[5], original_segformer.decode_head.linear_c[0].proj.state_dict()) + set_dense_weights(encoder.layers[4], original_segformer.decode_head.linear_c[1].proj.state_dict()) + set_dense_weights(encoder.layers[3], original_segformer.decode_head.linear_c[2].proj.state_dict()) + set_dense_weights(encoder.layers[2], original_segformer.decode_head.linear_c[3].proj.state_dict()) + set_conv_weights(encoder.layers[-1].layers[0], original_segformer.decode_head.linear_fuse.state_dict()) + set_batchnorm_weights(encoder.layers[-1].layers[1], original_segformer.decode_head.batch_norm.state_dict()) + + keras_mit = keras_hub.models.SegFormerBackbone(image_encoder=encoder) + + set_conv_weights(keras_mit.layers[-2], original_segformer.decode_head.classifier.state_dict()) + + print("\n-> Converting weights...") + + directory = f"MiT_{FLAGS.preset}" + print(f"\n-> Saving converted KerasHub model in {directory}") + keras_mit.save_to_preset(directory) + + +if __name__ == "__main__": + flags.mark_flag_as_required("preset") + app.run(main) From 68cef655b8224d164fa879706384736f1320c2e8 Mon Sep 17 00:00:00 2001 From: David Landup Date: Mon, 14 Oct 2024 19:58:49 +0900 Subject: [PATCH 50/66] numerical equivalence changes --- .../src/models/mix_transformer/mix_transformer_backbone.py | 2 +- .../src/models/mix_transformer/mix_transformer_layers.py | 3 +-- keras_hub/src/models/segformer/segformer_backbone.py | 5 +++-- keras_hub/src/models/segformer/segformer_image_segmenter.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py index 
bb60fd850a..91b5dcefaf 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_backbone.py @@ -115,7 +115,7 @@ def __init__( ] transformer_blocks.append(transformer_block) cur += depths[i] - layer_norms.append(keras.layers.LayerNormalization()) + layer_norms.append(keras.layers.LayerNormalization(epsilon=1e-5)) # === Functional Model === image_input = keras.layers.Input(shape=image_shape) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py index fc5180ca90..b6629f6f3f 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py @@ -189,9 +189,8 @@ def __init__(self, project_dim, num_heads, sr_ratio): filters=project_dim, kernel_size=sr_ratio, strides=sr_ratio, - padding="same", ) - self.norm = keras.layers.LayerNormalization() + self.norm = keras.layers.LayerNormalization(epsilon=1e-5) def call(self, x): input_shape = ops.shape(x) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index f54c8aee42..e9c79f3ce7 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -121,6 +121,7 @@ def __init__( self.mlp_blocks = [] for feature_dim, feature in zip(image_encoder.hidden_dims, features): + print(feature_dim, feature) self.mlp_blocks.append( keras.layers.Dense( projection_filters, name=f"linear_{feature_dim}" @@ -130,13 +131,13 @@ def __init__( self.resizing = keras.layers.Resizing( height, width, interpolation="bilinear" ) - self.concat = keras.layers.Concatenate(axis=3) + self.concat = keras.layers.Concatenate(axis=-1) self.linear_fuse = keras.Sequential( [ keras.layers.Conv2D( filters=projection_filters, kernel_size=1, use_bias=False ), - 
keras.layers.BatchNormalization(epsilon=1e-5, momentum=0.1), + keras.layers.BatchNormalization(epsilon=1e-5, momentum=0.9), keras.layers.Activation("relu"), ] ) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 913dad8098..a07d3dc9d5 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -126,7 +126,7 @@ def __init__( self.backbone = backbone self.dropout = keras.layers.Dropout(0.1) self.output_segmentation = keras.layers.Conv2D( - filters=num_classes, kernel_size=1 + filters=num_classes, kernel_size=1, strides=1 ) self.resizing = keras.layers.Resizing( height=inputs.shape[1], From 205ae4ac2462db13b8ba0e74d628525575f79b24 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 03:57:00 +0900 Subject: [PATCH 51/66] fix numerical inaccuracies --- .../src/models/mix_transformer/mix_transformer_layers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py index b6629f6f3f..61e6cca148 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py @@ -183,6 +183,7 @@ def __init__(self, project_dim, num_heads, sr_ratio): self.k = keras.layers.Dense(project_dim) self.v = keras.layers.Dense(project_dim) self.proj = keras.layers.Dense(project_dim) + self.dropout = keras.layers.Dropout(0.1) if sr_ratio > 1: self.sr = keras.layers.Conv2D( @@ -211,7 +212,7 @@ def call(self, x): if self.sr_ratio > 1: x = ops.reshape( - ops.transpose(x, [0, 2, 1]), + x, (B, H, W, C), ) x = self.sr(x) @@ -240,8 +241,9 @@ def call(self, x): attn = (q @ ops.transpose(k, [0, 1, 3, 2])) * self.scale attn = ops.nn.softmax(attn, axis=-1) + attn = self.dropout(attn) - attn = attn @ v + attn = 
keras.ops.matmul(attn, v) attn = ops.reshape( ops.transpose(attn, [0, 2, 1, 3]), [input_shape[0], input_shape[1], input_shape[2]], From 8b5fa44a30df1ffaa8f6c35b0b5da453da7653d5 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 03:59:08 +0900 Subject: [PATCH 52/66] update conversion script --- .../convert_segformer_checkpoints.py | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/tools/checkpoint_conversion/convert_segformer_checkpoints.py b/tools/checkpoint_conversion/convert_segformer_checkpoints.py index 31f35a7b48..1f6e90ef54 100644 --- a/tools/checkpoint_conversion/convert_segformer_checkpoints.py +++ b/tools/checkpoint_conversion/convert_segformer_checkpoints.py @@ -29,6 +29,7 @@ "preset", None, f'Must be one of {",".join(DOWNLOAD_URLS.keys())}' ) + def set_conv_weights(conv_layer, state_dict): conv_weights = state_dict["weight"].numpy().transpose(2, 3, 1, 0) bias = None @@ -37,18 +38,20 @@ def set_conv_weights(conv_layer, state_dict): conv_layer.set_weights([conv_weights, bias]) else: conv_layer.set_weights([conv_weights]) - + + def set_dense_weights(dense_layer, state_dict): weight = state_dict["weight"].numpy().T bias = state_dict["bias"].numpy() dense_layer.set_weights([weight, bias]) - + + def set_batchnorm_weights(bn_layer, state_dict): gamma = state_dict["weight"].numpy() beta = state_dict["bias"].numpy() running_mean = state_dict["running_mean"].numpy() running_var = state_dict["running_var"].numpy() - + bn_layer.set_weights([gamma, beta, running_mean, running_var]) @@ -62,16 +65,37 @@ def main(_): print("\n-> Instantiating KerasHub Model") encoder = keras_hub.models.MiTBackbone.from_preset(FLAGS.preset) - set_dense_weights(encoder.layers[5], original_segformer.decode_head.linear_c[0].proj.state_dict()) - set_dense_weights(encoder.layers[4], original_segformer.decode_head.linear_c[1].proj.state_dict()) - set_dense_weights(encoder.layers[3], 
original_segformer.decode_head.linear_c[2].proj.state_dict()) - set_dense_weights(encoder.layers[2], original_segformer.decode_head.linear_c[3].proj.state_dict()) - set_conv_weights(encoder.layers[-1].layers[0], original_segformer.decode_head.linear_fuse.state_dict()) - set_batchnorm_weights(encoder.layers[-1].layers[1], original_segformer.decode_head.batch_norm.state_dict()) + set_dense_weights( + encoder.layers[5], + original_segformer.decode_head.linear_c[0].proj.state_dict(), + ) + set_dense_weights( + encoder.layers[4], + original_segformer.decode_head.linear_c[1].proj.state_dict(), + ) + set_dense_weights( + encoder.layers[3], + original_segformer.decode_head.linear_c[2].proj.state_dict(), + ) + set_dense_weights( + encoder.layers[2], + original_segformer.decode_head.linear_c[3].proj.state_dict(), + ) + set_conv_weights( + encoder.layers[-1].layers[0], + original_segformer.decode_head.linear_fuse.state_dict(), + ) + set_batchnorm_weights( + encoder.layers[-1].layers[1], + original_segformer.decode_head.batch_norm.state_dict(), + ) keras_mit = keras_hub.models.SegFormerBackbone(image_encoder=encoder) - set_conv_weights(keras_mit.layers[-2], original_segformer.decode_head.classifier.state_dict()) + set_conv_weights( + keras_mit.layers[-2], + original_segformer.decode_head.classifier.state_dict(), + ) print("\n-> Converting weights...") From de17c0364e17f0ee39b9ddb854c39c7d5aaa19cf Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 04:10:24 +0900 Subject: [PATCH 53/66] update conversion script --- .../models/segformer/segformer_backbone.py | 1 - .../convert_segformer_checkpoints.py | 33 +++++++++++-------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index e9c79f3ce7..5266b61515 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -121,7 +121,6 @@ 
def __init__( self.mlp_blocks = [] for feature_dim, feature in zip(image_encoder.hidden_dims, features): - print(feature_dim, feature) self.mlp_blocks.append( keras.layers.Dense( projection_filters, name=f"linear_{feature_dim}" diff --git a/tools/checkpoint_conversion/convert_segformer_checkpoints.py b/tools/checkpoint_conversion/convert_segformer_checkpoints.py index 1f6e90ef54..9558110605 100644 --- a/tools/checkpoint_conversion/convert_segformer_checkpoints.py +++ b/tools/checkpoint_conversion/convert_segformer_checkpoints.py @@ -1,6 +1,7 @@ # Usage example # python tools/checkpoint_conversion/convert_mix_transformer.py --preset "B0_ade_512" +import numpy as np from absl import app from absl import flags from transformers import SegformerForSemanticSegmentation @@ -57,51 +58,55 @@ def set_batchnorm_weights(bn_layer, state_dict): def main(_): print("\n-> Loading HuggingFace model") - model = SegformerForSemanticSegmentation.from_pretrained( + original_segformer = SegformerForSemanticSegmentation.from_pretrained( DOWNLOAD_URLS[FLAGS.preset] ) - original_segformer = model.segformer print("\n-> Instantiating KerasHub Model") - encoder = keras_hub.models.MiTBackbone.from_preset(FLAGS.preset) + encoder = keras_hub.models.MiTBackbone.from_preset("mit_" + FLAGS.preset) + segformer_backbone = keras_hub.models.SegFormerBackbone( + image_encoder=encoder, projection_filters=256 + ) + segformer_segmenter = keras_hub.models.SegFormerImageSegmenter( + backbone=segformer_backbone, num_classes=150 + ) + segformer_backbone(np.random.rand(1, 224, 224, 3)) set_dense_weights( - encoder.layers[5], + segformer_backbone.layers[5], original_segformer.decode_head.linear_c[0].proj.state_dict(), ) set_dense_weights( - encoder.layers[4], + segformer_backbone.layers[4], original_segformer.decode_head.linear_c[1].proj.state_dict(), ) set_dense_weights( - encoder.layers[3], + segformer_backbone.layers[3], original_segformer.decode_head.linear_c[2].proj.state_dict(), ) set_dense_weights( - 
encoder.layers[2], + segformer_backbone.layers[2], original_segformer.decode_head.linear_c[3].proj.state_dict(), ) set_conv_weights( - encoder.layers[-1].layers[0], + segformer_backbone.layers[-1].layers[0], original_segformer.decode_head.linear_fuse.state_dict(), ) set_batchnorm_weights( - encoder.layers[-1].layers[1], + segformer_backbone.layers[-1].layers[1], original_segformer.decode_head.batch_norm.state_dict(), ) - keras_mit = keras_hub.models.SegFormerBackbone(image_encoder=encoder) - set_conv_weights( - keras_mit.layers[-2], + segformer_segmenter.layers[-2], original_segformer.decode_head.classifier.state_dict(), ) print("\n-> Converting weights...") - directory = f"MiT_{FLAGS.preset}" + directory = f"SegFormer_{FLAGS.preset}" print(f"\n-> Saving converted KerasHub model in {directory}") - keras_mit.save_to_preset(directory) + segformer_backbone.save_to_preset(directory) if __name__ == "__main__": From 04ba1eb828213de28f39e7f867822893bb2c1360 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 13:55:31 +0900 Subject: [PATCH 54/66] remove transpose --- .../models/mix_transformer/mix_transformer_layers.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py index 61e6cca148..b949fcb6e2 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_layers.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_layers.py @@ -184,6 +184,7 @@ def __init__(self, project_dim, num_heads, sr_ratio): self.v = keras.layers.Dense(project_dim) self.proj = keras.layers.Dense(project_dim) self.dropout = keras.layers.Dropout(0.1) + self.proj_drop = keras.layers.Dropout(0.1) if sr_ratio > 1: self.sr = keras.layers.Conv2D( @@ -196,7 +197,7 @@ def __init__(self, project_dim, num_heads, sr_ratio): def call(self, x): input_shape = ops.shape(x) H, W = int(math.sqrt(input_shape[1])), 
int(math.sqrt(input_shape[1])) - B, C = input_shape[0], input_shape[2] + B, N, C = input_shape[0], input_shape[1], input_shape[2] q = self.q(x) q = ops.reshape( @@ -216,8 +217,7 @@ def call(self, x): (B, H, W, C), ) x = self.sr(x) - x = ops.reshape(x, [input_shape[0], input_shape[2], -1]) - x = ops.transpose(x, [0, 2, 1]) + x = ops.reshape(x, [B, -1, C]) x = self.norm(x) k = self.k(x) @@ -243,13 +243,14 @@ def call(self, x): attn = ops.nn.softmax(attn, axis=-1) attn = self.dropout(attn) - attn = keras.ops.matmul(attn, v) + attn = attn @ v attn = ops.reshape( ops.transpose(attn, [0, 2, 1, 3]), - [input_shape[0], input_shape[1], input_shape[2]], + [B, N, C], ) x = self.proj(attn) + x = self.proj_drop(x) return x From 9e04b6e97bd26458d6128c0099b0160c7f4c08c5 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 14:13:13 +0900 Subject: [PATCH 55/66] add preprocessor to segformer class --- .../segformer/segformer_image_segmenter.py | 6 ++++ .../convert_segformer_checkpoints.py | 29 +++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index a07d3dc9d5..9e3ed73766 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -26,6 +26,9 @@ from keras_hub.src.api_export import keras_hub_export from keras_hub.src.models.image_segmenter import ImageSegmenter from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone +from keras_hub.src.models.segformer.segformer_image_segmenter_preprocessor import ( + SegFormerImageSegmenterPreprocessor, +) @keras_hub_export("keras_hub.models.SegFormerImageSegmenter") @@ -104,11 +107,13 @@ class SegFormerImageSegmenter(ImageSegmenter): """ backbone_cls = SegFormerBackbone + preprocessor_cls = SegFormerImageSegmenterPreprocessor def __init__( self, backbone, num_classes, + 
preprocessor=None, **kwargs, ): if not isinstance(backbone, keras.layers.Layer) or not isinstance( @@ -124,6 +129,7 @@ def __init__( inputs = backbone.input self.backbone = backbone + self.preprocessor = preprocessor self.dropout = keras.layers.Dropout(0.1) self.output_segmentation = keras.layers.Conv2D( filters=num_classes, kernel_size=1, strides=1 diff --git a/tools/checkpoint_conversion/convert_segformer_checkpoints.py b/tools/checkpoint_conversion/convert_segformer_checkpoints.py index 9558110605..eeb9f32083 100644 --- a/tools/checkpoint_conversion/convert_segformer_checkpoints.py +++ b/tools/checkpoint_conversion/convert_segformer_checkpoints.py @@ -7,9 +7,27 @@ from transformers import SegformerForSemanticSegmentation import keras_hub +from keras_hub.src.models.segformer.segformer_image_segmenter_preprocessor import ( + SegFormerImageSegmenterPreprocessor, +) FLAGS = flags.FLAGS +PROJECTION_FILTERS = { + "b0_ade20k_512": 256, + "b1_ade20k_512": 256, + "b2_ade20k_512": 768, + "b3_ade20k_512": 768, + "b4_ade20k_512": 768, + "b5_ade20k_640": 768, + "b0_cityscapes_1024": 256, + "b1_cityscapes_1024": 256, + "b2_cityscapes_1024": 768, + "b3_cityscapes_1024": 768, + "b4_cityscapes_1024": 768, + "b5_cityscapes_1024": 768, +} + DOWNLOAD_URLS = { "b0_ade20k_512": "nvidia/segformer-b0-finetuned-ade-512-512", @@ -65,10 +83,15 @@ def main(_): print("\n-> Instantiating KerasHub Model") encoder = keras_hub.models.MiTBackbone.from_preset("mit_" + FLAGS.preset) segformer_backbone = keras_hub.models.SegFormerBackbone( - image_encoder=encoder, projection_filters=256 + image_encoder=encoder, + projection_filters=PROJECTION_FILTERS[FLAGS.preset], ) + num_classes = 150 if "ade20k" in FLAGS.preset else 30 + preprocessor = SegFormerImageSegmenterPreprocessor() segformer_segmenter = keras_hub.models.SegFormerImageSegmenter( - backbone=segformer_backbone, num_classes=150 + backbone=segformer_backbone, + num_classes=num_classes, + preprocessor=preprocessor, ) 
segformer_backbone(np.random.rand(1, 224, 224, 3)) @@ -106,7 +129,7 @@ def main(_): directory = f"SegFormer_{FLAGS.preset}" print(f"\n-> Saving converted KerasHub model in {directory}") - segformer_backbone.save_to_preset(directory) + segformer_segmenter.save_to_preset(directory) if __name__ == "__main__": From e9e8ed577fffb8fb5776e34bbd308cefff38885f Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 14:14:59 +0900 Subject: [PATCH 56/66] fix preset path --- keras_hub/src/models/mix_transformer/mix_transformer_presets.py | 2 +- tools/checkpoint_conversion/convert_segformer_checkpoints.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py index 2408f5529b..9c2a5fe362 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_presets.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_presets.py @@ -76,7 +76,7 @@ "official_name": "MiT", "path": "mit", }, - "kaggle_handle": "kaggle://keras/mit/keras/mit_b5_ade20k_512/1", + "kaggle_handle": "kaggle://keras/mit/keras/mit_b5_ade20k_640/1", }, "mit_b0_cityscapes_1024": { "metadata": { diff --git a/tools/checkpoint_conversion/convert_segformer_checkpoints.py b/tools/checkpoint_conversion/convert_segformer_checkpoints.py index eeb9f32083..61d060b80a 100644 --- a/tools/checkpoint_conversion/convert_segformer_checkpoints.py +++ b/tools/checkpoint_conversion/convert_segformer_checkpoints.py @@ -86,7 +86,7 @@ def main(_): image_encoder=encoder, projection_filters=PROJECTION_FILTERS[FLAGS.preset], ) - num_classes = 150 if "ade20k" in FLAGS.preset else 30 + num_classes = 150 if "ade20k" in FLAGS.preset else 19 preprocessor = SegFormerImageSegmenterPreprocessor() segformer_segmenter = keras_hub.models.SegFormerImageSegmenter( backbone=segformer_backbone, From a7a21f68ef83047b64b506c6d660c25eaefd89b7 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 
16 Oct 2024 14:34:32 +0900 Subject: [PATCH 57/66] update test shape --- .../mix_transformer/mix_transformer_backbone_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py b/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py index b3840f5c07..51fd0e20a9 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_backbone_test.py @@ -11,7 +11,7 @@ class MiTBackboneTest(TestCase): def setUp(self): self.init_kwargs = { "depths": [2, 2], - "image_shape": (16, 16, 3), + "image_shape": (32, 32, 3), "hidden_dims": [4, 8], "num_layers": 2, "blockwise_num_heads": [1, 2], @@ -20,7 +20,7 @@ def setUp(self): "patch_sizes": [7, 3], "strides": [4, 2], } - self.input_size = 16 + self.input_size = 32 self.input_data = np.ones( (2, self.input_size, self.input_size, 3), dtype="float32" ) @@ -30,9 +30,9 @@ def test_backbone_basics(self): cls=MiTBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, - expected_output_shape=(2, 2, 2, 8), + expected_output_shape=(2, 4, 4, 8), expected_pyramid_output_keys=["P1", "P2"], - expected_pyramid_image_sizes=[(4, 4), (2, 2)], + expected_pyramid_image_sizes=[(8, 8), (4, 4)], run_quantization_check=False, run_mixed_precision_check=False, run_data_format_check=False, From 28e1297a806f3955ee41907d8b15afed239a4135 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 14:45:28 +0900 Subject: [PATCH 58/66] update presets --- .../src/models/segformer/segformer_presets.py | 119 +++++++++++++----- 1 file changed, 89 insertions(+), 30 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_presets.py b/keras_hub/src/models/segformer/segformer_presets.py index fcbfb4f9c3..b734a964ac 100644 --- a/keras_hub/src/models/segformer/segformer_presets.py +++ b/keras_hub/src/models/segformer/segformer_presets.py @@ -22,78 +22,137 @@ # 
limitations under the License. """SegFormer model preset configurations.""" -presets_no_weights = { - "segformer_b0": { +presets = { + "segformer_b0_ade20k_512": { "metadata": { - "description": ("SegFormer model with MiTB0 backbone."), + "description": ( + "SegFormer model with MiTB0 backbone fine-tuned on ADE20k in 512x512 resolution." + ), "params": 3719027, "official_name": "SegFormerB0", "path": "segformer_b0", }, - "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b0/2", + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b0_ade20k_512", }, - "segformer_b1": { + "segformer_b1_ade20k_512": { "metadata": { - "description": ("SegFormer model with MiTB1 backbone."), + "description": ( + "SegFormer model with MiTB1 backbone fine-tuned on ADE20k in 512x512 resolution." + ), "params": 13682643, "official_name": "SegFormerB1", "path": "segformer_b1", }, - "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b1/2", + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b1_ade20k_512", }, - "segformer_b2": { + "segformer_b2_ade20k_512": { "metadata": { - "description": ("SegFormer model with MiTB2 backbone."), + "description": ( + "SegFormer model with MiTB2 backbone fine-tuned on ADE20k in 512x512 resolution." + ), "params": 24727507, "official_name": "SegFormerB2", "path": "segformer_b2", }, - "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b2/2", + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b2_ade20k_512", }, - "segformer_b3": { + "segformer_b3_ade20k_512": { "metadata": { - "description": ("SegFormer model with MiTB3 backbone."), + "description": ( + "SegFormer model with MiTB3 backbone fine-tuned on ADE20k in 512x512 resolution." 
+ ), "params": 44603347, "official_name": "SegFormerB3", "path": "segformer_b3", }, - "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b3/2", + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b3_ade20k_512", }, - "segformer_b4": { + "segformer_b4_ade20k_512": { "metadata": { - "description": ("SegFormer model with MiTB4 backbone."), + "description": ( + "SegFormer model with MiTB4 backbone fine-tuned on ADE20k in 512x512 resolution." + ), "params": 61373907, "official_name": "SegFormerB4", "path": "segformer_b4", }, - "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b4/2", + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b4_ade20k_512", }, - "segformer_b5": { + "segformer_b5_ade20k_640": { "metadata": { - "description": ("SegFormer model with MiTB5 backbone."), + "description": ( + "SegFormer model with MiTB5 backbone fine-tuned on ADE20k in 640x640 resolution." + ), "params": 81974227, "official_name": "SegFormerB5", "path": "segformer_b5", }, - "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b5/2", + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b5_ade20k_640", }, -} - -presets_with_weights = { - "segformer_b0_imagenet": { + "segformer_b0_cityscapes_1024": { "metadata": { "description": ( - "SegFormer model with a pretrained MiTB0 backbone." + "SegFormer model with MiTB0 backbone fine-tuned on Cityscapes in 1024x1024 resolution." ), "params": 3719027, "official_name": "SegFormerB0", "path": "segformer_b0", }, - "kaggle_handle": "kaggle://keras/segformer/keras/segformer_b0_imagenet/2", # noqa: E501 + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b0_cityscapes_1024", + }, + "segformer_b1_cityscapes_1024": { + "metadata": { + "description": ( + "SegFormer model with MiTB1 backbone fine-tuned on Cityscapes in 1024x1024 resolution." 
+ ), + "params": 13682643, + "official_name": "SegFormerB1", + "path": "segformer_b1", + }, + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b1_cityscapes_1024", + }, + "segformer_b2_cityscapes_1024": { + "metadata": { + "description": ( + "SegFormer model with MiTB2 backbone fine-tuned on Cityscapes in 1024x1024 resolution." + ), + "params": 24727507, + "official_name": "SegFormerB2", + "path": "segformer_b2", + }, + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b2_cityscapes_1024", + }, + "segformer_b3_cityscapes_1024": { + "metadata": { + "description": ( + "SegFormer model with MiTB3 backbone fine-tuned on Cityscapes in 1024x1024 resolution." + ), + "params": 44603347, + "official_name": "SegFormerB3", + "path": "segformer_b3", + }, + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b3_cityscapes_1024", + }, + "segformer_b4_cityscapes_1024": { + "metadata": { + "description": ( + "SegFormer model with MiTB4 backbone fine-tuned on Cityscapes in 1024x1024 resolution." + ), + "params": 61373907, + "official_name": "SegFormerB4", + "path": "segformer_b4", + }, + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b4_cityscapes_1024", + }, + "segformer_b5_cityscapes_1024": { + "metadata": { + "description": ( + "SegFormer model with MiTB5 backbone fine-tuned on Cityscapes in 1024x1024 resolution."
+ ), + "params": 81974227, + "official_name": "SegFormerB5", + "path": "segformer_b5", + }, + "kaggle_handle": "kaggle://kerashub/segformer/keras/segformer_b5_cityscapes_1024", }, -} - -presets = { - **presets_no_weights, - **presets_with_weights, } From fc8fffe28d05252bf8d94e89841e8981c4dd38a8 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 15:20:23 +0900 Subject: [PATCH 59/66] update test shape --- .../mix_transformer/mix_transformer_classifier_test.py | 6 +++--- .../convert_segformer_checkpoints.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py b/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py index fb7ff5ce2b..2f935e9516 100644 --- a/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py +++ b/keras_hub/src/models/mix_transformer/mix_transformer_classifier_test.py @@ -13,11 +13,11 @@ class MiTImageClassifierTest(TestCase): def setUp(self): # Setup model. 
- self.images = np.ones((2, 16, 16, 3), dtype="float32") + self.images = np.ones((2, 32, 32, 3), dtype="float32") self.labels = [0, 3] self.backbone = MiTBackbone( depths=[2, 2, 2, 2], - image_shape=(16, 16, 3), + image_shape=(32, 32, 3), hidden_dims=[4, 8], num_layers=2, blockwise_num_heads=[1, 2], @@ -44,7 +44,7 @@ def test_classifier_basics(self): cls=MiTImageClassifier, init_kwargs=self.init_kwargs, train_data=self.train_data, - expected_output_shape=(2, 2), + expected_output_shape=(4, 4), ) @pytest.mark.large diff --git a/tools/checkpoint_conversion/convert_segformer_checkpoints.py b/tools/checkpoint_conversion/convert_segformer_checkpoints.py index 61d060b80a..230cf5227d 100644 --- a/tools/checkpoint_conversion/convert_segformer_checkpoints.py +++ b/tools/checkpoint_conversion/convert_segformer_checkpoints.py @@ -81,19 +81,25 @@ def main(_): ) print("\n-> Instantiating KerasHub Model") - encoder = keras_hub.models.MiTBackbone.from_preset("mit_" + FLAGS.preset) + + resolution = int(FLAGS.preset.split("_")[-1]) + + encoder = keras_hub.models.MiTBackbone.from_preset( + "mit_" + FLAGS.preset, image_shape=(resolution, resolution, 3) + ) segformer_backbone = keras_hub.models.SegFormerBackbone( image_encoder=encoder, projection_filters=PROJECTION_FILTERS[FLAGS.preset], ) num_classes = 150 if "ade20k" in FLAGS.preset else 19 + preprocessor = SegFormerImageSegmenterPreprocessor() segformer_segmenter = keras_hub.models.SegFormerImageSegmenter( backbone=segformer_backbone, num_classes=num_classes, preprocessor=preprocessor, ) - segformer_backbone(np.random.rand(1, 224, 224, 3)) + segformer_backbone(np.random.rand(1, resolution, resolution, 3)) set_dense_weights( segformer_backbone.layers[5], From fa89a09f96c16ce6c1641e655c7cd2479249f2c4 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 15:26:21 +0900 Subject: [PATCH 60/66] expand docstrings --- .../segformer/segformer_image_segmenter.py | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 
deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 9e3ed73766..668028a8f4 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -65,7 +65,26 @@ class SegFormerImageSegmenter(ImageSegmenter): Example: - Using the class with a `backbone`: + Using presets: + + ```python + import keras_hub + import numpy as np + + segmenter = keras_hub.models.SegFormerImageSegmenter.from_preset("segformer_b0_ade20k_512") + + images = np.random.rand(1, 512, 512, 3) + segmenter(images) + ``` + + Using the SegFormer backbone: + + ```python + encoder = keras_hub.models.MiTBackbone.from_preset("mit_b0_ade20k_512") + backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder, projection_filters=256) + ``` + + Using the SegFormer backbone with a custom encoder: ```python import keras @@ -93,16 +112,14 @@ class SegFormerImageSegmenter(ImageSegmenter): segformer(images) ``` - Using the class with a preset backbone: + Using the segmenter class with a preset backbone: ```python import keras_hub - image_encoder = keras_hub.models.MiTBackbone.from_preset("path_to_be_added") + encoder = keras_hub.models.MiTBackbone.from_preset("mit_b0_ade20k_512") backbone = keras_hub.models.SegFormerBackbone(image_encoder=encoder, projection_filters=256) - segformer = keras_hub.models.SegFormerImageSegmenter(backbone=backbone, num_classes=4) - ``` """ From 6e3d3d114447178624cda84731f3cdc1562bd540 Mon Sep 17 00:00:00 2001 From: David Landup Date: Wed, 16 Oct 2024 16:30:15 +0900 Subject: [PATCH 61/66] add rescaling and normalization to preprocessor --- .../segformer_image_segmenter_preprocessor.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py
b/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py index 6b52c78b55..cf32ebb193 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py @@ -10,6 +10,9 @@ ) from keras_hub.src.utils.tensor_utils import preprocessing_function +IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406] +IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225] + @keras_hub_export("keras_hub.models.SegFormerImageSegmenterPreprocessor") class SegFormerImageSegmenterPreprocessor(ImageSegmenterPreprocessor): @@ -18,7 +21,10 @@ class SegFormerImageSegmenterPreprocessor(ImageSegmenterPreprocessor): @preprocessing_function def call(self, x, y=None, sample_weight=None): - images = x["images"] if self.image_converter: - x["images"] = self.image_converter(images) + x = self.image_converter(x) + + x = x / 255 + x = (x - IMAGENET_DEFAULT_MEAN) / IMAGENET_DEFAULT_STD + return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) From 5092d717f9bbcbf1d9eaf0deb3527a447d67367e Mon Sep 17 00:00:00 2001 From: David Landup Date: Thu, 17 Oct 2024 13:16:13 +0900 Subject: [PATCH 62/66] remove backbone presets, remove copyrights, remove backbone cls from segmenter --- .../models/segformer/segformer_backbone.py | 28 ------- .../segformer/segformer_backbone_presets.py | 83 ------------------- .../segformer/segformer_backbone_tests.py | 23 ----- .../segformer/segformer_image_segmenter.py | 23 ----- .../segformer_image_segmenter_tests.py | 23 ----- .../src/models/segformer/segformer_presets.py | 22 ----- 6 files changed, 202 deletions(-) delete mode 100644 keras_hub/src/models/segformer/segformer_backbone_presets.py diff --git a/keras_hub/src/models/segformer/segformer_backbone.py b/keras_hub/src/models/segformer/segformer_backbone.py index 5266b61515..f5563b4c02 100644 --- a/keras_hub/src/models/segformer/segformer_backbone.py +++ b/keras_hub/src/models/segformer/segformer_backbone.py @@ -1,33 +1,7 @@ 
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - import keras from keras_hub.src.api_export import keras_hub_export from keras_hub.src.models.backbone import Backbone -from keras_hub.src.models.mix_transformer.mix_transformer_backbone import ( - MiTBackbone, -) @keras_hub_export("keras_hub.models.SegFormerBackbone") @@ -90,8 +64,6 @@ class SegFormerBackbone(Backbone): """ - backbone_cls = MiTBackbone - def __init__( self, image_encoder, diff --git a/keras_hub/src/models/segformer/segformer_backbone_presets.py b/keras_hub/src/models/segformer/segformer_backbone_presets.py deleted file mode 100644 index 2cd1243de0..0000000000 --- a/keras_hub/src/models/segformer/segformer_backbone_presets.py +++ /dev/null @@ -1,83 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""SegFormerBackbone model preset configurations.""" - -presets_no_weights = { - "segformer_b0_backbone": { - "metadata": { - "description": ("SegFormerBackbone model with MiTB0 encoder."), - "params": 3719027, - "official_name": "SegFormerB0Backbone", - "path": "segformer_b0_backbone", - }, - "kaggle_handle": "kaggle://TBA", - }, - "segformer_b1_backbone": { - "metadata": { - "description": ("SegFormerBackbone model with MiTB1 encoder."), - "params": 13682643, - "official_name": "SegFormerB1Backbone", - "path": "segformer_b1_backbone", - }, - "kaggle_handle": "kaggle://TBA", - }, - "segformer_b2_backbone": { - "metadata": { - "description": ("SegFormerBackbone model with MiTB2 encoder."), - "params": 24727507, - "official_name": "SegFormerB2Backbone", - "path": "segformer_b2_backbone", - }, - "kaggle_handle": "kaggle://TBA", - }, - "segformer_b3_backbone": { - "metadata": { - "description": ("SegFormerBackbone model with MiTB3 encoder."), - "params": 44603347, - "official_name": "SegFormerB3Backbone", - "path": 
"segformer_b3_backbone", - }, - "kaggle_handle": "kaggle://TBA", - }, - "segformer_b4_backbone": { - "metadata": { - "description": ("SegFormerBackbone model with MiTB4 encoder."), - "params": 61373907, - "official_name": "SegFormerB4Backbone", - "path": "segformer_b4_backbone", - }, - "kaggle_handle": "kaggle://TBA", - }, - "segformer_b5_backbone": { - "metadata": { - "description": ("SegFormerBackbone model with MiTB5 encoder."), - "params": 81974227, - "official_name": "SegFormerB5Backbone", - "path": "segformer_b5_backbone", - }, - "kaggle_handle": "kaggle://TBA", - }, -} - - -presets = {**presets_no_weights} diff --git a/keras_hub/src/models/segformer/segformer_backbone_tests.py b/keras_hub/src/models/segformer/segformer_backbone_tests.py index 75f1a8d1c4..22133763e7 100644 --- a/keras_hub/src/models/segformer/segformer_backbone_tests.py +++ b/keras_hub/src/models/segformer/segformer_backbone_tests.py @@ -1,26 +1,3 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - import numpy as np import pytest from keras import ops diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 668028a8f4..6c0bca0a83 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -1,26 +1,3 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- import keras from keras_hub.src.api_export import keras_hub_export diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py index bc6dc4265e..4ad2e8bc6f 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py @@ -1,26 +1,3 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - import numpy as np import pytest from keras import ops diff --git a/keras_hub/src/models/segformer/segformer_presets.py b/keras_hub/src/models/segformer/segformer_presets.py index b734a964ac..2c0fff0a50 100644 --- a/keras_hub/src/models/segformer/segformer_presets.py +++ b/keras_hub/src/models/segformer/segformer_presets.py @@ -1,25 +1,3 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. """SegFormer model preset configurations.""" presets = { From ec3e1eccbd30d0af9922217a029d331ad44605a9 Mon Sep 17 00:00:00 2001 From: David Landup Date: Thu, 17 Oct 2024 13:21:40 +0900 Subject: [PATCH 63/66] remove copyright and unused import --- keras_hub/src/models/segformer/__init__.py | 26 ------------------- .../segformer/segformer_image_segmenter.py | 4 +-- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/keras_hub/src/models/segformer/__init__.py b/keras_hub/src/models/segformer/__init__.py index 248924cc59..3a95690dba 100644 --- a/keras_hub/src/models/segformer/__init__.py +++ b/keras_hub/src/models/segformer/__init__.py @@ -1,29 +1,4 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. from keras_hub.src.models.segformer.segformer_backbone import SegFormerBackbone -from keras_hub.src.models.segformer.segformer_backbone_presets import ( - presets as backbone_presets, -) from keras_hub.src.models.segformer.segformer_image_segmenter import ( SegFormerImageSegmenter, ) @@ -31,4 +6,3 @@ from keras_hub.src.utils.preset_utils import register_presets register_presets(presets, SegFormerImageSegmenter) -register_presets(backbone_presets, SegFormerBackbone) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter.py b/keras_hub/src/models/segformer/segformer_image_segmenter.py index 6c0bca0a83..1b00c7a754 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter.py @@ -125,7 +125,7 @@ def __init__( self.backbone = backbone self.preprocessor = preprocessor self.dropout = keras.layers.Dropout(0.1) - self.output_segmentation = keras.layers.Conv2D( + self.output_segmentation_head = keras.layers.Conv2D( 
filters=num_classes, kernel_size=1, strides=1 ) self.resizing = keras.layers.Resizing( @@ -137,7 +137,7 @@ def __init__( # === Functional Model === x = self.backbone(inputs) x = self.dropout(x) - x = self.output_segmentation(x) + x = self.output_segmentation_head(x) output = self.resizing(x) super().__init__( From 54c24a90a25902c16ad68c8e8cc8a4e00df5397b Mon Sep 17 00:00:00 2001 From: David Landup Date: Thu, 17 Oct 2024 13:23:33 +0900 Subject: [PATCH 64/66] apply same transformation to masks as input images --- .../models/segformer/segformer_image_segmenter_preprocessor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py b/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py index cf32ebb193..fd8c5fba35 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_preprocessor.py @@ -23,6 +23,7 @@ class SegFormerImageSegmenterPreprocessor(ImageSegmenterPreprocessor): def call(self, x, y=None, sample_weight=None): if self.image_converter: x = self.image_converter(x) + y = self.image_converter(y) x = x / 255 x = (x - IMAGENET_DEFAULT_MEAN) / IMAGENET_DEFAULT_STD From 225942de233a4b168648dbf112aa99c5d6e8800d Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 20 Oct 2024 15:44:36 +0900 Subject: [PATCH 65/66] fix import --- keras_hub/api/layers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py index 16aebb3151..78a26075d1 100644 --- a/keras_hub/api/layers/__init__.py +++ b/keras_hub/api/layers/__init__.py @@ -53,7 +53,7 @@ from keras_hub.src.models.segformer.segformer_image_converter import ( SegFormerImageConverter, ) -from keras_hub.src.models.vgg.vgg_image_classifier import VGGImageConverter +from keras_hub.src.models.vgg.vgg_image_converter import VGGImageConverter from 
keras_hub.src.models.whisper.whisper_audio_converter import ( WhisperAudioConverter, ) From c7a61667da202f152b46cb73dc2c5da92459b5d8 Mon Sep 17 00:00:00 2001 From: David Landup Date: Sun, 20 Oct 2024 16:05:53 +0900 Subject: [PATCH 66/66] fix shape in tests --- keras_hub/src/models/mit/mit_backbone_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/keras_hub/src/models/mit/mit_backbone_test.py b/keras_hub/src/models/mit/mit_backbone_test.py index 553a266e5b..88c58e96a2 100644 --- a/keras_hub/src/models/mit/mit_backbone_test.py +++ b/keras_hub/src/models/mit/mit_backbone_test.py @@ -9,7 +9,7 @@ class MiTBackboneTest(TestCase): def setUp(self): self.init_kwargs = { "depths": [2, 2], - "image_shape": (16, 16, 3), + "image_shape": (32, 32, 3), "hidden_dims": [4, 8], "num_layers": 2, "blockwise_num_heads": [1, 2], @@ -18,7 +18,7 @@ def setUp(self): "patch_sizes": [7, 3], "strides": [4, 2], } - self.input_size = 16 + self.input_size = 32 self.input_data = np.ones( (2, self.input_size, self.input_size, 3), dtype="float32" ) @@ -28,9 +28,9 @@ def test_backbone_basics(self): cls=MiTBackbone, init_kwargs=self.init_kwargs, input_data=self.input_data, - expected_output_shape=(2, 2, 2, 8), + expected_output_shape=(2, 4, 4, 8), expected_pyramid_output_keys=["P1", "P2"], - expected_pyramid_image_sizes=[(4, 4), (2, 2)], + expected_pyramid_image_sizes=[(8, 8), (4, 4)], run_quantization_check=False, run_mixed_precision_check=False, run_data_format_check=False,