1212
1313import tensorflow as tf
1414from tensorflow .keras .models import Model
15+ from tensorflow .keras .applications import EfficientNetV2S
1516from tensorflow .keras .layers import Input , Dense , Activation , Flatten , BatchNormalization
16- from tensorflow .keras .layers import Conv2D , AveragePooling2D , MaxPooling2D
17+ from tensorflow .keras .layers import Conv2D , AveragePooling2D , MaxPooling2D , Resizing
1718from tensorflow .keras .regularizers import l2
1819
1920#get model
@@ -29,12 +30,14 @@ def get_quant_model_name():
2930 else :
3031 return "pretrainedResnet"
3132
32- #define model
33- def resnet_v1_eembc ():
33+ #define models
34+
35+ # 200k params
36+ def resnet_v1_eembc (conv_filters = 26 ):
3437 # Resnet parameters
3538 input_shape = [32 ,32 ,3 ] # default size for cifar10
3639 num_classes = 10 # default class number for cifar10
37- num_filters = 16 # this should be 64 for an official resnet model
40+ num_filters = conv_filters # this should be 64 for an official resnet model
3841
3942 # Input layer, change kernel size to 7x7 and strides to 2 for an official resnet
4043 inputs = Input (shape = input_shape )
@@ -76,7 +79,7 @@ def resnet_v1_eembc():
7679 # Second stack
7780
7881 # Weight layers
79- num_filters = 32 # Filters need to be double for each stack
82+ num_filters = conv_filters * 2 # Filters need to be double for each stack
8083 y = Conv2D (num_filters ,
8184 kernel_size = 3 ,
8285 strides = 2 ,
@@ -109,7 +112,7 @@ def resnet_v1_eembc():
109112 # Third stack
110113
111114 # Weight layers
112- num_filters = 64
115+ num_filters = conv_filters * 4
113116 y = Conv2D (num_filters ,
114117 kernel_size = 3 ,
115118 strides = 2 ,
@@ -144,7 +147,7 @@ def resnet_v1_eembc():
144147 # Uncomments to use it
145148
146149# # Weight layers
147- # num_filters = 128
150+ # num_filters = conv_filters * 8
148151# y = Conv2D(num_filters,
149152# kernel_size=3,
150153# strides=2,
@@ -185,3 +188,158 @@ def resnet_v1_eembc():
185188 # Instantiate model.
186189 model = Model (inputs = inputs , outputs = outputs )
187190 return model
191+
# EfficientNetV2-S backbone with a small classification head
def effnet_v2s(transfer=True):
    """Build an EfficientNetV2-S classifier for 10 classes.

    Args:
        transfer: if True, freeze every backbone layer so only the new
            classification head is trained (transfer learning). If False,
            the whole network is fine-tuned.

    Returns:
        A Keras Model mapping (224, 224, 3) images to 10 softmax scores.
    """
    # EffNet parameters
    input_shape = [224, 224, 3]  # native input size for EfficientNetV2-S
    num_classes = 10             # class count for cifar10

    # Input layer
    inputs = Input(shape=input_shape)
    # x = Resizing(224, 224)(inputs)

    # Pretrained ImageNet backbone. include_preprocessing=True means the
    # network normalizes raw [0, 255] pixels internally, so no manual
    # preprocessing layer is needed. Uses the EfficientNetV2S name already
    # imported at the top of this module.
    effnet = EfficientNetV2S(include_top=False,
                             weights="imagenet",
                             include_preprocessing=True)
    if transfer:
        # Freeze the backbone; only the head below stays trainable.
        for layer in effnet.layers:
            layer.trainable = False
    x = effnet(inputs)

    # Final classification layer.
    # NOTE(review): the original used np.amin here, but numpy is not
    # visibly imported in this module; builtin min() over the two spatial
    # dims produces the identical pool size without that dependency.
    pool_size = int(min(x.shape[1:3]))
    x = AveragePooling2D(pool_size=pool_size)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    return model
218+
219+ # class Distiller(tf.keras.Model):
220+ # def __init__(self, student, teacher):
221+ # super().__init__()
222+ # self.teacher = teacher
223+ # self.student = student
224+
225+ # def compile(
226+ # self,
227+ # optimizer,
228+ # metrics,
229+ # student_loss_fn,
230+ # distillation_loss_fn,
231+ # alpha=0.1,
232+ # temperature=3,
233+ # ):
234+ # """Configure the distiller.
235+
236+ # Args:
237+ # optimizer: Keras optimizer for the student weights
238+ # metrics: Keras metrics for evaluation
239+ # student_loss_fn: Loss function of difference between student
240+ # predictions and ground-truth
241+ # distillation_loss_fn: Loss function of difference between soft
242+ # student predictions and soft teacher predictions
243+ # alpha: weight to student_loss_fn and 1-alpha to distillation_loss_fn
244+ # temperature: Temperature for softening probability distributions.
245+ # Larger temperature gives softer distributions.
246+ # """
247+ # super().compile(optimizer=optimizer, metrics=metrics)
248+ # self.student_loss_fn = student_loss_fn
249+ # self.distillation_loss_fn = distillation_loss_fn
250+ # self.alpha = alpha
251+ # self.temperature = temperature
252+
253+ # def compute_loss(
254+ # self, x=None, y=None, y_pred=None, sample_weight=None, allow_empty=False
255+ # ):
256+ # teacher_pred = self.teacher(x, training=False)
257+ # student_loss = self.student_loss_fn(y, y_pred)
258+
259+ # distillation_loss = self.distillation_loss_fn(
260+ # tf.nn.softmax(teacher_pred / self.temperature, axis=1),
261+ # tf.nn.softmax(y_pred / self.temperature, axis=1),
262+ # ) * (self.temperature**2)
263+
264+ # loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss
265+ # return loss
266+
267+ # def call(self, x):
268+ # return self.student(x)
269+
# Reference:
# https://keras.io/examples/vision/knowledge_distillation/
class Distiller(tf.keras.Model):
    """Knowledge-distillation wrapper.

    Trains a `student` model to reproduce the (temperature-softened)
    outputs of a frozen `teacher` model. Ground-truth labels are ignored
    during training and used only to update evaluation metrics.
    """

    def __init__(self, student, teacher, batch_size):
        super().__init__()
        self.student = student
        self.teacher = teacher
        self.batch_size = batch_size

    def compile(
        self,
        optimizer,
        metrics,
        distillation_loss_fn,
        temperature=2,
    ):
        """Configure optimizer and metrics, plus the distillation loss
        function and softening temperature."""
        super().compile(optimizer=optimizer, metrics=metrics)
        self.distillation_loss_fn = distillation_loss_fn
        self.temperature = temperature

    def _average_distillation_loss(self, teacher_out, student_out):
        # Temperature-scaled loss between teacher and student outputs,
        # averaged over the (global) batch size supplied at construction.
        per_example = self.distillation_loss_fn(
            teacher_out / self.temperature,
            student_out / self.temperature
        )
        return tf.nn.compute_average_loss(per_example,
                                          global_batch_size=self.batch_size)

    def train_step(self, data):
        # Labels are deliberately discarded: the teacher provides targets.
        x, _ = data

        # Teacher runs in inference mode and receives no gradient updates.
        teacher_out = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            student_out = self.student(x, training=True)
            loss = self._average_distillation_loss(teacher_out, student_out)

        # Update only the student's weights.
        student_vars = self.student.trainable_variables
        grads = tape.gradient(loss, student_vars)
        self.optimizer.apply_gradients(zip(grads, student_vars))

        # Report progress
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"distillation_loss": loss}
        )
        return results

    def test_step(self, data):
        x, y = data

        # Both networks run in inference mode during evaluation.
        teacher_out = self.teacher(x, training=False)
        student_out = self.student(x, training=False)

        loss = self._average_distillation_loss(teacher_out, student_out)

        # Labels are used here (and only here) to score the student.
        self.compiled_metrics.update_state(y, student_out)
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"distillation_loss": loss}
        )
        return results
0 commit comments