Cross Layers vs Fully Connected Layers

Nithish | 5 minutes to read

Generate Dataset

import numpy as np
import tensorflow as tf

# Seed both random number generators used below: NumPy draws the synthetic
# dataset and TensorFlow initialises the network weights.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)


def get_random_X_y_data(data_size=100_000):
    """Sample a synthetic regression dataset built from feature interactions.

    Each of the three features is drawn uniformly from the grid
    {0, 1/200, ..., 199/200}.  The target is a purely second-order function
    of the features::

        y = x0**2 + x0*x1 + x1*x2 + x2**2

    Args:
        data_size: Number of rows to generate.

    Returns:
        A tuple ``(X, y)``; ``X`` has shape ``(data_size, 3)`` and ``y`` has
        shape ``(data_size,)``.
    """
    num_features = 3
    grid = 200  # number of distinct values each feature can take
    X = np.random.randint(grid, size=(data_size, num_features)) / float(grid)

    # Work on the individual feature columns to keep the formula readable.
    x0, x1, x2 = X.T
    y = x0**2 + x0 * x1 + x1 * x2 + x2**2
    return X, y


# Generate the dataset and hold out everything after the first 90,000 rows
# for evaluation.  Rows are independent random draws, so a plain positional
# split needs no shuffling.
x, y = get_random_X_y_data()
num_train = 90000
train_x, eval_x = x[:num_train], x[num_train:]
train_y, eval_y = y[:num_train], y[num_train:]

Fully Connected Net

# Baseline network: three ReLU hidden layers of width 3 and a linear output.
# With the 3-feature input this is 3 * (3*3 + 3) + (3 + 1) = 40 parameters.
deepnet = tf.keras.Sequential(
    [tf.keras.layers.Dense(3, activation="relu") for _ in range(3)]
    + [tf.keras.layers.Dense(1)]
)

# Both splits are served in fixed batches of 1000 rows; these datasets are
# reused later for the cross network and for the final metrics.
train_data = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(1000)
eval_data = tf.data.Dataset.from_tensor_slices((eval_x, eval_y)).batch(1000)

epochs = 100
learning_rate = 0.4

deepnet.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
)
deepnet.fit(train_data, epochs=epochs, verbose=False)

# With no extra metrics compiled, evaluate() returns the loss, i.e. the MSE
# on the evaluation split.
deepnet_result = deepnet.evaluate(eval_data, return_dict=False, verbose=False)

Network with Cross Layer

class CrossLayer(tf.keras.layers.Layer):
    """One explicit feature-crossing step.

    Computes ``init_x * (W @ prev_x + b) + prev_x`` where ``*`` is the
    element-wise product and ``@`` is matrix multiplication.  ``units``
    should match the feature dimension of the inputs so that the product
    and the residual addition line up.
    """

    def __init__(self, units):
        super().__init__()
        # The Dense layer holds both the weight matrix W and the bias b.
        self.dense_layer = tf.keras.layers.Dense(units)

    def call(self, prev_x, init_x):
        """Cross the previous layer's output with the original input.

        Args:
            prev_x: Output of the previous cross layer (or the raw input for
                the first layer).
            init_x: The original input to the cross stack.

        Returns:
            The crossed features plus the residual ``prev_x``.
        """
        projected = self.dense_layer(prev_x)
        return init_x * projected + prev_x
        

class CrossLayersStackedModel(tf.keras.Model):
    """A stack of ``CrossLayer`` blocks followed by a single linear output.

    Args:
        num_features: Width of the input; every cross layer keeps this width.
        num_layers: Number of cross layers to stack.
    """

    def __init__(self, num_features=3, num_layers=1):
        super().__init__()
        self.num_features = num_features
        self.cross_layers = [CrossLayer(num_features) for _ in range(num_layers)]
        self.dense = tf.keras.layers.Dense(1)

    def call(self, x):
        """Run ``x`` through every cross layer, then project to one output."""
        # No copy is needed: `crossed` starts as another name for `x` and is
        # only ever rebound to a new tensor, never modified in place, so `x`
        # stays available as the original input for every layer.
        crossed = x
        for layer in self.cross_layers:
            crossed = layer(crossed, x)
        return self.dense(crossed)

# One cross layer over the 3 input features plus a linear head:
# (3*3 + 3) + (3 + 1) = 16 parameters.
crossnet = CrossLayersStackedModel(num_features=3, num_layers=1)

# Same epoch count, learning rate, loss and optimiser as the fully
# connected net, so the two models are trained under identical settings.
epochs = 100
learning_rate = 0.4

crossnet.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
)
crossnet.fit(train_data, epochs=epochs, verbose=False)
<keras.src.callbacks.history.History at 0x17ca6f210>
def calc_metrics(model):
    """Return the model's RMSE on the training and evaluation datasets.

    ``model.evaluate`` reports the compiled loss.  The networks in this file
    are compiled with ``MeanSquaredError`` and no extra metrics, so the raw
    value is an MSE.  The square root is taken here so the result is a true
    RMSE, on the same scale as the mean-predictor baseline it is compared
    against.

    Args:
        model: A compiled Keras model whose ``evaluate`` returns a single
            scalar MSE loss.

    Returns:
        ``(train_rmse, eval_rmse)`` as Python floats.
    """
    train_mse = model.evaluate(train_data, return_dict=False, verbose=False)
    eval_mse = model.evaluate(eval_data, return_dict=False, verbose=False)
    return float(np.sqrt(train_mse)), float(np.sqrt(eval_mse))


# Baseline: always predict the mean of the *training* targets, and measure
# that constant prediction's RMSE on both splits.
baseline_train_rmse = np.sqrt(np.square(train_y - train_y.mean()).mean())
baseline_eval_rmse = np.sqrt(np.square(eval_y - train_y.mean()).mean())
print('baseline', baseline_train_rmse, baseline_eval_rmse)

# (train, eval) metrics for the fully connected net, then the cross net.
print(calc_metrics(deepnet))
print(calc_metrics(crossnet))
baseline - 0.6893 0.6889
deepnet  - (0.4751, 0.4745)
crossnet - (3.9747e-08, 4.0458e-08)

Comparison

| Metric | Mean Predictor | Deepnet | Crossnet |
| --- | --- | --- | --- |
| Number of Parameters | 0 | 40 | 16 |
| Train RMSE | 6.9e-1 | 2.4e-3 | 5.5e-11 |
| Eval RMSE | 6.9e-1 | 2.4e-3 | 5.3e-11 |

References