......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
....11111111111111111111111111111111111111111111111111111....................................11111111111111111111111111111111111111111111111111111....
.....100000000000000000000000000000000000000000000010010001.................................00000000000000000000000000000000000000000000100100001.....
.......1001...1111111111.......................1111111111001..............................1001.1111111...............11111111111111111111..1000.......
........1001..10000000000000001111........11100000000000110001..........................1000110000000000011..1110000000000000000000000001.1001........
.........10001.100000000000000000000011111000000000000000011001........................100110000000000000000011000000000000000000000000..0001.........
...........1001..00000011111100000000000001111111....000000110001....................1000.10000001.11110000000011111111111....10000001.1001...........
............10001.0000001.......111100000000001.......1000000.1001..................1001.1000001..1000011110000001...........1000000110001............
..............1001.1000001.........111110000000011......000000.10001..............10001.1000001..100000011..1000001........1000000011001..............
...............100011000000.......1000001.111111111......000001..1001............1001..1000001..1000001100000011111.......1000000110001...............
.................100110000001.....100000100000000000001111000001..10001........10001...0000111110000010000000000001.....10000001.1001.................
..................1000110000001....00000110000000000000001100001....1001......1001.....01110000100000111111100000001.110000001..0001..................
....................1001110000001.110000001111111111100001100001.....10001..1000......11000000010000011110000110001100000001..1001....................
.....................10001100000010110000000111..000011111100001111....10011001.....10000000011.00000000000000111100000001...0001.....................
.......................1001.10001000011000000000000000...100000100001...100001...110000000111...100000000001111000000001...1001.......................
........................10001.101000001111100000000001..1000001100000001..11..1100000001110001...100011111100000000011....0001........................
..........................1001...10000010011111100000..1000000...100000001..1000000001..100000.......1100000000001110...1001..........................
...........................1000...00000110000111...1..1000001......10000000110000011.....000001.111000000000011110001..0001...........................
.............................1001..0000011000000000111111001.........110000000111........100000100000000011..1000000.1001.............................
..............................1000..000000110000000000001111........1101100000001.....11111000001000111.....100000110001..............................
................................1001.1000000111110000000000000011.100000011000001111000000110001111......1100000011001................................
.................................1000.10000000011011111100000001100000001...11100000000000111001111...110000000110001.................................
...................................1001..1000001000001....1111100000011111.1000000000011.....10001111000000001.1001...................................
....................................1001...1101100000100011111100001100000011100011110001.....11110000000011..1001....................................
......................................1001.....00000100000000000111.110000000001..1000000011000000000011....1001......................................
.......................................1001....00000111100000000011000011100000001..100000001100001111.....1001.......................................
.........................................1001..100000100011111011000000011.100011110011000000011110001...1001.........................................
..........................................1001..100000000001..10000001110001.1100000000110000000000001..1001..........................................
............................................1001.100000001..10000001110000000110000001111.10000000001.1001............................................
.............................................1001..00011..10000000.....10000000111110000001.10000001.1001.............................................
...............................................1001.1....00000001....1001110000001..110000001.1001.1001...............................................
................................................0001...10000001....10000001110000001..10000000....1000................................................
...............................................1001...10000001...100000001....10000001..10000001...1001...............................................
.............................................1001....1000111....00000001........11111111111000001....1001.............................................
............................................1001....111100000110000001.........110000000000100000111..1001............................................
..........................................1001...10000000000110000011111...110000000000000001000001001..1001..........................................
.........................................1001.100000000000110000001100000011110000011111111..10000010001.1001.........................................
.......................................100111000000011111..0000001110000000000111....1000000..000001000001.1001.......................................
......................................1001100000001100001.10000011111111000000000111000000001.0000011000001.1001......................................
....................................1001100000001..100000.111111000000001.1100011000000001....000001.1000000..1001....................................
...................................100110000001.....000001.10000000001111....110000000111111.100001...1000001..1001...................................
.................................100011000001.......11100010000000111000001110000000110000000011111.....000001...0001.................................
................................1001.1000001....1110111100110011....11000000011101...11000000000000000111000001...1001................................
..............................1000..1000001.1100000001001001..........11000000001.........111100000000001000000.....0001..............................
.............................1001..10011110000000000011100001.......100011100000001......100011111110000010000011....1001.............................
...........................1000....111000000000011.....100000....1000000001.1000000011...000000........1111000011011...0001...........................
..........................1001...1100000000011..1111001.000001.1000000011.....1100000001100000..1111......100000100001..1001..........................
........................10001..10000000011..110000000001.0011000000001....11.....1000001000001..000000111..0000010000001..0001........................
.......................1001..10000000111.100000000000000110000000011....100001.....1101100000..000000000000111001100000001.1001.......................
.....................10001.10000000110001100000011.00000110000011......10011001.......10000010111000100000000011...100000001.0001.....................
....................1001.10000001..1000000111111111100000100111......10001..10001......0000010000111...1100000001....10000001.1001....................
..................1000110000001.....100000000000001000001111000.....1001......1001.....000001000000000000110000001.....10000001.0001..................
.................1001.1000000......101110000000011000000.000000...10001........10001...1000001100000000000011000001......1000000.1001.................
...............1000110000001.......100001.1111110000001.1000001..1001............1001...000001..11110000011..100001.......1000000110001...............
..............1001.1000001..........000001.1000000001..1000001.10001...............0001..000000..100000000111110001.........1000000.1001..............
............10001.0000001.........11100000011100001...0000001.1001..................1001..0000001..11000000000011111.........1000000..0001............
...........1001.1000000011110000000001000000001111..10000001.0001....................1000..1000001...111000000000000000001111100000001.1001...........
.........10001.1000000000000000000000011100000000000000001.1001........................1001.1000000000000111110000000000000000000000001..0001.........
........1001..1000000000000000111111......11100000000000..0001..........................1000..100000000000011.....11111100000000000000001.1001........
......10001....................................11111111.1001..............................1001.111111111...................................10001......
.....100000000000000000000000000000000000000000000000000001................................100000000000000000000000000000000000000000000000000001.....
....11111111111111111111111111111111111111111111111111111....................................11111111111111111111111111111111111111111111111111111....
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................
......................................................................................................................................................

Feature Crossing is a simple but powerful method used to create new features, by combining existing features.

The DCN cross layer, first described in the original Deep & Cross Network (DCN) paper and expanded in the follow-up DCN-V2 paper, is an elegant method for automatic creation and selection of predictive feature crosses within a neural network model. Note that this is by no means the only automated feature-crossing solution that has been proposed (others include Factorisation Machines, DeepFM, DNN2LR and AutoCross).

Although the notation of the cross layer is quite straightforward, the precise behaviour of the layer wasn’t immediately apparent to me, especially where multiple cross layers are stacked in sequence. So, in pursuit of a deeper understanding, I explicitly wrote out the output of 3 sequential cross layers for a simple 3-dimensional input vector. I’ve published it here in the hope that it will clear things up for you as it did for me.

What follows:

  1. Two examples of feature crossing, to illustrate what feature crossing is

  2. A diagram of the structure of the DCN-V2 cross layer

  3. The cross layer written out explicitly for an example input

Note that in application, cross layers usually appear in sequence and/or in parallel with other sorts of neural network layers such as dense layers.

1. Two Examples of Feature Crossing

Features are crossed simply by multiplying their values together.

Here is an example of a feature (prob_X_expected) created by crossing 2 numeric features:

Here is an example of a new feature (gender_X_location) created by crossing 2 categorical features:

gender_male gender_female location_Africa location_Europe male_X_Africa male_X_Europe female_X_Africa female_X_Europe
0 1 0 1 0 0 0 1
0 1 1 0 0 0 1 0
1 0 0 1 0 1 0 0
1 0 1 0 1 0 0 0

2. A Diagram of the Structure of the Cross Layer

(from the original DCN-V2 paper)

3. The Cross Layer Written Out Explicitly for an Example Input

\[\begin{array}{lcl} x_{l+1} &=& x_0 \odot (W_l x_l + b_l) + x_l \\ x_0 &=& \begin{bmatrix} \color{orange}{a} \\ \color{green}{b} \\ \color{brown}{c} \\ \end{bmatrix} \\ x_1 &=& x_0 \odot (W_0 x_0 + b_0) + x_0 \\ &=& \begin{bmatrix} \color{orange}{a} \\ \color{green}{b} \\ \color{brown}{c} \\ \end{bmatrix} \odot \Bigg( \begin{bmatrix} {^0}w_{11} & {^0}w_{12} & {^0}w_{13} \\ {^0}w_{21} & {^0}w_{22} & {^0}w_{23} \\ {^0}w_{31} & {^0}w_{32} & {^0}w_{33} \\ \end{bmatrix} \begin{bmatrix} \color{orange}{a} \\ \color{green}{b} \\ \color{brown}{c} \\ \end{bmatrix} + \begin{bmatrix} {^0}b_1 \\ {^0}b_2 \\ {^0}b_3 \\ \end{bmatrix} \Bigg) + \begin{bmatrix} \color{orange}{a} \\ \color{green}{b} \\ \color{brown}{c} \\ \end{bmatrix} \\ &=& \begin{bmatrix} \color{orange}{a}( {^0}w_{11}\color{orange}{a} + {^0}w_{12}\color{green}{b} + {^0}w_{13}\color{brown}{c} + {^0}b_1 ) + \color{orange}{a} \\ \color{green}{b}( {^0}w_{21}\color{orange}{a} + {^0}w_{22}\color{green}{b} + {^0}w_{23}\color{brown}{c} + {^0}b_2 ) + \color{green}{b} \\ \color{brown}{c}( {^0}w_{31}\color{orange}{a} + {^0}w_{32}\color{green}{b} + {^0}w_{33}\color{brown}{c} + {^0}b_3 ) + \color{brown}{c} \end{bmatrix} \\ &=& \begin{bmatrix} {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} + {^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}({^0}b_1+1) \\ {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}({^0}b_2+1) \\ {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}( {^0}b_3+1 ) \end{bmatrix} \\ x_2 &=& x_0 \odot (W_1 x_1 + b_1 ) + x_1 \\ &=& \underset{x_0}{ \underbrace{ \begin{bmatrix} \color{orange}{a} \\ \color{green}{b} \\ \color{brown}{c} \\ \end{bmatrix} } } \odot \Bigg( \underset{W_1}{ \underbrace{ \begin{bmatrix} {^1}w_{11} & {^1}w_{12} & {^1}w_{13} \\ {^1}w_{21} & {^1}w_{22} & {^1}w_{23} \\
{^1}w_{31} & {^1}w_{32} & {^1}w_{33} \\ \end{bmatrix} } } \underset{x_1}{ \underbrace{ \begin{bmatrix} {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} + {^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}({^0}b_1+1) \\ {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}({^0}b_2+1) \\ {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}( {^0}b_3+1 ) \end{bmatrix} } } + \underset{b_1}{ \underbrace{ \begin{bmatrix} {^1}b_1 \\ {^1}b_2 \\ {^1}b_3 \\ \end{bmatrix} } } \Bigg) + \underset{x_1}{ \underbrace{ \begin{bmatrix} {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} + {^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}({^0}b_1+1) \\ {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}({^0}b_2+1) \\ {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}( {^0}b_3+1 ) \end{bmatrix} } } \\ &=& \begin{bmatrix} \color{orange}{a} \\ \color{green}{b} \\ \color{brown}{c} \\ \end{bmatrix} \odot \begin{bmatrix} {^1}w_{11}( {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} + {^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}{^0}b_1+\color{orange}{a} ) + {^1}w_{12}( {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}{^0}b_2 + \color{green}{b} ) + {^1}w_{13}( {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}{^0}b_3 + \color{brown}{c} ) + {^1}b_1 \\ {^1}w_{21}( {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} +
{^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}{^0}b_1+\color{orange}{a} ) + {^1}w_{22}( {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}{^0}b_2 + \color{green}{b} ) + {^1}w_{23}( {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}{^0}b_3 + \color{brown}{c} ) + {^1}b_2 \\ {^1}w_{31}( {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} + {^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}{^0}b_1+\color{orange}{a} ) + {^1}w_{32}( {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}{^0}b_2 + \color{green}{b} ) + {^1}w_{33}( {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}{^0}b_3 + \color{brown}{c} ) + {^1}b_3 \end{bmatrix} + \begin{bmatrix} {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} + {^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}({^0}b_1+1) \\ {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}({^0}b_2+1) \\ {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}( {^0}b_3+1 ) \end{bmatrix} \\ &=& \begin{bmatrix} {^1}w_{11}( {^0}w_{11}\color{orange}{a^3} + {^0}w_{12}\color{orange}{a^2}\color{green}{b} + {^0}w_{13}\color{orange}{a^2}\color{brown}{c} + {^0}b_1\color{orange}{a^2} + \color{orange}{a^2} ) + {^1}w_{12}( {^0}w_{21}\color{orange}{a^2}\color{green}{b} + {^0}w_{22}\color{orange}{a}\color{green}{b^2} + {^0}w_{23}\color{orange}{a}\color{green}{b}\color{brown}{c} + {^0}b_2\color{orange}{a}\color{green}{b} + \color{orange}{a}\color{green}{b} ) + 
{^1}w_{13}( {^0}w_{31}\color{orange}{a^2}\color{brown}{c} + {^0}w_{32}\color{orange}{a}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{orange}{a}\color{brown}{c^2} + {^0}b_3\color{orange}{a}\color{brown}{c} + \color{orange}{a}\color{brown}{c} ) + {^1}b_1 \color{orange}{a} + {^0}w_{11}\color{orange}{a^2} + {^0}w_{12}\color{orange}{a}\color{green}{b} + {^0}w_{13}\color{orange}{a}\color{brown}{c} + \color{orange}{a}({^0}b_1+1) \\ {^1}w_{21}( {^0}w_{11}\color{orange}{a^2}\color{green}{b} + {^0}w_{12}\color{orange}{a}\color{green}{b^2} + {^0}w_{13}\color{orange}{a}\color{green}{b}\color{brown}{c} + {^0}b_1\color{orange}{a}\color{green}{b} + \color{orange}{a}\color{green}{b} ) + {^1}w_{22}( {^0}w_{21}\color{orange}{a}\color{green}{b^2} + {^0}w_{22}\color{green}{b^3} + {^0}w_{23}\color{green}{b^2}\color{brown}{c} + {^0}b_2\color{green}{b^2} + \color{green}{b^2} ) + {^1}w_{23}( {^0}w_{31}\color{orange}{a}\color{green}{b}\color{brown}{c} + {^0}w_{32}\color{green}{b^2}\color{brown}{c} + {^0}w_{33}\color{green}{b}\color{brown}{c^2} + {^0}b_3\color{green}{b}\color{brown}{c} + \color{green}{b}\color{brown}{c} ) + {^1}b_2\color{green}{b} + {^0}w_{21}\color{orange}{a}\color{green}{b} + {^0}w_{22}\color{green}{b^2} + {^0}w_{23}\color{green}{b}\color{brown}{c} + \color{green}{b}({^0}b_2+1) \\ {^1}w_{31}( {^0}w_{11}\color{orange}{a^2}\color{brown}{c} + {^0}w_{12}\color{orange}{a}\color{green}{b}\color{brown}{c} + {^0}w_{13}\color{orange}{a}\color{brown}{c^2} + {^0}b_1\color{orange}{a}\color{brown}{c} + \color{orange}{a}\color{brown}{c} ) + {^1}w_{32}( {^0}w_{21}\color{orange}{a}\color{green}{b}\color{brown}{c} + {^0}w_{22}\color{green}{b^2}\color{brown}{c} + {^0}w_{23}\color{green}{b}\color{brown}{c^2} + {^0}b_2\color{green}{b}\color{brown}{c} + \color{green}{b}\color{brown}{c} ) + {^1}w_{33}( {^0}w_{31}\color{orange}{a}\color{brown}{c^2} + {^0}w_{32}\color{green}{b}\color{brown}{c^2} + {^0}w_{33}\color{brown}{c^3} + {^0}b_3\color{brown}{c^2} + \color{brown}{c^2} ) + 
{^1}b_3\color{brown}{c} + {^0}w_{31}\color{orange}{a}\color{brown}{c} + {^0}w_{32}\color{green}{b}\color{brown}{c} + {^0}w_{33}\color{brown}{c^2} + \color{brown}{c}( {^0}b_3+1 ) \end{bmatrix} \\ x_3 &=& x_0 \odot (W_2 x_2 + b_2 ) + x_2 \\ &=& \underset{x_0}{ \underbrace{ \begin{bmatrix} \color{orange}{a} \\ \color{green}{b} \\ \color{brown}{c} \\ \end{bmatrix} } } \odot \Bigg( \underset{W_2}{ \underbrace{ \begin{bmatrix} {^2}w_{11} & {^2}w_{12} & {^2}w_{13} \\ {^2}w_{21} & {^2}w_{22} & {^2}w_{23} \\ {^2}w_{31} & {^2}w_{32} & {^2}w_{33} \\ \end{bmatrix} } } \underset{x_2}{ \underbrace{ \begin{bmatrix} . \\ . \\ . \end{bmatrix} } } + \underset{b_2}{ \underbrace{ \begin{bmatrix} {^2}b_1 \\ {^2}b_2 \\ {^2}b_3 \\ \end{bmatrix} } } \Bigg) + \underset{x_2}{ \underbrace{ \begin{bmatrix} . \\ . \\ . \end{bmatrix} } } \end{array}\]

If you don’t trust my algebra, here is a quick validation of my calculation using R:

# Validate the hand-derived expansion of x2 against a direct evaluation of the
# cross-layer recurrence  x_{l+1} = x0 * (W_l %*% x_l + b_l) + x_l.
#
# NOTE: `c` is deliberately used as a variable name so the code mirrors the
# (a, b, c) notation of the derivation. Calls such as `c(a, b, c)` still reach
# the base function because R skips non-function bindings when resolving a
# call.
a <- -1
b <- 4
c <- 9
x0 <- matrix(c(a, b, c), nrow = 3, ncol = 1)

# Draw an n_row x n_col matrix of random integers in [-100, 100].
random_int_matrix <- function(n_row, n_col) {
  matrix(
    sample(-100:100, size = n_row * n_col, replace = TRUE),
    nrow = n_row,
    ncol = n_col
  )
}

# Weights and biases of the first two cross layers. No seed is set, so each
# run draws different values; the check below must hold for any draw.
w0 <- random_int_matrix(3, 3)
b0 <- random_int_matrix(3, 1)
w1 <- random_int_matrix(3, 3)
b1 <- random_int_matrix(3, 1)

# Direct evaluation of the recurrence (`*` is the element-wise product).
x1 <- x0 * (w0 %*% x0 + b0) + x0
x2 <- x0 * (w1 %*% x1 + b1) + x1

# Example output from one particular run (values differ between runs):
x2
##         [,1]
## [1,]  130295
## [2,] 2871700
## [3,] -352647

# The fully expanded expression for x2, transcribed term by term from the
# derivation.
x2_expanded <- c(
  # row 1: a * (W1 x1 + b1)[1] + x1[1]
  w1[1, 1] * (w0[1, 1] * a^3 + w0[1, 2] * a^2 * b + w0[1, 3] * a^2 * c +
    b0[1] * a^2 + a^2) +
    w1[1, 2] * (w0[2, 1] * a^2 * b + w0[2, 2] * a * b^2 + w0[2, 3] * a * b * c +
      b0[2] * a * b + a * b) +
    w1[1, 3] * (w0[3, 1] * a^2 * c + w0[3, 2] * a * b * c + w0[3, 3] * a * c^2 +
      b0[3] * a * c + a * c) +
    b1[1] * a +
    w0[1, 1] * a^2 +
    w0[1, 2] * a * b +
    w0[1, 3] * a * c +
    a * (b0[1] + 1),
  # row 2: b * (W1 x1 + b1)[2] + x1[2]
  w1[2, 1] * (w0[1, 1] * a^2 * b + w0[1, 2] * a * b^2 + w0[1, 3] * a * b * c +
    b0[1] * a * b + a * b) +
    w1[2, 2] * (w0[2, 1] * a * b^2 + w0[2, 2] * b^3 + w0[2, 3] * b^2 * c +
      b0[2] * b^2 + b^2) +
    w1[2, 3] * (w0[3, 1] * a * b * c + w0[3, 2] * b^2 * c + w0[3, 3] * b * c^2 +
      b0[3] * b * c + b * c) +
    b1[2] * b +
    w0[2, 1] * a * b +
    w0[2, 2] * b^2 +
    w0[2, 3] * b * c +
    b * (b0[2] + 1),
  # row 3: c * (W1 x1 + b1)[3] + x1[3]
  w1[3, 1] * (w0[1, 1] * a^2 * c + w0[1, 2] * a * b * c + w0[1, 3] * a * c^2 +
    b0[1] * a * c + a * c) +
    w1[3, 2] * (w0[2, 1] * a * b * c + w0[2, 2] * b^2 * c + w0[2, 3] * b * c^2 +
      b0[2] * b * c + b * c) +
    w1[3, 3] * (w0[3, 1] * a * c^2 + w0[3, 2] * b * c^2 + w0[3, 3] * c^3 +
      b0[3] * c^2 + c^2) +
    b1[3] * c +
    w0[3, 1] * a * c +
    w0[3, 2] * b * c +
    w0[3, 3] * c^2 +
    c * (b0[3] + 1)
)

# Example output from the same run as above:
x2_expanded
## [1]  130295 2871700 -352647

# Fail loudly if the expansion and the direct evaluation ever disagree,
# instead of relying on a visual comparison of the two printouts.
stopifnot(isTRUE(all.equal(as.vector(x2), x2_expanded)))