machine learning - Can someone explain to me the design of a convolutional NN with TensorFlow (handwritten digits)? (input img: 28 x 28 | output: 10 (n_classes)) -
I'm getting started with CNN design, and I found a piece of code from which I'm trying to infer the design (feature-map sizes, strides, ...).
What I've understood so far is: input --> conv5-32 --> maxpool --> conv5-64 --> maxpool --> fc1 --> outputs.
What I don't get is the input size of fc1 — why is it 7 * 7?
Could you please help me? (I'm a beginner.)
"""MNIST CNN (TensorFlow 1.x): input -> conv5x5(32) -> maxpool -> conv5x5(64)
-> maxpool -> fully-connected(1024) -> dropout -> logits(10).

Each 2x2 max-pool halves the spatial size, so 28x28 -> 14x14 -> 7x7; that is
why the fully-connected layer takes 7*7*64 inputs.

Fixes relative to the scraped original:
- restored mangled syntax (`import ... as`, `from ... import`, `tf.Variable`,
  `None`, `True`, `padding='SAME'`);
- unified `n_output`/`n_classes` into one name (`n_classes`) — the original
  defined `n_output` but referenced the undefined `n_classes` in the weight
  and bias dicts (NameError);
- `wc2` output channels corrected from 5 to 64, matching `bc2` (64 biases)
  and `wd1` (7*7*64 inputs).
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Downloads/loads MNIST; labels come back one-hot encoded (shape [batch, 10]).
mnist = input_data.read_data_sets('/tmp/data/', one_hot=True)

# Training parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 28
display_step = 10

# Network parameters
n_input = 784     # MNIST image flattened: 28 * 28
n_classes = 10    # digits 0-9
dropout = 0.75    # keep probability for dropout

# tf graph input (None = variable batch dimension)
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)


def conv2d(x, w, b, strides=1):
    """Conv2D wrapper: convolution + bias + ReLU.

    'SAME' padding keeps the spatial size unchanged at stride 1.
    """
    x = tf.nn.conv2d(x, w, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def maxpool2d(x, k=2):
    """MaxPool2D wrapper: k x k window, stride k (halves H and W for k=2)."""
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')


def conv_net(x, weights, biases, dropout):
    """Build the CNN graph and return the class logits.

    Args:
        x: flattened images, shape [batch, 784].
        weights, biases: dicts of tf.Variables (see below).
        dropout: keep probability applied after fc1.
    """
    # Reshape flat input back to an image: [batch, 28, 28, 1].
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Convolution layer 1: 28x28x1 -> 28x28x32
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max pooling (down-sampling): 28x28x32 -> 14x14x32
    conv1 = maxpool2d(conv1, k=2)

    # Convolution layer 2: 14x14x32 -> 14x14x64
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max pooling (down-sampling): 14x14x64 -> 7x7x64
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer: flatten 7x7x64 to match wd1's input size.
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply dropout (regularization; disabled at inference via keep_prob=1).
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output layer: class logits (no softmax here).
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out


# Store layers' weights & biases
weights = {
    # 5x5 conv, 1 input channel, 32 output channels
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 input channels, 64 output channels
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # fully connected: 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_classes])),
}
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes])),
}
It's because of the max-pooling: each max-pooling layer divides the size of the input by 2 in each spatial dimension.
So after the first max-pooling, 28x28 becomes 14x14, and then 7x7 after the second.
Comments
Post a Comment