19 July 2018
Training in float16 is attractive, but float16 simply doesn't cover very many values, and three things go wrong.

1 + 0.0001 = ? In float16 the answer is just 1: a small weight update may be ignored entirely. The fix is to keep a float32 master copy of every weight, so that small changes actually get stored.

10^-9 is missing: gradients that small underflow to zero in float16. The fix is to multiply the loss by a scale_factor (eg : 256) before computing gradients, and divide the gradients back down by the same factor afterwards.

66,000 is infinite: float16 tops out around 65,504, and an inf anywhere in the graph is a problem. The fix is to keep the numerically sensitive parts of the computation (the loss, for instance) in float32.
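All three failure modes are easy to see in numpy, whose float16 behaves like the IEEE half-precision used on the GPU (a minimal sketch, not from the talk itself):

import numpy as np

print(np.float16(1.0) + np.float16(0.0001))   # 1.0  -- the small update is lost entirely
print(np.float16(1e-9))                       # 0.0  -- tiny gradients underflow to zero
print(np.float16(66000.0))                    # inf  -- anything above ~65504 overflows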
The baseline, in plain float32:
import tensorflow as tf

def build_training_model(inputs, labels, nlabel):
    top_layer = build_forward_model(inputs)          # network body, defined elsewhere
    logits = tf.layers.dense( top_layer, nlabel, activation=None)
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
    optim = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
    grads, variables = zip( *optim.compute_gradients( loss ) )
    grads, _ = tf.clip_by_global_norm( grads, 5.0 )  # gradient clipping
    train_op = optim.apply_gradients(zip( grads, variables ))
    return inputs, labels, loss, train_op
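For reference, driving this graph might look roughly like the following; the input shape, nlabel=10 and the image_batch / label_batch arrays are illustrative placeholders of mine, not values from the talk:

inputs_ph = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])  # example image shape
labels_ph = tf.placeholder(tf.int32, shape=[None])
inputs_ph, labels_ph, loss, train_op = build_training_model(inputs_ph, labels_ph, nlabel=10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_value, _ = sess.run([loss, train_op],
                             feed_dict={inputs_ph: image_batch, labels_ph: label_batch})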
The same model in mixed precision: cast the inputs to float16, create the variables through a custom getter that keeps float32 master copies, cast the logits back to float32 before the loss, and scale the loss so small gradients survive:

def build_training_model(inputs, labels, nlabel):
    inputs = tf.cast( inputs, tf.float16 )           # forward pass runs in float16
    with tf.variable_scope('fp32_vars', custom_getter=float32_master_getter):
        top_layer = build_forward_model(inputs)
        logits = tf.layers.dense( top_layer, nlabel, activation=None)
    logits = tf.cast( logits, tf.float32 )           # softmax / loss stays in float32
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
    optim = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
    loss_scale = 128.0                               # scale the loss up so small gradients don't underflow
    grads, variables = zip( *optim.compute_gradients( loss*loss_scale ) )
    grads = [grad/loss_scale for grad in grads]      # un-scale before clipping / applying
    grads, _ = tf.clip_by_global_norm( grads, 5.0 )
    train_op = optim.apply_gradients(zip( grads, variables ))
    return inputs, labels, loss, train_op
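float32_master_getter isn't defined in the snippet above. A minimal sketch of such a getter, along the lines of NVIDIA's mixed-precision examples (my reconstruction, not necessarily the exact function from the talk):

def float32_master_getter(getter, name, shape=None, dtype=None,
                          trainable=True, *args, **kwargs):
    # Create trainable variables in float32 (the 'master' copies)...
    storage_dtype = tf.float32 if trainable else dtype
    variable = getter(name, shape, dtype=storage_dtype,
                      trainable=trainable, *args, **kwargs)
    # ...but hand a float16 cast back to the layer that asked for float16.
    if trainable and dtype != tf.float32:
        variable = tf.cast(variable, dtype)
    return variable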
Two more things to get right: the optimizer must apply its updates to the float32 master weights (not to the float16 casts flowing through the graph), and numerically sensitive operations should be computed in float32 : softmax(), norm(), exp(), pow().
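exp() in particular overflows very quickly in half precision, which is why softmax-like computations belong in float32 (a small numpy illustration of my own):

import numpy as np

x = np.float16(12.0)
print(np.exp(x))                     # inf       : e**12 ~ 162,755 is already out of float16 range
print(np.exp(x.astype(np.float32)))  # 162754.79 : fine in float32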
Finally, to get the speed benefit from Tensor Cores, the relevant tensor dimensions should be multiples of 8. For convolutions that means InputChannels, OutputChannels and BatchSize (ImageHeight, ImageWidth and KernelSize matter less); for dense layers it means InputFeatures, OutputFeatures and BatchSize.
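Assuming that is the intent of the dimension lists above, a tiny helper for padding layer sizes (my addition, not code from the talk) could be:

def round_up_to_multiple(n, multiple=8):
    """Round a layer dimension (channels, features, batch size) up to a
    multiple of 8 so float16 matmuls / convolutions can use Tensor Cores."""
    return ((n + multiple - 1) // multiple) * multiple

hidden_units = round_up_to_multiple(250)   # -> 256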
us-central1 (and cross your fingers)

My blog : http://blog.mdda.net/
GitHub : mdda