
Commit 547bf48

- added support for training on multiple GPUs
1 parent 490d2c6 commit 547bf48

File tree: README.md, train.py

2 files changed (+35, -20 lines)

README.md

Lines changed: 3 additions & 1 deletion
@@ -14,7 +14,7 @@ Where the Atrous CNN layers are similar to the ones in the bytenet encoder in [N
 
 The network supports embedding initialization with pre-trained GloVe vectors ([GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/pubs/glove.pdf)), which handle even rare words quite well compared to word2vec.
 
-To speed up training, the model pre-processes any input into a "clean" file, which it then uses for training. The data is read line by line from the "clean" files for better memory management. All input data is split into the appropriate buckets and dynamic padding is applied, which improves accuracy and speeds up training. The input pipeline can read from multiple data sources, which makes adding more data sources easy as long as they are preprocessed in the right format.
+To speed up training, the model pre-processes any input into a "clean" file, which it then uses for training. The data is read line by line from the "clean" files for better memory management. All input data is split into the appropriate buckets and dynamic padding is applied, which improves accuracy and speeds up training. The input pipeline can read from multiple data sources, which makes adding more data sources easy as long as they are preprocessed in the right format. The model can be trained on multiple GPUs if the hardware provides this capability.
 
 <p align="center">
 <img src="https://raw.githubusercontent.com/randomrandom/deep-atrous-cnn-sentiment/master/png/queue_example.gif" width="1024"/>
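
As a side note on the GloVe initialization mentioned in the hunk above, here is a minimal, hypothetical sketch of loading pre-trained GloVe vectors into an embedding matrix; the function name, file path, and `word_to_id` mapping are illustrative and are not part of this repository:

```python
import numpy as np

def load_glove_embeddings(glove_path, word_to_id, emb_dim):
    # Start from a small random init so words missing from the GloVe file still get a vector.
    emb = np.random.uniform(-0.05, 0.05, (len(word_to_id), emb_dim)).astype(np.float32)
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            word, values = parts[0], parts[1:]
            if word in word_to_id and len(values) == emb_dim:
                emb[word_to_id[word]] = np.asarray(values, dtype=np.float32)
    return emb  # e.g. used as the initial value of the embedding variable
```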
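
To make the bucketing and dynamic-padding idea from the paragraph above concrete, here is a rough, framework-free sketch in plain Python; the names are illustrative, and the project's actual pipeline implements this with TensorFlow queues rather than lists:

```python
def bucket_and_pad(examples, bucket_limits, pad_id=0):
    # Group tokenized examples by length so each bucket holds sequences of similar size.
    buckets = {limit: [] for limit in bucket_limits}
    for tokens in examples:
        for limit in sorted(bucket_limits):
            if len(tokens) <= limit:
                buckets[limit].append(tokens)
                break
    # Dynamic padding: pad only up to the longest sequence within each bucket,
    # instead of a single global maximum length.
    padded = {}
    for limit, bucket in buckets.items():
        if bucket:
            longest = max(len(t) for t in bucket)
            padded[limit] = [t + [pad_id] * (longest - len(t)) for t in bucket]
    return padded

# Example: bucket_and_pad([[1, 2], [3, 4, 5], [6]], bucket_limits=[2, 4])
# -> {2: [[1, 2], [6, 0]], 4: [[3, 4, 5]]}
```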
@@ -49,6 +49,8 @@ Currently the only supported dataset is the one provided by the [Bag of Words Me
 The Kaggle dataset contains 25,000 labeled examples of movie reviews. Positive movie reviews are labeled with 1, while negative movie reviews are labeled with 0. The dataset is split into 20,000 training and 5,000 validation examples.
 ## Training the network
 
+The model can be trained across multiple GPUs to speed up the computations. In order to start the training:
+
 Execute
 <pre><code>
 python train.py ( <== Use all available GPUs )

train.py

Lines changed: 32 additions & 19 deletions
@@ -1,7 +1,6 @@
+from data.kaggle_loader import KaggleLoader
 from model.model import *
 from model.trainer import classifier_train
-from data.kaggle_loader import KaggleLoader
-
 
 __author__ = 'georgi.val.stoyan0v@gmail.com'
 
@@ -14,8 +13,8 @@
 data = KaggleLoader(BUCKETS, DATA_FILE, batch_size=BATCH_SIZE)
 validation = KaggleLoader(BUCKETS, DATA_FILE, used_for_test_data=True, batch_size=BATCH_SIZE)
 
-x, y = data.source, data.target
-val_x, val_y = validation.source, validation.target
+x, y = tf.split(data.source, tf.sg_gpus()), tf.split(data.target, tf.sg_gpus())
+val_x, val_y = tf.split(validation.source, tf.sg_gpus()), tf.split(validation.target, tf.sg_gpus())
 
 # session with multiple GPU support
 sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
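
For readers unfamiliar with the split in the hunk above: `tf.split` cuts each batch tensor along its first (batch) dimension into one shard per GPU, so each device later receives its own slice of the queue output. A tiny TF 1.x illustration with made-up shapes (not the project's code):

```python
import tensorflow as tf  # TensorFlow 1.x, as used by the repository

num_gpus = 2                               # stand-in for tf.sg_gpus()
batch = tf.placeholder(tf.int32, [8, 50])  # batch of 8 token sequences, length 50
shards = tf.split(batch, num_gpus)         # two tensors of shape [4, 50]
# shards[i] is the portion of the batch that GPU i will process;
# the batch size must be divisible by the number of GPUs.
```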
@@ -28,25 +27,39 @@
 else:
     emb = tf.sg_emb(name='emb', voca_size=data.vocabulary_size, dim=embedding_dim)
 
-z_x = x.sg_lookup(emb=emb)
-v_x = val_x.sg_lookup(emb=emb)
 
-with tf.sg_context(name='model'):
-    train_classifier = classifier(z_x, NUM_LABELS, data.vocabulary_size)
+@tf.sg_parallel
+def get_train_loss(opt):
+    with tf.sg_context(name='model'):
+        z_x = opt.input[opt.gpu_index].sg_lookup(emb=emb)
+
+        train_classifier = classifier(z_x, NUM_LABELS, data.vocabulary_size)
+
+        # cross entropy loss with logit
+        loss = train_classifier.sg_ce(target=opt.target[opt.gpu_index])
+
+        return loss
+
+
+@tf.sg_parallel
+def get_val_metrics(opt):
+    with tf.sg_context(name='model', reuse=True):
+        tf.get_variable_scope().reuse_variables()
+
+        v_x = opt.input[opt.gpu_index].sg_lookup(emb=emb)
 
-# cross entropy loss with logit
-loss = train_classifier.sg_ce(target=y)
+        test_classifier = classifier(v_x, NUM_LABELS, validation.vocabulary_size)
 
-with tf.sg_context(name='model', reuse=True):
-    test_classifier = classifier(v_x, NUM_LABELS, validation.vocabulary_size)
+        # accuracy evaluation (validation set)
+        acc = (test_classifier.sg_softmax()
+               .sg_accuracy(target=opt.target[opt.gpu_index], name='accuracy'))
 
-# accuracy evaluation (validation set)
-acc = (test_classifier.sg_softmax()
-       .sg_accuracy(target=val_y,name='val'))
+        # validation loss
+        val_loss = (test_classifier.sg_ce(target=opt.target[opt.gpu_index], name='validation'))
 
-# validation loss
-val_loss = (test_classifier.sg_ce(target=val_y))
+        return acc, val_loss
 
 # train
-classifier_train(sess=sess, log_interval=50, lr=1e-3, loss=loss, eval_metric=[acc, val_loss],
-                 ep_size=data.num_batches, max_ep=150, early_stop=False, data=data)
+classifier_train(sess=sess, log_interval=50, lr=1e-3, loss=get_train_loss(input=x, target=y),
+                 eval_metric=get_val_metrics(input=val_x, target=val_y)[0],
+                 ep_size=data.num_batches, max_ep=10, early_stop=False, data=data)
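
The `@tf.sg_parallel` functions above express the usual data-parallel "tower" pattern: one copy of the graph per GPU, variables shared via reuse, and the per-tower losses combined for the optimizer. The following is a rough, generic sketch of that pattern in plain TensorFlow 1.x, not sugartensor's actual implementation; `build_towers` and `model_fn` are hypothetical names:

```python
import tensorflow as tf  # TF 1.x style, matching the code above

def build_towers(input_shards, target_shards, model_fn, num_gpus):
    losses = []
    for i in range(num_gpus):
        # Place tower i on GPU i; reuse variables after the first tower so
        # every GPU shares the same weights.
        with tf.device('/gpu:%d' % i), tf.variable_scope('model', reuse=(i > 0)):
            logits = model_fn(input_shards[i])
            losses.append(tf.losses.sparse_softmax_cross_entropy(
                labels=target_shards[i], logits=logits))
    # Average the per-GPU losses into the single scalar the optimizer minimizes.
    return tf.reduce_mean(tf.stack(losses))
```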
