
Commit 547bf48

- added support for training on multiple GPUs
1 parent 490d2c6 commit 547bf48

File tree: README.md, train.py

2 files changed (+35, -20 lines)

README.md

Lines changed: 3 additions & 1 deletion
@@ -14,7 +14,7 @@ Where the Atrous CNN layers are similar to the ones in the bytenet encoder in [N
 
 The network supports embedding initialization with pre-trained GloVe vectors ([GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/pubs/glove.pdf)), which handle even rare words quite well compared to word2vec.
 
-To speed up training, the model pre-processes any input into a "clean" file, which it then uses for training. The data is read line by line from the "clean" files for better memory management. All input data is split into the appropriate buckets and dynamic padding is applied, which improves accuracy and speeds up training. The input pipeline can read from multiple data sources, which makes adding more data sources easy as long as they are preprocessed in the right format.
+To speed up training, the model pre-processes any input into a "clean" file, which it then uses for training. The data is read line by line from the "clean" files for better memory management. All input data is split into the appropriate buckets and dynamic padding is applied, which improves accuracy and speeds up training. The input pipeline can read from multiple data sources, which makes adding more data sources easy as long as they are preprocessed in the right format. The model can be trained on multiple GPUs if the hardware provides this capability.
 
 <p align="center">
 <img src="https://raw.githubusercontent.com/randomrandom/deep-atrous-cnn-sentiment/master/png/queue_example.gif" width="1024"/>
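
As a side note on the GloVe initialization mentioned in the hunk above, here is a minimal, hypothetical sketch of loading pre-trained GloVe vectors into an embedding matrix; the function name, file path, and `word_to_id` mapping are illustrative and are not part of this repository:

```python
import numpy as np

def load_glove_embeddings(glove_path, word_to_id, emb_dim):
    # Start from a small random init so words missing from the GloVe file still get a vector.
    emb = np.random.uniform(-0.05, 0.05, (len(word_to_id), emb_dim)).astype(np.float32)
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            word, values = parts[0], parts[1:]
            if word in word_to_id and len(values) == emb_dim:
                emb[word_to_id[word]] = np.asarray(values, dtype=np.float32)
    return emb  # e.g. used as the initial value of the embedding variable
```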
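
To make the bucketing and dynamic-padding idea from the paragraph above concrete, here is a rough, framework-free sketch in plain Python; the names are illustrative, and the project's actual pipeline implements this with TensorFlow queues rather than lists:

```python
def bucket_and_pad(examples, bucket_limits, pad_id=0):
    # Group tokenized examples by length so each bucket holds sequences of similar size.
    buckets = {limit: [] for limit in bucket_limits}
    for tokens in examples:
        for limit in sorted(bucket_limits):
            if len(tokens) <= limit:
                buckets[limit].append(tokens)
                break
    # Dynamic padding: pad only up to the longest sequence within each bucket,
    # instead of a single global maximum length.
    padded = {}
    for limit, bucket in buckets.items():
        if bucket:
            longest = max(len(t) for t in bucket)
            padded[limit] = [t + [pad_id] * (longest - len(t)) for t in bucket]
    return padded

# Example: bucket_and_pad([[1, 2], [3, 4, 5], [6]], bucket_limits=[2, 4])
# -> {2: [[1, 2], [6, 0]], 4: [[3, 4, 5]]}
```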
@@ -49,6 +49,8 @@ Currently the only supported dataset is the one provided by the [Bag of Words Me
 The Kaggle dataset contains 25,000 labeled examples of movie reviews. Positive movie reviews are labeled with 1, while negative movie reviews are labeled with 0. The dataset is split into 20,000 training and 5,000 validation examples.
 ## Training the network
 
+The model can be trained across multiple GPUs to speed up the computations. In order to start the training:
+
 Execute
 <pre><code>
 python train.py ( <== Use all available GPUs )

train.py

Lines changed: 32 additions & 19 deletions
@@ -1,7 +1,6 @@
+from data.kaggle_loader import KaggleLoader
 from model.model import *
 from model.trainer import classifier_train
-from data.kaggle_loader import KaggleLoader
-
 
 __author__ = 'georgi.val.stoyan0v@gmail.com'
 
@@ -14,8 +13,8 @@
 data = KaggleLoader(BUCKETS, DATA_FILE, batch_size=BATCH_SIZE)
 validation = KaggleLoader(BUCKETS, DATA_FILE, used_for_test_data=True, batch_size=BATCH_SIZE)
 
-x, y = data.source, data.target
-val_x, val_y = validation.source, validation.target
+x, y = tf.split(data.source, tf.sg_gpus()), tf.split(data.target, tf.sg_gpus())
+val_x, val_y = tf.split(validation.source, tf.sg_gpus()), tf.split(validation.target, tf.sg_gpus())
 
 # session with multiple GPU support
 sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
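
For readers unfamiliar with the split in the hunk above: `tf.split` cuts each batch tensor along its first (batch) dimension into one shard per GPU, so each device later receives its own slice of the queue output. A tiny TF 1.x illustration with made-up shapes (not the project's code):

```python
import tensorflow as tf  # TensorFlow 1.x, as used by the repository

num_gpus = 2                               # stand-in for tf.sg_gpus()
batch = tf.placeholder(tf.int32, [8, 50])  # batch of 8 token sequences, length 50
shards = tf.split(batch, num_gpus)         # two tensors of shape [4, 50]
# shards[i] is the portion of the batch that GPU i will process;
# the batch size must be divisible by the number of GPUs.
```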
@@ -28,25 +27,39 @@
 else:
     emb = tf.sg_emb(name='emb', voca_size=data.vocabulary_size, dim=embedding_dim)
 
-z_x = x.sg_lookup(emb=emb)
-v_x = val_x.sg_lookup(emb=emb)
 
-with tf.sg_context(name='model'):
-    train_classifier = classifier(z_x, NUM_LABELS, data.vocabulary_size)
+@tf.sg_parallel
+def get_train_loss(opt):
+    with tf.sg_context(name='model'):
+        z_x = opt.input[opt.gpu_index].sg_lookup(emb=emb)
+
+        train_classifier = classifier(z_x, NUM_LABELS, data.vocabulary_size)
+
+        # cross entropy loss with logit
+        loss = train_classifier.sg_ce(target=opt.target[opt.gpu_index])
+
+        return loss
+
+
+@tf.sg_parallel
+def get_val_metrics(opt):
+    with tf.sg_context(name='model', reuse=True):
+        tf.get_variable_scope().reuse_variables()
+
+        v_x = opt.input[opt.gpu_index].sg_lookup(emb=emb)
 
-# cross entropy loss with logit
-loss = train_classifier.sg_ce(target=y)
+        test_classifier = classifier(v_x, NUM_LABELS, validation.vocabulary_size)
 
-with tf.sg_context(name='model', reuse=True):
-    test_classifier = classifier(v_x, NUM_LABELS, validation.vocabulary_size)
+        # accuracy evaluation (validation set)
+        acc = (test_classifier.sg_softmax()
+               .sg_accuracy(target=opt.target[opt.gpu_index], name='accuracy'))
 
-# accuracy evaluation (validation set)
-acc = (test_classifier.sg_softmax()
-       .sg_accuracy(target=val_y,name='val'))
+        # validation loss
+        val_loss = (test_classifier.sg_ce(target=opt.target[opt.gpu_index], name='validation'))
 
-# validation loss
-val_loss = (test_classifier.sg_ce(target=val_y))
+        return acc, val_loss
 
 # train
-classifier_train(sess=sess, log_interval=50, lr=1e-3, loss=loss, eval_metric=[acc, val_loss],
-                 ep_size=data.num_batches, max_ep=150, early_stop=False, data=data)
+classifier_train(sess=sess, log_interval=50, lr=1e-3, loss=get_train_loss(input=x, target=y),
+                 eval_metric=get_val_metrics(input=val_x, target=val_y)[0],
+                 ep_size=data.num_batches, max_ep=10, early_stop=False, data=data)
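
The `@tf.sg_parallel` functions above express the usual data-parallel "tower" pattern: one copy of the graph per GPU, variables shared via reuse, and the per-tower losses combined for the optimizer. The following is a rough, generic sketch of that pattern in plain TensorFlow 1.x, not sugartensor's actual implementation; `build_towers` and `model_fn` are hypothetical names:

```python
import tensorflow as tf  # TF 1.x style, matching the code above

def build_towers(input_shards, target_shards, model_fn, num_gpus):
    losses = []
    for i in range(num_gpus):
        # Place tower i on GPU i; reuse variables after the first tower so
        # every GPU shares the same weights.
        with tf.device('/gpu:%d' % i), tf.variable_scope('model', reuse=(i > 0)):
            logits = model_fn(input_shards[i])
            losses.append(tf.losses.sparse_softmax_cross_entropy(
                labels=target_shards[i], logits=logits))
    # Average the per-GPU losses into the single scalar the optimizer minimizes.
    return tf.reduce_mean(tf.stack(losses))
```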
