44import numpy as np
55import sugartensor as tf
66from abc import abstractclassmethod
7+ from tensorflow .contrib .tensorboard .plugins import projector
78
89from data .preprocessors .kaggle_preprocessor import KagglePreprocessor
910
@@ -24,10 +25,14 @@ class BaseDataLoader(object):
2425 DEFAULT_VOCABULARY_SIZE = 50000
2526 DEFAULT_PRETRAINED_EMBEDDINGS = 'data/embeddings/glove.6B.300d.txt'
2627
28+ DEFAULT_META_DATA_FILE = 'metadata.tsv'
29+ DEFAULT_METADATA_DIR = 'asset/train/'
30+
2731 def __init__ (self , record_defaults , field_delim , data_column , bucket_boundaries , file_names ,
2832 skip_header_lines = _DEFAULT_SKIP_HEADER_LINES ,
2933 num_threads = _num_threads , batch_size = _batch_size , min_after_dequeue = _min_after_dequeue ,
30- capacity = _capacity , used_for_test_data = False , name = _name ):
34+ capacity = _capacity , used_for_test_data = False , meta_file = DEFAULT_META_DATA_FILE ,
35+ save_dir = DEFAULT_METADATA_DIR , name = _name ):
3136 self .__file_names = file_names
3237 self .__field_delim = field_delim
3338 self .__record_defaults = record_defaults
@@ -42,6 +47,8 @@ def __init__(self, record_defaults, field_delim, data_column, bucket_boundaries,
4247 self ._capacity = capacity
4348 self ._name = name
4449
50+ self .meta_file = meta_file
51+ self .save_dir = save_dir
4552 self .table = None
4653 self .num_threads = num_threads
4754 self .vocabulary_size = 0
@@ -210,7 +217,7 @@ def preload_embeddings(self, embed_dim, file_name=DEFAULT_PRETRAINED_EMBEDDINGS)
210217
211218 if word in dictionary :
212219 mapped_words = mapped_words + 1
213- pre_trained_emb [dictionary [word ]] = row [1 :]
220+ pre_trained_emb [dictionary [word ]- 1 ] = row [1 :]
214221 del missing_words [word ]
215222
216223 print ('Mapped words to pre-trained embeddings: %d' % mapped_words )
@@ -221,3 +228,27 @@ def preload_embeddings(self, embed_dim, file_name=DEFAULT_PRETRAINED_EMBEDDINGS)
221228 print ('Loaded pre-trained embeddings' )
222229
223230 return pre_trained_emb
231+
def visualize_embeddings(self, sess, tensor, name):
    """
    Registers an embedding tensor with the Tensorboard embedding projector.

    Makes sure ``self.save_dir`` exists, opens a summary writer on it (which
    also records the default graph) and writes a projector config that links
    the tensor called ``name`` to the metadata file ``self.meta_file``.

    :param sess: Tensorflow session object (currently unused; kept so existing callers keep working)
    :param tensor: The embedding tensor to be visualized (currently unused; only ``name`` is recorded)
    :param name: Name of the tensor
    """
    # Local import: use the standard library directly instead of reaching it
    # through ``tf.os``, which is only an incidental attribute of the
    # tensorflow/sugartensor module and not part of its API.
    import os

    # make directory if not exist (exist_ok avoids the check-then-create race)
    os.makedirs(self.save_dir, exist_ok=True)

    # summary writer
    summary_writer = tf.summary.FileWriter(self.save_dir, graph=tf.get_default_graph())
    try:
        # embedding visualizer
        config = projector.ProjectorConfig()
        emb = config.embeddings.add()
        emb.tensor_name = name  # tensor
        # NOTE(review): some Tensorboard versions resolve a relative
        # metadata_path against the log directory itself -- confirm that the
        # joined path below is what the installed version expects.
        emb.metadata_path = os.path.join(self.save_dir, self.meta_file)  # metadata file
        print(os.path.abspath(emb.metadata_path))
        projector.visualize_embeddings(summary_writer, config)
    finally:
        # The writer is local to this method and never handed to the caller,
        # so flush and release its event file here instead of leaking it.
        summary_writer.close()
0 commit comments