|
"""Print the ``journal`` field of a BibTeX file with normalised spacing.

The script reads ``Bibtex.bib``, looks at the lines that start with ", "
(the only lines the original code inspected), picks the one whose field name
is ``journal``, tokenises its value with NLTK and re-joins the tokens so that
words are separated by single spaces and commas stay attached to the word
before them.

NOTE(review): the field layout is assumed to be
``, journal = {Some Journal, Series B}`` (leading-comma style, value wrapped
in braces or double quotes) -- TODO confirm against the real Bibtex.bib.
"""
import re

import nltk

import author

f_name = "Bibtex.bib"

# Field name plus "=" at the start of a field, once the leading ", " is gone.
_JOURNAL_PREFIX = re.compile(r"^journal\s*=\s*")


def _strip_delimiters(value):
    """Return *value* without a trailing comma and without enclosing {} or ""."""
    value = value.strip().rstrip(",").strip()
    if len(value) >= 2 and (value[0], value[-1]) in (("{", "}"), ('"', '"')):
        value = value[1:-1]
    return value.strip()


def _join_tokens(tokens):
    """Join *tokens* with single spaces, gluing each comma to the previous word."""
    parts = []
    for token in tokens:
        if token == "," and parts:
            # No space before a comma: attach it to the preceding word.
            parts[-1] += token
        else:
            parts.append(token)
    return " ".join(parts)


# NOTE(review): this rebinds the name ``author`` from the imported module to
# whatever ``author.author()`` returns; kept as in the original because the
# call may be relied on elsewhere -- confirm whether it is still needed.
author = author.author()

# Stays empty when the file has no journal field, so the script prints an
# empty value instead of failing on an undefined name.
no_journal = ""

with open(f_name, "r", encoding="utf8") as file:
    for line in file:
        if not re.search('^, ', line):
            continue
        # Examine each field on its own; text from earlier lines must not leak
        # into the test for the field name.
        field = line[2:].strip()
        prefix = _JOURNAL_PREFIX.match(field)
        if prefix:
            # If several journal fields exist, the last one read is kept.
            no_journal = _strip_delimiters(field[prefix.end():])

# Only the token strings are needed, so no part-of-speech tagging is done.
list_of_keys = nltk.word_tokenize(no_journal)
holder2 = _join_tokens(list_of_keys)
print('line 52: (post last for-if, which assembles holder2) holder2:', holder2)
0 commit comments