1- """
2- Script to update the custom dictionary 'main.txt' with new words from a given .po file.
3-
4- The script scans a specified .po file, ignoring certain metadata lines (e.g., lines starting with "#:").
5- It extracts all unique Greek and English words, compares them against the custom dictionary
6- under the 'dictionaries/' directory (sibling to the 'scripts/' directory), and adds any new words in alphabetical order.
7- """
8-
91import sys
102import os
113import re
124
135def scan_and_update (file_path ):
146 """
15- Scan the given .po file, extract words, and update the main dictionary.
7+ Scan the given .po file, extract words from msgstr blocks, and update the main dictionary.
168
179 If the dictionary does not exist, it creates a new one.
1810
@@ -47,21 +39,15 @@ def scan_and_update(file_path):
4739 print (f"Input file { file_path } not found." )
4840 return 0
4941
50- # Regular expression to ignore metadata lines like #: reference/executionmodel.rst:145
51- ignore_pattern = re .compile (r"^#:" )
52-
5342 # Regular expression to include accented Greek letters
54- word_pattern = re .compile (r'\b[a-zA-Zα-ωά-ώΑ-ΩΆ-Ώ ]+\b' , re .UNICODE )
43+ word_pattern = re .compile (r'\b[a-zA-Z\u03B1-\u03C9\u0386-\u03CE ]+\b' , re .UNICODE )
5544
5645 new_words = set ()
5746 entry_buffer = []
5847 collecting_msgstr = False
5948
60- # Step 4: Extract words from the .po file
49+ # Step 4: Extract words only from msgstr blocks
6150 for line in lines :
62- if ignore_pattern .match (line ):
63- continue # Ignore metadata lines
64-
6551 # Handle msgstr entries
6652 if line .startswith ("msgstr" ):
6753 collecting_msgstr = True
@@ -81,7 +67,6 @@ def scan_and_update(file_path):
8167 entry_buffer = []
8268 else :
8369 # Continue collecting multiline msgstr
84- # Remove surrounding quotes and append
8570 entry_buffer .append (line .strip ().strip ('"' ))
8671
8772 # Handle any remaining buffered text after the loop
@@ -115,4 +100,4 @@ def scan_and_update(file_path):
115100 print (f"The provided path '{ file_path } ' is not a valid file." )
116101 sys .exit (1 )
117102 # Process the input file and update the dictionary
118- new_word_count = scan_and_update (file_path )
103+ new_word_count = scan_and_update (file_path )
0 commit comments