1+
2+
3+ import ast
4+ import json
5+ import os
6+ import re
7+ from typing import Dict , List , Tuple
8+
9+
# Root of the tree to analyse: the directory the script is launched from.
ROOT_DIR = os.getcwd()

# Directory names that walk_repository() prunes from the traversal.
SKIP_DIRS = {
    ".git",
    "__pycache__",
    "courseProjectCode",
    "courseProjectDocs",
    "node_modules",
}

# Only files with one of these extensions are passed to analyse_file().
SOURCE_EXTENSIONS = {".py"}
21+
22+
def count_python_functions(file_content: str) -> Tuple[int, List[Tuple[int, int]]]:
    """Count the function definitions in a piece of Python source text.

    Both ``def`` and ``async def`` are counted, including methods and
    nested functions, because the whole syntax tree is walked.

    Args:
        file_content: Python source code to analyse.

    Returns:
        A ``(count, spans)`` pair where ``spans`` holds one
        ``(start_line, end_line)`` tuple per function found. ``(0, [])`` is
        returned when the text cannot be parsed.
    """
    try:
        tree = ast.parse(file_content)
    except (SyntaxError, ValueError):
        # SyntaxError covers invalid code. ValueError is what interpreters
        # before 3.12 raise for source containing NUL bytes, which can slip
        # through when files are read with errors="ignore".
        return 0, []

    function_spans = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        # end_lineno is only populated on Python 3.8+; skip nodes lacking it.
        start_line = getattr(node, "lineno", None)
        end_line = getattr(node, "end_lineno", None)
        if start_line is not None and end_line is not None:
            function_spans.append((start_line, end_line))
    return len(function_spans), function_spans
38+
39+
# A line is treated as declaring a function when it contains the `function`
# keyword or an arrow (`=>`).
_JS_FUNCTION_PATTERN = re.compile(r"\bfunction\b|=>")


def count_js_functions(file_content: str) -> Tuple[int, List[Tuple[int, int]]]:
    """Heuristically count function declarations in JavaScript source text.

    Each non-comment line containing the ``function`` keyword or an arrow
    (``=>``) counts as one function. Lines that start with ``//`` and every
    line of a ``/* ... */`` block comment (not just its first line) are
    ignored, so prose such as `` * this function ...`` is not miscounted.

    Args:
        file_content: JavaScript source code to analyse.

    Returns:
        A ``(count, spans)`` pair. Because the end of a function is not
        tracked, every span is ``(line, line)`` for the matching line.
    """
    spans: List[Tuple[int, int]] = []
    in_block_comment = False
    for idx, line in enumerate(file_content.splitlines(), start=1):
        stripped = line.strip()
        if in_block_comment:
            # Still inside /* ... */; the line that closes it is skipped too.
            if "*/" in stripped:
                in_block_comment = False
            continue
        if stripped.startswith("/*"):
            # Look for the terminator after the opener so "/*/" is not
            # mistaken for a comment that is already closed.
            in_block_comment = "*/" not in stripped[2:]
            continue
        if stripped.startswith("//"):
            continue
        if _JS_FUNCTION_PATTERN.search(stripped):
            spans.append((idx, idx))
    return len(spans), spans
52+
53+
# Branching constructs recognised by the estimate. Keywords are matched as
# whole words so that `except:` and `if(` are counted while identifiers that
# merely end in a keyword (e.g. `notif`) are not.
_DECISION_PATTERN = re.compile(
    r"\b(?:if|elif|for|while|case|switch|catch|except)\b|&&|\|\||\?"
)


def approximate_cyclomatic_complexity(lines: List[str]) -> int:
    """Estimate cyclomatic complexity from raw source lines.

    The estimate starts at 1 and adds 1 for every line that contains at
    least one decision point (a branching keyword, ``&&``, ``||`` or ``?``).
    A line contributes at most once, however many decision points it holds.
    Blank lines and lines starting with ``#`` or ``//`` are ignored.

    Args:
        lines: Source lines of a file, in any supported language.

    Returns:
        The approximate complexity, always at least 1.
    """
    complexity = 1  # Base complexity of a straight-line program.
    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith(("#", "//")):
            continue
        if _DECISION_PATTERN.search(stripped):
            complexity += 1
    return complexity
69+
70+
# Delimiters that open (and close) a Python triple-quoted docstring.
_TRIPLE_QUOTES = ('"""', "'''")


def _count_code_and_comment_lines(lines: List[str]) -> Tuple[int, int]:
    """Classify every non-blank line as code or comment.

    Returns a ``(code_lines, comment_lines)`` pair. ``#`` and ``//`` lines,
    ``/* ... */`` blocks and triple-quoted docstring blocks (all of their
    lines, not only the first) count as comments.
    """
    code_lines = 0
    comment_lines = 0
    in_block_comment = False
    docstring_quote = None  # Delimiter of the docstring currently open.

    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        if in_block_comment:
            comment_lines += 1
            if "*/" in stripped:
                in_block_comment = False
            continue
        if docstring_quote is not None:
            comment_lines += 1
            if docstring_quote in stripped:
                docstring_quote = None
            continue
        if stripped.startswith("/*"):
            comment_lines += 1
            if "*/" not in stripped:
                in_block_comment = True
            continue
        if stripped.startswith(("#", "//")):
            comment_lines += 1
            continue
        opener = next(
            (quote for quote in _TRIPLE_QUOTES if stripped.startswith(quote)),
            None,
        )
        if opener is not None:
            comment_lines += 1
            # A one-line docstring closes on the line it opens; otherwise
            # keep counting lines as comments until the delimiter recurs.
            if opener not in stripped[len(opener):]:
                docstring_quote = opener
            continue
        code_lines += 1
    return code_lines, comment_lines


def _is_test_path(filepath: str) -> bool:
    """Return True when a directory or file name in the path starts with "test".

    The path is first made relative to the working directory so that the
    ancestors of the analysed tree (for example ``/home/me/testing/repo``)
    do not turn every file into a test file.
    """
    try:
        candidate = os.path.relpath(filepath)
    except ValueError:
        # Raised on Windows when the path is on a different drive.
        candidate = filepath
    parts = candidate.lower().split(os.sep)
    if parts and parts[0] == os.pardir:
        # The file lives outside the working directory: fall back to
        # inspecting the path exactly as it was given.
        parts = filepath.lower().split(os.sep)
    return any(part.startswith("test") for part in parts if part not in {"", "."})


def _count_test_functions(content: str, ext: str) -> int:
    """Count test cases: ``test*`` functions in Python, ``it(``/``describe(`` calls in JS."""
    if ext == ".py":
        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError):
            # ValueError: NUL bytes in the source on interpreters before 3.12.
            return 0
        return sum(
            1
            for node in ast.walk(tree)
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            and node.name.startswith("test")
        )
    if ext == ".js":
        return len(re.findall(r"\b(it|describe)\s*\(", content))
    return 0


def analyse_file(filepath: str) -> Dict[str, object]:
    """Compute size, comment, function, complexity and test metrics for one file.

    Args:
        filepath: Path of the source file to analyse. ``.py`` and ``.js``
            files get function statistics; other extensions report zero
            functions.

    Returns:
        A dict with the keys ``file``, ``lines_of_code``, ``comment_lines``,
        ``comment_ratio``, ``functions``, ``average_function_length``,
        ``cyclomatic_complexity``, ``is_test_file`` and ``test_functions``.
        An empty dict is returned when the file cannot be read.
    """
    try:
        # errors="ignore" drops undecodable bytes instead of failing.
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError):
        return {}

    lines = content.splitlines()
    code_lines, comment_lines = _count_code_and_comment_lines(lines)

    ext = os.path.splitext(filepath)[1]
    functions_count = 0
    function_spans: List[Tuple[int, int]] = []
    if ext == ".py":
        functions_count, function_spans = count_python_functions(content)
    elif ext == ".js":
        functions_count, function_spans = count_js_functions(content)

    total_function_lines = sum(
        end - start + 1 for start, end in function_spans if end >= start
    )
    average_function_length = (
        (total_function_lines / functions_count) if functions_count > 0 else 0
    )

    complexity = approximate_cyclomatic_complexity(lines)

    is_test_file = _is_test_path(filepath)
    test_functions_count = (
        _count_test_functions(content, ext) if is_test_file else 0
    )

    return {
        "file": filepath,
        "lines_of_code": code_lines,
        "comment_lines": comment_lines,
        "comment_ratio": (comment_lines / code_lines) if code_lines > 0 else 0,
        "functions": functions_count,
        "average_function_length": average_function_length,
        "cyclomatic_complexity": complexity,
        "is_test_file": is_test_file,
        "test_functions": test_functions_count,
    }
154+
155+
def walk_repository(root_dir: str) -> List[Dict[str, object]]:
    """Collect per-file metrics for every source file below ``root_dir``.

    Directories named in ``SKIP_DIRS`` are not descended into, and only
    files whose extension is in ``SOURCE_EXTENSIONS`` are analysed. Files
    for which ``analyse_file`` returns an empty dict are left out.

    Args:
        root_dir: Directory at which the traversal starts.

    Returns:
        The metrics dicts, in the order the files were visited.
    """
    collected = []
    for current_dir, subdirs, files in os.walk(root_dir):
        # Prune in place: os.walk only honours changes made to this very list.
        subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]

        source_paths = (
            os.path.join(current_dir, name)
            for name in files
            if os.path.splitext(name)[1] in SOURCE_EXTENSIONS
        )
        for path in source_paths:
            file_metrics = analyse_file(path)
            if file_metrics:
                collected.append(file_metrics)
    return collected
169+
170+
def aggregate_metrics(results: List[Dict[str, object]]) -> Dict[str, object]:
    """Roll the per-file metrics up into repository-wide totals.

    Args:
        results: Metrics dicts carrying the keys ``lines_of_code``,
            ``comment_lines``, ``functions``, ``cyclomatic_complexity``,
            ``average_function_length``, ``is_test_file`` and
            ``test_functions``.

    Returns:
        A summary dict of totals and ratios. Every ratio or average whose
        denominator is zero is reported as 0.
    """

    def column_total(rows, key):
        # Sum one numeric column over the given rows.
        return sum(row[key] for row in rows)

    def safe_ratio(numerator, denominator):
        # Division that yields 0 instead of failing on an empty denominator.
        return (numerator / denominator) if denominator > 0 else 0

    test_rows = [row for row in results if row["is_test_file"]]

    code_total = column_total(results, "lines_of_code")
    comment_total = column_total(results, "comment_lines")
    function_total = column_total(results, "functions")
    test_line_total = column_total(test_rows, "lines_of_code")

    # Per-file averages are weighted by function count to recover the total
    # number of lines that belong to functions.
    function_line_total = sum(
        row["average_function_length"] * row["functions"] for row in results
    )

    return {
        "total_files": len(results),
        "total_code_lines": code_total,
        "total_comment_lines": comment_total,
        "comment_ratio": safe_ratio(comment_total, code_total),
        "total_functions": function_total,
        "average_function_length": safe_ratio(function_line_total, function_total),
        "total_cyclomatic_complexity": column_total(results, "cyclomatic_complexity"),
        "total_test_files": len(test_rows),
        "total_test_lines": test_line_total,
        "total_test_functions": column_total(test_rows, "test_functions"),
        "test_ratio": safe_ratio(test_line_total, code_total),
    }
211+
212+
def main() -> None:
    """Analyse the tree under ``ROOT_DIR`` and print the report as JSON.

    The report has two keys: ``files`` (the per-file metrics) and
    ``summary`` (the aggregated totals). It is written to standard output
    with a two-space indent.
    """
    per_file = walk_repository(ROOT_DIR)
    report = {
        "files": per_file,
        "summary": aggregate_metrics(per_file),
    }
    print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()
0 commit comments