@@ -26,30 +26,45 @@ class FileSystemStats:
2626class FileSystemNode (Source ): # pylint: disable=too-many-instance-attributes
2727 """Base class for filesystem nodes (files, directories, symlinks)."""
2828
# Required fields: the None defaults exist only so the dataclass can be
# declared with all-default fields; __post_init__ rejects instances that
# leave any of the three unset.
name: str | None = None
path_str: str | None = None
path: "Path | None" = None

# Optional fields with sensible defaults
size: int = 0
file_count: int = 0
dir_count: int = 0
depth: int = 0
children: list[FileSystemNode] = field(default_factory=list)

# Display label read by content_string(); subclasses override it.
# Deliberately left unannotated: @dataclass turns every *annotated* class
# attribute into an instance field (and an __init__ parameter), whereas a
# bare assignment stays a plain class attribute shared by all instances.
_display_type = "NODE"
43+
def __post_init__(self) -> None:
    """Reject instances whose required fields were left at ``None``.

    Raises
    ------
    ValueError
        If ``name``, ``path_str`` or ``path`` is ``None``. The fields are
        checked in that order and the first missing one is reported.

    """
    for required in ("name", "path_str", "path"):
        if getattr(self, required) is None:
            raise ValueError(f"FileSystemNode requires '{required}' field")
3752
@property
def tree(self) -> str:
    """Return this node's name, or an empty string when the name is unset."""
    label = self.name
    return label if label else ""
4257
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
    """Return the default one-line tree rendering for this node.

    Parameters
    ----------
    prefix : str
        Text placed before the branch connector.
    is_last : bool
        Selects the "last child" connector when true, the "middle child" one otherwise.

    Returns
    -------
    list[str]
        A single-element list: prefix, connector and the node name (empty when unset).

    """
    if is_last:
        connector = "└── "
    else:
        connector = "├── "
    label = self.name or ""
    return [f"{prefix}{connector}{label}"]
4762
4863 def sort_children (self ) -> None :
4964 """Sort the children nodes of a directory according to a specific order."""
5065
5166 def _sort_key (child : FileSystemNode ) -> tuple [int , str ]:
52- name = child .name .lower ()
67+ name = ( child .name or "" ) .lower ()
5368 # Each child knows its own sort priority - polymorphism!
5469 priority = child .get_sort_priority ()
5570 if priority == 0 and (name == "readme" or name .startswith ("readme." )):
@@ -75,26 +90,60 @@ def content_string(self) -> str:
7590 A string representation of the node's content.
7691
7792 """
78- type_name = self .__class__ .__name__ .upper ().replace ("FILESYSTEM" , "" )
93+ # Use class attribute instead of fragile string manipulation
94+ type_name = self ._display_type
7995
8096 parts = [
8197 SEPARATOR ,
82- f"{ type_name } : { str (self .path_str ).replace (os .sep , '/' )} " ,
98+ f"{ type_name } : { str (self .path_str or '' ).replace (os .sep , '/' )} " ,
8399 SEPARATOR ,
84100 f"{ self .content } " ,
85101 ]
86102
87103 return "\n " .join (parts ) + "\n \n "
88104
def get_content(self) -> str:
    """Return the node's text content, or a placeholder string describing why it is unavailable.

    Returns
    -------
    str
        One of: the notebook rendering produced by ``process_notebook`` for ``.ipynb`` paths,
        the decoded file text, a bracketed marker (``[Empty file]`` / ``[Binary file]``),
        or a message starting with ``Error``. Failures caught here are reported through
        the return value rather than raised.

    """
    # Imported inside the method, as in the original; presumably to avoid an
    # import cycle with the utils package - TODO confirm.
    from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk
    from gitingest.utils.notebook import process_notebook

    target = self.path
    if not target:
        return "Error: No path specified"

    # Notebooks are delegated wholesale; any failure becomes an error string.
    if target.suffix == ".ipynb":
        try:
            return process_notebook(target)
        except Exception as exc:
            return f"Error processing notebook: {exc}"

    # Classify the file from the sample returned by _read_chunk.
    sample = _read_chunk(target)
    if sample is None:
        return "Error reading file"
    if sample == b"":
        return "[Empty file]"
    # NOTE(review): this check sees only the sample. If _read_chunk returns a
    # fixed-size prefix, a multibyte UTF-8 sequence cut at the boundary could
    # make a text file look binary - verify against the helper.
    if not _decodes(sample, "utf-8"):
        return "[Binary file]"

    # First preferred encoding that can decode the sample wins.
    candidates = (enc for enc in _get_preferred_encodings() if _decodes(sample, encoding=enc))
    chosen: str | None = next(candidates, None)
    if chosen is None:
        return "Error: Unable to decode file with available encodings"

    try:
        with target.open(encoding=chosen) as handle:
            return handle.read()
    except (OSError, UnicodeDecodeError) as exc:
        return f"Error reading file with {chosen!r}: {exc}"
98147
99148 def get_summary_info (self ) -> str :
100149 """Return summary information. Override in subclasses."""
@@ -110,11 +159,7 @@ def gather_contents(self) -> str:
110159
def get_display_name(self) -> str:
    """Return the label shown for this node in the tree view.

    The base implementation yields the bare name (an empty string when the
    name is unset); subclasses override this to decorate the label.
    """
    label = self.name
    return label or ""
118163
119164 @property
120165 def content (self ) -> str :
@@ -125,14 +170,16 @@ def content(self) -> str:
125170@dataclass
126171class FileSystemFile (FileSystemNode ):
127172 """Represents a file in the filesystem."""
173+
174+ _display_type : str = "FILE"
128175
def get_sort_priority(self) -> int:
    """Return the sort priority of a file node, which is always ``0``."""
    file_priority = 0
    return file_priority
132179
def get_summary_info(self) -> str:
    """Return a two-line summary: the file name, then its line count.

    The count is the number of lines in ``self.content`` and is formatted
    with thousands separators.
    """
    label = self.name or ""
    line_total = len(self.content.splitlines())
    return f"File: {label}\nLines: {line_total:,}\n"
136183
137184 def is_single_file (self ) -> bool :
138185 """Files are single files."""
@@ -141,14 +188,15 @@ def is_single_file(self) -> bool:
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
    """Return the single tree line for this file.

    The line is ``prefix``, then a branch connector chosen by ``is_last``,
    then the file name (empty when unset).
    """
    connector = ("├── ", "└── ")[bool(is_last)]
    label = self.name or ""
    return [f"{prefix}{connector}{label}"]
145192
146193
147194@dataclass
148195class FileSystemDirectory (FileSystemNode ):
149196 """Represents a directory in the filesystem."""
150197
151198 file_count_total : int = 0
199+ _display_type : str = "DIRECTORY"
152200
153201 def get_content (self ) -> str :
154202 """Directories cannot have content."""
@@ -165,17 +213,13 @@ def gather_contents(self) -> str:
165213
def get_display_name(self) -> str:
    """Return the directory name with a trailing slash appended."""
    base = self.name or ""
    return "".join((base, "/"))
173217
174218 def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
175219 """Render the tree representation of this directory."""
176220 lines = []
177221 current_prefix = "└── " if is_last else "├── "
178- display_name = self .name + "/"
222+ display_name = ( self .name or "" ) + "/"
179223 lines .append (f"{ prefix } { current_prefix } { display_name } " )
180224 if hasattr (self , "children" ) and self .children :
181225 new_prefix = prefix + (" " if is_last else "│ " )
@@ -195,13 +239,14 @@ class GitRepository(FileSystemDirectory):
195239 """A directory that contains a .git folder, representing a Git repository."""
196240
197241 git_info : dict = field (default_factory = dict ) # Store git metadata like branch, commit, etc.
242+ _display_type : str = "GIT_REPOSITORY"
198243
199244 def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
200245 """Render the tree representation of this git repository."""
201246 lines = []
202247 current_prefix = "└── " if is_last else "├── "
203248 # Mark as git repo in the tree
204- display_name = f"{ self .name } / (git repository)"
249+ display_name = f"{ self .name or '' } / (git repository)"
205250 lines .append (f"{ prefix } { current_prefix } { display_name } " )
206251 if hasattr (self , "children" ) and self .children :
207252 new_prefix = prefix + (" " if is_last else "│ " )
@@ -216,18 +261,18 @@ class FileSystemSymlink(FileSystemNode):
216261 """Represents a symbolic link in the filesystem."""
217262
218263 target : str = ""
219- # Add symlink-specific fields if needed
264+ _display_type : str = "SYMLINK"
220265
def get_content(self) -> str:
    """Return the link target, which stands in for a symlink's content."""
    link_target = self.target
    return link_target
224269
def get_display_name(self) -> str:
    """Return the tree-view label for this symlink.

    Returns
    -------
    str
        ``"<name> -> <target>"`` when a target is recorded, otherwise just
        the name. The arrow is omitted for an empty target so the label never
        ends in a dangling ``" -> "``.

    """
    label = self.name or ""
    if not self.target:
        return label
    return f"{label} -> {self.target}"
228273
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
    """Return the single tree line for this symlink.

    The label is the link name, followed by ``" -> <target>"`` only when a
    target is recorded; ``is_last`` selects the branch connector.
    """
    connector = "└── " if is_last else "├── "
    label = self.name or ""
    if self.target:
        label = f"{label} -> {self.target}"
    return [f"{prefix}{connector}{label}"]
0 commit comments