coderamp-labs
diff --git a/src/gitingest/ingestion.py b/src/gitingest/ingestion.py
Lines changed: 0 additions & 27 deletions
diff --git a/src/gitingest/output_formatter.py b/src/gitingest/output_formatter.py
Lines changed: 13 additions & 0 deletions
diff --git a/src/gitingest/schemas/filesystem.py b/src/gitingest/schemas/filesystem.py
Lines changed: 58 additions & 0 deletions
diff --git a/src/server/ai_file_selector.py b/src/server/ai_file_selector.py
Lines changed: 91 additions & 14 deletions
@@ -41,15 +41,6 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
         If the path cannot be found, is not a file, or the file has no content.
 
     """
-    logger.info(
-        "Starting file ingestion",
-        extra={
-            "slug": query.slug,
-            "subpath": query.subpath,
-            "local_path": str(query.local_path),
-            "max_file_size": query.max_file_size,
-        },
-    )
 
     subpath = Path(query.subpath.strip("/")).as_posix()
     path = query.local_path / subpath
@@ -84,13 +75,6 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
             msg = f"File {file_node.name} has no content"
             raise ValueError(msg)
 
-        logger.info(
-            "Single file processing completed",
-            extra={
-                "file_name": file_node.name,
-                "file_size": file_node.size,
-            },
-        )
         return format_node(file_node, query=query)
 
     logger.info("Processing directory", extra={"directory_path": str(path)})
@@ -106,17 +90,6 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]:
 
     _process_node(node=root_node, query=query, stats=stats)
 
-    logger.info(
-        "Directory processing completed",
-        extra={
-            "total_files": root_node.file_count,
-            "total_directories": root_node.dir_count,
-            "total_size_bytes": root_node.size,
-            "stats_total_files": stats.total_files,
-            "stats_total_size": stats.total_size,
-        },
-    )
-
     return format_node(root_node, query=query)
 
 
 
@@ -168,6 +168,19 @@ def _create_tree_structure(
         display_name += "/"
     elif node.type == FileSystemNodeType.SYMLINK:
         display_name += " -> " + readlink(node.path).name
+    
+    # Add likelihood score if this file was selected by AI (score > 0)
+    if node.likelihood_score > 0:
+        # Color code based on score
+        if node.likelihood_score >= 80:
+            score_indicator = f" [🟢 {node.likelihood_score}%]"
+        elif node.likelihood_score >= 60:
+            score_indicator = f" [🟡 {node.likelihood_score}%]"
+        elif node.likelihood_score >= 40:
+            score_indicator = f" [🟠 {node.likelihood_score}%]"
+        else:
+            score_indicator = f" [🔴 {node.likelihood_score}%]"
+        display_name += score_indicator
 
     tree_str += f"{prefix}{current_prefix}{display_name}\n"
 
 
@@ -14,6 +14,8 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
+from pathlib import PurePath
+
 SEPARATOR = "=" * 48  # Tiktoken, the tokenizer openai uses, counts 2 tokens if we have more than 48
 
 
@@ -49,6 +51,7 @@ class FileSystemNode:  # pylint: disable=too-many-instance-attributes
     dir_count: int = 0
     depth: int = 0
     children: list[FileSystemNode] = field(default_factory=list)
+    likelihood_score: int = 0  # AI likelihood score (0-100) for file selection, 0 = default/not AI-selected
 
     def sort_children(self) -> None:
         """Sort the children nodes of a directory according to a specific order.
@@ -83,6 +86,61 @@ def _sort_key(child: FileSystemNode) -> tuple[int, str]:
 
         self.children.sort(key=_sort_key)
 
+    def map(self, func) -> None:
+        """Call ``func`` on this node and then on every descendant, parents first.
+
+        Parameters
+        ----------
+        func : callable
+            Invoked once per visited node with that ``FileSystemNode`` as its only
+            argument; its return value is ignored.
+
+        Example
+        -------
+        >>> def print_file_names(node):
+        ...     if node.type == FileSystemNodeType.FILE:
+        ...         print(node.name)
+        >>> root_node.map(print_file_names)
+        """
+        func(self)
+
+        # Only a directory that actually has children has anything to descend into.
+        if self.type != FileSystemNodeType.DIRECTORY or not self.children:
+            return
+
+        for descendant in self.children:
+            descendant.map(func)
+
+    def __getitem__(self, path: str) -> "FileSystemNode | None":
+        """Find the first node in this subtree whose ``path_str`` equals ``path``.
+
+        The node itself is checked first, then its descendants depth-first in
+        child order. Paths are compared as ``PurePath`` objects rather than raw
+        strings, so the comparison follows the host platform's path rules.
+
+        Parameters
+        ----------
+        path : str
+            The path string to search for.
+
+        Returns
+        -------
+        FileSystemNode | None
+            The matching node, or ``None`` if no node matches. Unlike the usual
+            ``__getitem__`` contract, a miss does not raise ``KeyError``.
+
+        Example
+        -------
+        >>> file_node = root_node["src/main.py"]
+        >>> if file_node:
+        ...     file_node.likelihood_score = 95
+        """
+        # Parse the target once instead of once per visited node.
+        target = PurePath(path)
+
+        # Explicit stack gives a pre-order walk; children are pushed reversed so
+        # they are popped (visited) in their original order.
+        pending = [self]
+        while pending:
+            node = pending.pop()
+            if node.path_str and PurePath(node.path_str) == target:
+                return node
+            if node.type == FileSystemNodeType.DIRECTORY and node.children:
+                pending.extend(reversed(node.children))
+
+        return None
+
     @property
     def content_string(self) -> str:
         """Return the content of the node as a string, including path and content.
 
@@ -23,7 +23,8 @@
 class FileSelectionResponse(BaseModel):
     """Response model for AI file selection."""
 
-    selected_files: list[str]
+    selected_files: list[str]  # file paths selected by AI
+    # Per-file detail dicts keyed by path (a "score" and, when available, a "reasoning").
+    # Defaults to None so code that only passes selected_files/reasoning still validates.
+    selected_files_detailed: dict[str, dict] | None = None
     reasoning: str
 
 
@@ -137,21 +138,29 @@ def _create_selection_prompt(
 {content_sample}
 
 CONSTRAINTS:
-- Target exactly {context_size_tokens:,} tokens in the final output
+- The output will be trimmed down to {context_size_tokens:,} tokens in the end.
 - Prioritize files that are most relevant to the user's request
 - Include key architectural files (main entry points, configuration, core modules)
 - Balance breadth (overview) with depth (important details)
 - Avoid redundant or duplicate content
 - Consider file dependencies and relationships
+- When in doubt, include the file
 
 RESPONSE FORMAT:
+For every file, include a level of "likelihood of being relevant" from 1 to 100.
+Multiple files can have the same likelihood.
 Return a JSON object with this exact structure:
 {{
-    "selected_files": [
-        "path/to/file1.py",
-        "path/to/file2.js",
-        "path/to/file3.md"
-    ],
+    "selected_files": {{
+        "path/to/file1.py": {{
+            "score": 90,
+            "reasoning": "Brief explanation of why this file has this score"
+        }},
+        "path/to/file2.py": {{
+            "score": 80,
+            "reasoning": "Brief explanation of why this file has this score"
+        }}
+    }},
     "reasoning": "Brief explanation of why these files were selected and how they serve the user's request."
 }}
 
@@ -219,14 +228,42 @@ async def select_files(
                 logger.warning("Failed to parse JSON response, attempting to extract files manually")
                 # Fallback: try to extract file paths from response
                 file_paths = re.findall(r'"([^"]+\.[a-zA-Z]+)"', response_text)
+                # Convert to new dict format with default scores
+                file_dict = {path: {"score": 50, "reasoning": "Default score"} for path in file_paths}
                 parsed_response = {
-                    "selected_files": file_paths,
+                    "selected_files": file_dict,
                     "reasoning": "Extracted files from AI response (JSON parsing failed)"
                 }
 
+            # Extract selected files and scores from AI response
+            selected_files_data = parsed_response.get("selected_files", {})
+            reasoning = parsed_response.get("reasoning", "No reasoning provided")
+            
+            # Convert new format to scores dict and preserve detailed info
+            selected_files_dict = {}
+            detailed_files = {}
+            for file_path, file_data in selected_files_data.items():
+                if isinstance(file_data, dict) and "score" in file_data:
+                    selected_files_dict[file_path] = file_data["score"]
+                    detailed_files[file_path] = file_data
+                else:
+                    # Fallback for old format or malformed data
+                    selected_files_dict[file_path] = file_data if isinstance(file_data, int) else 50
+                    detailed_files[file_path] = {"score": file_data if isinstance(file_data, int) else 50}
+            
+            logger.info("Applying AI scores to tree", extra={
+                "files_with_scores": len(selected_files_dict),
+                "sample_scores": dict(list(selected_files_dict.items())[:3]) if selected_files_dict else {}
+            })
+            
+            # Update tree nodes with likelihood scores
+            self._update_tree_scores(root_node, selected_files_dict)
+            
+            # Return the actual file paths for frontend display
             selection = FileSelectionResponse(
-                selected_files=parsed_response.get("selected_files", []),
-                reasoning=parsed_response.get("reasoning", "No reasoning provided")
+                selected_files=list(selected_files_dict.keys()),
+                selected_files_detailed=detailed_files if detailed_files else None,
+                reasoning=reasoning
             )
 
             logger.info("AI file selection completed", extra={
@@ -238,11 +275,13 @@ async def select_files(
 
         except Exception as e:
             logger.error("AI file selection failed", extra={"error": str(e)})
-            # Fallback: return all files up to a reasonable limit
-            all_files = self._extract_all_files(root_node)
+            # Set fallback scores directly on tree nodes
+            self._set_fallback_scores(root_node)
+            
             return FileSelectionResponse(
-                selected_files=all_files[:50],  # Limit to 50 files as fallback
-                reasoning=f"AI selection failed ({str(e)}), using fallback selection of key files"
+                selected_files=[],
+                selected_files_detailed=None,
+                reasoning=f"AI selection failed ({str(e)}), using fallback scoring"
             )
 
     def _extract_all_files(self, node: FileSystemNode, files: list[str] | None = None) -> list[str]:
@@ -258,6 +297,44 @@ def _extract_all_files(self, node: FileSystemNode, files: list[str] | None = Non
 
         return files
 
+    def _update_tree_scores(self, root_node: FileSystemNode, selected_files_dict: dict[str, int]) -> None:
+        """Copy AI likelihood scores onto the matching nodes of the tree.
+
+        Parameters
+        ----------
+        root_node : FileSystemNode
+            Root of the tree; nodes are looked up with ``root_node[path]``.
+        selected_files_dict : dict[str, int]
+            Mapping of file path to score. The values originate from a model
+            response, so each one is coerced to ``int`` and clamped to 0-100;
+            entries whose score is not numeric are skipped with a warning.
+
+        Paths that match no node in the tree are ignored.
+        """
+        for path, raw_score in selected_files_dict.items():
+            try:
+                score = int(float(raw_score))
+            except (TypeError, ValueError, OverflowError):
+                logger.warning("Ignoring non-numeric AI score", extra={
+                    "path": path,
+                    "score": repr(raw_score)
+                })
+                continue
+            # Keep the stored value inside the 0-100 range the score field is meant to hold.
+            score = max(0, min(100, score))
+
+            node = root_node[path]
+            if node is None:
+                continue
+
+            node.likelihood_score = score
+            logger.debug("Updated node score", extra={
+                "path": path,
+                "score": score
+            })
+
+    def _set_fallback_scores(self, root_node: FileSystemNode) -> None:
+        """Assign heuristic likelihood scores to file nodes when AI scoring is unavailable.
+
+        A file whose lower-cased name contains one of a few well-known name
+        fragments gets 90. Otherwise the score depends on the extension taken
+        from ``path_str``: source code 70, configuration 60, documentation 50,
+        anything else 30. Nodes that are not files are left untouched.
+        """
+        key_name_parts = ('readme', 'main', 'index', 'app', 'server')
+        score_by_extension = {
+            **dict.fromkeys(('py', 'js', 'ts', 'java', 'cpp', 'c', 'go', 'rs'), 70),  # source code
+            **dict.fromkeys(('json', 'yaml', 'yml', 'toml', 'ini', 'env'), 60),  # configuration
+            **dict.fromkeys(('md', 'txt', 'rst'), 50),  # documentation
+        }
+
+        def set_fallback_score(node: FileSystemNode) -> None:
+            if node.type.value != "file":
+                return
+
+            lowered_name = node.name.lower()
+            # Extension = text after the last dot of the path, or "" when there is no dot.
+            _, dot, tail = (node.path_str or "").rpartition('.')
+            extension = tail.lower() if dot else ""
+
+            if any(part in lowered_name for part in key_name_parts):
+                node.likelihood_score = 90
+            else:
+                node.likelihood_score = score_by_extension.get(extension, 30)
+
+        # Walk the whole tree and score each node in turn.
+        root_node.map(set_fallback_score)
+
 
 def get_ai_file_selector() -> AIFileSelector | None:
     """Get AI file selector instance, return None if not configured."""