Skip to content

Commit 4c0aa5f

Browse files
authored
Feature/mcp names utilize (#7)
* Send chat data source names via names payload
* COD-529: Avoid prefixes in names
1 parent dd7db0d commit 4c0aa5f

12 files changed

+178
-179
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "codealive-mcp"
3-
version = "0.3.0"
3+
version = "0.4.0"
44
description = "MCP server for the CodeAlive API"
55
readme = "README.md"
66
requires-python = ">=3.11"

src/codealive_mcp_server.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,9 @@
5454
- Remember that context from previous messages is maintained in the same conversation
5555
5656
Flexible data source usage:
57-
- You can use a workspace ID as a single data source to search or chat across all its repositories at once
58-
- Alternatively, you can use specific repository IDs for more targeted searches
59-
- For complex queries, you can combine multiple repository IDs from different workspaces
57+
- You can use a workspace name as a single data source to search or chat across all its repositories at once
58+
- Alternatively, you can use specific repository names for more targeted searches
59+
- For complex queries, you can combine multiple repository names from different workspaces
6060
- Choose between workspace-level or repository-level access based on the scope of the query
6161
6262
Repository integration:

src/tests/test_chat_tool.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,8 @@
99

1010
@pytest.mark.asyncio
1111
@patch('tools.chat.get_api_key_from_context')
12-
async def test_consultant_with_simple_ids(mock_get_api_key):
13-
"""Test codebase consultant with simple string IDs."""
12+
async def test_consultant_with_simple_names(mock_get_api_key):
13+
"""Test codebase consultant with simple string names."""
1414
mock_get_api_key.return_value = "test_key"
1515

1616
ctx = MagicMock(spec=Context)
@@ -39,7 +39,7 @@ async def mock_aiter_lines():
3939

4040
ctx.request_context.lifespan_context = mock_codealive_context
4141

42-
# Test with simple string IDs
42+
# Test with simple string names
4343
result = await codebase_consultant(
4444
ctx=ctx,
4545
question="Test question",
@@ -50,19 +50,19 @@ async def mock_aiter_lines():
5050
call_args = mock_client.post.call_args
5151
request_data = call_args.kwargs["json"]
5252

53-
# Should convert simple IDs to {"id": "..."} format
54-
assert request_data["dataSources"] == [
55-
{"id": "repo123"},
56-
{"id": "repo456"}
53+
# Should convert simple names to the backend names array
54+
assert request_data["names"] == [
55+
"repo123",
56+
"repo456"
5757
]
5858

5959
assert result == "Hello world"
6060

6161

6262
@pytest.mark.asyncio
6363
@patch('tools.chat.get_api_key_from_context')
64-
async def test_consultant_preserves_string_ids(mock_get_api_key):
65-
"""Test codebase consultant preserves string IDs."""
64+
async def test_consultant_preserves_string_names(mock_get_api_key):
65+
"""Test codebase consultant preserves string names."""
6666
mock_get_api_key.return_value = "test_key"
6767

6868
ctx = MagicMock(spec=Context)
@@ -88,7 +88,7 @@ async def mock_aiter_lines():
8888

8989
ctx.request_context.lifespan_context = mock_codealive_context
9090

91-
# Test with string IDs
91+
# Test with string names
9292
result = await codebase_consultant(
9393
ctx=ctx,
9494
question="Test",
@@ -98,10 +98,10 @@ async def mock_aiter_lines():
9898
call_args = mock_client.post.call_args
9999
request_data = call_args.kwargs["json"]
100100

101-
# Should extract just the ID
102-
assert request_data["dataSources"] == [
103-
{"id": "repo123"},
104-
{"id": "repo456"}
101+
# Should extract just the normalized names
102+
assert request_data["names"] == [
103+
"repo123",
104+
"repo456"
105105
]
106106

107107
assert result == "Response"
@@ -145,8 +145,8 @@ async def mock_aiter_lines():
145145

146146
# Should include conversation ID
147147
assert request_data["conversationId"] == "conv_123"
148-
# Should not have data sources when continuing conversation
149-
assert "dataSources" not in request_data
148+
# Should not have explicit names when continuing conversation
149+
assert "names" not in request_data
150150

151151
assert result == "Continued"
152152

src/tests/test_error_handling.py

Lines changed: 20 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import pytest
44
from unittest.mock import AsyncMock, MagicMock
55
import httpx
6-
from utils.errors import handle_api_error, format_data_source_ids
6+
from utils.errors import handle_api_error, format_data_source_names
77

88

99
@pytest.mark.asyncio
@@ -109,57 +109,47 @@ async def test_handle_unknown_http_error():
109109
assert len(result) < 300
110110

111111

112-
def test_format_data_source_ids_strings():
113-
"""Test formatting simple string IDs."""
112+
def test_format_data_source_names_strings():
113+
"""Test formatting simple string names."""
114114
input_data = ["id1", "id2", "id3"]
115-
result = format_data_source_ids(input_data)
115+
result = format_data_source_names(input_data)
116116

117-
assert result == [
118-
{"id": "id1"},
119-
{"id": "id2"},
120-
{"id": "id3"}
121-
]
117+
assert result == ["id1", "id2", "id3"]
122118

123119

124-
def test_format_data_source_ids_dicts():
125-
"""Test formatting dictionary IDs."""
120+
def test_format_data_source_names_dicts():
121+
"""Test formatting dictionary inputs."""
126122
input_data = [
127123
{"id": "id1"},
128124
{"type": "repository", "id": "id2"},
125+
{"name": "repo-name"},
129126
{"id": "id3", "extra": "field"}
130127
]
131-
result = format_data_source_ids(input_data)
128+
result = format_data_source_names(input_data)
132129

133-
assert result == [
134-
{"id": "id1"},
135-
{"id": "id2"},
136-
{"id": "id3"}
137-
]
130+
assert result == ["id1", "id2", "repo-name", "id3"]
138131

139132

140-
def test_format_data_source_ids_mixed():
141-
"""Test formatting mixed format IDs."""
133+
def test_format_data_source_names_mixed():
134+
"""Test formatting mixed format inputs."""
142135
input_data = [
143136
"id1",
144137
{"id": "id2"},
145138
{"type": "workspace", "id": "id3"},
146139
"", # Empty string - should be skipped
147140
None, # None - should be skipped
148141
{"no_id": "field"}, # Missing id - should be skipped
142+
{"name": "repo-name"},
149143
"id4"
150144
]
151-
result = format_data_source_ids(input_data)
145+
result = format_data_source_names(input_data)
152146

153-
assert result == [
154-
{"id": "id1"},
155-
{"id": "id2"},
156-
{"id": "id3"},
157-
{"id": "id4"}
158-
]
147+
assert result == ["id1", "id2", "id3", "repo-name", "id4"]
159148

160149

161-
def test_format_data_source_ids_empty():
150+
def test_format_data_source_names_empty():
162151
"""Test formatting empty/None inputs."""
163-
assert format_data_source_ids(None) == []
164-
assert format_data_source_ids([]) == []
165-
assert format_data_source_ids([None, "", {}]) == []
152+
assert format_data_source_names(None) == []
153+
assert format_data_source_names([]) == []
154+
assert format_data_source_names([None, "", {}]) == []
155+

src/tests/test_parameter_normalization.py

Lines changed: 36 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -2,41 +2,41 @@
22

33
import pytest
44
import json
5-
from utils.errors import normalize_data_source_ids
5+
from utils.errors import normalize_data_source_names
66

77

8-
class TestNormalizeDataSourceIds:
9-
"""Test the normalize_data_source_ids function with various input formats."""
8+
class TestNormalizeDataSourceNames:
9+
"""Test the normalize_data_source_names function with various input formats."""
1010

1111
def test_proper_array_input(self):
1212
"""Test that proper arrays are passed through unchanged."""
1313
input_data = ["repo1", "repo2", "repo3"]
14-
result = normalize_data_source_ids(input_data)
14+
result = normalize_data_source_names(input_data)
1515
assert result == ["repo1", "repo2", "repo3"]
1616

1717
def test_single_string_input(self):
1818
"""Test that single string is converted to array."""
1919
input_data = "repo1"
20-
result = normalize_data_source_ids(input_data)
20+
result = normalize_data_source_names(input_data)
2121
assert result == ["repo1"]
2222

2323
def test_json_encoded_string_input(self):
2424
"""Test that JSON-encoded strings are properly parsed."""
2525
input_data = '["repo1", "repo2"]'
26-
result = normalize_data_source_ids(input_data)
26+
result = normalize_data_source_names(input_data)
2727
assert result == ["repo1", "repo2"]
2828

2929
def test_malformed_json_string_fallback(self):
3030
"""Test that malformed JSON strings fall back to single ID."""
3131
input_data = '["repo1", "repo2"' # Missing closing bracket
32-
result = normalize_data_source_ids(input_data)
32+
result = normalize_data_source_names(input_data)
3333
assert result == ['["repo1", "repo2"'] # Treated as single ID
3434

3535
def test_empty_inputs(self):
3636
"""Test various empty input types."""
37-
assert normalize_data_source_ids(None) == []
38-
assert normalize_data_source_ids("") == []
39-
assert normalize_data_source_ids([]) == []
37+
assert normalize_data_source_names(None) == []
38+
assert normalize_data_source_names("") == []
39+
assert normalize_data_source_names([]) == []
4040

4141
def test_mixed_array_with_dicts(self):
4242
"""Test arrays containing both strings and dict objects."""
@@ -46,61 +46,70 @@ def test_mixed_array_with_dicts(self):
4646
"repo3",
4747
{"id": "workspace1", "type": "workspace"}
4848
]
49-
result = normalize_data_source_ids(input_data)
49+
result = normalize_data_source_names(input_data)
5050
assert result == ["repo1", "repo2", "repo3", "workspace1"]
5151

5252
def test_dict_without_id(self):
53-
"""Test that dicts without 'id' field are skipped."""
53+
"""Test that dicts without 'id' field use 'name' field if present."""
5454
input_data = [
5555
"repo1",
56-
{"name": "some-repo", "type": "repository"}, # No 'id' field
56+
{"name": "some-repo", "type": "repository"}, # No 'id' field, but has 'name'
5757
"repo2"
5858
]
59-
result = normalize_data_source_ids(input_data)
60-
assert result == ["repo1", "repo2"]
59+
result = normalize_data_source_names(input_data)
60+
assert result == ["repo1", "some-repo", "repo2"]
6161

6262
def test_empty_strings_preserved(self):
6363
"""Test that empty strings in arrays are preserved (might be intentional)."""
6464
input_data = ["repo1", "", "repo2", " ", "repo3"]
65-
result = normalize_data_source_ids(input_data)
65+
result = normalize_data_source_names(input_data)
6666
assert result == ["repo1", "", "repo2", " ", "repo3"] # All strings preserved
6767

6868
def test_non_list_non_string_input(self):
6969
"""Test handling of unexpected input types."""
70-
result = normalize_data_source_ids(123)
70+
result = normalize_data_source_names(123)
7171
assert result == ["123"]
7272

73-
result = normalize_data_source_ids({"id": "repo1"})
73+
result = normalize_data_source_names({"id": "repo1"})
7474
assert result == ["{'id': 'repo1'}"]
7575

7676
def test_claude_desktop_scenarios(self):
7777
"""Test specific scenarios from Claude Desktop serialization issues."""
7878
# Scenario 1: JSON string as seen in Claude Desktop logs
7979
claude_input_1 = '["67db4097fa23c0a98a8495c2"]'
80-
result_1 = normalize_data_source_ids(claude_input_1)
80+
result_1 = normalize_data_source_names(claude_input_1)
8181
assert result_1 == ["67db4097fa23c0a98a8495c2"]
8282

8383
# Scenario 2: Plain string as seen in Claude Desktop logs
8484
claude_input_2 = "67db4097fa23c0a98a8495c2"
85-
result_2 = normalize_data_source_ids(claude_input_2)
85+
result_2 = normalize_data_source_names(claude_input_2)
8686
assert result_2 == ["67db4097fa23c0a98a8495c2"]
8787

8888
# Scenario 3: Multiple IDs in JSON string
8989
claude_input_3 = '["repo1", "repo2", "workspace1"]'
90-
result_3 = normalize_data_source_ids(claude_input_3)
90+
result_3 = normalize_data_source_names(claude_input_3)
9191
assert result_3 == ["repo1", "repo2", "workspace1"]
9292

9393
def test_edge_cases(self):
9494
"""Test various edge cases."""
9595
# Whitespace-only JSON string
96-
assert normalize_data_source_ids("[]") == []
97-
assert normalize_data_source_ids("[ ]") == []
96+
assert normalize_data_source_names("[]") == []
97+
assert normalize_data_source_names("[ ]") == []
9898

9999
# Single item JSON array
100-
assert normalize_data_source_ids('["single"]') == ["single"]
100+
assert normalize_data_source_names('["single"]') == ["single"]
101101

102102
# JSON array with empty strings
103-
assert normalize_data_source_ids('["repo1", "", "repo2"]') == ["repo1", "", "repo2"]
103+
assert normalize_data_source_names('["repo1", "", "repo2"]') == ["repo1", "", "repo2"]
104+
105+
def test_dict_with_name_preferred(self):
106+
"""Dict inputs with explicit names should take precedence over IDs."""
107+
input_data = [
108+
{"id": "legacy-id", "name": "repo-main"},
109+
{"name": "workspace:analytics"}
110+
]
111+
result = normalize_data_source_names(input_data)
112+
assert result == ["repo-main", "workspace:analytics"]
104113

105114

106115
class TestParameterNormalizationIntegration:
@@ -113,10 +122,10 @@ def test_search_tool_parameter_handling(self):
113122

114123
# Verify the function accepts Union[str, List[str]]
115124
sig = inspect.signature(codebase_search)
116-
data_source_ids_param = sig.parameters['data_source_ids']
125+
data_sources_param = sig.parameters['data_sources']
117126

118127
# The annotation should accept both str and List[str]
119-
assert 'Union' in str(data_source_ids_param.annotation) or 'str' in str(data_source_ids_param.annotation)
128+
assert 'Union' in str(data_sources_param.annotation) or 'str' in str(data_sources_param.annotation)
120129

121130
def test_consultant_tool_parameter_handling(self):
122131
"""Test that consultant tool properly normalizes various parameter formats."""

src/tests/test_response_transformer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,7 @@ def test_data_preservation_with_content(self):
323323
"range": {"start": {"line": 18}, "end": {"line": 168}}
324324
},
325325
"score": 0.99,
326-
"content": "async def codebase_search(\n ctx: Context,\n query: str,\n data_source_ids: Optional[List[str]] = None,\n mode: str = \"auto\",\n include_content: bool = False\n) -> Dict:",
326+
"content": "async def codebase_search(\n ctx: Context,\n query: str,\n data_sources: Optional[List[str]] = None,\n mode: str = \"auto\",\n include_content: bool = False\n) -> Dict:",
327327
"dataSource": {
328328
"type": "repository",
329329
"id": "685b21230e3822f4efa9d073",

src/tests/test_search_tool.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ async def test_codebase_search_returns_xml_string(mock_get_api_key):
5151
result = await codebase_search(
5252
ctx=ctx,
5353
query="authenticate_user",
54-
data_source_ids=["test_id"],
54+
data_sources=["test-name"],
5555
mode="auto",
5656
include_content=False
5757
)
@@ -63,6 +63,11 @@ async def test_codebase_search_returns_xml_string(mock_get_api_key):
6363
assert "<results>" in result, "Should contain results tag"
6464
assert "<search_result" in result, "Should contain search_result tag"
6565

66+
# Verify the request used the Names query parameter
67+
call_args = mock_client.get.call_args
68+
params = call_args.kwargs["params"]
69+
assert ("Names", "test-name") in params
70+
6671

6772
@pytest.mark.asyncio
6873
async def test_codebase_search_empty_query_returns_error_string():
@@ -82,7 +87,7 @@ async def test_codebase_search_empty_query_returns_error_string():
8287
result = await codebase_search(
8388
ctx=ctx,
8489
query="",
85-
data_source_ids=["test_id"],
90+
data_sources=["test-name"],
8691
mode="auto",
8792
include_content=False
8893
)
@@ -138,7 +143,7 @@ def raise_404():
138143
result = await codebase_search(
139144
ctx=ctx,
140145
query="test query",
141-
data_source_ids=["invalid_id"],
146+
data_sources=["invalid-name"],
142147
mode="auto",
143148
include_content=False
144149
)

0 commit comments

Comments
 (0)