diff --git a/README.md b/README.md
index d412735..b11b142 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,6 @@ If you see version numbers for all three, you are ready to proceed with the inst
 
 ## Installation
 
-
 ### Running on Claude Desktop
 
 To configure Octagon MCP for Claude Desktop:
@@ -276,6 +275,14 @@ Research the financial impact of Apple's privacy changes on digital advertising
 2. **Connection Issues**: Make sure the connectivity to the Octagon API is working properly.
 3. **Rate Limiting**: If you encounter rate limiting errors, reduce the frequency of your requests.
 
+## Running Evals
+
+The evals package loads an MCP client that then runs the `index.ts` file, so there is no need to rebuild between tests. You can load environment variables by prefixing the `npx` command. Full documentation can be found in the [MCP Evals docs](https://www.mcpevals.io/docs).
+
+```bash
+OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/index.ts
+```
+
 ## Installation
 
 ### Running with npx
diff --git a/package.json b/package.json
index 3c79f0e..fba52fc 100644
--- a/package.json
+++ b/package.json
@@ -38,7 +38,8 @@
     "@modelcontextprotocol/sdk": "^1.0.0",
     "dotenv": "^16.3.1",
     "openai": "^4.20.1",
-    "zod": "^3.22.4"
+    "zod": "^3.22.4",
+    "mcp-evals": "^1.0.18"
   },
   "devDependencies": {
     "@types/node": "^20.10.0",
diff --git a/src/evals/evals.ts b/src/evals/evals.ts
new file mode 100644
index 0000000..c48cf5d
--- /dev/null
+++ b/src/evals/evals.ts
@@ -0,0 +1,83 @@
+// evals.ts
+//
+// Eval suite for the Octagon MCP server. Run it with the mcp-eval CLI, e.g.:
+//   OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/index.ts
+//
+// NOTE(review): "@ai-sdk/openai" is not added to package.json by this change;
+// presumably it resolves through mcp-evals - confirm or declare it explicitly.
+
+import { openai } from "@ai-sdk/openai";
+import { EvalConfig, EvalFunction, grade } from "mcp-evals";
+
+/** Model used to grade every prompt and as the suite-level model. */
+const model = openai("gpt-4");
+
+/**
+ * Builds an eval whose run step passes `prompt` to `grade` and parses the
+ * returned text as JSON.
+ *
+ * @param name - Display name of the eval.
+ * @param description - Short summary of what the eval checks.
+ * @param prompt - Question handed to `grade`.
+ * @returns The eval definition.
+ * @throws Error from `run` when the grader output is not valid JSON.
+ */
+function createEval(name: string, description: string, prompt: string): EvalFunction {
+  return {
+    name,
+    description,
+    run: async () => {
+      const result = await grade(model, prompt);
+      try {
+        return JSON.parse(result);
+      } catch (error) {
+        // A bare SyntaxError does not say which eval produced the bad output.
+        const reason = error instanceof Error ? error.message : String(error);
+        throw new Error(`${name}: grader returned non-JSON output (${reason})`);
+      }
+    },
+  };
+}
+
+const octagonSecAgentEval = createEval(
+  "octagon-sec-agent Tool Evaluation",
+  "Evaluates the SEC filings analysis capabilities of the octagon-sec-agent",
+  "What was Apple's R&D expense as a percentage of revenue in their latest fiscal year?"
+);
+
+const octagonTranscriptsAgentEval = createEval(
+  "octagon-transcripts-agent Evaluation",
+  "Evaluates the accuracy and completeness of the octagon-transcripts-agent for analyzing earnings call transcripts",
+  "What did Amazon's CEO say about AWS growth expectations in the latest earnings call?"
+);
+
+const octagonFinancialsAgentEval = createEval(
+  "octagon-financials-agent Evaluation",
+  "Evaluates the financial analysis and ratio calculation capabilities of the octagon-financials-agent",
+  "Compare the gross margins, operating margins, and net margins of Apple, Microsoft, and Google over the last 3 years and provide insights on which company shows the strongest profitability trends."
+);
+
+const octagonStockDataAgentEval = createEval(
+  "Octagon Stock Data Agent Evaluation",
+  "Evaluates the performance of the Octagon Stock Data Agent for stock market data and valuation analysis",
+  "Compare Apple's stock performance to the S&P 500 over the last 6 months, including any significant events or catalysts that influenced price movements."
+);
+
+const octagonCompaniesAgentEval = createEval(
+  "octagon-companies-agent Evaluation",
+  "Evaluates the specialized private market intelligence tool for company info lookups and financials",
+  "List the top 5 companies in the AI sector by revenue growth"
+);
+
+/** Every eval in the suite; single source for both exports below. */
+export const evals: EvalFunction[] = [
+  octagonSecAgentEval,
+  octagonTranscriptsAgentEval,
+  octagonFinancialsAgentEval,
+  octagonStockDataAgentEval,
+  octagonCompaniesAgentEval,
+];
+
+const config: EvalConfig = { model, evals };
+
+export default config;