Use this page to discover the Core API's building blocks before exploring the more specialized guides.
The Core API provides the fundamental building blocks for writing mcp-eval tests. These decorators and utilities make your tests clean, reusable, and powerful.
from mcp_eval.core import (
    task,         # Define a test task
    with_agent,   # Specify which agent to use
    parametrize,  # Run tests with multiple inputs
    setup,        # Run before tests
    teardown,     # Run after tests
    TestResult,   # Test execution results
)
from mcp_agent.agents.agent_spec import AgentSpec


# Configure a dedicated reviewer agent for this task.
@with_agent(AgentSpec(
    name="code_reviewer",
    instruction="You are a code reviewer. Be thorough and critical.",
    server_names=["filesystem", "git"],
))
@task("Code review test")
async def test_code_review(agent):
    """Ask the code_reviewer agent to review a piece of code."""
    response = await agent.generate_str("Review this code: ...")
Decorator order matters! Always apply @with_agent above @task to ensure the agent is properly configured when the task runs.
from mcp_eval import Expect  # required for the content assertion below


@with_agent("default")
@parametrize("number", [1, 2, 5, 10, 100])
@task("Test with different numbers")
async def test_numbers(agent, number):
    """Ask whether each parametrized number is prime and check the answer shape."""
    response = await agent.generate_str(f"Is {number} prime?")
    # Each number creates a separate test case
    await agent.assert_that(
        Expect.content.regex(r"(yes|no|prime|composite)")
    )
from mcp_eval.core import setup, teardown
import os
import tempfile

# Shared between the setup and teardown hooks below.
test_dir = None


@setup
def prepare_test_environment():
    """Create temporary test directory."""
    global test_dir
    test_dir = tempfile.mkdtemp(prefix="mcp_test_")
    print(f"🚀 Created test directory: {test_dir}")

    # Set up test files
    with open(f"{test_dir}/test.txt", "w") as f:
        f.write("Test content")


@teardown
def cleanup_test_environment():
    """Clean up after tests."""
    global test_dir
    if test_dir and os.path.exists(test_dir):
        import shutil
        shutil.rmtree(test_dir)
        print(f"🧹 Cleaned up {test_dir}")
@setup
def validate_environment():
    """Ensure test environment is properly configured.

    Raises:
        RuntimeError: if the Python version is too old, a required
            environment variable is missing, or the MCP server is down.
    """
    import os   # needed for os.getenv below; the original snippet omitted it
    import sys

    # Check Python version
    if sys.version_info < (3, 10):
        raise RuntimeError("Tests require Python 3.10+")

    # Check required environment variables
    required_vars = ["ANTHROPIC_API_KEY", "TEST_SERVER_URL"]
    missing = [var for var in required_vars if not os.getenv(var)]
    if missing:
        raise RuntimeError(f"Missing environment variables: {missing}")

    # Check MCP servers are accessible
    from mcp_eval.utils import check_server_health
    if not check_server_health("my_server"):
        raise RuntimeError("MCP server 'my_server' is not responding")

    print("✅ Environment validated successfully")
def analyze_test_results(results: "list[TestResult]"):
    """Analyze a batch of test results.

    Prints a summary (pass rate, total duration, total cost), the three
    slowest tests, and any failures. Safe to call with an empty list.

    Args:
        results: completed test results; each must expose ``passed``,
            ``duration_ms``, ``metrics``, ``name`` and ``error``.
    """
    total = len(results)
    if total == 0:
        # Guard: the pass-rate calculation below would divide by zero.
        print("\n📊 Test Summary: no results to analyze.")
        return

    passed = sum(1 for r in results if r.passed)
    total_duration = sum(r.duration_ms for r in results)
    total_cost = sum(r.metrics.get('cost_usd', 0) for r in results)

    print(f"\n📊 Test Summary:")
    print(f" Total tests: {total}")
    print(f" Passed: {passed}/{total} ({passed/total*100:.1f}%)")
    print(f" Total duration: {total_duration/1000:.2f}s")
    print(f" Total cost: ${total_cost:.4f}")

    # Find slowest tests
    slowest = sorted(results, key=lambda r: r.duration_ms, reverse=True)[:3]
    print(f"\n🐢 Slowest tests:")
    for result in slowest:
        print(f" {result.name}: {result.duration_ms:.0f}ms")

    # Find failed tests
    failed = [r for r in results if not r.passed]
    if failed:
        print(f"\n❌ Failed tests:")
        for result in failed:
            print(f" {result.name}: {result.error}")
Prefer selecting tests with your runner and environment rather than custom decorators.
# Run a single test function (pytest-style selector supported by the runner for decorator tests)
mcp-eval run tests/test_fetch.py::test_fetch_case

# Run pytest tests (use pytest)
uv run pytest -q tests
When running under pytest, use standard pytest marks (e.g. `@pytest.mark.skipif`, `@pytest.mark.xfail`) to run tests conditionally.
import time  # required for time.time(); the original snippet omitted it
from contextvars import ContextVar

# NOTE: the default dict is shared; callers must .copy() before mutating.
test_context = ContextVar('test_context', default={})


@task("Test with context")
async def test_with_context(agent, session):
    """Demonstrate carrying per-test state with a ContextVar."""
    # Set context for this test
    ctx = test_context.get().copy()  # copy so the shared default is never mutated
    ctx['test_id'] = session.test_id
    ctx['start_time'] = time.time()
    test_context.set(ctx)

    response = await agent.generate_str("Test prompt")

    # Context is available throughout the test
    duration = time.time() - ctx['start_time']
    print(f"Test {ctx['test_id']} took {duration:.2f}s")