📦 agentscope-ai / OpenJudge

📄 tools.py · 218 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# -*- coding: utf-8 -*-
"""
Tool system for agentic evaluation.

This module provides the tool infrastructure for OpenJudge's agentic evaluation,
including the base tool class and standardized result format. Tools define what
capabilities an agent has (e.g., web search, code execution, file operations).

The tool system follows OpenAI's function calling format, which is the de-facto
standard for LLM tool calling and ensures compatibility with most LLM providers.

Classes:
    ToolResult: Standardized result format for tool execution.
    BaseTool: Abstract base class for all tools.

Example:
    >>> from openjudge.agentic import BaseTool, ToolResult
    >>>
    >>> class WebSearchTool(BaseTool):
    ...     schema = {
    ...         "type": "function",
    ...         "function": {
    ...             "name": "web_search",
    ...             "description": "Search the web for information",
    ...             "parameters": {
    ...                 "type": "object",
    ...                 "properties": {
    ...                     "query": {"type": "string", "description": "Search query"}
    ...                 },
    ...                 "required": ["query"]
    ...             }
    ...         }
    ...     }
    ...
    ...     async def aexecute(self, query: str, **kwargs) -> ToolResult:
    ...         # Perform search
    ...         return ToolResult(success=True, output="search results...")
"""

import asyncio
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field

# Public API of this module: the names exported by a star-import of it.
__all__ = [
    "ToolResult",
    "BaseTool",
]


class ToolResult(BaseModel):
    """Uniform envelope returned by every tool execution.

    Whatever a concrete tool does internally, it reports back through this
    model so that callers can handle successes and failures the same way.

    Attributes:
        success: Flag telling whether the execution succeeded (defaults to True).
        output: Payload produced by the tool; any type is accepted.
        error: Human-readable failure message, or None when nothing is reported.
        metadata: Free-form extra information about the run; a fresh empty
            dict is created for each instance when not supplied.

    Example:
        >>> ok = ToolResult(success=True, output="Search results: ...")
        >>> ok.output
        'Search results: ...'
        >>> failed = ToolResult(success=False, error="Connection timeout")
        >>> failed.error
        'Connection timeout'
    """

    success: bool = Field(True, description="Whether the tool execution was successful")
    output: Any = Field(None, description="The output data from the tool execution")
    error: Optional[str] = Field(None, description="Error message if the execution failed")
    # default_factory (rather than a literal {}) gives each instance its own dict.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata from the execution",
    )


class BaseTool(ABC):
    """Abstract base class for tools using OpenAI function calling format.

    All tools in OpenJudge should inherit from this class and implement:
    1. The `schema` class attribute with the tool's JSON schema
    2. The `aexecute` method for async execution

    The schema follows OpenAI's function calling format, which provides a
    standardized way to describe tool interfaces that works with most LLM
    providers (OpenAI, Anthropic, etc.).

    Attributes:
        schema: OpenAI function calling schema defining the tool's interface.
               Must include "type", "function.name", "function.description",
               and "function.parameters".

    Example:
        >>> class CodeExecutionTool(BaseTool):
        ...     schema = {
        ...         "type": "function",
        ...         "function": {
        ...             "name": "execute_code",
        ...             "description": "Execute Python code and return the result",
        ...             "parameters": {
        ...                 "type": "object",
        ...                 "properties": {
        ...                     "code": {
        ...                         "type": "string",
        ...                         "description": "Python code to execute"
        ...                     }
        ...                 },
        ...                 "required": ["code"]
        ...             }
        ...         }
        ...     }
        ...
        ...     async def aexecute(self, code: str, **kwargs) -> ToolResult:
        ...         try:
        ...             # Illustrative only: exec() always returns None, and
        ...             # untrusted code must be sandboxed in a real tool.
        ...             result = exec(code)
        ...             return ToolResult(success=True, output=result)
        ...         except Exception as e:
        ...             return ToolResult(success=False, error=str(e))
    """

    # OpenAI function calling format - subclasses MUST override this
    schema: Dict[str, Any] = {
        "type": "function",
        "function": {
            "name": "base_tool",
            "description": "Base tool - override this in subclasses",
            "parameters": {
                "type": "object",
                "properties": {},
                "required": [],
            },
        },
    }

    @property
    def name(self) -> str:
        """Get the tool name from schema.

        Returns:
            The tool name as defined in the schema, or "unknown" if not found.
        """
        return self.schema.get("function", {}).get("name", "unknown")

    @property
    def description(self) -> str:
        """Get the tool description from schema.

        Returns:
            The tool description as defined in the schema, or empty string if not found.
        """
        return self.schema.get("function", {}).get("description", "")

    @property
    def parameters(self) -> Dict[str, Any]:
        """Get the tool parameters schema.

        Returns:
            The parameters JSON schema as defined in the schema, or an empty
            dict if not found.
        """
        return self.schema.get("function", {}).get("parameters", {})

    @abstractmethod
    async def aexecute(self, **kwargs: Any) -> ToolResult:
        """Execute the tool asynchronously with given parameters.

        This is the main method that subclasses must implement. It should be
        async to support non-blocking I/O operations (e.g., network requests,
        file operations).

        Because this method is abstract, a subclass that does not override it
        cannot be instantiated (Python raises ``TypeError`` at construction).

        Args:
            **kwargs: Tool-specific parameters as defined in the schema.
                     The parameter names and types should match the schema.

        Returns:
            ToolResult: The result of the tool execution, containing success
                       status, output data, and optional error/metadata.
        """

    def execute(self, **kwargs: Any) -> ToolResult:
        """Execute the tool synchronously.

        This is a convenience method for cases where async execution is not
        needed. It runs the async `aexecute` method in a new event loop and
        blocks until it finishes.

        If the calling thread already has a running event loop (for example
        inside a coroutine or a Jupyter notebook), `asyncio.run` cannot be
        used there, so the new loop is run in a short-lived worker thread.

        Note:
            This method creates a new event loop for each call, which may have
            performance implications. When called from a running loop it also
            blocks that loop until the tool completes, so the tool must not
            depend on the caller's loop. In async contexts, prefer awaiting
            `aexecute` directly.

        Args:
            **kwargs: Tool-specific parameters as defined in the schema.

        Returns:
            ToolResult: The result of the tool execution.

        Raises:
            Exception: Whatever `aexecute` raises is propagated unchanged.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: asyncio.run is safe to use.
            return asyncio.run(self.aexecute(**kwargs))

        # A loop is already running here; asyncio.run would raise RuntimeError.
        # Local import keeps this block self-contained.
        from concurrent.futures import ThreadPoolExecutor

        # The coroutine is created inside the worker so it is always awaited
        # by the loop that owns it.
        with ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(lambda: asyncio.run(self.aexecute(**kwargs)))
            return future.result()