# AIMO2_initial/utils/agents.py
from typing import Union, Tuple, Optional
from langchain_core.messages import HumanMessage, SystemMessage
from utils.model_utils import get_model_response

# System prompt used by FinalizationAgent.generate.
# It instructs the model to continue a partial, <step>-tagged solution with the
# next step number and to end with a LaTeX \boxed answer.
# The text is sent to the model verbatim, so every character is behaviour;
# "\\boxed" is escaped so the prompt contains a literal backslash.
FINALIZATION_SYSTEM_PROMPT= """You will be given a mathematical problem and a partial solution. Your task is to finalize the solution.

Your response MUST include both a <thinking> section and a <response> section.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
Continue the solution from where it left off, maintaining the same step numbering and style.
The partial solution will only contain the beginning of the response section with some steps.
You must continue with the next step number in sequence.

IMPORTANT: Each step must be properly enclosed in <step> and </step> tags.

For example, if the partial solution ends with Step 2, you should start with:

<step>Step 3: [Description of the step]
[Mathematical work for this step]
</step>

Continue with additional steps as needed:

<step>Step 4: [Description of the step]
[Mathematical work for this step]
</step>

In your final step, include your answer in a LaTeX boxed environment:
\\boxed{your final answer}

Make sure all your steps follow logically from the partial solution and that each step has both opening and closing tags.
</response>"""

# System prompt used by FullSolutionAgent.generate.
# It asks for a <thinking> scratchpad followed by a <response> made of numbered
# <step> blocks, with the final answer in \boxed{}.
# The text is sent to the model verbatim; "\\boxed" is escaped so the prompt
# contains a literal backslash. Note the string ends with a trailing newline.
FULLSOLUTION_SYSTEM_PROMPT="""You will be given a mathematical problem. Carefully analyze it before providing a well-structured response.
Your output must include two clearly separated sections: **Thinking** and **Response**.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
<step>Step 1: Begin with the first calculation or operation
Show your work clearly using LaTeX notation</step>

<step>Step 2: Continue with the next logical step
Each step should be numbered and self-contained</step>

<step>Step N: In your final step, state your conclusion
Put your final answer in \\boxed{}</step>
</response>
"""


# System prompt used by TutorAgent.find_first_wrong_step.
# It asks the model to review a proposed solution and answer with a <verdict>
# (either 'Step X' for the first incorrect step or 'The answer is correct')
# plus a <finalization> section holding the corrected steps, if any.
# The text is sent to the model verbatim, so every character is behaviour.
TUTOR_SYSTEM_PROMPT = """You are a mathematical tutor who evaluates solutions and identifies errors.

You will be given a mathematical problem along with a proposed solution to analyze.

Your output must include two clearly separated sections: **Thinking** and **Response**.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
Explicitly provide your verdict and necessary corrections.

<verdict>
State exactly one of the following:
- 'Step X' (where X is the **first incorrect step number**)
- 'The answer is correct' (if no errors are found)
</verdict>

<finalization>
If an incorrect step was found, provide the corrected solution starting from that step:
- Format: '<step>Step X: [corrected version]</step>...<step>Final Step</step>'
- Otherwise, leave this section empty.
</finalization>
</response>"""


# System prompt used by ProgrammingAgent.generate.
# It asks for a <thinking> scratchpad followed by a <response> that contains
# only a runnable Python program printing a single numeric answer.
# The example code fence is explicitly closed before </response> so the format
# shown to the model is well-formed Markdown.
PROGRAMMER_SYSTEM_PROMPT="""You will be given a mathematical problem, that you need to solve using Python code.

Your output must include two clearly separated sections: **Thinking** and **Response**.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
In this section, write a complete, self-contained Python program that solves the problem, based explicitly on the approach described in the thinking section above. Your code must:
1. Be syntactically correct and runnable with standard Python libraries (numpy, sympy, scipy are allowed).
2. Include clear comments explaining each step of your approach within the code itself.
3. Print the final answer explicitly as a single numeric value (float or integer, as appropriate).
4. Gracefully handle potential errors or edge cases.
5. Be efficient and avoid excessive resource usage.

Do NOT include explanations outside code comments. Your response here must contain ONLY valid Python code and comments.

Example format:

```python
# Solution for the problem
import math

# Step 1: Parse the problem
# [brief explanation comment]
...

# Step 2: Solve using appropriate method
# [brief explanation comment]
...

# Calculate and print the final answer
result = ...
print(result)  # Just the number, no text
```
</response>"""


# Variant of the programmer prompt for a problem that arrives together with
# "general instructions". No class in this part of the file references it.
# The example code fence is explicitly closed before </response> so the format
# shown to the model is well-formed Markdown.
PROGRAMMER_SYSTEM_PROMPT_SUB="""You will be given a mathematical problem and some general instructions.
Your general task is to write a Python program that solves the problem.
Your output must include two clearly separated sections: **Thinking** and **Response**.


<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
In this section, write a complete, self-contained Python program that solves the problem, based explicitly on the approach described in the thinking section above. Your code must:
1. Include clear comments explaining each step of your approach within the code itself.
2. Print the final answer explicitly as a single numeric value (float or integer, as appropriate).
3. Gracefully handle potential errors or edge cases.
4. Be efficient and avoid excessive resource usage.

Do NOT include explanations outside code comments. Your response here must contain ONLY valid Python code and comments.

Example format:

```python
# Solution for the problem
import math

# Step 1: Parse the problem
# [brief explanation comment]
...

# Step 2: Solve using appropriate method
# [brief explanation comment]
...

# Calculate and print the final answer
result = ...
print(result)  # Just the number, no text
```
</response>"""

# System prompt used by ArchitectAgent.generate.
# It asks the model to analyse a problem and write six-part instructions
# (analysis, approach, libraries, structure, pitfalls, output format) for a
# programmer who will implement the solution.
# The text is sent to the model verbatim, so every character is behaviour.
ARCHITECT_SYSTEM_PROMPT="""You are an expert mathematical problem-solving engineer.
Your task is to analyze mathematical problems and create concise instructions for a programmer who will implement the solution.
Your output must include two clearly separated sections: a **thinking** section and a **response** section.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
Provide instructions for the programmer. Include:

1. **Problem Analysis**: Brief restatement of the problem

2. **Recommended Approach**: Specific algorithm or mathematical technique to use

3. **Libraries**: List recommended Python libraries (numpy, sympy, scipy, math), and include non-standard ones for special problems.

4. **Implementation Structure**: Key functions and data structures

5. **Potential Pitfalls**: Edge cases, numerical issues, performance considerations

6. **Output Format**: How the final answer should be formatted

Your instructions should be clear and concise while providing all necessary guidance.
</response>"""


# System prompt used by TestingAgent.generate.
# It asks the model for a `test_solution(answer)` function that verifies a
# candidate numeric answer instead of solving the problem.
# The example code fence is closed right after the example function, and the
# "Notice:" paragraph starts at column 0, so the explanation is not rendered as
# part of the code sample.
# "\\(" and "\\)" are escaped so the prompt contains literal backslashes.
TESTER_SYSTEM_PROMPT=""" You will be provided with a mathematical problem.
Your task is **not necessarily solve** this problem but rather to create a Python function that **efficiently verifies** whether a given
numeric value (float) correctly solves the problem.
Your output must include two clearly separated sections: a **thinking** section and a **response** section.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
Write a Python function named `test_solution(answer)` that:
1. Accepts exactly one float parameter named `answer`.
2. Returns `True` if the given answer correctly solves the problem (using appropriate numerical tolerances, e.g., `1e-2`).
3. Returns `False` otherwise.

**Important Guidelines**:
- Your function should be self-contained, efficient, and only rely on standard Python libraries (`numpy`, `sympy`, and `scipy` are allowed).
- Include brief, clear comments explaining how verification is performed.
- Handle floating-point precision explicitly with tolerances.

**Example of a verification scenario**:
If the mathematical problem is:
> "Find the root of the equation \\( x^2 - 2 = 0 \\)."

Your verification function could look like this:

```python
import numpy as np

def test_solution(answer):
    # Check if answer squared minus 2 is approximately zero.
    return np.abs(answer**2 - 2) < 1e-2
```

Notice:

The function doesn't compute the root; it verifies whether the provided number meets the criteria (equation satisfied within tolerance).

Your response should strictly follow this verification approach.
</response> """

# System prompt used by DualProofAgent.generate.
# It asks for a <proof> section (step-by-step reasoning ending in a boxed
# answer) and a <code> section (a Python program printing the numeric answer).
# "\\boxed" must stay escaped: in a non-raw string "\b" is the backspace
# character, which would turn the instruction into "<BS>oxed{}".
DUAL_PROOF_SYSTEM_PROMPT="""You will be given a mathematical problem. Your task is to solve it using both logical reasoning and programming.

Your output must include two clearly separated sections: **Thinking** and **Response**.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
Your response must include both a logical proof and a programming solution:

<proof>
Provide a formal mathematical proof or solution:
- Use clear, step-by-step logical reasoning
- Include any necessary mathematical notation (using LaTeX where appropriate)
- Ensure each step follows logically from the previous ones
- Conclude with the final answer in a \\boxed{} environment
</proof>

<code>
Provide a complete, self-contained Python program that solves the problem:
- Include necessary imports (numpy, sympy, scipy are allowed)
- Use clear variable names and add comments explaining your approach
- Implement an efficient algorithm based on your thinking
- Print only the final numeric answer (no text)
- Handle potential edge cases appropriately
</code>

Both solutions should arrive at the same answer, though they may use different approaches.
</response>"""

# System prompt used by TestDrivenProgrammerAgent.generate.
# It asks for a <test> section (a `test_solution(answer)` verifier plus extra
# test cases) and an <implementation> section (a Python program printing the
# numeric answer).
# The text is sent to the model verbatim, so every character is behaviour.
TEST_DRIVEN_PROGRAMMER_SYSTEM_PROMPT="""You will be given a mathematical problem. Your task is to solve it using a test-driven programming approach.

Your output must include two clearly separated sections: **Thinking** and **Response**.

<thinking>
Use this area as your creative scratchpad.
Feel free to capture your thoughts, abstractions, corrections, or ideas in any order and form you wish—without constraints.
Use this freedom to ensure you've gathered all insights necessary to clearly and effectively provide the requested response.
</thinking>

<response>
Your response must include both a test suite and an implementation:

<test>
Write a Python function named `test_solution(answer)` that:
1. Accepts exactly one float parameter named `answer`.
2. Returns `True` if the given answer correctly solves the problem (using appropriate numerical tolerances, e.g., `1e-2`).
3. Returns `False` otherwise.

**Important Guidelines**:
- Your function should be self-contained, efficient, and only rely on standard Python libraries (`numpy`, `sympy`, and `scipy` are allowed).
- Include brief, clear comments explaining how verification is performed.
- Handle floating-point precision explicitly with tolerances.
- Write additional test cases using unittest or pytest to verify your implementation.

Example:
```python
import numpy as np

def test_solution(answer):
    # Check if answer satisfies the problem constraints
    # Use appropriate tolerance for floating-point comparison
    return np.abs(expected_value - answer) < 1e-2
```
</test>

<implementation>
Provide a complete, self-contained Python program that solves the problem:
- Include necessary imports (numpy, sympy, scipy are allowed)
- Use clear variable names and add comments explaining your approach
- Implement an efficient algorithm based on your thinking
- Print only the final numeric answer (no text)
- Ensure your implementation passes all the tests you've written
</implementation>

Both the tests and implementation should work together to solve the problem correctly.
</response>"""


class FinalizationAgent:
    """Agent that finalizes partial solutions.

    Sends FINALIZATION_SYSTEM_PROMPT together with the problem and the partial
    solution to the wrapped model and returns the model's continuation.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def generate(self, problem: str, partial_solution: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Finalize a partial solution.

        Args:
            problem: Problem statement, inserted after "Problem: ".
            partial_solution: Solution prefix to continue, inserted after
                "Partial Solution: ".
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or a ``(prompt_text, response)`` tuple when
            ``return_prompt`` is true. ``prompt_text`` is the system prompt, a
            blank line, and the user message, matching the other agents in
            this module.
        """
        system_prompt = FINALIZATION_SYSTEM_PROMPT

        content = (
            f"Problem: {problem}\n\n"
            f"Partial Solution: {partial_solution}"
        )

        prompt = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=content),
        ]
        response = await get_model_response(self.model, prompt, max_tokens=8192)
        # Include the user message in the returned prompt; returning only the
        # system prompt would drop the problem and partial solution.
        return (system_prompt + "\n\n" + content, response) if return_prompt else response


class FullSolutionAgent:
    """Produce a complete, step-by-step solution for a problem.

    Every request uses FULLSOLUTION_SYSTEM_PROMPT as the system message and
    the raw problem text as the user message.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def generate(self, problem: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Ask the model for a full solution.

        Args:
            problem: Problem statement, sent as the whole user message.
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or ``(prompt_text, response)`` when
            ``return_prompt`` is true, where ``prompt_text`` is the system
            prompt, a blank line, and the problem.
        """
        system_prompt = FULLSOLUTION_SYSTEM_PROMPT
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=f"{problem}"),
        ]
        reply = await get_model_response(self.model, messages, max_tokens=16384)
        if not return_prompt:
            return reply
        return (system_prompt + "\n\n" + problem, reply)


class TutorAgent:
    """Review a proposed solution and locate its first incorrect step.

    Every request uses TUTOR_SYSTEM_PROMPT as the system message.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def find_first_wrong_step(self, problem: str, solution: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Ask the model to check ``solution`` against ``problem``.

        Args:
            problem: Problem statement.
            solution: Proposed solution to review.
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or ``(prompt_text, response)`` when
            ``return_prompt`` is true. ``prompt_text`` joins the system
            prompt, the problem and the solution with blank lines; it does
            not include the "Problem:" / "Proposed Solution:" labels that are
            sent in the user message.
        """
        system_prompt = TUTOR_SYSTEM_PROMPT

        user_text = (
            "Here is a mathematical problem and a proposed solution:\n\n"
            f"Problem:\n{problem}\n\n"
            f"Proposed Solution:\n{solution}"
        )
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_text),
        ]
        reply = await get_model_response(self.model, messages, max_tokens=8192)
        if not return_prompt:
            return reply
        return (system_prompt + "\n\n" + problem + "\n\n" + solution, reply)

class ProgrammingAgent:
    """Ask a model for Python code that solves a mathematical problem.

    Every request uses PROGRAMMER_SYSTEM_PROMPT as the system message.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def generate(self, problem: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Request a Python program for ``problem``.

        Args:
            problem: Problem statement, sent after a "Problem:" header.
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or ``(prompt_text, response)`` when
            ``return_prompt`` is true, where ``prompt_text`` is the system
            prompt, a blank line, and the user message.
        """
        system_prompt = PROGRAMMER_SYSTEM_PROMPT

        user_text = f"Problem:\n{problem}\n\n"
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(user_text),
        ]
        reply = await get_model_response(self.model, messages, max_tokens=16384)
        if not return_prompt:
            return reply
        return (system_prompt + "\n\n" + user_text, reply)


class ArchitectAgent:
    """Turn a problem into implementation instructions for a programmer.

    Every request uses ARCHITECT_SYSTEM_PROMPT as the system message.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def generate(self, problem: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Request an engineering analysis of ``problem``.

        Args:
            problem: Problem statement, sent after a "Problem:" header.
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or ``(prompt_text, response)`` when
            ``return_prompt`` is true, where ``prompt_text`` is the system
            prompt, a blank line, and the user message.
        """
        system_prompt = ARCHITECT_SYSTEM_PROMPT

        user_text = f"Problem:\n{problem}\n\n"
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_text),
        ]
        reply = await get_model_response(self.model, messages, max_tokens=8192)
        if not return_prompt:
            return reply
        return (system_prompt + "\n\n" + user_text, reply)


class TestingAgent:
    """Ask a model for a verification function for a mathematical problem.

    Every request uses TESTER_SYSTEM_PROMPT as the system message.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def generate(self, problem: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Request a test function for ``problem``.

        Args:
            problem: Problem statement, sent after a "Problem:" header.
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or ``(prompt_text, response)`` when
            ``return_prompt`` is true, where ``prompt_text`` is the system
            prompt, a blank line, and the user message.
        """
        system_prompt = TESTER_SYSTEM_PROMPT
        user_text = f"Problem:\n{problem}\n\n"

        messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_text)]
        reply = await get_model_response(self.model, messages, max_tokens=8192)

        if return_prompt:
            return (system_prompt + "\n\n" + user_text, reply)
        return reply


class DualProofAgent:
    """Ask a model for both a written proof and a program for a problem.

    Every request uses DUAL_PROOF_SYSTEM_PROMPT as the system message.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def generate(self, problem: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Request a proof plus a programming solution for ``problem``.

        Args:
            problem: Problem statement, sent after a "Problem:" header.
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or ``(prompt_text, response)`` when
            ``return_prompt`` is true, where ``prompt_text`` is the system
            prompt, a blank line, and the user message.
        """
        system_prompt = DUAL_PROOF_SYSTEM_PROMPT
        user_text = f"Problem:\n{problem}\n\n"

        messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_text)]
        reply = await get_model_response(self.model, messages, max_tokens=8192)

        if return_prompt:
            return (system_prompt + "\n\n" + user_text, reply)
        return reply


class TestDrivenProgrammerAgent:
    """Ask a model for a test suite and a matching implementation.

    Every request uses TEST_DRIVEN_PROGRAMMER_SYSTEM_PROMPT as the system
    message.
    """

    def __init__(self, model):
        # Stored unchanged and handed to get_model_response on every call.
        self.model = model

    async def generate(self, problem: str, return_prompt: bool = False) -> Union[str, Tuple[str, str]]:
        """Request tests and an implementation for ``problem``.

        Args:
            problem: Problem statement, sent after a "Problem:" header.
            return_prompt: When true, also return the prompt text.

        Returns:
            The model response, or ``(prompt_text, response)`` when
            ``return_prompt`` is true, where ``prompt_text`` is the system
            prompt, a blank line, and the user message.
        """
        system_prompt = TEST_DRIVEN_PROGRAMMER_SYSTEM_PROMPT
        user_text = f"Problem:\n{problem}\n\n"

        messages = [SystemMessage(content=system_prompt), HumanMessage(content=user_text)]
        reply = await get_model_response(self.model, messages, max_tokens=8192)

        if return_prompt:
            return (system_prompt + "\n\n" + user_text, reply)
        return reply