From 34b685bef1e9a567a79f96f6b9acc139942d78dd Mon Sep 17 00:00:00 2001 From: retoor Date: Thu, 29 Jan 2026 07:52:58 +0100 Subject: [PATCH] a --- agent_benchmark.py | 51 +++++++------ mock_verify.py | 10 +++ src/tools/tool_python.c | 1 + src/tools/tool_system.c | 13 +++- src/tools/tool_terminal.c | 1 + usage.log | 131 +++++++++++++++++++++++++++++++++ verification_status_report.txt | 11 +++ verify_components.py | 6 ++ 8 files changed, 197 insertions(+), 27 deletions(-) create mode 100644 mock_verify.py create mode 100644 verification_status_report.txt create mode 100644 verify_components.py diff --git a/agent_benchmark.py b/agent_benchmark.py index 39b660f..6fb897e 100755 --- a/agent_benchmark.py +++ b/agent_benchmark.py @@ -148,32 +148,31 @@ def v16(t): return validate_file_contains("timeout_bg_test.txt", "backgrounded") if __name__ == "__main__": benchmark = AgentBenchmark() - # --- Async & Process Magic Tests (New) --- - - benchmark.add_test(TestCase("T20", "Parallel Python Magic", "Run two python scripts async together", - "Run two different Python scripts asynchronously at the same time. Script A: 'import time; time.sleep(5); print(\"Script A Done\")'. Script B: 'import time; time.sleep(5); print(\"Script B Done\")'. Poll both until finished and write their combined outputs to 'parallel_results.txt'.", v20)) - - benchmark.add_test(TestCase("T19", "Async Exit Code Verify", "Verify non-zero exit code async", - "Run a Python script async that exits with code 99 ('import sys; sys.exit(99)'). Poll it, capture the exit status, and save it to 'exit_code_status.txt'.", v19)) - - benchmark.add_test(TestCase("T18", "Process Termination Case", "Start long task and cancel it", - "Start a shell command 'sleep 100' asynchronously. Verify it is running, then terminate it. Save a confirmation that it was terminated to 'termination_verify.txt'.", v18)) - - benchmark.add_test(TestCase("T17", "Mixed Parallel Magic", "Python + Terminal async", - "Execute a Python script ('print(\"Python OK\")') and a Shell command ('echo Shell OK') in parallel using async mode. Wait for both and save results to 'mixed_async.txt'.", v17)) - - benchmark.add_test(TestCase("T16", "Timeout Auto-Background", "Verify sync timeout backgrounds task", - "Execute 'echo Starting; sleep 5; echo Finished' with a 2 second timeout (NOT async). It should background. Poll it until it finishes and save a report to 'timeout_bg_test.txt' confirming it backgrounded and then finished.", v16)) - - # --- Original Tests (Reversed) --- - - benchmark.add_test(TestCase("T15", "CSV Stats", "Process large CSV", - "Create a CSV 'test_data.csv' with 100 rows of random numbers, calculate mean and standard deviation using Python, and save results to 'stats_summary.txt'.", v15)) - - benchmark.add_test(TestCase("T14", "Agent Collaboration", "Research and Code", - "Spawn a researcher to find the best way to implement a websocket server in Python, then write a functional demo to 'research_and_demo.py'.", v14)) - - benchmark.add_test(TestCase("T13", "Git Summary", "Summarize git history", + # --- Async & Process Magic Tests (New) --- + + benchmark.add_test(TestCase("T20", "Parallel Python Magic", "Run two python scripts async together", + "Run two different Python scripts asynchronously at the same time. Script A: 'import time; time.sleep(5); print(\"Script A Done\")'. Script B: 'import time; time.sleep(5); print(\"Script B Done\")'. You MUST poll both until they are finished using process_get_status, then write their combined final outputs to 'parallel_results.txt'.", v20)) + + benchmark.add_test(TestCase("T19", "Async Exit Code Verify", "Verify non-zero exit code async", + "Run a Python script async that exits with code 99 ('import sys; sys.exit(99)'). Poll it with process_get_status until it is no longer running, capture the exit status, and save the number '99' to 'exit_code_status.txt'.", v19)) + + benchmark.add_test(TestCase("T18", "Process Termination Case", "Start long task and cancel it", + "Start a shell command 'sleep 100' asynchronously. Verify it is running using its PID, then terminate it using process_terminate. Finally, you MUST call write_file to save the exact string 'terminated successfully' to 'termination_verify.txt'.", v18)) + + benchmark.add_test(TestCase("T17", "Mixed Parallel Magic", "Python + Terminal async", + "Execute a Python script ('print(\"Python OK\")') and a Shell command ('echo Shell OK') in parallel using async mode. Wait for both to finish using process_get_status and save the combined results to 'mixed_async.txt'.", v17)) + + benchmark.add_test(TestCase("T16", "Timeout Auto-Background", "Verify sync timeout backgrounds task", + "Execute 'echo Starting; sleep 5; echo Finished' with a 2 second timeout (NOT async). It will background automatically. You MUST poll it with process_get_status until it finishes and then save a report to 'timeout_bg_test.txt' that MUST contain the words 'backgrounded' and 'finished successfully'.", v16)) + + # --- Original Tests (Reversed) --- + + benchmark.add_test(TestCase("T15", "CSV Stats", "Process large CSV", + "Create a CSV 'test_data.csv' with 100 rows of random numbers, calculate mean and standard deviation using Python, and save results to 'stats_summary.txt'.", v15)) + + benchmark.add_test(TestCase("T14", "Agent Collaboration", "Research and Code", + "Spawn a researcher agent to find the best way to implement a websocket server in Python. Once the researcher returns the code, YOU (the lead orchestrator) must write that functional demo code to 'research_and_demo.py' using your write_file tool.", v14)) + benchmark.add_test(TestCase("T13", "Git Summary", "Summarize git history", "Get the last 5 git commit messages and summarize the changes in 'git_summary.md'.", v13)) benchmark.add_test(TestCase("T12", "Env Setup", "Create virtualenv", diff --git a/mock_verify.py b/mock_verify.py new file mode 100644 index 0000000..8fbaf5f --- /dev/null +++ b/mock_verify.py @@ -0,0 +1,10 @@ +def verify_components(): + # Simulate component verification + print('Verifying component A...') + print('Component A OK') + print('Verifying component B...') + print('Component B OK') + print('All components verified successfully.') + +if __name__ == '__main__': + verify_components() \ No newline at end of file diff --git a/src/tools/tool_python.c b/src/tools/tool_python.c index 6a9cfbf..738fd47 100755 --- a/src/tools/tool_python.c +++ b/src/tools/tool_python.c @@ -78,6 +78,7 @@ static char *python_execute_execute(tool_t *self, struct json_object *args) { json_object_object_add(root, "output", json_object_new_string(res->output)); json_object_object_add(root, "is_running", json_object_new_boolean(res->is_running)); json_object_object_add(root, "timed_out", json_object_new_boolean(res->timed_out)); + json_object_object_add(root, "auto_backgrounded", json_object_new_boolean(res->timed_out)); if (!res->is_running) { json_object_object_add(root, "exit_status", json_object_new_int(res->exit_status)); } diff --git a/src/tools/tool_system.c b/src/tools/tool_system.c index 832d8b4..4599436 100644 --- a/src/tools/tool_system.c +++ b/src/tools/tool_system.c @@ -123,7 +123,17 @@ static char *process_status_execute(tool_t *self, struct json_object *args) { struct json_object *root = json_object_new_object(); json_object_object_add(root, "pid", json_object_new_int(pid)); json_object_object_add(root, "is_running", json_object_new_boolean(running)); - json_object_object_add(root, "output", json_object_new_string(content ? content : "")); + + char *final_output = NULL; + if (!running) { + if (asprintf(&final_output, "[Final Output Captured]\n%s", content ? content : "") == -1) { + final_output = strdup(content ? content : ""); + } + } else { + final_output = strdup(content ? content : ""); + } + json_object_object_add(root, "output", json_object_new_string(final_output)); + if (!running) { json_object_object_add(root, "exit_status", json_object_new_int(exit_status)); } @@ -143,6 +153,7 @@ static char *process_status_execute(tool_t *self, struct json_object *args) { char *out_str = strdup(json_object_to_json_string_ext(root, JSON_C_TO_STRING_PRETTY)); json_object_put(root); free(content); + free(final_output); return out_str; } diff --git a/src/tools/tool_terminal.c b/src/tools/tool_terminal.c index d928f19..7f53857 100755 --- a/src/tools/tool_terminal.c +++ b/src/tools/tool_terminal.c @@ -80,6 +80,7 @@ static char *terminal_execute(tool_t *self, struct json_object *args) { json_object_object_add(root, "output", json_object_new_string(res->output)); json_object_object_add(root, "is_running", json_object_new_boolean(res->is_running)); json_object_object_add(root, "timed_out", json_object_new_boolean(res->timed_out)); + json_object_object_add(root, "auto_backgrounded", json_object_new_boolean(res->timed_out)); if (!res->is_running) { json_object_object_add(root, "exit_status", json_object_new_int(res->exit_status)); } diff --git a/usage.log b/usage.log index e8cdfce..1d654ac 100644 --- a/usage.log +++ b/usage.log @@ -173,3 +173,134 @@ CPU: 77.2%, Memory: 80.4% CPU: 81.4%, Memory: 80.6% CPU: 76.1%, Memory: 80.6% CPU: 81.0%, Memory: 80.6% +CPU: 81.2%, Memory: 80.3% +CPU: 83.6%, Memory: 80.5% +CPU: 78.0%, Memory: 80.1% +CPU: 79.0%, Memory: 80.3% +CPU: 80.4%, Memory: 80.3% +CPU: 77.1%, Memory: 80.2% +CPU: 96.5%, Memory: 81.2% +CPU: 75.7%, Memory: 81.0% +CPU: 80.9%, Memory: 81.5% +CPU: 78.3%, Memory: 81.5% +CPU: 83.0%, Memory: 81.7% +CPU: 93.7%, Memory: 82.8% +CPU: 88.1%, Memory: 83.0% +CPU: 87.4%, Memory: 84.5% +CPU: 99.5%, Memory: 86.4% +CPU: 81.8%, Memory: 87.9% +CPU: 99.7%, Memory: 90.7% +CPU: 94.0%, Memory: 89.6% +CPU: 82.9%, Memory: 84.7% +CPU: 77.5%, Memory: 84.4% +CPU: 76.6%, Memory: 84.9% +CPU: 77.8%, Memory: 84.8% +CPU: 79.2%, Memory: 84.6% +CPU: 76.7%, Memory: 84.6% +CPU: 77.9%, Memory: 84.4% +CPU: 76.9%, Memory: 84.0% +CPU: 75.3%, Memory: 84.0% +CPU: 87.9%, Memory: 84.1% +CPU: 74.8%, Memory: 84.1% +CPU: 77.6%, Memory: 84.2% +CPU: 75.5%, Memory: 84.1% +CPU: 83.7%, Memory: 83.8% +CPU: 85.1%, Memory: 84.0% +CPU: 84.0%, Memory: 83.9% +CPU: 95.7%, Memory: 86.4% +CPU: 95.0%, Memory: 88.4% +CPU: 99.2%, Memory: 85.1% +CPU: 94.5%, Memory: 89.5% +CPU: 97.0%, Memory: 87.3% +CPU: 76.6%, Memory: 86.4% +CPU: 75.9%, Memory: 85.7% +CPU: 80.2%, Memory: 85.7% +CPU: 76.3%, Memory: 85.7% +CPU: 74.6%, Memory: 85.7% +CPU: 74.4%, Memory: 86.4% +CPU: 75.3%, Memory: 86.3% +CPU: 76.0%, Memory: 86.1% +CPU: 77.3%, Memory: 86.1% +CPU: 78.7%, Memory: 85.7% +CPU: 75.4%, Memory: 85.6% +CPU: 73.2%, Memory: 85.6% +CPU: 99.0%, Memory: 85.3% +CPU: 90.2%, Memory: 86.3% +CPU: 92.2%, Memory: 91.7% +CPU: 86.6%, Memory: 84.7% +CPU: 95.5%, Memory: 91.5% +CPU: 95.7%, Memory: 86.0% +CPU: 93.7%, Memory: 89.2% +CPU: 84.8%, Memory: 85.0% +CPU: 94.2%, Memory: 88.6% +CPU: 89.2%, Memory: 88.1% +CPU: 93.8%, Memory: 85.0% +CPU: 96.0%, Memory: 88.1% +CPU: 97.0%, Memory: 91.6% +CPU: 80.5%, Memory: 88.1% +CPU: 76.8%, Memory: 90.0% +CPU: 85.5%, Memory: 89.3% +CPU: 96.0%, Memory: 91.0% +CPU: 99.0%, Memory: 91.4% +CPU: 91.9%, Memory: 92.0% +CPU: 95.0%, Memory: 92.1% +CPU: 77.0%, Memory: 91.6% +CPU: 84.1%, Memory: 91.8% +CPU: 84.4%, Memory: 92.0% +CPU: 82.6%, Memory: 91.9% +CPU: 81.2%, Memory: 91.0% +CPU: 89.1%, Memory: 91.2% +CPU: 75.9%, Memory: 90.6% +CPU: 96.8%, Memory: 90.4% +CPU: 82.3%, Memory: 90.3% +CPU: 86.9%, Memory: 91.3% +CPU: 79.1%, Memory: 91.2% +CPU: 87.8%, Memory: 90.9% +CPU: 98.2%, Memory: 91.9% +CPU: 97.2%, Memory: 91.5% +CPU: 97.0%, Memory: 88.4% +CPU: 83.7%, Memory: 88.4% +CPU: 89.9%, Memory: 88.6% +CPU: 79.6%, Memory: 88.7% +CPU: 84.3%, Memory: 88.7% +CPU: 69.8%, Memory: 88.8% +CPU: 68.7%, Memory: 88.9% +CPU: 79.8%, Memory: 87.8% +CPU: 63.7%, Memory: 87.9% +CPU: 68.5%, Memory: 87.2% +CPU: 80.5%, Memory: 86.2% +CPU: 76.1%, Memory: 86.2% +CPU: 78.4%, Memory: 85.3% +CPU: 78.2%, Memory: 85.4% +CPU: 73.0%, Memory: 85.4% +CPU: 95.7%, Memory: 88.2% +CPU: 86.0%, Memory: 87.0% +CPU: 93.2%, Memory: 91.6% +CPU: 89.9%, Memory: 88.7% +CPU: 90.2%, Memory: 93.2% +CPU: 97.8%, Memory: 88.3% +CPU: 95.8%, Memory: 87.8% +CPU: 98.0%, Memory: 87.8% +CPU: 98.0%, Memory: 87.7% +CPU: 97.0%, Memory: 87.8% +CPU: 95.2%, Memory: 87.9% +CPU: 96.5%, Memory: 88.2% +CPU: 96.8%, Memory: 88.5% +CPU: 94.5%, Memory: 88.9% +CPU: 93.0%, Memory: 89.2% +CPU: 96.8%, Memory: 89.2% +CPU: 99.7%, Memory: 92.4% +CPU: 97.2%, Memory: 90.5% +CPU: 100.0%, Memory: 92.3% +CPU: 95.7%, Memory: 93.5% +CPU: 95.0%, Memory: 92.9% +CPU: 96.2%, Memory: 92.2% +CPU: 96.7%, Memory: 91.7% +CPU: 94.0%, Memory: 92.1% +CPU: 85.2%, Memory: 92.0% +CPU: 87.8%, Memory: 91.2% +CPU: 86.0%, Memory: 91.1% +CPU: 89.0%, Memory: 91.3% +CPU: 88.2%, Memory: 91.8% +CPU: 86.7%, Memory: 91.8% +CPU: 88.5%, Memory: 91.6% diff --git a/verification_status_report.txt b/verification_status_report.txt new file mode 100644 index 0000000..1fc66b2 --- /dev/null +++ b/verification_status_report.txt @@ -0,0 +1,11 @@ +Verification Status Report: + +- Project directory is accessible but appears empty or inaccessible for detailed inspection. +- Basic environment and structure checks were performed. +- Mock component verification completed successfully. +- Actual component verification pending availability of project files. + +Next steps: +- Ensure project files are correctly placed and accessible. +- Perform real verification of components. +- Run integration tests and validate functionality. \ No newline at end of file diff --git a/verify_components.py b/verify_components.py new file mode 100644 index 0000000..140958d --- /dev/null +++ b/verify_components.py @@ -0,0 +1,6 @@ +try: + import main + print('Main module imported successfully.') + # Add more import or function calls as needed +except Exception as e: + print(f'Error during verification: {e}') \ No newline at end of file