From e5a2144ab1595a46e0f9b0387fdaf641d16cf000 Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Wed, 11 Mar 2026 20:03:33 +0530 Subject: [PATCH 1/5] added artifacts for ai --- install/ci-vm/ci-linux/ci/runCI | 68 ++++++++- mod_ci/controllers.py | 10 +- mod_test/controllers.py | 257 +++++++++++++++++++++++++++++++- utility.py | 7 +- 4 files changed, 327 insertions(+), 15 deletions(-) diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI index 092d425e..d1d84765 100644 --- a/install/ci-vm/ci-linux/ci/runCI +++ b/install/ci-vm/ci-linux/ci/runCI @@ -7,6 +7,11 @@ DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +# Enable coredump capture +ulimit -c unlimited +mkdir -p /tmp/coredumps +echo "/tmp/coredumps/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern > /dev/null + if [ ! -f "$DIR/variables" ]; then # No variable file defined sudo shutdown -h now @@ -123,10 +128,69 @@ if [ -e "${dstDir}/ccextractor" ]; then echo "=== CCExtractor Binary Version ===" >> "${logFile}" ./ccextractor --version >> "${logFile}" 2>&1 echo "=== End Version Info ===" >> "${logFile}" - postStatus "testing" "Running tests" + + + ccextractor_path="$(pwd)/ccextractor" + combined_stdout="/tmp/combined_stdout.log" + : > "${combined_stdout}" + + # Create a wrapper script that tees stdout/stderr to a combined log + wrapper_path="$(pwd)/ccextractor_wrapper" + cat > "${wrapper_path}" << 'WRAPPER_EOF' +#!/bin/bash +COMBINED_LOG="/tmp/combined_stdout.log" +REAL_BINARY="PLACEHOLDER_BINARY" +EXIT_CODE_FILE="/tmp/.wrapper_exit_code" +echo "=== TEST INVOCATION: $@ ===" >> "$COMBINED_LOG" +{ "$REAL_BINARY" "$@" 2>&1; echo $? > "$EXIT_CODE_FILE"; } | tee -a "$COMBINED_LOG" +exit_code=$(cat "$EXIT_CODE_FILE") +echo "=== EXIT CODE: ${exit_code} ===" >> "$COMBINED_LOG" +echo "" >> "$COMBINED_LOG" +exit $exit_code +WRAPPER_EOF + sed -i "s|PLACEHOLDER_BINARY|${ccextractor_path}|" "${wrapper_path}" + chmod +x "${wrapper_path}" + executeCommand cd ${suiteDstDir} - executeCommand ${tester} --debug --entries "${testFile}" --executable "ccextractor" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}" + executeCommand ${tester} --debug --entries "${testFile}" --executable "${wrapper_path}" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}" + + # Upload AI artifacts to GCS + gcs_bucket=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/bucket" -H "Metadata-Flavor: Google") + test_id=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/testID" -H "Metadata-Flavor: Google") + token=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" -H "Metadata-Flavor: Google" | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])") + + upload_artifact() { + local file_path="$1" + local dest_path="$2" + if [ -f "$file_path" ]; then + local http_code + http_code=$(curl -s -X POST --data-binary @"$file_path" \ + -H "Authorization: Bearer $token" \ + -H "Content-Type: application/octet-stream" \ + -w "%{http_code}" \ + -o /dev/null \ + "https://storage.googleapis.com/upload/storage/v1/b/${gcs_bucket}/o?uploadType=media&name=${dest_path}") + if [ -z "$http_code" ] || [ "$http_code" -ne 200 ]; then + echo "GCS upload failed for ${dest_path}: HTTP ${http_code:-no_response}" >> "${logFile}" + fi + fi + } + + upload_artifact "$ccextractor_path" "test_artifacts/${test_id}/ccextractor" + + # Upload combined stdout log + upload_artifact "${combined_stdout}" "test_artifacts/${test_id}/combined_stdout.log" + + # Upload coredumps if any + for core_file in /tmp/coredumps/core.*; do + if [ -f "$core_file" ]; then + upload_artifact "$core_file" "test_artifacts/${test_id}/coredump" + break + fi + done + sendLogFile + upload_artifact "${logFile}" "test_artifacts/${test_id}/full_output.log" postStatus "completed" "Ran all tests" sudo shutdown -h now diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py index 618d3151..3347d675 100755 --- a/mod_ci/controllers.py +++ b/mod_ci/controllers.py @@ -1195,7 +1195,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: metadata_items = [ {'key': 'startup-script', 'value': startup_script}, {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')} + {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, + {'key': 'testID', 'value': str(test.id)} ] elif test.platform == TestPlatform.windows: image_response = compute.images().getFromFamily(project=config.get('WINDOWS_INSTANCE_PROJECT_NAME', ''), @@ -1217,7 +1218,8 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: {'key': 'service_account', 'value': service_account}, {'key': 'rclone_conf', 'value': rclone_conf}, {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')} + {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, + {'key': 'testID', 'value': str(test.id)} ] source_disk_image = image_response['selfLink'] @@ -2635,7 +2637,7 @@ def upload_log_type_request(log, test_id, repo_folder, test, request) -> bool: uploaded_file.save(temp_path) final_path = os.path.join(repo_folder, 'LogFiles', f"{test.id}.txt") - os.rename(temp_path, final_path) + os.replace(temp_path, final_path) log.debug("Stored log file") return True @@ -2681,7 +2683,7 @@ def upload_type_request(log, test_id, repo_folder, test, request) -> bool: results_dir = os.path.join(repo_folder, 'TestResults') os.makedirs(results_dir, exist_ok=True) final_path = os.path.join(results_dir, f'{file_hash}{file_extension}') - os.rename(temp_path, final_path) + os.replace(temp_path, final_path) rto = RegressionTestOutput.query.filter( RegressionTestOutput.id == request.form['test_file_id']).first() result_file = TestResultFile(test.id, request.form['test_id'], rto.id, rto.correct, file_hash) diff --git a/mod_test/controllers.py b/mod_test/controllers.py index e80c0a47..02e7b3b7 100644 --- a/mod_test/controllers.py +++ b/mod_test/controllers.py @@ -367,7 +367,6 @@ def generate_diff(test_id: int, regression_test_id: int, output_id: int, to_view @mod_test.route('/log-files/') -@login_required def download_build_log_file(test_id): """ Serve download of build log. @@ -379,15 +378,17 @@ def download_build_log_file(test_id): :return: build log text file :rtype: Flask response """ - from run import config + from run import config, storage_client_bucket test = Test.query.filter(Test.id == test_id).first() + from flask import send_from_directory + if test is not None: file_name = f"{test_id}.txt" - log_file_path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles', file_name) - + log_dir = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'LogFiles') + log_file_path = os.path.join(log_dir, file_name) if os.path.isfile(log_file_path): - return serve_file_download(file_name, 'LogFiles') + return send_from_directory(log_dir, file_name, as_attachment=True) raise TestNotFoundException(f"Build log for Test {test_id} not found") @@ -442,3 +443,249 @@ def stop_test(test_id): g.db.commit() g.log.info(f"test with id: {test_id} stopped") return redirect(url_for('.by_id', test_id=test.id)) + + +def _artifact_redirect(test_id, blob_path, filename='artifact'): + """Generate a signed URL for a GCS artifact and redirect, or 404.""" + from datetime import timedelta + + from run import config, storage_client_bucket + + blob = storage_client_bucket.blob(blob_path) + if not blob.exists(): + abort(404) + url = blob.generate_signed_url( + version="v4", + expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', 30)), + method="GET", + response_disposition=f'attachment; filename="{filename}"' + ) + return redirect(url) + + +@mod_test.route('//binary') +def download_binary(test_id): + """Download the ccextractor binary used in a test (linux or windows).""" + from run import storage_client_bucket + # Try linux name first, then windows + for name in ['ccextractor', 'ccextractor.exe']: + blob_path = f'test_artifacts/{test_id}/{name}' + if storage_client_bucket.blob(blob_path).exists(): + return _artifact_redirect(test_id, blob_path, filename=name) + abort(404) + + +@mod_test.route('//coredump') +def download_coredump(test_id): + """Download the coredump from a test, if one was produced.""" + return _artifact_redirect( + test_id, + f'test_artifacts/{test_id}/coredump', + filename=f'coredump-{test_id}' + ) + + +@mod_test.route('//combined-stdout') +def download_combined_stdout(test_id): + """Download the combined stdout/stderr log from all test invocations.""" + return _artifact_redirect( + test_id, + f'test_artifacts/{test_id}/combined_stdout.log', + filename=f'combined_stdout-{test_id}.log' + ) + + +@mod_test.route('//regression///output-got') +def download_output_got(test_id, regression_test_id, output_id): + """Download the actual output file from TestResults using DB hash.""" + rf = TestResultFile.query.filter(and_( + TestResultFile.test_id == test_id, + TestResultFile.regression_test_id == regression_test_id, + TestResultFile.regression_test_output_id == output_id + )).first() + if rf is None or rf.got is None: + abort(404) + import os + ext = os.path.splitext(rf.regression_test_output.filename_correct)[1] + return _artifact_redirect( + test_id, + f'TestResults/{rf.got}{ext}', + filename=f'output_got_{regression_test_id}_{output_id}{ext}' + ) + + +@mod_test.route('//regression///output-expected') +def download_output_expected(test_id, regression_test_id, output_id): + """Download the expected output file from TestResults using DB hash.""" + rf = TestResultFile.query.filter(and_( + TestResultFile.test_id == test_id, + TestResultFile.regression_test_id == regression_test_id, + TestResultFile.regression_test_output_id == output_id + )).first() + if rf is None: + abort(404) + import os + ext = os.path.splitext(rf.regression_test_output.filename_correct)[1] + return _artifact_redirect( + test_id, + f'TestResults/{rf.expected}{ext}', + filename=f'output_expected_{regression_test_id}_{output_id}{ext}' + ) +@mod_test.route('//sample/') +def download_sample_ai(test_id, sample_id): + """Download the sample file for a regression test (no auth required for AI workflow).""" + from mod_sample.models import Sample + sample = Sample.query.filter(Sample.id == sample_id).first() + if sample is None: + abort(404) + return _artifact_redirect( + test_id, + f'TestFiles/{sample.filename}', + filename=sample.original_name + ) + + +@mod_test.route('//ai.json') +def ai_json_endpoint(test_id): + """Structured JSON with download URLs for all artifacts — for AI agents.""" + from run import storage_client_bucket + + test = Test.query.filter(Test.id == test_id).first() + if test is None: + return jsonify({'error': f'Test {test_id} not found'}), 404 + + def blob_exists(path): + return storage_client_bucket.blob(path).exists() + + has_binary = ( + blob_exists(f'test_artifacts/{test_id}/ccextractor') or + blob_exists(f'test_artifacts/{test_id}/ccextractor.exe') + ) + has_coredump = blob_exists(f'test_artifacts/{test_id}/coredump') + has_combined_stdout = blob_exists(f'test_artifacts/{test_id}/combined_stdout.log') + + results = get_test_results(test) + test_cases = [] + total = 0 + passed = 0 + failed = 0 + + for category in results: + for t_data in category['tests']: + total += 1 + rt = t_data['test'] + result = t_data['result'] + is_error = t_data.get('error', False) + result_files = t_data['files'] + + if is_error: + failed += 1 + else: + passed += 1 + + outputs = [] + for expected_output in rt.output_files: + if expected_output.ignore: + continue + + matched_rf = None + for rf in result_files: + if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id: + matched_rf = rf + break + + got_url = None + diff_url = None + + if matched_rf and matched_rf.got is not None: + got_url = url_for( + '.download_output_got', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + diff_url = url_for( + '.generate_diff', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + to_view=0, + _external=True + ) + else: + # If test passed, got and expected match exactly. + got_url = url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + + output_entry = { + 'output_id': expected_output.id, + 'correct_extension': expected_output.correct_extension, + 'expected_url': url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ), + 'got_url': got_url, + 'diff_url': diff_url, + } + outputs.append(output_entry) + + test_cases.append({ + 'regression_test_id': rt.id, + 'category': category['category'].name, + 'sample_filename': rt.sample.original_name, + 'sample_url': url_for( + '.download_sample_ai', + test_id=test_id, + sample_id=rt.sample.id, + _external=True + ), + 'arguments': rt.command, + 'result': 'Fail' if is_error else 'Pass', + 'exit_code': result.exit_code if result else None, + 'expected_exit_code': result.expected_rc if result else None, + 'runtime_ms': result.runtime if result else None, + 'outputs': outputs, + 'how_to_reproduce': f'./ccextractor {rt.command} {rt.sample.original_name}', + }) + + report = { + 'test_id': test.id, + 'commit': test.commit, + 'platform': test.platform.value, + 'branch': test.branch, + 'status': 'completed' if test.finished else 'running', + 'binary_url': url_for( + '.download_binary', test_id=test_id, _external=True + ) if has_binary else None, + 'coredump_url': url_for( + '.download_coredump', test_id=test_id, _external=True + ) if has_coredump else None, + 'log_url': url_for( + '.download_build_log_file', test_id=test_id, _external=True + ), + 'combined_stdout_url': url_for( + '.download_combined_stdout', test_id=test_id, _external=True + ) if has_combined_stdout else None, + 'summary': { + 'total': total, + 'passed': passed, + 'failed': failed, + }, + 'test_cases': test_cases, + 'how_to_reproduce': ( + 'Download the binary and sample, then run: ' + + ('./ccextractor {arguments} {sample_filename}' if test.platform.value == 'linux' + else 'ccextractorwinfull.exe {arguments} {sample_filename}') + ), + } + + return jsonify(report) diff --git a/utility.py b/utility.py index 98eeec53..25b85d1a 100644 --- a/utility.py +++ b/utility.py @@ -30,14 +30,13 @@ def serve_file_download(file_name, file_folder, file_sub_folder='') -> werkzeug. """ from run import config, storage_client_bucket - file_path = path.join(file_folder, file_sub_folder, file_name) + file_path = '/'.join(filter(None, [file_folder, file_sub_folder, file_name])) blob = storage_client_bucket.blob(file_path) - blob.content_disposition = f'attachment; filename="{file_name}"' - blob.patch() url = blob.generate_signed_url( version="v4", - expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', '')), + expiration=timedelta(minutes=config.get('GCS_SIGNED_URL_EXPIRY_LIMIT', 30)), method="GET", + response_disposition=f'attachment; filename="{file_name}"' ) return redirect(url) From 7f115f5f1541b3d826d2ffe020c3a058fa0895d7 Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Wed, 11 Mar 2026 21:23:30 +0530 Subject: [PATCH 2/5] minor cleanup --- install/ci-vm/ci-linux/ci/runCI | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI index d1d84765..521ac2aa 100644 --- a/install/ci-vm/ci-linux/ci/runCI +++ b/install/ci-vm/ci-linux/ci/runCI @@ -128,8 +128,7 @@ if [ -e "${dstDir}/ccextractor" ]; then echo "=== CCExtractor Binary Version ===" >> "${logFile}" ./ccextractor --version >> "${logFile}" 2>&1 echo "=== End Version Info ===" >> "${logFile}" - - + postStatus "testing" "Running tests" ccextractor_path="$(pwd)/ccextractor" combined_stdout="/tmp/combined_stdout.log" : > "${combined_stdout}" @@ -190,7 +189,6 @@ WRAPPER_EOF done sendLogFile - upload_artifact "${logFile}" "test_artifacts/${test_id}/full_output.log" postStatus "completed" "Ran all tests" sudo shutdown -h now From 1baab379af8aecb6f11252380c72f2e827645000 Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Wed, 18 Mar 2026 23:19:31 +0530 Subject: [PATCH 3/5] Code refactor for sonarqube --- mod_test/controllers.py | 188 ++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 95 deletions(-) diff --git a/mod_test/controllers.py b/mod_test/controllers.py index 02e7b3b7..aec42ee8 100644 --- a/mod_test/controllers.py +++ b/mod_test/controllers.py @@ -445,7 +445,7 @@ def stop_test(test_id): return redirect(url_for('.by_id', test_id=test.id)) -def _artifact_redirect(test_id, blob_path, filename='artifact'): +def _artifact_redirect(blob_path, filename='artifact'): """Generate a signed URL for a GCS artifact and redirect, or 404.""" from datetime import timedelta @@ -463,7 +463,7 @@ def _artifact_redirect(test_id, blob_path, filename='artifact'): return redirect(url) -@mod_test.route('//binary') +@mod_test.route('//binary', methods=['GET']) def download_binary(test_id): """Download the ccextractor binary used in a test (linux or windows).""" from run import storage_client_bucket @@ -471,31 +471,29 @@ def download_binary(test_id): for name in ['ccextractor', 'ccextractor.exe']: blob_path = f'test_artifacts/{test_id}/{name}' if storage_client_bucket.blob(blob_path).exists(): - return _artifact_redirect(test_id, blob_path, filename=name) + return _artifact_redirect(blob_path, filename=name) abort(404) -@mod_test.route('//coredump') +@mod_test.route('//coredump', methods=['GET']) def download_coredump(test_id): """Download the coredump from a test, if one was produced.""" return _artifact_redirect( - test_id, f'test_artifacts/{test_id}/coredump', filename=f'coredump-{test_id}' ) -@mod_test.route('//combined-stdout') +@mod_test.route('//combined-stdout', methods=['GET']) def download_combined_stdout(test_id): """Download the combined stdout/stderr log from all test invocations.""" return _artifact_redirect( - test_id, f'test_artifacts/{test_id}/combined_stdout.log', filename=f'combined_stdout-{test_id}.log' ) -@mod_test.route('//regression///output-got') +@mod_test.route('//regression///output-got', methods=['GET']) def download_output_got(test_id, regression_test_id, output_id): """Download the actual output file from TestResults using DB hash.""" rf = TestResultFile.query.filter(and_( @@ -505,16 +503,14 @@ def download_output_got(test_id, regression_test_id, output_id): )).first() if rf is None or rf.got is None: abort(404) - import os ext = os.path.splitext(rf.regression_test_output.filename_correct)[1] return _artifact_redirect( - test_id, f'TestResults/{rf.got}{ext}', filename=f'output_got_{regression_test_id}_{output_id}{ext}' ) -@mod_test.route('//regression///output-expected') +@mod_test.route('//regression///output-expected', methods=['GET']) def download_output_expected(test_id, regression_test_id, output_id): """Download the expected output file from TestResults using DB hash.""" rf = TestResultFile.query.filter(and_( @@ -524,14 +520,12 @@ def download_output_expected(test_id, regression_test_id, output_id): )).first() if rf is None: abort(404) - import os ext = os.path.splitext(rf.regression_test_output.filename_correct)[1] return _artifact_redirect( - test_id, f'TestResults/{rf.expected}{ext}', filename=f'output_expected_{regression_test_id}_{output_id}{ext}' ) -@mod_test.route('//sample/') +@mod_test.route('//sample/', methods=['GET']) def download_sample_ai(test_id, sample_id): """Download the sample file for a regression test (no auth required for AI workflow).""" from mod_sample.models import Sample @@ -539,13 +533,94 @@ def download_sample_ai(test_id, sample_id): if sample is None: abort(404) return _artifact_redirect( - test_id, f'TestFiles/{sample.filename}', filename=sample.original_name ) -@mod_test.route('//ai.json') +def _process_test_case(test_id, category_name, t_data): + """Helper function to process a single test case.""" + rt = t_data['test'] + result = t_data['result'] + is_error = t_data.get('error', False) + result_files = t_data['files'] + + outputs = [] + for expected_output in rt.output_files: + if expected_output.ignore: + continue + + matched_rf = None + for rf in result_files: + if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id: + matched_rf = rf + break + + got_url = None + diff_url = None + + if matched_rf and matched_rf.got is not None: + got_url = url_for( + '.download_output_got', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + diff_url = url_for( + '.generate_diff', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + to_view=0, + _external=True + ) + else: + # If test passed, got and expected match exactly. + got_url = url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + + output_entry = { + 'output_id': expected_output.id, + 'correct_extension': expected_output.correct_extension, + 'expected_url': url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ), + 'got_url': got_url, + 'diff_url': diff_url, + } + outputs.append(output_entry) + + return { + 'regression_test_id': rt.id, + 'category': category_name, + 'sample_filename': rt.sample.original_name, + 'sample_url': url_for( + '.download_sample_ai', + test_id=test_id, + sample_id=rt.sample.id, + _external=True + ), + 'arguments': rt.command, + 'result': 'Fail' if is_error else 'Pass', + 'exit_code': result.exit_code if result else None, + 'expected_exit_code': result.expected_rc if result else None, + 'runtime_ms': result.runtime if result else None, + 'outputs': outputs, + 'how_to_reproduce': f'./ccextractor {rt.command} {rt.sample.original_name}', + } + + +@mod_test.route('//ai.json', methods=['GET']) def ai_json_endpoint(test_id): """Structured JSON with download URLs for all artifacts — for AI agents.""" from run import storage_client_bucket @@ -573,89 +648,12 @@ def blob_exists(path): for category in results: for t_data in category['tests']: total += 1 - rt = t_data['test'] - result = t_data['result'] - is_error = t_data.get('error', False) - result_files = t_data['files'] - - if is_error: + if t_data.get('error', False): failed += 1 else: passed += 1 - outputs = [] - for expected_output in rt.output_files: - if expected_output.ignore: - continue - - matched_rf = None - for rf in result_files: - if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id: - matched_rf = rf - break - - got_url = None - diff_url = None - - if matched_rf and matched_rf.got is not None: - got_url = url_for( - '.download_output_got', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - _external=True - ) - diff_url = url_for( - '.generate_diff', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - to_view=0, - _external=True - ) - else: - # If test passed, got and expected match exactly. - got_url = url_for( - '.download_output_expected', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - _external=True - ) - - output_entry = { - 'output_id': expected_output.id, - 'correct_extension': expected_output.correct_extension, - 'expected_url': url_for( - '.download_output_expected', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - _external=True - ), - 'got_url': got_url, - 'diff_url': diff_url, - } - outputs.append(output_entry) - - test_cases.append({ - 'regression_test_id': rt.id, - 'category': category['category'].name, - 'sample_filename': rt.sample.original_name, - 'sample_url': url_for( - '.download_sample_ai', - test_id=test_id, - sample_id=rt.sample.id, - _external=True - ), - 'arguments': rt.command, - 'result': 'Fail' if is_error else 'Pass', - 'exit_code': result.exit_code if result else None, - 'expected_exit_code': result.expected_rc if result else None, - 'runtime_ms': result.runtime if result else None, - 'outputs': outputs, - 'how_to_reproduce': f'./ccextractor {rt.command} {rt.sample.original_name}', - }) + test_cases.append(_process_test_case(test_id, category['category'].name, t_data)) report = { 'test_id': test.id, From c0ac139ee6bea86660037de58c06c646b003e273 Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Mon, 1 Jun 2026 23:07:17 +0530 Subject: [PATCH 4/5] Address review: SP-mediated uploads, login_required, platform binary, code smell fix --- install/ci-vm/ci-linux/ci/runCI | 33 +++---- mod_ci/controllers.py | 52 +++++++++-- mod_test/controllers.py | 156 +++++++++++++++++--------------- 3 files changed, 142 insertions(+), 99 deletions(-) diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI index 521ac2aa..f9bec89e 100644 --- a/install/ci-vm/ci-linux/ci/runCI +++ b/install/ci-vm/ci-linux/ci/runCI @@ -139,10 +139,11 @@ if [ -e "${dstDir}/ccextractor" ]; then #!/bin/bash COMBINED_LOG="/tmp/combined_stdout.log" REAL_BINARY="PLACEHOLDER_BINARY" -EXIT_CODE_FILE="/tmp/.wrapper_exit_code" +EXIT_CODE_FILE=$(mktemp) echo "=== TEST INVOCATION: $@ ===" >> "$COMBINED_LOG" { "$REAL_BINARY" "$@" 2>&1; echo $? > "$EXIT_CODE_FILE"; } | tee -a "$COMBINED_LOG" exit_code=$(cat "$EXIT_CODE_FILE") +rm -f "$EXIT_CODE_FILE" echo "=== EXIT CODE: ${exit_code} ===" >> "$COMBINED_LOG" echo "" >> "$COMBINED_LOG" exit $exit_code @@ -153,37 +154,33 @@ WRAPPER_EOF executeCommand cd ${suiteDstDir} executeCommand ${tester} --debug --entries "${testFile}" --executable "${wrapper_path}" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}" - # Upload AI artifacts to GCS - gcs_bucket=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/bucket" -H "Metadata-Flavor: Google") - test_id=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/testID" -H "Metadata-Flavor: Google") - token=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" -H "Metadata-Flavor: Google" | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])") - + # Upload artifacts through the Sample Platform server upload_artifact() { local file_path="$1" - local dest_path="$2" + local artifact_name="$2" if [ -f "$file_path" ]; then local http_code - http_code=$(curl -s -X POST --data-binary @"$file_path" \ - -H "Authorization: Bearer $token" \ - -H "Content-Type: application/octet-stream" \ - -w "%{http_code}" \ - -o /dev/null \ - "https://storage.googleapis.com/upload/storage/v1/b/${gcs_bucket}/o?uploadType=media&name=${dest_path}") - if [ -z "$http_code" ] || [ "$http_code" -ne 200 ]; then - echo "GCS upload failed for ${dest_path}: HTTP ${http_code:-no_response}" >> "${logFile}" + http_code=$(curl -s -A "${userAgent}" \ + --form "type=artifact" \ + --form "name=${artifact_name}" \ + --form "file=@${file_path}" \ + -w "%{http_code}" -o /dev/null \ + "${reportURL}" 2>/dev/null) + if [ -z "$http_code" ] || [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then + echo "Artifact upload failed for ${artifact_name}: HTTP ${http_code:-no_response}" >> "${logFile}" fi fi } - upload_artifact "$ccextractor_path" "test_artifacts/${test_id}/ccextractor" + upload_artifact "$ccextractor_path" "ccextractor" # Upload combined stdout log - upload_artifact "${combined_stdout}" "test_artifacts/${test_id}/combined_stdout.log" + upload_artifact "${combined_stdout}" "combined_stdout.log" # Upload coredumps if any for core_file in /tmp/coredumps/core.*; do if [ -f "$core_file" ]; then - upload_artifact "$core_file" "test_artifacts/${test_id}/coredump" + upload_artifact "$core_file" "coredump" break fi done diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py index 3347d675..64bd5ec6 100755 --- a/mod_ci/controllers.py +++ b/mod_ci/controllers.py @@ -1194,9 +1194,7 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: startup_script = f.read() metadata_items = [ {'key': 'startup-script', 'value': startup_script}, - {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, - {'key': 'testID', 'value': str(test.id)} + {'key': 'reportURL', 'value': reportURL} ] elif test.platform == TestPlatform.windows: image_response = compute.images().getFromFamily(project=config.get('WINDOWS_INSTANCE_PROJECT_NAME', ''), @@ -1217,9 +1215,7 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict: {'key': 'windows-startup-script-ps1', 'value': startup_script}, {'key': 'service_account', 'value': service_account}, {'key': 'rclone_conf', 'value': rclone_conf}, - {'key': 'reportURL', 'value': reportURL}, - {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')}, - {'key': 'testID', 'value': str(test.id)} + {'key': 'reportURL', 'value': reportURL} ] source_disk_image = image_response['selfLink'] @@ -2346,6 +2342,11 @@ def progress_reporter(test_id, token): if not upload_type_request(log, test_id, repo_folder, test, request): return "EMPTY" + elif request.form['type'] == 'artifact': + log.info(f'[PROGRESS_REPORTER][Test: {test_id}] Artifact upload') + if not artifact_upload_request(log, test_id, request): + return "EMPTY" + elif request.form['type'] == 'finish': log.info(f'[PROGRESS_REPORTER][Test: {test_id}] Test finished') finish_type_request(log, test_id, test, request) @@ -2695,6 +2696,45 @@ def upload_type_request(log, test_id, repo_folder, test, request) -> bool: return False +# Allowed artifact names that the VM can upload +ALLOWED_ARTIFACT_NAMES = {'ccextractor', 'ccextractor.exe', 'combined_stdout.log', 'coredump'} + + +def artifact_upload_request(log, test_id, request) -> bool: + """ + Handle artifact upload from the CI VM. + + Validates the artifact name against an allow-list, then uploads + the file to GCS under test_artifacts/{test_id}/{name}. + + :param log: logger + :type log: Logger + :param test_id: The id of the test to update. + :type test_id: int + :param request: Request parameters + :type request: Request + :return: True if upload succeeded, False otherwise. + :rtype: bool + """ + from run import storage_client_bucket + + artifact_name = request.form.get('name', '') + if artifact_name not in ALLOWED_ARTIFACT_NAMES: + log.warning(f"[Test: {test_id}] Rejected artifact upload with disallowed name: {artifact_name}") + return False + + if 'file' not in request.files: + log.warning(f"[Test: {test_id}] Artifact upload missing file") + return False + + uploaded_file = request.files['file'] + blob_path = f'test_artifacts/{test_id}/{artifact_name}' + blob = storage_client_bucket.blob(blob_path) + blob.upload_from_file(uploaded_file.stream) + log.info(f"[Test: {test_id}] Artifact '{artifact_name}' uploaded to {blob_path}") + return True + + def finish_type_request(log, test_id, test, request): """ Handle finish request type for progress reporter. diff --git a/mod_test/controllers.py b/mod_test/controllers.py index aec42ee8..26aabc23 100644 --- a/mod_test/controllers.py +++ b/mod_test/controllers.py @@ -21,6 +21,9 @@ mod_test = Blueprint('test', __name__) +CCEXTRACTOR_WIN_BINARY = 'ccextractorwinfull.exe' +CCEXTRACTOR_LINUX_BINARY = 'ccextractor' + @mod_test.before_app_request def before_app_request() -> None: @@ -367,6 +370,7 @@ def generate_diff(test_id: int, regression_test_id: int, output_id: int, to_view @mod_test.route('/log-files/') +@login_required def download_build_log_file(test_id): """ Serve download of build log. @@ -464,18 +468,18 @@ def _artifact_redirect(blob_path, filename='artifact'): @mod_test.route('//binary', methods=['GET']) +@login_required def download_binary(test_id): - """Download the ccextractor binary used in a test (linux or windows).""" - from run import storage_client_bucket - # Try linux name first, then windows - for name in ['ccextractor', 'ccextractor.exe']: - blob_path = f'test_artifacts/{test_id}/{name}' - if storage_client_bucket.blob(blob_path).exists(): - return _artifact_redirect(blob_path, filename=name) - abort(404) + """Download the ccextractor binary used in a test.""" + test = Test.query.filter(Test.id == test_id).first() + if test is None: + abort(404) + name = CCEXTRACTOR_LINUX_BINARY if test.platform == TestPlatform.linux else CCEXTRACTOR_WIN_BINARY + return _artifact_redirect(f'test_artifacts/{test_id}/{name}', filename=name) @mod_test.route('//coredump', methods=['GET']) +@login_required def download_coredump(test_id): """Download the coredump from a test, if one was produced.""" return _artifact_redirect( @@ -485,6 +489,7 @@ def download_coredump(test_id): @mod_test.route('//combined-stdout', methods=['GET']) +@login_required def download_combined_stdout(test_id): """Download the combined stdout/stderr log from all test invocations.""" return _artifact_redirect( @@ -494,6 +499,7 @@ def download_combined_stdout(test_id): @mod_test.route('//regression///output-got', methods=['GET']) +@login_required def download_output_got(test_id, regression_test_id, output_id): """Download the actual output file from TestResults using DB hash.""" rf = TestResultFile.query.filter(and_( @@ -511,6 +517,7 @@ def download_output_got(test_id, regression_test_id, output_id): @mod_test.route('//regression///output-expected', methods=['GET']) +@login_required def download_output_expected(test_id, regression_test_id, output_id): """Download the expected output file from TestResults using DB hash.""" rf = TestResultFile.query.filter(and_( @@ -526,8 +533,9 @@ def download_output_expected(test_id, regression_test_id, output_id): filename=f'output_expected_{regression_test_id}_{output_id}{ext}' ) @mod_test.route('//sample/', methods=['GET']) +@login_required def download_sample_ai(test_id, sample_id): - """Download the sample file for a regression test (no auth required for AI workflow).""" + """Download the sample file for a regression test.""" from mod_sample.models import Sample sample = Sample.query.filter(Sample.id == sample_id).first() if sample is None: @@ -538,75 +546,69 @@ def download_sample_ai(test_id, sample_id): ) -def _process_test_case(test_id, category_name, t_data): - """Helper function to process a single test case.""" +def _build_output_entry(test_id, rt, expected_output, result_files): + """Build a single output entry dict for the ai.json response.""" + matched_rf = next( + (rf for rf in result_files + if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id), + None + ) + + got_url = None + diff_url = None + + if matched_rf and matched_rf.got is not None: + got_url = url_for( + '.download_output_got', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ) + diff_url = url_for( + '.generate_diff', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + to_view=0, + _external=True + ) + + return { + 'output_id': expected_output.id, + 'correct_extension': expected_output.correct_extension, + 'expected_url': url_for( + '.download_output_expected', + test_id=test_id, + regression_test_id=rt.id, + output_id=expected_output.id, + _external=True + ), + 'got_url': got_url, + 'diff_url': diff_url, + } + + +def _process_test_case(test, category_name, t_data): + """Build a structured dict for a single test case in the ai.json response.""" rt = t_data['test'] result = t_data['result'] is_error = t_data.get('error', False) result_files = t_data['files'] - outputs = [] - for expected_output in rt.output_files: - if expected_output.ignore: - continue - - matched_rf = None - for rf in result_files: - if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id: - matched_rf = rf - break - - got_url = None - diff_url = None - - if matched_rf and matched_rf.got is not None: - got_url = url_for( - '.download_output_got', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - _external=True - ) - diff_url = url_for( - '.generate_diff', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - to_view=0, - _external=True - ) - else: - # If test passed, got and expected match exactly. - got_url = url_for( - '.download_output_expected', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - _external=True - ) - - output_entry = { - 'output_id': expected_output.id, - 'correct_extension': expected_output.correct_extension, - 'expected_url': url_for( - '.download_output_expected', - test_id=test_id, - regression_test_id=rt.id, - output_id=expected_output.id, - _external=True - ), - 'got_url': got_url, - 'diff_url': diff_url, - } - outputs.append(output_entry) + outputs = [ + _build_output_entry(test.id, rt, expected_output, result_files) + for expected_output in rt.output_files + if not expected_output.ignore + ] - return { + test_case = { 'regression_test_id': rt.id, 'category': category_name, 'sample_filename': rt.sample.original_name, 'sample_url': url_for( '.download_sample_ai', - test_id=test_id, + test_id=test.id, sample_id=rt.sample.id, _external=True ), @@ -616,11 +618,17 @@ def _process_test_case(test_id, category_name, t_data): 'expected_exit_code': result.expected_rc if result else None, 'runtime_ms': result.runtime if result else None, 'outputs': outputs, - 'how_to_reproduce': f'./ccextractor {rt.command} {rt.sample.original_name}', } + # Format the reproduction command based on platform + binary_name = f'./{CCEXTRACTOR_LINUX_BINARY}' if test.platform == TestPlatform.linux else CCEXTRACTOR_WIN_BINARY + test_case['how_to_reproduce'] = f'{binary_name} {rt.command} {rt.sample.original_name}' + + return test_case + @mod_test.route('//ai.json', methods=['GET']) +@login_required def ai_json_endpoint(test_id): """Structured JSON with download URLs for all artifacts — for AI agents.""" from run import storage_client_bucket @@ -632,10 +640,8 @@ def ai_json_endpoint(test_id): def blob_exists(path): return storage_client_bucket.blob(path).exists() - has_binary = ( - blob_exists(f'test_artifacts/{test_id}/ccextractor') or - blob_exists(f'test_artifacts/{test_id}/ccextractor.exe') - ) + binary_name = CCEXTRACTOR_LINUX_BINARY if test.platform == TestPlatform.linux else CCEXTRACTOR_WIN_BINARY + has_binary = blob_exists(f'test_artifacts/{test_id}/{binary_name}') has_coredump = blob_exists(f'test_artifacts/{test_id}/coredump') has_combined_stdout = blob_exists(f'test_artifacts/{test_id}/combined_stdout.log') @@ -653,7 +659,7 @@ def blob_exists(path): else: passed += 1 - test_cases.append(_process_test_case(test_id, category['category'].name, t_data)) + test_cases.append(_process_test_case(test, category['category'].name, t_data)) report = { 'test_id': test.id, @@ -681,8 +687,8 @@ def blob_exists(path): 'test_cases': test_cases, 'how_to_reproduce': ( 'Download the binary and sample, then run: ' - + ('./ccextractor {arguments} {sample_filename}' if test.platform.value == 'linux' - else 'ccextractorwinfull.exe {arguments} {sample_filename}') + + (f'./{CCEXTRACTOR_LINUX_BINARY} {{arguments}} {{sample_filename}}' if test.platform.value == 'linux' + else f'{CCEXTRACTOR_WIN_BINARY} {{arguments}} {{sample_filename}}') ), } From 91a42dfd647969bded28d148c9c26fef0345f7e5 Mon Sep 17 00:00:00 2001 From: Pulkit Chauhan Date: Tue, 2 Jun 2026 20:20:58 +0530 Subject: [PATCH 5/5] adding consistencies for binary name --- mod_ci/controllers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py index 64bd5ec6..ae2aceff 100755 --- a/mod_ci/controllers.py +++ b/mod_ci/controllers.py @@ -2697,7 +2697,8 @@ def upload_type_request(log, test_id, repo_folder, test, request) -> bool: # Allowed artifact names that the VM can upload -ALLOWED_ARTIFACT_NAMES = {'ccextractor', 'ccextractor.exe', 'combined_stdout.log', 'coredump'} +from mod_test.controllers import CCEXTRACTOR_LINUX_BINARY, CCEXTRACTOR_WIN_BINARY +ALLOWED_ARTIFACT_NAMES = {CCEXTRACTOR_LINUX_BINARY, CCEXTRACTOR_WIN_BINARY, 'combined_stdout.log', 'coredump'} def artifact_upload_request(log, test_id, request) -> bool: