diff --git a/mod_api/__init__.py b/mod_api/__init__.py new file mode 100644 index 00000000..c2379317 --- /dev/null +++ b/mod_api/__init__.py @@ -0,0 +1,23 @@ +""" +mod_api — JSON-only REST API for the CCExtractor CI/sample platform. + +Blueprint registered at /api/v1. All endpoints return structured JSON, +use scoped Bearer token auth, and enforce rate limiting. +""" + +from flask import Blueprint + +mod_api = Blueprint('api', __name__) + +# Import middleware (registers before_request, error handlers) +from mod_api.middleware import error_handler # noqa: E402, F401 +from mod_api.middleware import auth # noqa: E402, F401 +from mod_api.middleware import rate_limit # noqa: E402, F401 + +# Import routes (registers endpoint functions) +from mod_api.routes import auth as auth_routes # noqa: E402, F401 +from mod_api.routes import runs # noqa: E402, F401 +from mod_api.routes import samples # noqa: E402, F401 +from mod_api.routes import results # noqa: E402, F401 +from mod_api.routes import errors_logs # noqa: E402, F401 +from mod_api.routes import system # noqa: E402, F401 diff --git a/mod_api/middleware/__init__.py b/mod_api/middleware/__init__.py new file mode 100644 index 00000000..860b3ce0 --- /dev/null +++ b/mod_api/middleware/__init__.py @@ -0,0 +1 @@ +"""mod_api.middleware: auth, rate limiting, validation, and error handling.""" diff --git a/mod_api/middleware/auth.py b/mod_api/middleware/auth.py new file mode 100644 index 00000000..ffb51624 --- /dev/null +++ b/mod_api/middleware/auth.py @@ -0,0 +1,147 @@ +""" +Bearer token authentication and scope/role enforcement for API routes. + +Runs as a before_request hook on the api blueprint. Public endpoints +(token creation, health check) are exempted. On success, the authenticated +user and token are stored in flask.g for downstream handlers. 
# Endpoints that bypass Bearer authentication entirely.
_PUBLIC_ENDPOINTS = frozenset([
    'api.create_token',   # POST /auth/tokens (uses email/password body)
    'api.system_health',  # GET /system/health (uptime monitoring)
])


def _unauthorized():
    """Build the uniform 401 response used for every authentication failure.

    All failure modes (missing header, wrong scheme, unknown, expired or
    revoked token) share one message so the response does not reveal which
    check rejected the request.
    """
    return make_error_response(
        'unauthorized',
        'Bearer token is missing, expired, or invalid.',
        http_status=401,
    )


@mod_api.before_request
def authenticate_request():
    """Validate the Bearer token and attach user context to the request.

    Public endpoints get ``g.api_user`` / ``g.api_token`` set to ``None``.
    For every other endpoint the ``Authorization`` header must carry a
    ``Bearer spci_...`` token that matches a stored hash and is still valid.

    Returns:
        ``None`` to let the request continue, or a 401 JSON response.
    """
    if request.endpoint in _PUBLIC_ENDPOINTS:
        g.api_user = None
        g.api_token = None
        return None

    header = request.headers.get('Authorization', '')
    scheme, _, credentials = header.partition(' ')
    # HTTP auth scheme names are case-insensitive, so "bearer" and "BEARER"
    # must be accepted as well as "Bearer".
    if scheme.lower() != 'bearer':
        return _unauthorized()

    token_value = credentials.strip()
    if not token_value.startswith('spci_'):
        return _unauthorized()

    # Look up by prefix, then verify the full hash against each candidate.
    prefix = ApiToken.extract_prefix(token_value)
    candidates = ApiToken.query.filter_by(token_prefix=prefix).all()
    matched_token = next(
        (
            candidate for candidate in candidates
            if ApiToken.verify_token(token_value, candidate.token_hash)
        ),
        None,
    )
    if matched_token is None or not matched_token.is_valid:
        return _unauthorized()

    g.api_token = matched_token
    g.api_user = matched_token.user
    return None


def require_scope(scope: str):
    """Decorator: reject the request if the token lacks ``scope``.

    Responds 401 when no token is attached to ``g`` and 403 (with the
    required and granted scopes in ``details``) when the scope is missing.
    """
    def decorator(f):
        @functools.wraps(f)
        def decorated_function(*args, **kwargs):
            token = getattr(g, 'api_token', None)
            if token is None:
                return _unauthorized()
            if not token.has_scope(scope):
                return make_error_response(
                    'forbidden',
                    'Token does not have the required scope for this operation.',
                    details={
                        'required_scope': scope,
                        'token_scopes': token.scopes,
                    },
                    http_status=403,
                )
            return f(*args, **kwargs)
        return decorated_function
    return decorator


def require_roles(roles: List[str]):
    """Decorator: reject the request if the user's role is not in ``roles``.

    Responds 401 when no user is attached to ``g`` and 403 (with the allowed
    roles and the caller's role in ``details``) when the role is not allowed.
    """
    def decorator(f):
        @functools.wraps(f)
        def decorated_function(*args, **kwargs):
            user = getattr(g, 'api_user', None)
            if user is None:
                return _unauthorized()
            if user.role.value not in roles:
                return make_error_response(
                    'forbidden',
                    'Your role does not have permission for this operation.',
                    details={
                        'required_roles': roles,
                        'user_role': user.role.value,
                    },
                    http_status=403,
                )
            return f(*args, **kwargs)
        return decorated_function
    return decorator
def make_error_response(code, message, details=None, http_status=400):
    """Build a JSON error response conforming to the ErrorResponse schema.

    Args:
        code: Machine-readable error code (e.g. ``'unauthorized'``).
        message: Human-readable message; stringified and cut to 500 chars.
        details: Optional dict of extra context; ``{}`` when omitted.
        http_status: HTTP status code to set on the response.

    Returns:
        A Flask JSON response of shape ``{"code", "message", "details"}``.
    """
    body = {
        'code': code,
        'message': str(message)[:500],
        'details': details if details is not None else {},
    }
    response = jsonify(body)
    response.status_code = http_status
    return response


_API_PREFIX = '/api/v1'


def _is_api_request():
    """Return True when the current request targets the JSON API.

    The prefix must be followed by ``/`` (or be the whole path) so that an
    unrelated path such as ``/api/v10`` is not mistaken for an API route.
    """
    path = request.path
    return path == _API_PREFIX or path.startswith(_API_PREFIX + '/')


# The handlers below are registered application-wide, so they also receive
# errors raised by non-API pages. For those they hand the exception back to
# Flask with ``return error``: an HTTPException is itself a valid response.
# Raising from inside an error handler would instead escape as an unhandled
# exception and turn e.g. an ordinary 404 into a 500.


@mod_api.app_errorhandler(400)
def handle_400(error):
    """Bad request."""
    if not _is_api_request():
        return error
    return make_error_response(
        'validation_error',
        getattr(error, 'description', 'Bad request.'),
        http_status=400,
    )


@mod_api.app_errorhandler(401)
def handle_401(error):
    """Unauthorized."""
    if not _is_api_request():
        return error
    return make_error_response(
        'unauthorized',
        'Bearer token is missing, expired, or invalid.',
        http_status=401,
    )


@mod_api.app_errorhandler(403)
def handle_403(error):
    """Forbidden."""
    if not _is_api_request():
        return error
    return make_error_response(
        'forbidden',
        'Token does not have the required scope for this operation.',
        http_status=403,
    )


@mod_api.app_errorhandler(404)
def handle_404(error):
    """Not found."""
    if not _is_api_request():
        return error
    return make_error_response(
        'not_found',
        getattr(error, 'description', 'Resource not found.'),
        http_status=404,
    )


@mod_api.app_errorhandler(405)
def handle_405(error):
    """Method not allowed."""
    if not _is_api_request():
        return error
    return make_error_response(
        'method_not_allowed',
        'Method not allowed.',
        http_status=405,
    )


@mod_api.app_errorhandler(422)
def handle_422(error):
    """Unprocessable entity."""
    if not _is_api_request():
        return error
    return make_error_response(
        'unprocessable',
        getattr(error, 'description', 'Request is valid JSON but semantically invalid.'),
        http_status=422,
    )


@mod_api.app_errorhandler(429)
def handle_429(error):
    """Rate limited."""
    if not _is_api_request():
        return error
    # NOTE(review): these figures are static placeholders, not live limiter
    # state; confirm whether clients rely on them.
    return make_error_response(
        'rate_limited',
        'Rate limit exceeded.',
        details={'retry_after': 30, 'limit': 120, 'window': '60s'},
        http_status=429,
    )


@mod_api.app_errorhandler(500)
def handle_500(error):
    """Internal server error."""
    if not _is_api_request():
        return error
    return make_error_response(
        'internal_error',
        'An unexpected error occurred.',
        http_status=500,
    )


@mod_api.errorhandler(MarshmallowValidationError)
def handle_marshmallow_validation_error(error):
    """Catch schema validation failures and return them as 400."""
    return make_error_response(
        'validation_error',
        'Request failed schema validation.',
        details={'fields': error.messages},
        http_status=400,
    )


@mod_api.errorhandler(SQLAlchemyError)
def handle_sqlalchemy_error(error):
    """Log the real error, but never expose raw SQL details to the client."""
    from flask import g
    # ``g.log`` is optional; when it is absent the error is not logged here.
    log = getattr(g, 'log', None)
    if log:
        log.error(f'Database error in API: {error}')
    return make_error_response(
        'internal_error',
        'An unexpected database error occurred.',
        http_status=500,
    )
_rate_limit_store = {}  # key -> {'count': int, 'window_start': float}
_eviction_counter = 0
_EVICTION_INTERVAL = 100  # run cleanup every N requests
_MAX_WINDOW_SECONDS = 900  # longest window below; older entries are dead

_WRITE_METHODS = frozenset(['POST', 'DELETE', 'PUT', 'PATCH'])

# bucket name -> (max_requests, window_seconds)
_LIMITS = {
    'auth': (5, 900),
    'write': (20, 60),
    'read': (120, 60),
}


def _evict_stale_entries():
    """Prune entries older than 15 min to bound memory usage.

    The scan only runs on every ``_EVICTION_INTERVAL``-th call.
    """
    global _eviction_counter
    _eviction_counter += 1
    if _eviction_counter < _EVICTION_INTERVAL:
        return
    _eviction_counter = 0
    now = time.time()
    stale_keys = [
        key for key, entry in _rate_limit_store.items()
        if (now - entry['window_start']) > _MAX_WINDOW_SECONDS
    ]
    for key in stale_keys:
        del _rate_limit_store[key]


def _get_bucket():
    """Classify the current request as ``'auth'``, ``'write'`` or ``'read'``."""
    if request.endpoint == 'api.create_token':
        return 'auth'
    if request.method in _WRITE_METHODS:
        return 'write'
    return 'read'


def _get_rate_limit_key():
    """Build the rate-limit bucket key for this request.

    The key is namespaced by limit class so each class has its own counter
    and window. Without the namespace, reads would consume the smaller write
    allowance, and a 60-second read from the same IP would reset the
    15-minute token-creation window.
    """
    bucket = _get_bucket()
    # Token creation is always keyed by IP: no token exists yet.
    token = None if bucket == 'auth' else getattr(g, 'api_token', None)
    if token:
        return f'{bucket}:token:{token.id}'
    return f'{bucket}:ip:{request.remote_addr}'


def _get_limits():
    """Return (max_requests, window_seconds) for the current endpoint."""
    return _LIMITS[_get_bucket()]


@mod_api.before_request
def check_rate_limit():
    """Reject the request if the client has exceeded their rate limit.

    Returns:
        ``None`` while the client is within its allowance, otherwise a 429
        JSON response carrying ``Retry-After`` and ``X-RateLimit-*`` headers.

    NOTE(review): mod_api/__init__.py imports the auth middleware first, so
    the auth hook presumably runs before this one and requests it rejects
    never reach the limiter; confirm that is intended.
    """
    _evict_stale_entries()

    key = _get_rate_limit_key()
    max_requests, window_seconds = _get_limits()
    now = time.time()

    entry = _rate_limit_store.get(key)
    if entry is None or (now - entry['window_start']) >= window_seconds:
        # First request, or the previous window has elapsed: start afresh.
        _rate_limit_store[key] = {'count': 1, 'window_start': now}
        return None

    entry['count'] += 1
    if entry['count'] <= max_requests:
        return None

    reset_at = int(entry['window_start'] + window_seconds)
    retry_after = max(1, reset_at - int(now))
    from mod_api.middleware.error_handler import make_error_response
    response = make_error_response(
        'rate_limited',
        f'Rate limit exceeded. Retry after {retry_after} seconds.',
        details={
            'retry_after': retry_after,
            'limit': max_requests,
            'window': f'{window_seconds}s',
        },
        http_status=429,
    )
    response.headers['Retry-After'] = str(retry_after)
    response.headers['X-RateLimit-Limit'] = str(max_requests)
    response.headers['X-RateLimit-Remaining'] = '0'
    response.headers['X-RateLimit-Reset'] = str(reset_at)
    return response


@mod_api.after_request
def add_rate_limit_headers(response):
    """Attach X-RateLimit-* headers to every response.

    When no counter exists yet for the caller's bucket, the headers report
    the full allowance and a reset time one window from now.
    """
    key = _get_rate_limit_key()
    max_requests, window_seconds = _get_limits()
    now = time.time()

    entry = _rate_limit_store.get(key)
    if entry:
        remaining = max(0, max_requests - entry['count'])
        reset_at = int(entry['window_start'] + window_seconds)
    else:
        remaining = max_requests
        reset_at = int(now + window_seconds)

    response.headers['X-RateLimit-Limit'] = str(max_requests)
    response.headers['X-RateLimit-Remaining'] = str(remaining)
    response.headers['X-RateLimit-Reset'] = str(reset_at)

    return response
# Anchored with \Z rather than $: in Python, $ also matches just before a
# trailing newline, so 'abc\n' would otherwise pass validation.
PATTERNS = {
    'commit_sha': re.compile(r'^[a-fA-F0-9]{40}\Z'),
    'sha256': re.compile(r'^[a-fA-F0-9]{64}\Z'),
    'repository': re.compile(r'^[a-zA-Z0-9_.\-]+/[a-zA-Z0-9_.\-]+\Z'),
    'branch': re.compile(r'^[A-Za-z0-9._/\-]+\Z'),
    'token_name': re.compile(r'^[a-zA-Z0-9_\-]+\Z'),
    'extension': re.compile(r'^[a-zA-Z0-9]+\Z'),
}

# Whitelist of allowed sort params. Never pass raw user input to the ORM.
ALLOWED_RUN_SORTS = frozenset([
    'created_at', '-created_at',
    'started_at', '-started_at',
    'run_id', '-run_id',
])


def _field_error(field, message, detail):
    """Build a 400 ``validation_error`` response blaming a single field."""
    return make_error_response(
        'validation_error',
        message,
        details={'fields': {field: detail}},
        http_status=400,
    )


def _as_utc(moment):
    """Return ``moment`` as an aware datetime, treating naive values as UTC."""
    from datetime import timezone
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment


def validate_body(schema_class):
    """Validate the JSON body with a Marshmallow schema, pass result as ``validated_data``.

    Responds 400 when the body is not JSON or fails ``schema_class().load``.
    """
    def decorator(f):
        @wraps(f)
        def decorated(*args, **kwargs):
            json_data = request.get_json(silent=True)
            if json_data is None:
                return make_error_response(
                    'validation_error',
                    'Request body must be valid JSON.',
                    http_status=400,
                )
            try:
                validated = schema_class().load(json_data)
            except MarshmallowValidationError as e:
                return make_error_response(
                    'validation_error',
                    'Request failed schema validation.',
                    details={'fields': e.messages},
                    http_status=400,
                )
            kwargs['validated_data'] = validated
            return f(*args, **kwargs)
        return decorated
    return decorator


def validate_pagination(f):
    """Extract and validate ``limit`` (1-100) and ``offset`` (>= 0) query params.

    Defaults are ``limit=50`` and ``offset=0``; the parsed integers are
    passed to the handler as keyword arguments of the same names.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        try:
            limit = int(request.args.get('limit', 50))
        except (ValueError, TypeError):
            return _field_error(
                'limit',
                'limit must be an integer.',
                'Must be an integer between 1 and 100.',
            )

        try:
            offset = int(request.args.get('offset', 0))
        except (ValueError, TypeError):
            return _field_error(
                'offset',
                'offset must be a non-negative integer.',
                'Must be a non-negative integer.',
            )

        if not 1 <= limit <= 100:
            return _field_error(
                'limit',
                'limit must be between 1 and 100.',
                'Must be between 1 and 100.',
            )

        if offset < 0:
            return _field_error(
                'offset',
                'offset must be non-negative.',
                'Must be >= 0.',
            )

        kwargs['limit'] = limit
        kwargs['offset'] = offset
        return f(*args, **kwargs)
    return decorated


def validate_path_id(param_name):
    """Ensure a URL path parameter is a positive integer.

    The keyword argument ``param_name`` is replaced by its ``int`` value
    before the handler is called; anything else yields a 400.
    """
    def decorator(f):
        @wraps(f)
        def decorated(*args, **kwargs):
            try:
                int_value = int(kwargs.get(param_name))
            except (ValueError, TypeError):
                return _field_error(
                    param_name,
                    f'{param_name} must be a positive integer.',
                    'Must be a positive integer.',
                )
            if int_value < 1:
                return _field_error(
                    param_name,
                    f'{param_name} must be >= 1.',
                    'Must be >= 1. Zero and negative IDs are rejected.',
                )
            kwargs[param_name] = int_value
            return f(*args, **kwargs)
        return decorated
    return decorator


def validate_date_range(f):
    """Parse ``created_after``/``created_before`` query params and reject inverted ranges.

    Each parsed value (or ``None`` when absent) is passed to the handler
    unchanged. Only the ordering check normalises naive values to UTC, so a
    request that mixes an offset-less bound with an offset-bearing one is
    compared correctly instead of failing with ``TypeError``.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        from datetime import datetime

        parsed = {}
        for name in ('created_after', 'created_before'):
            raw = request.args.get(name)
            parsed[name] = None
            if not raw:
                continue
            try:
                # fromisoformat() in older Pythons rejects the 'Z' suffix.
                parsed[name] = datetime.fromisoformat(raw.replace('Z', '+00:00'))
            except ValueError:
                return _field_error(
                    name,
                    f'{name} must be a valid ISO 8601 datetime.',
                    'Invalid ISO 8601 format.',
                )

        created_after = parsed['created_after']
        created_before = parsed['created_before']
        if (
            created_after is not None
            and created_before is not None
            and _as_utc(created_after) > _as_utc(created_before)
        ):
            return make_error_response(
                'validation_error',
                'created_after must not be after created_before.',
                details={'fields': {
                    'created_after': 'Must be before created_before.',
                    'created_before': 'Must be after created_after.',
                }},
                http_status=400,
            )

        kwargs['created_after'] = created_after
        kwargs['created_before'] = created_before
        return f(*args, **kwargs)
    return decorated


def validate_sort(allowed=None):
    """Validate the ``sort`` query param against a whitelist.

    Args:
        allowed: Collection of accepted values; defaults to
            ``ALLOWED_RUN_SORTS``. The query default is ``'-created_at'``.
    """
    if allowed is None:
        allowed = ALLOWED_RUN_SORTS

    def decorator(f):
        @wraps(f)
        def decorated(*args, **kwargs):
            sort = request.args.get('sort', '-created_at')
            if sort not in allowed:
                return make_error_response(
                    'validation_error',
                    f'sort must be one of: {", ".join(sorted(allowed))}',
                    details={'fields': {'sort': f'Must be one of: {sorted(allowed)}'}},
                    http_status=400,
                )
            kwargs['sort'] = sort
            return f(*args, **kwargs)
        return decorated
    return decorator
_ph = PasswordHasher()

VALID_SCOPES = frozenset([
    'runs:read',
    'runs:write',
    'results:read',
    'baselines:write',
    'system:read',
])

DEFAULT_SCOPES = ['runs:read', 'results:read']

TOKEN_PREFIX = 'spci_'
TOKEN_BYTE_LENGTH = 32
# Number of leading token characters stored in clear for the indexed lookup.
TOKEN_LOOKUP_PREFIX_LENGTH = 16


class ApiToken(Base):
    """Scoped API token bound to a user account.

    Only the argon2 hash and a short lookup prefix of the token are stored.
    """

    __tablename__ = 'api_token'
    __table_args__ = (
        UniqueConstraint('user_id', 'token_name', name='uq_user_token_name'),
        {'mysql_engine': 'InnoDB'},
    )

    id = Column(Integer, primary_key=True)
    user_id = Column(
        Integer,
        ForeignKey('user.id', onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False,
    )
    user = relationship('User', uselist=False)
    token_name = Column(String(50), nullable=False)
    token_hash = Column(String(255), nullable=False)
    token_prefix = Column(String(TOKEN_LOOKUP_PREFIX_LENGTH), nullable=False, index=True)
    scopes_json = Column(Text(), nullable=False)  # JSON-encoded list of scope strings
    created_at = Column(DateTime(timezone=True), nullable=False)
    expires_at = Column(DateTime(timezone=True), nullable=False)
    revoked_at = Column(DateTime(timezone=True), nullable=True)

    def __init__(
        self,
        user_id: int,
        token_name: str,
        token_hash: str,
        token_prefix: str,
        scopes: List[str],
        expires_in_days: int = 30,
    ) -> None:
        """Create a token row that expires ``expires_in_days`` days from now (UTC)."""
        self.user_id = user_id
        self.token_name = token_name
        self.token_hash = token_hash
        self.token_prefix = token_prefix
        self.scopes_json = json.dumps(scopes)
        self.created_at = datetime.now(timezone.utc)
        self.expires_at = self.created_at + timedelta(days=expires_in_days)

    def __repr__(self) -> str:
        # Deliberately omits the hash and prefix so they never end up in logs.
        return (
            f'<ApiToken id={self.id} name={self.token_name!r} '
            f'user_id={self.user_id}>'
        )

    @property
    def scopes(self) -> List[str]:
        """Scopes granted to this token, decoded from ``scopes_json``."""
        return json.loads(self.scopes_json)

    @property
    def is_expired(self) -> bool:
        """True once ``expires_at`` has passed; a missing expiry counts as expired."""
        expires = self.expires_at
        if expires is None:
            return True
        # MySQL DATETIME columns don't preserve tzinfo; treat naive as UTC.
        if expires.tzinfo is None:
            expires = expires.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) > expires

    @property
    def is_revoked(self) -> bool:
        """True when ``revoked_at`` has been set."""
        return self.revoked_at is not None

    @property
    def is_valid(self) -> bool:
        """True when the token is neither expired nor revoked."""
        return not self.is_expired and not self.is_revoked

    def has_scope(self, scope: str) -> bool:
        """Return whether ``scope`` is among this token's scopes."""
        return scope in self.scopes

    def revoke(self) -> None:
        """Mark the token as revoked as of now (UTC)."""
        self.revoked_at = datetime.now(timezone.utc)

    @staticmethod
    def generate_token() -> str:
        """Create a new random token string with the spci_ prefix."""
        random_part = secrets.token_urlsafe(TOKEN_BYTE_LENGTH)
        return f'{TOKEN_PREFIX}{random_part}'

    @staticmethod
    def hash_token(plaintext: str) -> str:
        """Hash a token with argon2 for storage."""
        return _ph.hash(plaintext)

    @staticmethod
    def verify_token(plaintext: str, token_hash: str) -> bool:
        """Verify a plaintext token against its stored argon2 hash.

        Returns ``False`` on a mismatch and also when verification cannot be
        performed (e.g. a malformed stored hash), so a bad row fails closed
        as a 401 instead of surfacing as a server error.
        """
        from argon2.exceptions import VerificationError
        try:
            return _ph.verify(token_hash, plaintext)
        except (VerificationError, ValueError):
            # VerificationError is the base class of VerifyMismatchError;
            # argon2's invalid-hash error derives from ValueError.
            return False

    @staticmethod
    def extract_prefix(token: str) -> str:
        """Return the first 16 chars used for DB lookup (the whole token if shorter)."""
        return token[:TOKEN_LOOKUP_PREFIX_LENGTH]
class TokenCreateRequestSchema(Schema):
    """Schema for POST /auth/tokens request body."""

    email = fields.Email(required=True)
    password = fields.String(
        required=True,
        validate=validate.Length(min=5, max=128),
    )
    token_name = fields.String(
        required=True,
        validate=[
            validate.Length(min=1, max=50),
            validate.Regexp(
                # \Z instead of $ so a trailing newline is rejected.
                r'^[a-zA-Z0-9_\-]+\Z',
                error='token_name must match ^[a-zA-Z0-9_-]+$',
            ),
        ],
    )
    expires_in_days = fields.Integer(
        load_default=30,
        validate=validate.Range(min=1, max=90),
    )
    scopes = fields.List(
        # Sorted so the "must be one of" error text has a stable order.
        fields.String(validate=validate.OneOf(sorted(VALID_SCOPES))),
        load_default=None,
        validate=validate.Length(max=8),
    )

    class Meta:
        unknown = RAISE  # Reject unknown fields


class AuthTokenSchema(Schema):
    """Schema for serializing the created token response."""

    token = fields.String(required=True)
    token_type = fields.String(dump_default='bearer')
    token_name = fields.String(required=True)
    scopes = fields.List(fields.String(), required=True)
    expires_at = fields.DateTime(required=True)


class ErrorResponseSchema(Schema):
    """Standard error response body."""
    code = fields.String(required=True)
    message = fields.String(required=True)
    # marshmallow refuses ``required=True`` combined with ``load_default``
    # (ValueError when the class is defined), so ``details`` is optional on
    # load and falls back to a fresh empty dict.
    details = fields.Dict(keys=fields.String(), load_default=dict)


class PaginationSchema(Schema):
    """Offset-based pagination metadata."""
    limit = fields.Integer(required=True)
    offset = fields.Integer(required=True)
    total = fields.Integer(required=True)
    next_offset = fields.Integer(allow_none=True, load_default=None)


class CursorPaginationSchema(Schema):
    """Cursor-based pagination metadata."""
    limit = fields.Integer(required=True)
    next_cursor = fields.String(allow_none=True, load_default=None)


class ErrorItemSchema(Schema):
    """A single error derived from run results."""
    error_id = fields.String(required=True)
    run_id = fields.Integer(required=True)
    sample_id = fields.Integer(allow_none=True)
    regression_id = fields.Integer(allow_none=True)
    type = fields.String(required=True)
    severity = fields.String(
        required=True,
        validate=validate.OneOf(['info', 'warning', 'error', 'critical']),
    )
    message = fields.String(required=True)
    location = fields.Dict(allow_none=True, load_default=None)
    stack = fields.List(fields.String(), load_default=None)
    occurred_at = fields.DateTime(allow_none=True)


class ErrorSummaryBucketSchema(Schema):
    """One bucket in a grouped error summary."""
    key = fields.String(required=True)
    count = fields.Integer(required=True)
    severity = fields.String(required=True)
    # Callable default: every load gets its own list instead of sharing one.
    sample_ids = fields.List(fields.Integer(), load_default=list)
    first_seen_at = fields.DateTime(allow_none=True)
    last_seen_at = fields.DateTime(allow_none=True)


class LogLineSchema(Schema):
    """A single parsed line from a build log."""
    timestamp = fields.DateTime(allow_none=True)
    level = fields.String(
        required=True,
        validate=validate.OneOf(['debug', 'info', 'warning', 'error', 'critical']),
    )
    source = fields.String(
        required=True,
        validate=validate.OneOf(['orchestrator', 'worker', 'build', 'test_runner', 'web']),
    )
    message = fields.String(required=True)
    run_id = fields.Integer(required=True)
    sample_id = fields.Integer(allow_none=True)
# Closed value sets shared by the schemas below.
_CONTENT_ENCODINGS = ['utf-8', 'base64']
_STORAGE_STATUSES = ['ok', 'degraded', 'missing']
_DIFF_LINE_TYPES = ['add', 'delete', 'context']
_DIFF_STATUSES = ['identical', 'different', 'missing_actual', 'missing_expected']
_APPROVAL_STATUSES = ['pending_review', 'approved', 'rejected']
_RUN_STATUSES = ['queued', 'running', 'pass', 'fail', 'canceled', 'error', 'incomplete']
_RUN_PLATFORMS = ['linux', 'windows']
_RUN_TEST_TYPES = ['commit', 'pr']


class OutputFileContentSchema(Schema):
    """Expected or actual output file, returned with its content inline."""
    filename = fields.String(required=True)
    content = fields.String(required=True)
    encoding = fields.String(required=True, validate=validate.OneOf(_CONTENT_ENCODINGS))
    sha256 = fields.String(allow_none=True)
    storage_status = fields.String(
        required=True,
        validate=validate.OneOf(_STORAGE_STATUSES),
    )


class DiffHunkLineSchema(Schema):
    """Single added, deleted or context line of a diff hunk."""
    type = fields.String(required=True, validate=validate.OneOf(_DIFF_LINE_TYPES))
    content = fields.String(required=True)


class DiffHunkSchema(Schema):
    """One hunk of a diff: a header plus its lines."""
    header = fields.String(required=True)
    lines = fields.List(fields.Nested(DiffHunkLineSchema), required=True)


class DiffSchema(Schema):
    """Comparison of expected vs. actual output, expressed as hunks."""
    status = fields.String(required=True, validate=validate.OneOf(_DIFF_STATUSES))
    expected_sha256 = fields.String(allow_none=True)
    actual_sha256 = fields.String(allow_none=True)
    stats = fields.Dict(required=True)
    hunks = fields.List(fields.Nested(DiffHunkSchema), required=True)


class BaselineApprovalRequestSchema(Schema):
    """Request body for POST /runs/{id}/samples/{sid}/baseline-approval."""
    reason = fields.String(
        required=True,
        validate=validate.Length(min=10, max=1000),
    )
    output_id = fields.Integer(
        load_default=None,
        validate=validate.Range(min=1),
    )
    apply_to_variants = fields.Boolean(load_default=False)

    class Meta:
        unknown = RAISE  # unknown keys are a validation error


class BaselineApprovalSchema(Schema):
    """Outcome returned once a baseline approval has been submitted."""
    approval_id = fields.String(required=True)
    status = fields.String(required=True, validate=validate.OneOf(_APPROVAL_STATUSES))
    requested_by = fields.String(required=True)
    reason = fields.String(required=True)
    created_at = fields.DateTime(required=True)


class ProgressEventSchema(Schema):
    """Timestamped status/message entry from a run's timeline."""
    timestamp = fields.DateTime(required=True)
    status = fields.String(required=True)
    message = fields.String(required=True)


class RunSchema(Schema):
    """Complete description of a single test run."""
    run_id = fields.Integer(required=True)
    status = fields.String(required=True, validate=validate.OneOf(_RUN_STATUSES))
    platform = fields.String(required=True, validate=validate.OneOf(_RUN_PLATFORMS))
    test_type = fields.String(required=True, validate=validate.OneOf(_RUN_TEST_TYPES))
    repository = fields.String(required=True)
    branch = fields.String(required=True)
    commit_sha = fields.String(required=True)
    pr_number = fields.Integer(allow_none=True, load_default=None)
    created_at = fields.DateTime(allow_none=True)
    queued_at = fields.DateTime(allow_none=True)
    started_at = fields.DateTime(allow_none=True)
    completed_at = fields.DateTime(allow_none=True)
    github_link = fields.String(allow_none=True)


class RunSummarySchema(Schema):
    """Per-run totals: sample counts by outcome and overall runtime."""
    run_id = fields.Integer(required=True)
    status = fields.String(required=True)
    total_samples = fields.Integer(required=True)
    pass_count = fields.Integer(required=True)
    fail_count = fields.Integer(required=True)
    skip_count = fields.Integer(required=True)
    missing_output_count = fields.Integer(required=True)
    runtime_ms = fields.Integer(allow_none=True)
fields.Integer(required=True) + runtime_ms = fields.Integer(allow_none=True) + + +class RunConfigSchema(Schema): + """The configuration used to launch a run.""" + run_id = fields.Integer(required=True) + platform = fields.String(required=True) + branch = fields.String(required=True) + commit_sha = fields.String(required=True) + regression_test_ids = fields.List(fields.Integer(), required=True) + + +class RunCreateRequestSchema(Schema): + """POST /runs request body.""" + commit_sha = fields.String( + required=True, + validate=validate.Regexp( + r'^[a-fA-F0-9]{40}$', + error='commit_sha must be a 40-character hex string.', + ), + ) + platform = fields.String( + required=True, + validate=validate.OneOf(['linux', 'windows']), + ) + branch = fields.String( + load_default='master', + validate=[ + validate.Length(max=100), + validate.Regexp( + r'^[A-Za-z0-9._/\-]+$', + error='branch must match ^[A-Za-z0-9._/-]+$', + ), + ], + ) + repository = fields.String( + load_default=None, + validate=[ + validate.Length(max=100), + validate.Regexp( + r'^[a-zA-Z0-9_.\-]+/[a-zA-Z0-9_.\-]+$', + error='repository must match owner/repo format.', + ), + ], + ) + regression_test_ids = fields.List( + fields.Integer(validate=validate.Range(min=1)), + load_default=None, + validate=validate.Length(max=500), + ) + + class Meta: + unknown = RAISE + + +class RunActionResultSchema(Schema): + """Response for cancel/retry actions.""" + run_id = fields.Integer(required=True) + new_run_id = fields.Integer(allow_none=True) + action = fields.String(required=True) + status = fields.String(required=True) + message = fields.String(required=True) diff --git a/mod_api/schemas/samples.py b/mod_api/schemas/samples.py new file mode 100644 index 00000000..19413502 --- /dev/null +++ b/mod_api/schemas/samples.py @@ -0,0 +1,65 @@ +"""Schemas for samples, run sample results, history entries, and regression tests.""" + +from marshmallow import Schema, fields, validate + + +class OutputFileSchema(Schema): + """One 
class RunSampleSchema(Schema):
    """A sample's result within a specific run."""

    regression_test_id = fields.Integer(required=True)
    sample_id = fields.Integer(allow_none=True)
    sample_name = fields.String(allow_none=True)
    status = fields.String(required=True, validate=validate.OneOf([
        'pass', 'fail', 'skipped', 'missing_output', 'running', 'not_started',
    ]))
    exit_code = fields.Integer(allow_none=True)
    expected_rc = fields.Integer(allow_none=True)
    runtime_ms = fields.Integer(allow_none=True)
    command = fields.String(allow_none=True)
    category = fields.String(allow_none=True)
    # ``load_default`` is the callable ``list`` rather than a literal ``[]``:
    # a literal would be one list object handed back by every load that
    # omits the key, so mutating one result would leak into later ones.
    outputs = fields.List(fields.Nested(OutputFileSchema), load_default=list)


class SampleSchema(Schema):
    """A media sample from the sample catalog."""

    sample_id = fields.Integer(required=True)
    sha = fields.String(required=True)
    extension = fields.String(required=True)
    original_name = fields.String(required=True)
    filename = fields.String(required=True)
    # Callable default so each load gets its own fresh empty list.
    tags = fields.List(fields.String(), load_default=list)
    regression_test_count = fields.Integer(load_default=0)
    active = fields.Boolean(load_default=True)


class SampleHistoryEntrySchema(Schema):
    """One row in a sample's cross-run history."""

    run_id = fields.Integer(required=True)
    status = fields.String(required=True)
    platform = fields.String(required=True)
    branch = fields.String(required=True)
    commit_sha = fields.String(required=True)
    tested_at = fields.DateTime(allow_none=True)
    failure_signature = fields.String(allow_none=True)
# Closed value sets shared by the schemas below.
_HEALTH_STATES = ['ok', 'degraded', 'down']
_QUEUE_STATES = ['queued', 'running']
_PLATFORMS = ['linux', 'windows']


class DependencyHealthSchema(Schema):
    """Status report for one system dependency."""

    name = fields.String(required=True)
    status = fields.String(
        required=True,
        validate=validate.OneOf(_HEALTH_STATES),
    )
    message = fields.String(allow_none=True)


class SystemHealthSchema(Schema):
    """Overall system health response, including per-dependency detail."""

    status = fields.String(
        required=True,
        validate=validate.OneOf(_HEALTH_STATES),
    )
    checked_at = fields.DateTime(required=True)
    dependencies = fields.List(
        fields.Nested(DependencyHealthSchema),
        required=True,
    )


class QueueJobSchema(Schema):
    """One job currently in the queue."""

    run_id = fields.Integer(required=True)
    status = fields.String(
        required=True,
        validate=validate.OneOf(_QUEUE_STATES),
    )
    platform = fields.String(
        required=True,
        validate=validate.OneOf(_PLATFORMS),
    )
    queued_at = fields.DateTime(required=True)
    started_at = fields.DateTime(allow_none=True)
    position = fields.Integer(allow_none=True)


class BranchSchema(Schema):
    """A tracked branch."""

    repository = fields.String(required=True)
    name = fields.String(required=True)
    head_sha = fields.String(allow_none=True)
    active = fields.Boolean(required=True)
# Closed value sets used by ArtifactSchema.
_ARTIFACT_TYPES = [
    'build_log', 'sample_output', 'expected_output', 'diff', 'media_info', 'binary',
]
_STORAGE_STATES = ['ok', 'degraded', 'missing']


class ArtifactSchema(Schema):
    """A single artifact attached to a run."""

    artifact_id = fields.String(required=True)
    run_id = fields.Integer(required=True)
    sample_id = fields.Integer(allow_none=True)
    type = fields.String(
        required=True,
        validate=validate.OneOf(_ARTIFACT_TYPES),
    )
    filename = fields.String(required=True)
    content_type = fields.String(required=True)
    size_bytes = fields.Integer(allow_none=True)
    storage_status = fields.String(
        required=True,
        validate=validate.OneOf(_STORAGE_STATES),
    )
    download_url = fields.String(allow_none=True)


def paginated_response(data, total, limit, offset, schema=None):
    """Wrap one page of results in the offset-pagination JSON envelope.

    ``data`` is dumped through ``schema`` (with ``many=True``) when a schema
    is given, otherwise it is emitted as-is. ``next_offset`` is
    ``offset + limit`` while that is still below ``total``, else ``None``.
    """
    items = schema.dump(data, many=True) if schema else data

    upcoming = offset + limit
    page_info = {
        'limit': limit,
        'offset': offset,
        'total': total,
        'next_offset': upcoming if upcoming < total else None,
    }

    return jsonify({'data': items, 'pagination': page_info})
def single_response(data, schema=None, http_status=200):
    """Build a single-item JSON response.

    ``data`` is dumped through ``schema`` when one is given, otherwise it is
    passed to ``jsonify`` unchanged. The response's status code is set to
    ``http_status``.
    """
    if schema:
        serialized = schema.dump(data)
    else:
        serialized = data

    response = jsonify(serialized)
    response.status_code = http_status
    return response


def get_sort_column(sort_param, model, column_map):
    """Translate a sort string (e.g. '-created_at') into an order_by clause.

    A single leading '-' requests descending order; anything else is
    ascending. Only that one marker is removed before the lookup, so a
    malformed value such as '--created_at' is treated as an unknown field
    rather than silently accepted.

    Args:
        sort_param: Field name, optionally prefixed with '-'.
        model: Accepted for interface compatibility; not used here.
        column_map: Mapping of sortable field names to column objects that
            provide ``asc()`` and ``desc()``.

    Returns:
        ``column.desc()`` or ``column.asc()``, or ``None`` when the field
        name is not present in ``column_map``.
    """
    descending = sort_param.startswith('-')
    # Slice off exactly one marker; str.lstrip('-') would strip them all.
    field_name = sort_param[1:] if descending else sort_param

    column = column_map.get(field_name)
    if column is None:
        return None

    if descending:
        return column.desc()
    return column.asc()