From ed28b8823d27f2d8b1c3b259df57ec58d755f38f Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 17 Jun 2026 11:40:46 +0200 Subject: [PATCH 1/4] :sparkles: add support for V2 CLI --- .github/workflows/_test-cli.yml | 46 +++ .github/workflows/_workflow_lint.yml | 20 ++ .github/workflows/cron.yml | 3 + .github/workflows/pull-request.yml | 7 + CHANGELOG.md | 11 + CLI.md | 69 +++- bin/MindeeCliCommand.php | 12 +- bin/V2/InferenceCommand.php | 318 ++++++++++++++++++ bin/V2/SearchModelsCommand.php | 102 ++++++ bin/V2/V2CliCommandConfig.php | 37 ++ bin/V2/V2CliProducts.php | 91 +++++ bin/cli.php | 138 +++++++- composer.json | 4 +- tests/V2/Cli/MindeeCliCommandV2Test.php | 264 +++++++++++++++ .../Cli/MindeeCliCommandV2TestFunctional.php | 164 +++++++++ tests/V2/Cli/MindeeCliV2TestingUtilities.php | 37 ++ tests/test_v1_cli.sh | 31 ++ tests/test_v2_cli.sh | 43 +++ 18 files changed, 1371 insertions(+), 26 deletions(-) create mode 100644 .github/workflows/_test-cli.yml create mode 100644 .github/workflows/_workflow_lint.yml create mode 100644 bin/V2/InferenceCommand.php create mode 100644 bin/V2/SearchModelsCommand.php create mode 100644 bin/V2/V2CliCommandConfig.php create mode 100644 bin/V2/V2CliProducts.php create mode 100644 tests/V2/Cli/MindeeCliCommandV2Test.php create mode 100644 tests/V2/Cli/MindeeCliCommandV2TestFunctional.php create mode 100644 tests/V2/Cli/MindeeCliV2TestingUtilities.php create mode 100755 tests/test_v1_cli.sh create mode 100755 tests/test_v2_cli.sh diff --git a/.github/workflows/_test-cli.yml b/.github/workflows/_test-cli.yml new file mode 100644 index 00000000..53f42770 --- /dev/null +++ b/.github/workflows/_test-cli.yml @@ -0,0 +1,46 @@ +name: Test Command Line Interface + +on: + workflow_call: + workflow_dispatch: + +env: + MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ 
secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + +jobs: + test: + name: Run CLI Tests + timeout-minutes: 30 + strategy: + max-parallel: 2 + matrix: + php-version: + - "8.1" + - "8.5" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up PHP ${{ matrix.php-version }} + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-version }} + - uses: ramsey/composer-install@v3 + + - name: Test V2 CLI + shell: sh + run: | + ./tests/test_v2_cli.sh ./tests/resources/file_types/pdf/blank_1.pdf + + - name: Test V1 CLI + shell: sh + run: | + ./tests/test_v1_cli.sh ./tests/resources/file_types/pdf/blank_1.pdf diff --git a/.github/workflows/_workflow_lint.yml b/.github/workflows/_workflow_lint.yml new file mode 100644 index 00000000..8eb21904 --- /dev/null +++ b/.github/workflows/_workflow_lint.yml @@ -0,0 +1,20 @@ +name: Lint workflows + +on: + workflow_call: + +permissions: + contents: read + +jobs: + actionlint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Download actionlint + id: get_actionlint + run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash) + shell: bash + - name: Run actionlint + run: ${{ steps.get_actionlint.outputs.executable }} -color + shell: bash diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index a306b37c..b016b7b2 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -8,3 +8,6 @@ jobs: smoke-test: uses: mindee/mindee-api-php/.github/workflows/_test-smoke.yml@main secrets: inherit + test-cli: + uses: 
mindee/mindee-api-php/.github/workflows/_test-cli.yml@main + secrets: inherit diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 0fdf33b6..1a2c06cb 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -8,8 +8,11 @@ permissions: pull-requests: read jobs: + workflow-lint: + uses: ./.github/workflows/_workflow_lint.yml static-analysis: uses: ./.github/workflows/_static-analysis.yml + needs: workflow-lint static-dependency-checks: uses: ./.github/workflows/_static-dependency-checks.yml needs: static-analysis @@ -25,3 +28,7 @@ jobs: uses: ./.github/workflows/_test-smoke.yml needs: test-units secrets: inherit + test-cli: + uses: ./.github/workflows/_test-cli.yml + needs: test-units + secrets: inherit diff --git a/CHANGELOG.md b/CHANGELOG.md index ecae5af4..2d4dcb8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Mindee PHP API Library Changelog +## Unreleased +### ¡Breaking Changes! +* :boom: :recycle: restructure CLI to align with the canonical .NET shape: + V1 product invocations now live under a `v1` group (e.g. `mindee v1 invoice file.pdf`). + Legacy `mindee ...` invocations remain supported via automatic dispatch. +### Changes +* :sparkles: add a working V2 CLI: `extraction`, `classification`, `crop`, `ocr`, `split` and `search-models` commands +* :sparkles: V2 inference commands accept `--api-key/-k`, `--alias/-a`, `--model-id/-m`, `--output/-o` (`summary` / `full` / `raw`) +* :sparkles: `extraction` additionally exposes `--rag/-g`, `--raw-text/-r`, `--confidence/-c`, `--polygon/-p`, `--text-context/-t` +* :white_check_mark: add unit + functional CLI tests for V2 commands + ## v3.0.0-rc1 - 2026-06-10 ### Changes * :recycle: move page count to attribute diff --git a/CLI.md b/CLI.md index 6e056429..a3c3fc0a 100644 --- a/CLI.md +++ b/CLI.md @@ -2,56 +2,105 @@ The CLI tool is provided mainly for quick tests and debugging. 
+The CLI ships **two top-level groups**: + +* V2 product subcommands at the root: `extraction`, `classification`, + `crop`, `ocr`, `split`, plus `search-models`. +* All V1 products live under a `v1` group (e.g. `mindee v1 invoice ...`). + +For backward compatibility, invoking a V1 product name directly +(`mindee invoice ...`, `mindee receipt ...`, …) is automatically +dispatched to `mindee v1 ...`. + ### General help ```shell -./mindee generated --help +./mindee --help +./mindee v1 --help +./mindee extraction --help +./mindee search-models --help ``` > Note: Due to the limited-nature of most PHP CLI tools, the help sections aren't customized for each command. +## V1 commands + ### Example parse command for Off-the-Shelf document ```shell -./mindee invoice -k xxxxxxx /path/to/invoice.pdf +./mindee v1 invoice -k xxxxxxx /path/to/invoice.pdf ``` +> Legacy `./mindee invoice -k xxxxxxx /path/to/invoice.pdf` still works. + ### Works with environment variables ```shell export MINDEE_API_KEY=xxxxxx -./mindee invoice /path/to/invoice.pdf +./mindee v1 invoice /path/to/invoice.pdf ``` ### Example parse command for a generated document (docTI) ```shell -./mindee generated -a pikachu -k xxxxxxx pokemon_card /path/to/card.jpg -A +./mindee v1 generated -a pikachu -k xxxxxxx pokemon_card /path/to/card.jpg -A ``` ### Example async parse command ```shell -./mindee invoice-splitter path/to/the/invoice.pdf -A +./mindee v1 invoice-splitter path/to/the/invoice.pdf -A ``` > Note: the `-A` can be omitted on products which do not support synchronous mode. +### Full parsed output + ```shell -./mindee invoice-splitter path/to/the/invoice.pdf -A +./mindee v1 invoice -k xxxxxxx /path/to/invoice.pdf -o raw ``` -### [DEPRECATED] Example parse command for a custom document (API Builder) +## V2 commands + +V2 inference commands share the same option set: + +| Option | Short | Description | +|--------|-------|-------------| +| `--model-id` | `-m` | ID of the model to use (required). 
| +| `--api-key` | `-k` | API key. Falls back to `MINDEE_V2_API_KEY`. | +| `--alias` | `-a` | Optional alias for the file. | +| `--output` | `-o` | `summary` (default), `full`, or `raw`. | + +The `extraction` command adds: + +| Option | Short | Description | +|--------|-------|-------------| +| `--rag` | `-g` | Enable Retrieval-Augmented Generation. | +| `--raw-text` | `-r` | Extract the document's raw text. | +| `--confidence` | `-c` | Return per-field confidence scores. | +| `--polygon` | `-p` | Return per-field bounding polygons. | +| `--text-context` | `-t` | Add text context to the API call. | + +### Example V2 extraction call ```shell -./mindee custom -a pikachu -k xxxxxxx pokemon_card /path/to/card.jpg +export MINDEE_V2_API_KEY=xxxxxx +./mindee extraction -m MODEL_ID /path/to/file.pdf ``` -### Full parsed output +### Example V2 extraction with options and a JSON dump + +```shell +./mindee extraction -m MODEL_ID -k xxxxxxx -r -c -p -o full /path/to/file.pdf +./mindee extraction -m MODEL_ID -o raw /path/to/file.pdf +``` + +### Listing models ```shell -./mindee invoice -k xxxxxxx /path/to/invoice.pdf -o raw +./mindee search-models -k xxxxxxx +./mindee search-models --name fin --model-type extraction -r ``` ### Running the script through php diff --git a/bin/MindeeCliCommand.php b/bin/MindeeCliCommand.php index 772f9758..8322df00 100644 --- a/bin/MindeeCliCommand.php +++ b/bin/MindeeCliCommand.php @@ -49,14 +49,14 @@ class MindeeCliCommand extends Command */ public function __construct(array $documentList) { - require __DIR__ . '/../src/version.php'; + require_once __DIR__ . 
'/../src/version.php'; $this->documentList = $documentList; $this->acceptableDocuments = []; foreach ($this->documentList as $documentName => $document) { $this->acceptableDocuments[] = $documentName; } - parent::__construct('mindee'); + parent::__construct('v1'); } /** @@ -66,9 +66,9 @@ protected function formatHelp(string $product = null): string { $helpCondensed = ""; if (!$product) { - $helpCondensed = "Mindee Command-Line interface. + $helpCondensed = "Mindee V1 Command-Line interface. Usage: - mindee [options] [--] + mindee v1 [options] [--] Available products:"; foreach ($this->documentList as $documentName => $document) { @@ -92,8 +92,8 @@ protected function formatHelp(string $product = null): string protected function configure(): void { $this - ->setName('mindee') - ->setDescription('Mindee client.') + ->setName('v1') + ->setDescription('Mindee V1 product commands.') ->addArgument( 'product', InputArgument::REQUIRED, diff --git a/bin/V2/InferenceCommand.php b/bin/V2/InferenceCommand.php new file mode 100644 index 00000000..9b6e8d4f --- /dev/null +++ b/bin/V2/InferenceCommand.php @@ -0,0 +1,318 @@ +spec = $spec; + parent::__construct($spec->name); + } + + /** + * @return void Configure command options/arguments. + */ + protected function configure(): void + { + $this + ->setName($this->spec->name) + ->setDescription($this->spec->description) + ->addArgument( + 'path', + InputArgument::REQUIRED, + 'Path or HTTPS URL of the file to parse.' + ) + ->addOption( + 'model-id', + 'm', + InputOption::VALUE_REQUIRED, + 'ID of the model to use.' + ) + ->addOption( + 'api-key', + 'k', + InputOption::VALUE_REQUIRED, + 'Mindee V2 API key. Falls back to the MINDEE_V2_API_KEY environment variable.' + ) + ->addOption( + 'alias', + 'a', + InputOption::VALUE_REQUIRED, + 'Optional alias for the file.' + ) + ->addOption( + 'output', + 'o', + InputOption::VALUE_REQUIRED, + "Specify how to output the data:\n" + . "- summary: a basic summary (default)\n" + . 
"- full: detailed extraction results, including options\n" + . "- raw: full JSON object\n", + 'summary' + ); + + if ($this->spec->rag) { + $this->addOption( + 'rag', + 'g', + InputOption::VALUE_NONE, + "Enable Retrieval-Augmented Generation. Only valid for 'extraction'." + ); + } + if ($this->spec->rawText) { + $this->addOption( + 'raw-text', + 'r', + InputOption::VALUE_NONE, + 'Extract the full text of the document.' + ); + } + if ($this->spec->confidence) { + $this->addOption( + 'confidence', + 'c', + InputOption::VALUE_NONE, + 'Retrieve confidence scores from the extraction.' + ); + } + if ($this->spec->polygon) { + $this->addOption( + 'polygon', + 'p', + InputOption::VALUE_NONE, + 'Retrieve bounding polygons from the extraction.' + ); + } + if ($this->spec->textContext) { + $this->addOption( + 'text-context', + 't', + InputOption::VALUE_REQUIRED, + 'Add text context to your API call.' + ); + } + } + + /** + * @param InputInterface $input CLI input. + * @param OutputInterface $output CLI output. + * @return integer Exit code. + */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $modelId = $input->getOption('model-id'); + if (!$modelId) { + $output->writeln('The "--model-id" (-m) option is required.'); + return Command::FAILURE; + } + + $apiKey = $input->getOption('api-key'); + if (!$apiKey && !getenv('MINDEE_V2_API_KEY')) { + $output->writeln( + 'The Mindee V2 API key is missing. ' + . "Please provide it via the '--api-key' option or the MINDEE_V2_API_KEY environment variable." + ); + return Command::FAILURE; + } + + $outputType = (string) ($input->getOption('output') ?? 'summary'); + if (!in_array($outputType, ['summary', 'full', 'raw'], true)) { + $output->writeln( + "Invalid output type '$outputType'. Valid values: summary, full, raw." 
+ ); + return Command::FAILURE; + } + + $path = (string) $input->getArgument('path'); + try { + $source = $this->resolveInputSource($path); + } catch (Exception $e) { + $output->writeln("Invalid path or URL provided '$path': " . $e->getMessage() . ''); + return Command::FAILURE; + } + if ($source === null) { + $output->writeln("Invalid path or URL provided '$path'."); + return Command::FAILURE; + } + + $alias = $input->getOption('alias'); + $rag = $this->spec->rag && (bool) $input->getOption('rag'); + $rawText = $this->spec->rawText && (bool) $input->getOption('raw-text'); + $confidence = $this->spec->confidence && (bool) $input->getOption('confidence'); + $polygon = $this->spec->polygon && (bool) $input->getOption('polygon'); + $textContext = $this->spec->textContext ? $input->getOption('text-context') : null; + + try { + $params = $this->buildParameters($modelId, $alias, $rag, $rawText, $confidence, $polygon, $textContext); + } catch (Exception $e) { + $output->writeln('Failed to build parameters: ' . $e->getMessage() . ''); + return Command::FAILURE; + } + + $client = new Client($apiKey ?: null); + + try { + $response = $client->enqueueAndGetResult( + $this->spec->responseClass, + $source, + $params + ); + } catch (MindeeV2HttpException $e) { + $output->writeln('' . $e->getMessage() . ''); + return Command::FAILURE; + } catch (Exception $e) { + $output->writeln("Something went wrong, '" . $e->getMessage() . "' was raised."); + return Command::FAILURE; + } + + $this->printResponse($response, $outputType, $rag, $rawText, $output); + return Command::SUCCESS; + } + + /** + * Resolves the input source from the given path or URL. + * + * @param string $path Path or HTTPS URL. + * @return PathInput|UrlInputSource|null Input source, or null if invalid. 
+ */ + private function resolveInputSource(string $path): PathInput|UrlInputSource|null + { + if (str_starts_with($path, 'https://')) { + return new UrlInputSource($path); + } + if (@file_exists($path)) { + return new PathInput($path); + } + return null; + } + + /** + * Builds the V2 inference parameters for the current product. + * + * @param string $modelId Model identifier. + * @param string|null $alias Optional alias. + * @param boolean $rag Whether to enable RAG. + * @param boolean $rawText Whether to enable raw text extraction. + * @param boolean $confidence Whether to enable confidence scores. + * @param boolean $polygon Whether to enable polygons. + * @param string|null $textContext Optional text context. + * @return BaseParameters Parameters object for the V2 client. + */ + private function buildParameters( + string $modelId, + ?string $alias, + bool $rag, + bool $rawText, + bool $confidence, + bool $polygon, + ?string $textContext + ): BaseParameters { + $paramsClass = $this->spec->parametersClass; + if ($paramsClass === ExtractionParameters::class) { + return new ExtractionParameters( + $modelId, + rag: $rag ? true : null, + rawText: $rawText ? true : null, + polygon: $polygon ? true : null, + confidence: $confidence ? true : null, + alias: $alias, + textContext: $textContext, + ); + } + return new $paramsClass($modelId, $alias); + } + + /** + * Prints the response according to the chosen output mode. + * + * @param BaseResponse $response Inference response. + * @param string $outputType One of `summary`, `full`, `raw`. + * @param boolean $rag Whether RAG was requested by the caller. + * @param boolean $rawText Whether raw text was requested by the caller. + * @param OutputInterface $output CLI output. 
+ */ + private function printResponse( + BaseResponse $response, + string $outputType, + bool $rag, + bool $rawText, + OutputInterface $output + ): void { + if ($outputType === 'raw') { + $output->writeln($response->getRawHttp()); + return; + } + + $inference = $response->inference ?? null; + if ($inference === null) { + return; + } + + if ($outputType === 'full') { + if ($rawText + && property_exists($inference, 'activeOptions') + && $inference->activeOptions->rawText + && property_exists($inference->result, 'rawText') + && $inference->result->rawText !== null + ) { + $rawTextStr = (string) $inference->result->rawText; + $output->writeln("#############\nRaw Text\n#############\n::"); + $output->writeln(' ' . str_replace("\n", "\n ", $rawTextStr)); + $output->writeln(''); + } + if ($rag + && property_exists($inference, 'activeOptions') + && $inference->activeOptions->rag + && property_exists($inference->result, 'rag') + && $inference->result->rag !== null + ) { + $ragStr = (string) ($inference->result->rag->retrievedDocumentId ?? ''); + $output->writeln("#############\nRetrieval-Augmented Generation\n#############\n::"); + $output->writeln(' ' . str_replace("\n", "\n ", $ragStr)); + $output->writeln(''); + } + $output->write((string) $inference); + return; + } + + // summary (default) + $output->write((string) $inference->result); + } +} diff --git a/bin/V2/SearchModelsCommand.php b/bin/V2/SearchModelsCommand.php new file mode 100644 index 00000000..c4ff273e --- /dev/null +++ b/bin/V2/SearchModelsCommand.php @@ -0,0 +1,102 @@ + Available V2 model types. + */ + private const AVAILABLE_MODELS = ['extraction', 'crop', 'classification', 'ocr', 'split']; + + /** + * @return void Configure command options/arguments. + */ + protected function configure(): void + { + $this + ->setName('search-models') + ->setDescription('Search available models.') + ->addOption( + 'api-key', + 'k', + InputOption::VALUE_REQUIRED, + 'Mindee V2 API key. 
Falls back to the MINDEE_V2_API_KEY environment variable.' + ) + ->addOption( + 'name', + null, + InputOption::VALUE_REQUIRED, + 'Filter by model name partial match (case insensitive).' + ) + ->addOption( + 'model-type', + 'm', + InputOption::VALUE_REQUIRED, + "Filter by exact model type (case sensitive). Available options:\n - " + . implode("\n - ", self::AVAILABLE_MODELS) + ) + ->addOption( + 'raw-json', + 'r', + InputOption::VALUE_NONE, + 'Whether to output the raw JSON response.' + ); + } + + /** + * @param InputInterface $input CLI input. + * @param OutputInterface $output CLI output. + * @return integer Exit code. + */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $apiKey = $input->getOption('api-key'); + if (!$apiKey && !getenv('MINDEE_V2_API_KEY')) { + $output->writeln( + 'The Mindee V2 API key is missing. ' + . "Please provide it via the '--api-key' option or the MINDEE_V2_API_KEY environment variable." + ); + return Command::FAILURE; + } + + $name = $input->getOption('name'); + $modelType = $input->getOption('model-type'); + $raw = (bool) $input->getOption('raw-json'); + + $client = new Client($apiKey ?: null); + + try { + $response = $client->searchModels($name ?: null, $modelType ?: null); + } catch (MindeeV2HttpException $e) { + $output->writeln('' . $e->getMessage() . ''); + return Command::FAILURE; + } catch (Exception $e) { + $output->writeln("Something went wrong, '" . $e->getMessage() . "' was raised."); + return Command::FAILURE; + } + + if ($raw) { + $output->writeln($response->getRawHttp()); + } else { + $output->write((string) $response); + } + return Command::SUCCESS; + } +} diff --git a/bin/V2/V2CliCommandConfig.php b/bin/V2/V2CliCommandConfig.php new file mode 100644 index 00000000..793f0fa4 --- /dev/null +++ b/bin/V2/V2CliCommandConfig.php @@ -0,0 +1,37 @@ + V2 product specs keyed by slug. 
+ */ + public static function getSpecs(): array + { + return [ + 'classification' => new V2CliCommandConfig( + 'classification', + 'Classification utility.', + ClassificationResponse::class, + ClassificationParameters::class, + rag: false, + rawText: false, + confidence: false, + polygon: false, + textContext: false, + ), + 'crop' => new V2CliCommandConfig( + 'crop', + 'Crop utility.', + CropResponse::class, + CropParameters::class, + rag: false, + rawText: false, + confidence: false, + polygon: false, + textContext: false, + ), + 'extraction' => new V2CliCommandConfig( + 'extraction', + 'Generic all-purpose extraction.', + ExtractionResponse::class, + ExtractionParameters::class, + rag: true, + rawText: true, + confidence: true, + polygon: true, + textContext: true, + ), + 'ocr' => new V2CliCommandConfig( + 'ocr', + 'OCR utility.', + OcrResponse::class, + OcrParameters::class, + rag: false, + rawText: false, + confidence: false, + polygon: false, + textContext: false, + ), + 'split' => new V2CliCommandConfig( + 'split', + 'Split utility.', + SplitResponse::class, + SplitParameters::class, + rag: false, + rawText: false, + confidence: false, + polygon: false, + textContext: false, + ), + ]; + } +} diff --git a/bin/cli.php b/bin/cli.php index c64c6ea8..32a0b23b 100755 --- a/bin/cli.php +++ b/bin/cli.php @@ -5,20 +5,140 @@ namespace Mindee\Cli; require __DIR__ . '/../vendor/autoload.php'; +require_once __DIR__ . '/../src/version.php'; require __DIR__ . '/MindeeCliDocuments.php'; require __DIR__ . '/MindeeCliCommand.php'; +require __DIR__ . '/V2/V2CliCommandConfig.php'; +require __DIR__ . '/V2/V2CliProducts.php'; +require __DIR__ . '/V2/InferenceCommand.php'; +require __DIR__ . 
'/V2/SearchModelsCommand.php'; -use Symfony\Component\Console\Application; use Exception; +use Mindee\Cli\V2\InferenceCommand; +use Mindee\Cli\V2\SearchModelsCommand; +use Mindee\Cli\V2\V2CliProducts; +use Symfony\Component\Console\Application; +use Symfony\Component\Console\Input\ArgvInput; +use Symfony\Component\Console\Input\InputOption; + +use function defined; +use function getenv; +use function in_array; + +/** + * Checks whether the CLI should display explicit error_log() output. + * + * @param array $argv CLI arguments. + * @return boolean True when the error log option or verbosity is present. + */ +function mindeeCliShouldDisplayErrorLog(array $argv): bool +{ + $shellVerbosity = getenv('SHELL_VERBOSITY'); + if ($shellVerbosity !== false && (int) $shellVerbosity > 0) { + return true; + } + + foreach ($argv as $arg) { + if ($arg === '--error-log' || $arg === '--verbose' || str_starts_with($arg, '-v')) { + return true; + } + } + + return false; +} + +/** + * Redirects explicit error_log() output away from stderr unless requested. + * + * @param boolean $displayErrorLog Whether error_log() output should be displayed. + */ +function mindeeCliConfigureErrorLog(bool $displayErrorLog): void +{ + if ($displayErrorLog) { + return; + } + + ini_set('error_log', stripos(PHP_OS, 'WIN') === 0 ? 'NUL' : '/dev/null'); +} + +/** + * Checks whether an argv token is a CLI-level option that does not consume a value. + * + * @param string $arg CLI argument. + * @return boolean True when the argument can be skipped before command dispatch. + */ +function mindeeCliIsGlobalOptionWithoutValue(string $arg): bool +{ + return $arg === '--error-log' || $arg === '--verbose' || str_starts_with($arg, '-v'); +} + +/** + * Rewrites argv for V1 backward compatibility. 
+ * + * If the first non-global-option argument is not a registered top-level command + * (V2 inference commands, `search-models`, `v1`, or a Symfony built-in like + * `help`/`list`/`completion`) and is not an option, it is treated as a V1 + * product name and `v1` is inserted before it. This preserves the legacy + * `mindee ...` invocation shape while letting V2 commands run + * unmodified. + * + * @param array $argv Original argv array. + * @param array $knownTopLevelCommands Top-level commands not to rewrite. + * @return array Possibly rewritten argv array. + */ +function mindeeRewriteArgvForV1Compat(array $argv, array $knownTopLevelCommands): array +{ + $commandIndex = 1; + while (isset($argv[$commandIndex]) && mindeeCliIsGlobalOptionWithoutValue($argv[$commandIndex])) { + $commandIndex++; + } + + if (!isset($argv[$commandIndex])) { + return $argv; + } + $first = $argv[$commandIndex]; + if ($first === '' || $first[0] === '-') { + return $argv; + } + if (in_array($first, $knownTopLevelCommands, true)) { + return $argv; + } + array_splice($argv, $commandIndex, 0, ['v1']); + return $argv; +} + +$displayErrorLog = mindeeCliShouldDisplayErrorLog($_SERVER['argv']); +mindeeCliConfigureErrorLog($displayErrorLog); + +$cli = new Application('mindee', defined('Mindee\\VERSION') ? \Mindee\VERSION : 'unknown'); +$cli->getDefinition()->addOption(new InputOption( + 'error-log', + null, + InputOption::VALUE_NONE, + 'Display PHP error_log() output. Also enabled by verbose output.' 
+)); + +$v1Specs = MindeeCliDocuments::getSpecs(); +$v1Command = new MindeeCliCommand($v1Specs); +$cli->add($v1Command); + +foreach (V2CliProducts::getSpecs() as $spec) { + $cli->add(new InferenceCommand($spec)); +} +$cli->add(new SearchModelsCommand()); + +$knownTopLevelCommands = ['v1', 'search-models', 'list', 'help', 'completion']; +foreach (V2CliProducts::getSpecs() as $spec) { + $knownTopLevelCommands[] = $spec->name; +} + +$argv = mindeeRewriteArgvForV1Compat($_SERVER['argv'], $knownTopLevelCommands); -$cli = new Application(); -$mindeeCommand = new MindeeCliCommand(MindeeCliDocuments::getSpecs()); -$cli->add($mindeeCommand); try { - $cli->add($mindeeCommand); - $cli->setDefaultCommand($mindeeCommand->getName(), true); - $cli->run(); + $cli->run(new ArgvInput($argv)); } catch (Exception $e) { - error_log("Could not start the Mindee CLI, an exception was raised:"); - error_log($e->getMessage()); + if ($displayErrorLog) { + error_log('Could not start the Mindee CLI, an exception was raised:'); + error_log($e->getMessage()); + } } diff --git a/composer.json b/composer.json index 6cdf6d7e..2c38a14b 100644 --- a/composer.json +++ b/composer.json @@ -53,6 +53,8 @@ "test:unit": "phpunit -c tests/phpunit.xml", "test:functional": "phpunit -c tests/functional.xml", "test:smoke:v1": "./tests/test_v1_code_samples.sh", - "test:smoke:v2": "./tests/test_v2_code_samples.sh" + "test:smoke:v2": "./tests/test_v2_code_samples.sh", + "test:cli:v1": "./tests/test_v1_cli.sh", + "test:cli:v2": "./tests/test_v2_cli.sh" } } diff --git a/tests/V2/Cli/MindeeCliCommandV2Test.php b/tests/V2/Cli/MindeeCliCommandV2Test.php new file mode 100644 index 00000000..017306c4 --- /dev/null +++ b/tests/V2/Cli/MindeeCliCommandV2Test.php @@ -0,0 +1,264 @@ +filePath = TestingUtilities::getFileTypesDir() . '/pdf/blank_1.pdf'; + } + + /** + * @return array Inference command name provider. 
+ */ + public static function provideInferenceCommandNames(): iterable + { + return [ + 'classification' => ['classification'], + 'crop' => ['crop'], + 'extraction' => ['extraction'], + 'ocr' => ['ocr'], + 'split' => ['split'], + ]; + } + + public function testListShouldShowAllV2Commands(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest(['list']); + self::assertSame(0, $cmdOutput['code']); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('classification', $stdout); + self::assertStringContainsString('crop', $stdout); + self::assertStringContainsString('extraction', $stdout); + self::assertStringContainsString('ocr', $stdout); + self::assertStringContainsString('split', $stdout); + self::assertStringContainsString('search-models', $stdout); + self::assertStringContainsString('v1', $stdout); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceMissingModelIdMustFail(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [$command, $this->filePath, '-k', 'fake-key'], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + '--model-id', + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about missing --model-id" + ); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceMissingApiKeyMustFail(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [$command, '-m', 'some-model-id', $this->filePath], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + 'API key is missing', + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about missing API key" + ); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceInvalidPathMustFail(string $command): void + { + $cmdOutput = 
MindeeCliV2TestingUtilities::executeTest( + [$command, '-m', 'some-model-id', '-k', 'fake-key', 'invalid-file-path'], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + "Invalid path or URL provided 'invalid-file-path'", + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about an invalid path" + ); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceInvalidOutputTypeMustFail(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [$command, '-m', 'some-model-id', '-k', 'fake-key', '-o', 'garbage', $this->filePath], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + "Invalid output type 'garbage'", + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about an invalid output type" + ); + } + + public function testExtractionExposesAllExtractionOptions(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest(['extraction', '--help']); + self::assertSame(0, $cmdOutput['code']); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('--rag', $stdout); + self::assertStringContainsString('--raw-text', $stdout); + self::assertStringContainsString('--confidence', $stdout); + self::assertStringContainsString('--polygon', $stdout); + self::assertStringContainsString('--text-context', $stdout); + self::assertStringContainsString('--alias', $stdout); + self::assertStringContainsString('--model-id', $stdout); + self::assertStringContainsString('--api-key', $stdout); + self::assertStringContainsString('--output', $stdout); + } + + /** + * Sibling V2 commands must NOT expose extraction-only options. + * + * @return array Non-extraction inference commands. 
+ */ + public static function provideNonExtractionCommandsHideExtractionOnlyOptionsCases(): iterable + { + return [ + 'classification' => ['classification'], + 'crop' => ['crop'], + 'ocr' => ['ocr'], + 'split' => ['split'], + ]; + } + + /** + * @dataProvider provideNonExtractionCommandsHideExtractionOnlyOptionsCases + */ + public function testNonExtractionCommandsHideExtractionOnlyOptions(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest([$command, '--help']); + self::assertSame(0, $cmdOutput['code']); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringNotContainsString('--rag', $stdout); + self::assertStringNotContainsString('--raw-text', $stdout); + self::assertStringNotContainsString('--confidence', $stdout); + self::assertStringNotContainsString('--polygon', $stdout); + self::assertStringNotContainsString('--text-context', $stdout); + // Common options must still be present. + self::assertStringContainsString('--alias', $stdout); + self::assertStringContainsString('--model-id', $stdout); + self::assertStringContainsString('--api-key', $stdout); + self::assertStringContainsString('--output', $stdout); + } + + public function testSearchModelsHelpExposesExpectedOptions(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest(['search-models', '--help']); + self::assertSame(0, $cmdOutput['code']); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('--api-key', $stdout); + self::assertStringContainsString('--name', $stdout); + self::assertStringContainsString('--model-type', $stdout); + self::assertStringContainsString('--raw-json', $stdout); + } + + public function testSearchModelsMissingApiKeyMustFail(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['search-models'], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + 'API key is missing', + implode("\n", $cmdOutput['output']) + ); 
+ } + + public function testV1BackwardCompatibilityDispatch(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['financial-document', 'invalid-file-path', '-k', 'fake-key', '-D'], + ['MINDEE_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + "Invalid path or url provided 'invalid-file-path'", + implode("\n", $cmdOutput['output']), + 'Legacy `mindee ...` dispatch must keep working' + ); + } + + public function testCliHidesErrorLogOutputByDefault(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['financial-document', $this->filePath, '-k', 'fake-key', '-D'], + ['MINDEE_API_KEY' => false] + ); + self::assertSame(0, $cmdOutput['code']); + self::assertStringNotContainsString( + 'PHP Warning', + implode("\n", $cmdOutput['output']) + ); + } + + public function testCliDisplaysErrorLogOutputWithVerbosity(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['-v', 'financial-document', $this->filePath, '-k', 'fake-key', '-D'], + ['MINDEE_API_KEY' => false] + ); + self::assertSame(0, $cmdOutput['code']); + self::assertStringContainsString( + 'PHP Warning', + implode(" +", $cmdOutput['output']) + ); + } + + public function testCliDisplaysErrorLogOutputWhenRequested(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['--error-log', 'financial-document', $this->filePath, '-k', 'fake-key', '-D'], + ['MINDEE_API_KEY' => false] + ); + self::assertSame(0, $cmdOutput['code']); + self::assertStringContainsString( + 'PHP Warning', + implode("\n", $cmdOutput['output']) + ); + } + + public function testV1ExplicitGroupInvocation(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['v1', 'financial-document', 'invalid-file-path', '-k', 'fake-key', '-D'], + ['MINDEE_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + "Invalid path or url provided 'invalid-file-path'", + implode("\n", 
$cmdOutput['output']) + ); + } +} diff --git a/tests/V2/Cli/MindeeCliCommandV2TestFunctional.php b/tests/V2/Cli/MindeeCliCommandV2TestFunctional.php new file mode 100644 index 00000000..1447f01a --- /dev/null +++ b/tests/V2/Cli/MindeeCliCommandV2TestFunctional.php @@ -0,0 +1,164 @@ +filePath = TestingUtilities::getFileTypesDir() . '/pdf/blank_1.pdf'; + $this->apiKey = (string) getenv('MINDEE_V2_API_KEY'); + if ($this->apiKey === '') { + self::markTestSkipped('MINDEE_V2_API_KEY is not set; skipping V2 functional CLI tests.'); + } + } + + /** + * @return iterable}> + * Tuples of [command name, env var holding the model ID, additional args]. + */ + public static function provideInferenceCommandSummaryOutputCases(): iterable + { + yield ['classification', 'MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID', []]; + yield ['crop', 'MINDEE_V2_SE_TESTS_CROP_MODEL_ID', []]; + yield ['extraction', 'MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID', []]; + yield ['ocr', 'MINDEE_V2_SE_TESTS_OCR_MODEL_ID', []]; + yield ['split', 'MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID', []]; + } + + /** + * @dataProvider provideInferenceCommandSummaryOutputCases + */ + public function testInferenceCommandSummaryOutput(string $command, string $modelEnv, array $extra): void + { + $modelId = (string) getenv($modelEnv); + if ($modelId === '') { + self::markTestSkipped("$modelEnv is not set; skipping $command CLI test."); + } + + $args = array_merge( + [$command, '-m', $modelId, '-k', $this->apiKey], + $extra, + [$this->filePath] + ); + $cmdOutput = MindeeCliV2TestingUtilities::executeTest($args); + self::assertSame( + 0, + $cmdOutput['code'], + "$command summary call must succeed:\n" . 
implode("\n", $cmdOutput['output']) + ); + self::assertNotEmpty($cmdOutput['output'], "$command must produce output"); + } + + public function testExtractionFullOutputWithRawText(): void + { + $modelId = (string) getenv('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID'); + if ($modelId === '') { + self::markTestSkipped('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID is not set.'); + } + + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [ + 'extraction', + '-m', $modelId, + '-k', $this->apiKey, + '-r', + '-o', 'full', + $this->filePath, + ] + ); + self::assertSame( + 0, + $cmdOutput['code'], + "extraction full call must succeed:\n" . implode("\n", $cmdOutput['output']) + ); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('Inference', $stdout); + } + + public function testExtractionRawJsonOutput(): void + { + $modelId = (string) getenv('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID'); + if ($modelId === '') { + self::markTestSkipped('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID is not set.'); + } + + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [ + 'extraction', + '-m', $modelId, + '-k', $this->apiKey, + '-o', 'raw', + $this->filePath, + ] + ); + self::assertSame( + 0, + $cmdOutput['code'], + "extraction raw call must succeed:\n" . implode("\n", $cmdOutput['output']) + ); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('"inference"', $stdout, 'Raw JSON output must contain "inference"'); + } + + public function testSearchModelsHumanReadableOutput(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['search-models', '-k', $this->apiKey] + ); + self::assertSame( + 0, + $cmdOutput['code'], + "search-models must succeed:\n" . 
implode("\n", $cmdOutput['output']) + ); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('Models', $stdout); + self::assertStringContainsString('Pagination', $stdout); + } + + public function testSearchModelsRawJsonOutput(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + ['search-models', '-k', $this->apiKey, '-r'] + ); + self::assertSame( + 0, + $cmdOutput['code'], + "search-models raw must succeed:\n" . implode("\n", $cmdOutput['output']) + ); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('"models"', $stdout); + self::assertStringContainsString('"pagination"', $stdout); + } + + public function testInferenceWithInvalidApiKeyMustFail(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [ + 'extraction', + '-m', 'fc405e37-4ba4-4d03-aeba-533a8d1f0f21', + '-k', 'invalid-api-key', + $this->filePath, + ] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertNotEmpty($cmdOutput['output']); + } +} diff --git a/tests/V2/Cli/MindeeCliV2TestingUtilities.php b/tests/V2/Cli/MindeeCliV2TestingUtilities.php new file mode 100644 index 00000000..e36689bc --- /dev/null +++ b/tests/V2/Cli/MindeeCliV2TestingUtilities.php @@ -0,0 +1,37 @@ + $args CLI arguments (each entry is shell-escaped before execution). + * @param array $envOverrides Environment variables to set/unset for the call. + * Use `false` to unset a variable. + * @return array{output: array, code: int} Output lines and exit code. + */ + public static function executeTest(array $args, array $envOverrides = []): array + { + $resCode = 0; + $output = []; + + $envPrefix = ''; + foreach ($envOverrides as $key => $value) { + if ($value === false) { + $envPrefix .= 'unset ' . escapeshellarg($key) . '; '; + } else { + $envPrefix .= escapeshellarg($key) . '=' . escapeshellarg((string) $value) . ' '; + } + } + + $escaped = array_map(escapeshellarg(...), $args); + $cmd = $envPrefix . 'php ./bin/cli.php ' . 
implode(' ', $escaped) . ' 2>&1'; + exec($cmd, $output, $resCode); + + return ['output' => $output, 'code' => $resCode]; + } +} diff --git a/tests/test_v1_cli.sh b/tests/test_v1_cli.sh new file mode 100755 index 00000000..f13937c4 --- /dev/null +++ b/tests/test_v1_cli.sh @@ -0,0 +1,31 @@ +#!/bin/sh +set -e + +TEST_FILE=$1 + +if [ -z "$TEST_FILE" ]; then + TEST_FILE='./tests/resources/file_types/pdf/blank_1.pdf' +fi +echo "TEST_FILE: ${TEST_FILE}" + +CLI_PATH="./bin/cli.php" +echo "CLI_PATH: ${CLI_PATH}" + +PRODUCTS="financial-document receipt invoice invoice-splitter" +PRODUCTS_SIZE=4 +i=1 + +for product in $PRODUCTS +do + echo "--- Test $product with Summary Output ($i/$PRODUCTS_SIZE) ---" + SUMMARY_OUTPUT=$(php "$CLI_PATH" v1 "$product" "$TEST_FILE") + if [ -z "$SUMMARY_OUTPUT" ]; then + echo "Error: no $product output" + exit 1 + fi + echo "$SUMMARY_OUTPUT" + echo "" + echo "" + sleep 0.5 + i=$((i + 1)) +done diff --git a/tests/test_v2_cli.sh b/tests/test_v2_cli.sh new file mode 100755 index 00000000..9b2c4739 --- /dev/null +++ b/tests/test_v2_cli.sh @@ -0,0 +1,43 @@ +#!/bin/sh +set -e + +TEST_FILE=$1 + +if [ -z "$TEST_FILE" ]; then + TEST_FILE='./tests/resources/file_types/pdf/blank_1.pdf' +fi +echo "TEST_FILE: ${TEST_FILE}" + +CLI_PATH="./bin/cli.php" +echo "CLI_PATH: ${CLI_PATH}" + +echo "--- Test model list retrieval" +MODELS=$(php "$CLI_PATH" search-models) +if [ -z "$MODELS" ]; then + echo "Error: no models found" + exit 1 +else + echo "Models retrieval OK" +fi + +run_test() { + model_id="$1" + model_type="$2" + + echo "--- Test $model_type ID: $model_id" + SUMMARY_OUTPUT=$(php "$CLI_PATH" "$model_type" -m "$model_id" "$TEST_FILE") + if [ -z "$SUMMARY_OUTPUT" ]; then + echo "Error: no $model_type output" + exit 1 + fi + echo "$SUMMARY_OUTPUT" + echo "" + echo "" + sleep 0.5 +} + +run_test "$MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID" "extraction" +run_test "$MINDEE_V2_SE_TESTS_CROP_MODEL_ID" "crop" +run_test "$MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID" "split" +run_test 
"$MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID" "classification" +run_test "$MINDEE_V2_SE_TESTS_OCR_MODEL_ID" "ocr" From 662e59789b4a4d33f50fb5f6f169cde3c3faa50f Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 17 Jun 2026 11:46:01 +0200 Subject: [PATCH 2/4] update workflows --- .github/workflows/_publish-docs.yml | 4 ++-- .github/workflows/_static-analysis.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_publish-docs.yml b/.github/workflows/_publish-docs.yml index b67ad3c5..725f05f2 100644 --- a/.github/workflows/_publish-docs.yml +++ b/.github/workflows/_publish-docs.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up php ${{ matrix.php-version }} + - name: Set up php 8.3 uses: shivammathur/setup-php@v2 with: php-version: 8.3 @@ -30,7 +30,7 @@ jobs: cp -r ./docs/code_samples ./docs/_build/ - name: Deploy - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./docs/_build diff --git a/.github/workflows/_static-analysis.yml b/.github/workflows/_static-analysis.yml index 3058f84a..eeb10db0 100644 --- a/.github/workflows/_static-analysis.yml +++ b/.github/workflows/_static-analysis.yml @@ -20,7 +20,7 @@ jobs: tools: composer - name: Cache dependencies - uses: actions/cache@v3 + uses: actions/cache@v5 with: path: ./vendor key: ${{ runner.os }}-${{ hashFiles('composer.json') }} From c116d77c183b397798ce9e66cced0d41f96b172e Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 17 Jun 2026 16:44:25 +0200 Subject: [PATCH 3/4] fix errors --- CHANGELOG.md | 10 -------- tests/V2/Cli/MindeeCliCommandV2Test.php | 3 +-- tests/V2/Cli/MindeeCliV2TestingUtilities.php | 24 +++++++++++++++----- tests/test_v2_cli.sh | 5 ++++ 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md 
index 2d4dcb8d..1913a7e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,5 @@ # Mindee PHP API Library Changelog -## Unreleased -### ¡Breaking Changes! -* :boom: :recycle: restructure CLI to align with the canonical .NET shape: - V1 product invocations now live under a `v1` group (e.g. `mindee v1 invoice file.pdf`). - Legacy `mindee ...` invocations remain supported via automatic dispatch. -### Changes -* :sparkles: add a working V2 CLI: `extraction`, `classification`, `crop`, `ocr`, `split` and `search-models` commands -* :sparkles: V2 inference commands accept `--api-key/-k`, `--alias/-a`, `--model-id/-m`, `--output/-o` (`summary` / `full` / `raw`) -* :sparkles: `extraction` additionally exposes `--rag/-g`, `--raw-text/-r`, `--confidence/-c`, `--polygon/-p`, `--text-context/-t` -* :white_check_mark: add unit + functional CLI tests for V2 commands ## v3.0.0-rc1 - 2026-06-10 ### Changes diff --git a/tests/V2/Cli/MindeeCliCommandV2Test.php b/tests/V2/Cli/MindeeCliCommandV2Test.php index 017306c4..422b04b1 100644 --- a/tests/V2/Cli/MindeeCliCommandV2Test.php +++ b/tests/V2/Cli/MindeeCliCommandV2Test.php @@ -231,8 +231,7 @@ public function testCliDisplaysErrorLogOutputWithVerbosity(): void self::assertSame(0, $cmdOutput['code']); self::assertStringContainsString( 'PHP Warning', - implode(" -", $cmdOutput['output']) + implode("\n", $cmdOutput['output']) ); } diff --git a/tests/V2/Cli/MindeeCliV2TestingUtilities.php b/tests/V2/Cli/MindeeCliV2TestingUtilities.php index e36689bc..7ba255fd 100644 --- a/tests/V2/Cli/MindeeCliV2TestingUtilities.php +++ b/tests/V2/Cli/MindeeCliV2TestingUtilities.php @@ -19,18 +19,30 @@ public static function executeTest(array $args, array $envOverrides = []): array $resCode = 0; $output = []; - $envPrefix = ''; + $previousEnv = []; foreach ($envOverrides as $key => $value) { + $previousEnv[$key] = getenv($key); if ($value === false) { - $envPrefix .= 'unset ' . escapeshellarg($key) . 
'; '; + putenv($key); } else { - $envPrefix .= escapeshellarg($key) . '=' . escapeshellarg((string) $value) . ' '; + putenv($key . '=' . (string) $value); } } - $escaped = array_map(escapeshellarg(...), $args); - $cmd = $envPrefix . 'php ./bin/cli.php ' . implode(' ', $escaped) . ' 2>&1'; - exec($cmd, $output, $resCode); + try { + $escaped = array_map(escapeshellarg(...), $args); + $cliPath = escapeshellarg(__DIR__ . '/../../../bin/cli.php'); + $cmd = PHP_BINARY . ' ' . $cliPath . ' ' . implode(' ', $escaped) . ' 2>&1'; + exec($cmd, $output, $resCode); + } finally { + foreach ($previousEnv as $key => $prev) { + if ($prev === false) { + putenv($key); + } else { + putenv($key . '=' . $prev); + } + } + } return ['output' => $output, 'code' => $resCode]; } diff --git a/tests/test_v2_cli.sh b/tests/test_v2_cli.sh index 9b2c4739..54adaa2c 100755 --- a/tests/test_v2_cli.sh +++ b/tests/test_v2_cli.sh @@ -24,6 +24,11 @@ run_test() { model_id="$1" model_type="$2" + if [ -z "$model_id" ]; then + echo "Error: missing model id for $model_type (environment variable not set)" + exit 1 + fi + echo "--- Test $model_type ID: $model_id" SUMMARY_OUTPUT=$(php "$CLI_PATH" "$model_type" -m "$model_id" "$TEST_FILE") if [ -z "$SUMMARY_OUTPUT" ]; then From 8ec8ab0772ac217a0cf096f85d8d92213a50fedb Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:38:59 +0200 Subject: [PATCH 4/4] separate configs and use inheritance --- ...ceCommand.php => BaseInferenceCommand.php} | 216 ++++++------------ bin/V2/ClassificationCommand.php | 43 ++++ bin/V2/CropCommand.php | 43 ++++ bin/V2/ExtractionCommand.php | 138 +++++++++++ bin/V2/OcrCommand.php | 43 ++++ bin/V2/SplitCommand.php | 43 ++++ bin/V2/V2CliCommandConfig.php | 37 --- bin/V2/V2CliProducts.php | 91 -------- bin/cli.php | 31 ++- 9 files changed, 407 insertions(+), 278 deletions(-) rename bin/V2/{InferenceCommand.php => BaseInferenceCommand.php} (50%) create mode 100644 
bin/V2/ClassificationCommand.php create mode 100644 bin/V2/CropCommand.php create mode 100644 bin/V2/ExtractionCommand.php create mode 100644 bin/V2/OcrCommand.php create mode 100644 bin/V2/SplitCommand.php delete mode 100644 bin/V2/V2CliCommandConfig.php delete mode 100644 bin/V2/V2CliProducts.php diff --git a/bin/V2/InferenceCommand.php b/bin/V2/BaseInferenceCommand.php similarity index 50% rename from bin/V2/InferenceCommand.php rename to bin/V2/BaseInferenceCommand.php index 9b6e8d4f..bb6b4cee 100644 --- a/bin/V2/InferenceCommand.php +++ b/bin/V2/BaseInferenceCommand.php @@ -11,7 +11,6 @@ use Mindee\V2\ClientOptions\BaseParameters; use Mindee\V2\Error\MindeeV2HttpException; use Mindee\V2\Parsing\Inference\BaseResponse; -use Mindee\V2\Product\Extraction\Params\ExtractionParameters; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; @@ -21,39 +20,25 @@ use function in_array; /** - * V2 inference CLI command. + * Abstract base class for V2 inference CLI commands. * - * One instance is registered per V2 product (extraction, classification, - * crop, ocr, split). The set of options exposed depends on the product - * spec passed at construction time. + * Handles the options shared by every V2 product (`path`, `--model-id`, + * `--api-key`, `--alias`, `--output`), input source resolution, client + * invocation and output formatting. Each concrete subclass owns its + * product-specific options, builds the right `BaseParameters` instance + * and customizes the human-readable output. * - * Mirrors the canonical .NET implementation in - * `mindee-api-dotnet/src/Mindee.Cli/Commands/V2/InferenceCommand.cs`. + * Mirrors the canonical Java implementation in + * `mindee-api-java/src/main/java/com/mindee/v2/cli/BaseInferenceCommand.java`. 
*/ -class InferenceCommand extends Command +abstract class BaseInferenceCommand extends Command { /** - * @var V2CliCommandConfig Product configuration. - */ - private V2CliCommandConfig $spec; - - /** - * @param V2CliCommandConfig $spec Product configuration. - */ - public function __construct(V2CliCommandConfig $spec) - { - $this->spec = $spec; - parent::__construct($spec->name); - } - - /** - * @return void Configure command options/arguments. + * @return void Configures the options common to every V2 product. */ protected function configure(): void { $this - ->setName($this->spec->name) - ->setDescription($this->spec->description) ->addArgument( 'path', InputArgument::REQUIRED, @@ -88,48 +73,16 @@ protected function configure(): void 'summary' ); - if ($this->spec->rag) { - $this->addOption( - 'rag', - 'g', - InputOption::VALUE_NONE, - "Enable Retrieval-Augmented Generation. Only valid for 'extraction'." - ); - } - if ($this->spec->rawText) { - $this->addOption( - 'raw-text', - 'r', - InputOption::VALUE_NONE, - 'Extract the full text of the document.' - ); - } - if ($this->spec->confidence) { - $this->addOption( - 'confidence', - 'c', - InputOption::VALUE_NONE, - 'Retrieve confidence scores from the extraction.' - ); - } - if ($this->spec->polygon) { - $this->addOption( - 'polygon', - 'p', - InputOption::VALUE_NONE, - 'Retrieve bounding polygons from the extraction.' - ); - } - if ($this->spec->textContext) { - $this->addOption( - 'text-context', - 't', - InputOption::VALUE_REQUIRED, - 'Add text context to your API call.' - ); - } + $this->configureProductOptions(); } + /** + * Hook for subclasses to add product-specific options on top of the + * common ones (e.g. extraction's `--rag`, `--raw-text`, ...). + * + */ + protected function configureProductOptions(): void {} + /** * @param InputInterface $input CLI input. * @param OutputInterface $output CLI output. 
@@ -173,14 +126,9 @@ protected function execute(InputInterface $input, OutputInterface $output): int } $alias = $input->getOption('alias'); - $rag = $this->spec->rag && (bool) $input->getOption('rag'); - $rawText = $this->spec->rawText && (bool) $input->getOption('raw-text'); - $confidence = $this->spec->confidence && (bool) $input->getOption('confidence'); - $polygon = $this->spec->polygon && (bool) $input->getOption('polygon'); - $textContext = $this->spec->textContext ? $input->getOption('text-context') : null; try { - $params = $this->buildParameters($modelId, $alias, $rag, $rawText, $confidence, $polygon, $textContext); + $params = $this->buildParameters($input, (string) $modelId, $alias); } catch (Exception $e) { $output->writeln('Failed to build parameters: ' . $e->getMessage() . ''); return Command::FAILURE; @@ -190,7 +138,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int try { $response = $client->enqueueAndGetResult( - $this->spec->responseClass, + $this->getResponseClass(), $source, $params ); @@ -202,7 +150,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int return Command::FAILURE; } - $this->printResponse($response, $outputType, $rag, $rawText, $output); + $this->printResponse($input, $response, $outputType, $output); return Command::SUCCESS; } @@ -224,95 +172,81 @@ private function resolveInputSource(string $path): PathInput|UrlInputSource|null } /** - * Builds the V2 inference parameters for the current product. + * Builds the V2 inference parameters for this product. * + * @param InputInterface $input CLI input, used to read product-specific options. * @param string $modelId Model identifier. * @param string|null $alias Optional alias. - * @param boolean $rag Whether to enable RAG. - * @param boolean $rawText Whether to enable raw text extraction. - * @param boolean $confidence Whether to enable confidence scores. - * @param boolean $polygon Whether to enable polygons. 
- * @param string|null $textContext Optional text context. * @return BaseParameters Parameters object for the V2 client. */ - private function buildParameters( + abstract protected function buildParameters( + InputInterface $input, string $modelId, - ?string $alias, - bool $rag, - bool $rawText, - bool $confidence, - bool $polygon, - ?string $textContext - ): BaseParameters { - $paramsClass = $this->spec->parametersClass; - if ($paramsClass === ExtractionParameters::class) { - return new ExtractionParameters( - $modelId, - rag: $rag ? true : null, - rawText: $rawText ? true : null, - polygon: $polygon ? true : null, - confidence: $confidence ? true : null, - alias: $alias, - textContext: $textContext, - ); + ?string $alias + ): BaseParameters; + + /** + * @return class-string Fully-qualified product response class. + */ + abstract protected function getResponseClass(): string; + + /** + * Default human-readable representation of an inference response. + * + * @param BaseResponse $response Inference response. + * @return string Summary string (result only). + */ + protected function getSummary(BaseResponse $response): string + { + $inference = $response->inference ?? null; + if ($inference === null) { + return ''; + } + return (string) $inference->result; + } + + /** + * Detailed representation of an inference response. Defaults to the + * full inference dump; override to add product-specific sections + * (raw text, RAG, ...). + * + * @param InputInterface $input CLI input, used to read product-specific options. + * @param BaseResponse $response Inference response. + * @return string Full string. + */ + protected function getFullOutput(InputInterface $input, BaseResponse $response): string + { + $inference = $response->inference ?? null; + if ($inference === null) { + return ''; } - return new $paramsClass($modelId, $alias); + return (string) $inference; } /** * Prints the response according to the chosen output mode. * + * @param InputInterface $input CLI input. 
* @param BaseResponse $response Inference response. * @param string $outputType One of `summary`, `full`, `raw`. - * @param boolean $rag Whether RAG was requested by the caller. - * @param boolean $rawText Whether raw text was requested by the caller. * @param OutputInterface $output CLI output. */ private function printResponse( + InputInterface $input, BaseResponse $response, string $outputType, - bool $rag, - bool $rawText, OutputInterface $output ): void { - if ($outputType === 'raw') { - $output->writeln($response->getRawHttp()); - return; + switch ($outputType) { + case 'raw': + $output->writeln($response->getRawHttp()); + return; + case 'full': + $output->write($this->getFullOutput($input, $response)); + return; + default: + $output->write($this->getSummary($response)); + return; } - - $inference = $response->inference ?? null; - if ($inference === null) { - return; - } - - if ($outputType === 'full') { - if ($rawText - && property_exists($inference, 'activeOptions') - && $inference->activeOptions->rawText - && property_exists($inference->result, 'rawText') - && $inference->result->rawText !== null - ) { - $rawTextStr = (string) $inference->result->rawText; - $output->writeln("#############\nRaw Text\n#############\n::"); - $output->writeln(' ' . str_replace("\n", "\n ", $rawTextStr)); - $output->writeln(''); - } - if ($rag - && property_exists($inference, 'activeOptions') - && $inference->activeOptions->rag - && property_exists($inference->result, 'rag') - && $inference->result->rag !== null - ) { - $ragStr = (string) ($inference->result->rag->retrievedDocumentId ?? ''); - $output->writeln("#############\nRetrieval-Augmented Generation\n#############\n::"); - $output->writeln(' ' . 
str_replace("\n", "\n ", $ragStr)); - $output->writeln(''); - } - $output->write((string) $inference); - return; - } - - // summary (default) - $output->write((string) $inference->result); } } diff --git a/bin/V2/ClassificationCommand.php b/bin/V2/ClassificationCommand.php new file mode 100644 index 00000000..1f90016d --- /dev/null +++ b/bin/V2/ClassificationCommand.php @@ -0,0 +1,43 @@ +setDescription('Classification utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return ClassificationResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new ClassificationParameters($modelId, $alias); + } +} diff --git a/bin/V2/CropCommand.php b/bin/V2/CropCommand.php new file mode 100644 index 00000000..3ff5578b --- /dev/null +++ b/bin/V2/CropCommand.php @@ -0,0 +1,43 @@ +setDescription('Crop utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return CropResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new CropParameters($modelId, $alias); + } +} diff --git a/bin/V2/ExtractionCommand.php b/bin/V2/ExtractionCommand.php new file mode 100644 index 00000000..07f8bf27 --- /dev/null +++ b/bin/V2/ExtractionCommand.php @@ -0,0 +1,138 @@ +setDescription('Generic all-purpose extraction.'); + parent::configure(); + } + + protected function configureProductOptions(): void + { + $this + ->addOption( + 'rag', + 'g', + InputOption::VALUE_NONE, + "Enable Retrieval-Augmented Generation. Only valid for 'extraction'." + ) + ->addOption( + 'raw-text', + 'r', + InputOption::VALUE_NONE, + 'Extract the full text of the document.' + ) + ->addOption( + 'confidence', + 'c', + InputOption::VALUE_NONE, + 'Retrieve confidence scores from the extraction.' 
+ ) + ->addOption( + 'polygon', + 'p', + InputOption::VALUE_NONE, + 'Retrieve bounding polygons from the extraction.' + ) + ->addOption( + 'text-context', + 't', + InputOption::VALUE_REQUIRED, + 'Add text context to your API call.' + ); + } + + protected function getResponseClass(): string + { + return ExtractionResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + $rag = (bool) $input->getOption('rag'); + $rawText = (bool) $input->getOption('raw-text'); + $confidence = (bool) $input->getOption('confidence'); + $polygon = (bool) $input->getOption('polygon'); + $textContext = $input->getOption('text-context'); + + return new ExtractionParameters( + $modelId, + rag: $rag ? true : null, + rawText: $rawText ? true : null, + polygon: $polygon ? true : null, + confidence: $confidence ? true : null, + alias: $alias, + textContext: $textContext, + ); + } + + protected function getFullOutput(InputInterface $input, BaseResponse $response): string + { + $inference = $response->inference ?? null; + if ($inference === null) { + return ''; + } + + $rawText = (bool) $input->getOption('raw-text'); + $rag = (bool) $input->getOption('rag'); + + $sections = []; + + if ( + $rawText + && property_exists($inference, 'activeOptions') + && $inference->activeOptions->rawText + && property_exists($inference->result, 'rawText') + && $inference->result->rawText !== null + ) { + $rawTextStr = (string) $inference->result->rawText; + $sections[] = "#############\nRaw Text\n#############\n::"; + $sections[] = ' ' . str_replace("\n", "\n ", $rawTextStr); + $sections[] = ''; + } + + if ( + $rag + && property_exists($inference, 'activeOptions') + && $inference->activeOptions->rag + && property_exists($inference->result, 'rag') + && $inference->result->rag !== null + ) { + $ragStr = (string) ($inference->result->rag->retrievedDocumentId ?? 
''); + $sections[] = "#############\nRetrieval-Augmented Generation\n#############\n::"; + $sections[] = ' ' . str_replace("\n", "\n ", $ragStr); + $sections[] = ''; + } + + $sections[] = (string) $inference; + return implode("\n", $sections); + } +} diff --git a/bin/V2/OcrCommand.php b/bin/V2/OcrCommand.php new file mode 100644 index 00000000..31079382 --- /dev/null +++ b/bin/V2/OcrCommand.php @@ -0,0 +1,43 @@ +setDescription('OCR utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return OcrResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new OcrParameters($modelId, $alias); + } +} diff --git a/bin/V2/SplitCommand.php b/bin/V2/SplitCommand.php new file mode 100644 index 00000000..c7bde490 --- /dev/null +++ b/bin/V2/SplitCommand.php @@ -0,0 +1,43 @@ +setDescription('Split utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return SplitResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new SplitParameters($modelId, $alias); + } +} diff --git a/bin/V2/V2CliCommandConfig.php b/bin/V2/V2CliCommandConfig.php deleted file mode 100644 index 793f0fa4..00000000 --- a/bin/V2/V2CliCommandConfig.php +++ /dev/null @@ -1,37 +0,0 @@ - V2 product specs keyed by slug. 
- */ - public static function getSpecs(): array - { - return [ - 'classification' => new V2CliCommandConfig( - 'classification', - 'Classification utility.', - ClassificationResponse::class, - ClassificationParameters::class, - rag: false, - rawText: false, - confidence: false, - polygon: false, - textContext: false, - ), - 'crop' => new V2CliCommandConfig( - 'crop', - 'Crop utility.', - CropResponse::class, - CropParameters::class, - rag: false, - rawText: false, - confidence: false, - polygon: false, - textContext: false, - ), - 'extraction' => new V2CliCommandConfig( - 'extraction', - 'Generic all-purpose extraction.', - ExtractionResponse::class, - ExtractionParameters::class, - rag: true, - rawText: true, - confidence: true, - polygon: true, - textContext: true, - ), - 'ocr' => new V2CliCommandConfig( - 'ocr', - 'OCR utility.', - OcrResponse::class, - OcrParameters::class, - rag: false, - rawText: false, - confidence: false, - polygon: false, - textContext: false, - ), - 'split' => new V2CliCommandConfig( - 'split', - 'Split utility.', - SplitResponse::class, - SplitParameters::class, - rag: false, - rawText: false, - confidence: false, - polygon: false, - textContext: false, - ), - ]; - } -} diff --git a/bin/cli.php b/bin/cli.php index 32a0b23b..7ab44b10 100755 --- a/bin/cli.php +++ b/bin/cli.php @@ -8,15 +8,21 @@ require_once __DIR__ . '/../src/version.php'; require __DIR__ . '/MindeeCliDocuments.php'; require __DIR__ . '/MindeeCliCommand.php'; -require __DIR__ . '/V2/V2CliCommandConfig.php'; -require __DIR__ . '/V2/V2CliProducts.php'; -require __DIR__ . '/V2/InferenceCommand.php'; +require __DIR__ . '/V2/BaseInferenceCommand.php'; +require __DIR__ . '/V2/ClassificationCommand.php'; +require __DIR__ . '/V2/CropCommand.php'; +require __DIR__ . '/V2/ExtractionCommand.php'; +require __DIR__ . '/V2/OcrCommand.php'; +require __DIR__ . '/V2/SplitCommand.php'; require __DIR__ . 
'/V2/SearchModelsCommand.php'; use Exception; -use Mindee\Cli\V2\InferenceCommand; +use Mindee\Cli\V2\ClassificationCommand; +use Mindee\Cli\V2\CropCommand; +use Mindee\Cli\V2\ExtractionCommand; +use Mindee\Cli\V2\OcrCommand; use Mindee\Cli\V2\SearchModelsCommand; -use Mindee\Cli\V2\V2CliProducts; +use Mindee\Cli\V2\SplitCommand; use Symfony\Component\Console\Application; use Symfony\Component\Console\Input\ArgvInput; use Symfony\Component\Console\Input\InputOption; @@ -122,14 +128,21 @@ function mindeeRewriteArgvForV1Compat(array $argv, array $knownTopLevelCommands) $v1Command = new MindeeCliCommand($v1Specs); $cli->add($v1Command); -foreach (V2CliProducts::getSpecs() as $spec) { - $cli->add(new InferenceCommand($spec)); +$v2InferenceCommands = [ + new ClassificationCommand(), + new CropCommand(), + new ExtractionCommand(), + new OcrCommand(), + new SplitCommand(), +]; +foreach ($v2InferenceCommands as $command) { + $cli->add($command); } $cli->add(new SearchModelsCommand()); $knownTopLevelCommands = ['v1', 'search-models', 'list', 'help', 'completion']; -foreach (V2CliProducts::getSpecs() as $spec) { - $knownTopLevelCommands[] = $spec->name; +foreach ($v2InferenceCommands as $command) { + $knownTopLevelCommands[] = $command->getName(); } $argv = mindeeRewriteArgvForV1Compat($_SERVER['argv'], $knownTopLevelCommands);