diff --git a/.github/workflows/_publish-docs.yml b/.github/workflows/_publish-docs.yml index b67ad3c5..725f05f2 100644 --- a/.github/workflows/_publish-docs.yml +++ b/.github/workflows/_publish-docs.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up php ${{ matrix.php-version }} + - name: Set up php 8.3 uses: shivammathur/setup-php@v2 with: php-version: 8.3 @@ -30,7 +30,7 @@ jobs: cp -r ./docs/code_samples ./docs/_build/ - name: Deploy - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./docs/_build diff --git a/.github/workflows/_static-analysis.yml b/.github/workflows/_static-analysis.yml index 3058f84a..eeb10db0 100644 --- a/.github/workflows/_static-analysis.yml +++ b/.github/workflows/_static-analysis.yml @@ -20,7 +20,7 @@ jobs: tools: composer - name: Cache dependencies - uses: actions/cache@v3 + uses: actions/cache@v5 with: path: ./vendor key: ${{ runner.os }}-${{ hashFiles('composer.json') }} diff --git a/.github/workflows/_test-cli.yml b/.github/workflows/_test-cli.yml new file mode 100644 index 00000000..53f42770 --- /dev/null +++ b/.github/workflows/_test-cli.yml @@ -0,0 +1,46 @@ +name: Test Command Line Interface + +on: + workflow_call: + workflow_dispatch: + +env: + MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} + MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} + MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }} + MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }} + MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }} + MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }} + +jobs: + test: + name: Run CLI Tests + timeout-minutes: 30 + strategy: + max-parallel: 2 + matrix: + php-version: + - 
"8.1" + - "8.5" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up PHP ${{ matrix.php-version }} + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-version }} + - uses: ramsey/composer-install@v3 + + - name: Test V2 CLI + shell: sh + run: | + ./tests/test_v2_cli.sh ./tests/resources/file_types/pdf/blank_1.pdf + + - name: Test V1 CLI + shell: sh + run: | + ./tests/test_v1_cli.sh ./tests/resources/file_types/pdf/blank_1.pdf diff --git a/.github/workflows/_workflow_lint.yml b/.github/workflows/_workflow_lint.yml new file mode 100644 index 00000000..8eb21904 --- /dev/null +++ b/.github/workflows/_workflow_lint.yml @@ -0,0 +1,20 @@ +name: Lint workflows + +on: + workflow_call: + +permissions: + contents: read + +jobs: + actionlint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Download actionlint + id: get_actionlint + run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash) + shell: bash + - name: Run actionlint + run: ${{ steps.get_actionlint.outputs.executable }} -color + shell: bash diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index a306b37c..b016b7b2 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -8,3 +8,6 @@ jobs: smoke-test: uses: mindee/mindee-api-php/.github/workflows/_test-smoke.yml@main secrets: inherit + test-cli: + uses: mindee/mindee-api-php/.github/workflows/_test-cli.yml@main + secrets: inherit diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 0fdf33b6..1a2c06cb 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -8,8 +8,11 @@ permissions: pull-requests: read jobs: + workflow-lint: + uses: ./.github/workflows/_workflow_lint.yml static-analysis: uses: ./.github/workflows/_static-analysis.yml + needs: workflow-lint static-dependency-checks: uses: 
./.github/workflows/_static-dependency-checks.yml needs: static-analysis @@ -25,3 +28,7 @@ jobs: uses: ./.github/workflows/_test-smoke.yml needs: test-units secrets: inherit + test-cli: + uses: ./.github/workflows/_test-cli.yml + needs: test-units + secrets: inherit diff --git a/CHANGELOG.md b/CHANGELOG.md index ecae5af4..1913a7e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # Mindee PHP API Library Changelog + ## v3.0.0-rc1 - 2026-06-10 ### Changes * :recycle: move page count to attribute diff --git a/CLI.md b/CLI.md index 6e056429..a3c3fc0a 100644 --- a/CLI.md +++ b/CLI.md @@ -2,56 +2,105 @@ The CLI tool is provided mainly for quick tests and debugging. +The CLI ships **two top-level groups**: + +* V2 product subcommands at the root: `extraction`, `classification`, + `crop`, `ocr`, `split`, plus `search-models`. +* All V1 products live under a `v1` group (e.g. `mindee v1 invoice ...`). + +For backward compatibility, invoking a V1 product name directly +(`mindee invoice ...`, `mindee receipt ...`, …) is automatically +dispatched to `mindee v1 ...`. + ### General help ```shell -./mindee generated --help +./mindee --help +./mindee v1 --help +./mindee extraction --help +./mindee search-models --help ``` > Note: Due to the limited-nature of most PHP CLI tools, the help sections aren't customized for each command. +## V1 commands + ### Example parse command for Off-the-Shelf document ```shell -./mindee invoice -k xxxxxxx /path/to/invoice.pdf +./mindee v1 invoice -k xxxxxxx /path/to/invoice.pdf ``` +> Legacy `./mindee invoice -k xxxxxxx /path/to/invoice.pdf` still works. 
+ ### Works with environment variables ```shell export MINDEE_API_KEY=xxxxxx -./mindee invoice /path/to/invoice.pdf +./mindee v1 invoice /path/to/invoice.pdf ``` ### Example parse command for a generated document (docTI) ```shell -./mindee generated -a pikachu -k xxxxxxx pokemon_card /path/to/card.jpg -A +./mindee v1 generated -a pikachu -k xxxxxxx pokemon_card /path/to/card.jpg -A ``` ### Example async parse command ```shell -./mindee invoice-splitter path/to/the/invoice.pdf -A +./mindee v1 invoice-splitter path/to/the/invoice.pdf -A ``` > Note: the `-A` can be omitted on products which do not support synchronous mode. +### Full parsed output + ```shell -./mindee invoice-splitter path/to/the/invoice.pdf -A +./mindee v1 invoice -k xxxxxxx /path/to/invoice.pdf -o raw ``` -### [DEPRECATED] Example parse command for a custom document (API Builder) +## V2 commands + +V2 inference commands share the same option set: + +| Option | Short | Description | +|--------|-------|-------------| +| `--model-id` | `-m` | ID of the model to use (required). | +| `--api-key` | `-k` | API key. Falls back to `MINDEE_V2_API_KEY`. | +| `--alias` | `-a` | Optional alias for the file. | +| `--output` | `-o` | `summary` (default), `full`, or `raw`. | + +The `extraction` command adds: + +| Option | Short | Description | +|--------|-------|-------------| +| `--rag` | `-g` | Enable Retrieval-Augmented Generation. | +| `--raw-text` | `-r` | Extract the document's raw text. | +| `--confidence` | `-c` | Return per-field confidence scores. | +| `--polygon` | `-p` | Return per-field bounding polygons. | +| `--text-context` | `-t` | Add text context to the API call. 
| + +### Example V2 extraction call ```shell -./mindee custom -a pikachu -k xxxxxxx pokemon_card /path/to/card.jpg +export MINDEE_V2_API_KEY=xxxxxx +./mindee extraction -m /path/to/file.pdf ``` -### Full parsed output +### Example V2 extraction with options and a JSON dump + +```shell +./mindee extraction -m -k -r -c -p -o full /path/to/file.pdf +./mindee extraction -m -o raw /path/to/file.pdf +``` + +### Listing models ```shell -./mindee invoice -k xxxxxxx /path/to/invoice.pdf -o raw +./mindee search-models -k +./mindee search-models --name fin --model-type extraction -r ``` ### Running the script through php diff --git a/bin/MindeeCliCommand.php b/bin/MindeeCliCommand.php index 772f9758..8322df00 100644 --- a/bin/MindeeCliCommand.php +++ b/bin/MindeeCliCommand.php @@ -49,14 +49,14 @@ class MindeeCliCommand extends Command */ public function __construct(array $documentList) { - require __DIR__ . '/../src/version.php'; + require_once __DIR__ . '/../src/version.php'; $this->documentList = $documentList; $this->acceptableDocuments = []; foreach ($this->documentList as $documentName => $document) { $this->acceptableDocuments[] = $documentName; } - parent::__construct('mindee'); + parent::__construct('v1'); } /** @@ -66,9 +66,9 @@ protected function formatHelp(string $product = null): string { $helpCondensed = ""; if (!$product) { - $helpCondensed = "Mindee Command-Line interface. + $helpCondensed = "Mindee V1 Command-Line interface. 
Usage: - mindee [options] [--] + mindee v1 [options] [--] Available products:"; foreach ($this->documentList as $documentName => $document) { @@ -92,8 +92,8 @@ protected function formatHelp(string $product = null): string protected function configure(): void { $this - ->setName('mindee') - ->setDescription('Mindee client.') + ->setName('v1') + ->setDescription('Mindee V1 product commands.') ->addArgument( 'product', InputArgument::REQUIRED, diff --git a/bin/V2/BaseInferenceCommand.php b/bin/V2/BaseInferenceCommand.php new file mode 100644 index 00000000..bb6b4cee --- /dev/null +++ b/bin/V2/BaseInferenceCommand.php @@ -0,0 +1,252 @@ +addArgument( + 'path', + InputArgument::REQUIRED, + 'Path or HTTPS URL of the file to parse.' + ) + ->addOption( + 'model-id', + 'm', + InputOption::VALUE_REQUIRED, + 'ID of the model to use.' + ) + ->addOption( + 'api-key', + 'k', + InputOption::VALUE_REQUIRED, + 'Mindee V2 API key. Falls back to the MINDEE_V2_API_KEY environment variable.' + ) + ->addOption( + 'alias', + 'a', + InputOption::VALUE_REQUIRED, + 'Optional alias for the file.' + ) + ->addOption( + 'output', + 'o', + InputOption::VALUE_REQUIRED, + "Specify how to output the data:\n" + . "- summary: a basic summary (default)\n" + . "- full: detailed extraction results, including options\n" + . "- raw: full JSON object\n", + 'summary' + ); + + $this->configureProductOptions(); + } + + /** + * Hook for subclasses to add product-specific options on top of the + * common ones (e.g. extraction's `--rag`, `--raw-text`, ...). + * + */ + protected function configureProductOptions(): void {} + + /** + * @param InputInterface $input CLI input. + * @param OutputInterface $output CLI output. + * @return integer Exit code. 
+ */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $modelId = $input->getOption('model-id'); + if (!$modelId) { + $output->writeln('The "--model-id" (-m) option is required.'); + return Command::FAILURE; + } + + $apiKey = $input->getOption('api-key'); + if (!$apiKey && !getenv('MINDEE_V2_API_KEY')) { + $output->writeln( + 'The Mindee V2 API key is missing. ' + . "Please provide it via the '--api-key' option or the MINDEE_V2_API_KEY environment variable." + ); + return Command::FAILURE; + } + + $outputType = (string) ($input->getOption('output') ?? 'summary'); + if (!in_array($outputType, ['summary', 'full', 'raw'], true)) { + $output->writeln( + "Invalid output type '$outputType'. Valid values: summary, full, raw." + ); + return Command::FAILURE; + } + + $path = (string) $input->getArgument('path'); + try { + $source = $this->resolveInputSource($path); + } catch (Exception $e) { + $output->writeln("Invalid path or URL provided '$path': " . $e->getMessage() . ''); + return Command::FAILURE; + } + if ($source === null) { + $output->writeln("Invalid path or URL provided '$path'."); + return Command::FAILURE; + } + + $alias = $input->getOption('alias'); + + try { + $params = $this->buildParameters($input, (string) $modelId, $alias); + } catch (Exception $e) { + $output->writeln('Failed to build parameters: ' . $e->getMessage() . ''); + return Command::FAILURE; + } + + $client = new Client($apiKey ?: null); + + try { + $response = $client->enqueueAndGetResult( + $this->getResponseClass(), + $source, + $params + ); + } catch (MindeeV2HttpException $e) { + $output->writeln('' . $e->getMessage() . ''); + return Command::FAILURE; + } catch (Exception $e) { + $output->writeln("Something went wrong, '" . $e->getMessage() . "' was raised."); + return Command::FAILURE; + } + + $this->printResponse($input, $response, $outputType, $output); + return Command::SUCCESS; + } + + /** + * Resolves the input source from the given path or URL. 
+ * + * @param string $path Path or HTTPS URL. + * @return PathInput|UrlInputSource|null Input source, or null if invalid. + */ + private function resolveInputSource(string $path): PathInput|UrlInputSource|null + { + if (str_starts_with($path, 'https://')) { + return new UrlInputSource($path); + } + if (@file_exists($path)) { + return new PathInput($path); + } + return null; + } + + /** + * Builds the V2 inference parameters for this product. + * + * @param InputInterface $input CLI input, used to read product-specific options. + * @param string $modelId Model identifier. + * @param string|null $alias Optional alias. + * @return BaseParameters Parameters object for the V2 client. + */ + abstract protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters; + + /** + * @return class-string Fully-qualified product response class. + */ + abstract protected function getResponseClass(): string; + + /** + * Default human-readable representation of an inference response. + * + * @param BaseResponse $response Inference response. + * @return string Summary string (result only). + */ + protected function getSummary(BaseResponse $response): string + { + $inference = $response->inference ?? null; + if ($inference === null) { + return ''; + } + return (string) $inference->result; + } + + /** + * Detailed representation of an inference response. Defaults to the + * full inference dump; override to add product-specific sections + * (raw text, RAG, ...). + * + * @param InputInterface $input CLI input, used to read product-specific options. + * @param BaseResponse $response Inference response. + * @return string Full string. + */ + protected function getFullOutput(InputInterface $input, BaseResponse $response): string + { + $inference = $response->inference ?? null; + if ($inference === null) { + return ''; + } + return (string) $inference; + } + + /** + * Prints the response according to the chosen output mode. 
+ * + * @param InputInterface $input CLI input. + * @param BaseResponse $response Inference response. + * @param string $outputType One of `summary`, `full`, `raw`. + * @param OutputInterface $output CLI output. + */ + private function printResponse( + InputInterface $input, + BaseResponse $response, + string $outputType, + OutputInterface $output + ): void { + switch ($outputType) { + case 'raw': + $output->writeln($response->getRawHttp()); + return; + case 'full': + $output->write($this->getFullOutput($input, $response)); + return; + default: + $output->write($this->getSummary($response)); + return; + } + } +} diff --git a/bin/V2/ClassificationCommand.php b/bin/V2/ClassificationCommand.php new file mode 100644 index 00000000..1f90016d --- /dev/null +++ b/bin/V2/ClassificationCommand.php @@ -0,0 +1,43 @@ +setDescription('Classification utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return ClassificationResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new ClassificationParameters($modelId, $alias); + } +} diff --git a/bin/V2/CropCommand.php b/bin/V2/CropCommand.php new file mode 100644 index 00000000..3ff5578b --- /dev/null +++ b/bin/V2/CropCommand.php @@ -0,0 +1,43 @@ +setDescription('Crop utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return CropResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new CropParameters($modelId, $alias); + } +} diff --git a/bin/V2/ExtractionCommand.php b/bin/V2/ExtractionCommand.php new file mode 100644 index 00000000..07f8bf27 --- /dev/null +++ b/bin/V2/ExtractionCommand.php @@ -0,0 +1,138 @@ +setDescription('Generic all-purpose extraction.'); + parent::configure(); + } + + protected function configureProductOptions(): void + { + $this + 
->addOption( + 'rag', + 'g', + InputOption::VALUE_NONE, + "Enable Retrieval-Augmented Generation. Only valid for 'extraction'." + ) + ->addOption( + 'raw-text', + 'r', + InputOption::VALUE_NONE, + 'Extract the full text of the document.' + ) + ->addOption( + 'confidence', + 'c', + InputOption::VALUE_NONE, + 'Retrieve confidence scores from the extraction.' + ) + ->addOption( + 'polygon', + 'p', + InputOption::VALUE_NONE, + 'Retrieve bounding polygons from the extraction.' + ) + ->addOption( + 'text-context', + 't', + InputOption::VALUE_REQUIRED, + 'Add text context to your API call.' + ); + } + + protected function getResponseClass(): string + { + return ExtractionResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + $rag = (bool) $input->getOption('rag'); + $rawText = (bool) $input->getOption('raw-text'); + $confidence = (bool) $input->getOption('confidence'); + $polygon = (bool) $input->getOption('polygon'); + $textContext = $input->getOption('text-context'); + + return new ExtractionParameters( + $modelId, + rag: $rag ? true : null, + rawText: $rawText ? true : null, + polygon: $polygon ? true : null, + confidence: $confidence ? true : null, + alias: $alias, + textContext: $textContext, + ); + } + + protected function getFullOutput(InputInterface $input, BaseResponse $response): string + { + $inference = $response->inference ?? null; + if ($inference === null) { + return ''; + } + + $rawText = (bool) $input->getOption('raw-text'); + $rag = (bool) $input->getOption('rag'); + + $sections = []; + + if ( + $rawText + && property_exists($inference, 'activeOptions') + && $inference->activeOptions->rawText + && property_exists($inference->result, 'rawText') + && $inference->result->rawText !== null + ) { + $rawTextStr = (string) $inference->result->rawText; + $sections[] = "#############\nRaw Text\n#############\n::"; + $sections[] = ' ' . 
str_replace("\n", "\n ", $rawTextStr); + $sections[] = ''; + } + + if ( + $rag + && property_exists($inference, 'activeOptions') + && $inference->activeOptions->rag + && property_exists($inference->result, 'rag') + && $inference->result->rag !== null + ) { + $ragStr = (string) ($inference->result->rag->retrievedDocumentId ?? ''); + $sections[] = "#############\nRetrieval-Augmented Generation\n#############\n::"; + $sections[] = ' ' . str_replace("\n", "\n ", $ragStr); + $sections[] = ''; + } + + $sections[] = (string) $inference; + return implode("\n", $sections); + } +} diff --git a/bin/V2/OcrCommand.php b/bin/V2/OcrCommand.php new file mode 100644 index 00000000..31079382 --- /dev/null +++ b/bin/V2/OcrCommand.php @@ -0,0 +1,43 @@ +setDescription('OCR utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return OcrResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new OcrParameters($modelId, $alias); + } +} diff --git a/bin/V2/SearchModelsCommand.php b/bin/V2/SearchModelsCommand.php new file mode 100644 index 00000000..c4ff273e --- /dev/null +++ b/bin/V2/SearchModelsCommand.php @@ -0,0 +1,102 @@ + Available V2 model types. + */ + private const AVAILABLE_MODELS = ['extraction', 'crop', 'classification', 'ocr', 'split']; + + /** + * @return void Configure command options/arguments. + */ + protected function configure(): void + { + $this + ->setName('search-models') + ->setDescription('Search available models.') + ->addOption( + 'api-key', + 'k', + InputOption::VALUE_REQUIRED, + 'Mindee V2 API key. Falls back to the MINDEE_V2_API_KEY environment variable.' + ) + ->addOption( + 'name', + null, + InputOption::VALUE_REQUIRED, + 'Filter by model name partial match (case insensitive).' + ) + ->addOption( + 'model-type', + 'm', + InputOption::VALUE_REQUIRED, + "Filter by exact model type (case sensitive). 
Available options:\n - " + . implode("\n - ", self::AVAILABLE_MODELS) + ) + ->addOption( + 'raw-json', + 'r', + InputOption::VALUE_NONE, + 'Whether to output the raw JSON response.' + ); + } + + /** + * @param InputInterface $input CLI input. + * @param OutputInterface $output CLI output. + * @return integer Exit code. + */ + protected function execute(InputInterface $input, OutputInterface $output): int + { + $apiKey = $input->getOption('api-key'); + if (!$apiKey && !getenv('MINDEE_V2_API_KEY')) { + $output->writeln( + 'The Mindee V2 API key is missing. ' + . "Please provide it via the '--api-key' option or the MINDEE_V2_API_KEY environment variable." + ); + return Command::FAILURE; + } + + $name = $input->getOption('name'); + $modelType = $input->getOption('model-type'); + $raw = (bool) $input->getOption('raw-json'); + + $client = new Client($apiKey ?: null); + + try { + $response = $client->searchModels($name ?: null, $modelType ?: null); + } catch (MindeeV2HttpException $e) { + $output->writeln('' . $e->getMessage() . ''); + return Command::FAILURE; + } catch (Exception $e) { + $output->writeln("Something went wrong, '" . $e->getMessage() . 
"' was raised."); + return Command::FAILURE; + } + + if ($raw) { + $output->writeln($response->getRawHttp()); + } else { + $output->write((string) $response); + } + return Command::SUCCESS; + } +} diff --git a/bin/V2/SplitCommand.php b/bin/V2/SplitCommand.php new file mode 100644 index 00000000..c7bde490 --- /dev/null +++ b/bin/V2/SplitCommand.php @@ -0,0 +1,43 @@ +setDescription('Split utility.'); + parent::configure(); + } + + protected function getResponseClass(): string + { + return SplitResponse::class; + } + + protected function buildParameters( + InputInterface $input, + string $modelId, + ?string $alias + ): BaseParameters { + return new SplitParameters($modelId, $alias); + } +} diff --git a/bin/cli.php b/bin/cli.php index c64c6ea8..7ab44b10 100755 --- a/bin/cli.php +++ b/bin/cli.php @@ -5,20 +5,153 @@ namespace Mindee\Cli; require __DIR__ . '/../vendor/autoload.php'; +require_once __DIR__ . '/../src/version.php'; require __DIR__ . '/MindeeCliDocuments.php'; require __DIR__ . '/MindeeCliCommand.php'; +require __DIR__ . '/V2/BaseInferenceCommand.php'; +require __DIR__ . '/V2/ClassificationCommand.php'; +require __DIR__ . '/V2/CropCommand.php'; +require __DIR__ . '/V2/ExtractionCommand.php'; +require __DIR__ . '/V2/OcrCommand.php'; +require __DIR__ . '/V2/SplitCommand.php'; +require __DIR__ . '/V2/SearchModelsCommand.php'; -use Symfony\Component\Console\Application; use Exception; +use Mindee\Cli\V2\ClassificationCommand; +use Mindee\Cli\V2\CropCommand; +use Mindee\Cli\V2\ExtractionCommand; +use Mindee\Cli\V2\OcrCommand; +use Mindee\Cli\V2\SearchModelsCommand; +use Mindee\Cli\V2\SplitCommand; +use Symfony\Component\Console\Application; +use Symfony\Component\Console\Input\ArgvInput; +use Symfony\Component\Console\Input\InputOption; + +use function defined; +use function getenv; +use function in_array; + +/** + * Checks whether the CLI should display explicit error_log() output. + * + * @param array $argv CLI arguments. 
+ * @return boolean True when the error log option or verbosity is present. + */ +function mindeeCliShouldDisplayErrorLog(array $argv): bool +{ + $shellVerbosity = getenv('SHELL_VERBOSITY'); + if ($shellVerbosity !== false && (int) $shellVerbosity > 0) { + return true; + } + + foreach ($argv as $arg) { + if ($arg === '--error-log' || $arg === '--verbose' || str_starts_with($arg, '-v')) { + return true; + } + } + + return false; +} + +/** + * Redirects explicit error_log() output away from stderr unless requested. + * + * @param boolean $displayErrorLog Whether error_log() output should be displayed. + */ +function mindeeCliConfigureErrorLog(bool $displayErrorLog): void +{ + if ($displayErrorLog) { + return; + } + + ini_set('error_log', stripos(PHP_OS, 'WIN') === 0 ? 'NUL' : '/dev/null'); +} + +/** + * Checks whether an argv token is a CLI-level option that does not consume a value. + * + * @param string $arg CLI argument. + * @return boolean True when the argument can be skipped before command dispatch. + */ +function mindeeCliIsGlobalOptionWithoutValue(string $arg): bool +{ + return $arg === '--error-log' || $arg === '--verbose' || str_starts_with($arg, '-v'); +} + +/** + * Rewrites argv for V1 backward compatibility. + * + * If the first non-global-option argument is not a registered top-level command + * (V2 inference commands, `search-models`, `v1`, or a Symfony built-in like + * `help`/`list`/`completion`) and is not an option, it is treated as a V1 + * product name and `v1` is inserted before it. This preserves the legacy + * `mindee ...` invocation shape while letting V2 commands run + * unmodified. + * + * @param array $argv Original argv array. + * @param array $knownTopLevelCommands Top-level commands not to rewrite. + * @return array Possibly rewritten argv array. 
+ */ +function mindeeRewriteArgvForV1Compat(array $argv, array $knownTopLevelCommands): array +{ + $commandIndex = 1; + while (isset($argv[$commandIndex]) && mindeeCliIsGlobalOptionWithoutValue($argv[$commandIndex])) { + $commandIndex++; + } + + if (!isset($argv[$commandIndex])) { + return $argv; + } + $first = $argv[$commandIndex]; + if ($first === '' || $first[0] === '-') { + return $argv; + } + if (in_array($first, $knownTopLevelCommands, true)) { + return $argv; + } + array_splice($argv, $commandIndex, 0, ['v1']); + return $argv; +} + +$displayErrorLog = mindeeCliShouldDisplayErrorLog($_SERVER['argv']); +mindeeCliConfigureErrorLog($displayErrorLog); + +$cli = new Application('mindee', defined('Mindee\\VERSION') ? \Mindee\VERSION : 'unknown'); +$cli->getDefinition()->addOption(new InputOption( + 'error-log', + null, + InputOption::VALUE_NONE, + 'Display PHP error_log() output. Also enabled by verbose output.' +)); + +$v1Specs = MindeeCliDocuments::getSpecs(); +$v1Command = new MindeeCliCommand($v1Specs); +$cli->add($v1Command); + +$v2InferenceCommands = [ + new ClassificationCommand(), + new CropCommand(), + new ExtractionCommand(), + new OcrCommand(), + new SplitCommand(), +]; +foreach ($v2InferenceCommands as $command) { + $cli->add($command); +} +$cli->add(new SearchModelsCommand()); + +$knownTopLevelCommands = ['v1', 'search-models', 'list', 'help', 'completion']; +foreach ($v2InferenceCommands as $command) { + $knownTopLevelCommands[] = $command->getName(); +} + +$argv = mindeeRewriteArgvForV1Compat($_SERVER['argv'], $knownTopLevelCommands); -$cli = new Application(); -$mindeeCommand = new MindeeCliCommand(MindeeCliDocuments::getSpecs()); -$cli->add($mindeeCommand); try { - $cli->add($mindeeCommand); - $cli->setDefaultCommand($mindeeCommand->getName(), true); - $cli->run(); + $cli->run(new ArgvInput($argv)); } catch (Exception $e) { - error_log("Could not start the Mindee CLI, an exception was raised:"); - error_log($e->getMessage()); + if 
($displayErrorLog) { + error_log('Could not start the Mindee CLI, an exception was raised:'); + error_log($e->getMessage()); + } } diff --git a/composer.json b/composer.json index 6cdf6d7e..2c38a14b 100644 --- a/composer.json +++ b/composer.json @@ -53,6 +53,8 @@ "test:unit": "phpunit -c tests/phpunit.xml", "test:functional": "phpunit -c tests/functional.xml", "test:smoke:v1": "./tests/test_v1_code_samples.sh", - "test:smoke:v2": "./tests/test_v2_code_samples.sh" + "test:smoke:v2": "./tests/test_v2_code_samples.sh", + "test:cli:v1": "./tests/test_v1_cli.sh", + "test:cli:v2": "./tests/test_v2_cli.sh" } } diff --git a/tests/V2/Cli/MindeeCliCommandV2Test.php b/tests/V2/Cli/MindeeCliCommandV2Test.php new file mode 100644 index 00000000..422b04b1 --- /dev/null +++ b/tests/V2/Cli/MindeeCliCommandV2Test.php @@ -0,0 +1,263 @@ +filePath = TestingUtilities::getFileTypesDir() . '/pdf/blank_1.pdf'; + } + + /** + * @return array Inference command name provider. + */ + public static function provideInferenceCommandNames(): iterable + { + return [ + 'classification' => ['classification'], + 'crop' => ['crop'], + 'extraction' => ['extraction'], + 'ocr' => ['ocr'], + 'split' => ['split'], + ]; + } + + public function testListShouldShowAllV2Commands(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest(['list']); + self::assertSame(0, $cmdOutput['code']); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('classification', $stdout); + self::assertStringContainsString('crop', $stdout); + self::assertStringContainsString('extraction', $stdout); + self::assertStringContainsString('ocr', $stdout); + self::assertStringContainsString('split', $stdout); + self::assertStringContainsString('search-models', $stdout); + self::assertStringContainsString('v1', $stdout); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceMissingModelIdMustFail(string $command): void + { + $cmdOutput = 
MindeeCliV2TestingUtilities::executeTest( + [$command, $this->filePath, '-k', 'fake-key'], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + '--model-id', + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about missing --model-id" + ); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceMissingApiKeyMustFail(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [$command, '-m', 'some-model-id', $this->filePath], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + 'API key is missing', + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about missing API key" + ); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceInvalidPathMustFail(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [$command, '-m', 'some-model-id', '-k', 'fake-key', 'invalid-file-path'], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + "Invalid path or URL provided 'invalid-file-path'", + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about an invalid path" + ); + } + + /** + * @dataProvider provideInferenceCommandNames + */ + public function testInferenceInvalidOutputTypeMustFail(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest( + [$command, '-m', 'some-model-id', '-k', 'fake-key', '-o', 'garbage', $this->filePath], + ['MINDEE_V2_API_KEY' => false] + ); + self::assertSame(1, $cmdOutput['code']); + self::assertStringContainsString( + "Invalid output type 'garbage'", + implode("\n", $cmdOutput['output']), + "Command '$command' must complain about an invalid output type" + ); + } + + public function 
testExtractionExposesAllExtractionOptions(): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest(['extraction', '--help']); + self::assertSame(0, $cmdOutput['code']); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringContainsString('--rag', $stdout); + self::assertStringContainsString('--raw-text', $stdout); + self::assertStringContainsString('--confidence', $stdout); + self::assertStringContainsString('--polygon', $stdout); + self::assertStringContainsString('--text-context', $stdout); + self::assertStringContainsString('--alias', $stdout); + self::assertStringContainsString('--model-id', $stdout); + self::assertStringContainsString('--api-key', $stdout); + self::assertStringContainsString('--output', $stdout); + } + + /** + * Sibling V2 commands must NOT expose extraction-only options. + * + * @return array Non-extraction inference commands. + */ + public static function provideNonExtractionCommandsHideExtractionOnlyOptionsCases(): iterable + { + return [ + 'classification' => ['classification'], + 'crop' => ['crop'], + 'ocr' => ['ocr'], + 'split' => ['split'], + ]; + } + + /** + * @dataProvider provideNonExtractionCommandsHideExtractionOnlyOptionsCases + */ + public function testNonExtractionCommandsHideExtractionOnlyOptions(string $command): void + { + $cmdOutput = MindeeCliV2TestingUtilities::executeTest([$command, '--help']); + self::assertSame(0, $cmdOutput['code']); + $stdout = implode("\n", $cmdOutput['output']); + self::assertStringNotContainsString('--rag', $stdout); + self::assertStringNotContainsString('--raw-text', $stdout); + self::assertStringNotContainsString('--confidence', $stdout); + self::assertStringNotContainsString('--polygon', $stdout); + self::assertStringNotContainsString('--text-context', $stdout); + // Common options must still be present. 
+        self::assertStringContainsString('--alias', $stdout);
+        self::assertStringContainsString('--model-id', $stdout);
+        self::assertStringContainsString('--api-key', $stdout);
+        self::assertStringContainsString('--output', $stdout);
+    }
+
+    /**
+     * `search-models --help` must exit 0 and list --api-key, --name,
+     * --model-type and --raw-json.
+     */
+    public function testSearchModelsHelpExposesExpectedOptions(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(['search-models', '--help']);
+        self::assertSame(0, $cmdOutput['code']);
+        $stdout = implode("\n", $cmdOutput['output']);
+        self::assertStringContainsString('--api-key', $stdout);
+        self::assertStringContainsString('--name', $stdout);
+        self::assertStringContainsString('--model-type', $stdout);
+        self::assertStringContainsString('--raw-json', $stdout);
+    }
+
+    /**
+     * `search-models` without `-k` and with MINDEE_V2_API_KEY unset for the
+     * call must exit 1 and print "API key is missing".
+     */
+    public function testSearchModelsMissingApiKeyMustFail(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            ['search-models'],
+            ['MINDEE_V2_API_KEY' => false]
+        );
+        self::assertSame(1, $cmdOutput['code']);
+        self::assertStringContainsString(
+            'API key is missing',
+            implode("\n", $cmdOutput['output'])
+        );
+    }
+
+    /**
+     * `financial-document` invoked without a leading `v1` group must still be
+     * handled: with the literal path 'invalid-file-path' the call is expected
+     * to exit 1 with the "Invalid path or url provided" message (lower-case
+     * "url", unlike the message asserted for the V2 inference commands).
+     */
+    public function testV1BackwardCompatibilityDispatch(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            ['financial-document', 'invalid-file-path', '-k', 'fake-key', '-D'],
+            ['MINDEE_API_KEY' => false]
+        );
+        self::assertSame(1, $cmdOutput['code']);
+        self::assertStringContainsString(
+            "Invalid path or url provided 'invalid-file-path'",
+            implode("\n", $cmdOutput['output']),
+            'Legacy `mindee ...` dispatch must keep working'
+        );
+    }
+
+    /**
+     * With neither `-v` nor `--error-log`, running `financial-document` on the
+     * sample file must exit 0 and the output must not contain "PHP Warning".
+     *
+     * NOTE(review): the meaning of `-D` is not visible from this test.
+     */
+    public function testCliHidesErrorLogOutputByDefault(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            ['financial-document', $this->filePath, '-k', 'fake-key', '-D'],
+            ['MINDEE_API_KEY' => false]
+        );
+        self::assertSame(0, $cmdOutput['code']);
+        self::assertStringNotContainsString(
+            'PHP Warning',
+            implode("\n", $cmdOutput['output'])
+        );
+    }
+
+    /**
+     * With `-v` placed before the command name, the `financial-document` call
+     * must exit 0 and its output must contain "PHP Warning".
+     */
+    public function testCliDisplaysErrorLogOutputWithVerbosity(): void
+    {
+        $cmdOutput =
MindeeCliV2TestingUtilities::executeTest(
+            ['-v', 'financial-document', $this->filePath, '-k', 'fake-key', '-D'],
+            ['MINDEE_API_KEY' => false]
+        );
+        self::assertSame(0, $cmdOutput['code']);
+        self::assertStringContainsString(
+            'PHP Warning',
+            implode("\n", $cmdOutput['output'])
+        );
+    }
+
+    /**
+     * With `--error-log` placed before the command name, the
+     * `financial-document` call must exit 0 and its output must contain
+     * "PHP Warning".
+     */
+    public function testCliDisplaysErrorLogOutputWhenRequested(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            ['--error-log', 'financial-document', $this->filePath, '-k', 'fake-key', '-D'],
+            ['MINDEE_API_KEY' => false]
+        );
+        self::assertSame(0, $cmdOutput['code']);
+        self::assertStringContainsString(
+            'PHP Warning',
+            implode("\n", $cmdOutput['output'])
+        );
+    }
+
+    /**
+     * `v1 financial-document` with the literal path 'invalid-file-path' must
+     * exit 1 with the "Invalid path or url provided" message.
+     */
+    public function testV1ExplicitGroupInvocation(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            ['v1', 'financial-document', 'invalid-file-path', '-k', 'fake-key', '-D'],
+            ['MINDEE_API_KEY' => false]
+        );
+        self::assertSame(1, $cmdOutput['code']);
+        self::assertStringContainsString(
+            "Invalid path or url provided 'invalid-file-path'",
+            implode("\n", $cmdOutput['output'])
+        );
+    }
+}
diff --git a/tests/V2/Cli/MindeeCliCommandV2TestFunctional.php b/tests/V2/Cli/MindeeCliCommandV2TestFunctional.php
new file mode 100644
index 00000000..1447f01a
--- /dev/null
+++ b/tests/V2/Cli/MindeeCliCommandV2TestFunctional.php
@@ -0,0 +1,164 @@
+filePath = TestingUtilities::getFileTypesDir() . '/pdf/blank_1.pdf';
+        $this->apiKey = (string) getenv('MINDEE_V2_API_KEY');
+        if ($this->apiKey === '') {
+            self::markTestSkipped('MINDEE_V2_API_KEY is not set; skipping V2 functional CLI tests.');
+        }
+    }
+
+    /**
+     * @return iterable<array{string, string, array}>
+     *   Tuples of [command name, env var holding the model ID, additional args].
+     */
+    public static function provideInferenceCommandSummaryOutputCases(): iterable
+    {
+        yield ['classification', 'MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID', []];
+        yield ['crop', 'MINDEE_V2_SE_TESTS_CROP_MODEL_ID', []];
+        yield ['extraction', 'MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID', []];
+        yield ['ocr', 'MINDEE_V2_SE_TESTS_OCR_MODEL_ID', []];
+        yield ['split', 'MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID', []];
+    }
+
+    /**
+     * Runs one inference command against the sample file with the default
+     * output mode and expects exit code 0 and non-empty output. The test is
+     * skipped when the environment variable holding the model ID is empty
+     * or unset.
+     *
+     * @param string $command  CLI command name.
+     * @param string $modelEnv Name of the environment variable holding the model ID.
+     * @param array  $extra    Extra CLI arguments inserted before the file path.
+     *
+     * @dataProvider provideInferenceCommandSummaryOutputCases
+     */
+    public function testInferenceCommandSummaryOutput(string $command, string $modelEnv, array $extra): void
+    {
+        $modelId = (string) getenv($modelEnv);
+        if ($modelId === '') {
+            self::markTestSkipped("$modelEnv is not set; skipping $command CLI test.");
+        }
+
+        // Argument order: command, model and key options, extras, then the file.
+        $args = array_merge(
+            [$command, '-m', $modelId, '-k', $this->apiKey],
+            $extra,
+            [$this->filePath]
+        );
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest($args);
+        self::assertSame(
+            0,
+            $cmdOutput['code'],
+            "$command summary call must succeed:\n" . implode("\n", $cmdOutput['output'])
+        );
+        self::assertNotEmpty($cmdOutput['output'], "$command must produce output");
+    }
+
+    /**
+     * Runs `extraction` with `-r` and `-o full` and expects exit code 0 and
+     * the word "Inference" in the output. Skipped when
+     * MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID is empty or unset.
+     *
+     * NOTE(review): `-r` is presumably the short form of `--raw-text`, going
+     * by the test name; confirm against the command definition.
+     */
+    public function testExtractionFullOutputWithRawText(): void
+    {
+        $modelId = (string) getenv('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID');
+        if ($modelId === '') {
+            self::markTestSkipped('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID is not set.');
+        }
+
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            [
+                'extraction',
+                '-m', $modelId,
+                '-k', $this->apiKey,
+                '-r',
+                '-o', 'full',
+                $this->filePath,
+            ]
+        );
+        self::assertSame(
+            0,
+            $cmdOutput['code'],
+            "extraction full call must succeed:\n" .
implode("\n", $cmdOutput['output'])
+        );
+        $stdout = implode("\n", $cmdOutput['output']);
+        self::assertStringContainsString('Inference', $stdout);
+    }
+
+    /**
+     * Runs `extraction` with `-o raw` and expects exit code 0 and the quoted
+     * key "inference" in the output. Skipped when
+     * MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID is empty or unset.
+     */
+    public function testExtractionRawJsonOutput(): void
+    {
+        $modelId = (string) getenv('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID');
+        if ($modelId === '') {
+            self::markTestSkipped('MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID is not set.');
+        }
+
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            [
+                'extraction',
+                '-m', $modelId,
+                '-k', $this->apiKey,
+                '-o', 'raw',
+                $this->filePath,
+            ]
+        );
+        self::assertSame(
+            0,
+            $cmdOutput['code'],
+            "extraction raw call must succeed:\n" . implode("\n", $cmdOutput['output'])
+        );
+        $stdout = implode("\n", $cmdOutput['output']);
+        self::assertStringContainsString('"inference"', $stdout, 'Raw JSON output must contain "inference"');
+    }
+
+    /**
+     * `search-models -k <key>` must exit 0 and print both "Models" and
+     * "Pagination".
+     */
+    public function testSearchModelsHumanReadableOutput(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            ['search-models', '-k', $this->apiKey]
+        );
+        self::assertSame(
+            0,
+            $cmdOutput['code'],
+            "search-models must succeed:\n" . implode("\n", $cmdOutput['output'])
+        );
+        $stdout = implode("\n", $cmdOutput['output']);
+        self::assertStringContainsString('Models', $stdout);
+        self::assertStringContainsString('Pagination', $stdout);
+    }
+
+    /**
+     * `search-models -k <key> -r` must exit 0 and print the quoted keys
+     * "models" and "pagination".
+     */
+    public function testSearchModelsRawJsonOutput(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            ['search-models', '-k', $this->apiKey, '-r']
+        );
+        self::assertSame(
+            0,
+            $cmdOutput['code'],
+            "search-models raw must succeed:\n" .
implode("\n", $cmdOutput['output'])
+        );
+        $stdout = implode("\n", $cmdOutput['output']);
+        self::assertStringContainsString('"models"', $stdout);
+        self::assertStringContainsString('"pagination"', $stdout);
+    }
+
+    /**
+     * `extraction` called with the literal key 'invalid-api-key' and a
+     * hard-coded model ID must exit 1 and print something.
+     */
+    public function testInferenceWithInvalidApiKeyMustFail(): void
+    {
+        $cmdOutput = MindeeCliV2TestingUtilities::executeTest(
+            [
+                'extraction',
+                '-m', 'fc405e37-4ba4-4d03-aeba-533a8d1f0f21',
+                '-k', 'invalid-api-key',
+                $this->filePath,
+            ]
+        );
+        self::assertSame(1, $cmdOutput['code']);
+        self::assertNotEmpty($cmdOutput['output']);
+    }
+}
diff --git a/tests/V2/Cli/MindeeCliV2TestingUtilities.php b/tests/V2/Cli/MindeeCliV2TestingUtilities.php
new file mode 100644
index 00000000..7ba255fd
--- /dev/null
+++ b/tests/V2/Cli/MindeeCliV2TestingUtilities.php
@@ -0,0 +1,49 @@
+ $args CLI arguments (each entry is shell-escaped before execution).
+     * @param array $envOverrides Environment variables to set/unset for the call.
+     *                            Use `false` to unset a variable.
+     * @return array{output: array, code: int} Output lines and exit code.
+     */
+    public static function executeTest(array $args, array $envOverrides = []): array
+    {
+        $resCode = 0;
+        $output = [];
+
+        // Apply the overrides to this process's environment, remembering the
+        // previous values (false when a variable was not set) for restoration.
+        $previousEnv = [];
+        foreach ($envOverrides as $key => $value) {
+            $previousEnv[$key] = getenv($key);
+            if ($value === false) {
+                putenv($key);
+            } else {
+                putenv($key . '=' . (string) $value);
+            }
+        }
+
+        try {
+            $escaped = array_map(escapeshellarg(...), $args);
+            $cliPath = escapeshellarg(__DIR__ . '/../../../bin/cli.php');
+            // The interpreter path is escaped like every other component so that a
+            // PHP binary installed under a path containing spaces still works.
+            // `2>&1` folds stderr into the captured output lines.
+            $cmd = escapeshellarg(PHP_BINARY) . ' ' . $cliPath . ' ' . implode(' ', $escaped) . ' 2>&1';
+            exec($cmd, $output, $resCode);
+        } finally {
+            // Restore the environment even if building or running the command throws.
+            foreach ($previousEnv as $key => $prev) {
+                if ($prev === false) {
+                    putenv($key);
+                } else {
+                    putenv($key . '=' .
$prev);
+                }
+            }
+        }
+
+        return ['output' => $output, 'code' => $resCode];
+    }
+}
diff --git a/tests/test_v1_cli.sh b/tests/test_v1_cli.sh
new file mode 100755
index 00000000..f13937c4
--- /dev/null
+++ b/tests/test_v1_cli.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+set -e  # abort the script as soon as a command fails
+
+TEST_FILE=$1  # optional first argument: document to send
+
+if [ -z "$TEST_FILE" ]; then
+  TEST_FILE='./tests/resources/file_types/pdf/blank_1.pdf'  # default when no argument is given
+fi
+echo "TEST_FILE: ${TEST_FILE}"
+
+CLI_PATH="./bin/cli.php"  # relative: resolved against the current working directory
+echo "CLI_PATH: ${CLI_PATH}"
+
+PRODUCTS="financial-document receipt invoice invoice-splitter"  # V1 commands to exercise, space-separated
+PRODUCTS_SIZE=4  # must be kept equal to the number of entries in PRODUCTS
+i=1  # 1-based progress counter, used only in the banner
+
+for product in $PRODUCTS  # left unquoted so the list is split on spaces
+do
+  echo "--- Test $product with Summary Output ($i/$PRODUCTS_SIZE) ---"
+  SUMMARY_OUTPUT=$(php "$CLI_PATH" v1 "$product" "$TEST_FILE")  # captures stdout only
+  if [ -z "$SUMMARY_OUTPUT" ]; then  # empty stdout is treated as a failure
+    echo "Error: no $product output"
+    exit 1
+  fi
+  echo "$SUMMARY_OUTPUT"
+  echo ""
+  echo ""
+  sleep 0.5  # NOTE(review): presumably spaces out API calls; fractional seconds are not guaranteed by POSIX sleep
+  i=$((i + 1))
+done
diff --git a/tests/test_v2_cli.sh b/tests/test_v2_cli.sh
new file mode 100755
index 00000000..54adaa2c
--- /dev/null
+++ b/tests/test_v2_cli.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+set -e  # abort the script as soon as a command fails
+
+TEST_FILE=$1  # optional first argument: document to send
+
+if [ -z "$TEST_FILE" ]; then
+  TEST_FILE='./tests/resources/file_types/pdf/blank_1.pdf'  # default when no argument is given
+fi
+echo "TEST_FILE: ${TEST_FILE}"
+
+CLI_PATH="./bin/cli.php"  # relative: resolved against the current working directory
+echo "CLI_PATH: ${CLI_PATH}"
+
+echo "--- Test model list retrieval"
+MODELS=$(php "$CLI_PATH" search-models)  # no -k given; NOTE(review): presumably relies on MINDEE_V2_API_KEY from the environment
+if [ -z "$MODELS" ]; then  # empty stdout is treated as a failure
+  echo "Error: no models found"
+  exit 1
+else
+  echo "Models retrieval OK"
+fi
+
+run_test() {  # usage: run_test MODEL_ID COMMAND
+  model_id="$1"
+  model_type="$2"  # passed to the CLI as the command name
+
+  if [ -z "$model_id" ]; then  # a missing model ID fails the run instead of skipping the test
+    echo "Error: missing model id for $model_type (environment variable not set)"
+    exit 1
+  fi
+
+  echo "--- Test $model_type ID: $model_id"
+  SUMMARY_OUTPUT=$(php "$CLI_PATH" "$model_type" -m "$model_id" "$TEST_FILE")  # captures stdout only
+  if [ -z "$SUMMARY_OUTPUT" ]; then  # empty stdout is treated as a failure
+    echo "Error: no $model_type output"
+    exit 1
+  fi
+  echo "$SUMMARY_OUTPUT"
+  echo ""
+  echo ""
+  sleep 0.5  # NOTE(review): presumably spaces out API calls; fractional seconds are not guaranteed by POSIX sleep
+}
+
+run_test "$MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID" "extraction"
+run_test "$MINDEE_V2_SE_TESTS_CROP_MODEL_ID" "crop"
+run_test "$MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID" "split"
+run_test "$MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID" "classification"
+run_test "$MINDEE_V2_SE_TESTS_OCR_MODEL_ID" "ocr"