diff --git a/.github/workflows/_test-cli.yml b/.github/workflows/_test-cli.yml
new file mode 100644
index 000000000..43fb74dae
--- /dev/null
+++ b/.github/workflows/_test-cli.yml
@@ -0,0 +1,52 @@
+name: Test Command Line Interface
+
+on:
+  workflow_call:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+env:
+  MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }}
+  MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }}
+  MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
+  MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID }}
+  MINDEE_V2_SE_TESTS_CROP_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_CROP_MODEL_ID }}
+  MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }}
+  MINDEE_V2_SE_TESTS_OCR_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_OCR_MODEL_ID }}
+
+jobs:
+  test:
+    name: Run CLI tests
+    timeout-minutes: 30
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        java-version:
+          - "25"
+          - "11"
+        distribution:
+          - "temurin"
+
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          submodules: recursive
+
+      - name: Set up JDK ${{ matrix.java-version }}
+        uses: actions/setup-java@v5
+        with:
+          java-version: ${{ matrix.java-version }}
+          distribution: ${{ matrix.distribution }}
+          cache: "maven"
+
+      - name: Build JAR
+        run: mvn package -DskipTests --no-transfer-progress
+
+      - name: Test V1 CLI
+        run: ./tests/test_v1_cli.sh ./src/test/resources/file_types/pdf/blank_1.pdf
+
+      - name: Test V2 CLI
+        run: ./tests/test_v2_cli.sh ./src/test/resources/file_types/pdf/blank_1.pdf
diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index f263b0f0f..c9adc5804 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -4,9 +4,15 @@ on:
   schedule:
     - cron: '33 0 * * *'
 
+permissions:
+  contents: read
+
 jobs:
   codeql:
     uses: mindee/mindee-api-java/.github/workflows/_codeql.yml@main
   test_code_samples:
     uses: mindee/mindee-api-java/.github/workflows/_test-code-samples.yml@main
     secrets: inherit
+  test_cli:
+    uses: mindee/mindee-api-java/.github/workflows/_test-cli.yml@main
+    secrets: inherit
diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml
index 6d5aa856a..e2a94165d 100644
--- a/.github/workflows/pull-request.yml
+++ b/.github/workflows/pull-request.yml
@@ -29,3 +29,7 @@ jobs:
     uses: ./.github/workflows/_test-code-samples.yml
     needs: build
     secrets: inherit
+  test_cli:
+    uses: ./.github/workflows/_test-cli.yml
+    needs: build
+    secrets: inherit
diff --git a/cli.sh b/cli.sh
index 58652d86e..4a8febd8c 100755
--- a/cli.sh
+++ b/cli.sh
@@ -5,6 +5,6 @@ if ! ls ./target/libs/picocli-* 2>&1 >/dev/null; then
   mvn dependency:copy-dependencies -DoutputDirectory=target/libs -Dhttps.protocols=TLSv1.2
 fi
 
-VERSION=$(grep -m1 -o -P '(?<=\<version\>)[0-9.]*(?!/<\/version>)' pom.xml)
+VERSION=$(grep -m1 -o -P '(?<=<version>)[^<]+' pom.xml)
 
 java -cp "target/mindee-api-java-${VERSION}.jar:target/libs/*" com.mindee.CommandLineInterface "$@"
diff --git a/src/main/java/com/mindee/CommandLineInterface.java b/src/main/java/com/mindee/CommandLineInterface.java
new file mode 100644
index 000000000..85594c3a5
--- /dev/null
+++ b/src/main/java/com/mindee/CommandLineInterface.java
@@ -0,0 +1,81 @@
+package com.mindee;
+
+import com.mindee.v1.cli.CommandLineInterfaceProducts;
+import com.mindee.v1.cli.ProductProcessor;
+import com.mindee.v2.cli.ClassificationCommand;
+import com.mindee.v2.cli.CropCommand;
+import com.mindee.v2.cli.ExtractionCommand;
+import com.mindee.v2.cli.OcrCommand;
+import com.mindee.v2.cli.SearchModelsCommand;
+import com.mindee.v2.cli.SplitCommand;
+import java.lang.reflect.Method;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Model.CommandSpec;
+import picocli.CommandLine.Spec;
+
+/**
+ * Top-level Mindee CLI entry point.
+ * <p>
+ * V2 commands (search-models, classification, crop, extraction, ocr, split) are available at the
+ * root level. V1 product commands are available under the {@code v1} subcommand.
+ */
+@Command(
+  name = "mindee",
+  description = "Mindee API client",
+  mixinStandardHelpOptions = true,
+  subcommands = { CommandLine.HelpCommand.class }
+)
+public class CommandLineInterface implements Runnable {
+
+  @Spec
+  private CommandSpec spec;
+
+  /**
+   * Main entry point for the Mindee CLI.
+   *
+   * @param args command-line arguments
+   */
+  public static void main(String[] args) {
+    CommandLineInterface root = new CommandLineInterface();
+    CommandLine rootCmd = new CommandLine(root);
+
+    // V2 commands at root
+    rootCmd.addSubcommand("search-models", new CommandLine(new SearchModelsCommand()));
+    rootCmd.addSubcommand("classification", new CommandLine(new ClassificationCommand()));
+    rootCmd.addSubcommand("crop", new CommandLine(new CropCommand()));
+    rootCmd.addSubcommand("extraction", new CommandLine(new ExtractionCommand()));
+    rootCmd.addSubcommand("ocr", new CommandLine(new OcrCommand()));
+    rootCmd.addSubcommand("split", new CommandLine(new SplitCommand()));
+
+    // V1 commands under the "v1" subcommand
+    var v1Cli = new com.mindee.v1.CommandLineInterface();
+    var v1Cmd = new CommandLine(v1Cli);
+    v1Cmd.getCommandSpec().name("v1");
+    v1Cmd.getCommandSpec().usageMessage().description("Mindee V1 product commands.");
+    var products = new CommandLineInterfaceProducts((ProductProcessor) v1Cli);
+    for (Method method : CommandLineInterfaceProducts.class.getDeclaredMethods()) {
+      if (method.isAnnotationPresent(CommandLine.Command.class)) {
+        CommandLine.Command annotation = method.getAnnotation(CommandLine.Command.class);
+        String subcommandName = annotation.name();
+        var subCmd = new CommandLine(
+          new com.mindee.v1.CommandLineInterface.ProductCommandHandler(products, method)
+        );
+        subCmd.getCommandSpec().usageMessage().description(annotation.description());
+        v1Cmd.addSubcommand(subcommandName, subCmd);
+      }
+    }
+    rootCmd.addSubcommand("v1", v1Cmd);
+
+    // Enum option values are matched case-sensitively by default, but the help text documents
+    // them in lowercase. This must run after every subcommand has been registered.
+    rootCmd.setCaseInsensitiveEnumValuesAllowed(true);
+
+    System.exit(rootCmd.execute(args));
+  }
+
+  @Override
+  public void run() {
+    spec.commandLine().usage(System.out);
+  }
+}
diff --git a/src/main/java/com/mindee/v2/cli/BaseInferenceCommand.java b/src/main/java/com/mindee/v2/cli/BaseInferenceCommand.java
new file mode 100644
index 000000000..3db9cc04c
--- /dev/null
+++ b/src/main/java/com/mindee/v2/cli/BaseInferenceCommand.java
@@ -0,0 +1,105 @@
+package com.mindee.v2.cli;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.mindee.input.LocalInputSource;
+import com.mindee.v2.MindeeClient;
+import com.mindee.v2.parsing.CommonResponse;
+import java.io.File;
+import java.util.concurrent.Callable;
+import picocli.CommandLine.Option;
+import picocli.CommandLine.Parameters;
+
+/**
+ * Abstract base class for V2 inference CLI commands.
+ * Handles common options (path, model-id, api-key, alias, output) and output formatting.
+ */
+public abstract class BaseInferenceCommand implements Callable<Integer> {
+
+  @Parameters(index = "0", paramLabel = "<path>", description = "The path of the file to parse")
+  protected File file;
+
+  @Option(names = { "-m", "--model-id" }, description = "ID of the model to use", required = true)
+  protected String modelId;
+
+  @Option(names = { "-k", "--api-key" }, description = "Mindee V2 API key.")
+  protected String apiKey;
+
+  @Option(names = { "-a", "--alias" }, description = "Alias for the file")
+  protected String alias;
+
+  /** Output format for the command. */
+  public enum OutputType {
+    Summary,
+    Full,
+    Raw
+  }
+
+  @Option(
+    names = { "-o", "--output" },
+    description = "Specify how to output the data.\n"
+      + "- summary: a basic summary (default)\n"
+      + "- full: detail extraction results, including options\n"
+      + "- raw: full JSON object",
+    defaultValue = "Summary"
+  )
+  protected OutputType output;
+
+  /**
+   * Executes the inference request and returns the product response.
+   *
+   * @param client the V2 Mindee client
+   * @param inputSource the prepared local input source
+   * @return the product response
+   * @throws Exception on IO or API error
+   */
+  protected abstract CommonResponse executeRequest(
+    MindeeClient client,
+    LocalInputSource inputSource
+  ) throws Exception;
+
+  /**
+   * Returns the summary (result-only) string for the given response.
+   * Override in each product command.
+   *
+   * @param response the product response
+   * @return the summary string
+   */
+  protected abstract String getSummary(CommonResponse response);
+
+  /**
+   * Returns the full (inference + options + result) string for the given response.
+   * Defaults to the same as {@link #getSummary}; override for richer output.
+   *
+   * @param response the product response
+   * @return the full output string
+   */
+  protected String getFullOutput(CommonResponse response) {
+    return getSummary(response);
+  }
+
+  @Override
+  public Integer call() throws Exception {
+    var client = new MindeeClient(apiKey != null ? apiKey : "");
+    var inputSource = new LocalInputSource(file);
+    var response = executeRequest(client, inputSource);
+    printOutput(response);
+    return 0;
+  }
+
+  private void printOutput(CommonResponse response) throws Exception {
+    switch (output) {
+      case Full:
+        System.out.println(getFullOutput(response));
+        break;
+      case Raw:
+        var mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);
+        var jsonNode = mapper.readTree(response.getRawResponse());
+        System.out.println(mapper.writeValueAsString(jsonNode));
+        break;
+      default:
+        System.out.println(getSummary(response));
+        break;
+    }
+  }
+}
diff --git a/src/main/java/com/mindee/v2/cli/ClassificationCommand.java b/src/main/java/com/mindee/v2/cli/ClassificationCommand.java
new file mode 100644
index 000000000..1f7844389
--- /dev/null
+++ b/src/main/java/com/mindee/v2/cli/ClassificationCommand.java
@@ -0,0 +1,42 @@
+package com.mindee.v2.cli;
+
+import com.mindee.input.LocalInputSource;
+import com.mindee.v2.MindeeClient;
+import com.mindee.v2.parsing.CommonResponse;
+import com.mindee.v2.product.classification.ClassificationResponse;
+import com.mindee.v2.product.classification.params.ClassificationParameters;
+import picocli.CommandLine.Command;
+
+/**
+ * CLI command for the V2 classification utility.
+ */
+@Command(
+  name = "classification",
+  description = "Classification utility.",
+  mixinStandardHelpOptions = true
+)
+public class ClassificationCommand extends BaseInferenceCommand {
+
+  @Override
+  protected CommonResponse executeRequest(
+    MindeeClient client,
+    LocalInputSource inputSource
+  ) throws Exception {
+    return client
+      .enqueueAndGetResult(
+        ClassificationResponse.class,
+        inputSource,
+        ClassificationParameters.builder(modelId).alias(alias).build()
+      );
+  }
+
+  @Override
+  protected String getSummary(CommonResponse response) {
+    return ((ClassificationResponse) response).getInference().getResult().toString();
+  }
+
+  @Override
+  protected String getFullOutput(CommonResponse response) {
+    return ((ClassificationResponse) response).getInference().toString();
+  }
+}
diff --git a/src/main/java/com/mindee/v2/cli/CropCommand.java b/src/main/java/com/mindee/v2/cli/CropCommand.java
new file mode 100644
index 000000000..2431f7bb5
--- /dev/null
+++ b/src/main/java/com/mindee/v2/cli/CropCommand.java
@@ -0,0 +1,38 @@
+package com.mindee.v2.cli;
+
+import com.mindee.input.LocalInputSource;
+import com.mindee.v2.MindeeClient;
+import com.mindee.v2.parsing.CommonResponse;
+import com.mindee.v2.product.crop.CropResponse;
+import com.mindee.v2.product.crop.params.CropParameters;
+import picocli.CommandLine.Command;
+
+/**
+ * CLI command for the V2 crop utility.
+ */
+@Command(name = "crop", description = "Crop utility.", mixinStandardHelpOptions = true)
+public class CropCommand extends BaseInferenceCommand {
+
+  @Override
+  protected CommonResponse executeRequest(
+    MindeeClient client,
+    LocalInputSource inputSource
+  ) throws Exception {
+    return client
+      .enqueueAndGetResult(
+        CropResponse.class,
+        inputSource,
+        CropParameters.builder(modelId).alias(alias).build()
+      );
+  }
+
+  @Override
+  protected String getSummary(CommonResponse response) {
+    return ((CropResponse) response).getInference().getResult().toString();
+  }
+
+  @Override
+  protected String getFullOutput(CommonResponse response) {
+    return ((CropResponse) response).getInference().toString();
+  }
+}
diff --git a/src/main/java/com/mindee/v2/cli/ExtractionCommand.java b/src/main/java/com/mindee/v2/cli/ExtractionCommand.java
new file mode 100644
index 000000000..8b11fc949
--- /dev/null
+++ b/src/main/java/com/mindee/v2/cli/ExtractionCommand.java
@@ -0,0 +1,107 @@
+package com.mindee.v2.cli;
+
+import com.mindee.input.LocalInputSource;
+import com.mindee.v2.MindeeClient;
+import com.mindee.v2.parsing.CommonResponse;
+import com.mindee.v2.product.extraction.ExtractionInference;
+import com.mindee.v2.product.extraction.ExtractionResponse;
+import com.mindee.v2.product.extraction.params.ExtractionParameters;
+import java.util.StringJoiner;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+/**
+ * CLI command for the V2 generic all-purpose extraction utility.
+ */
+@Command(
+  name = "extraction",
+  description = "Generic all-purpose extraction.",
+  mixinStandardHelpOptions = true
+)
+public class ExtractionCommand extends BaseInferenceCommand {
+
+  @Option(
+    names = { "-g", "--rag" },
+    description = "Enable RAG context. Only valid for 'extraction' product.",
+    defaultValue = "false"
+  )
+  private boolean rag;
+
+  @Option(
+    names = { "-r", "--raw-text" },
+    description = "To get all the words in the current document. False by default.",
+    defaultValue = "false"
+  )
+  private boolean rawText;
+
+  @Option(
+    names = { "-c", "--confidence" },
+    description = "To retrieve confidence scores from the extraction. False by default.",
+    defaultValue = "false"
+  )
+  private boolean confidence;
+
+  @Option(
+    names = { "-p", "--polygon" },
+    description = "To retrieve bounding boxes from the extraction. False by default.",
+    defaultValue = "false"
+  )
+  private boolean polygon;
+
+  @Option(
+    names = { "-t", "--text-context" },
+    description = "Text context to add to your API call."
+  )
+  private String textContext;
+
+  @Override
+  protected CommonResponse executeRequest(
+    MindeeClient client,
+    LocalInputSource inputSource
+  ) throws Exception {
+    return client
+      .enqueueAndGetResult(
+        ExtractionResponse.class,
+        inputSource,
+        ExtractionParameters
+          .builder(modelId)
+          .alias(alias)
+          .rag(rag)
+          .rawText(rawText)
+          .confidence(confidence)
+          .polygon(polygon)
+          .textContext(textContext)
+          .build()
+      );
+  }
+
+  @Override
+  protected String getSummary(CommonResponse response) {
+    return ((ExtractionResponse) response).getInference().getResult().toString();
+  }
+
+  @Override
+  protected String getFullOutput(CommonResponse response) {
+    ExtractionInference inference = ((ExtractionResponse) response).getInference();
+    var joiner = new StringJoiner("\n");
+
+    if (
+      rawText
+        && inference.getActiveOptions().getRawText()
+        && inference.getResult().getRawText() != null
+    ) {
+      joiner.add("#############\nRaw Text\n#############\n::");
+      var rawTextStr = inference.getResult().getRawText().toString().replace("\n", "\n ");
+      joiner.add(" " + rawTextStr + "\n");
+    }
+
+    if (rag && inference.getActiveOptions().getRag() && inference.getResult().getRag() != null) {
+      joiner.add("#############\nRetrieval-Augmented Generation\n#############\n::");
+      var ragStr = inference.getResult().getRag().toString().replace("\n", "\n ");
+      joiner.add(" " + ragStr + "\n");
+    }
+
+    joiner.add(inference.toString());
+    return joiner.toString();
+  }
+}
diff --git a/src/main/java/com/mindee/v2/cli/OcrCommand.java b/src/main/java/com/mindee/v2/cli/OcrCommand.java
new file mode 100644
index 000000000..14664b281
--- /dev/null
+++ b/src/main/java/com/mindee/v2/cli/OcrCommand.java
@@ -0,0 +1,38 @@
+package com.mindee.v2.cli;
+
+import com.mindee.input.LocalInputSource;
+import com.mindee.v2.MindeeClient;
+import com.mindee.v2.parsing.CommonResponse;
+import com.mindee.v2.product.ocr.OcrResponse;
+import com.mindee.v2.product.ocr.params.OcrParameters;
+import picocli.CommandLine.Command;
+
+/**
+ * CLI command for the V2 OCR utility.
+ */
+@Command(name = "ocr", description = "OCR utility.", mixinStandardHelpOptions = true)
+public class OcrCommand extends BaseInferenceCommand {
+
+  @Override
+  protected CommonResponse executeRequest(
+    MindeeClient client,
+    LocalInputSource inputSource
+  ) throws Exception {
+    return client
+      .enqueueAndGetResult(
+        OcrResponse.class,
+        inputSource,
+        OcrParameters.builder(modelId).alias(alias).build()
+      );
+  }
+
+  @Override
+  protected String getSummary(CommonResponse response) {
+    return ((OcrResponse) response).getInference().getResult().toString();
+  }
+
+  @Override
+  protected String getFullOutput(CommonResponse response) {
+    return ((OcrResponse) response).getInference().toString();
+  }
+}
diff --git a/src/main/java/com/mindee/v2/cli/SearchModelsCommand.java b/src/main/java/com/mindee/v2/cli/SearchModelsCommand.java
new file mode 100644
index 000000000..f40979226
--- /dev/null
+++ b/src/main/java/com/mindee/v2/cli/SearchModelsCommand.java
@@ -0,0 +1,55 @@
+package com.mindee.v2.cli;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.mindee.v2.MindeeClient;
+import java.util.concurrent.Callable;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+/**
+ * CLI command for searching available V2 models.
+ */
+@Command(
+  name = "search-models",
+  description = "Search available models.",
+  mixinStandardHelpOptions = true
+)
+public class SearchModelsCommand implements Callable<Integer> {
+
+  @Option(names = { "-k", "--api-key" }, description = "Mindee V2 API key.")
+  private String apiKey;
+
+  @Option(
+    names = { "-n", "--name" },
+    description = "Filter by model name partial match (case insensitive)."
+  )
+  private String name;
+
+  @Option(
+    names = { "-m", "--model-type" },
+    description = "Filter by exact model type (case sensitive)."
+  )
+  private String modelType;
+
+  @Option(
+    names = { "-r", "--raw-json" },
+    description = "Whether to output the raw JSON response.",
+    defaultValue = "false"
+  )
+  private boolean rawJson;
+
+  @Override
+  public Integer call() throws Exception {
+    var client = new MindeeClient(apiKey != null ? apiKey : "");
+    var response = client.searchModels(name, modelType);
+    if (rawJson) {
+      var mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);
+      var jsonNode = mapper.readTree(response.getRawResponse());
+      System.out.println(mapper.writeValueAsString(jsonNode));
+    } else {
+      System.out.println(response);
+    }
+    return 0;
+  }
+}
diff --git a/src/main/java/com/mindee/v2/cli/SplitCommand.java b/src/main/java/com/mindee/v2/cli/SplitCommand.java
new file mode 100644
index 000000000..a8185f962
--- /dev/null
+++ b/src/main/java/com/mindee/v2/cli/SplitCommand.java
@@ -0,0 +1,38 @@
+package com.mindee.v2.cli;
+
+import com.mindee.input.LocalInputSource;
+import com.mindee.v2.MindeeClient;
+import com.mindee.v2.parsing.CommonResponse;
+import com.mindee.v2.product.split.SplitResponse;
+import com.mindee.v2.product.split.params.SplitParameters;
+import picocli.CommandLine.Command;
+
+/**
+ * CLI command for the V2 split utility.
+ */
+@Command(name = "split", description = "Split utility.", mixinStandardHelpOptions = true)
+public class SplitCommand extends BaseInferenceCommand {
+
+  @Override
+  protected CommonResponse executeRequest(
+    MindeeClient client,
+    LocalInputSource inputSource
+  ) throws Exception {
+    return client
+      .enqueueAndGetResult(
+        SplitResponse.class,
+        inputSource,
+        SplitParameters.builder(modelId).alias(alias).build()
+      );
+  }
+
+  @Override
+  protected String getSummary(CommonResponse response) {
+    return ((SplitResponse) response).getInference().getResult().toString();
+  }
+
+  @Override
+  protected String getFullOutput(CommonResponse response) {
+    return ((SplitResponse) response).getInference().toString();
+  }
+}
diff --git a/src/main/java/com/mindee/v2/http/MindeeHttpApiV2.java b/src/main/java/com/mindee/v2/http/MindeeHttpApiV2.java
index c048655ba..890ca2c5c 100644
--- a/src/main/java/com/mindee/v2/http/MindeeHttpApiV2.java
+++ b/src/main/java/com/mindee/v2/http/MindeeHttpApiV2.java
@@ -149,7 +149,7 @@ public SearchResponse reqGetSearchModels(String modelName, String modelType) {
       url.addParameter("name", modelName);
     }
     if (modelType != null) {
-      url.addParameter("type", modelType);
+      url.addParameter("model_type", modelType);
     }
     var get = new HttpGet(url.toString());
     return executeAPIRequest(get, SearchResponse.class);
diff --git a/tests/test_v1_cli.sh b/tests/test_v1_cli.sh
new file mode 100755
index 000000000..f4d40c0f2
--- /dev/null
+++ b/tests/test_v1_cli.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+set -e
+
+TEST_FILE=$1
+
+if [ -z "$TEST_FILE" ]; then
+  TEST_FILE='./src/test/resources/file_types/pdf/blank_1.pdf'
+fi
+echo "TEST_FILE: ${TEST_FILE}"
+
+PRODUCTS="financial-document receipt invoice invoice-splitter"
+PRODUCTS_SIZE=4
+i=1
+
+for product in $PRODUCTS
+do
+  echo "--- Test $product with Summary Output ($i/$PRODUCTS_SIZE) ---"
+  SUMMARY_OUTPUT=$(./cli.sh v1 "$product" "$TEST_FILE")
+  echo "$SUMMARY_OUTPUT"
+  echo ""
+  echo ""
+  sleep 0.5
+  i=$((i + 1))
+done
diff --git a/tests/test_v2_cli.sh b/tests/test_v2_cli.sh
new file mode 100755
index 000000000..fed84061e
--- /dev/null
+++ b/tests/test_v2_cli.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+set -e
+
+TEST_FILE=$1
+
+if [ -z "$TEST_FILE" ]; then
+  TEST_FILE='./src/test/resources/file_types/pdf/blank_1.pdf'
+fi
+echo "TEST_FILE: ${TEST_FILE}"
+
+echo "--- Test model list retrieval"
+MODELS=$(./cli.sh search-models)
+if [ -z "$MODELS" ]; then
+  echo "Error: no models found"
+  exit 1
+else
+  echo "Models retrieval OK"
+fi
+
+# Runs one V2 inference command against TEST_FILE and prints its output.
+#   $1: model ID passed to the CLI with -m
+#   $2: CLI subcommand to run (e.g. extraction, crop)
+run_test() {
+  model_id="$1"
+  model_type="$2"
+
+  # An unset variable expands to an empty string; fail here with a clear message
+  # rather than passing an empty model ID to the CLI.
+  if [ -z "$model_id" ]; then
+    echo "Error: no model ID provided for $model_type" >&2
+    exit 1
+  fi
+
+  echo "--- Test $model_type ID: $model_id"
+  SUMMARY_OUTPUT=$(./cli.sh "$model_type" -m "$model_id" "$TEST_FILE")
+  echo "$SUMMARY_OUTPUT"
+  echo ""
+  echo ""
+  sleep 0.5
+}
+
+run_test "$MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID" "extraction"
+run_test "$MINDEE_V2_SE_TESTS_CROP_MODEL_ID" "crop"
+run_test "$MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID" "split"
+run_test "$MINDEE_V2_SE_TESTS_CLASSIFICATION_MODEL_ID" "classification"
+run_test "$MINDEE_V2_SE_TESTS_OCR_MODEL_ID" "ocr"