diff --git a/.agents/skills/agent-platform-deploy/SKILL.md b/.agents/skills/agent-platform-deploy/SKILL.md new file mode 100644 index 0000000..4567ee3 --- /dev/null +++ b/.agents/skills/agent-platform-deploy/SKILL.md @@ -0,0 +1,320 @@ +--- +name: agent-platform-deploy +description: >- + Deploy open models or custom weights from Model Garden to Agent Platform + endpoints, check deployment status, verify serving endpoints, or clean up + resources by undeploying models and deleting endpoints. Use when asked to + deploy models on Agent Platform, list available Model Garden models, check if + a model is deployable, query deployment cost, troubleshoot deployment errors + (like quota limits), or undeploy/clean up endpoints. Also use when copying + and deploying a 1P Tuned Model. Don't use for public Vertex AI deployments + (use the `vertex-deploy` skill) or for running model evaluations (use the + `agent-platform-eval` skill). +--- + +# Agent Platform Model Garden Deploy Skill + +This skill provides instructions for deploying Open Models from Agent Platform +Model Garden to endpoints, and subsequently undeploying them to clean up +resources. + +## 1P Tuned Model Copy & Deployment + +If you need to copy a **1P (First-Party) Tuned Model** from a source project to a destination region or project and deploy it to a newly created endpoint, refer to the [1P Tuned Model Copy & Deployment Guide](references/copy_deploy_guide.md). + +## Safety & Confirmation Tiers (CRITICAL) + +Before executing any commands on behalf of the user, you MUST adhere to the +following safety tiers based on the action requested: + +1. **Tier R: Read-only (`list`, `describe`, `list-deployment-config`)** + * **Rule**: No confirmation needed. You may execute these commands immediately to gather information for the user. +2. **Tier M: Mutating & Reversible (`deploy`, `undeploy-model`)** + * **Rule**: This requires explicit user confirmation. 
You MUST present a + clear confirmation prompt to the user explaining the proposed command. + You MUST wait for their explicit confirmation before executing. For + `undeploy-model`, you MUST first verify that the endpoint and deployed + model exist; if `describe` or `list` returns a 404 or empty result, you + MUST halt and inform the user rather than attempting undeployment. +3. **Tier D: Destructive & Irreversible (`delete`)** + * **Rule**: This requires **explicit typed confirmation**. You MUST output + a text message explaining the irreversible nature of endpoint or model + deletion and asking the user to type "I confirm" or "Yes, delete it" + before executing the deletion command. + +## 1. Prerequisites + +Before deploying, ensure you have the correct project and region set. The +commands below use placeholder variables `PROJECT_ID` and `LOCATION_ID`. + +Ensure you are authenticated: + +```bash +gcloud auth login +gcloud auth application-default login +gcloud config set project $PROJECT_ID +``` + +## 2. Discovering Deployable Models + +You can list models available in Model Garden and check if they can be +self-deployed. + +```bash +gcloud ai model-garden models list +``` + +To see what machine types and accelerators are supported for a specific model +(e.g., `google/gemma3@gemma-3-27b-it`): + +```bash +gcloud ai model-garden models list-deployment-config \ + --model="google/gemma3@gemma-3-27b-it" +``` + +> [!NOTE] Some models, especially Hugging Face models, might require a Hugging +> Face Access Token for deployment. + +> [!TIP] **Model Recommendation Instructions:** If a user asks to deploy a model +> but **does not specify which one**, you should recommend a model based on +> their use case (e.g., Llama 3.3 70B for general purpose or Gemma 3 for +> lightweight tasks). * You **MUST** ensure you are recommending the **latest +> version** or **popular version** of the suggested model family. 
* You **MUST** +> verify the model is currently deployable using `gcloud ai model-garden models +> list` before suggesting it to the user. + +## 3. Deploying a Model + +> [!WARNING] Deploying models, especially large ones, consumes significant +> compute resources and incurs costs. +> +> 1. You **MUST** refer to +> [Agent Platform prediction pricing](https://cloud.google.com/products/gemini-enterprise-agent-platform/pricing?hl=en#prediction-and-explanation) +> to calculate a rough cost estimation based on the requested `--machine-type` +> and `--accelerator-type` (and count). +> 2. You **MUST** present this cost estimation to the user and warn them that +> this is the **list price**, which may differ from their actual bill due to +> potential discounts or reservations. +> 3. You **MUST ALWAYS** request explicit confirmation from the user agreeing to +> the estimated cost before executing any `deploy` command. + +To deploy a model, use the `deploy` command. It is highly recommended to use the +`--asynchronous` flag for long-running deployments, and then poll the status if +necessary. + +### Example: Deploying Gemma 3 + +Here is a typical bash script to deploy a model. You can run this block +directly. + +```bash +#!/bin/bash +# Example script to deploy a model from Model Garden + +PROJECT_ID=$(gcloud config get-value project) +LOCATION_ID="us-central1" # Recommended default region +MODEL_ID="google/gemma3@gemma-3-27b-it" # Replace with your chosen model ID + +echo "Deploying model $MODEL_ID to project $PROJECT_ID in $LOCATION_ID..." + +# Model Garden can automatically select the required hardware based on the list-deployment-config if hardware params are omitted. 
+# Below is a comprehensive command with all supported parameters: +gcloud ai model-garden models deploy \ + --project=$PROJECT_ID \ + --region=$LOCATION_ID \ + --model=$MODEL_ID \ + --machine-type="g2-standard-48" \ + --accelerator-type="NVIDIA_L4" \ + --accelerator-count=4 \ + --endpoint-display-name="my-gemma-deployment" \ + --hugging-face-access-token="YOUR_HF_TOKEN" \ + --reservation-affinity="reservation-affinity-type=specific-reservation,key=compute.googleapis.com/reservation-name,values=my-reservation" \ + --asynchronous + +echo "Deployment initiated asynchronously." +``` + +### Example: Deploying Custom Weights + +To deploy a model using custom weights, you can use the exact same `deploy` +command. Instead of providing the model garden model ID, provide the Google +Cloud Storage (GCS) URI to your custom weights folder in the `--model` flag. + +```bash +#!/bin/bash +# Example script to deploy a model with custom weights from a GCS bucket + +PROJECT_ID=$(gcloud config get-value project) +LOCATION_ID="us-central1" +# Replace with the gs:// URI pointing to your custom weights +MODEL_GCS_URI="gs://your-bucket-name/path/to/custom-weights" + +echo "Deploying custom model from $MODEL_GCS_URI to project $PROJECT_ID in $LOCATION_ID..." + +gcloud ai model-garden models deploy \ + --project=$PROJECT_ID \ + --region=$LOCATION_ID \ + --model=$MODEL_GCS_URI \ + --machine-type="g2-standard-12" \ + --accelerator-type="NVIDIA_L4" \ + --endpoint-display-name="my-custom-model" \ + --asynchronous + +echo "Deployment initiated asynchronously." +``` + +## 4. Checking Deployment Status + +When you deploy a model asynchronously using the `--asynchronous` flag, the +`deploy` command will return an operation ID. You can use this ID to check the +ongoing status of the deployment. 
+ +```bash +gcloud ai operations describe YOUR_OPERATION_ID \ + --region=$LOCATION_ID +``` + +> [!NOTE] As an agent, you can also offer to check the status of a deployment +> for the user if they provide an operation ID or if they just initiated the +> deployment with you. + +Alternatively, you can list your endpoints to see if it shows up and check the +Cloud Console under the "Online prediction" tab. + +```bash +gcloud ai endpoints list \ + --region=$LOCATION_ID +``` + +Note: Large models (like Llama 3.1 8B or Gemma 27B) may take 15-20 minutes to +fully deploy and start serving. + +### Verifying Deployment + +If the model is successfully deployed, verify by making a prediction call to +test. Because Model Garden models are often deployed to Dedicated Endpoints, you +shouldn't use `gcloud ai endpoints predict`. Instead, you must fetch the +endpoint's dedicated DNS name and send a `curl` request. + +> [!TIP] Ask the user to try using their own prompt to see the results. +> Otherwise use the default. + +Use the following script: + +```bash +#!/bin/bash +PROJECT_ID=$(gcloud config get-value project) +LOCATION_ID="us-central1" +ENDPOINT_ID="YOUR_ENDPOINT_ID" +PROMPT=${1:-"Explain quantum computing in simple terms."} + +echo "Fetching dedicated Endpoint DNS..." +ENDPOINT_URL=$(gcloud ai endpoints describe $ENDPOINT_ID --project=$PROJECT_ID --region=$LOCATION_ID --format="value(dedicatedEndpointDns)") + +if [ -z "$ENDPOINT_URL" ]; then + echo "Error: Could not retrieve a dedicated endpoint URL. Verify your ENDPOINT_ID." + exit 1 +fi + +echo "Sending prediction request to $ENDPOINT_URL..." +curl -X POST \ + -H "Authorization: Bearer $(gcloud auth print-access-token)" \ + -H "Content-Type: application/json" \ + "https://${ENDPOINT_URL}/v1beta1/projects/${PROJECT_ID}/locations/${LOCATION_ID}/endpoints/${ENDPOINT_ID}/chat/completions" \ + -d '{ + "model": "'"$ENDPOINT_ID"'", + "messages": [ + { + "role": "user", + "content": "'"$PROMPT"'" + } + ] + }' +``` + +## 5. 
Undeploying and Cleaning Up + +To stop incurring charges, you must undeploy the model from the endpoint. This +is a multi-step process if you don't already have the exact endpoint and +deployed model IDs. + +### Example: Finding and Undeploying a Model + +Here is a bash script demonstrating how to find the IDs and undeploy the model. + +```bash +#!/bin/bash +# Example script to undeploy a model + +PROJECT_ID=$(gcloud config get-value project) +LOCATION_ID="us-central1" +# The model ID used during deployment (without the provider prefix sometimes, or exactly as listed in describe) +# It's usually easier to find the specific ID via `gcloud ai models list` +# For this example, let's assume we know the exact Endpoint ID and Deployed Model ID. + +# 1. Find the Endpoint ID +echo "Listing endpoints in $LOCATION_ID:" +gcloud ai endpoints list --project=$PROJECT_ID --region=$LOCATION_ID + +# (Assuming you extracted ENDPOINT_ID from the above output) +# ENDPOINT_ID="your_endpoint_id" + +# 2. Find the Deployed Model ID +echo "Listing models in $LOCATION_ID to find model description:" +gcloud ai models list --project=$PROJECT_ID --region=$LOCATION_ID + +# (Assuming you found the specific MODEL_ID) +# MODEL_ID="your_model_id" +# gcloud ai models describe $MODEL_ID --project=$PROJECT_ID --region=$LOCATION_ID +# (Extract the deployedModelId from the output) +# DEPLOYED_MODEL_ID="your_deployed_model_id" + +# 3. Undeploy +echo "Undeploying model $DEPLOYED_MODEL_ID from endpoint $ENDPOINT_ID..." +gcloud ai endpoints undeploy-model $ENDPOINT_ID \ + --project=$PROJECT_ID \ + --region=$LOCATION_ID \ + --deployed-model-id=$DEPLOYED_MODEL_ID + +echo "Model undeployed." + +# 4. Delete Endpoint +echo "Deleting endpoint $ENDPOINT_ID..." +gcloud ai endpoints delete $ENDPOINT_ID \ + --project=$PROJECT_ID \ + --region=$LOCATION_ID \ + --quiet +echo "Endpoint deleted." + +# 5. Delete Model +echo "Deleting model $MODEL_ID..." 
+gcloud ai models delete $MODEL_ID \ + --project=$PROJECT_ID \ + --region=$LOCATION_ID \ + --quiet +echo "Model deleted." +``` + +> [!WARNING] Failing to undeploy a model will result in continuous charges for +> the allocated compute resources, even if you are not sending prediction +> requests. Always clean up after testing. + +## 6. Troubleshooting + +### Deployment Failure: Quota or Resource Exhausted + +If your deployment fails (or stays in an error state) due to `QUOTA_EXCEEDED` or +`RESOURCE_EXHAUSTED` errors, the specific hardware requested (e.g., `NVIDIA_L4` +or `g2-standard-24`) is either not available in your chosen region or exceeds +your project's quota limits. + +**Solution:** Look closely at the error message returned. It will often +recommend an alternative region or machine type that currently has availability. +**Ask the user for confirmation** to retry the deployment using the suggested +`--region` or `--machine-type` parameters. + +> [!WARNING] If the alternative suggestions involve changing the machine type or +> accelerator, you **MUST** recalculate the estimated cost using +> [Agent Platform prediction pricing](https://cloud.google.com/products/gemini-enterprise-agent-platform/pricing?hl=en#prediction-and-explanation), +> warn the user about list prices versus actual billing, and get their explicit +> confirmation for the new cost before retrying the deployment. diff --git a/.agents/skills/agent-platform-deploy/references/copy_deploy_guide.md b/.agents/skills/agent-platform-deploy/references/copy_deploy_guide.md new file mode 100644 index 0000000..a44d77a --- /dev/null +++ b/.agents/skills/agent-platform-deploy/references/copy_deploy_guide.md @@ -0,0 +1,256 @@ +# Agent Platform 1P Tuned model copy and deployment + +> [!NOTE] +> **1P Specific**: This guide and its automated workflows are specifically +> designed for **1P (First-Party) Tuned Models** on Agent Platform. 
+ +During tuned-model tuning and inference work, engineers often need to copy a tuned model to other +regions or projects and deploy it to a newly created endpoint for testing. They +benefit from automating endpoint creation, model deployment, and verification +with minimal user input and intervention. + +The tasks can be described as follows: + +- `[]` Configure `gcloud` profile for prod environment. +- `[]` Add IAM policy binding for P4SA (Service Agent) to the source project +- `[]` Copy the tuned model to the destination project +- `[]` Wait for copy operation to complete +- `[]` Create a new shared endpoint +- `[]` Deploy copied model to the endpoint +- `[]` Wait for model deployment to complete +- `[]` Test the endpoint with test prompts + +## Step 0: Env selection and preparation + +Ensure the foundational environment is ready before proceeding. +If the user is only copying the model to a different region within the same project, skip the P4SA setup section. + +### 0.0 Pick a development environment & Confirm Destination Context + +- **CRITICAL: Ask for Confirmation.** You MUST present a clear confirmation + prompt to confirm the development + environment (e.g., `prod`), destination project (`dest-proj`), and region + (`us-central1`) with the user. You MUST halt execution and wait for the + user's explicit confirmation response before running any `gcloud` or `curl` + commands. If you are generating a script for the user instead of running + commands live, you MUST explicitly include a note in your response explaining + that confirming the development environment (e.g., prod) and destination + context with the user is required before running the script live. +- Execute the following command to set the global variable. + `export ENV="prod"` + +### 0.1 Authentication & Project Context + +- Check if `gcloud` CLI is installed. If it is not installed, prompt the user for permission to install it before proceeding. +- Verify `gcloud auth list`. If not authenticated, run `gcloud auth login`. 
+- Execute the following command to set the global variables. + `export PROJECT_ID=${PROJECT_ID} REGION=${REGION}` +- Check whether ${USER} has a value; if not, ask the user to set one. + +### 0.2 GCloud CLI setup + +- Run `scripts/config_gcloud_cli.sh ${ENV} ${PROJECT_ID} ${REGION} ${USER}`, then export the `Endpoint:` URL it prints as `ENDPOINT`, which the later `curl` steps rely on. + +### 0.3 P4SA Setup + +#### 0.3.0 Goal + +To copy a model from source project ${SOURCE_PROJECT} to the destination project +${PROJECT_ID} in ${REGION}, follow + +https://docs.cloud.google.com/gemini-enterprise-agent-platform/machine-learning/model-registry/copy-model: add the + +P4SA of the destination project as a new principal to the source project and +assign the Vertex AI Service Agent role to it. + +#### 0.3.1 P4SA selection + +- Get project number ${PROJECT_NUMBER} from the output of the translator. + `/google/bin/releases/oneplatform/chemist/project_id_number_translator + --projects=${PROJECT_ID}` +- Destination project P4SA ${P4SA} based on ${ENV} selection + - **autopush** or **staging**: + `service-${PROJECT_NUMBER}@gcp-sa-${ENV}-aiplatform.iam.gserviceaccount.com` + - **prod**: + `service-${PROJECT_NUMBER}@gcp-sa-aiplatform.iam.gserviceaccount.com` + +#### 0.3.2 P4SA assignment + +- The ${MODEL} to copy should be in the format + `projects/${SOURCE_PROJECT}/locations/${SOURCE_REGION}/models/${MODEL_ID}` + +- Get source project name `${SOURCE_PROJECT}` from the model to copy. + +- Check IAM binding: whether the destination project `${P4SA}` exists on the source project and has the `Vertex + AI Service Agent` role. Sample command: + ```bash + gcloud projects get-iam-policy ${SOURCE_PROJECT} \ + --flatten="bindings[].members" --filter="bindings.members:serviceAccount:${P4SA}" --format="value(bindings.role)" + ``` + +- If not, add it with the sample command below that matches ${ENV}, then wait about 2 minutes for the change to take effect. 
+ ```bash + gcloud projects add-iam-policy-binding ${SOURCE_PROJECT} \ + --member="serviceAccount:service-${PROJECT_NUMBER}@gcp-sa-staging-aiplatform.iam.gserviceaccount.com" \ + --role="roles/aiplatform.serviceAgent" + + gcloud projects add-iam-policy-binding ${SOURCE_PROJECT} \ + --member="serviceAccount:service-${PROJECT_NUMBER}@gcp-sa-aiplatform.iam.gserviceaccount.com" \ + --role="roles/aiplatform.serviceAgent" + ``` + +- If that fails, try granting the user's account the admin role on the destination project. + ```bash + gcloud projects add-iam-policy-binding ${PROJECT_ID} \ + --member="user:${USER}@google.com" --role="roles/aiplatform.admin" + ``` + +- If that fails too, ask the user to add the binding manually. + +## Step 1: Verify the source model exists and is valid + +```bash +curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" ${ENDPOINT}/ui/${MODEL} +``` + +## Step 2: Copy source model to destination project + +If the user is copying to a different project and a different region, first copy the model +to the desired region within the source project, then copy it across projects. + +### Step 2.0 Verify the IAM binding exists + +Make sure the destination project P4SA has been added to the source project with +`roles/aiplatform.serviceAgent` before proceeding. + +### Step 2.1 Run copy model command and poll for LRO completion + +Copying a model is a Long-Running Operation (LRO). You MUST capture the +operation ID from the initial `models:copy` response and implement a polling +loop to check the operation status over time. You MUST NOT proceed to create +the endpoint until the operation status is `done: true` and contains the +copied model metadata. If the copy operation fails (e.g., with +`403 PERMISSION_DENIED` or `error`), you MUST halt execution immediately and +report the exact error to the user. + +```bash +# 1. 
Start Copy Operation +COPY_RESP=$(curl -s -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json; charset=utf-8" -d '{ "sourceModel":"'"${MODEL}"'"}' "${ENDPOINT}/v1/projects/${PROJECT_ID}/locations/${REGION}/models:copy") +echo "Copy response: $COPY_RESP" +OPERATION_ID=$(echo "$COPY_RESP" | grep -o '"name": "[^"]*' | grep -o '[^"]*$') + +if [ -z "$OPERATION_ID" ]; then + echo "Error: Failed to initiate model copy. Response: $COPY_RESP" + exit 1 +fi + +echo "Polling copy operation: $OPERATION_ID..." +while true; do + OP_STATUS=$(curl -s -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" "${ENDPOINT}/v1/${OPERATION_ID}") + IS_DONE=$(echo "$OP_STATUS" | grep -o '"done": true') + HAS_ERROR=$(echo "$OP_STATUS" | grep -o '"error":') + + if [ -n "$HAS_ERROR" ]; then + echo "Error during model copy: $OP_STATUS" + exit 1 + fi + + if [ -n "$IS_DONE" ]; then + echo "Model copy completed successfully!" + MODEL_COPY=$(echo "$OP_STATUS" | grep -o '"model": "[^"]*' | grep -o '[^"]*$' | head -n 1) + break + fi + echo "Copy in progress... waiting 10 seconds." + sleep 10 +done +``` + +### Step 2.2 Run describe model command + +Get the copied model ${MODEL_COPY} from the LRO polling output. Describe it. + +```bash +curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" ${ENDPOINT}/ui/${MODEL_COPY} +``` + +## Step 3: Create an endpoint + +Tell the user: `Creating a Public Shared endpoint in selected region: ${REGION}`. +Ask the user for the desired endpoint display name ${NAME}; prefer +`<base-model-name>-tuned`, like "gemini-3-flash-tuned"; the default is +`copy-tuned`. If the user wants to create a Dedicated Endpoint, tell them that this is not supported yet and is still to be +added. 
+ +```bash +gcloud ai endpoints create --region=${REGION} --display-name=${NAME} +gcloud ai endpoints list --region=${REGION} --filter=display_name=${NAME} +``` + +Get the created endpoint resource name ${NEW_ENDPOINT}; it should be in the format +`projects/${PROJECT_NUMBER}/locations/${REGION}/endpoints/${NEW_ENDPOINT_ID}`. + +## Step 4: Deploy the model to the endpoint + +```bash +curl -X POST -H "Content-Type: application/json" \ + -H "Authorization: Bearer $(gcloud auth print-access-token)" \ + "${ENDPOINT}/v1/${NEW_ENDPOINT}:deployModel" \ + -d '{"deployedModel": {"model": "'"${MODEL_COPY}"'", "displayName": "'"${NAME}"'"}}' +``` + +Get the deploy model operation ${OPERATION} status. + +`curl -X GET -H "Authorization: Bearer $(gcloud auth print-access-token)" +${ENDPOINT}/ui/${OPERATION}` + +Once the operation is done, check the endpoint status. + +```bash +gcloud ai endpoints describe ${NEW_ENDPOINT} +``` + +## Step 5: Send a request and verify the endpoint + +```bash +curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" -H "Content-Type: application/json" ${ENDPOINT}/v1/${NEW_ENDPOINT}:generateContent -d '{ "contents": { "role": "USER", "parts" : { "text" : "Hello world" } } }' +``` + +## Clean Up + +Ask the user whether they want to clean up each resource created during +execution. + +### 1. Endpoint + +If the user wants to delete the created endpoint, undeploy the model first (take ${DEPLOYED_MODEL_ID} from the `deployedModels[].id` field of `gcloud ai endpoints describe ${NEW_ENDPOINT}`), then +delete the endpoint. + +```bash +gcloud ai endpoints undeploy-model ${NEW_ENDPOINT} --deployed-model-id=${DEPLOYED_MODEL_ID} +gcloud ai endpoints delete ${NEW_ENDPOINT} +``` + +### 2. Model + +```bash +gcloud ai models delete ${MODEL_COPY} +``` + +### 3. Env variables + +Only execute these commands after confirming that the user does not want to clean up, or has already +finished cleaning up, the copied model and endpoint. 
+ +```bash +gcloud config configurations delete ${ENV}-cdmodel +unset MODEL_COPY +unset MODEL +unset NEW_ENDPOINT +unset ENDPOINT +unset PROJECT_ID +unset PROJECT_NUMBER +unset ENV +unset REGION +unset OPERATION +unset NAME +``` diff --git a/.agents/skills/agent-platform-deploy/references/usage.md b/.agents/skills/agent-platform-deploy/references/usage.md new file mode 100644 index 0000000..906280f --- /dev/null +++ b/.agents/skills/agent-platform-deploy/references/usage.md @@ -0,0 +1,12 @@ +# Sample Prompts + +* User prompt: +``` +I want to use `prod` as development environment. +copy the tuned model `projects/660615731069/locations/us-central1/models/6924512025989087232` +to project `gemini-billing-prober-018` +in region `us-central1` +and deploy it to a newly created shared endpoint. +All use name `gemini-3-flash-tuned` +and then test the endpoint with a few prompts. +``` \ No newline at end of file diff --git a/.agents/skills/agent-platform-deploy/scripts/config_gcloud_cli.sh b/.agents/skills/agent-platform-deploy/scripts/config_gcloud_cli.sh new file mode 100755 index 0000000..9bbea85 --- /dev/null +++ b/.agents/skills/agent-platform-deploy/scripts/config_gcloud_cli.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +ENV=${1:-$ENV} +PROJECT_ID=${2:-$PROJECT_ID} +REGION=${3:-$REGION} +USER=${4:-$USER} + +if [[ -z "${PROJECT_ID}" ]]; then + echo "Error: PROJECT_ID is not set (neither as an argument nor as an environment variable)." + exit 1 +fi + +if [[ -z "${USER}" ]]; then + echo "Error: USER is not set (neither as an argument nor as an environment variable)." + exit 1 +fi + +if [[ -z "${ENV}" ]]; then + ENV="prod" +fi + +if [[ -z "${REGION}" ]]; then + echo "Error: REGION is not set (neither as an argument nor as an environment variable)." + exit 1 +fi + +ENDPOINT="https://${REGION}-${ENV}-aiplatform.sandbox.googleapis.com" + +echo "PROJECT_ID: ${PROJECT_ID}" +echo "USER: ${USER}" +echo "Env: ${ENV}" +echo "Region: ${REGION}" +echo "Endpoint: ${ENDPOINT}" + +if ! 
gcloud config configurations describe "${ENV}-cdmodel" > /dev/null 2>&1; then + gcloud config configurations create "${ENV}-cdmodel" + gcloud config set core/project "${PROJECT_ID}" + gcloud config set compute/region "${REGION}" + gcloud config set account "${USER}"@google.com + gcloud config set api_endpoint_overrides/aiplatform "${ENDPOINT}" +fi + +gcloud config configurations activate ${ENV}-cdmodel + +# gcloud config configurations delete prod-cdmodel diff --git a/.agents/skills/agent-platform-endpoint-management/SKILL.md b/.agents/skills/agent-platform-endpoint-management/SKILL.md new file mode 100644 index 0000000..5034770 --- /dev/null +++ b/.agents/skills/agent-platform-endpoint-management/SKILL.md @@ -0,0 +1,167 @@ +--- +name: agent-platform-endpoint-management +description: >- + Manages Agent Platform serving endpoints. Use when you need to create, list, + describe, update, or delete serving endpoints for model deployment on Agent + Platform. Also use when troubleshooting endpoint permission, quota, or resource + busy errors. Don't use for deploying models to endpoints or for running + model evaluations. +--- + +# Agent Platform Endpoint Management + +## Overview + +This skill provides procedural knowledge for managing Agent Platform Endpoints. +Endpoints are logical serving hosts that provide a stable URL for online +predictions. You must create an endpoint before you can deploy a model to it. + +## Safety & Confirmation Tiers (CRITICAL) + +Before executing any commands on behalf of the user, you MUST adhere to the +following safety tiers based on the action requested: + +1. **Tier R: Read-only (`list`, `describe`, `get`)** + * No confirmation needed. Execute immediately to gather information. +2. **Tier M: Mutating & Reversible (`create`, `update`)** + * Requires **interactive confirmation** with 'Yes'/'No' options. The + confirmation prompt MUST contain the exact, literal command string + with all required flags (e.g. 
`--region=us-central1`, + `--display-name="..."`) — natural-language paraphrases are NOT + sufficient. + * **Same-turn restriction**: NEVER execute the command in the same turn + as presenting the confirmation prompt. Stop and wait for the user's + reply; only execute after explicit 'Yes' / approval. +3. **Tier D: Destructive & Irreversible (`delete`)** + * Requires **explicit typed confirmation** (e.g. "I confirm" or "Yes, + delete it"). Ask for confirmation IMMEDIATELY — before any pre-flight + checks (don't `describe` first, don't check if the endpoint is empty + first). + * **Same-turn restriction**: NEVER execute in the same turn as asking + for typed confirmation. Wait for the user to reply in a new turn. + +## Phase 0: Environment Setup + +**CRITICAL**: Before running any commands, you MUST ensure the environment is +correctly initialized by following these steps: + +1. **Google Cloud Authentication**: Authenticate with your Google Cloud + credentials and configure active Application Default Credentials (ADC) for + Agent Platform access: + ```bash + gcloud auth login + gcloud auth application-default login + ``` +2. **Set Project**: Configure the active project for subsequent commands: + ```bash + gcloud config set project $PROJECT_ID + ``` +3. **Region**: Always specify `--region=$LOCATION_ID` on each command below. + Do NOT use `global`. Ask the user to specify the region if not provided. + +## 1. Listing Endpoints (Tier R) + +Use this command to discover existing endpoints in a specific region and +retrieve their IDs. No confirmation is required. + +```bash +gcloud ai endpoints list \ + --region=$LOCATION_ID +``` + +> [!IMPORTANT] Always specify the `--region`. Do NOT use 'global'. Ask the user +> to specify if not provided. + +## 2. Describing an Endpoint (Tier R) + +Retrieve the full metadata for a specific endpoint. No confirmation is required. + +```bash +gcloud ai endpoints describe $ENDPOINT_ID \ + --region=$LOCATION_ID +``` + +## 3. 
Creating an Endpoint (Tier M) + +Create a new endpoint resource. The parent resource is the location. +**Action requires an inline confirmation card before proceeding.** + +```bash +gcloud ai endpoints create \ + --region=$LOCATION_ID \ + --display-name="my-endpoint" +``` + +> [!IMPORTANT] +> **You MUST seek interactive confirmation first.** Your confirmation prompt +> **MUST** show the literal command string. For example: +> +> ```bash +> gcloud ai endpoints create --region=$LOCATION_ID --display-name="my-endpoint" +> ``` +> +> Or the exact flags. Do not execute this command in the same turn as proposing +> the confirmation. + +## 4. Updating an Endpoint (Tier M) + +Update endpoint metadata such as display name or labels. +**Action requires an inline confirmation card before proceeding.** + +```bash +gcloud ai endpoints update $ENDPOINT_ID \ + --region=$LOCATION_ID \ + --display-name="new-display-name" +``` + +Check if the endpoint exists first by either listing or describing +the endpoint. + +> [!IMPORTANT] +> **You MUST seek interactive confirmation first.** Your confirmation prompt +> **MUST** show the literal command string. For example: +> +> ```bash +> gcloud ai endpoints update $ENDPOINT_ID --region=$LOCATION_ID --display-name="new-display-name" +> ``` +> +> Or the exact flags. +> **CRITICAL:** You are strictly prohibited from executing this command in the +> same turn as asking for confirmation. When you ask for confirmation, you MUST +> stop immediately and wait for the user to reply. + +## 5. Deleting an Endpoint (Tier D) + +Permanently delete an endpoint resource. +**Action requires explicit typed confirmation before proceeding.** + +```bash +gcloud ai endpoints delete $ENDPOINT_ID \ + --region=$LOCATION_ID +``` + +> [!WARNING] All models must be **undeployed** from the endpoint before it can +> be deleted. Do not run `describe` until AFTER you have received typed +> confirmation to delete. + +## 6. 
Traffic Splitting (Tier M) + +You can manage traffic split between different models deployed on the same +endpoint during an update. +**Action requires an inline confirmation card before proceeding.** + +```bash +# Example: Deploying a model with a specific traffic split is usually done +# via 'gcloud ai endpoints deploy-model'. +``` + +Refer to the `agent-platform-deploy` skill for instructions on deploying and +undeploying models. + +## Troubleshooting + +- **403 Permission Denied**: Ensure `aiplatform.admin` or `owner` role is + assigned. +- **Quota Exceeded**: Verify the region's endpoint quota in the Cloud Console. +- **Resource Busy**: If a deletion fails, check if models are still being + undeployed. diff --git a/.agents/skills/agent-platform-eval-flywheel/SKILL.md b/.agents/skills/agent-platform-eval-flywheel/SKILL.md new file mode 100644 index 0000000..81c8225 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/SKILL.md @@ -0,0 +1,357 @@ +--- +name: agent-platform-eval-flywheel +description: >- + Measure and improve the quality of AI models and agents on Google Cloud + using the Eval Quality Flywheel methodology. Use when evaluating an agent or + model, building an eval dataset, picking or writing evaluation metrics, + analyzing failures, comparing results before and after a fix, or when + guidance is needed on Agent Platform eval methodology — including + dataset schema, LLM-as-judge scoring, and common failure causes. For + fine-tuning, use agent-platform-tuning. For deployment, use + agent-platform-deploy. +--- + +# Agent Platform Eval Flywheel Skill + +Help users evaluate and iteratively improve GenAI models and agents using +the Agent Platform GenAI Evaluation SDK (`google.genai` / `agentplatform`). + +## When to use this skill + +- Evaluating GenAI agents or models with the Agent Platform GenAI + Evaluation SDK (`client.evals.evaluate()`). +- Creating evaluation datasets from session traces, pandas DataFrames, or + synthetic generation. 
+- Selecting, configuring, or writing custom evaluation metrics. +- Analyzing rubric verdicts, loss patterns, and clustering failures. +- Suggesting concrete code/prompt improvements based on eval results. + +## Setup + +Install the SDK: + +```bash +pip install "google-cloud-aiplatform[evaluation]>=1.154.0" "google-genai>=1.0.0" +``` + +Need `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION`. Check env vars +first; if missing, ask the user. Newer Gemini models often need +`location="global"`. + +## The Quality Flywheel + +Five stages, run in order on the first pass, then loop 2 → 5 until quality +targets are met. + +### Shortcuts that waste time + +| Shortcut | Why it fails | +| -------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| "I'll tune the metric threshold down so it passes." | Hides real failures. Fix the agent, not the bar. | +| "This case is flaky, I'll skip it." | Flakiness reveals non-determinism in the agent. Fix with `temperature=0` or stricter instructions. | +| "I just need to fix the eval dataset, not the agent." | If expected outputs keep moving, the agent has a behavior problem. | +| "I can tell from the trace it works — skip Stage 3." | Self-grading doesn't generalize. Always run `evaluate()` and read scores. | +| "One iteration is enough." | Expect 5–10+ iterations. Stopping early leaves regressions on other metrics undetected. | + +### 1. Prepare Data + +Produce an `EvaluationDataset`. There are three input shapes; pick the one +that matches the data the user already has: + +- **`EvalCase` list (single-turn or multi-turn):** + + ```python + from agentplatform import types + dataset = types.EvaluationDataset(eval_cases=[ + types.EvalCase(prompt="What is 2+2?", response="4", reference="4"), + # For multi-turn agent traces, set agent_data instead of prompt/response.
+ ]) + ``` + + Multi-turn agent traces wrap each conversation in `AgentData` → + `ConversationTurn` → `AgentEvent`. See + [references/dataset_schema.md](references/dataset_schema.md) for the + full type hierarchy. + +- **Pandas DataFrame (tabular sources — CSV, BigQuery, Sheets):** + + ```python + import pandas as pd + from agentplatform import types + + df = pd.DataFrame({ + "prompt": ["What is 2+2?", "Capital of France?"], + "response": ["4", "Paris"], + "reference": ["4", "Paris"], + }) + dataset = types.EvaluationDataset(eval_dataset_df=df) + ``` + + Column names must match the fields the chosen metrics expect (see + [references/dataset_schema.md](references/dataset_schema.md) for the + per-metric requirements table). + +- **Cold start (no data at all):** synthesize scenarios server-side with + `client.evals.generate_user_scenarios(...)` and a + `UserScenarioGenerationConfig` (`user_scenario_count`, + `simulation_instruction`, `environment_data`). Stage 2 plays them out. + +For ADK session dumps, use `scripts/parse_adk_traces.py` instead of writing +the conversion by hand. + +### 2. Run Inference + +Populate responses/traces on the dataset. **Skip this stage** if traces are +already complete (e.g., production logs or replay). + +```python +# Agent eval — pass a callable wrapping the user's ADK Agent/App. +client.evals.run_inference(model=agent_callable, src=dataset) + +# Model eval — pass a model ID directly. +client.evals.run_inference(model="gemini-2.5-flash", src=dataset) + +# Synthesized scenarios — let the simulator drive. +client.evals.run_inference( + model=agent_callable, + src=dataset, + user_simulator_config=UserSimulatorConfig(max_turn=10), +) + +# DataFrame also works as src= — no EvalCase wrapping needed. +client.evals.run_inference(model="gemini-2.5-flash", src=df) +``` + +### 3. 
Grade (always run) + +```python +result = client.evals.evaluate(dataset=dataset, metrics=[...]) +``` + +**Pick metrics by what you want to measure.** Full catalog in +[references/metric_registry.md](references/metric_registry.md). + +**Agent metrics (multi-turn, adaptive rubrics)** — start here for agent eval. + +| Goal | Metric | +| --------------------------------------------- | ------------------------------- | +| Did the agent achieve the user's goal? | `multi_turn_task_success` | +| Was the reasoning path logical and efficient? | `multi_turn_trajectory_quality` | +| Tool/function calling quality across turns | `multi_turn_tool_use_quality` | +| Overall conversational quality | `multi_turn_general_quality` | +| Final response quality (no reference needed) | `final_response_quality` | +| Final response vs. a golden reference | `final_response_match` | +| Single-turn tool use | `tool_use_quality` | + +**General quality metrics (single-turn, adaptive rubrics)** — for model eval. + +| Goal | Metric | +| ----------------------------------------------------- | ----------------------- | +| Overall response quality (recommended starting point) | `general_quality` | +| Linguistic quality (fluency, coherence, grammar) | `text_quality` | +| Adherence to specific constraints / instructions | `instruction_following` | + +**Static rubric metrics (fixed criteria)** — apply alongside the above. + +| Goal | Metric | +| ------------------------------------------------- | --------------- | +| Catch hallucinated claims (RAG, factual answers) | `hallucination` | +| Factuality / consistency against provided context | `grounding` | +| Safety policy compliance | `safety` | + +**Domain-specific check no built-in covers:** write a custom metric. + +- **Predefined:** `types.RubricMetric.<NAME>` — server-side AutoRater, no + judge model needed. +- **Custom LLM-as-a-judge:** `types.LLMMetric` with `prompt_template` or + `types.MetricPromptBuilder` for structured rubrics.
+- **Custom code:** `types.CodeExecutionMetric` with a `custom_function` + string containing `def evaluate(instance: dict)` for remote sandboxed + execution; or `types.Metric` with `custom_function=<callable>` for + local execution. + +**Always persist the result** so Stages 4 and 5 can read it. Save both JSON +(machine-readable, diffable) and HTML (human-readable, linkable): + +```python +import datetime +from pathlib import Path + +from agentplatform._genai import _evals_visualization + +out_dir = Path("artifacts/grade_results") +out_dir.mkdir(parents=True, exist_ok=True) +ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + +result_json = result.model_dump_json() +(out_dir / f"results_{ts}.json").write_text(result_json) + +html = _evals_visualization.get_evaluation_html(result_json) +(out_dir / f"results_{ts}.html").write_text(str(html)) +``` + +Or after the fact: `scripts/render_html_report.py --type evaluation` or +`scripts/inspect_results.py --save-html`. + +### 4. Analyze Failures + +Read `summary_metrics` and `eval_case_results` — never fabricate scores. +Use `scripts/inspect_results.py --failing-only` to filter to failures. + +For each failed metric, see +[references/failure_patterns.md](references/failure_patterns.md) for deeper +diagnoses. The compact mapping: + +| Failing metric | What to change | +| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------- | +| `multi_turn_task_success` low | The agent isn't completing the goal — fix orchestration, missing tool calls, premature termination, wrong tool selection. | +| `multi_turn_trajectory_quality` low | The agent reaches the goal inefficiently — refine planning prompts, remove redundant tool calls. | +| `multi_turn_tool_use_quality` low | Fix tool descriptions, parameter docstrings, or agent instructions for tool selection.
| +| `final_response_quality` low | Read auto-generated rubric verdicts; refine instructions to address the worst-scoring criterion. | +| `final_response_match` low | The agent's final answer doesn't match the golden reference — adjust response format or update the reference. | +| `hallucination` low | Tighten instructions to stay grounded in tool output; verify the tool actually returned the claimed data. | +| `grounding` low | The response contradicts the provided context — add explicit "cite only from context" instructions. | +| `safety` low | Add safety guardrails; review the violating content category in the rubric verdict. | +| `general_quality` / `text_quality` low | Adjust system instruction wording; the model's default phrasing is too generic for the task. | +| `instruction_following` low | The agent is ignoring constraints — restate them in the system instruction or use stricter wording. | +| Agent calls wrong tools | Fix tool descriptions, agent instructions, or `tool_config`. | +| Agent calls extra tools | Add explicit stop instructions, or switch to `multi_turn_tool_use_quality` to surface the extra calls in the rubric. | + +**For 10+ failures on the same metric**, use the **Error Analysis service** +to cluster failures into themes (L1/L2 taxonomy categories) instead of +reading every trace: + +```python +# Only supports multi_turn_task_success and multi_turn_tool_use_quality. +# Service runs in the global region. 
+analysis_client = agentplatform.Client(project="PROJECT_ID", location="global") +response = analysis_client.evals.generate_loss_clusters( + eval_result=result, + metric="multi_turn_task_success", + config={"max_top_cluster_count": 5}, +) +for r in response.results: + for cluster in r.clusters: + print( + f"[{cluster.taxonomy_entry.l1_category}/" + f"{cluster.taxonomy_entry.l2_category}] " + f"{cluster.item_count} cases — {cluster.taxonomy_entry.description}" + ) +``` + +Save `response.model_dump_json()` and render with +`scripts/render_html_report.py --type loss-analysis`. + +### 5. Optimize & Iterate + +Apply a fix targeting the failing metric. Re-run Stages 2 and 3. Compare with +`scripts/compare_results.py --baseline <baseline.json> --candidate <candidate.json>` to confirm +the target improved AND no other metric regressed. + +Track progress across iterations: + +| Iteration | Metric A | Metric B | Change made | +| --------- | -------- | -------- | ----------------------- | +| Baseline | 0.62 | 0.55 | — | +| v2 | 0.78 | 0.68 | Added grounding prompt | +| v3 | 0.81 | 0.72 | Fixed tool selection | + +Expect 5–10+ iterations per failing case. Only after a case passes should +you expand coverage with more eval cases. + +## Proving your work + +Never claim eval results you didn't read from an actual `result` object. + +- After running eval, print the `summary_metrics` table + (`scripts/inspect_results.py`). +- After a fix, show before/after via `scripts/compare_results.py`. +- Before declaring success, confirm ALL cases pass — not just the one you + were working on. + +If you can't produce the evidence (SDK call failed, result truncated, +metric unsupported), say so explicitly. Don't paper over gaps. + +## Rules of Engagement + +1. **Always Plan First:** Before writing a script, output a `<plan>` block + detailing the steps you are about to take. +2. **Step-by-Step Execution:** Write the script, execute it, wait for + output, then analyze. Don't do everything in one response. +3.
**Standard Python:** Use standard Python imports (`import + agentplatform`, `from agentplatform import types`, and + `from google.genai import types as genai_types`). Don't use internal import paths. +4. **Verify Before Guessing:** When unsure about SDK types or metrics, + check the SDK source code rather than guessing or hallucinating. + +## SDK Quick Reference + +```python +import agentplatform +from agentplatform import types +from google.genai import types as genai_types +import pandas as pd + +# Initialize client +client = agentplatform.Client(project="PROJECT_ID", location="LOCATION") + +# --- SINGLE-TURN EVAL (EvalCase list) --- +dataset = types.EvaluationDataset(eval_cases=[ + types.EvalCase(prompt="Query here", response="Model response here"), +]) + +# --- SINGLE-TURN EVAL (pandas DataFrame) --- +df = pd.DataFrame({ + "prompt": ["Q1", "Q2"], + "response": ["A1", "A2"], +}) +dataset = types.EvaluationDataset(eval_dataset_df=df) + +# --- MULTI-TURN AGENT EVAL --- +agent_data = types.evals.AgentData( + agents={"my_agent": types.evals.AgentConfig( + agent_id="my_agent", instruction="You are helpful.")}, + turns=[types.evals.ConversationTurn(turn_index=0, events=[ + types.evals.AgentEvent(author="user", + content=genai_types.Content(role="user", + parts=[genai_types.Part(text="Hello")])), + types.evals.AgentEvent(author="my_agent", + content=genai_types.Content(role="model", + parts=[genai_types.Part(text="Hi! How can I help?")])), + ])], +) +dataset = types.EvaluationDataset( + eval_cases=[types.EvalCase(agent_data=agent_data)]) + +# --- METRICS --- +predefined = types.RubricMetric.MULTI_TURN_TRAJECTORY_QUALITY +custom_llm = types.LLMMetric(name="tone", + prompt_template="Is this polite?
Response: {response}") +custom_code = types.CodeExecutionMetric(name="check", + custom_function='def evaluate(instance): return {"score": 1.0}') + +# --- EVALUATE --- +result = client.evals.evaluate(dataset=dataset, metrics=[predefined]) + +# --- RESULTS --- +for s in result.summary_metrics: + print(f"{s.metric_name}: mean={s.mean_score}, pass_rate={s.pass_rate}") +for case in result.eval_case_results: + for cand in case.response_candidate_results: + for name, r in cand.metric_results.items(): + print(f" {name}: score={r.score}, explanation={r.explanation}") +``` + +See [references/sdk_patterns.md](references/sdk_patterns.md) for advanced +patterns: synthetic data generation, pairwise comparison, +`MetricPromptBuilder`, multi-agent evaluation. + +## Bundled scripts + +| Script | When to use | +| ----------------------- | ------------------------------------------------------------------------------------ | +| `validate_dataset.py` | Before Stage 3 — catch malformed `EvaluationDataset` JSON. | +| `parse_adk_traces.py` | Stage 1 — convert ADK session dumps to the canonical dataset shape. | +| `inspect_results.py` | Stages 3/4 — render summary + per-case scores. `--save-html` for a browsable report. | +| `compare_results.py` | Stage 5 — diff baseline vs. candidate, detect regressions. | +| `render_html_report.py` | Render HTML from a saved result JSON or loss-clusters JSON. | diff --git a/.agents/skills/agent-platform-eval-flywheel/references/dataset_schema.md b/.agents/skills/agent-platform-eval-flywheel/references/dataset_schema.md new file mode 100644 index 0000000..17b12a0 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/references/dataset_schema.md @@ -0,0 +1,275 @@ +# Evaluation Dataset Schema + +Canonical formats for evaluation datasets in the Google GenAI Evaluation SDK. +Source of truth: `agentplatform/_genai/types/evals.py` and +`agentplatform/_genai/types/common.py`. 
+ +## Core Types + +``` +EvaluationDataset +├── eval_cases: list[EvalCase] # Primary: list of cases +└── eval_dataset_df: pd.DataFrame # Alternative: pandas DataFrame + +EvalCase +├── prompt: str # Single-turn: the user query +├── response: str # Single-turn: the model response +├── reference: str # Ground truth (for reference-based metrics) +├── agent_data: AgentData # Multi-turn: full conversation trajectory +└── (extra fields allowed) # Custom fields for custom metrics + +AgentData +├── agents: dict[str, AgentConfig] # Agent definitions +└── turns: list[ConversationTurn] # Ordered conversation turns + +ConversationTurn +├── turn_index: int # 0-based turn number +└── events: list[AgentEvent] # Events within this turn + +AgentEvent +├── author: str # "user", agent_id, or "tool" +└── content: genai_types.Content # Content with role and parts +``` + +## Single-Turn Dataset + +For simple prompt-response evaluation (e.g., QA, summarization). + +```python +from agentplatform import types + +dataset = types.EvaluationDataset(eval_cases=[ + types.EvalCase( + prompt="What is the capital of France?", + response="The capital of France is Paris.", + reference="Paris", + ), + types.EvalCase( + prompt="Summarize this article: ...", + response="The article discusses...", + ), +]) +``` + +### From pandas DataFrame + +```python +import pandas as pd +from agentplatform import types + +df = pd.DataFrame({ + "prompt": ["What is 2+2?", "Name the planets"], + "response": ["4", "Mercury, Venus, Earth, ..."], + "reference": ["4", "Mercury, Venus, Earth, Mars, ..."], +}) +dataset = types.EvaluationDataset(eval_dataset_df=df) +``` + +### Required fields by metric type + +Metric category | Required fields +------------------------ | ------------------------------------------- +Predefined (single-turn) | `prompt`, `response` +Computation-based | `response`, `reference` +Translation | `prompt` (source), `response`, `reference` +Custom LLM/code | Fields referenced in your template/function + 
+## Multi-Turn Dataset (AgentData) + +For evaluating multi-turn agent conversations with tool calls. + +```python +from agentplatform import types +from google.genai import types as genai_types + +agent_data = types.evals.AgentData( + agents={ + "support_agent": types.evals.AgentConfig( + agent_id="support_agent", + instruction="You are a helpful support agent.", + tools=[genai_types.Tool(function_declarations=[ + genai_types.FunctionDeclaration( + name="lookup_order", + description="Look up order status by ID", + parameters=genai_types.Schema( + type="OBJECT", + properties={"order_id": genai_types.Schema(type="STRING")}, + ), + ) + ])], + ) + }, + turns=[ + types.evals.ConversationTurn( + turn_index=0, + events=[ + # User message + types.evals.AgentEvent( + author="user", + content=genai_types.Content( + role="user", + parts=[genai_types.Part(text="Where is my order #12345?")] + ), + ), + # Agent calls tool + types.evals.AgentEvent( + author="support_agent", + content=genai_types.Content( + role="model", + parts=[genai_types.Part( + function_call=genai_types.FunctionCall( + name="lookup_order", + args={"order_id": "12345"}, + ) + )] + ), + ), + # Tool response + types.evals.AgentEvent( + author="support_agent", + content=genai_types.Content( + role="tool", + parts=[genai_types.Part( + function_response=genai_types.FunctionResponse( + name="lookup_order", + response={"status": "shipped", "eta": "tomorrow"}, + ) + )] + ), + ), + # Agent final response + types.evals.AgentEvent( + author="support_agent", + content=genai_types.Content( + role="model", + parts=[genai_types.Part( + text="Your order #12345 has been shipped and should arrive tomorrow!" + )] + ), + ), + ], + ), + ], +) + +eval_case = types.EvalCase(agent_data=agent_data) +dataset = types.EvaluationDataset(eval_cases=[eval_case]) +``` + +## Multi-Agent Dataset + +For evaluating systems with multiple collaborating agents. 
+ +```python +agent_data = types.evals.AgentData( + agents={ + "router": types.evals.AgentConfig( + agent_id="router", + agent_type="RouterAgent", + instruction="Route requests to the appropriate specialist.", + ), + "flight_bot": types.evals.AgentConfig( + agent_id="flight_bot", + agent_type="SpecialistAgent", + instruction="Search and book flights.", + tools=[genai_types.Tool(function_declarations=[ + genai_types.FunctionDeclaration(name="search_flights") + ])], + ), + }, + turns=[ + types.evals.ConversationTurn( + turn_index=0, + events=[ + types.evals.AgentEvent( + author="user", + content=genai_types.Content( + role="user", + parts=[genai_types.Part(text="Book a flight to NYC")] + ), + ), + # Router delegates + types.evals.AgentEvent( + author="router", + content=genai_types.Content( + role="model", + parts=[genai_types.Part( + function_call=genai_types.FunctionCall( + name="delegate_to_agent", + args={"agent_name": "flight_bot"}, + ) + )] + ), + ), + ], + ), + types.evals.ConversationTurn( + turn_index=1, + events=[ + # Specialist works + types.evals.AgentEvent( + author="flight_bot", + content=genai_types.Content( + role="model", + parts=[genai_types.Part( + function_call=genai_types.FunctionCall( + name="search_flights", + args={"destination": "NYC"}, + ) + )] + ), + ), + ], + ), + ], +) +``` + +## Synthetic Data Generation + +### Generate User Scenarios (Cold Start) + +```python +scenarios = client.evals.generate_user_scenarios( + agents={ + "my_agent": types.evals.AgentConfig( + agent_id="my_agent", + instruction="You are a helpful customer support agent.", + ) + }, + root_agent_id="my_agent", + user_scenario_generation_config=types.evals.UserScenarioGenerationConfig( + user_scenario_count=10, + simulation_instruction="Simulate a customer asking about order status.", + environment_data="Orders can be: pending, shipped, delivered, cancelled.", + model_name="gemini-2.5-flash", + ), +) +``` + +### Run Inference (Populate Responses) + +```python 
+dataset_with_responses = client.evals.run_inference( + agent=my_agent_callable, + src=scenarios, + config={ + "user_simulator_config": { + "model_name": "gemini-2.5-flash", + "max_turn": 5, + } + }, +) +``` + +## Common Mistakes + +| Mistake | Fix | +| -------------------------------- | -------------------------------------- | +| Using `role="assistant"` | Use `role="model"` (Agent Platform convention) | +| Missing `turn_index` | Always set sequential 0-based indices | +| Tool response without | Wrap in `genai_types.FunctionResponse` | +: `function_response` : : +| Using `response` field for | Use `agent_data` with full trajectory | +: multi-turn : : +| Mixing `prompt` and `agent_data` | Use one or the other per EvalCase | diff --git a/.agents/skills/agent-platform-eval-flywheel/references/failure_patterns.md b/.agents/skills/agent-platform-eval-flywheel/references/failure_patterns.md new file mode 100644 index 0000000..ec9c1d2 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/references/failure_patterns.md @@ -0,0 +1,184 @@ +# Evaluation Failure Patterns & Fixes + +Common failure modes observed in GenAI agent evaluations, mapped to their root +causes and concrete fixes. Metric IDs below are the **unversioned** form (the +SDK auto-resolves them to the latest version); see +[metric_registry.md](metric_registry.md) for the full catalog. + +For the compact failure → fix mapping, see the table under +*4. Analyze Failures* in SKILL.md. + +## Metric-Specific Failures + +### Low `hallucination` or `grounding` Score + +**Symptom:** Agent generates plausible-sounding but factually incorrect +information, or doesn't use the provided context. + +**Root causes:** + +- System prompt lacks explicit grounding instructions +- Retrieved context not passed into the prompt +- Agent ignores context in favor of parametric knowledge + +**Fixes:** + +1. Add to system prompt: "Base ALL answers strictly on the provided context.
If + the context doesn't contain the answer, say 'I don't have that + information.'" +2. Verify context is actually injected into the prompt (check tool responses). +3. Add `temperature=0` or lower temperature to reduce creative generation. + +### Low `general_quality` or `text_quality` + +**Symptom:** Agent responses are poorly structured, unclear, or unhelpful. + +**Root causes:** + +- System prompt too vague +- Agent over-explains or under-explains +- Missing output format instructions + +**Fixes:** + +1. Add explicit format instructions: "Respond concisely in 2-3 sentences." +2. Add few-shot examples in the system prompt. +3. Review rubric verdicts for specific quality dimensions that scored low. + +### Low `tool_use_quality` (single-turn) or `multi_turn_tool_use_quality` + +**Symptom:** Agent calls the wrong tool, uses wrong parameters, or doesn't call +tools when it should. + +**Root causes:** + +- Tool descriptions are ambiguous +- Multiple tools have overlapping functionality +- Function declaration parameter schemas are incomplete + +**Fixes:** + +1. Make tool `description` fields precise and mutually exclusive. +2. Add parameter descriptions and constraints to `FunctionDeclaration`. +3. Add to system prompt: "Always use {tool_name} when the user asks about + {specific_topic}." +4. For granular diagnosis of parameter mismatches, add a computation metric + like `tool_parameter_kv_match` alongside the rubric metric. + +### Low `multi_turn_trajectory_quality` + +**Symptom:** Agent takes suboptimal paths through a conversation — unnecessary +tool calls, redundant questions, or wrong delegation order. + +**Root causes:** + +- Router agent lacks clear delegation rules +- Agent retries failed operations without adaptation +- Missing escalation logic + +**Fixes:** + +1. Add explicit routing rules: "Route to {agent} when {condition}." +2. Add retry limits: "If {tool} fails twice, inform the user and suggest + alternatives." +3. 
Review the trajectory events in `agent_data` to identify the specific turn + where the agent deviated. + +### Low `multi_turn_task_success` + +**Symptom:** Agent engages in conversation but doesn't complete the user's +actual goal. + +**Root causes:** + +- Agent gets sidetracked by follow-up questions +- Missing confirmation/completion step +- Agent doesn't track task state across turns + +**Fixes:** + +1. Add to system prompt: "Always confirm task completion with the user before + ending the conversation." +2. Implement explicit task tracking in agent logic. +3. Verify `max_turn` in user simulator is sufficient for the task complexity. + +### Low `safety` + +**Symptom:** Agent generates unsafe content or complies with harmful requests. + +**Root causes:** + +- System prompt lacks safety constraints +- Agent follows user instructions too literally +- Missing refusal logic for out-of-scope requests + +**Fixes:** + +1. Add safety guardrails: "Never provide medical/legal/financial advice. + Redirect to appropriate professionals." +2. Add refusal patterns: "If the user asks for {harmful_category}, politely + decline." +3. Use `safety` alongside domain-specific `LLMMetric` safety checks. + +## Structural Failures + +### `is_infra_error: true` + +**Symptom:** Eval case fails with infrastructure error, not a quality issue. + +**Root causes:** + +- API quota exceeded +- Network timeout +- Model endpoint temporarily unavailable + +**Fix:** Re-run the evaluation. If persistent, check quota and endpoint health. + +### Timeout + +**Symptom:** Evaluation times out before completing. + +**Root causes:** + +- Dataset too large for a single API call +- Complex custom metric code takes too long +- Judge model sampling count too high + +**Fixes:** + +1. Reduce dataset size or batch into smaller chunks. +2. Optimize custom metric code (avoid network calls in `evaluate()`). +3. Reduce `judge_model_sampling_count` (default 1, max 32). 
+ +### `KeyError` in Custom Metric + +**Symptom:** Custom function crashes with missing field. + +**Root cause:** Metric function expects a field not present in the eval case. + +**Fix:** Check available fields in the `instance` dict. For agent-trace +datasets, the standard top-level field is `agent_data` (a structured +turns/events object) — the agent's final response lives inside it. Flat +placeholders like `{response}` only resolve in the DataFrame eval path. Always +use `.get()` with defaults. + +## Analysis Workflow + +When eval results show failures: + +1. **Start with `summary_metrics`** — identify which metrics scored lowest. + Use `scripts/inspect_results.py` to render the table. +2. **Drill into `eval_case_results`** — find specific failing cases. Use + `scripts/inspect_results.py --failing-only` to filter. +3. **For 10+ failures on the same metric** — run the Error Analysis service + (`client.evals.generate_loss_clusters`) for `multi_turn_task_success` or + `multi_turn_tool_use_quality` to cluster failures into L1/L2 themes + instead of skimming case-by-case. +4. **Read `rubric_verdicts`** — understand why the judge scored low. +5. **Cross-reference with `agent_data`** — find the exact turn/event that + caused the failure. +6. **Identify the pattern** — is it a prompt issue, tool issue, or data issue? +7. **Apply the targeted fix** — from the sections above or the compact mapping + table under *4. Analyze Failures* in SKILL.md. +8. **Re-run and compare** — use `scripts/compare_results.py` to verify the + fix improved the target metric without regressing others.
diff --git a/.agents/skills/agent-platform-eval-flywheel/references/metric_registry.md b/.agents/skills/agent-platform-eval-flywheel/references/metric_registry.md new file mode 100644 index 0000000..1511374 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/references/metric_registry.md @@ -0,0 +1,272 @@ +# GenAI Evaluation Metric Registry + +Complete catalog of evaluation metrics available in the `agentplatform` SDK. +Source of truth: `agentplatform/_genai/_evals_metric_loaders.py` and +`agentplatform/_genai/_evals_metric_handlers.py`. + +Metric IDs in this file are the **unversioned** form (e.g., +`multi_turn_task_success`, not `multi_turn_task_success_v1`). The SDK resolves +unversioned names to the latest version; pin to a specific version only when +required for reproducibility (e.g., `RubricMetric.GENERAL_QUALITY(version="v2")`). + +## Metric Type Hierarchy + +``` +Metric (base) +├── LLMMetric — LLM-as-a-judge with prompt_template +├── CodeExecutionMetric — Sandboxed remote Python function +└── (base Metric) — Local callable or predefined name +``` + +Access predefined metrics via `types.RubricMetric.<NAME>` (preferred). +`types.PrebuiltMetric` is an alias with identical behavior. + +## Predefined API Metrics (AutoRater) + +Server-side evaluation via the Agent Platform AutoRater. No judge model needed. + +### Agent metrics (multi-turn, adaptive rubrics) + +Start here for agent evaluation. Adaptive rubrics generate criteria from the +trace at runtime. + +| Metric ID | What it measures | Required fields | +| ------------------------------- | -------------------------------------------------------------------------------------- | -------------------------------- | +| `multi_turn_task_success` | Whether the user's goal was fulfilled across the full multi-turn conversation. | `agent_data` with task context | +| `multi_turn_trajectory_quality` | Sequential logic, efficiency, and error-recovery robustness across turns.
| `agent_data` with full trajectory | +| `multi_turn_tool_use_quality` | Technical and semantic correctness of tool calls across the multi-turn conversation. | `agent_data` with function calls | +| `multi_turn_general_quality` | Overall response quality within a multi-turn dialogue. | `agent_data` (1+ turns) | +| `multi_turn_text_quality` | Linguistic text quality within a multi-turn dialogue. | `agent_data` (1+ turns) | +| `final_response_quality` | Comprehensive evaluation of the final response and intermediate tool usage. | `agent_data` | +| `final_response_match` | Compares the agent's final response to a provided golden reference answer. | `agent_data`, `reference` | +| `final_response_reference_free` | Final-response quality without a reference answer (requires custom rubrics). | `agent_data` + rubric_groups | +| `tool_use_quality` | Tool selection, parameter accuracy, and step sequence correctness (single-turn). | `agent_data` with tool calls | + +### General quality metrics (single-turn, adaptive rubrics) + +For model evaluation (no agent orchestration). + +| Metric ID | What it measures | Required fields | +| ----------------------- | ----------------------------------------------------------------- | -------------------- | +| `general_quality` | Overall response quality with auto-generated content-based criteria. Recommended starting point for non-agent eval. | `prompt`, `response` | +| `text_quality` | Linguistic aspects: fluency, coherence, grammar. | `prompt`, `response` | +| `instruction_following` | How well the response adheres to specific constraints / instructions. | `prompt`, `response` | + +### Static rubric metrics (fixed criteria) + +Apply alongside the agent or general-quality metrics above. Fixed rubrics — no +adaptive generation. 
+ +| Metric ID | What it measures | Required fields | +| --------------- | --------------------------------------------------------------------------------------- | ------------------------------- | +| `hallucination` | Segments the response into atomic claims; verifies each against tool-returned context. | `response`, intermediate context | +| `grounding` | Factuality and consistency against provided context. | `prompt`, `response`, `context` | +| `safety` | Policy compliance (PII, hate speech, dangerous content, harassment, sexual). | `prompt`, `response` | + +### Accessing predefined metrics + +```python +from agentplatform import types + +# Via RubricMetric (preferred) — uppercase enum, unversioned +metric = types.RubricMetric.MULTI_TURN_TRAJECTORY_QUALITY + +# Via PrebuiltMetric (alias — identical behavior) +metric = types.PrebuiltMetric.MULTI_TURN_TRAJECTORY_QUALITY + +# Pin to a specific version (only when needed for reproducibility) +metric = types.RubricMetric.GENERAL_QUALITY(version="v2") +``` + +## Computation-Based Metrics + +No LLM judge. Deterministic comparison of `response` vs `reference`. Use only +when you have exact reference text to compare against; for agent eval prefer +the rubric-based metrics above. + +| Metric ID | What it measures | Notes | +| -------------------------- | ------------------------------------------- | -------------- | +| `bleu` | BLEU score (translation/generation). | Standard BLEU | +| `rouge_1` | ROUGE-1 (unigram overlap). | Summarization | +| `tool_parameter_key_match` | Whether tool parameter keys match reference. | Agent evals | +| `tool_parameter_kv_match` | Whether tool parameter key-value pairs match reference. 
| Agent evals | + +```python +metric = types.Metric(name="bleu") +metric = types.Metric(name="tool_parameter_kv_match") +``` + +## Translation Metrics + +| Metric ID | Default version | Notes | +| --------- | -------------------- | --------------------------------------- | +| `comet` | `COMET_22_SRC_REF` | Requires `prompt` (source), `response`, `reference` | +| `metricx` | `METRICX_24_SRC_REF` | Requires `prompt` (source), `response`, `reference` | + +## Multimodal Metrics + +| Metric ID | What it measures | Required fields | +| -------------------- | --------------------- | --------------- | +| `gecko_text2image_v1` | Text-to-image quality | image content | +| `gecko_text2video_v1` | Text-to-video quality | video content | + +## RubricMetric / PrebuiltMetric (Enum Access) + +`RubricMetric.{NAME}` resolves first against the API predefined list, then +falls back to GCS-hosted LLM metric recipes. The names below are the +**uppercase enum form** of the unversioned IDs above; pass them as +`types.RubricMetric.{NAME}`. + +| Property | Resolution | +| ------------------------------- | -------------------- | +| `MULTI_TURN_TASK_SUCCESS` | API predefined | +| `MULTI_TURN_TRAJECTORY_QUALITY` | API predefined | +| `MULTI_TURN_TOOL_USE_QUALITY` | API predefined | +| `MULTI_TURN_GENERAL_QUALITY` | API predefined | +| `MULTI_TURN_TEXT_QUALITY` | API predefined | +| `FINAL_RESPONSE_QUALITY` | API predefined | +| `FINAL_RESPONSE_MATCH` | API predefined (v2) | +| `FINAL_RESPONSE_REFERENCE_FREE` | API predefined | +| `TOOL_USE_QUALITY` | API predefined | +| `GENERAL_QUALITY` | API predefined | +| `TEXT_QUALITY` | API predefined | +| `INSTRUCTION_FOLLOWING` | API predefined | +| `HALLUCINATION` | API predefined | +| `SAFETY` | API predefined | + +Any arbitrary name can be tried via `RubricMetric.{NAME}` — it will attempt +resolution against the API list and then GCS. + +## Custom Metrics + +### Custom Local Function + +Runs client-side. Fastest iteration, no API call.
Runs with the calling +process's privileges, so only use trusted code. + +```python +def my_evaluator(instance: dict) -> float: + response_text = instance.get("response", "") + return 1.0 if "thank you" in response_text.lower() else 0.0 + +metric = types.Metric( + name="politeness_check", + custom_function=my_evaluator, +) +``` + +### CodeExecutionMetric (Remote Sandboxed) + +Runs server-side in an Agent Platform sandbox. Must contain `def evaluate(instance)`. + +```python +metric = types.CodeExecutionMetric( + name="link_validator", + custom_function=''' +import re +def evaluate(instance: dict) -> dict: + text = instance.get("response", "") + links = re.findall(r"https?://\\S+", text) + valid = all(link.startswith("https://") for link in links) + return {"score": 1.0 if valid else 0.0, "explanation": f"Found {len(links)} links"} +''', +) +``` + +### LLMMetric (LLM-as-a-Judge) + +Uses a judge model to evaluate with a custom prompt template. + +```python +metric = types.LLMMetric( + name="helpfulness", + prompt_template=""" +Evaluate whether the response is helpful for the given query. + +Query: {prompt} +Response: {response} + +Score 1 if helpful, 0 if not. Explain your reasoning. +""", + judge_model="gemini-2.5-flash", + judge_model_sampling_count=3, +) + +# Load from YAML/JSON file +metric = types.LLMMetric.load("path/to/metric_config.yaml") +``` + +### MetricPromptBuilder (Structured Judge Prompt) + +Builds structured LLM judge prompts from criteria, rating scores, and evaluation +steps. Preferred over raw `prompt_template` strings for complex rubrics. 
+ +```python +metric = types.LLMMetric( + name="structured_quality", + prompt_template=types.MetricPromptBuilder( + criteria={ + "Accuracy": "Response contains factually correct information", + "Completeness": "Response addresses all aspects of the query", + }, + rating_scores={ + "1": "Poor — fails on both criteria", + "3": "Acceptable — meets one criterion", + "5": "Excellent — meets both criteria", + }, + ), + judge_model="gemini-2.5-flash", +) +``` + +### Registered Metric (Server-Side Resource) + +For reusable metrics shared across teams. + +```python +# Create once +resource = client.evals.create_evaluation_metric(metric_config) + +# Use by resource name +metric = types.Metric( + name="team_quality", + metric_resource_name="projects/.../evaluationMetrics/...", +) +``` + +## Metric Selection Guide + +| Agent Type | Recommended Metrics | +| ----------------------------- | ---------------------------------------------------------------------- | +| **RAG agent** | `hallucination`, `grounding`, `final_response_quality`, `safety` | +| **Tool-use agent (multi-turn)** | `multi_turn_task_success`, `multi_turn_tool_use_quality`, `multi_turn_trajectory_quality` | +| **Tool-use agent (single-turn)** | `tool_use_quality`, `final_response_quality` | +| **Multi-turn conversational** | `multi_turn_general_quality`, `multi_turn_text_quality`, `safety` | +| **Goal-oriented agent** | `multi_turn_task_success`, `final_response_quality` | +| **Single-turn model eval** | `general_quality`, `text_quality`, `instruction_following` | +| **Translation** | `comet`, `metricx` | +| **Code generation** | Custom `CodeExecutionMetric` + `instruction_following` | + +## Pairwise Comparison + +There is no `PairwiseMetric` class. 
For model comparison, provide multiple +`EvaluationDataset` instances and use `calculate_win_rates()`: + +```python +result_a = client.evals.evaluate(dataset=dataset_a, metrics=[...]) +result_b = client.evals.evaluate(dataset=dataset_b, metrics=[...]) +win_rates = calculate_win_rates(result_a, result_b) +``` + +## Handler Dispatch Order + +When the SDK receives a metric, it checks in this order: + +1. `CodeExecutionMetric` with `custom_function` (str) or + `remote_custom_function` +2. `Metric` with `custom_function` (local `Callable`) +3. `Metric` with `metric_resource_name` (registered) +4. Name in computation metrics (`bleu`, `rouge_1`, etc.) +5. Name in translation metrics (`comet`, `metricx`) +6. Name in predefined API metrics (`general_quality`, `multi_turn_task_success`, etc.) +7. `LLMMetric` with `prompt_template` (custom LLM judge) diff --git a/.agents/skills/agent-platform-eval-flywheel/references/sdk_patterns.md b/.agents/skills/agent-platform-eval-flywheel/references/sdk_patterns.md new file mode 100644 index 0000000..58fa346 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/references/sdk_patterns.md @@ -0,0 +1,251 @@ +# Agent Platform Evaluation SDK Patterns + +Code patterns for common evaluation scenarios using `agentplatform._genai.evals`. + +## Initialization + +```python +import agentplatform +from agentplatform import types +from google.genai import types as genai_types + +client = agentplatform.Client(project="{PROJECT_ID}", location="{LOCATION}") +``` + +For Gemini 3+ models, use `location="global"`. + +## Pattern 1: Single-Turn Evaluation + +Simplest case — evaluate prompt/response pairs against predefined metrics. 
+ +```python +dataset = types.EvaluationDataset(eval_cases=[ + types.EvalCase( + prompt="What causes rain?", + response="Rain is caused by water evaporating...", + reference="Rain forms when water vapor condenses...", + ), +]) + +result = client.evals.evaluate( + dataset=dataset, + metrics=[ + types.RubricMetric.GENERAL_QUALITY, + types.Metric(name="rouge_l_sum"), + ], +) +``` + +## Pattern 2: Multi-Turn Agent Evaluation + +Evaluate a full agent conversation trajectory with tool calls. + +```python +agent_data = types.evals.AgentData( + agents={ + "my_agent": types.evals.AgentConfig( + agent_id="my_agent", + instruction="You are a helpful assistant.", + tools=[genai_types.Tool(function_declarations=[ + genai_types.FunctionDeclaration( + name="search", + description="Search the web", + parameters=genai_types.Schema( + type="OBJECT", + properties={"query": genai_types.Schema(type="STRING")}, + ), + ), + ])], + ), + }, + turns=[ + types.evals.ConversationTurn(turn_index=0, events=[ + types.evals.AgentEvent( + author="user", + content=genai_types.Content(role="user", + parts=[genai_types.Part(text="Find me the weather in NYC")]), + ), + types.evals.AgentEvent( + author="my_agent", + content=genai_types.Content(role="model", + parts=[genai_types.Part(function_call=genai_types.FunctionCall( + name="search", args={"query": "NYC weather"}))]), + ), + types.evals.AgentEvent( + author="my_agent", + content=genai_types.Content(role="tool", + parts=[genai_types.Part(function_response=genai_types.FunctionResponse( + name="search", response={"result": "72F, sunny"}))]), + ), + types.evals.AgentEvent( + author="my_agent", + content=genai_types.Content(role="model", + parts=[genai_types.Part(text="It's 72F and sunny in NYC.")]), + ), + ]), + ], +) + +result = client.evals.evaluate( + dataset=types.EvaluationDataset(eval_cases=[ + types.EvalCase(agent_data=agent_data), + ]), + metrics=[ + types.RubricMetric.MULTI_TURN_TRAJECTORY_QUALITY, + 
types.RubricMetric.MULTI_TURN_TASK_SUCCESS, + ], +) +``` + +## Pattern 3: Synthetic Data Generation (Cold Start) + +Generate user scenarios when no eval data exists. + +```python +# Step 1: Generate scenarios +scenarios = client.evals.generate_user_scenarios( + agents={ + "agent": types.evals.AgentConfig( + agent_id="agent", + instruction="You are a customer support agent for an airline.", + ), + }, + root_agent_id="agent", + user_scenario_generation_config=types.evals.UserScenarioGenerationConfig( + user_scenario_count=10, + simulation_instruction="Simulate customers with flight booking issues.", + environment_data="Flights available: NYC-LAX, NYC-SFO. Cancellation policy: free within 24h.", + model_name="gemini-2.5-flash", + ), +) + +# Step 2: Run inference with user simulation +dataset_with_responses = client.evals.run_inference( + agent=my_agent, # Your callable agent + src=scenarios, + config={ + "user_simulator_config": { + "model_name": "gemini-2.5-flash", + "max_turn": 5, + }, + }, +) + +# Step 3: Evaluate +result = client.evals.evaluate( + dataset=dataset_with_responses, + metrics=[types.RubricMetric.MULTI_TURN_GENERAL_QUALITY, types.RubricMetric.SAFETY], +) +``` + +## Pattern 4: Custom LLM-as-a-Judge with MetricPromptBuilder + +For domain-specific evaluation with structured rubrics. 
+ +```python +metric = types.LLMMetric( + name="domain_expertise", + prompt_template=types.MetricPromptBuilder( + metric_definition="Evaluates domain expertise in the response.", + criteria={ + "Accuracy": "Claims are factually correct for the domain", + "Depth": "Response shows understanding beyond surface level", + "Actionability": "Advice is specific and actionable", + }, + rating_scores={ + "1": "Incorrect or misleading information", + "2": "Partially correct but superficial", + "3": "Correct and shows reasonable understanding", + "4": "Accurate with good depth", + "5": "Expert-level accuracy, depth, and actionability", + }, + ), + judge_model="gemini-2.5-flash", + judge_model_sampling_count=3, +) +``` + +## Pattern 5: CodeExecutionMetric for Structured Validation + +For programmatic checks that go beyond text comparison. + +```python +# Validate JSON output structure +json_validator = types.CodeExecutionMetric( + name="json_structure_check", + custom_function=''' +import json +def evaluate(instance: dict) -> dict: + try: + data = json.loads(instance.get("response", "")) + required_keys = {"name", "status", "result"} + missing = required_keys - set(data.keys()) + if missing: + return {"score": 0.0, "explanation": f"Missing keys: {missing}"} + return {"score": 1.0, "explanation": "All required keys present"} + except json.JSONDecodeError as e: + return {"score": 0.0, "explanation": f"Invalid JSON: {e}"} +''', +) +``` + +## Pattern 6: Pairwise Model Comparison + +Compare two models using `calculate_win_rates()`. 
+ +```python +# Same dataset, two different model responses +dataset_a = types.EvaluationDataset(eval_cases=[ + types.EvalCase(prompt="Explain quantum computing", response="Model A response..."), +]) +dataset_b = types.EvaluationDataset(eval_cases=[ + types.EvalCase(prompt="Explain quantum computing", response="Model B response..."), +]) + +result_a = client.evals.evaluate(dataset=dataset_a, metrics=[types.RubricMetric.GENERAL_QUALITY]) +result_b = client.evals.evaluate(dataset=dataset_b, metrics=[types.RubricMetric.GENERAL_QUALITY]) + +# Compare +from agentplatform._genai._evals_metric_handlers import calculate_win_rates +win_rates = calculate_win_rates(result_a, result_b) +``` + +## Pattern 7: Parsing Results + +```python +result = client.evals.evaluate(dataset=dataset, metrics=metrics) + +# Summary level +for summary in result.summary_metrics: + print(f"{summary.metric_name}: mean={summary.mean_score}, pass_rate={summary.pass_rate}") + +# Per-case level +for case in result.eval_case_results: + for candidate in case.response_candidate_results: + for metric_name, metric_result in candidate.metric_results.items(): + print(f" {metric_name}: score={metric_result.score}") + print(f" explanation: {metric_result.explanation}") + + # Rubric verdicts (for rubric-based metrics) + if metric_result.rubric_verdicts: + for v in metric_result.rubric_verdicts: + print(f" rubric {v.evaluated_rubric.rubric_id}: " + f"{'PASS' if v.verdict else 'FAIL'} - {v.reasoning}") +``` + +## Error Handling + +```python +try: + result = client.evals.evaluate(dataset=dataset, metrics=metrics) +except Exception as e: + error_type = type(e).__name__ + if "PermissionDenied" in error_type: + print("Check: GCP project permissions, API enabled, billing active") + elif "InvalidArgument" in error_type: + print("Check: dataset format, metric compatibility with data type") + elif "ResourceExhausted" in error_type: + print("Check: API quota, reduce dataset size or add delay") + else: + raise +``` diff 
--git a/.agents/skills/agent-platform-eval-flywheel/scripts/compare_results.py b/.agents/skills/agent-platform-eval-flywheel/scripts/compare_results.py new file mode 100644 index 0000000..ff7a160 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/scripts/compare_results.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +"""Diff two Agent Platform Eval result JSON files side by side. + +Use in Stage 5 of the Quality Flywheel (Optimize & Iterate) to confirm a fix +improved the target metric without regressing others. + +Reads two JSON files produced by ``result.model_dump_json()``, joins summary +metrics by metric_name, and prints baseline vs. candidate scores with a delta. + +Usage: + python compare_results.py --baseline baseline.json --candidate candidate.json + python compare_results.py -b baseline.json -c candidate.json --threshold 0.05 + python compare_results.py -b baseline.json -c candidate.json --json + +Exit codes: 0 = candidate >= baseline on every metric (within threshold), + 1 = at least one metric regressed beyond the threshold. 
+""" + +import argparse +import json +import sys +from typing import Any + + +def _first_present(row: dict[str, Any], *keys: str) -> Any: + """Return the first value among keys that is not None, else None. + + Unlike chaining lookups with ``or``, a legitimate 0 or 0.0 counts as present. + """ + return next((row[k] for k in keys if row.get(k) is not None), None) + + +def _summary_by_name(result: dict[str, Any]) -> dict[str, dict[str, Any]]: + """Index summary metrics by metric_name. + + Accepts snake_case or camelCase keys (snake_case wins). Entries without a + metric name are skipped. Scores of 0.0 are preserved rather than treated as + missing, so a drop to zero still produces a delta in compare(). + """ + out = {} + for s in result.get("summary_metrics") or []: + name = s.get("metric_name") or s.get("metricName") + if name: + out[name] = { + "mean_score": _first_present(s, "mean_score", "meanScore"), + "pass_rate": _first_present(s, "pass_rate", "passRate"), + "num_cases": _first_present(s, "num_cases", "numCases"), + } + return out + + +def _delta(a: float | None, b: float | None) -> float | None: + """Compute b - a, or None if either input is missing.""" + if a is None or b is None: + return None + return b - a + + +def _format_signed(x: float | None) -> str: + """Render a delta with explicit sign and 4 decimal places.""" + if x is None: + return "—" + return f"{x:+.4f}" + + +def compare( + baseline: dict[str, Any], + candidate: dict[str, Any], + threshold: float = 0.0, +) -> tuple[list[dict[str, Any]], bool]: + """Return per-metric diff rows and whether the candidate is safe to ship. + + A row regresses when (candidate - baseline) < -threshold on either + mean_score or pass_rate. Threshold defaults to 0 (any drop is a regression).
+ """ + b = _summary_by_name(baseline) + c = _summary_by_name(candidate) + metric_names = sorted(set(b) | set(c)) + + rows = [] + regressed = False + for name in metric_names: + bv = b.get(name, {}) + cv = c.get(name, {}) + d_mean = _delta(bv.get("mean_score"), cv.get("mean_score")) + d_pass = _delta(bv.get("pass_rate"), cv.get("pass_rate")) + row = { + "metric_name": name, + "baseline_mean": bv.get("mean_score"), + "candidate_mean": cv.get("mean_score"), + "delta_mean": d_mean, + "baseline_pass": bv.get("pass_rate"), + "candidate_pass": cv.get("pass_rate"), + "delta_pass": d_pass, + } + is_regression = (d_mean is not None and d_mean < -threshold) or ( + d_pass is not None and d_pass < -threshold + ) + row["regressed"] = is_regression + if is_regression: + regressed = True + rows.append(row) + return rows, not regressed + + +def _format_table(rows: list[dict[str, Any]]) -> str: + """Render the diff as a fixed-width text table.""" + if not rows: + return "(no metrics in either result)\n" + header = [ + "metric_name", + "base_mean", + "cand_mean", + "Δmean", + "base_pass", + "cand_pass", + "Δpass", + "regressed", + ] + + def _cell(r, key): + v = r.get(key) + if isinstance(v, bool): + return "YES" if v else "" + if isinstance(v, float): + return ( + f"{v:.4f}" + if key not in ("delta_mean", "delta_pass") + else _format_signed(v) + ) + if v is None: + return "—" + return str(v) + + name_map = { + "metric_name": "metric_name", + "base_mean": "baseline_mean", + "cand_mean": "candidate_mean", + "Δmean": "delta_mean", + "base_pass": "baseline_pass", + "cand_pass": "candidate_pass", + "Δpass": "delta_pass", + "regressed": "regressed", + } + widths = {} + for col in header: + cells = [_cell(r, name_map[col]) for r in rows] + widths[col] = max(len(col), *(len(c) for c in cells)) + + line1 = " ".join(c.ljust(widths[c]) for c in header) + line2 = " ".join("-" * widths[c] for c in header) + body = "\n".join( + " ".join(_cell(r, name_map[c]).ljust(widths[c]) for c in header) for 
r in rows + ) + return f"{line1}\n{line2}\n{body}\n" + + +def main(): + parser = argparse.ArgumentParser( + description="Diff two Agent Platform Eval result JSON files side by side." + ) + parser.add_argument( + "--baseline", + "-b", + required=True, + help="Path to the baseline result JSON.", + ) + parser.add_argument( + "--candidate", + "-c", + required=True, + help="Path to the candidate result JSON (after the fix).", + ) + parser.add_argument( + "--threshold", + "-t", + type=float, + default=0.0, + help="A candidate metric counts as regressed only if it drops more" + " than this much vs. baseline. Default 0.0 (any drop regresses).", + ) + parser.add_argument( + "--json", + action="store_true", + help="Emit the diff as JSON instead of a text table.", + ) + args = parser.parse_args() + + try: + with open(args.baseline) as f: + baseline = json.load(f) + with open(args.candidate) as f: + candidate = json.load(f) + except FileNotFoundError as e: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"ERROR: Invalid JSON: {e}", file=sys.stderr) + sys.exit(1) + + rows, ok = compare(baseline, candidate, threshold=args.threshold) + + if args.json: + print(json.dumps({"rows": rows, "no_regression": ok}, indent=2)) + else: + print(_format_table(rows)) + if not ok: + print( + f"REGRESSION: at least one metric dropped by more than" + f" {args.threshold:+.4f} vs. baseline.", + file=sys.stderr, + ) + + sys.exit(0 if ok else 1) + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/agent-platform-eval-flywheel/scripts/inspect_results.py b/.agents/skills/agent-platform-eval-flywheel/scripts/inspect_results.py new file mode 100644 index 0000000..b49ed27 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/scripts/inspect_results.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +"""Print summary metrics and per-case scores from an Agent Platform Eval result. 
+ +Use after ``client.evals.evaluate(...)`` returns to read scores. The field +paths (``summary_metrics``, ``eval_case_results[].response_candidate_results``) +are deep and easy to get wrong; this helper renders the standard view. + +Two input shapes are accepted: + +1. A JSON file produced by ``result.model_dump_json()`` (preferred — save + the result to disk so it can be diffed later with ``compare_results.py``). +2. An in-process result object, when imported as a library: + + from inspect_results import render + render(result) + +Usage: + python inspect_results.py --result result.json + python inspect_results.py --result result.json --failing-only + python inspect_results.py --result result.json --metric multi_turn_task_success + python inspect_results.py --result result.json --save-html report.html + + +Exit codes: 0 = at least one case present, 1 = empty / malformed result. +""" + +import argparse +import json +import sys +from typing import Any + + +def _first_present(row: dict[str, Any], *keys: str) -> Any: + """Return the first value among keys that is not None, else None. + + Unlike chaining lookups with ``or``, a legitimate 0 or 0.0 counts as present. + """ + return next((row[k] for k in keys if row.get(k) is not None), None) + + +def _summary_rows(result: dict[str, Any]) -> list[dict[str, Any]]: + """Extract summary metric rows from a serialized result. + + Accepts snake_case or camelCase keys (snake_case wins). Scores of 0.0 are + kept as 0.0 instead of being rendered as None. + """ + rows = [] + for s in result.get("summary_metrics") or []: + rows.append({ + "metric_name": s.get("metric_name") or s.get("metricName"), + "mean_score": _first_present(s, "mean_score", "meanScore"), + "pass_rate": _first_present(s, "pass_rate", "passRate"), + "num_cases": _first_present(s, "num_cases", "numCases"), + }) + return rows + + +def _case_rows( + result: dict[str, Any], + metric_filter: str | None, + failing_only: bool, +) -> list[dict[str, Any]]: + """Extract per-case metric rows from a serialized result.""" + rows = [] + for case in result.get("eval_case_results") or []: + case_id = case.get("eval_case_id") or case.get("evalCaseId") or "?"
+ for cand in case.get("response_candidate_results") or []: + metric_results = cand.get("metric_results") or {} + for name, r in metric_results.items(): + if metric_filter and name != metric_filter: + continue + score = r.get("score") if isinstance(r, dict) else None + if failing_only and score is not None and score >= 1.0: + continue + rows.append({ + "case_id": case_id, + "metric_name": name, + "score": score, + "explanation": ( + (r.get("explanation") or "")[:200] + if isinstance(r, dict) + else "" + ), + }) + return rows + + +def _format_table(rows: list[dict[str, Any]], cols: list[str]) -> str: + """Render rows as a fixed-width text table.""" + if not rows: + return "(no rows)\n" + widths = { + c: max(len(c), *(len(str(r.get(c, ""))) for r in rows)) for c in cols + } + header = " ".join(c.ljust(widths[c]) for c in cols) + sep = " ".join("-" * widths[c] for c in cols) + body = "\n".join( + " ".join(str(r.get(c, "")).ljust(widths[c]) for c in cols) for r in rows + ) + return f"{header}\n{sep}\n{body}\n" + + +def render( + result: dict[str, Any] | Any, + metric_filter: str | None = None, + failing_only: bool = False, +) -> str: + """Render summary and per-case tables. 
Accepts dict or SDK result object.""" + if not isinstance(result, dict): + if hasattr(result, "model_dump"): + result = result.model_dump() + else: + result = json.loads(json.dumps(result, default=lambda o: o.__dict__)) + + out = [] + out.append("=== Summary Metrics ===") + out.append( + _format_table( + _summary_rows(result), + ["metric_name", "mean_score", "pass_rate", "num_cases"], + ) + ) + out.append("=== Per-Case Scores ===") + if failing_only: + out.append("(failing cases only, score < 1.0)") + if metric_filter: + out.append(f"(filtered to metric: {metric_filter})") + out.append( + _format_table( + _case_rows(result, metric_filter, failing_only), + ["case_id", "metric_name", "score", "explanation"], + ) + ) + return "\n".join(out) + + +def main(): + parser = argparse.ArgumentParser( + description=( + "Print summary metrics and per-case scores from an Agent" + " Platform Eval result JSON file." + ) + ) + parser.add_argument( + "--result", + "-r", + required=True, + help="Path to a JSON file produced by result.model_dump_json().", + ) + parser.add_argument( + "--metric", + "-m", + help="Only show per-case scores for this metric name.", + ) + parser.add_argument( + "--failing-only", + action="store_true", + help="Only show per-case scores with score < 1.0.", + ) + parser.add_argument( + "--save-html", + "-o", + dest="html_path", + help="Also render the result as a browsable HTML report to this path.", + ) + args = parser.parse_args() + + try: + with open(args.result) as f: + result = json.load(f) + except FileNotFoundError: + print(f"ERROR: File not found: {args.result}", file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"ERROR: Invalid JSON in {args.result}: {e}", file=sys.stderr) + sys.exit(1) + + if not result.get("summary_metrics") and not result.get("eval_case_results"): + print( + "ERROR: result has no summary_metrics or eval_case_results." 
+ " Did you save result.model_dump_json() to this file?", + file=sys.stderr, + ) + sys.exit(1) + + print( + render(result, metric_filter=args.metric, failing_only=args.failing_only) + ) + + if args.html_path: + save_html(result, args.html_path) + + +def save_html(result: dict[str, Any], html_path: str) -> None: + """Render the result as a standalone HTML report and write to ``html_path``.""" + try: + from agentplatform._genai import _evals_visualization + except ImportError as e: + print( + "ERROR: --save-html requires the agentplatform SDK to be installed" + f" (`pip install google-cloud-aiplatform`): {e}", + file=sys.stderr, + ) + sys.exit(1) + + try: + html_content = _evals_visualization.get_evaluation_html(json.dumps(result)) + except Exception as e: + print(f"ERROR: Failed to render HTML report: {e}", file=sys.stderr) + sys.exit(1) + + if not html_content: + print( + "ERROR: Agent Platform Eval SDK returned empty HTML — the result" + " may be missing fields the visualizer requires.", + file=sys.stderr, + ) + sys.exit(1) + + with open(html_path, "w", encoding="utf-8") as f: + f.write(str(html_content)) + print(f"Saved HTML report to {html_path}") + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/agent-platform-eval-flywheel/scripts/parse_adk_traces.py b/.agents/skills/agent-platform-eval-flywheel/scripts/parse_adk_traces.py new file mode 100644 index 0000000..db37313 --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/scripts/parse_adk_traces.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +"""Parse ADK session traces into Agent Platform Evaluation SDK dataset format. + +Reads serialized ADK session JSON (from Session.model_dump_json() or +DatabaseSessionService exports) and converts to the canonical +EvaluationDataset format for use with client.evals.evaluate(). 
+ +Usage: + python parse_adk_traces.py --input session.json --output dataset.json + python parse_adk_traces.py --input_dir ./sessions/ --output dataset.json + python parse_adk_traces.py --input session.json # prints to stdout + +Input format: JSON file(s) with ADK Session structure: + { + "id": "...", "app_name": "...", "user_id": "...", + "events": [{"author": "user"|"agent_name", "content": {...}}, ...] + } + +Output format: JSON with EvaluationDataset structure: + { + "eval_cases": [{"agent_data": {"agents": {...}, "turns": [...]}}] + } +""" + +import argparse +import json +import os +import sys +from typing import Any + + +def _is_user_event(event: dict[str, Any]) -> bool: + """Check if an event is from the user.""" + if event.get("author") == "user": + return True + content = event.get("content") + if isinstance(content, dict) and content.get("role") == "user": + return True + if event.get("role") == "user": + return True + return False + + +def _extract_content(event: dict[str, Any]) -> dict[str, Any] | None: + """Extract genai Content from an ADK event dict.""" + if "content" in event: + raw = event["content"] + if isinstance(raw, dict) and "parts" in raw: + return raw + if isinstance(raw, str): + return { + "role": "user" if _is_user_event(event) else "model", + "parts": [{"text": raw}], + } + if "parts" in event: + return {"role": event.get("role", "model"), "parts": event["parts"]} + return None + + +def _extract_author(event: dict[str, Any], default_agent_id: str) -> str: + """Extract the author from an event, preserving sub-agent attribution.""" + author = event.get("author") + if author: + return author + content = event.get("content") + if isinstance(content, dict) and content.get("role") == "user": + return "user" + if event.get("role") == "user": + return "user" + return default_agent_id + + +def _extract_agent_configs( + session: dict[str, Any], +) -> dict[str, dict[str, Any]]: + """Extract agent configs from session metadata if available.""" + 
configs = {} + # Check for agent_config in session metadata + agent_config = session.get("agent_config") or session.get("agentConfig") + if agent_config: + agent_id = agent_config.get("agent_id") or agent_config.get("agentId", "agent") + configs[agent_id] = { + "agent_id": agent_id, + "agent_type": agent_config.get("agent_type", agent_config.get("agentType")), + "instruction": agent_config.get("instruction"), + "description": agent_config.get("description"), + } + return configs + + # Infer from events — collect unique non-user authors + events = session.get("events", []) + authors = set() + for event in events: + author = event.get("author", "") + if author and author != "user": + authors.add(author) + + if not authors: + authors.add(session.get("app_name", session.get("appName", "agent"))) + + for author in authors: + configs[author] = {"agent_id": author} + + return configs + + +def _segment_into_turns( + events: list[dict[str, Any]], default_agent_id: str +) -> list[dict[str, Any]]: + """Segment a flat event list into ConversationTurns. + + A new turn starts with each user message (matching AgentData.from_session() + behavior). 
+ """ + turns = [] + current_events = [] + + for event in events: + is_user = _is_user_event(event) + + # Start new turn on user message (if we have accumulated events) + if is_user and current_events: + turns.append( + { + "turn_index": len(turns), + "turn_id": f"turn_{len(turns)}", + "events": current_events, + } + ) + current_events = [] + + content = _extract_content(event) + if content is None: + continue + + author = _extract_author(event, default_agent_id) + + agent_event = {"author": author, "content": content} + + # Preserve state_delta if present (from EventActions). + # "actions" may be present but null in a serialized session; dict.get's + # default only covers a missing key, so fall back to {} explicitly. + actions = event.get("actions") or {} + state_delta = actions.get("state_delta") or actions.get("stateDelta") + if state_delta: + agent_event["state_delta"] = state_delta + + current_events.append(agent_event) + + # Don't forget the last turn + if current_events: + turns.append( + { + "turn_index": len(turns), + "turn_id": f"turn_{len(turns)}", + "events": current_events, + } + ) + + return turns + + +def parse_session(session: dict[str, Any]) -> dict[str, Any]: + """Convert a single ADK session dict to an EvalCase dict.""" + events = session.get("events", []) + if not events: + raise ValueError(f"Session {session.get('id', 'unknown')} has no events.") + + agent_configs = _extract_agent_configs(session) + default_agent_id = next(iter(agent_configs)) + + turns = _segment_into_turns(events, default_agent_id) + if not turns: + raise ValueError(f"Session {session.get('id', 'unknown')} produced no turns.") + + agent_data = {"agents": agent_configs, "turns": turns} + return {"agent_data": agent_data} + + +def parse_file(filepath: str) -> list[dict[str, Any]]: + """Parse a JSON file containing one or more ADK sessions.""" + with open(filepath) as f: + data = json.load(f) + + # Handle single session or list of sessions + if isinstance(data, list): + sessions = data + elif isinstance(data, dict): + if "events" in data: + sessions = [data] + elif "sessions" in data: + sessions = data["sessions"] + else:
+ raise ValueError( + f"Unrecognized format in {filepath}. Expected a session object " + "with 'events' field, a list of sessions, or an object with " + "'sessions' field." + ) + else: + raise ValueError(f"Unexpected JSON type in {filepath}: {type(data)}") + + eval_cases = [] + for i, session in enumerate(sessions): + try: + eval_cases.append(parse_session(session)) + except ValueError as e: + print(f"WARNING: Skipping session {i}: {e}", file=sys.stderr) + + return eval_cases + + +def main(): + parser = argparse.ArgumentParser( + description="Parse ADK session traces into Agent Platform Eval dataset format." + ) + parser.add_argument( + "--input", + "-i", + help="Path to a single ADK session JSON file.", + ) + parser.add_argument( + "--input_dir", + "-d", + help="Path to a directory of ADK session JSON files.", + ) + parser.add_argument( + "--output", + "-o", + help="Output file path. Prints to stdout if not specified.", + ) + args = parser.parse_args() + + if not args.input and not args.input_dir: + parser.error("Specify --input or --input_dir") + + eval_cases = [] + + if args.input: + if not os.path.exists(args.input): + print(f"ERROR: File not found: {args.input}", file=sys.stderr) + sys.exit(1) + eval_cases.extend(parse_file(args.input)) + + if args.input_dir: + if not os.path.isdir(args.input_dir): + print(f"ERROR: Directory not found: {args.input_dir}", file=sys.stderr) + sys.exit(1) + json_files = sorted( + f for f in os.listdir(args.input_dir) if f.endswith(".json") + ) + if not json_files: + print(f"ERROR: No .json files in {args.input_dir}", file=sys.stderr) + sys.exit(1) + for filename in json_files: + filepath = os.path.join(args.input_dir, filename) + eval_cases.extend(parse_file(filepath)) + + if not eval_cases: + print("ERROR: No eval cases produced from input.", file=sys.stderr) + sys.exit(1) + + dataset = {"eval_cases": eval_cases} + output_json = json.dumps(dataset, indent=2, default=str) + + if args.output: + with open(args.output, "w") as f: + 
def _require_html(html: object, visualizer: str) -> str:
    """Return ``html`` as ``str``, raising if the SDK produced nothing.

    Args:
        html: Value returned by an ``_evals_visualization`` renderer.
        visualizer: Name used in the error message ("evaluation" or
            "loss-analysis").

    Returns:
        ``str(html)``.

    Raises:
        RuntimeError: ``html`` is empty or otherwise falsy.
    """
    if not html:
        raise RuntimeError(
            "Agent Platform Eval SDK returned empty HTML — the input may be"
            f" missing fields the {visualizer} visualizer requires."
        )
    return str(html)


def render_evaluation_html(result_json_str: str) -> str:
    """Render an EvaluationResult JSON string as standalone HTML.

    Args:
        result_json_str: JSON text of a saved evaluation result.

    Returns:
        The HTML document produced by the SDK visualizer.

    Raises:
        ImportError: The ``agentplatform`` SDK is not importable.
        RuntimeError: The SDK returned empty HTML.
    """
    # Imported lazily so argument parsing and input checks work without the SDK.
    from agentplatform._genai import _evals_visualization

    return _require_html(
        _evals_visualization.get_evaluation_html(result_json_str), "evaluation"
    )


def render_loss_analysis_html(response_json_str: str) -> str:
    """Render a loss-clusters response JSON string as standalone HTML.

    Args:
        response_json_str: JSON text of a saved loss-clusters response.

    Returns:
        The HTML document produced by the SDK visualizer.

    Raises:
        ImportError: The ``agentplatform`` SDK is not importable.
        RuntimeError: The SDK returned empty HTML.
    """
    # Imported lazily so argument parsing and input checks work without the SDK.
    from agentplatform._genai import _evals_visualization

    return _require_html(
        _evals_visualization.get_loss_analysis_html(response_json_str),
        "loss-analysis",
    )


def main():
    """CLI entry point: read a JSON artifact, render it, write the HTML file.

    Exits with status 1 (after printing ``ERROR: ...`` to stderr) when the
    input cannot be read or is not valid JSON, when rendering fails, or when
    the output file cannot be written.
    """
    parser = argparse.ArgumentParser(
        description="Render Agent Platform Eval HTML reports from saved JSON artifacts."
    )
    parser.add_argument(
        "--input",
        "-i",
        required=True,
        help="Path to the input JSON file (result or loss-clusters response).",
    )
    parser.add_argument(
        "--type",
        "-t",
        choices=["evaluation", "loss-analysis"],
        required=True,
        help=(
            "evaluation = client.evals.evaluate result;"
            " loss-analysis = client.evals.generate_loss_clusters response."
        ),
    )
    parser.add_argument(
        "--output",
        "-o",
        required=True,
        help="Path to write the HTML report to.",
    )
    args = parser.parse_args()

    try:
        # Read as UTF-8 to match the encoding used for the output below,
        # instead of depending on the platform locale.
        with open(args.input, encoding="utf-8") as f:
            content = f.read()
        # Parsed only to fail fast on malformed input; the renderers take text.
        json.loads(content)
    except FileNotFoundError:
        print(f"ERROR: File not found: {args.input}", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"ERROR: Invalid JSON in {args.input}: {e}", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        # Directory given as input, permission denied, non-UTF-8 bytes, ...
        print(f"ERROR: Cannot read {args.input}: {e}", file=sys.stderr)
        sys.exit(1)

    try:
        if args.type == "evaluation":
            html = render_evaluation_html(content)
        else:
            html = render_loss_analysis_html(content)
    except ImportError as e:
        print(
            "ERROR: requires the agentplatform SDK to be installed"
            f" (`pip install google-cloud-aiplatform`): {e}",
            file=sys.stderr,
        )
        sys.exit(1)
    except Exception as e:
        print(f"ERROR: Failed to render HTML: {e}", file=sys.stderr)
        sys.exit(1)

    try:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(html)
    except OSError as e:
        print(f"ERROR: Cannot write {args.output}: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"Saved {args.type} HTML report to {args.output}")
__name__ == "__main__": + main() diff --git a/.agents/skills/agent-platform-eval-flywheel/scripts/validate_dataset.py b/.agents/skills/agent-platform-eval-flywheel/scripts/validate_dataset.py new file mode 100644 index 0000000..fe1bf4f --- /dev/null +++ b/.agents/skills/agent-platform-eval-flywheel/scripts/validate_dataset.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python3 +"""Validate an evaluation dataset for Agent Platform Eval SDK compatibility. + +Checks structural compliance with the EvaluationDataset schema, +required fields per metric type, and common formatting mistakes. + +Usage: + python validate_dataset.py --dataset dataset.json + python validate_dataset.py --dataset dataset.json --metrics + hallucination,multi_turn_task_success + +Metric names are the unversioned form (the SDK resolves them to the latest +version). Pinned versions (e.g., general_quality_v1) are also accepted. + +Exit codes: 0 = valid, 1 = invalid (with specific errors). +""" + +import argparse +import json +import sys +from typing import Any + +_SINGLE_TURN_METRICS = frozenset({ + "general_quality", + "text_quality", + "instruction_following", + "grounding", + "safety", + "hallucination", + "tool_use_quality", + "final_response_reference_free", + "final_response_quality", +}) +_MULTI_TURN_METRICS = frozenset({ + "multi_turn_tool_use_quality", + "multi_turn_trajectory_quality", + "multi_turn_task_success", + "multi_turn_general_quality", + "multi_turn_text_quality", +}) +_COMPUTATION_METRICS = frozenset({ + "bleu", + "rouge_1", + "rouge_l_sum", +}) + + +_VALID_ROLES = frozenset({"user", "model", "tool"}) + + +def _canonical_metric(name: str) -> str: + """Strip a trailing _v version suffix so versioned IDs match the sets.""" + parts = name.rsplit("_v", 1) + if len(parts) == 2 and parts[1].isdigit(): + return parts[0] + return name + + +class ValidationError: + """A single validation error with location context.""" + + def __init__(self, path: str, message: str, severity: str = "ERROR"): + 
def _validate_content(content: Any, path: str) -> list[ValidationError]:
    """Validate a genai Content object.

    Checks that ``content`` is a dict, that ``role`` (when present) is a
    string found in ``_VALID_ROLES``, and that ``parts`` is a non-empty list.
    A missing or empty ``role`` is accepted. The elements of ``parts`` are not
    inspected.

    Args:
        content: Candidate Content value taken from the dataset.
        path: Location of ``content`` in the dataset, used to label errors.

    Returns:
        The problems found; empty when ``content`` passes every check.
    """
    errors = []
    if not isinstance(content, dict):
        errors.append(
            ValidationError(path, f"Expected dict, got {type(content).__name__}")
        )
        return errors

    role = content.get("role")
    if role is not None and not isinstance(role, str):
        # Checked before the set lookup: an unhashable role (list, dict)
        # would raise TypeError there and abort the whole validation run.
        errors.append(
            ValidationError(
                f"{path}.role",
                f"Expected string, got {type(role).__name__}",
            )
        )
    elif role and role not in _VALID_ROLES:
        errors.append(
            ValidationError(
                f"{path}.role",
                f"Invalid role '{role}'. Must be one of:"
                f" {', '.join(sorted(_VALID_ROLES))}",
            )
        )
    if role == "assistant":
        # Extra hint for the most common mistake (OpenAI-style role name).
        errors.append(
            ValidationError(
                f"{path}.role",
                "Use 'model' instead of 'assistant' (Agent Platform convention).",
            )
        )

    parts = content.get("parts")
    if parts is None:
        errors.append(ValidationError(f"{path}.parts", "Missing 'parts' field."))
    elif not isinstance(parts, list):
        errors.append(ValidationError(f"{path}.parts", "Must be a list."))
    elif not parts:
        # An empty list is suspicious but not fatal, hence only a warning.
        errors.append(
            ValidationError(
                f"{path}.parts",
                "Empty parts list.",
                severity="WARNING",
            )
        )

    return errors
def _validate_agent_data(agent_data: Any, path: str) -> list[ValidationError]:
    """Validate an AgentData object.

    Checks the ``agents`` map (non-empty dict whose values are dicts, with a
    warning when a value has neither ``agent_id`` nor ``agentId``), validates
    every entry of ``turns`` with ``_validate_turn``, and warns when the turn
    indices found are not exactly ``0, 1, 2, ...`` in order.

    Args:
        agent_data: Candidate AgentData value taken from an eval case.
        path: Location of ``agent_data`` in the dataset, used to label errors.

    Returns:
        The problems found; empty when ``agent_data`` passes every check.
    """
    errors = []
    if not isinstance(agent_data, dict):
        errors.append(
            ValidationError(path, f"Expected dict, got {type(agent_data).__name__}")
        )
        return errors

    agents = agent_data.get("agents")
    if agents is None:
        errors.append(ValidationError(f"{path}", "Missing 'agents' field."))
    elif not isinstance(agents, dict):
        errors.append(ValidationError(f"{path}.agents", "Must be a dict."))
    elif not agents:
        errors.append(ValidationError(f"{path}.agents", "Empty agents map."))
    else:
        for agent_id, config in agents.items():
            if not isinstance(config, dict):
                errors.append(
                    ValidationError(
                        f"{path}.agents.{agent_id}",
                        f"Expected dict, got {type(config).__name__}",
                    )
                )
            elif "agent_id" not in config and "agentId" not in config:
                errors.append(
                    ValidationError(
                        f"{path}.agents.{agent_id}",
                        "Missing 'agent_id' field.",
                        severity="WARNING",
                    )
                )

    turns = agent_data.get("turns")
    if turns is None:
        errors.append(ValidationError(f"{path}", "Missing 'turns' field."))
    elif not isinstance(turns, list):
        errors.append(ValidationError(f"{path}.turns", "Must be a list."))
    elif not turns:
        errors.append(ValidationError(f"{path}.turns", "Empty turns list."))
    else:
        for i, turn in enumerate(turns):
            errors.extend(_validate_turn(turn, f"{path}.turns[{i}]"))

        indices = []
        for turn in turns:
            if not isinstance(turn, dict):
                # Already reported by _validate_turn above; calling .get on a
                # non-dict here would crash the validator.
                continue
            idx = turn.get("turn_index", turn.get("turnIndex"))
            if idx is not None:
                indices.append(idx)
        if indices and indices != list(range(len(indices))):
            errors.append(
                ValidationError(
                    f"{path}.turns",
                    f"turn_index values are not sequential 0-based: {indices}",
                    severity="WARNING",
                )
            )

    return errors
def main():
    """CLI entry point: load a dataset file, validate it, print a report.

    Prints the dataset path, the number of eval cases, and any errors and
    warnings returned by ``validate_dataset``. Exits with status 1 when the
    file cannot be read or parsed, or when at least one ERROR-severity issue
    was found; warnings alone do not change the exit status.
    """
    parser = argparse.ArgumentParser(
        description="Validate an evaluation dataset for Agent Platform Eval SDK."
    )
    parser.add_argument(
        "--dataset",
        "-d",
        required=True,
        help="Path to the evaluation dataset JSON file.",
    )
    parser.add_argument(
        "--metrics",
        "-m",
        help="Comma-separated list of metrics to validate against.",
    )
    args = parser.parse_args()

    try:
        # Decode explicitly as UTF-8 instead of relying on the platform locale.
        with open(args.dataset, encoding="utf-8") as f:
            dataset = json.load(f)
    except FileNotFoundError:
        print(f"ERROR: File not found: {args.dataset}", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"ERROR: Invalid JSON in {args.dataset}: {e}", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"ERROR: Cannot read {args.dataset}: {e}", file=sys.stderr)
        sys.exit(1)

    # Tolerate "a, b" and trailing commas: strip names and drop empty ones so
    # they can match the known metric sets.
    metrics = None
    if args.metrics:
        metrics = [m.strip() for m in args.metrics.split(",") if m.strip()] or None

    errors = validate_dataset(dataset, metrics)

    # The JSON root may not be an object and 'eval_cases' may not be a list;
    # validate_dataset reports both, so only the count must not crash.
    eval_cases = dataset.get("eval_cases") if isinstance(dataset, dict) else None
    n_cases = len(eval_cases) if isinstance(eval_cases, list) else 0
    real_errors = [e for e in errors if e.severity == "ERROR"]
    warnings = [e for e in errors if e.severity == "WARNING"]

    print(f"Dataset: {args.dataset}")
    print(f"Eval cases: {n_cases}")
    if metrics:
        print(f"Validating against metrics: {', '.join(metrics)}")
    print()

    if real_errors:
        print(f"ERRORS ({len(real_errors)}):")
        for e in real_errors:
            print(f"  {e}")
        print()

    if warnings:
        print(f"WARNINGS ({len(warnings)}):")
        for w in warnings:
            print(f"  {w}")
        print()

    if not real_errors and not warnings:
        print("VALID: No issues found.")

    if real_errors:
        sys.exit(1)
Use when you need to generate code for calling + Gemini or OpenMaaS models, authenticate with GenAI SDK, OpenAI SDK, or legacy + Agent Platform SDK, configure base URLs and global/regional endpoints, or troubleshoot + 429 Resource Exhausted (DSQ), 400 User Validation, or 404 Not Found errors. + Don't use for deploying models to endpoints or for running model evaluations. +--- + +# Agent Platform GenAI Inference Skill + +This skill provides instructions for authenticating and connecting to Google +Cloud Agent Platform to use Generative AI models. It covers both First-Party +(Gemini) and Third-Party (OpenMaaS) models. + +## Phase 0: Environment Setup + +**CRITICAL**: Before running any of the Python sample scripts in the `scripts/` +directory (e.g., `scripts/openmaas_openai_sdk.py`), you MUST ensure the +environment is correctly initialized by following these steps: + +1. **Google Cloud Authentication**: Authenticate with your Google Cloud + credentials and configure active Application Default Credentials (ADC) for + Agent Platform access: + ```bash + gcloud auth login + gcloud auth application-default login + ``` +2. **Enable API** (if not already enabled): + ```bash + gcloud services enable aiplatform.googleapis.com + ``` +3. **Virtual Environment**: Create and activate a dedicated local virtual + environment: + ```bash + python3 -m venv .venv + source .venv/bin/activate + ``` +4. **Install Dependencies**: Install the required SDKs: + ```bash + pip install -r scripts/requirements.txt + ``` +5. **Verify Setup (Optional)**: Run all sample scripts at once to verify the + environment is working end-to-end: + ```bash + ./scripts/verify_all.sh + ``` +6. **Execution**: Advise the user that every time they execute a Python + snippet from this skill, they must ensure this virtual environment is + activated first. + + +> [!IMPORTANT] +> **CRITICAL: Model IDs & Availability** +> * **Gemini Models**: See [Gemini Models][gemini-models-docs] for valid +> Model IDs and Regions. 
+> * **OpenMaaS Models**: See +> [Use Open Models on Agent Platform](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/maas/use-open-models) +> for Llama, DeepSeek, Qwen, etc. +> * **Incomplete Lists**: The Model IDs listed in this skill are **examples +> only** and may be incomplete or outdated. +> * **Action**: Always verify the Model ID and Region using the links above +> before generating code. +> +> [gemini-models-docs]: https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/migrate + +## Workflow Decision Tree + +1. **Model Family Identification**: Has the user specified whether they want + to call a **Gemini** (First-Party) model or an **OpenMaaS** (Third-Party, + e.g. Llama, DeepSeek, Qwen) model? + + * **No** -> Ask the user which model family they want to use. If they + provide a specific model name, infer the family from the name. + * **Yes** -> Proceed to Step 2. + +2. **SDK Choice**: Which SDK does the user want to use? + + * **Gemini + GenAI SDK** (preferred for Gemini) -> Proceed to + [1. Gemini Models]. + * **Gemini + legacy Vertex AI SDK** -> Proceed to [1. Gemini Models]. + * **OpenMaaS + OpenAI SDK** (preferred for OpenMaaS) -> Proceed to + [2. OpenMaaS Models]. + * **OpenMaaS + GenAI SDK** -> Proceed to [2. OpenMaaS Models]. + * **Unsure** -> Default to the preferred SDK for the chosen family. + +3. **Troubleshooting**: Is the user reporting an error (429 Resource + Exhausted, 400 User Validation, 404 Not Found, etc.)? + + * **Yes** -> Proceed to [3. Troubleshooting & Common Error Codes]. + * **No** -> Proceed with the SDK choice from Step 2. + +## 1. Gemini Models + +For Gemini models (e.g., `gemini-2.5-pro`, `gemini-3-flash-preview`), the +**GenAI SDK** (`google-genai`) is the **PREFERRED** method. The legacy +`vertexai` SDK is still supported but GenAI SDK is recommended for new projects. + +> [!IMPORTANT] +> **Preview Models (including Gemini 3.1)** are often **ONLY** available in the +> `global` region.
Stable models are available in `us-central1` and other +> regions. + +### Choosing the Right SDK + +* **Gemini Models**: **GenAI SDK** (`google-genai`) is **PREFERRED**. Use OpenAI SDK for compatibility, or Legacy SDK (`vertexai`) if needed. +* **OpenMaaS Models**: **OpenAI SDK** is **HIGHLY RECOMMENDED**. Use GenAI SDK or Legacy SDK if you have specific infrastructure requirements. + +### Installation + +```bash +pip install google-genai +``` + +### Python Example (GenAI SDK - Preferred) + +See [`scripts/gemini_genai_sdk.py`](scripts/gemini_genai_sdk.py) for the +complete code. + +### Alternative: OpenAI SDK (Chat Completions) + +Use the standard OpenAI SDK with the Agent Platform endpoint. This is great for +cross-compatibility. + +See [`scripts/gemini_openai_sdk.py`](scripts/gemini_openai_sdk.py) for the +complete code. + +### Legacy: Agent Platform SDK + +The legacy `vertexai` SDK is still widely used but `google-genai` is preferred +for new Gemini projects. + +See [`scripts/gemini_vertexai_sdk.py`](scripts/gemini_vertexai_sdk.py) for the +complete code. + +**Documentation**: [Google GenAI SDK](https://github.com/googleapis/python-genai) + +**Documentation**: [Agent Platform Gemini Models](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/google-models) + +## 2. OpenMaaS Models (Llama, DeepSeek, Qwen, etc.) + +For OpenMaaS (Model-as-a-Service) models, the **HIGHLY RECOMMENDED** approach is +to use the standard **OpenAI SDK** with a specific Vertex AI endpoint. + +> [!WARNING] +> While `GenerativeModel` *can* support some OpenMaaS models, it is +**discouraged**. Use the OpenAI SDK for best compatibility (especially for Chat +Completions). + +### Installation + +```bash +pip install openai google-auth +``` + +### Authentication for OpenAI SDK + +You **MUST** use a Google Cloud OAuth access token as the API key for the OpenAI +SDK. 
+ +```python +import google.auth +from google.auth.transport.requests import Request + +def get_gcp_access_token(): + creds, _ = google.auth.default() + creds.refresh(Request()) + return creds.token +``` + +> [!NOTE] +> Google Cloud access tokens typically expire after 1 hour. The +> `get_gcp_access_token()` function above retrieves a *fresh* token at the time +> it is called. + +> For long-running applications, you must implement a refresh mechanism. See [Refresh the access token](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/migrate/openai/auth-and-credentials?hl=en#refresh_your_credentials) for details. + + +### Configuration (Base URL) + + + +- **Global Endpoint** (Recommended for most models requiring global + availability): + `https://aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/global/endpoints/openapi` +- **Regional Endpoint**: + `https://{REGION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{REGION}/endpoints/openapi` + + +### Python Example (OpenMaaS - Chat Completions) + +See [`scripts/openmaas_openai_sdk.py`](scripts/openmaas_openai_sdk.py) for the +complete code. + +> [!TIP] +> **Alternative: Environment Variables** +> You can set environment variables in your shell instead of updating the code. +> ```bash +> export OPENAI_BASE_URL="https://aiplatform.googleapis.com/v1/projects/YOUR_PROJECT_ID/locations/global/endpoints/openapi" +> export OPENAI_API_KEY="$(gcloud auth print-access-token)" +> ``` +> Then initialize the client without arguments: `client = OpenAI()` + +### Python Example (OpenMaaS - Completions API) + +The following models support the legacy Completions API: `zai-org/glm-5-maas`, +`moonshotai/kimi-k2-thinking-maas`, `minimaxai/minimax-m2-maas`, +`deepseek-ai/deepseek-v3.1-maas`, and `deepseek-ai/deepseek-v3.2-maas`.
+ +```python +response = client.completions.create( + model="deepseek-ai/deepseek-v3.2-maas", + prompt="Once upon a time", + max_tokens=100 +) +print(response.choices[0].text) +``` + +### Python Example (OpenMaaS - Embeddings) + +```python +# Verify specific Embedding Model ID on Model Garden (e.g., intfloat/multilingual-e5-small) +response = client.embeddings.create( + model="intfloat/multilingual-e5-large-maas", + input="The quick brown fox jumps over the lazy dog", +) +print(response.data[0].embedding) +``` + +### Alternative: GenAI SDK + +The `google-genai` SDK can also access OpenMaaS models via the `vertexai` +backend. + +See [`scripts/openmaas_genai_sdk.py`](scripts/openmaas_genai_sdk.py) for the +complete code. + +> [!IMPORTANT] +> **Model ID Format**: For GenAI SDK with OpenMaaS, you **MUST** use the full +> path: `publishers/PUBLISHER/models/MODEL` (e.g., +> `publishers/zai-org/models/glm-5-maas`). + +### Legacy: Agent Platform SDK (OpenMaaS) + +For OpenMaaS, you can also use `GenerativeModel` (if supported). + +See [`scripts/openmaas_vertexai_sdk.py`](scripts/openmaas_vertexai_sdk.py) for +the complete code. + +> [!IMPORTANT] +> **Model ID Format**: For Agent Platform SDK with OpenMaaS, you **MUST** use the +> full path: `publishers/PUBLISHER/models/MODEL`. + +### Model Reference & Availability + +**Documentation**: [Use Open Models on Agent Platform](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/maas/use-open-models) + +> [!TIP] +> **Self-Deployment for Control**: If you need **dedicated hardware** +> (GPUs/TPUs), **guaranteed capacity**, or **specific regional placement** not +> offered by MaaS, you can **Self-Deploy** these models to Agent Platform +> Endpoints. Search for the model in Model Garden and click "Deploy" to select +> your machine type. + +> [!IMPORTANT] +> **Finding Inference Examples**: The list above is a starting point. 
For the +> **definitive** inference snippets (especially for Chat Completions payload +> structure): +> 1. Consult the [Use Open Models on Agent Platform](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/maas/use-open-models) +> list. +> 2. Click the link for your specific model (e.g., "DeepSeek-V3") to visit its +> **Model Garden** page. +> 3. Look for the **"Sample Code"** or **"Use this model"** button on the Model +> Garden page to get the exact `curl` or Python code for that specific model +> version. + +> [!NOTE] +> This list is **INCOMPLETE**. See +> [Use Open Models on Agent Platform](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/maas/use-open-models) +> for the full list of supported models. + +| Model Family | Model ID Examples | Location | Notes | +| :--- | :--- | :--- | :--- | +| **Llama 4** | `meta/llama-4-maverick-17b-128e-instruct-maas` | `us-east5` | | +| **Llama 4** | `meta/llama-4-scout-17b-16e-instruct-maas` | `us-east5` | | +| **Llama 3.3** | `meta/llama-3.3-70b-instruct-maas` | `us-central1` | | +| **DeepSeek** | `deepseek-ai/deepseek-v3.2-maas` | `global` | Global ONLY | +| **DeepSeek** | `deepseek-ai/deepseek-v3.1-maas` | `us-west2` | US-West2 ONLY | +| **DeepSeek** | `deepseek-ai/deepseek-r1-0528-maas` | `us-central1` | | +| **Qwen 3** | `qwen/qwen3-coder-480b-a35b-instruct-maas` | `global` | | +| **Qwen 3** | `qwen/qwen3-next-80b-a3b-instruct-maas` | `global` | | +| **Kimi** | `moonshotai/kimi-k2-thinking-maas` | `global` | | +| **MiniMax** | `minimaxai/minimax-m2-maas` | `global` | | +| **GLM** | `zai-org/glm-4.7-maas`, `zai-org/glm-5-maas` | `global` | | + +## 3. Troubleshooting & Common Error Codes + +### 429: Resource Exhausted + +* **Cause**: OpenMaaS and Gemini models use **Dynamic Shared Quota (DSQ)**. + Resources are pooled and allocated dynamically based on availability.
A 429 + error indicates the shared pool is temporarily exhausted, not necessarily + that *your* specific project quota is hit (though it can be). +* **Solution**: Implement strict **exponential backoff and retry** strategies. +* **High Throughput**: For production workloads requiring high throughput or guaranteed capacity, consider **Provisioned Throughput (PT)**. +* **Important**: Quota increases through normal cloud processes (Cloud Console) are **NOT** applicable for DSQ constraints. +* **Documentation**: [Quotas and limits (DSQ)](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/quotas) + +### 400: User Validation Error + +* **Cause**: Invalid request format, unsupported parameter, or incorrect Model ID. +* **Action**: Double-check your request payload and parameters. Verify the Model ID and Region are correct. + +### 404: Not Found / Model Not Available + +* **Cause**: The model is not enabled, or not available in the specified project or region. +* **Action**: + 1. **Check Location Availability**: + * **OpenMaaS**: Verify the model is available in your region. See [Model Availability by Location](https://docs.cloud.google.com/gemini-enterprise-agent-platform/resources/locations#genai-open-models). + * **Gemini**: + + * **Source of Truth**: Always check [Gemini Model Locations](https://docs.cloud.google.com/gemini-enterprise-agent-platform/resources/locations#google-models) for the authoritative list. + + * **Preview Models**: All Preview models (e.g., Gemini 3.1, experimental versions) are often **ONLY** available in the `us-central1` or `global` regions. + * **Stable Models**: (e.g., Gemini 2.5 Pro) Available in `us-central1`, `europe-west4`, and many other regions. + * **Important**: If you get a 404/400 error, try switching your client location to `us-central1` or `global`. + 2. **Enable Llama Models**: For **Llama 3.3** and **Llama 4**, you **MUST** + enable the model in Model Garden before use. 
Go to the [Model Garden] + (https://console.cloud.google.com/agent-platform/model-garden), search + for the model card (e.g., "Llama 3.3 API Service"), and click + **Enable**. Only then can you make inference requests. diff --git a/.agents/skills/agent-platform-inference/scripts/gemini_genai_sdk.py b/.agents/skills/agent-platform-inference/scripts/gemini_genai_sdk.py new file mode 100644 index 0000000..faebe93 --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/gemini_genai_sdk.py @@ -0,0 +1,16 @@ +"""Example of using the GenAI SDK with Gemini on Vertex AI.""" + +from google import genai +import google.auth + +# Get default project ID from environment +_, project_id = google.auth.default() + +# Initialize GenAI Client with Vertex AI backend +# Use location="global" for Preview models (Gemini 2.0) +client = genai.Client(enterprise=True, project=project_id, location="us-central1") + +response = client.models.generate_content( + model="gemini-2.5-pro", contents="Why is the sky blue?" +) +print(response.text) diff --git a/.agents/skills/agent-platform-inference/scripts/gemini_openai_sdk.py b/.agents/skills/agent-platform-inference/scripts/gemini_openai_sdk.py new file mode 100644 index 0000000..a27e655 --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/gemini_openai_sdk.py @@ -0,0 +1,28 @@ +"""Example of using the OpenAI SDK with Gemini on Vertex AI.""" + +import google.auth +import google.auth.transport.requests +import openai + + +def get_gcp_access_token(): + creds, _ = google.auth.default() + # Refresh credentials using a Request transport object to obtain a fresh + # OAuth access token for the OpenAI client authorization header. 
+ creds.refresh(google.auth.transport.requests.Request()) + return creds.token + + +# Get default project ID from environment +_, project_id = google.auth.default() + +client = openai.OpenAI( + base_url=f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/us-central1/endpoints/openapi", + api_key=get_gcp_access_token(), +) + +response = client.chat.completions.create( + model="google/gemini-2.5-pro", + messages=[{"role": "user", "content": "Why is the sky blue?"}], +) +print(response.choices[0].message.content) diff --git a/.agents/skills/agent-platform-inference/scripts/gemini_vertexai_sdk.py b/.agents/skills/agent-platform-inference/scripts/gemini_vertexai_sdk.py new file mode 100644 index 0000000..f73ba63 --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/gemini_vertexai_sdk.py @@ -0,0 +1,14 @@ +"""Example of using the Agent Platform SDK with Gemini on Vertex AI.""" +# TODO: replace with rename of Vertex +import google.auth +import vertexai +from vertexai.generative_models import GenerativeModel + +# Get default project ID from environment +_, project_id = google.auth.default() + +vertexai.init(project=project_id, location="us-central1") + +model = GenerativeModel("gemini-2.5-pro") +response = model.generate_content("Why is the sky blue?") +print(response.text) diff --git a/.agents/skills/agent-platform-inference/scripts/openmaas_genai_sdk.py b/.agents/skills/agent-platform-inference/scripts/openmaas_genai_sdk.py new file mode 100644 index 0000000..9e9a82f --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/openmaas_genai_sdk.py @@ -0,0 +1,21 @@ +"""Example of using the GenAI SDK with OpenMaaS on Vertex AI.""" + +from google import genai +import google.auth + +# Get default project ID from environment +_, project_id = google.auth.default() + +client = genai.Client( + enterprise=True, + project=project_id, + location="global", # OpenMaaS models are often global +) + +# Note: For GenAI SDK/Vertex with OpenMaaS, 
you MUST use the full path: +# `publishers/PUBLISHER/models/MODEL` +response = client.models.generate_content( + model="publishers/zai-org/models/glm-5-maas", + contents="Explain quantum computing in simple terms.", +) +print(response.text) diff --git a/.agents/skills/agent-platform-inference/scripts/openmaas_openai_sdk.py b/.agents/skills/agent-platform-inference/scripts/openmaas_openai_sdk.py new file mode 100644 index 0000000..1534dd0 --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/openmaas_openai_sdk.py @@ -0,0 +1,33 @@ +"""Example of using the OpenAI SDK with OpenMaaS on Vertex AI.""" + +import google.auth +import google.auth.transport.requests +import openai + + +def get_gcp_access_token(): + creds, _ = google.auth.default() + creds.refresh(google.auth.transport.requests.Request()) + return creds.token + + +# Get default project ID from environment +_, project_id = google.auth.default() + +client = openai.OpenAI( + base_url=f"https://aiplatform.googleapis.com/v1/projects/{project_id}/locations/global/endpoints/openapi", + api_key=get_gcp_access_token(), +) + +# NOTE: For OpenMaaS models, you MUST use the format: `publisher/model` +# Example: `deepseek-ai/deepseek-v3.2-maas`, `zai-org/glm-5-maas` etc. 
+ +response = client.chat.completions.create( + model="zai-org/glm-5-maas", + messages=[{ + "role": "user", + "content": "Explain quantum computing in simple terms.", + }], +) + +print(response.choices[0].message.content) diff --git a/.agents/skills/agent-platform-inference/scripts/openmaas_vertexai_sdk.py b/.agents/skills/agent-platform-inference/scripts/openmaas_vertexai_sdk.py new file mode 100644 index 0000000..383927b --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/openmaas_vertexai_sdk.py @@ -0,0 +1,15 @@ +"""Example of using the Agent Platform SDK with OpenMaaS on Vertex AI.""" +# TODO: replace with rename of Vertex +import google.auth +import vertexai +from vertexai.generative_models import GenerativeModel + +# Get default project ID from environment +_, project_id = google.auth.default() + +vertexai.init(project=project_id, location="global") + +# Important: Use the full resource path: `publishers/PUBLISHER/models/MODEL` +model = GenerativeModel("publishers/zai-org/models/glm-5-maas") +response = model.generate_content("Explain quantum computing.") +print(response.text) diff --git a/.agents/skills/agent-platform-inference/scripts/requirements.txt b/.agents/skills/agent-platform-inference/scripts/requirements.txt new file mode 100644 index 0000000..7c58686 --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/requirements.txt @@ -0,0 +1,4 @@ +google-genai +google-cloud-aiplatform +openai +google-auth diff --git a/.agents/skills/agent-platform-inference/scripts/verify_all.sh b/.agents/skills/agent-platform-inference/scripts/verify_all.sh new file mode 100755 index 0000000..88a01a9 --- /dev/null +++ b/.agents/skills/agent-platform-inference/scripts/verify_all.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -e + +# Create a temporary directory for the virtual environment +VENV_DIR=$(mktemp -d -t venv_verify.XXXXXX) +python3 -m venv "$VENV_DIR" +source "$VENV_DIR/bin/activate" + +# Trap to ensure cleanup happens on exit +cleanup() { + 
echo "Cleaning up virtual environment..." + deactivate 2>/dev/null || true + rm -rf "$VENV_DIR" +} +trap cleanup EXIT + +echo "Installing requirements..." +pip install -q -r scripts/requirements.txt + +echo "Running verification tests..." +FAILED=0 + +# Iterate directly over the files in the scripts directory +for script in scripts/*.py; do + echo "Running $script..." + if output=$(python3 "$script" 2>&1); then # Run once; capture stdout+stderr + echo " PASS: $script" + else + echo " FAIL: $script" + printf '%s\n' "$output" # Show captured output; re-running would trip set -e + FAILED=1 + fi +done + +if [[ $FAILED -eq 0 ]]; then + echo "All scripts passed verification!" + exit 0 +else + echo "Some scripts failed verification." + exit 1 +fi diff --git a/.agents/skills/agent-platform-migrate-from-ai-studio/SKILL.md b/.agents/skills/agent-platform-migrate-from-ai-studio/SKILL.md new file mode 100644 index 0000000..32d8dd9 --- /dev/null +++ b/.agents/skills/agent-platform-migrate-from-ai-studio/SKILL.md @@ -0,0 +1,350 @@ +--- +name: agent-platform-migrate-from-ai-studio +description: Guides agents and users through migrating from Gemini API in Google AI Studio to Gemini Enterprise Agent Platform (formerly Vertex AI). Use this skill when moving applications to Google Cloud, to leverage Cloud credits, or to unify inferencing with other Cloud infrastructure (IAM, billing, telemetry). +--- + +# Migrating from Gemini API in AI Studio to Agent Platform + +Use this skill when you need to transition an application from the +developer-centric Google AI Studio ecosystem +(`generativelanguage.googleapis.com`) to the enterprise-grade Google Cloud Agent +Platform (`aiplatform.googleapis.com`). + +-------------------------------------------------------------------------------- + +## When to Invoke This Skill + +* You want to migrate an application from Google AI Studio to Agent Platform + (formerly Vertex AI). +* You have **Google Cloud credits** (e.g., the $300 Welcome Free Trial) that + you want to apply toward Gemini API inferencing costs. 
+* You need to unify your inferencing pipelines, IAM permissions, telemetry, + and billing with existing Google Cloud infrastructure (Compute Engine, Cloud + SQL, BigQuery). +* You are deploying open-source orchestration engines (like OpenClaw or ADK + agents) on Google Cloud VMs, and want the entire system to run under a + unified Google Cloud billing structure. + +-------------------------------------------------------------------------------- + +## Gemini API Comparison + +Feature / Control | Google AI Studio (Gemini Developer API) | Agent Platform (Enterprise Gemini API) +:--------------------- | :-------------------------------------------------------------------- | :------------------------------------- +**API Endpoint** | `generativelanguage.googleapis.com` | `aiplatform.googleapis.com` +**Target Audience** | Developers, startups, students, researchers building production apps. | Enterprise production, MLOps engineers +**GCP Credit Support** | No (GCP credits/Free Trial **cannot** be applied) | Yes (Fully covered by Welcome or custom credits) +**Data Privacy** | Data may be reviewed to improve Google products | Prompts/responses are **never** used for training +**Security & IAM** | API key, OAuth | Google Cloud IAM (Service Accounts, OAuth 2.0, VPC-SC) +**Compliance & SLAs** | None (Best-effort availability) | 24/7 Enterprise Support, SLAs, HIPAA, SOC2 +**Throughput Options** | Shared / Rate-limited | Pay-as-you-go OR Provisioned Throughput +**MLOps Ecosystem** | Basic prompt management | Model Registry, Model Monitoring, Pipeline Evaluation +**Inferencing Scope** | Global endpoints only | Both Global and strict Regional endpoints + +See +[Google Cloud Documentation](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/migrate/migrate-google-ai.md.txt) +to learn more about the differences between the two offerings. 
+ +-------------------------------------------------------------------------------- + +## Migration Guide + +### Billing and Credits + +Google Cloud Free Trial credits +**[do not apply to AI Studio](https://docs.cloud.google.com/free/docs/free-cloud-features.md.txt)**. +To use your credits for Gemini models, you must route calls through the Agent +Platform. + +1. Create a Google Cloud billing account. You must provide a valid payment + method during setup to verify identity. +2. If you are a new customer, ensure your $300 Welcome credit is active in the + Billing Console. +3. **Avoid Billing Surprises:** To prevent automatic fallback to your standard + form of payment when credits are exhausted, you should establish a budget + alert: + * Go to **Billing** -> **Budgets & Alerts** -> **Create Budget**. + * Set the threshold to map to your credit limit or maximum comfortable + spend. + +### Enable the Agent Platform API + +You must explicitly enable the Agent Platform API on your target Google Cloud +Project. Run the following command via your local shell: + +```bash +gcloud services enable aiplatform.googleapis.com --project="YOUR_PROJECT_ID" +``` + +### Authentication & Authorization (IAM) + +#### User Auth + +For local debugging or script execution, authenticate using +[Application Default Credentials](https://docs.cloud.google.com/docs/authentication/application-default-credentials.md.txt) +(ADC). 
+ +**Option 1 - Automated Script**: + +```bash +bash <(curl -sSL https://storage.googleapis.com/cloud-samples-data/adc/setup_adc.sh) +``` + +**Option 2 - Manual Setup**: + +```bash +gcloud auth login +gcloud auth application-default login +``` + +Grant your user identity the required IAM role to perform inferencing calls: + +```bash +gcloud projects add-iam-policy-binding "YOUR_PROJECT_ID" \ + --member="user:YOUR_EMAIL@domain.com" \ + --role="roles/aiplatform.user" +``` + +#### Service Auth + +When running your application on Google Cloud infrastructure such as a Compute +Engine VM, authenticate using the machine's attached Service Account. For +example, the +[Compute Engine Default Service Account](https://docs.cloud.google.com/compute/docs/access/service-accounts.md.txt#default_service_account). + +1. Grant the virtual machine's underlying Service Account the user role: + +```bash +gcloud projects add-iam-policy-binding "YOUR_PROJECT_ID" \ + --member="serviceAccount:PROJECT_NUMBER-compute@developer.gserviceaccount.com" \ + --role="roles/aiplatform.user" +``` + +2. **[Compute Engine Access Scopes](https://docs.cloud.google.com/compute/docs/access/service-accounts.md.txt):** + Legacy access scopes can override IAM bindings. When provisioning or + modifying your GCE instance, you must verify that the VM access scope is + configured to either **Allow full access to all Cloud APIs** + (`https://www.googleapis.com/auth/cloud-platform`) or explicitly includes + the standard cloud-platform scope. + +-------------------------------------------------------------------------------- + +## Use the Gemini API in Agent Platform + +### SDKs (Client Libraries) + +You can continue to use the unified +[Google GenAI SDK](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/sdks/overview.md.txt) +(`google-genai`). This SDK works with both AI Studio and Agent Platform. 
You +only need to switch the routing flags via your runtime environment variables to +target the Agent Platform backend. + +Set your target environment details: + +```bash +export GOOGLE_CLOUD_PROJECT="YOUR_PROJECT_ID" +export GOOGLE_CLOUD_LOCATION="global" # Or your chosen regional endpoint +export GOOGLE_GENAI_USE_ENTERPRISE=TRUE +``` + +Now, your standard Python code shifts from using AI Studio to Agent Platform +without altering the core initialization blocks: + +```python +from google import genai + +# The client automatically picks up the GOOGLE_GENAI_USE_ENTERPRISE=TRUE environment flag +client = genai.Client() + +response = client.models.generate_content( + model='gemini-3-flash-preview', + contents='Hello world!', +) +print(response.text) +``` + +### Agent Development Kit (ADK) + +To call Gemini models in Agent Platform from an Agent Development Kit agent, +follow these steps. + +1. Authenticate to Google Cloud. + +If running an ADK agent in Google Cloud (e.g. Agent Platform Runtime), use the +agent's assigned service account. Alternatively, if running ADK locally, run: + +```bash +gcloud auth application-default login +``` + +2. Set env variables. Ensure these are set regardless of whether your ADK agent is + running in Google Cloud or locally: + +```bash +export GOOGLE_CLOUD_PROJECT="YOUR_PROJECT_ID" +export GOOGLE_CLOUD_LOCATION="global" +export GOOGLE_GENAI_USE_ENTERPRISE=TRUE +``` + +3. Initialize the ADK agent. You can use the same model string you used with AI + Studio (e.g. `gemini-3-flash-preview`). + +```python +from google.adk.agents.llm_agent import Agent + +def get_current_time(city: str) -> dict: + """Returns the current time in a specified city.""" + return {"status": "success", "city": city, "time": "10:30 AM"} + +root_agent = Agent( + model='gemini-3-flash-preview', + name='root_agent', + description="Tells the current time in a specified city.", + instruction="You are a helpful assistant that tells the current time in cities. 
Use the 'get_current_time' tool for this purpose.", + tools=[get_current_time], +) +``` + +To learn more about integrating ADK agents with Agent Platform, +[see the ADK documentation](https://raw.githubusercontent.com/google/adk-docs/main/docs/agents/models/agent-platform.md). + +### Antigravity CLI + +Google Cloud users [can now access](https://antigravity.google/pricing) +Antigravity 2.0, including the Antigravity CLI, with Gemini Enterprise Agent +Platform. + +1. [Install the Antigravity CLI](https://antigravity.google/docs/cli-install) + to your local environment. +2. Start the Antigravity CLI. + + ```bash + agy + ``` + +3. Follow the CLI setup prompts - select **Use a Google Cloud Project**. + +4. Complete the OAuth flow in the opened browser window using your + authenticated Google Cloud Workspace or user identity. + +5. Copy the confirmation token, and paste it directly back into your terminal. + +6. Follow the prompts to enter your Google Cloud Project ID. + +7. Select your Google Cloud location (e.g. `global`). + +8. Optionally, run the `/model` command to select a different Gemini model in + Agent Platform. + +### OpenClaw + +To configure an OpenClaw agent to use Gemini models in Agent Platform, follow +these steps. + +1. [Install OpenClaw using OpenClaw's official instructions](https://docs.openclaw.ai/install). + +⚠️ Important: OpenClaw 2026.5.28 currently has a `google-vertex` bug. Please use +2026.5.20 for now. + +2. Ensure that the runtime where OpenClaw is running (e.g. GCE VM with Service + Account) has the `aiplatform.user` IAM role - see Authentication and + Authorization section above. + +3. Get the project number from the user's project ID. 
Outside of GCE, run: + +```bash +export PROJECT_NUMBER=$(gcloud projects describe "$PROJECT_ID" --format="value(projectNumber)") +echo "Project number: $PROJECT_NUMBER" +``` + +Within a GCE VM, run: + +```bash +export PROJECT_NUMBER=$(curl "http://metadata.google.internal/computeMetadata/v1/project/numeric-project-id" -H "Metadata-Flavor: Google") +echo "Project number: $PROJECT_NUMBER" +``` + +4. From outside the GCE instance, generate `GOOGLE_APPLICATION_CREDENTIALS` + using the Google Cloud project number. Then, `scp` these credentials to the + GCE VM. + +```bash +mkdir -p ~/.config/gcloud +gcloud iam service-accounts keys create ~/.config/gcloud/application_default_credentials.json --iam-account="${PROJECT_NUMBER}-compute@developer.gserviceaccount.com" +``` + +5. Edit the configuration file that's usually located at: + `~/.openclaw/openclaw.json`. Ensure you prefix the Gemini model with + `google-vertex/`. + + ⚠️ Important: + + - Do not use Gemini 3.5 models, since OpenClaw's `google-vertex` provider + does not support them yet. Older models work. + - When using the + [Gemini 3 Flash Preview](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/gemini/3-flash) + model in Agent Platform, always set the location to `global`, NOT a + regional endpoint. 
+ +```json +{ + "env": { + "vars": { + "GOOGLE_CLOUD_PROJECT": "PROJECT_ID", + "GOOGLE_CLOUD_LOCATION": "global", + "GOOGLE_APPLICATION_CREDENTIALS": "~/.config/gcloud/application_default_credentials.json" + } + }, + "agents": { + "defaults": { + "model": { + "primary": "google-vertex/gemini-3-flash-preview" + }, + "workspace": "~/.openclaw/workspace", + "compaction": { + "mode": "safeguard" + }, + "heartbeat": { + "model": "google-vertex/gemini-3-flash-preview" + } + }, + "list": [ + { + "id": "main", + "workspace": "~/.openclaw/workspace", + "model": "google-vertex/gemini-3-flash-preview" + } + ] + }, + "session": { + "dmScope": "per-channel-peer" + }, + "tools": { + "profile": "coding" + } +} +``` + +6. Restart OpenClaw. + +```bash +openclaw gateway restart +``` + +7. Verify the OpenClaw connection to Agent Platform: + +```bash +openclaw models status +openclaw agent --agent main --message "Hello world!" +``` + +-------------------------------------------------------------------------------- + +## Additional Resources + +* [Google Cloud Free Trial Features & Limits](https://docs.cloud.google.com/free/docs/free-cloud-features.md.txt) +* [Migrate from Google AI Studio to Gemini Enterprise Agent Platform](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/migrate/migrate-google-ai.md.txt) +* [Gemini Enterprise Agent Platform - Models](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/google-models.md.txt) +* [Agent Development Kit Documentation - Connect to Models in Agent Platform](https://adk.dev/agents/models/agent-platform/#agent-platform-setup) +* [OpenClaw Documentation - Connect to Google models](https://docs.openclaw.ai/providers/google) +* [Google Cloud Budget Alerts - Setup Guide](https://docs.cloud.google.com/billing/docs/how-to/budgets.md.txt#steps-to-create-budget) diff --git a/.agents/skills/agent-platform-model-registry/SKILL.md b/.agents/skills/agent-platform-model-registry/SKILL.md new file mode 100644 index 
0000000..caee70c --- /dev/null +++ b/.agents/skills/agent-platform-model-registry/SKILL.md @@ -0,0 +1,136 @@ +--- +name: agent-platform-model-registry +description: >- + Agent Platform Model Registry Management. Use when you need to upload, list, + describe, update, or delete machine learning models (and their versions) + in the Agent Platform Model Registry. Don't use for model training, model + deployment to endpoints, or managing non-Agent Platform models. +--- + +# Agent Platform Model Registry Management + +## Overview + +This skill provides instructions for managing machine learning models in the +Agent Platform Model Registry. It covers listing models, describing model +details, uploading new models or versions, updating metadata, and deleting +models. + +## Safety & Confirmation Tiers (CRITICAL) + +Before executing any commands on behalf of the user, you MUST adhere to the +following safety tiers based on the action requested: + +1. **Tier R: Read-only (`list`, `describe`, `get`)** + * No confirmation needed. Execute immediately to gather information. +2. **Tier M: Mutating & Reversible (`upload`, `update`)** + * Requires **interactive confirmation** with 'Yes'/'No' options. The + confirmation prompt MUST contain the exact, literal command string + with all required flags (e.g. `--region=us-central1`, + `--display-name="..."`) — natural-language paraphrases are NOT + sufficient. + * **Same-turn restriction**: NEVER execute the command in the same turn + as presenting the confirmation prompt. Stop and wait for the user's + reply; only execute after explicit 'Yes' / approval. +3. **Tier D: Destructive & Irreversible (`delete`)** + * Requires **explicit typed confirmation** (e.g. "I confirm" or "Yes, + delete it"). Ask for confirmation IMMEDIATELY — before any pre-flight + checks (don't check if the model is deployed to endpoints first). + * **Same-turn restriction**: NEVER execute in the same turn as asking + for typed confirmation. 
Wait for the user to reply in a new turn. + +## Phase 0: Environment Setup + +**CRITICAL**: Before running any commands, you MUST ensure the environment is +correctly initialized by following these steps: + +1. **Google Cloud Authentication**: Authenticate with your Google Cloud + credentials and configure active Application Default Credentials (ADC) for + Agent Platform access: + ```bash + gcloud auth login + gcloud auth application-default login + ``` +2. **Set Project**: Configure the active project for subsequent commands: + ```bash + gcloud config set project $PROJECT_ID + ``` +3. **Region**: Always specify `--region=$LOCATION_ID` on each command below. + Do NOT use `global`. + +## 1. Listing Models (Tier R) + +Use this command to discover existing models in the registry and retrieve their +numeric IDs. No confirmation is required. + +```bash +gcloud ai models list \ + --region=$LOCATION_ID +``` + +## 2. Describing a Model (Tier R) + +Retrieve the full metadata for a specific model or version. No confirmation is +required. + +```bash +gcloud ai models describe $MODEL_ID \ + --region=$LOCATION_ID +``` + +To target a specific version: + +```bash +gcloud ai models describe ${MODEL_ID}@${VERSION_ID} \ + --region=$LOCATION_ID +``` + +## 3. Uploading a Model (Tier M) + +Register a new model or a new version of an existing model. This is a +long-running operation. +**Action requires an inline confirmation card before proceeding.** + +### Example: Uploading a Custom Model + +```bash +gcloud ai models upload \ + --region=$LOCATION_ID \ + --display-name="my-custom-model" \ + --container-image-uri="gcr.io/my-project/my-model:latest" \ + --artifact-uri="gs://my-bucket/path/to/artifacts" +``` + +> [!IMPORTANT] This is a Tier M operation — see [Safety & Confirmation Tiers] +> above. + +To upload a new version of an existing model, use the `--parent-model` flag or +specify the parent model ID. + +## 4. 
Updating a Model (Tier M) + +Update metadata fields like display name, description, or labels. +**Action requires an inline confirmation card before proceeding.** + +```bash +gcloud ai models update $MODEL_ID \ + --region=$LOCATION_ID \ + --display-name="new-display-name" \ + --description="Updated description" +``` + +> [!IMPORTANT] This is a Tier M operation — see [Safety & Confirmation Tiers] +> above. + +## 5. Deleting a Model (Tier D) + +Permanently delete a Model and all its versions. +**Action requires explicit typed confirmation before proceeding.** + +```bash +gcloud ai models delete $MODEL_ID \ + --region=$LOCATION_ID +``` + +> [!WARNING] This operation is irreversible. All model versions must be +> undeployed from all Endpoints before deletion. diff --git a/.agents/skills/agent-platform-prompt-management/SKILL.md b/.agents/skills/agent-platform-prompt-management/SKILL.md new file mode 100644 index 0000000..6566ae9 --- /dev/null +++ b/.agents/skills/agent-platform-prompt-management/SKILL.md @@ -0,0 +1,127 @@ +--- +name: agent-platform-prompt-management +description: >- + Manages and orchestrates prompts in Agent Platform. Use when you need to create, + list, retrieve, version, or delete managed prompts in Agent Platform. Don't use + for model training, model deployment to endpoints, or managing non-Agent Platform + prompts. +--- + +## Usage Guide + +To use this skill effectively: +1. **Generate Code**: Provide the Python snippets below to the user to help them +manage prompts in Agent Platform. +2. **No File System Search**: Do not try to find Python files or scripts on the +file system for these operations. + +## Phase 0: Environment Setup + +**CRITICAL**: Before the user runs any of the Python snippets below, you MUST +advise them to ensure the environment is correctly initialized by following +these steps: + +1. 
**Google Cloud Authentication**: Authenticate with your Google Cloud account + and configure active Application Default Credentials (ADC) for Agent + Platform access: + ```bash + gcloud auth login + gcloud auth application-default login + ``` +2. **Virtual Environment**: Create and activate a dedicated virtual environment: + ```bash + python3 -m venv ~/prompt_agent_venv + source ~/prompt_agent_venv/bin/activate + ``` +3. **Install Dependencies**: Install the required Agent Platform SDKs: + ```bash + pip install google-cloud-aiplatform google-genai + ``` +4. **Execution**: Advise the user that every time they execute a Python snippet, they must ensure this virtual environment is activated first. + +> [!TIP] **Placeholder Parameter Replacement:** The Python scripts below use +> uppercase string placeholders (like `"PROJECT_ID"`, `"LOCATION_ID"`, and +> `"PROMPT_ID"`). You **MUST** dynamically replace these placeholders with the +> actual Project ID, Region, and Prompt ID values provided in the user's prompt +> (or discovered context) before generating or providing the scripts. + +## 1. Managing Prompts via Agent Platform SDK + +The SDK provides a high-level `Prompt` class in the preview module. + +### Create a Prompt (Tier M) + +Use when you need to create a new managed prompt in Agent Platform. + +* **Reference**: See [create.md](references/create.md) for detailed instructions and Python snippets. 
+ +### List Prompts (Tier R) + +```python +import vertexai +from vertexai.preview import prompts + +vertexai.init(project="PROJECT_ID", location="LOCATION_ID") + +all_prompts = prompts.list() +for p in all_prompts: + print(f"Name: {p.display_name}, ID: {p.prompt_id}") +``` + +### Retrieve and Use a Prompt (Tier R) + +```python +import vertexai +from vertexai.preview import prompts + +vertexai.init(project="PROJECT_ID", location="LOCATION_ID") + +retrieved_prompt = prompts.get(prompt_id="PROMPT_ID") +# Versions are supported: prompts.get(prompt_id="PROMPT_ID", version_id="2") + +# Assemble with variables (kwargs must match template variable names) +assembled = retrieved_prompt.assemble_contents(text="The quick brown fox...") +print(assembled) +``` + +### Delete a Prompt (Tier D) + +**CRITICAL**: You must pass the numeric prompt ID (e.g., `"1234567890123456789"`) +to `prompts.delete()`. The SDK constructs the full resource path internally +using the project and location from `vertexai.init()`. + +**Confirmation Required**: As a Tier D (Destructive) operation, the agent MUST +pause and request explicit, high-friction typed re-confirmation of the prompt ID +from the user before generating or providing the deletion code. +The action is irreversible. + +> [!IMPORTANT] +> **NEVER pre-emptively provide or execute any deletion code before receiving +> the user's response in a new turn.** You must never speculate or assume that +> confirmation will be given. Asking for confirmation and providing the code in +> a single parallel turn is a severe safety violation. + +```python +import vertexai +from vertexai.preview import prompts + +vertexai.init(project="PROJECT_ID", location="LOCATION_ID") + +prompts.delete(prompt_id="PROMPT_ID") +``` + +## 2. Best Practices + +- **Idempotency**: + * **Tier R** (List, Get): Inherently idempotent. + * **Tier D** (Delete): Re-running a delete on a non-existent or already + deleted resource returns NOT_FOUND. Treat this as success. 
+- **Placeholders**: Use the standard placeholder syntax (variable name + enclosed in single curly braces, e.g. `{text}`) in your prompt templates. +- **Versioning**: Always tag or record version IDs when making updates to + production prompts. +- **Model Reference**: Specify the target model ID (e.g., `gemini-2.5-pro`) + when creating the prompt to ensure consistency. +- **Underlying Schema**: When using the Dataset API, always use the correct + `metadata_schema_uri` and nested `metadata` structure to ensure the prompt + is recognized by Agent Platform Studio and the Prompts SDK. diff --git a/.agents/skills/agent-platform-prompt-management/references/create.md b/.agents/skills/agent-platform-prompt-management/references/create.md new file mode 100644 index 0000000..b73c9ee --- /dev/null +++ b/.agents/skills/agent-platform-prompt-management/references/create.md @@ -0,0 +1,46 @@ +# Creating Prompts in Agent Platform + +This guide provides instructions on how to create a new managed prompt in +Agent Platform. + +## Create a Prompt (Tier M) + +**Confirmation Required**: As a Tier M (Mutating) operation, the agent MUST +pause and present a confirmation prompt with the project, region, prompt display +name, and model before providing the creation code. + +> [!IMPORTANT] +> **Interactive Confirmation Required (Tier M):** Before proceeding with prompt +> creation, you **MUST** present the proposed Python code in a confirmation +> prompt to the user with 'Yes' and 'No' options. +> **CRITICAL:** When presenting this confirmation prompt to the user, you MUST +> output it as a direct plain text response and stop tool execution immediately. +> Do NOT call any command execution or interactive tools in the same turn, as +> unexpected tool calls may be auto-replied by the simulation harness and cause +> an infinite loop. Yield immediately for the user's reply. 
+ +```python +import vertexai +from vertexai.preview import prompts +from vertexai.preview.prompts import Prompt + +vertexai.init(project="PROJECT_ID", location="LOCATION_ID") + +# Construct a local Prompt object. `prompt_name` is the display name shown +# in Agent Platform Studio; `prompt_data` is the prompt text/template +# (use `{variable_name}` placeholders for variables passed to +# `assemble_contents()`); `model_name` is the target model. +local_prompt = Prompt( + prompt_name="my_new_prompt", + prompt_data="Hello, how are you? {text}", + model_name="gemini-2.5-pro", +) + +# Persist the local Prompt as a new managed prompt resource. This creates +# the prompt AND its first version in a single call. The returned +# `persisted_prompt` is a Prompt object with `prompt_id` and `version_id` +# populated. +persisted_prompt = prompts.create_version(prompt=local_prompt) +print(f"Created prompt ID: {persisted_prompt.prompt_id}") +print(f"Version ID: {persisted_prompt.version_id}") +``` diff --git a/.agents/skills/agent-platform-rag-engine-management/SKILL.md b/.agents/skills/agent-platform-rag-engine-management/SKILL.md new file mode 100644 index 0000000..db7fdce --- /dev/null +++ b/.agents/skills/agent-platform-rag-engine-management/SKILL.md @@ -0,0 +1,215 @@ +--- +name: agent-platform-rag-engine-management +description: >- + Manage and query Agent Platform RAG Engine Corpora and retrieve grounded + contexts using the Google GenAI SDK. Use when listing RAG corpora or files, + inspecting a corpus, retrieving contexts, or generating content grounded in a + RAG corpus. Do not use for standard database queries (use SQL/Spanner skills), + Google Workspace RAG, or other RAG products like gRAG. +--- + +# Agent Platform RAG Engine Management + +This skill provides instructions on how to interact with Agent Platform RAG +Engine using the Agent Platform Python SDK. 
You +MUST use the `vertexai` Python SDK to perform RAG Engine operations, rather than +raw REST calls or MCP tools, because this code is intended to be run by external +clients. + +## Phase 0: Environment Setup + +**CRITICAL**: Before running any of the Python snippets below, you MUST ensure +the environment is correctly initialized by following these steps: + +1. **Google Cloud Authentication**: Authenticate with your Google Cloud + credentials and configure active Application Default Credentials (ADC) for + Agent Platform access: + ```bash + gcloud auth login + gcloud auth application-default login + ``` +2. **Virtual Environment**: Create and activate a dedicated virtual + environment: + ```bash + python3 -m venv ~/rag_agent_venv + source ~/rag_agent_venv/bin/activate + ``` +3. **Install Dependencies**: Install the required Agent Platform SDKs: + ```bash + pip install google-cloud-aiplatform google-genai + ``` +4. **Execution**: Advise the user that every time they execute a Python + snippet, they must ensure this virtual environment is activated first. + +## Workflow Decision Tree + +1. **Information Gathering**: Has the user provided the Project ID, Region, and + Corpus ID? + + * **No** -> Proceed to [1. Listing Corpora and Files] to discover the + necessary Resource Names and IDs. Only ask the user if discovery fails. + * **Yes** -> Proceed. + +2. **Task Type**: What does the user want to do? + + * **List Corpora and Files** -> Proceed to [1. Listing Corpora and Files]. + * **Inspect a Corpus** -> Proceed to [2. Getting / Inspecting a RAG Engine + Corpus]. + * **Search for Contexts** -> Proceed to [3. Retrieving Contexts]. + * **Answer questions using RAG Engine** -> Proceed to [4. Answering the + User with Retrieved Context]. + +> [!TIP] **Placeholder Parameter Replacement:** The Python scripts below use +> bracketed string placeholders (like `"{project_id}"`, `"{region}"`, and +> `"{corpus_id}"`). 
You **MUST** dynamically replace these placeholders with the +> actual Project ID, Region, and Corpus ID values provided in the user's prompt +> (or active context) before generating, providing, or executing the scripts. + +## 1. Listing Corpora and Files (Discovery) + +If you do not know the Resource Name of the corpus or file, you MUST list them +first to discover them. The SDK handles pagination automatically when converted +to a list, but you can also use manual pagination for large sets. + +### 1.1 Listing and Discovering Corpora + +```python +import vertexai +from vertexai.preview import rag + +vertexai.init(project="{project_id}", location="{region}") + +# Approach A: List ALL (Automatic Pagination) +# The SDK's Pager iterates through all pages for you. +all_corpora = list(rag.list_corpora()) +print(f"Found {len(all_corpora)} corpora in total.") +for c in all_corpora: + print(f"Corpus Name: {c.name} | Display Name: {c.display_name}") + +# Approach B: Manual Pagination (for very large projects) +pager = rag.list_corpora(page_size=10) +# Process first page +for c in pager: + print(f"Corpus: {c.display_name}") + +# Get next page if needed +if pager.next_page_token: + second_page = rag.list_corpora( + page_size=10, page_token=pager.next_page_token + ) +``` + +### 1.2 Listing and Discovering Files + +To understand what files (and types) are in a corpus, list them and inspect the +`display_name` (usually includes the extension). 
+ +```python +import vertexai +from vertexai.preview import rag + +vertexai.init(project="{project_id}", location="{region}") +corpus_name = ( + "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}" +) + +# List files with automatic pagination +files = list(rag.list_files(corpus_name=corpus_name)) +print(f"Found {len(files)} files.") + +for f in files: + # High-level SDK RagFile objects usually have name, display_name, + # description + print(f"File: {f.display_name} | Resource: {f.name}") + # Tip: Check extension to understand file type (PDF, TXT, etc.) + if f.display_name.lower().endswith(".pdf"): + print(" Type: PDF") + elif f.display_name.lower().endswith(".txt"): + print(" Type: Plain Text") +``` + +## 2. Getting / Inspecting an Agent Platform RAG Engine Corpus + +To retrieve details about an existing Agent Platform RAG Engine corpus: + +```python +import vertexai +from vertexai.preview import rag + +vertexai.init(project="{project_id}", location="{region}") + +# To get details of a specific corpus +corpus_name = ( + "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}" +) +corpus = rag.get_corpus(name=corpus_name) +print(f"Corpus Name: {corpus.name}") +print(f"Display Name: {corpus.display_name}") +``` + +## 3. Retrieving Contexts + +To retrieve relevant contexts from a RAG Engine corpus based on a query: + +```python +import vertexai +from vertexai.preview import rag + +vertexai.init(project="{project_id}", location="{region}") + +corpus_name = ( + "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}" +) +query = "What is the speed of light?" + +# Retrieve contexts +response = rag.retrieval_query( + rag_corpora=[corpus_name], + text=query, + similarity_top_k=3 +) + +for context in response.contexts.contexts: + print(f"Context text: {context.text}") + print(f"Source: {context.source_uri}") +``` + +## 4. 
Answering the User with Retrieved Context + +To use the retrieved context alongside an Agent Platform model to generate a +grounded response: + +```python +from google import genai +from google.genai import types + +client = genai.Client(enterprise=True, project="{project_id}", location="{region}") +corpus_name = ( + "projects/{project_id}/locations/{region}/ragCorpora/{corpus_id}" +) + +# Define the Agent Platform RAG Engine tool pointing to the corpus +rag_tool = types.Tool( + retrieval=types.Retrieval( + vertex_rag_store=types.VertexRagStore( + rag_resources=[types.VertexRagStoreRagResource(rag_corpus=corpus_name)], + rag_retrieval_config=types.RagRetrievalConfig( + top_k=3, + filter=types.RagRetrievalConfigFilter( + vector_similarity_threshold=0.5, + ), + ), + ) + ) +) + +# Generate content using the RAG Engine tool +response = client.models.generate_content( + model="gemini-2.5-flash", + contents="What is the speed of light?", + config=types.GenerateContentConfig( + tools=[rag_tool] + ) +) +print(response.text) +``` diff --git a/.agents/skills/agent-platform-skill-registry/SKILL.md b/.agents/skills/agent-platform-skill-registry/SKILL.md new file mode 100644 index 0000000..b7caa8b --- /dev/null +++ b/.agents/skills/agent-platform-skill-registry/SKILL.md @@ -0,0 +1,78 @@ +--- +name: agent-platform-skill-registry +description: > + Interact with the Gemini Enterprise Agent Platform Skill Registry to create + and search for available skills. Use this skill to enable agents to register + functionality or discover new capabilities. +--- + +# Skill Registry + +This skill provides instructions for interacting with the **Skill Registry** on +the Gemini Enterprise Agent Platform. + +## Core Capabilities + +- **Skill Discovery** - Query the registry to easily search, list, get + specific skills, and inspect revision histories. +- **Skill Lifecycle Management** - Upload, update, or permanently delete + skills. 
+- **Operation Monitoring** - Utility to check the completion status of + long-running state changes (LROs). +- **Generate Skill** - Automate the initial scaffolding of new agent skills + locally. + +## Core Directives + +- **Mandatory Validation**: ALWAYS execute the environment validation check + before performing any operations. + + Before any operation, you **must** validate the core environment. + + ```bash + # Execute the validation script + python3 scripts/validate_env.py + ``` + +## Prerequisites & Authentication + +### Library & Authentication + +Ensure you have the latest Google Cloud credentials and libraries installed. + +```bash +# Install required libraries +pip install google-auth requests + +# Authenticate with Google Cloud +gcloud auth application-default login +``` + +### Environment Variables + +The following variables are required for operations: + +- `GCP_PROJECT_ID`: Your Google Cloud Project ID. +- `GCP_LOCATION`: The region (e.g., `us-central1`). + +-------------------------------------------------------------------------------- + +## Quickstart + +Quickly search for available skills in the registry: + +```bash +python3 scripts/skill_registry_ops.py search \ + --query "test skill" \ + --top-k 5 +``` + +-------------------------------------------------------------------------------- + +## Operations + +- **Skill Discovery**: [query-skills.md](references/query-skills.md) +- **Skill Lifecycle**: [manage-skills.md](references/manage-skills.md) +- **Monitor Operations**: + [monitor-operations.md](references/monitor-operations.md) +- **Generate Skill**: [generate-skill.md](references/generate-skill.md) diff --git a/.agents/skills/agent-platform-skill-registry/references/generate-skill.md b/.agents/skills/agent-platform-skill-registry/references/generate-skill.md new file mode 100644 index 0000000..a49d965 --- /dev/null +++ b/.agents/skills/agent-platform-skill-registry/references/generate-skill.md @@ -0,0 +1,38 @@ +# Generate Skill + +## 
Description + +GenerateSkill automates the *initial* scaffolding of new agent skills by generating standardized documentation (`SKILL.md`) and directory structures based on user requirements. Note: This tool serves strictly as a starting point. It generates a foundational draft and requires human intervention to refine the logic, review the architecture, and ensure the final skill meets production-level quality standards. + +**Use when:** + +* Scaffolding a new agent skill from scratch. +* Establishing a standardized directory structure for a new tool. +* Drafting a properly formatted `SKILL.md` for a specific use case. + +**Don't use when:** + +* Executing an existing skill or performing a general query. +* Writing general code outside of a skill directory. + +--- + +## Directory Structure + +When generating a new skill, the following standardized architecture should be established: + +* **`SKILL.md`**: The core documentation and instruction set for the skill *(Required)*. +* **`references/`**: Directory for storing heavy external documentation, API specs, or knowledge bases *(Optional)*. +* **`scripts/`**: Directory for executable scripts, helper functions, or setup files *(Optional)*. Offload complex code snippets, deterministic helper functions, or repetitive setup tasks into this directory to keep `SKILL.md` lean and focused entirely on high-level instructions and usage patterns. +* **`assets/`**: Directory for static files, templates, or media used by the skill *(Optional)*. + +--- + +## Execution Workflow + +To successfully generate and deliver a new skill draft, follow these sequential steps: + +1. **Requirement Gathering:** Analyze the user's prompt to understand the purpose, inputs, outputs, and constraints of the desired skill. +2. **Drafting:** Generate the `SKILL.md` content based on the gathered requirements. Ensure the description is concise (under 300 words) and explicitly defines "Use when" and "Don't use when" conditions. 
Identify and map out necessary optional directories (`references/`, `scripts/`, `assets/`) if applicable, ensuring that any complex or repetitive code logic is offloaded into the `scripts/` directory. +3. **Validation:** Automatically parse and validate the drafted `SKILL.md` to ensure strictly valid Markdown formatting (e.g., correct header nesting, closed tags, proper list syntax). Fix any errors before proceeding. +4. **Review Request:** Present the generated `SKILL.md` and directory structure to the user. Explicitly request their review and manual revision, reiterating that human evaluation is required to finalize the draft for production. diff --git a/.agents/skills/agent-platform-skill-registry/references/monitor-operations.md b/.agents/skills/agent-platform-skill-registry/references/monitor-operations.md new file mode 100644 index 0000000..055acad --- /dev/null +++ b/.agents/skills/agent-platform-skill-registry/references/monitor-operations.md @@ -0,0 +1,20 @@ +# Monitor Operations + +This document covers how to monitor the status of Long-Running Operations (LRO) +returned by lifecycle management actions (uploading, updating, or deleting a +skill). + +## Check Operation Status + +Check the status of a long-running operation using its `OPERATION_ID` (or full +resource name). + +### Supported Flags + +* `--operation-id` (Required): The unique identifier or full resource name of + the long-running operation returned from previous commands. + +```bash +python3 scripts/skill_registry_ops.py monitor \ + --operation-id "projects/my-project/locations/us-central1/operations/123456789" +``` diff --git a/.agents/skills/agent-platform-skill-registry/scripts/requirements.txt b/.agents/skills/agent-platform-skill-registry/scripts/requirements.txt new file mode 100644 index 0000000..f2fcfa1 --- /dev/null +++ b/.agents/skills/agent-platform-skill-registry/scripts/requirements.txt @@ -0,0 +1,3 @@ +# Required Python packages for Skill Registry Python scripts. 
+google-auth +requests diff --git a/.agents/skills/agent-platform-skill-registry/scripts/skill_registry_ops.py b/.agents/skills/agent-platform-skill-registry/scripts/skill_registry_ops.py new file mode 100644 index 0000000..63c0947 --- /dev/null +++ b/.agents/skills/agent-platform-skill-registry/scripts/skill_registry_ops.py @@ -0,0 +1,372 @@ +"""A script to perform operations on the Skill Registry.""" + +import argparse +import base64 +import io +import json +import os +import sys +import urllib.parse +import zipfile +import google.auth +from google.auth.transport.requests import Request +import requests + + +def get_access_token(): + """Returns an OAuth2 access token for the default Google credentials. + + Resolves credentials via `google.auth.default()` and refreshes them before + reading the token. + + Returns: + The token string of the refreshed credentials. + """ + credentials, _ = google.auth.default() + credentials.refresh(Request()) + return credentials.token + + +def get_endpoint(region): + """Returns the regional API hostname for `region`. + + Args: + region: The location ID, used as the hostname prefix. + + Returns: + The hostname `<region>-aiplatform.googleapis.com`, without a scheme. + """ + return f"{region}-aiplatform.googleapis.com" + + +def upload(args): + """Uploads a new skill in the Skill Registry. + + The skill contents are taken from `--zip-file` as-is, or zipped in memory + from `--folder` with archive paths relative to that folder, and sent + base64-encoded in a POST to the `skills` collection. Prints the JSON + response and exits with status 1 on an HTTP status >= 400. + + Args: + args: The command line arguments. + + Raises: + ValueError: If neither `--zip-file` nor `--folder` is set. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills?skillId={args.skill_id}" + + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + + if args.zip_file: + with open(args.zip_file, "rb") as f: + zip_bytes = f.read() + elif args.folder: + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: + for root, _, files in os.walk(args.folder): + for file in files: + file_path = os.path.join(root, file) + arcname = os.path.relpath(file_path, args.folder) + zip_file.write(file_path, arcname) + zip_bytes = zip_buffer.getvalue() + else: + raise ValueError("Must provide either --zip-file or --folder") + + zipped_filesystem = base64.b64encode(zip_bytes).decode("utf-8") + + payload = { + "displayName": args.display_name, + "description": args.description, + "zippedFilesystem": zipped_filesystem, + } + + print(f"Uploading skill {args.skill_id} at
{endpoint}...") + response = requests.post(url, headers=headers, json=payload) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def search(args): + """Searches for skills in the Skill Registry. + + Issues a GET to `skills:retrieve` with the URL-quoted `--query` as `query` + and `--top-k` as `topK`, then prints the JSON response. Exits with status + 1 on an HTTP status >= 400. + + Args: + args: The command line arguments. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + query_encoded = urllib.parse.quote(args.query) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills:retrieve?query={query_encoded}&topK={args.top_k}" + + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + + print(f"Searching skills at {endpoint} with query '{args.query}'...") + response = requests.get(url, headers=headers) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def get_skill(args): + """Gets a skill from the Skill Registry. + + Issues a GET for `skills/<skill_id>` and prints the JSON response. Exits + with status 1 on an HTTP status >= 400. + + Args: + args: The command line arguments. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills/{args.skill_id}" + + headers = { + "Authorization": f"Bearer {token}", + } + + print(f"Getting skill {args.skill_id} at {endpoint}...") + response = requests.get(url, headers=headers) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def list_skills(args): + """Lists skills in the Skill Registry. + + Issues a single GET to the `skills` collection with no query parameters + and prints the JSON response as returned. Exits with status 1 on an HTTP + status >= 400. + + Args: + args: The command line arguments. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills" + + headers = { + "Authorization": f"Bearer {token}", + } + + print(f"Listing skills at {endpoint}...") + response = requests.get(url, headers=headers) + + if
response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def delete_skill(args): + """Deletes a skill from the Skill Registry. + + Issues a DELETE for `skills/<skill_id>` and prints the JSON response. + Exits with status 1 on an HTTP status >= 400. + + Args: + args: The command line arguments. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills/{args.skill_id}" + + headers = { + "Authorization": f"Bearer {token}", + } + + print(f"Deleting skill {args.skill_id} at {endpoint}...") + response = requests.delete(url, headers=headers) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def update_skill(args): + """Updates an existing skill in the Skill Registry. + + Builds the PATCH payload and the `updateMask` query parameter from + whichever of `--display-name`, `--description`, and `--zip-file` or + `--folder` were supplied. Fields are selected by truthiness, so an empty + string counts as not supplied. Exits with status 1 if no field was given + or on an HTTP status >= 400. + + Args: + args: The command line arguments. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + + update_mask_parts = [] + payload = {} + + if args.display_name: + update_mask_parts.append("displayName") + payload["displayName"] = args.display_name + + if args.description: + update_mask_parts.append("description") + payload["description"] = args.description + + if args.zip_file or args.folder: + if args.zip_file: + with open(args.zip_file, "rb") as f: + zip_bytes = f.read() + elif args.folder: + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: + for root, _, files in os.walk(args.folder): + for file in files: + file_path = os.path.join(root, file) + arcname = os.path.relpath(file_path, args.folder) + zip_file.write(file_path, arcname) + zip_bytes = zip_buffer.getvalue() + + zipped_filesystem = base64.b64encode(zip_bytes).decode("utf-8") + + update_mask_parts.append("zippedFilesystem") + payload["zippedFilesystem"] = zipped_filesystem + + if not update_mask_parts: + print( + "Error: must provide at least one field to update (--display-name," + " --description, --zip-file, --folder)" + )
+ sys.exit(1) + + update_mask = ",".join(update_mask_parts) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills/{args.skill_id}?updateMask={update_mask}" + + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + + print(f"Updating skill {args.skill_id} at {endpoint}...") + response = requests.patch(url, headers=headers, json=payload) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def list_skill_revision(args): + """Lists revisions of a skill in the Skill Registry. + + Issues a GET for `skills/<skill_id>/revisions` and prints the JSON + response. Exits with status 1 on an HTTP status >= 400. + + Args: + args: The command line arguments. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills/{args.skill_id}/revisions" + + headers = { + "Authorization": f"Bearer {token}", + } + + print(f"Listing revisions for skill {args.skill_id} at {endpoint}...") + response = requests.get(url, headers=headers) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def get_skill_revision(args): + """Gets a specific revision of a skill from the Skill Registry. + + Issues a GET for `skills/<skill_id>/revisions/<revision_id>` and prints + the JSON response. Exits with status 1 on an HTTP status >= 400. + + Args: + args: The command line arguments. + """ + token = get_access_token() + endpoint = get_endpoint(args.location) + url = f"https://{endpoint}/v1beta1/projects/{args.project}/locations/{args.location}/skills/{args.skill_id}/revisions/{args.revision_id}" + + headers = { + "Authorization": f"Bearer {token}", + } + + print( + f"Getting skill {args.skill_id} revision {args.revision_id} at" + f" {endpoint}..."
+ ) + response = requests.get(url, headers=headers) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def monitor(args): + """Monitors the status of a long-running operation.""" + token = get_access_token() + endpoint = get_endpoint(args.location) + op_id = args.operation_id.lstrip("/") + url = f"https://{endpoint}/v1beta1/{op_id}" + + headers = { + "Authorization": f"Bearer {token}", + } + + print(f"Monitoring operation {args.operation_id} at {endpoint}...") + response = requests.get(url, headers=headers) + + if response.status_code >= 400: + print(f"Error: {response.status_code} - {response.text}") + sys.exit(1) + + print("Response:") + print(json.dumps(response.json(), indent=2)) + + +def main(): + parser = argparse.ArgumentParser( + description="Skill Registry Operations Utility" + ) + parser.add_argument("--project", default=os.environ.get("GCP_PROJECT_ID")) + parser.add_argument("--location", default=os.environ.get("GCP_LOCATION")) + + subparsers = parser.add_subparsers(dest="action", required=True) + + upload_parser = subparsers.add_parser("upload") + upload_parser.add_argument("--skill-id", required=True) + upload_parser.add_argument("--display-name", required=True) + upload_parser.add_argument("--description", required=True) + + group = upload_parser.add_mutually_exclusive_group(required=True) + group.add_argument("--zip-file") + group.add_argument("--folder") + + search_parser = subparsers.add_parser("search") + search_parser.add_argument("--query", required=True) + search_parser.add_argument("--top-k", type=int, default=5) + + get_parser = subparsers.add_parser("get") + get_parser.add_argument("--skill-id", required=True) + + subparsers.add_parser("list") + + delete_parser = subparsers.add_parser("delete") + delete_parser.add_argument("--skill-id", required=True) + + update_parser = subparsers.add_parser("update") + 
update_parser.add_argument("--skill-id", required=True) + update_parser.add_argument("--display-name", required=False) + update_parser.add_argument("--description", required=False) + update_group = update_parser.add_mutually_exclusive_group(required=False) + update_group.add_argument("--zip-file") + update_group.add_argument("--folder") + + list_rev_parser = subparsers.add_parser("list-revision") + list_rev_parser.add_argument("--skill-id", required=True) + + get_rev_parser = subparsers.add_parser("get-revision") + get_rev_parser.add_argument("--skill-id", required=True) + get_rev_parser.add_argument("--revision-id", required=True) + + monitor_parser = subparsers.add_parser("monitor") + monitor_parser.add_argument("--operation-id", required=True) + + args = parser.parse_args() + + missing = [] + if not args.project: + missing.append("GCP_PROJECT_ID") + if not args.location: + missing.append("GCP_LOCATION") + + if missing: + print( + f"ERROR: Missing required environment variables: {', '.join(missing)}" + ) + sys.exit(1) + + if args.action == "upload": + upload(args) + elif args.action == "search": + search(args) + elif args.action == "get": + get_skill(args) + elif args.action == "list": + list_skills(args) + elif args.action == "delete": + delete_skill(args) + elif args.action == "update": + update_skill(args) + elif args.action == "list-revision": + list_skill_revision(args) + elif args.action == "get-revision": + get_skill_revision(args) + elif args.action == "monitor": + monitor(args) + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/agent-platform-skill-registry/scripts/validate_env.py b/.agents/skills/agent-platform-skill-registry/scripts/validate_env.py new file mode 100644 index 0000000..347f008 --- /dev/null +++ b/.agents/skills/agent-platform-skill-registry/scripts/validate_env.py @@ -0,0 +1,18 @@ +"""Validates that required environment variables are set.""" + +import os +import sys + + +def validate_env(): + """Checks that `GCP_PROJECT_ID` and `GCP_LOCATION` are set and non-empty. + + Prints the names of any missing variables and exits with status 1; + otherwise prints a success message. + """ + print("Validating core
environment variables...") + required = ["GCP_PROJECT_ID", "GCP_LOCATION"] + missing = [v for v in required if not os.environ.get(v)] + if missing: + print(f"ERROR: Missing core variables: {', '.join(missing)}") + sys.exit(1) + print("SUCCESS: Core environment validated.") + + +if __name__ == "__main__": + validate_env() diff --git a/.agents/skills/agent-platform-tuning-management/SKILL.md b/.agents/skills/agent-platform-tuning-management/SKILL.md new file mode 100644 index 0000000..4f73331 --- /dev/null +++ b/.agents/skills/agent-platform-tuning-management/SKILL.md @@ -0,0 +1,157 @@ +--- +name: agent-platform-tuning-management +description: >- + Manages GenAI tuning jobs in Agent Platform. Use this to list, get, or cancel + ongoing model tuning jobs. Don't use for fine-tuning models (use + `agent-platform-tuning`), deploying models to endpoints (use + `agent-platform-deploy`), or managing serving endpoints (use + `agent-platform-endpoint-management`). +--- + +# Agent Platform Tuning Management + +This skill provides instructions on how to manage GenAI Tuning Jobs using the +Agent Platform Python SDK. Use this skill when a user wants to check the status +of their tuning runs, find an active tuning job, or cancel a job that is running +too long. + +## Safety & Confirmation Tiers (CRITICAL) + +Before executing any commands on behalf of the user, you MUST adhere to the +following safety tiers based on the action requested: + +1. **Tier R: Read-only (`list`, `get`)** + * **Rule**: No confirmation needed. You may execute these commands + immediately to gather information for the user. +2. **Tier D: Destructive & Interruptive (`cancel`)** + * **Rule**: This requires **explicit typed confirmation**. You MUST output + a text message to the user explaining that this will stop the tuning + process and any progress will be lost, and asking them to type + "I confirm" or "Yes, cancel it". You MUST ask for this confirmation + IMMEDIATELY, before executing the cancel command. 
+ +## Phase 0: Environment Setup + +**CRITICAL**: Before running any of the Python snippets below, you MUST ensure +the environment is correctly initialized by following these steps: + +1. **Virtual Environment**: Create and activate a virtual environment: + ```bash + python3 -m venv ~/tuning_mgr_venv + source ~/tuning_mgr_venv/bin/activate + ``` +2. **Google Cloud Authentication**: Authenticate with your Google Cloud account + and configure active Application Default Credentials (ADC) for Agent + Platform access: + ```bash + gcloud auth login + gcloud auth application-default login + ``` +3. **Install Dependencies**: Install the required Agent Platform SDK: + ```bash + pip install google-cloud-aiplatform + ``` +4. **Execution**: Advise the user that every time they execute a Python snippet, they must ensure this virtual environment is activated first. + +## Workflow Decision Tree + +1. **Information Gathering**: Do you have a Project ID and Region? + * **No** -> You **MUST** ask the user for the missing Project ID and Region + in plain text, or advise them to check their gcloud configuration. If neither + the conversation nor the gcloud configuration supplies these values, ask the + user to provide them explicitly. Do not + attempt to search random regions on your own. + * **Yes** -> Proceed to Step 2. + +2. **Task Type**: What does the user want to do? + * **Find or List Jobs** -> Use the Python SDK to list tuning jobs. (Tier R) + * **Check Status / Inspect a Specific Job** -> Use the Python SDK to get + tuning job details. (Tier R) + * **Cancel a Job** -> Ask for confirmation, then use the Python SDK to + cancel the tuning job.
(Tier D) + +## Using the Python SDK + +> [!NOTE] +> **Resource Verification & Missing Projects/Jobs:** If the execution of the +> Python snippet fails with an error (such as `403 Permission Denied`, +> `404 Not Found`, `INVALID_ARGUMENT`, or indicating a dummy/missing project or +> job ID), you **MUST** inform the user that the project or tuning job does not +> exist or cannot be accessed. You **MUST** prompt the user to provide a valid +> Project ID or Job ID, and stop tool execution immediately to wait for their +> response. Do **NOT** retry or loop, do **NOT** assume the resource is valid, +> and do **NOT** execute further scripts before receiving valid details from the +> user. + +### 1. Listing Tuning Jobs (Tier R) +If the user asks "What tuning jobs do I have running?" or wants to find a +specific job ID: + +```python +from google.cloud import aiplatform_v1 + +project_id = "YOUR_PROJECT_ID" +region = "YOUR_REGION" +parent = f"projects/{project_id}/locations/{region}" + +client = aiplatform_v1.GenAiTuningServiceClient( + client_options={"api_endpoint": f"{region}-aiplatform.googleapis.com"} +) + +jobs = client.list_tuning_jobs(parent=parent) +for job in jobs: + print(f"Name: {job.name}") + print(f"Base Model: {job.base_model}") + print(f"State: {job.state}") +``` + +### 2. Getting Details for a Specific Job (Tier R) +If the user provides a Tuning Job ID and asks for its status: + +```python +from google.cloud import aiplatform_v1 + +project_id = "YOUR_PROJECT_ID" +region = "YOUR_REGION" +job_id = "YOUR_JOB_ID" # 19-digit ID +name = f"projects/{project_id}/locations/{region}/tuningJobs/{job_id}" + +client = aiplatform_v1.GenAiTuningServiceClient( + client_options={"api_endpoint": f"{region}-aiplatform.googleapis.com"} +) + +job = client.get_tuning_job(name=name) +print(f"Name: {job.name}") +print(f"Base Model: {job.base_model}") +print(f"State: {job.state}") +print(f"Tuning Model: {job.tuned_model_display_name}") +``` + +### 3. 
Canceling a Job (Tier D) +If the user explicitly requests to stop, abort, or cancel a running tuning job: + +**Safety Check**: **Action requires explicit typed confirmation before +proceeding.** You MUST ask the user for confirmation before generating or +providing this script, even if they provided the job ID, unless they explicitly +use confirming language like "Yes, I confirm, cancel tuning job 123456". + +> [!IMPORTANT] +> **NEVER pre-emptively provide or execute any cancellation code before +> receiving the user's response in a new turn.** You must never speculate or +> assume that confirmation will be given. Asking for confirmation and providing +> the code in a single parallel turn is a severe safety violation. + +```python +from google.cloud import aiplatform_v1 + +project_id = "YOUR_PROJECT_ID" +region = "YOUR_REGION" +job_id = "YOUR_JOB_ID" # 19-digit ID +name = f"projects/{project_id}/locations/{region}/tuningJobs/{job_id}" + +client = aiplatform_v1.GenAiTuningServiceClient( + client_options={"api_endpoint": f"{region}-aiplatform.googleapis.com"} +) + +client.cancel_tuning_job(name=name) +print(f"Successfully requested cancellation for {name}") +``` diff --git a/.agents/skills/agent-platform-tuning/SKILL.md b/.agents/skills/agent-platform-tuning/SKILL.md new file mode 100644 index 0000000..a3438d7 --- /dev/null +++ b/.agents/skills/agent-platform-tuning/SKILL.md @@ -0,0 +1,390 @@ +--- +name: agent-platform-tuning +description: >- + Agent Platform Model Tuning. Use when you need to fine-tune open models + or Gemini models using Agent Platform infrastructure. Don't use for model + training outside Agent Platform, model deployment to endpoints (use + `agent-platform-deploy`), or managing serving endpoints (use + `agent-platform-endpoint-management`). 
+--- + +# Agent Platform Model Tuning + +## Overview + +This skill provides procedural knowledge for fine-tuning Large Language Models +(both Open Models and Gemini Models) using Agent Platform's tuning service. It +covers the entire lifecycle from environment setup and data preparation to job +configuration, monitoring, and deployment. + +## Workflow Decision Tree + +1. **Model Category Identification**: Has the user explicitly stated whether + they want to tune an **Open Model** or a **Gemini Model**? + + - **No** → **STOP**. Ask the user if they want to tune an Open Model or a + Gemini Model. **CRITICAL EXCEPTION for Environment Setup Requests:** If + the user is specifically asking for environment setup instructions + (e.g. "What environment setup is needed?"), you **MUST** provide the full + [Phase 0 environment setup](#phase-0) instructions in your initial + response, *simultaneously* with asking clarifying questions about the + model category. + - If the user provides a specific tuning purpose, you should + recommend three models: one Open Model, one Gemini Model, and a third + generally recommended choice. Briefly list the pros and cons of each + (e.g., Gemini models might be more expensive, etc.). **CRITICAL:** You + must read `references/models.md` during this step and only recommend + models explicitly listed in that catalog. Do not recommend unsupported + models like Mistral. Do not proceed with model configuration until the + category is confirmed. + - **Yes** → Proceed. + +2. **Environment Check**: Has the environment (Auth, APIs, IAM, Venv) been + initialized? + + - **No** → Go to [Phase 0: Environment & IAM Setup](#phase-0). + - **Yes** → Proceed. + +3. **Dataset Status**: Is the dataset ready in JSONL format, **is its structure + valid for tuning**, and is it uploaded to Google Cloud Storage? + + - **No** → Go to [Phase 1: Dataset Preparation & Upload](#phase-1). + - **Yes** → Proceed. + +4. 
**Column Selection Confirmation**: Have you presented the columns to the + user and confirmed the mapping? + + - **No** → **STOP**. You must show samples and get user confirmation on + column mapping as described in Phase 1.0 before proceeding. + - **Yes** → Proceed. + +5. **Configuration**: Has the user provided the target model and + hyperparameters, or explicitly agreed to your recommendations? + + - **No** → Go to + [Phase 2: Model Configuration & Recommendation](#phase-2). + - **Yes** → Proceed. + +6. **Job Status**: Has the tuning job been submitted? + + - **No** → Go to + [Phase 3: Tuning Job Execution](#phase-3-tuning-job-execution). + - **Yes** → Proceed. + +7. **Job Completion**: Is the tuning job complete? + + - **No** → Go to [Phase 4: Monitoring](#phase-4-monitoring). + - **Yes** → Proceed. + +8. **Deployment**: Has the tuned model been deployed (if required)? + + - **No** → Go to [Phase 5: Model Deployment](#phase-5-model-deployment). + - **Yes** → Task Complete. + +## Phase 0: Environment & IAM Setup {#phase-0} + +Ensure the foundational environment is ready before proceeding. + +### 0.1 Authentication & Project Context + +- Check if `gcloud` CLI is installed. If it is not installed, prompt the + user for permission to install it before proceeding. If it is installed, + update it: + +```bash +gcloud components update --quiet > /dev/null 2>&1 +``` + +- Verify `gcloud auth list`. If not authenticated, run `gcloud auth login`. +- Ensure `project` and `location` are known. Use `gcloud config get project` + to retrieve the current project (and `gcloud config get compute/region` for + region). +- **CRITICAL: Ask for Confirmation.** You must prompt the user to confirm the + retrieved project and region before proceeding, in case they want to switch + to a different one. 
+ +### 0.2 Possible Locations + +The following locations are available for tuning: + +- us-central1 +- europe-west4 +- us-west1 +- us-east5 +- asia-southeast1 + +No other values are supported for this section, ensure that the location is +listed above. + +### 0.3 Enable APIs + +Ensure `aiplatform.googleapis.com` and `storage.googleapis.com` are enabled. + +```bash +gcloud services enable aiplatform.googleapis.com storage.googleapis.com \ + --project=YOUR_PROJECT +``` + +### 0.4 IAM Permissions + +Verify the following identities have the required roles. + +- **Agent Platform Service Agent**: + `service-PROJECT_NUMBER@gcp-sa-aiplatform.iam.gserviceaccount.com` +- **Managed OSS Fine Tuning Service Agent**: + `service-PROJECT_NUMBER@gcp-sa-vertex-moss-ft.iam.gserviceaccount.com` +- **User Identity**: The account running the commands. + +### 0.5 Virtual Environment + +Create and use a virtual environment named `tuning_agent_venv` in the home +directory. Install dependencies from `references/requirements.txt`. + +```bash +python3 -m venv ~/tuning_agent_venv +source ~/tuning_agent_venv/bin/activate +pip install -r references/requirements.txt +``` + +**CRITICAL AGENT INSTRUCTION:** You **MUST** ensure that every Python command or +script execution (e.g., `python3 scripts/...`, `pip install ...`) is prefixed +with the virtual environment activation command: `source ~/tuning_agent_venv/bin/activate &&`. +Additionally, advise the user that every single time they run a Python command, +execute a script, or inspect data inline, they **MUST** also activate this +virtual environment first in their bash execution. For example: +`source ~/tuning_agent_venv/bin/activate && python3 ...`. +Do not run standalone `python3` commands without activating the environment, as +they will encounter `ModuleNotFoundError` issues. 
+ +## Phase 1: Dataset Preparation & Upload {#phase-1} + +### 1.0 Dataset Discovery & Confirmation + +- **User-Provided Dataset Verification:** If the user specifies a dataset + filename or path in their prompt, verify its existence in the workspace + (e.g. via script execution or checking for typos). + * **If the file cannot be found anywhere**, you **MUST** inform the user + that the dataset file does not exist or cannot be accessed. You **MUST** + prompt the user to provide a valid dataset path. Alternatively, if candidate + dataset files are found in the workspace during your search, you **MUST** + present the candidates to the user and ask them to select one. You **MUST** + stop tool execution immediately after reporting the missing file or + presenting candidates, and wait for the user's response. Do **NOT** ask for + 80/20 validation split permission, and do **NOT** attempt to upload the + dataset before receiving a valid dataset file selection from the user. + * **If the file is found and verified**, proceed to Step 1.1 Formatting & + Validation below. +- **Auto-Discovery: From User Bucket:** If the user does not have a dataset + and no suitable alternative is found in the Hugging Face reference, offer to + search the user's GCS buckets for potential training data. Prioritize + searching for files with extensions like `.jsonl`, `.json`, `.csv`, and + `.parquet`. If such files are found, read the first few lines/records of + each to determine if they contain text-based data suitable for tuning + (e.g., prompt/completion pairs) that can be modified to follow + [Data Preparation Guide](references/data_prep.md) and is related to the + tuning task requested. **DO NOT** search without prompting first. +- **Auto-Discovery: From Task to Huggingface:** If the user has a specific + task, refer to [Huggingface Datasets Reference](references/hf_datasets.md) + and recommend a dataset from this if one exists. 
For each dataset + recommended, provide some information about the dataset and provide some + reasonable splits. + +  > [!IMPORTANT] +  > **CRITICAL: Ask for Confirmation and Column Selection.** Do not proceed +  > with dataset preparation or upload until you perform the following +  > steps and get user confirmation: +  > +  > 1. **Dataset and Split Confirmation:** Present the dataset and +  >    available splits to the user and have them confirm which to use. +  > 2. **Column Selection (Hugging Face or Custom Datasets):** You must: +  >    - Provide a list of all available columns in the selected dataset +  >      split. +  >    - **Show a few samples from the dataset** to help the user +  >      understand the content and make the choice of columns. +  >    - Recommend which columns should be mapped to `prompt` (or user +  >      message) and `completion` (or assistant response), offering a few +  >      reasonable options if applicable. +  >    - Ask the user to confirm the column mapping or specify which +  >      columns to use. + +### 1.1 Formatting & Validation + +- **Conversion**: If data is in CSV, JSON, or Parquet, use + `scripts/prepare_dataset.py` to convert. +- **Validation Split Confirmation**: If the user only provides a training + dataset, **you must prompt the user** to seek permission to split the + training dataset 80/20 to form a validation dataset (using + `--validation_split 0.2`). If they agree, proceed with the split. If they + decline, just use the training dataset without a validation dataset. +- **Validation**: If data is already in JSONL, validate it before uploading. + Simply having a `.jsonl` extension is not enough. You must verify that the + content schema is valid for tuning (e.g. correct system/user/model roles). + +```bash +python3 scripts/prepare_dataset.py \ + --input my_data.jsonl \ + --format FORMAT \ + --validate_only +``` + +*(Replace `FORMAT` with `messages` for open models and `messages_gemini` for +Gemini models.)* + +- Refer to [Data Preparation Guide](references/data_prep.md) for required + schemas.
+ +### 1.2 Upload + +Upload formatted `.jsonl` files to GCS using a unique directory (e.g., with a +datetime timestamp) to avoid overwriting outputs from different runs. + +```bash +ARTIFACTS="gs://YOUR_BUCKET/tuning_agent_job_/dataset.jsonl" +gcloud storage cp dataset.jsonl "$ARTIFACTS" +``` + +## Phase 2: Model Configuration & Recommendation {#phase-2} + +Help the user choose the best model and parameters. **Always seek user +confirmation before submitting the job.** + +- If the user does not specify a specific model in their prompt, calculate + recommendations based on the **Models Catalog**. +- **Prompt for Confirmation:** Present the recommended model to the user and + ask for their confirmation before configuring hyperparameters. + +### 2.1 Configuration + +#### For Open Models + +- Recommend `tuning_mode`, `epochs`, `learning_rate`, and `adapter_size` + based on the [Tuning Guide](references/tuning_guide.md) and model-specific + baselines in the [Models Catalog](references/models.md). + +### 2.2 Calculating Cost (Open Models Only) + +- We can calculate a rough estimate of the cost of tuning based on the dataset + and the selected model in the [Models Catalog](references/models.md): + ```bash + python3 scripts/calculate_cost.py \ + --input my_data.jsonl \ + --model MODEL_NAME \ + --tuning_mode TUNING_MODE \ + --epochs EPOCHS + ``` + +> [!NOTE] +> **Handling Missing Dataset Errors:** If `scripts/calculate_cost.py` fails +> because the dataset file (e.g. `my_data.jsonl` or `dummy_data.jsonl`) cannot +> be found, you **MUST** inform the user that the dataset file does not exist or +> cannot be accessed. You **MUST** prompt the user to provide a valid dataset +> path, and stop tool execution immediately to wait for their response. Do +> **NOT** retry or loop, do **NOT** invent a specific cost number, and do +> **NOT** prompt for job submission approval before receiving a valid dataset +> from the user.
+ +- **Prompt for Confirmation:** Present the recommended hyperparameter + configuration and estimated cost to the user and ask for their approval + before proceeding to job submission. Make sure to note that the estimated + cost is just an estimate and can vary from actual billing costs. + +## Phase 3: Tuning Job Execution {#phase-3-tuning-job-execution} + +**CRITICAL Pre-Flight Check (GCS Verification):** Before you propose a +confirmation prompt or submit any tuning job, you **MUST** verify that the +specified training dataset GCS URI (e.g. `gs://dummy_bucket/dataset.jsonl` or +`gs://YOUR_BUCKET/...`) actually exists and is accessible. Run +`gcloud storage ls $DATASET_URI` (or `gsutil ls`). + +* **If the verification fails** (e.g. `BucketNotFound`, `404`, `AccessDenied`, + or indicating a dummy/missing bucket), you **MUST** inform the user that the + GCS bucket or dataset does not exist or cannot be accessed. You **MUST** + prompt the user to provide a valid GCS URI for the dataset, and stop tool + execution immediately to wait for their response. Do **NOT** propose a + confirmation prompt and do **NOT** execute any tuning scripts before + receiving a valid dataset URI from the user. +* **If the verification succeeds**, proceed to propose the confirmation prompt + below. + +### For Gemini Models + +Check if `scripts/tune_gemini_model.py` exists. + +- **If `scripts/tune_gemini_model.py` exists:** Submit the Gemini model tuning + job using this script. + + ```bash + python3 scripts/tune_gemini_model.py + ``` + +- **If `scripts/tune_gemini_model.py` does not exist:** Instruct the user to + manually configure and submit the tuning job via the Google Cloud Console + UI or using the Agent Platform SDK for Python. + +### For Open Models + +Submit the open model tuning job using `scripts/tune_open_model.py`. 
Identify +the model id using available models documentation at + +[documentation](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/open-model-tuning#supported-models). + + +```bash +python3 scripts/tune_open_model.py \ + --project YOUR_PROJECT \ + --location YOUR_LOCATION \ + --base_model BASE_MODEL_ID \ + --train_dataset gs://YOUR_BUCKET/tuning_agent_job_/dataset.jsonl \ + --output_uri gs://YOUR_BUCKET/tuning_agent_job_/output \ + --epochs EPOCHS \ + --learning_rate LR \ + --tuning_mode MODE +``` + +> [!IMPORTANT] +> **Interactive Confirmation Required (Tier M):** Before proceeding with job +> submission, you **MUST** present the proposed command string showing all +> literal flags in a confirmation prompt to the user with 'Yes' and 'No' +> options. + +> **CRITICAL:** When presenting this confirmation prompt to the user, you MUST +> output it as a direct plain text response and stop tool execution immediately. +> Do NOT call any command execution or interactive tools in the same turn, as +> unexpected tool calls may be auto-replied by the simulation harness and cause +> an infinite loop. Yield immediately for the user's reply. + +## Phase 4: Monitoring {#phase-4-monitoring} + +Monitor the job via the Cloud Console link provided in the script output. +Additionally, ask the user if they want you to monitor the job status for them +in the background. If they agree, execute `scripts/monitor_tuning_job.py` as +a background task to periodically poll the job status and notify the user to +show the status. If the user declines, leave it completely to the user to +check on the status. + +## Phase 5: Model Deployment {#phase-5-model-deployment} + +Once the tuning job is `SUCCEEDED`, deploy the model. 
+ +```bash +ARTIFACTS="gs://YOUR_BUCKET/tuning_agent_job_/output/postprocess/node-0/checkpoints/final" +gcloud ai model-garden models deploy \ + --project=YOUR_PROJECT \ + --region=YOUR_LOCATION \ + --model="$ARTIFACTS" \ + --machine-type=MACHINE_TYPE \ + --accelerator-type=ACCELERATOR_TYPE \ + --accelerator-count=COUNT +``` + +> [!IMPORTANT] +> **Interactive Confirmation Required (Tier M):** Before proceeding with +> deployment, you **MUST** present the proposed command string showing all +> literal flags in a confirmation prompt to the user with 'Yes' and 'No' +> options. + +> **CRITICAL:** When presenting this confirmation prompt to the user, you MUST +> output it as a direct plain text response and stop tool execution immediately. +> Do NOT call any command execution or interactive tools in the same turn, as +> unexpected tool calls may be auto-replied by the simulation harness and cause +> an infinite loop. Yield immediately for the user's reply. + +Refer to [Models Catalog](references/models.md) for hardware recommendations for +specific open models. + +## Resources + +- [Data Preparation Guide](references/data_prep.md) +- [Models Catalog](references/models.md) +- [Tuning Guide](references/tuning_guide.md) +- `scripts/prepare_dataset.py`: Data conversion & validation. +- `scripts/tune_open_model.py`: Open model tuning job submission. \ No newline at end of file diff --git a/.agents/skills/agent-platform-tuning/references/data_prep.md b/.agents/skills/agent-platform-tuning/references/data_prep.md new file mode 100644 index 0000000..6268fd7 --- /dev/null +++ b/.agents/skills/agent-platform-tuning/references/data_prep.md @@ -0,0 +1,51 @@ +# Data Preparation for Agent Platform Model Tuning + +Agent Platform Model Tuning requires training data in **JSON Lines (JSONL)** +format stored in Google Cloud Storage (GCS). + +## Supported JSONL Formats for Open Models + +### 1. 
Conversational (Messages) Format +Recommended for chat-based models (Llama 3.1/3.2/3.3 Chat, Gemma 3 IT, etc.). + +```json +{ + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the capital of France?"}, + {"role": "assistant", "content": "The capital of France is Paris."} + ] +} +``` + +### 2. Instruction (Prompt/Completion) Format +Suitable for base models or simple completion tasks. + +```json +{ + "prompt": "Summarize the following text: [TEXT]", + "completion": "[SUMMARY]" +} +``` + +## Dataset Requirements + +- **File Type**: Must be `.jsonl`. +- **Encoding**: UTF-8. +- **Location**: Must be in a GCS bucket (e.g., `gs://my-bucket/train.jsonl`). +- **Validation Split**: A separate validation file is optional but recommended. It must be no more than 25% of the training dataset size. + +## Bucket Considerations + +If a bucket does not exist, create one in the same region as your tuning job: + +```bash +gcloud storage buckets create gs://YOUR_BUCKET_NAME --location=YOUR_LOCATION +``` + +## Formatting Best Practices + +1. **Quality over Quantity**: 100 high-quality examples often outperform 1,000 noisy ones. +2. **Consistency**: Use consistent formatting for system prompts and instruction styles. +3. **No Empty Values**: Ensure every example has a valid prompt/user message and + completion/assistant response. Use the [preparation script](../scripts/prepare_dataset.py) to validate this. diff --git a/.agents/skills/agent-platform-tuning/references/hf_datasets.md b/.agents/skills/agent-platform-tuning/references/hf_datasets.md new file mode 100644 index 0000000..6f8e758 --- /dev/null +++ b/.agents/skills/agent-platform-tuning/references/hf_datasets.md @@ -0,0 +1,90 @@ +# Huggingface Dataset References + +This document provides references for various datasets used in Agent Platform +Tuning. 
+ +## How to use datasets + +When using a dataset for tuning, it is recommended to first analyze the dataset to understand its structure and content. For more information on data preparation and formatting, please refer to [Data Preparation for Tuning](data_prep.md). + +To download the entire dataset, the following script can be used: + +```python +from datasets import load_dataset +dataset = load_dataset("username/dataset_name") +``` + +To load a specific split, the following script can be used: + +```python +from datasets import load_dataset +dataset = load_dataset("username/dataset_name", split="split_name") +``` + +For larger datasets, it might be helpful to subset the dataset and use only +portions of the dataset as required by the specific task requested. Either use +the splits defined in the huggingface website (ex. `default`) or offer to +partition the dataset for the user. Ensure that the user can see some examples +of the dataset before proceeding. + +[!IMPORTANT] +**CRITICAL: Ask for Confirmation and Column Selection.** +Do not proceed with dataset preparation or upload until you perform the +following steps and get user confirmation: +1. **Dataset and Split Confirmation:** Present the dataset and available splits + to the user and have them confirm which to use. Additionally, show a few + samples to the user for preview. +2. **Column Selection:** The tuning entrypoint requires mapping source columns to the target format (either `prompt`/`completion` or `messages` format). You must: + - Provide a list of all available columns in the selected dataset split. + - Recommend which columns should be mapped to `prompt` (or user message) and `completion` (or assistant response), offering a few reasonable options if applicable. + - Ask the user to confirm the column mapping or specify which columns to use. + +## Datasets List + +Each dataset includes a brief description of the dataset type and some usage +hints. 
The hints are **not** the only way to use these datasets but offer some +suggestions based on the corresponding dataset. + +> [!IMPORTANT] +> Examine the hints since they include some information about the dataset. These +> include comments about the contents of the dataset that may be pertinent to the +> user's request. + +### General and Reasoning Tasks + +#### Mathematical Reasoning + +| Name | Description | Sample Count (Split) | Usage Hints | +|---|---|---|---| +| [open-r1/OpenR1-Math-220k](https://huggingface.co/datasets/open-r1/OpenR1-Math-220k) | A dataset of math problems and solutions. | 93,700 (default) - 220,000 (full) | The main columns are problem and solution. Some other helpful columns are answer, problem_type, question_type, and messages. | +| [AI-MO/NuminaMath-TIR](https://huggingface.co/datasets/AI-MO/NuminaMath-TIR) | A dataset for improving model performance in complex logic and calculations. | N/A | Good choice for mathematical reasoning. | + +#### Instruction Following + +| Name | Description | Sample Count (Split) | Usage Hints | +|---|---|---|---| +| [argilla/ifeval-like-data](https://huggingface.co/datasets/argilla/ifeval-like-data) | A dataset that involves instruction following. | 550,000 (default), 56,000 (filtered) | There are multiple languages in this dataset. Prompt the user with this dataset if specific languages are expected. Filter accordingly. | +| [HuggingFaceTB/smoltalk2](https://huggingface.co/datasets/HuggingFaceTB/smoltalk2) | Enhancing broad instruction-following capabilities. | N/A | This needs to be subsetted, as the initial dataset is very large and covers a wide range of tasks. | + +#### Multilingual Support + +| Name | Description | Sample Count (Split) | Usage Hints | +|---|---|---|---| +| [CohereForAI/aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset) | Expanding linguistic capabilities across diverse languages. | N/A | Contains multi-language instruction following data.
| + +### Specialized and Technical Tasks + +#### Programming & Coding + +| Name | Description | Sample Count (Split) | Usage Hints | +|---|---|---|---| +| [ise-uiuc/Magicoder-Evol-Instruct-110K](https://huggingface.co/datasets/ise-uiuc/Magicoder-Evol-Instruct-110K) | Code generation dataset. | 110,000 | Suitable for advancing coding capabilities. | +| [open-r1/datasets](https://huggingface.co/open-r1/datasets) | Specialized programming & reasoning data. | N/A | General source for open reasoning technical data. | + +#### Tool Use & Integration + +| Name | Description | Sample Count (Split) | Usage Hints | +|---|---|---|---| +| [gorilla-llm/Berkeley-Function-Calling-Leaderboard](https://huggingface.co/datasets/gorilla-llm/Berkeley-Function-Calling-Leaderboard) | Adhere to constraints and use external systems. | N/A | High-quality tool usage and function calling data. | +| [Bingguang/HardGen](https://huggingface.co/datasets/Bingguang/HardGen) | Evaluating handling complex tools and constraints. | N/A | Validated for tool use integration tasks. | + diff --git a/.agents/skills/agent-platform-tuning/references/models.md b/.agents/skills/agent-platform-tuning/references/models.md new file mode 100644 index 0000000..542dcea --- /dev/null +++ b/.agents/skills/agent-platform-tuning/references/models.md @@ -0,0 +1,62 @@ +# Agent Platform Supported Models and Recommendations + +This reference catalog provides technical specifications, tuning +recommendations, and deployment hardware requirements for supported models in +Agent Platform. + +## Supported Models Catalog + +> [!WARNING] **CRITICAL AGENT INSTRUCTION** +> Do NOT use this catalog to recommend a specific model to the user until they +> have explicitly confirmed their **Model Category** as Open Model. +> Furthermore, do NOT recommend any model that is not explicitly listed in this +> catalog, as the tuning service does not support it. 
+ +Available open models can be found in Google Cloud [documentation](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/open-model-tuning#supported-models). +This is the list of open models that are available for tuning; do not suggest +any other open models besides the one listed here. +Each model has some [limitations](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/open-model-tuning#limitations) for tuning. + +## Model Selection Guidelines + +**Identify Task**: Check a few samples from the dataset to identify the task. + +Choose a model family based on your task type: + +- **Qwen**: Best for code generation or complex math-based tasks. +- **Gemma**: Optimized for chat-based interactions, creative writing and multilingual tasks. +- **Llama (Instruct)**: Strong general-purpose chat/instruction models. +- **Llama (Base/Scout)**: Best for continuation tasks or building custom instruction-tuned models. + +**Complexity Heuristics**: + +- **Simple (QA, Extraction)**: 1B - 3B models. +- **Intermediate (Summarization, Reasoning)**: 8B - 17B models. +- **Complex (Multi-turn, Tool use, Deep reasoning)**: 27B - 70B models. + +## Baseline Hyperparameter Recommendations + +These values are starting points and should be adjusted based on your dataset +size. 
+ +| Model | Tuning Mode | Learning Rate | Epochs | Adapter Size (PEFT) | +| :--- | :--- | :--- | :--- | :--- | +| Gemma 3 1B IT | Full | 2.0E-5 | 3 | N/A | +| Gemma 3 4B IT | Full | 1.0E-5 | 3 | N/A | +| Gemma 3 12B IT | Full | 1.0E-5 | 3 | N/A | +| Gemma 3 27B IT | PEFT | 2.0E-4 | 3 | 32 | +| Gemma 3 27B IT | Full | 2.0E-4 | 3 | N/A | +| Llama 3.1 8B | PEFT | 2.0E-4 | 3 | 16 | +| Llama 3.1 8B | Full | 2.0E-4 | 3 | N/A | +| Llama 3.1 8B Instruct | PEFT | 2.0E-4 | 3 | 16 | +| Llama 3.1 8B Instruct | Full | 2.0E-4 | 3 | N/A | +| Llama 3.2 1B Instruct | Full | 1.5E-6 | 3 | N/A | +| Llama 3.2 3B Instruct | Full | 1.0E-7 | 3 | N/A | +| Llama 3.3 70B Instruct | PEFT | 5.0E-5 | 3 | 16 | +| Llama 3.3 70B Instruct | Full | 5.0E-5 | 3 | N/A | +| Llama 4 Scout 17B 16E | PEFT | 2.0E-5 | 3 | 16 | +| Qwen 3 4B | Full | 7.5e-5 | 3 | N/A | +| Qwen 3 8B | Full | 5e-5 | 3 | N/A | +| Qwen 3 14B | Full | 4e-5 | 3 | N/A | +| Qwen 3 32B | PEFT | 2.0E-4 | 3 | 16 | +| Qwen 3 32B | Full | 2.5e-5 | 3 | N/A | \ No newline at end of file diff --git a/.agents/skills/agent-platform-tuning/references/requirements.txt b/.agents/skills/agent-platform-tuning/references/requirements.txt new file mode 100644 index 0000000..3439875 --- /dev/null +++ b/.agents/skills/agent-platform-tuning/references/requirements.txt @@ -0,0 +1,7 @@ +google-cloud-aiplatform==1.138.0 +numpy==2.4.2 +pandas==3.0.1 +datasets==2.18.0 +smart_open[gcs]==7.5.1 +pyarrow==19.0.1 +google-genai==1.73.1 diff --git a/.agents/skills/agent-platform-tuning/references/tuning_guide.md b/.agents/skills/agent-platform-tuning/references/tuning_guide.md new file mode 100644 index 0000000..74e0736 --- /dev/null +++ b/.agents/skills/agent-platform-tuning/references/tuning_guide.md @@ -0,0 +1,39 @@ +# Agent Platform Model Tuning Heuristics and Concepts + +This guide details the core concepts of fine-tuning and provides heuristics for +adjusting hyperparameters based on your specific dataset. 
+ +## Core Tuning Concepts + +### Open Models Tuning Modes + +- **FULL**: Updates all parameters of the model. Requires more GPU memory and a larger dataset to avoid catastrophic forgetting. +- **PEFT_ADAPTER**: Parameter-Efficient Fine-Tuning. Only a small set of "adapter" weights are trained. Faster, uses less memory, and is less prone to overfitting on small datasets. + +### Hyperparameters + +- **Epochs**: Number of times the model sees the entire dataset. +- **Learning Rate**: Step size for optimization. Too high can cause instability; too low can lead to very slow convergence. +- **Adapter Size (Rank)**: For PEFT_ADAPTER, this determines the capacity of the adapters. Higher rank allows more complex learning but increases the risk of overfitting. + +## Dataset Heuristics + +The size and quality of your dataset should dictate your parameter choices. Refer to [Models Catalog](models.md) for baseline values, then adjust as follows: + +### 1. Dataset Size Implications + +| Dataset Size | Tuning Mode Recommendation | Learning Rate Adjustment | Epochs Recommendation | +| :--- | :--- | :--- | :--- | +| **< 100 examples** | PEFT_ADAPTER (Rank 8) | Lower than baseline | 1-2 | +| **100 - 1000 examples** | PEFT_ADAPTER (Rank 16/32) | Baseline | 3 | +| **> 1000 examples** | FULL or PEFT_ADAPTER (Rank 32) | Higher than baseline | 3-5 | + +### 2. General Best Practices + +- **Overfitting**: If validation loss starts increasing while training loss decreases, you are overfitting. Reduce epochs or decrease the learning rate. +- **Underfitting**: If both training and validation loss remain high, increase the learning rate or use more epochs. +- **Validation**: Always use a validation set to monitor performance. If not provided, a 10-20% split is highly recommended. +- **Checkpoints**: The final model is always saved to `/postprocess/node-0/checkpoints/final`. 
+ +## Hardware and Limitations +For specific hardware recommendations and sequence length limits per model, please refer to the [Models Catalog](models.md). diff --git a/.agents/skills/agent-platform-tuning/scripts/calculate_cost.py b/.agents/skills/agent-platform-tuning/scripts/calculate_cost.py new file mode 100644 index 0000000..bb44bb7 --- /dev/null +++ b/.agents/skills/agent-platform-tuning/scripts/calculate_cost.py @@ -0,0 +1,155 @@ +"""Calculate tuning cost for a given dataset and model.""" + +import argparse +import json +import sys +import smart_open + +# Data from +# https://docs.google.com/spreadsheets/d/1pOXzfQBSCaKJYcemvRKv4b30qmUBx3yG28yScH-pnVI/edit?resourcekey=0-0kJGshytd3yrxB41YM4OFg&gid=0#gid=0 +MODEL_DATA = { + 'Gemma 3 1B IT': { + 'Full': {'tokens_per_character': 0.231, 'cost_per_1m_tokens': 0.47}, + }, + 'Gemma 3 4B IT': { + 'Full': {'tokens_per_character': 0.231, 'cost_per_1m_tokens': 1.14}, + }, + 'Gemma 3 12B IT': { + 'Full': {'tokens_per_character': 0.231, 'cost_per_1m_tokens': 1.82}, + }, + 'Gemma 3 27B IT': { + 'PEFT': {'tokens_per_character': 0.231, 'cost_per_1m_tokens': 6.83}, + 'Full': {'tokens_per_character': 0.231, 'cost_per_1m_tokens': 6.83}, + }, + 'Llama 3.1 8B': { + 'PEFT': {'tokens_per_character': 0.317, 'cost_per_1m_tokens': 0.67}, + 'Full': {'tokens_per_character': 0.247, 'cost_per_1m_tokens': 0.67}, + }, + 'Llama 3.1 8B Instruct': { + 'PEFT': {'tokens_per_character': 0.317, 'cost_per_1m_tokens': 0.67}, + 'Full': {'tokens_per_character': 0.247, 'cost_per_1m_tokens': 0.67}, + }, + 'Llama 3.2 1B Instruct': { + 'Full': {'tokens_per_character': 0.247, 'cost_per_1m_tokens': 0.28}, + }, + 'Llama 3.2 3B Instruct': { + 'Full': {'tokens_per_character': 0.247, 'cost_per_1m_tokens': 0.61}, + }, + 'Llama 3.3 70B Instruct': { + 'PEFT': {'tokens_per_character': 0.317, 'cost_per_1m_tokens': 6.72}, + 'Full': {'tokens_per_character': 0.247, 'cost_per_1m_tokens': 6.72}, + }, + 'Llama 4 Scout 17B 16E': { + 'PEFT': {'tokens_per_character': 0.295, 
def count_characters(input_file: str) -> int:
  """Counts the characters of training text in a jsonl dataset.

  Every non-blank line must be a JSON object in one of the two layouts the
  data preparation guide lists for open models:

  * Conversational: a "messages" key holding a list of dictionaries, each
    with a "content" key. The length of every "content" value is counted.
  * Instruction: "prompt" and "completion" keys. The lengths of both values
    are counted.

  Blank lines (for example a trailing newline at the end of the file) are
  skipped instead of being handed to the JSON parser.

  Args:
    input_file: Path to the input jsonl file. Local paths and gs:// URIs are
      accepted.

  Returns:
    Total character count.

  Raises:
    ValueError: If `input_file` uses a URI scheme other than gs://.
    KeyError: If a record follows neither supported layout.
    json.JSONDecodeError: If a non-blank line is not valid JSON.
  """
  if not input_file.startswith('gs://') and '://' in input_file:
    raise ValueError(
        f'Unsupported file path: {input_file}. '
        'Only local paths and gs:// paths are supported.'
    )
  total_character_count = 0
  # smart_open.open is the supported entry point; smart_open.smart_open is a
  # deprecated wrapper around it. The data guide mandates UTF-8, so do not
  # depend on the locale's default encoding.
  with smart_open.open(input_file, 'r', encoding='utf-8') as f:
    for line_number, line in enumerate(f, start=1):
      if not line.strip():
        continue
      data = json.loads(line)
      if 'messages' in data:
        for message in data['messages']:
          total_character_count += len(message['content'])
      elif 'prompt' in data and 'completion' in data:
        total_character_count += len(data['prompt'])
        total_character_count += len(data['completion'])
      else:
        raise KeyError(
            f'line {line_number}: expected a "messages" key or'
            ' "prompt"/"completion" keys'
        )
  return total_character_count
if __name__ == '__main__':
  # Command-line entry point: parse the flags, reject combinations that have
  # no pricing entry, then print the character count and the estimated cost.
  parser = argparse.ArgumentParser(
      description='Calculate tuning cost for a given dataset and model.'
  )
  parser.add_argument('--input', help='Input jsonl file.', required=True)
  parser.add_argument(
      '--model',
      help='Model to use for tuning.',
      required=True,
      choices=MODEL_DATA.keys(),
  )
  parser.add_argument(
      '--tuning_mode',
      help='Tuning mode.',
      required=True,
      choices=['PEFT', 'Full'],
  )
  parser.add_argument(
      '--epochs',
      help='Number of epochs.',
      required=True,
      type=int,
  )
  args = parser.parse_args()

  # The estimate is linear in epochs, so zero or a negative value would print
  # a meaningless $0.00 or negative cost.
  if args.epochs < 1:
    parser.error('--epochs must be a positive integer')

  if (
      args.model not in MODEL_DATA
      or args.tuning_mode not in MODEL_DATA[args.model]
  ):
    # Diagnostics go to stderr so stdout carries only the results.
    print(
        f'Error: Tuning mode {args.tuning_mode} not supported for model'
        f' {args.model}',
        file=sys.stderr,
    )
    sys.exit(1)

  character_count = count_characters(args.input)
  cost = calculate_cost(
      character_count, args.model, args.tuning_mode, args.epochs
  )
  print(f'Total character count: {character_count}')
  print(f'Estimated tuning cost: ${cost:.2f}')
+""" + +import argparse +import vertexai # pytype: disable=import-error +from vertexai.tuning import sft # pytype: disable=import-error + + +def cancel_job(project, location, job_id): + vertexai.init(project=project, location=location) + job_resource = f"projects/{project}/locations/{location}/tuningJobs/{job_id}" + job = sft.SupervisedTuningJob(job_resource) + print(f"Cancelling job: {job_resource}") + job.cancel() + print("Cancellation request sent.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Cancel Vertex AI Supervised Tuning Job" + ) + parser.add_argument("--project", required=True) + parser.add_argument("--location", required=True) + parser.add_argument("--job_id", required=True) + + args = parser.parse_args() + cancel_job(args.project, args.location, args.job_id) diff --git a/.agents/skills/agent-platform-tuning/scripts/monitor_tuning_job.py b/.agents/skills/agent-platform-tuning/scripts/monitor_tuning_job.py new file mode 100755 index 0000000..7f69bd0 --- /dev/null +++ b/.agents/skills/agent-platform-tuning/scripts/monitor_tuning_job.py @@ -0,0 +1,75 @@ +"""Monitors a Agent Platform Supervised Tuning Job.""" + +import argparse +import logging +import time + +from google import genai + + +def monitor_job( + project: str, location: str, job_id: str, poll_interval_secs: int = 60 +): + """Monitors a Agent Platform Supervised Tuning Job. + + This function polls the job status until it reaches a terminal state. + + Args: + project: The Google Cloud project ID. + location: The Google Cloud location. + job_id: The ID of the SupervisedTuningJob. + poll_interval_secs: The interval in seconds to poll the job status. 
+ """ + job_resource = ( + f"projects/{project}/locations/{location}/tuningJobs/{job_id}" + ) + + logging.info("Starting monitoring for tuning job: %s", job_resource) + logging.info( + "View job in console: https://console.cloud.google.com/agent-platform/" + "tuning/locations/%s/tuningJob/%s/monitor?project=%s", + location, + job_id, + project, + ) + with genai.Client( + enterprise=True, project=project, location=location + ) as client: + while True: + job = client.tunings.get(name=job_resource) + status = job.state + status_name = status.name if status else "unknown" + + # Terminal states. JOB_STATE_EXPIRED is included so that an expired job + # ends the loop instead of being polled forever. + if status_name in ( + "JOB_STATE_SUCCEEDED", + "JOB_STATE_FAILED", + "JOB_STATE_CANCELLED", + "JOB_STATE_EXPIRED", + "JOB_STATE_PARTIALLY_SUCCEEDED", + ): + logging.info("Job finished with terminal state: %s", status_name) + break + else: + logging.info("Current job status: %s", status_name) + + time.sleep(poll_interval_secs) + + +if __name__ == "__main__": + logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" + ) + parser = argparse.ArgumentParser( + description="Monitor Agent Platform Supervised Tuning Job" + ) + parser.add_argument("--project", required=True) + parser.add_argument("--location", required=True) + parser.add_argument("--job_id", required=True) + parser.add_argument( + "--poll_interval_secs", + type=int, + default=60, + help="Seconds to wait between polls", + ) + + args = parser.parse_args() + monitor_job(args.project, args.location, args.job_id, args.poll_interval_secs) diff --git a/.agents/skills/agent-platform-tuning/scripts/prepare_dataset.py b/.agents/skills/agent-platform-tuning/scripts/prepare_dataset.py new file mode 100755 index 0000000..395ff7f --- /dev/null +++ b/.agents/skills/agent-platform-tuning/scripts/prepare_dataset.py @@ -0,0 +1,274 @@ +"""Module for preparing and validating datasets for Agent Platform model tuning.""" + +import argparse +import json +import logging +import os +import sys +from typing import Any + +import datasets + + +def
_validate_example(example: dict[str, Any], format_type: str) -> bool: + """Validates a single example against the expected format. + + Returns False instead of raising when the decoded JSON value, or one of its + messages, is not an object (e.g. a bare number or string). + """ + # json.loads may return any JSON value; only an object can match a format. + if not isinstance(example, dict): + return False + if format_type == "messages": + if "messages" not in example or not isinstance(example["messages"], list): + return False + for msg in example["messages"]: + # On a str, "in" would be a substring test and msg["content"] would raise. + if not isinstance(msg, dict): + return False + if not all(k in msg for k in ("role", "content")): + return False + if not msg["content"] or str(msg["content"]).strip().lower() == "nan": + return False + else: + if not all(k in example for k in ("prompt", "completion")): + return False + for k in ("prompt", "completion"): + if not example[k] or str(example[k]).strip().lower() == "nan": + return False + return True + + +def _format_row( + row, format_type: str, prompt_col: str, completion_col: str +) -> dict[str, Any]: + """Formats a single row into the expected JSON structure.""" + prompt_text = str(row[prompt_col]) + completion_text = str(row[completion_col]) + + if format_type == "messages": + return { + "messages": [ + {"role": "user", "content": prompt_text}, + {"role": "assistant", "content": completion_text}, + ] + } + return { + "prompt": prompt_text, + "completion": completion_text, + } + + +def validate_jsonl(file_path: str, format_type: str) -> bool: + """Validates an existing JSONL file.""" + if not os.path.exists(file_path): + logging.error("File not found: %s", file_path) + return False + + valid_count = 0 + invalid_count = 0 + with open(file_path, "r", encoding="utf-8") as f: + for i, line in enumerate(f): + try: + example = json.loads(line) + if _validate_example(example, format_type): + valid_count += 1 + else: + invalid_count += 1 + logging.warning("Invalid format/empty content at line %d", i + 1) + except json.JSONDecodeError: + invalid_count += 1 + logging.warning("Invalid JSON at line %d", i + 1) + + logging.info("Validation complete for %s", file_path) + logging.info("Valid: %d, Invalid: %d", valid_count, invalid_count) + return invalid_count == 0 + + +def convert_to_jsonl( + input_file: str, +
output_file: str, + format_type: str, + prompt_col: str, + completion_col: str, + validation_split: float | None = 0.2, +): + """Converts a file to JSONL format for Agent Platform tuning. + + Supports CSV, JSON, or Parquet files. Rows whose prompt or completion is + empty, "nan" or "none" are dropped before formatting. The process exits + with status 1 when the input is missing, has an unsupported extension, + cannot be read, or lacks one of the requested columns. + + Args: + input_file: Path to the input CSV, JSON, or Parquet file. + output_file: Path where the formatted JSONL file will be saved. + format_type: Target format ("messages" or "prompt"). + prompt_col: Name of the column containing the prompt/user message. + completion_col: Name of the column containing the completion/response. + validation_split: Optional fraction (strictly between 0 and 1) of the data + to hold out for validation. The held-out rows are written to output_file + with ".jsonl" replaced by "_validation.jsonl", or with ".validation.jsonl" + appended when output_file does not contain ".jsonl". When the value is + None, 0, or negative, no split is made. + """ + if not os.path.exists(input_file): + logging.error("Input file not found: %s", input_file) + sys.exit(1) + + try: + if input_file.endswith(".csv"): + dataset = datasets.load_dataset( + "csv", data_files=input_file, split="train" + ) + elif input_file.endswith(".json"): + dataset = datasets.load_dataset( + "json", data_files=input_file, split="train" + ) + elif input_file.endswith(".parquet"): + dataset = datasets.load_dataset( + "parquet", data_files=input_file, split="train" + ) + else: + logging.error("Unsupported file format. Use .csv, .json, or .parquet") + sys.exit(1) + except Exception as e: # pylint: disable=broad-exception-caught + logging.exception("Failed to read input file: %s", e) + sys.exit(1) + + for col in [prompt_col, completion_col]: + if col not in dataset.column_names: + logging.error( + "Column '%s' not found.
Available columns: %s", + col, + dataset.column_names, + ) + sys.exit(1) + + # Remove rows with empty values in critical columns + def is_valid(example): + prompt_text = str(example[prompt_col]).strip().lower() + completion_text = str(example[completion_col]).strip().lower() + return ( + len(prompt_text) > 0 + and len(completion_text) > 0 + and prompt_text != "nan" + and completion_text != "nan" + and prompt_text != "none" + and completion_text != "none" + ) + + initial_len = len(dataset) + dataset = dataset.filter(is_valid) + if len(dataset) < initial_len: + logging.warning( + "Dropped %d rows with empty or NaN values", initial_len - len(dataset) + ) + + def format_example(example): + prompt_text = str(example[prompt_col]) + completion_text = str(example[completion_col]) + if format_type == "messages": + return { + "messages": [ + {"role": "user", "content": prompt_text}, + {"role": "assistant", "content": completion_text}, + ] + } + else: + return { + "prompt": prompt_text, + "completion": completion_text, + } + + formatted_dataset = dataset.map( + format_example, remove_columns=dataset.column_names + ) + + if validation_split and validation_split > 0: + if not (0 < validation_split < 1): + logging.error("validation_split must be between 0 and 1") + sys.exit(1) + + # Use the datasets library to perform the split as requested + split_dict = formatted_dataset.train_test_split( + seed=42, test_size=validation_split + ) + train_ds = split_dict["train"] + val_ds = split_dict["test"] + + val_output_file = output_file.replace(".jsonl", "_validation.jsonl") + if val_output_file == output_file: + val_output_file = output_file + ".validation.jsonl" + + train_ds.to_json(output_file, force_ascii=False, lines=True) + val_ds.to_json(val_output_file, force_ascii=False, lines=True) + + logging.info( + "Successfully saved %d training examples to %s", + len(train_ds), + output_file, + ) + logging.info( + "Successfully saved %d validation examples to %s", + len(val_ds), + 
val_output_file, + ) + else: + formatted_dataset.to_json(output_file, force_ascii=False, lines=True) + logging.info( + "Successfully saved %d examples to %s", + len(formatted_dataset), + output_file, + ) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + parser = argparse.ArgumentParser( + description="Prepare or Validate dataset for Agent Platform Model Tuning" + ) + parser.add_argument( + "--input", + help="Input CSV, JSON, Parquet, or JSONL file", + ) + parser.add_argument( + "--output", + default="tuning_dataset.jsonl", + help="Output JSONL file (only for conversion)", + ) + parser.add_argument( + "--format", + choices=["messages", "prompt"], + default="messages", + help="Target format (messages or prompt/completion)", + ) + parser.add_argument( + "--prompt_col", + help="Column name for prompt/user message (CSV/JSON/Parquet only)", + ) + parser.add_argument( + "--completion_col", + help=( + "Column name for completion/assistant response (CSV/JSON/Parquet" + " only)" + ), + ) + parser.add_argument( + "--validation_split", + type=float, + default=0.2, + help="Fraction of data to use for validation (e.g. 
0.2)", + ) + parser.add_argument( + "--validate_only", + action="store_true", + help="Only validate the input JSONL file without converting", + ) + + args = parser.parse_args() + + if args.validate_only: + if not args.input: + logging.error("--input is required for validation") + sys.exit(1) + success = validate_jsonl(args.input, args.format) + sys.exit(0 if success else 1) + else: + if not all([args.input, args.prompt_col, args.completion_col]): + logging.error( + "--input, --prompt_col, and --completion_col are required for" + " conversion" + ) + sys.exit(1) + convert_to_jsonl( + args.input, + args.output, + args.format, + args.prompt_col, + args.completion_col, + args.validation_split, + ) diff --git a/.agents/skills/agent-platform-tuning/scripts/tune_open_model.py b/.agents/skills/agent-platform-tuning/scripts/tune_open_model.py new file mode 100755 index 0000000..7a70e7c --- /dev/null +++ b/.agents/skills/agent-platform-tuning/scripts/tune_open_model.py @@ -0,0 +1,104 @@ +"""Module for launching Agent Platform model tuning jobs.""" + +import argparse +import logging + +from google import genai +from google.genai import types + + +def tune_open_model( + project: str, + location: str, + base_model: str, + train_dataset: str, + validation_dataset: str | None, + output_uri: str, + epochs: int, + learning_rate: float, + tuning_mode: str, + adapter_size: int | None = None, +) -> types.TuningJob: + """Launches an Agent Platform model tuning job. + + Raises: + ValueError: If tuning_mode is neither "FULL" nor "PEFT_ADAPTER". + """ + training_ds = types.TuningDataset(gcs_uri=train_dataset) + validation_ds = ( + types.TuningValidationDataset(gcs_uri=validation_dataset) + if validation_dataset + else None + ) + + if tuning_mode == "FULL": + mapped_tuning_mode = types.TuningMode.TUNING_MODE_FULL + elif tuning_mode == "PEFT_ADAPTER": + mapped_tuning_mode = types.TuningMode.TUNING_MODE_PEFT_ADAPTER + else: + raise ValueError( + f"Unsupported tuning mode: {tuning_mode}. Supported modes are: FULL," + " PEFT_ADAPTER."
+ ) + adapter_map = { + 1: "ADAPTER_SIZE_ONE", + 2: "ADAPTER_SIZE_TWO", + 4: "ADAPTER_SIZE_FOUR", + 8: "ADAPTER_SIZE_EIGHT", + 16: "ADAPTER_SIZE_SIXTEEN", + 32: "ADAPTER_SIZE_THIRTY_TWO", + } + mapped_adapter = adapter_map.get(adapter_size) if adapter_size else None + + config = types.CreateTuningJobConfig( + epoch_count=epochs, + learning_rate=learning_rate, + validation_dataset=validation_ds, + tuning_mode=mapped_tuning_mode, + adapter_size=mapped_adapter, + output_uri=output_uri, + labels={"mg-source": "agent-platform-tuning-skill"}, + ) + with genai.Client( + enterprise=True, project=project, location=location + ) as client: + tuning_job = client.tunings.tune( + base_model=base_model, + training_dataset=training_ds, + config=config, + ) + + logging.info("Tuning job launched: %s", tuning_job.name) + job_id = tuning_job.name.split("/")[-1] if tuning_job.name else "unknown" + logging.info( + "View job in console: " + "https://console.cloud.google.com/agent-platform/tuning/locations/%s/tuningJob/%s/monitor?project=%s", + location, + job_id, + project, + ) + return tuning_job + + +if __name__ == "__main__": + # The root logger defaults to WARNING, so without this the INFO records + # carrying the job name and console URL would never be shown. + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + parser = argparse.ArgumentParser( + description="Launch Vertex AI Model Tuning Job" + ) + parser.add_argument("--project", required=True) + parser.add_argument("--location", required=True) + parser.add_argument("--base_model", required=True) + parser.add_argument("--train_dataset", required=True) + parser.add_argument( + "--validation_dataset", help="Optional validation dataset URI" + ) + parser.add_argument("--output_uri", required=True) + parser.add_argument("--epochs", type=int, required=True) + parser.add_argument("--learning_rate", type=float, required=True) + parser.add_argument( + "--tuning_mode", choices=("FULL", "PEFT_ADAPTER"), required=True + ) + parser.add_argument( + "--adapter_size", + type=int, + # Must list every key of adapter_map in tune_open_model. + choices=(1, 2, 4, 8, 16, 32), + help="Adapter size for PEFT", + ) + + args = parser.parse_args() + tune_open_model(**vars(args)) diff --git
a/.agents/skills/alloydb-basics/SKILL.md b/.agents/skills/alloydb-basics/SKILL.md new file mode 100644 index 0000000..31c978c --- /dev/null +++ b/.agents/skills/alloydb-basics/SKILL.md @@ -0,0 +1,68 @@ +--- +name: alloydb-basics +description: >- + Manages clusters, instances, and backups for AlloyDB for PostgreSQL, and + integrates with AlloyDB model context protocol (MCP) tools for automated database operations. +--- + +# AlloyDB Basics + +AlloyDB for PostgreSQL is a managed, PostgreSQL-compatible database service +designed for enterprise-grade performance and availability. It utilizes a +disaggregated compute and storage architecture to scale resources independently. +It also provides AlloyDB AI, a collection of features that includes AI-powered +search (vector, hybrid search, and AI functions), natural language capabilities, +conversational analytics, and inference features like forecasting and model +endpoint management to help developers build AI apps faster. + +## Quick Start + +1. **Enable the AlloyDB API:** + + ```bash + gcloud services enable alloydb.googleapis.com --quiet + ``` + +2. **Create a Cluster:** + + ```bash + gcloud alloydb clusters create my-cluster --region=us-central1 \ + --password=my-password --network=my-vpc \ + --quiet + ``` + + *Note: For production, we recommend using IAM database authentication + instead of passwords. If passwords must be used, use secure secret + management (e.g., Secret Manager) instead of passing passwords in + cleartext.* + +3. **Create a Primary Instance:** + + ```bash + gcloud alloydb instances create my-primary --cluster=my-cluster \ + --region=us-central1 --instance-type=PRIMARY --cpu-count=2 \ + --quiet + ``` + +## Reference Directory + +- [Core Concepts](references/core-concepts.md): Architecture, disaggregated + storage, and performance features. + +- [CLI Usage](references/cli-usage.md): Essential `gcloud alloydb` commands + for cluster and instance management. 
+ +- [Client Libraries & Connectors](references/client-library-usage.md): + Connecting to AlloyDB using Python, Java, Node.js, and Go. + +- [MCP Usage](references/mcp-usage.md): Using the AlloyDB remote MCP server + and Gemini CLI extension. + +- [Infrastructure as Code](references/iac-usage.md): Terraform + configuration and deployment examples. + +- [IAM & Security](references/iam-security.md): Predefined roles, service + agents, and database authentication. + +*If you need product information not found in these references, use the + Developer Knowledge MCP server `search_documents` tool.* \ No newline at end of file diff --git a/.agents/skills/alloydb-basics/references/cli-usage.md b/.agents/skills/alloydb-basics/references/cli-usage.md new file mode 100644 index 0000000..964cc24 --- /dev/null +++ b/.agents/skills/alloydb-basics/references/cli-usage.md @@ -0,0 +1,37 @@ +# AlloyDB CLI Usage + +AlloyDB resources are managed using the `gcloud alloydb` command group. + +## Clusters + +1. Create a cluster: `gcloud alloydb clusters create CLUSTER_ID --region=REGION + --password=PASSWORD` + +2. List clusters: `gcloud alloydb clusters list --region=REGION` + +3. Get cluster info: `gcloud alloydb clusters describe CLUSTER_ID + --region=REGION` + +4. Delete a cluster: `gcloud alloydb clusters delete CLUSTER_ID --region=REGION` + +## Instances + +1. Create a primary instance: `gcloud alloydb instances create INSTANCE_ID + --cluster=CLUSTER_ID --region=REGION --instance-type=PRIMARY --cpu-count=8` + +2. Create a read pool instance: `gcloud alloydb instances create INSTANCE_ID + --cluster=CLUSTER_ID --region=REGION --instance-type=READ_POOL + --read-pool-node-count=2 --cpu-count=2` + +3. List instances: `gcloud alloydb instances list --cluster=CLUSTER_ID + --region=REGION` + +4. Restart an instance: `gcloud alloydb instances restart INSTANCE_ID + --cluster=CLUSTER_ID --region=REGION` + +## Backups + +1. 
Create a backup: `gcloud alloydb backups create BACKUP_ID + --cluster=CLUSTER_ID --region=REGION` + +2. List backups: `gcloud alloydb backups list --region=REGION` diff --git a/.agents/skills/alloydb-basics/references/client-library-usage.md b/.agents/skills/alloydb-basics/references/client-library-usage.md new file mode 100644 index 0000000..f584c81 --- /dev/null +++ b/.agents/skills/alloydb-basics/references/client-library-usage.md @@ -0,0 +1,187 @@ +# AlloyDB Client Libraries & Connectors + +Google Cloud provides various ways to connect to AlloyDB idiomatically from +different programming languages. We optionally provide Client Libraries and +Connectors to facilitate secure authentication and connection from your clients +to your AlloyDB instances. These tools handle the management of SSL +certificates, firewall rules, and IAM Auth token automation. + +## AlloyDB Language Connectors + +Language connectors are libraries for Python, Java, and Go designed for +developers who prefer an integrated, driver-level experience over the +operational overhead of managing the Auth Proxy as a separate binary. 
+ +### Python + +- **Installation:** + + ```bash + pip install "google-cloud-alloydb-connector[pg8000]" sqlalchemy + ``` + +- **Usage Example:** + + ```python + import sqlalchemy + from google.cloud.alloydbconnector import Connector + + INSTANCE_URI = "projects/MY_PROJECT/locations/MY_REGION/clusters/MY_CLUSTER/instances/MY_INSTANCE" + + with Connector() as connector: + pool = sqlalchemy.create_engine( + "postgresql+pg8000://", + creator=lambda: connector.connect( + INSTANCE_URI, + "pg8000", + user="my-user", + password="my-password", + db="my-db", + ), + ) + + with pool.connect() as conn: + result = conn.execute(sqlalchemy.text("SELECT NOW()")).fetchone() + print(result) + ``` + +### Java + +- **Maven Dependency:** + + ```xml + <dependency> + <groupId>com.google.cloud</groupId> + <artifactId>alloydb-jdbc-connector</artifactId> + </dependency> + <dependency> + <groupId>org.postgresql</groupId> + <artifactId>postgresql</artifactId> + </dependency> + <dependency> + <groupId>com.zaxxer</groupId> + <artifactId>HikariCP</artifactId> + </dependency> + ``` + +- **Configuring a Connection Pool:** + + We recommend using HikariCP for connection pooling. To use HikariCP with the + Java Connector, you will need to set the usual properties (e.g., JDBC URL, + username, password, etc) and you will need to set two Connector specific + properties: + + * `socketFactory` should be set to + `com.google.cloud.alloydb.SocketFactory` + * `alloydbInstanceName` should be set to the AlloyDB instance you want to + connect to, e.g.: + `projects/<PROJECT>/locations/<REGION>/clusters/<CLUSTER>/instances/<INSTANCE>` + + Basic configuration of a data source looks like this: + + ```java + import com.zaxxer.hikari.HikariConfig; + import com.zaxxer.hikari.HikariDataSource; + + public class ExampleApplication { + + private HikariDataSource dataSource; + + public HikariDataSource getDataSource() { + HikariConfig config = new HikariConfig(); + + // There is no need to set a host on the JDBC URL + // since the Connector will resolve the correct IP address.
+ config.setJdbcUrl(String.format("jdbc:postgresql:///%s", System.getenv("ALLOYDB_DB"))); + config.setUsername(System.getenv("ALLOYDB_USER")); + config.setPassword(System.getenv("ALLOYDB_PASS")); + + // Tell the driver to use the AlloyDB Java Connector's SocketFactory + // when connecting to an instance/ + config.addDataSourceProperty("socketFactory", + "com.google.cloud.alloydb.SocketFactory"); + // Tell the Java Connector which instance to connect to. + config.addDataSourceProperty("alloydbInstanceName", + System.getenv("ALLOYDB_INSTANCE_NAME")); + + dataSource = new HikariDataSource(config); + return dataSource; + } + + // Use DataSource as usual ... + + } + ``` + + See [end to end + test](https://github.com/GoogleCloudPlatform/alloydb-java-connector/blob/main/jdbc/postgres/src/test/java/com/google/cloud/alloydb/postgres/PgJdbcIntegrationTests.java) + for a full example. + + See [About Pool + Sizing](https://github.com/brettwooldridge/HikariCP/wiki/About-Pool-Sizing) + for useful guidance on getting the best performance from a connection pool. + +### Go + +- **Installation:** + + ```bash + go get cloud.google.com/go/alloydbconn + ``` + +- **Usage Example:** + + ```go + package main + + import ( + "database/sql" + "fmt" + "log" + + "cloud.google.com/go/alloydbconn/driver/pgxv5" + ) + + func main() { + // Register the AlloyDB driver with the name "alloydb" + // Uses Private IP by default. See Network Options below for details. 
+ cleanup, err := pgxv5.RegisterDriver("alloydb") + if err != nil { + log.Fatal(err) + } + defer cleanup() + + // Instance URI format: + // projects/PROJECT/locations/REGION/clusters/CLUSTER/instances/INSTANCE + db, err := sql.Open("alloydb", fmt.Sprintf( + "host=%s user=%s password=%s dbname=%s sslmode=disable", + "projects/my-project/locations/us-central1/clusters/my-cluster/instances/my-instance", + "my-user", + "my-password", + "my-db", + )) + if err != nil { + log.Fatal(err) + } + defer db.Close() + + var greeting string + if err := db.QueryRow("SELECT 'Hello, AlloyDB!'").Scan(&greeting); err != nil { + log.Fatal(err) + } + fmt.Println(greeting) + } + ``` + +## Standard PostgreSQL Drivers + +Since AlloyDB is PostgreSQL-compatible, you can also use standard drivers: + +- **Python:** `psycopg2`, `asyncpg`, `pg8000` + +- **Java:** `PostgreSQL JDBC Driver` + +- **Go:** `lib/pq`, `jackc/pgx` + +For more details, see: [AlloyDB +Connectors](https://cloud.google.com/alloydb/docs/connect-external). \ No newline at end of file diff --git a/.agents/skills/alloydb-basics/references/core-concepts.md b/.agents/skills/alloydb-basics/references/core-concepts.md new file mode 100644 index 0000000..03403e5 --- /dev/null +++ b/.agents/skills/alloydb-basics/references/core-concepts.md @@ -0,0 +1,50 @@ +# AlloyDB Core Concepts + +AlloyDB for PostgreSQL is a fully managed, PostgreSQL-compatible database +service designed for high performance, scale, and availability. It is built on +top of a cloud-native storage engine that separates compute from storage, +allowing for efficient scaling and high availability. + +AlloyDB is ideal for enterprise-grade transactional workloads, such as ERP or +CRM systems, as well as for analytical workloads that benefit from its columnar +engine, and vector workloads using its [vector search +capabilities](https://docs.cloud.google.com/alloydb/docs/ai/perform-vector-search). + +## Regional Availability + +AlloyDB is a regional service. 
A cluster consists of a primary instance and +optional read pool instances, all of which are located in the same region. The +storage is replicated across multiple zones within the region to ensure high +availability. + +## AlloyDB Auth Proxy + +The [AlloyDB Auth +Proxy](https://cloud.google.com/alloydb/docs/auth-proxy/connect) is a standalone +tool that can be deployed in any environment, and works by opening a local +socket and proxying connections to your AlloyDB instance. + +## Connectivity Options + +### Private vs Public IP + +When connecting to AlloyDB, you can use either a Private IP or a Public IP: + +- **Private IP:** Your client must be deployed either in the same VPC network + as your AlloyDB cluster (when using PSA), or have a PSC endpoint in your VPC + (when using PSC) to connect directly using Private IP. For indirect methods + of connecting outside your VPC, see [Enable private services + access](https://cloud.google.com/alloydb/docs/configure-connectivity). +- **Public IP:** If enabled on your instance, you can connect from outside the + VPC network. + +## Connection Pooling + +For production workloads, use connection poolers like **PgBouncer** (integrated +in AlloyDB) to manage high numbers of concurrent connections efficiently. + +## Pricing + +For up-to-date pricing information, visit the official [AlloyDB +Pricing](https://cloud.google.com/alloydb/pricing) page. Pricing is based on the +number of vCPUs and memory for each instance, as well as the storage used. diff --git a/.agents/skills/alloydb-basics/references/iac-usage.md b/.agents/skills/alloydb-basics/references/iac-usage.md new file mode 100644 index 0000000..25b8ad5 --- /dev/null +++ b/.agents/skills/alloydb-basics/references/iac-usage.md @@ -0,0 +1,135 @@ +# AlloyDB Infrastructure as Code Usage + +AlloyDB resources can be managed using Terraform via the Google Cloud Provider, +or via Kubernetes Config Connector (KCC). + +## Terraform + +### Resources + +1. 
`google_alloydb_cluster`: Manages an AlloyDB cluster. +2. `google_alloydb_instance`: Manages an AlloyDB instance within a cluster. + +### Example + +```terraform +data "google_project" "project" {} + +resource "google_compute_network" "default" { + name = "alloydb-network" +} + +resource "google_compute_global_address" "private_ip_alloc" { + name = "alloydb-cluster" + address_type = "INTERNAL" + purpose = "VPC_PEERING" + prefix_length = 16 + network = google_compute_network.default.id +} + +resource "google_service_networking_connection" "vpc_connection" { + network = google_compute_network.default.id + service = "servicenetworking.googleapis.com" + reserved_peering_ranges = [google_compute_global_address.private_ip_alloc.name] +} + +resource "google_alloydb_cluster" "default" { + cluster_id = "alloydb-cluster" + location = "us-central1" + network_config { + network = google_compute_network.default.id + } + + initial_user { + password = "alloydb-cluster" + } + + deletion_protection = false +} + +resource "google_alloydb_instance" "default" { + cluster = google_alloydb_cluster.default.name + instance_id = "alloydb-instance" + instance_type = "PRIMARY" + + machine_config { + cpu_count = 2 + } + + depends_on = [google_service_networking_connection.vpc_connection] +} +``` + +For more information, see the [Google Provider +Reference](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/alloydb_cluster). + +## Kubernetes Config Connector (KCC) + +### Resources + +1. `AlloyDBCluster`: Manages an AlloyDB cluster. +2. `AlloyDBInstance`: Manages an AlloyDB instance within a cluster. 
+ +### Example + +```yaml +apiVersion: compute.cnrm.cloud.google.com/v1beta1 +kind: ComputeNetwork +metadata: + name: alloydb-network-kcc +spec: + routingMode: REGIONAL + autoCreateSubnetworks: false +--- +apiVersion: compute.cnrm.cloud.google.com/v1beta1 +kind: ComputeAddress +metadata: + name: alloydb-kcc-addr +spec: + location: global + addressType: INTERNAL + purpose: VPC_PEERING + prefixLength: 16 + networkRef: + name: alloydb-network-kcc +--- +apiVersion: servicenetworking.cnrm.cloud.google.com/v1beta1 +kind: ServiceNetworkingConnection +metadata: + name: alloydb-vpc-connection-kcc +spec: + networkRef: + name: alloydb-network-kcc + service: servicenetworking.googleapis.com + reservedPeeringRanges: + - name: alloydb-kcc-addr +--- +apiVersion: alloydb.cnrm.cloud.google.com/v1beta1 +kind: AlloyDBCluster +metadata: + name: alloydb-cluster-kcc +spec: + location: us-central1 + networkConfig: + networkRef: + name: alloydb-network-kcc + initialUser: + password: + valueFrom: + secretKeyRef: + name: alloydb-secret + key: password +--- +apiVersion: alloydb.cnrm.cloud.google.com/v1beta1 +kind: AlloyDBInstance +metadata: + name: alloydb-instance-kcc +spec: + clusterRef: + name: alloydb-cluster-kcc + instanceType: PRIMARY + machineConfig: + cpuCount: 2 +``` + +For more information, see the [Config Connector resources](https://docs.cloud.google.com/config-connector/docs/reference/overview). diff --git a/.agents/skills/alloydb-basics/references/iam-security.md b/.agents/skills/alloydb-basics/references/iam-security.md new file mode 100644 index 0000000..5babc39 --- /dev/null +++ b/.agents/skills/alloydb-basics/references/iam-security.md @@ -0,0 +1,93 @@ +# AlloyDB IAM & Security + +AlloyDB utilizes Google Cloud Identity and Access Management (IAM) to provide +granular access control and robust security features. 
+ +## Predefined IAM Roles + +The following table describes the predefined roles available for AlloyDB: + +| Role Name | Usage | +| :--- | :--- | +| `roles/alloydb.admin` | Full control of all AlloyDB resources. | +| `roles/alloydb.client` | Connectivity access to AlloyDB instances. | +| `roles/alloydb.databaseUser` | Authenticated database-user access to instances. | +| `roles/alloydb.viewer` | Read-only access to all AlloyDB resources. | + +## Secure Connectivity + +1. **Network Security:** + - **Private IP:** Keeps traffic internal to Google Cloud. + - **Private Service Connect (PSC):** Recommended for new + configurations. Offers enhanced security, better IP management, and + flexible multi-VPC topologies without peering. + - **Private Services Access (PSA):** Uses VPC peering. + - **Public IP:** Allows connections from outside GCP. + - **ALWAYS** use with **Authorized Networks** to restrict access to + specific IP ranges. + - **NEVER** use `0.0.0.0/0` in Authorized Networks. + - **VPC Service Controls (VPC-SC):** Define security perimeters around + your AlloyDB instances to prevent data exfiltration. + +1. **Encryption:** + - **In Transit:** TLS encryption is enforced by default for all + connections. + - **At Rest:** Data is always encrypted, using Google-managed keys by + default. Customer-Managed Encryption Keys (CMEK) are supported for + greater control. + +1. **Authentication & Authorization:** + - **IAM Authentication:** **Recommended.** Uses Google Cloud Identity and + Access Management (IAM) principals (users/service accounts) and + short-lived OAuth tokens. Centralizes access control and avoids database + passwords. + - **Built-in Database Authentication:** Standard PostgreSQL + username/password. Use strong password policies if this method is + chosen. + - **Database Authorization:** Standard PostgreSQL GRANT / REVOKE + statements manage permissions within the database, regardless of the + authentication method. + +1. 
**Connection Methods:** + - **Direct Connection:** Standard PostgreSQL connection. + - **AlloyDB Connectors (Auth Proxy & Language Libraries):** **Highly + Recommended for security.** + - Provide connection-level **IAM Authorization**. + - Automatically use **mTLS** (Mutual TLS) for encrypted and + authenticated connections. + - Simplify using IAM Database Authentication (tokens are handled + automatically). + - Essential for secure Public IP access without wide open Authorized + Networks. + - Best practice: Run Auth Proxy as a sidecar to the application. + +**Key Recommendations for Agents:** + +- **Prioritize Private IP:** Guide users to use Private IP (especially PSC) + over Public IP whenever their application is within GCP. +- **Push AlloyDB Connectors:** Strongly recommend AlloyDB Connectors (Auth + Proxy or language libraries) because they enhance security through IAM + connection authorization and mTLS, especially crucial for Public IP. +- **IAM Authentication is Preferred:** Encourages centralized management and + token-based auth. +- **Secure Public IP:** If Public IP is necessary, stress the absolute need + for tightly restricted Authorized Networks. +- **Leverage Cloud Security Tools:** Remind users to use VPC-SC and Security + Command Center for monitoring and policy enforcement. + +## Data Security + +- **Encryption at Rest:** All data is encrypted by default. Use Customer-Managed + Encryption Keys (CMEK) for greater control. + +- **IAM Database Authentication:** Authenticate to the database using IAM + identities (users or service accounts) instead of static passwords. + +## Service Agents + +AlloyDB uses a managed service agent +(`service-PROJECT_NUMBER@gcp-sa-alloydb.iam.gserviceaccount.com`) to manage +resources like storage and backups. Ensure this agent has the necessary +permissions in your project. 
+ +For more information, see: [Security, privacy, risk, and compliance for AlloyDB for PostgreSQL](https://docs.cloud.google.com/alloydb/docs/security-privacy-compliance). diff --git a/.agents/skills/alloydb-basics/references/mcp-usage.md b/.agents/skills/alloydb-basics/references/mcp-usage.md new file mode 100644 index 0000000..0375abb --- /dev/null +++ b/.agents/skills/alloydb-basics/references/mcp-usage.md @@ -0,0 +1,29 @@ +# AlloyDB MCP Usage + +AlloyDB supports a remote Model Context Protocol (MCP) server, allowing AI +applications to interact with AlloyDB resources. + +## Endpoint + +The AlloyDB MCP server endpoint is regional: +`https://alloydb.REGION.rep.googleapis.com/mcp` + +Replace `REGION` with the regional location of the endpoint (e.g., +`us-central1`). + +## Setup and Authentication + +1. Enable the AlloyDB API in your project. +2. Grant the `roles/mcp.toolUser` role to the principal making the tool calls. +3. Configure your MCP host to point to the regional endpoint. + +For more details, see the [Use the AlloyDB remote MCP +server](https://cloud.google.com/alloydb/docs/ai/use-alloydb-mcp) guide. + +## Resources + +- [AlloyDB MCP Reference](https://cloud.google.com/alloydb/docs/reference/mcp) +- [MCP Toolbox](https://mcp-toolbox.dev/): An open-source alternative to the remote MCP server that runs on a local machine or IDE. + - [MCP Toolbox AlloyDB Integration](https://mcp-toolbox.dev/integrations/alloydb/source/) + - [Configure your MCP client](https://docs.cloud.google.com/alloydb/docs/connect-ide-using-mcp-toolbox#configure-your-mcp-client) +- For additional specialized skills including health auditing, performance monitoring, and lifecycle management, install the [AlloyDB for PostgreSQL](https://github.com/gemini-cli-extensions/alloydb) Gemini CLI extension or Claude Plugin. 
diff --git a/.agents/skills/bigquery-basics/SKILL.md b/.agents/skills/bigquery-basics/SKILL.md new file mode 100644 index 0000000..115116d --- /dev/null +++ b/.agents/skills/bigquery-basics/SKILL.md @@ -0,0 +1,109 @@ +--- +name: bigquery-basics +description: >- + Manages datasets, tables, and jobs in BigQuery, and integrates with BigQuery + ML and Gemini for advanced data analytics and AI-driven insights. Use when + you need to interact with BigQuery, run SQL queries, manage BigQuery + resources, or leverage BigQuery's built-in ML capabilities. Also use when + performing data analysis, ingesting data into BigQuery, or developing AI + applications on BigQuery. +--- + +# BigQuery Basics + +BigQuery is a serverless, AI-ready data platform that enables high-speed +analysis of large datasets using SQL and Python. Its disaggregated architecture +separates compute and storage, allowing them to scale independently while +providing built-in machine learning, geospatial analysis, and business +intelligence capabilities. + +## Setup and Basic Usage + +1. **Enable the BigQuery API:** + ```bash + gcloud services enable bigquery.googleapis.com --quiet + ``` + +2. **Create a Dataset:** + ```bash + bq mk --dataset --location=US my_dataset + ``` + +3. **Create a Table:** + + Create a file named `schema.json` with your table schema: + + ```json + [ + { + "name": "name", + "type": "STRING", + "mode": "REQUIRED" + }, + { + "name": "post_abbr", + "type": "STRING", + "mode": "NULLABLE" + } + ] + ``` + + Then create the table with the `bq` tool: + + ```bash + bq mk --table my_dataset.mytable schema.json + ``` + +4. **Run a Query:** + ```bash + bq query --use_legacy_sql=false \ + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE state = "TX" LIMIT 10' + ``` + +## Reference Directory + +- [Core Concepts](references/core-concepts.md): Storage types, analytics + workflows, and BigQuery Studio features. 
+ +- [CLI Usage](references/cli-usage.md): Essential `bq` command-line tool + operations for managing data and jobs. + +- [Client Libraries](references/client-library-usage.md): Using Google Cloud + client libraries for Python, Java, Node.js, and Go. + +- [MCP Usage](references/mcp-usage.md): Using the BigQuery remote MCP server and + Gemini CLI extension. + +- [Infrastructure as Code](references/iac-usage.md): Terraform examples for + datasets, tables, and reservations. + +- [IAM & Security](references/iam-security.md): Roles, permissions, and data + governance best practices. + +- [AI Forecast](references/bigquery_ai_forecast.md): Leveraging the pre-trained + TimesFM model for forecasting without custom training. + +- [AI Detect Anomalies](references/bigquery_ai_detect_anomalies.md): Identifying + deviations in time series data using the pre-trained TimesFM model. + +- [AI Generate](references/bigquery_ai_generate.md): General-purpose text and + content generation using Gemini models. + +*If you need product information not found in these references, use the +Developer Knowledge MCP server `search_documents` tool.* + +## Related Skills + +- [BigQuery AI & ML Skill](https://github.com/google/adk-python/tree/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml): + SKILL.md file for BigQuery AI and ML capabilities. +- [BigQuery AI & ML References](https://github.com/google/adk-python/tree/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references): + Reference files published for the BigQuery AI and ML skill. 
+ - [bigquery_ai_classify.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_classify.md) + - [bigquery_ai_generate_bool.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_generate_bool.md) + - [bigquery_ai_generate_double.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_generate_double.md) + - [bigquery_ai_generate_int.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_generate_int.md) + - [bigquery_ai_if.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_if.md) + - [bigquery_ai_score.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_score.md) + - [bigquery_ai_search.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_search.md) + - [bigquery_ai_similarity.md](https://github.com/google/adk-python/blob/main/src/google/adk/tools/bigquery/skills/bigquery-ai-ml/references/bigquery_ai_similarity.md) diff --git a/.agents/skills/bigquery-basics/references/bigquery_ai_detect_anomalies.md b/.agents/skills/bigquery-basics/references/bigquery_ai_detect_anomalies.md new file mode 100644 index 0000000..43b4b69 --- /dev/null +++ b/.agents/skills/bigquery-basics/references/bigquery_ai_detect_anomalies.md @@ -0,0 +1,101 @@ +# BigQuery AI.Detect_Anomalies + +`AI.DETECT_ANOMALIES` uses the pre-trained **TimesFM** model to identify +deviations in time series data without needing to train a custom model. + +## Syntax Reference + +This function compares a target dataset against a historical dataset to identify +anomalies. 
+ +```sql +SELECT * +FROM AI.DETECT_ANOMALIES( + { TABLE `project.dataset.history_table` | (SELECT * FROM history_query) }, + { TABLE `project.dataset.target_table` | (SELECT * FROM target_query) }, + data_col => 'DATA_COL', + timestamp_col => 'TIMESTAMP_COL' + [, model => 'MODEL'] + [, id_cols => ID_COLS] + [, anomaly_prob_threshold => ANOMALY_PROB_THRESHOLD] +) + +``` + +### Input Arguments + +Argument | Requirement | Type | Description +:--------------------------- | :----------- | :------------ | :---------- +**`historical_data`** | **Required** | Table/Query | The source table or subquery containing historical data for training context. +**`target_data`** | **Required** | Table/Query | The source table or subquery containing data to analyze for anomalies. +**`data_col`** | **Required** | String | The numeric column to analyze. +**`timestamp_col`** | **Required** | String | The column containing dates/timestamps. +**`id_cols`** | Optional | Array | Grouping columns for multiple series (e.g., `['store_id']`). +**`anomaly_prob_threshold`** | Optional | Float64 | Threshold for anomaly detection (0 to 1). Defaults to 0.95. +**`model`** | Optional | String | Model version. Defaults to `'TimesFM 2.0'`. + +### Output Schema + +| Column | Type | Description | +| :--- | :--- | :--- | +| **`id_cols`** | (As Input) | Original identifiers for the series. | +| **`time_series_timestamp`** | TIMESTAMP | Timestamp for the analyzed points. | +| **`time_series_data`** | FLOAT64 | The original data value. | +| **`is_anomaly`** | BOOL | TRUE if the point is identified as an anomaly. | +| **`lower_bound`** | FLOAT64 | Lower bound of the expected range. | +| **`upper_bound`** | FLOAT64 | Upper bound of the expected range. | +| **`anomaly_probability`** | FLOAT64 | Probability that the point is an anomaly. | +| **`ai_detect_anomalies_status`** | STRING | Error messages or empty string on success. A minimum of 3 data points is required. 
| + +## Examples + +### Basic Anomaly Detection + +Detect anomalies in daily bike trips for a specific 2-month window based on +prior history. + +```sql +WITH bike_trips AS ( + SELECT EXTRACT(DATE FROM starttime) AS date, COUNT(*) AS num_trips + FROM `bigquery-public-data.new_york.citibike_trips` + GROUP BY date +) +SELECT * +FROM AI.DETECT_ANOMALIES( + -- Historical context (Training data equivalent) + (SELECT * FROM bike_trips WHERE date <= DATE('2016-06-30')), + -- Target range (Data to inspect for anomalies) + (SELECT * FROM bike_trips WHERE date BETWEEN '2016-07-01' AND '2016-09-01'), + data_col => 'num_trips', + timestamp_col => 'date' +); + +``` + +### Multivariate Detection (Multiple Series) + +Use `id_cols` to detect anomalies separately for different user types (e.g., +Subscriber vs. Customer) in the same query. + +```sql +WITH bike_trips AS ( + SELECT + EXTRACT(DATE FROM starttime) AS date, usertype, gender, + COUNT(*) AS num_trips + FROM `bigquery-public-data.new_york.citibike_trips` + GROUP BY date, usertype, gender + ) +SELECT * +FROM + AI.DETECT_ANOMALIES( + # Historical data from a query + (SELECT * FROM bike_trips WHERE date <= DATE('2016-06-30')), + # Target data from a query + (SELECT * FROM bike_trips WHERE date BETWEEN '2016-07-01' AND '2016-09-01'), + data_col => 'num_trips', + timestamp_col => 'date', + id_cols => ['usertype', 'gender'], + model => "TimesFM 2.5", + anomaly_prob_threshold => 0.8); + +``` diff --git a/.agents/skills/bigquery-basics/references/bigquery_ai_forecast.md b/.agents/skills/bigquery-basics/references/bigquery_ai_forecast.md new file mode 100644 index 0000000..375d99a --- /dev/null +++ b/.agents/skills/bigquery-basics/references/bigquery_ai_forecast.md @@ -0,0 +1,75 @@ +# BigQuery AI.Forecast + +`AI.FORECAST` leverages the pre-trained **TimesFM** foundation model to generate +forecasts without the need to train and manage custom models. 
+ +## Syntax Reference + +```sql +SELECT + * +FROM + AI.FORECAST( + { TABLE `project.dataset.table` | (QUERY_STATEMENT) }, + data_col => 'DATA_COL', + timestamp_col => 'TIMESTAMP_COL' + [, model => 'MODEL'] + [, id_cols => ID_COLS] + [, horizon => HORIZON] + [, confidence_level => CONFIDENCE_LEVEL] + [, output_historical_time_series => OUTPUT_HISTORICAL_TIME_SERIES] + [, context_window => CONTEXT_WINDOW] + ) +``` + +### Input Arguments + +| Argument | Requirement | Type | Description | +| :--------------------- | :----------- | :------------ | :---------------- | +| **`input_data`** | **Required** | | The source table or subquery containing historical data. | +| **`data_col`** | **Required** | String | The numeric column to predict. | +| **`timestamp_col`** | **Required** | String | The column containing dates/timestamps. | +| **`id_cols`** | Optional | Array | Grouping columns for multiple series (e.g., `['store_id']`). | +| **`horizon`** | Optional | Int64 | Number of future points to predict. Defaults to 10. The valid input range is [1, 10,000]. | +| **`confidence_level`** | Optional | Float64 | Confidence interval (0 to 1). Defaults to 0.95. | +| **`model`** | Optional | String | Model version. Defaults to `TimesFM 2.0`. | +| **`context_window`** | Optional | Int64 | The number of historical data points the model uses to forecast. The min value is 64 and the max value is 2048 for `TimesFM 2.0`. If not set, the model determines this automatically. | + +### Output Schema + +The schema adjusts based on the `output_historical_time_series` flag. + +Column | Type | Included if output_historical_time_series=FALSE | Included if output_historical_time_series=TRUE | Description +:------------------------------------ | :--------- | :---------------------------------------------- | :--------------------------------------------- | :---------- +**`id_cols`** | (As Input) | Yes | Yes | Original identifiers for the series. 
+**`forecast_timestamp`** | TIMESTAMP | **Yes** | No | Timestamp for predicted points. +**`forecast_value`** | FLOAT64 | **Yes** | No | The 50% quantile (median) prediction. +**`time_series_timestamp`** | TIMESTAMP | No | **Yes** | Uniform timestamp column for both history and forecast. +**`time_series_data`** | FLOAT64 | No | **Yes** | Merged column: actual values for history, median for forecast. +**`time_series_type`** | STRING | No | **Yes** | Label: `'history'` or `'forecast'`. +**`prediction_interval_lower_bound`** | FLOAT64 | Yes | Yes | Lower bound (NULL for historical rows). +**`prediction_interval_upper_bound`** | FLOAT64 | Yes | Yes | Upper bound (NULL for historical rows). +**`confidence_level`** | FLOAT64 | Yes | Yes | The constant confidence level used. +**`ai_forecast_status`** | STRING | Yes | Yes | Error messages or empty string on success. A minimum of 3 data points is required. + +## Examples + +### Forecasting with History + +```sql +WITH + citibike_trips AS ( + SELECT EXTRACT(DATE FROM starttime) AS date, usertype, COUNT(*) AS num_trips + FROM `bigquery-public-data.new_york.citibike_trips` + GROUP BY date, usertype + ) +SELECT * +FROM + AI.FORECAST( + TABLE citibike_trips, + data_col => 'num_trips', + timestamp_col => 'date', + id_cols => ['usertype'], + horizon => 30, + output_historical_time_series => true); +``` diff --git a/.agents/skills/bigquery-basics/references/bigquery_ai_generate.md b/.agents/skills/bigquery-basics/references/bigquery_ai_generate.md new file mode 100644 index 0000000..20c7238 --- /dev/null +++ b/.agents/skills/bigquery-basics/references/bigquery_ai_generate.md @@ -0,0 +1,84 @@ +# BigQuery AI.Generate + +`AI.GENERATE` is a general-purpose function for text and content generation. 
+ +## Syntax Reference + +```sql +AI.GENERATE( + [ prompt => ] 'PROMPT', + [, endpoint => 'ENDPOINT'] + [, model_params => 'MODEL_PARAMS'] + [, output_schema => 'OUTPUT_SCHEMA'] + [, connection_id => 'CONNECTION_ID'] + [, request_type => 'REQUEST_TYPE'] +) +``` + +### Input Arguments + +| Argument | Requirement | Type | Description | +| :------------------ | :----------- | :----- | :-------------------- | +| **`prompt`** | **Required** | String | The prompt text or instruction for the model. | +| **`connection_id`** | Optional | String | The connection ID. Optional if configured via other means or testing. | +| **`endpoint`** | Optional | String | The model name, e.g., `'gemini-2.5-flash'`. | +| **`output_schema`** | Optional | String | Schema definition for structured output, e.g., `'answer BOOL, reason STRING'`. | +| **`request_type`** | Optional | String | `'DEDICATED'` or `'SHARED'`. | +| **`model_params`** | Optional | JSON | JSON object for model parameters (e.g., `temperature`, `max_output_tokens`). | + +### Output Schema + +Returns a `STRUCT` with the following fields: + +| Column Name | Type | Description | +| :------------------ | :------------------- | :----------------------------- | +| **`result`** | `STRING` (or Custom) | The generated content. If `output_schema` is used, this field is replaced by the schema's fields. | +| **`status`** | `STRING` | API response status (empty on success). | +| **`full_response`** | `JSON` | The complete raw JSON response from the model (including safety ratings, usage metadata). 
| + +## Examples + +### Basic Text Generation + +```sql +SELECT + AI.GENERATE( + 'Summarize this article: ' || article_content, + connection_id => 'my-project.us.my-connection', + endpoint => 'gemini-2.5-flash' + ) as summary +FROM `dataset.articles` +LIMIT 5; +``` + +### Structured Output Generation + +```sql +SELECT + AI.GENERATE( + 'Extract the date and amount from this invoice: ' || invoice_text, + output_schema => 'date DATE, amount FLOAT64' + ) as extracted_data +FROM `dataset.invoices`; +``` + +### Process images in a Cloud Storage bucket + +```sql +CREATE SCHEMA IF NOT EXISTS bqml_tutorial; + +CREATE OR REPLACE EXTERNAL TABLE bqml_tutorial.product_images + WITH CONNECTION DEFAULT OPTIONS ( + object_metadata = 'SIMPLE', + uris = ['gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*.png']); + +SELECT + uri, + STRING(OBJ.GET_ACCESS_URL(ref,'r').access_urls.read_url) AS signed_url, + AI.GENERATE( + ("What is this: ", OBJ.GET_ACCESS_URL(ref, 'r')), + output_schema => + "image_description STRING, entities_in_the_image ARRAY<STRING>").* +FROM bqml_tutorial.product_images +WHERE uri LIKE "%aquarium%"; +``` diff --git a/.agents/skills/bigquery-basics/references/cli-usage.md b/.agents/skills/bigquery-basics/references/cli-usage.md new file mode 100644 index 0000000..3ed2a26 --- /dev/null +++ b/.agents/skills/bigquery-basics/references/cli-usage.md @@ -0,0 +1,111 @@ +# BigQuery CLI Usage + +The `bq` command-line tool is used to interact with BigQuery for managing +resources and running jobs. 
+ +## Basic Syntax + +```bash +bq COMMAND [FLAGS] [ARGUMENTS] +``` + +## Essential Commands + +### Dataset Management + +- **Create a dataset:** + + ```bash + bq mk --dataset --location=us my_dataset + ``` + +- **List datasets:** + + ```bash + bq ls --project_id my_project + ``` + +### Table Management + +- **Create a table from a schema file:** + + ```bash + bq mk --table my_dataset.my_table schema.json + ``` + +- **Copy a table within or across datasets:** + + ```bash + bq cp my_dataset.my_table my_other_dataset.my_table_copy + ``` + +- **Create a table snapshot (read-only copy):** + + ```bash + bq cp --snapshot --no_clobber my_dataset.my_table my_other_dataset.my_table_snapshot + ``` + +- **Load data from Cloud Storage (CSV):** + + ```bash + bq load --source_format=CSV my_dataset.my_table gs://my-bucket/data.csv + ``` + +- **Stream data into a table from a newline-delimited JSON file:** + + ```bash + bq insert my_dataset.my_table data.json + ``` + +- **Delete a table:** + + ```bash + bq rm -f my_dataset.my_table + ``` + +### Querying Data + +- **Run a standard SQL query:** + + ```bash + bq query --use_legacy_sql=false \ + 'SELECT count(*) FROM `my_project.my_dataset.my_table`' + ``` + +- **Run a dry run to estimate bytes processed:** + + ```bash + bq query --use_legacy_sql=false --dry_run \ + 'SELECT * FROM `my_project.my_dataset.my_table`' + ``` + +### Job Management + +- **List recent jobs:** + + ```bash + bq ls -j + ``` + +- **Show job details:** + + ```bash + bq show -j job_id + ``` + +- **Cancel a job:** + + ```bash + bq cancel job_id + ``` + +## Global Flags + +- `--location`: Specifies the geographic location for the job or resource. + +- `--project_id`: Overrides the default project for the command. + +- `--format`: Changes output format (e.g., `prettyjson`, `sparse`, `csv`). + +For the complete BigQuery CLI reference guide, visit: +[bq command-line tool reference](https://docs.cloud.google.com/bigquery/docs/reference/bq-cli-reference). 
diff --git a/.agents/skills/bigquery-basics/references/client-library-usage.md b/.agents/skills/bigquery-basics/references/client-library-usage.md new file mode 100644 index 0000000..512ff99 --- /dev/null +++ b/.agents/skills/bigquery-basics/references/client-library-usage.md @@ -0,0 +1,99 @@ +# BigQuery Client Libraries + +Google Cloud client libraries provide an idiomatic way to interact with BigQuery +from your preferred programming language. + +## Getting Started + +To use the client libraries, ensure you have the Google Cloud SDK installed and +authenticated. +[Install Google Cloud SDK](https://cloud.google.com/sdk/docs/install) + +### Python + +- **Installation:** + + ```bash + pip install --upgrade google-cloud-bigquery + ``` + +- **Usage Example:** + + ```python + from google.cloud import bigquery + client = bigquery.Client() + query_job = client.query("SELECT * FROM `project.dataset.table` LIMIT 10") + results = query_job.result() + ``` + +- [Python Reference](https://docs.cloud.google.com/python/docs/reference/bigquery/latest) + +### Java + +- **Maven Dependency:** + + ```xml + <dependency> + <groupId>com.google.cloud</groupId> + <artifactId>google-cloud-bigquery</artifactId> + </dependency> + ``` + +- **Usage Example:** + + ```java + BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService(); + QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder( + "SELECT * FROM dataset.table").build(); + TableResult results = bigquery.query(queryConfig); + ``` + +- [Java Reference](https://docs.cloud.google.com/java/docs/reference/google-cloud-bigquery/latest/overview) + +### Node.js (TypeScript) + +- **Installation:** + + ```bash + npm install @google-cloud/bigquery + ``` + +- **Usage Example:** + + ```typescript + import {BigQuery} from '@google-cloud/bigquery'; + const bigquery = new BigQuery(); + const [rows] = await bigquery.query('SELECT * FROM dataset.table'); + ``` + +- [Node.js Reference](https://googleapis.dev/nodejs/bigquery/latest/index.html) + +### Go + +- **Installation:** + + ```bash + go get 
cloud.google.com/go/bigquery + ``` + +- **Usage Example:** + + ```go + ctx := context.Background() + client, _ := bigquery.NewClient(ctx, "project-id") + q := client.Query("SELECT * FROM dataset.table") + it, _ := q.Read(ctx) + ``` + +- [Go Reference](https://docs.cloud.google.com/go/docs/reference/cloud.google.com/go/bigquery/latest) + +## BigQuery DataFrames (BigFrames) + +For Python users, `bigframes` provides a pandas-like API that executes directly +in BigQuery. + +```bash +pip install --upgrade bigframes +``` + +- [BigFrames Guide](https://dataframes.bigquery.dev/) diff --git a/.agents/skills/bigquery-basics/references/core-concepts.md b/.agents/skills/bigquery-basics/references/core-concepts.md new file mode 100644 index 0000000..8e16da8 --- /dev/null +++ b/.agents/skills/bigquery-basics/references/core-concepts.md @@ -0,0 +1,83 @@ +# BigQuery Core Concepts + +BigQuery is a fully managed, AI-ready data platform that helps you manage and +analyze your data with built-in features like machine learning, search, +geospatial analysis, and business intelligence. BigQuery's serverless +architecture lets you use languages like SQL and Python to answer your +organization's biggest questions with zero infrastructure management. + +BigQuery provides a uniform way to work with both structured and unstructured +data and supports open table formats like Apache Iceberg. BigQuery streaming +supports continuous data ingestion and analysis while BigQuery's scalable, +distributed analysis engine lets you query terabytes in seconds and petabytes in +minutes. + +## Architecture + +BigQuery's architecture separates compute and storage, connected by a +petabit-scale network. + +- **BigQuery Storage:** A columnar storage format optimized for analytical + queries. It can be replicated across multiple locations for high + availability. + +- **BigQuery Analytics:** A scalable, distributed analysis engine that can + process data in BigQuery and in external sources. 
+ +## Resource Hierarchy + +BigQuery organizes resources in a structured hierarchy: + +1. **Organization/Folder/Project:** Standard Google Cloud resource containers. +2. **Dataset:** The top-level container for tables and views. +3. **Table/View:** The basic unit of data storage and logical representation. + +## Analytics Workflows + +- **Ad Hoc Analysis:** Using GoogleSQL for interactive queries. + +- **Geospatial Analysis:** Analyzing and visualizing spatial data using + geography types. + +- **Machine Learning (BigQuery ML):** Creating and executing ML models + directly in BigQuery using SQL. + +- **Gemini in BigQuery:** AI-powered assistance for data preparation, SQL + generation, and visualization. Refer to the [Gemini + Models](https://ai.google.dev/gemini-api/docs/models) for more information. + +- **Stream Processing (BigQuery continuous queries):** Long running SQL + statements that analyze and transform incoming data in near real time as it + arrives in BigQuery. This feature enables unbounded streaming pipelines for + real-time AI inference (using Vertex AI) and Reverse ETL to downstream + systems. Results can be exported to Pub/Sub, Bigtable, Spanner, or other + BigQuery tables. Note that running continuous queries requires a BigQuery + reservation with a `CONTINUOUS` assignment type. + +## BigQuery Studio + +A unified workspace for data engineering, analysis, and predictive modeling. + +- **SQL Editor:** With code completion and generation. + +- **Python Notebooks:** Built-in support for Colab Enterprise and BigQuery + DataFrames (BigFrames). + +- **Data Discovery:** Integrated with Dataplex for search and profiling. + +## Pricing + +BigQuery pricing consists of two main components: compute (analysis) costs and +storage costs. + +- **Storage:** Storage costs are based on the amount of data stored in + BigQuery tables. 
Storage is classified as either active storage (any table + or partition modified in the last 90 days) or long-term storage (data that + hasn't been modified for 90 consecutive days, resulting in a price drop of + approximately 50%). + +- **Analysis:** Billed based on bytes processed (On-demand) or dedicated slots + (Capacity/Reservations). + +For the latest pricing details, visit: [BigQuery +Pricing](https://cloud.google.com/bigquery/pricing). diff --git a/.agents/skills/bigquery-basics/references/iac-usage.md b/.agents/skills/bigquery-basics/references/iac-usage.md new file mode 100644 index 0000000..b9ef3ae --- /dev/null +++ b/.agents/skills/bigquery-basics/references/iac-usage.md @@ -0,0 +1,70 @@ +# BigQuery Infrastructure as Code + +Managing BigQuery resources using Infrastructure as Code (IaC) ensures +consistency and repeatability across environments. + +## Terraform + +The Google Cloud Terraform provider supports BigQuery datasets, tables, jobs, +and reservations. + +### Dataset and Table Example + +```terraform +resource "google_bigquery_dataset" "dataset" { + dataset_id = "example_dataset" + friendly_name = "test" + description = "This is a test description" + location = "US" + default_table_expiration_ms = 3600000 + + labels = { + env = "default" + } +} + +resource "google_bigquery_table" "default" { + dataset_id = google_bigquery_dataset.dataset.dataset_id + table_id = "example_table" + + time_partitioning { + type = "DAY" + } + + labels = { + env = "default" + } + + schema = <- + Manages Cloud Run services, jobs, and worker pools. Use when you need to deploy applications + responding to HTTP requests (services), run event-triggered or scheduled tasks (jobs), + or handle always-on pull-based background processing (worker pools). +--- + +# Cloud Run Basics + +Cloud Run is a fully managed application platform for running your code, +function, or container on top of Google's highly scalable infrastructure. 
It +abstracts away infrastructure management, providing three primary resource +types: + +1. **Services:** Responds to HTTP requests sent to a unique and stable + endpoint, using stateless instances that autoscale based on a variety of key + metrics. Also responds to events and functions. +2. **Jobs:** Executes parallelizable tasks that are executed manually, or on a + schedule, and run to completion. +3. **Worker pools:** Handles always-on background workloads such as pull-based + workloads, for example, Kafka consumers, Pub/Sub pull queues, or RabbitMQ + consumers. + +## Prerequisites + +1. Enable the Cloud Run Admin API and Cloud Build APIs: + + ```bash + gcloud services enable run.googleapis.com cloudbuild.googleapis.com --quiet + ``` + +1. If you are under a domain restriction organization policy [restricting](https://docs.cloud.google.com/organization-policy/restrict-domains) + unauthenticated invocations for your project, you will need to access your + deployed service as described under [Testing private + services](https://docs.cloud.google.com/run/docs/triggering/https-request#testing-private). + +### Required roles + +You need the following roles to deploy your Cloud Run resource: + +* Cloud Run Admin (`roles/run.admin`) on the project +* Cloud Run Source Developer (`roles/run.sourceDeveloper`) on the project +* Service Account User (`roles/iam.serviceAccountUser`) on the service + identity +* Logs Viewer (`roles/logging.viewer`) on the project + +Cloud Build automatically uses the Compute Engine default service account as the +default Cloud Build service account to build your source code and Cloud Run +resource, unless you override this behavior. 
+ +For Cloud Build to build your sources, grant the Cloud Build service account the +Cloud Run Builder (`roles/run.builder`) role on your project: + +```bash +gcloud projects add-iam-policy-binding PROJECT_ID \ + --member=serviceAccount:SERVICE_ACCOUNT_EMAIL_ADDRESS \ + --role=roles/run.builder \ + --quiet +``` + +Replace `PROJECT_ID` with your Google Cloud project ID and +`SERVICE_ACCOUNT_EMAIL_ADDRESS` with the email address of the Cloud Build +service account. + +## Deploy a Cloud Run service + +You can deploy your service to Cloud Run by using a container image or deploy +directly from source code using a single Google Cloud CLI command. + +> **CRITICAL RULE:** Any deployed code MUST listen on 0.0.0.0 (not 127.0.0.1) +> and use the injected $PORT environment variable (defaults to 8080), or it will +> crash on boot. + +### Deploy a container image to Cloud Run + +Cloud Run imports your container image during deployment. Cloud Run keeps this +copy of the container image as long as it is used by a serving revision. +Container images are not pulled from their container repository when a new Cloud +Run instance is started. + +### Supported container images + +You can directly use container images stored in the [Artifact +Registry](https://docs.cloud.google.com/artifact-registry/docs/overview), or +[Docker Hub](https://hub.docker.com/). Google recommends the use of Artifact +Registry since Docker Hub images are +[cached](https://docs.cloud.google.com/artifact-registry/docs/pull-cached-dockerhub-images) +for up to one hour. + +You can use container images from other public or private registries (like JFrog +Artifactory, Nexus, or GitHub Container Registry), by setting up an [Artifact +Registry remote +repository](https://docs.cloud.google.com/artifact-registry/docs/repositories/remote-repo). 
+ +You should only consider [Docker Hub](https://hub.docker.com/) for deploying +popular container images such as [Docker Official +Images](https://docs.docker.com/docker-hub/official_images/) or [Docker +Sponsored OSS images](https://docs.docker.com/docker-hub/dsos-program/). For +higher availability, Google recommends deploying these Docker Hub images using +an [Artifact Registry remote +repository](https://docs.cloud.google.com/artifact-registry/docs/repositories/remote-repo). + +To deploy a container image, run the following command: + +```bash + gcloud run deploy SERVICE_NAME \ + --image IMAGE_URL \ + --region us-central1 \ + --allow-unauthenticated \ + --quiet +``` + +Replace the following: + +* SERVICE_NAME: the name of the service you want to deploy to. Service names + must be 49 characters or less and must be unique per region and project. If + the service does not exist yet, this command creates the service during the + deployment. You can omit this parameter entirely, but you will be prompted + for the service name if you omit it. +* IMAGE_URL: a reference to the container image, for example, + `us-docker.pkg.dev/cloudrun/container/hello:latest`. If you use Artifact + Registry, the repository REPO_NAME must already be created. The URL follows + the format of `LOCATION-docker.pkg.dev/PROJECT_ID/REPO_NAME/PATH:TAG`. Note + that if you don't supply the `--image` flag, the deploy command will attempt + to deploy from source code. + +### Deploy from source code + +There are two different ways to deploy your service from source: + +* Deploy from source with build (default): This option uses Google Cloud's + buildpacks and Cloud Build to automatically build container images from your + source code without having to install Docker on your machine or set up + buildpacks or Cloud Build. By default, Cloud Run uses the default machine + type provided by Cloud Build. 
+ + * To deploy from source with automatic base image updates enabled, run the + following command: + + ```bash + gcloud run deploy SERVICE_NAME --source . \ + --base-image BASE_IMAGE \ + --automatic-updates \ + --quiet + ``` + + Cloud Run only supports automatic base images that use [Google Cloud's + buildpacks base + images](https://docs.cloud.google.com/docs/buildpacks/base-images). + + * To deploy from source using a Dockerfile, run the following command: + + ```bash + gcloud run deploy SERVICE_NAME --source . --quiet + ``` + When you provide a Dockerfile, Cloud Build runs it in the cloud, and + deploys the service. + +* Deploy from source without build (Preview): This option deploys artifacts + directly to Cloud Run, bypassing the Cloud Build step. This allows for rapid + deployment times. To deploy from source without build, run the following + command: + + ```bash + gcloud beta run deploy SERVICE_NAME \ + --source APPLICATION_PATH \ + --no-build \ + --base-image=BASE_IMAGE \ + --command=COMMAND \ + --args=ARG \ + --quiet + ``` + + Replace the following: + + * SERVICE_NAME: the name of your Cloud Run service. + * APPLICATION_PATH: the location of your application on the local file + system. + * BASE_IMAGE: the [runtime base image](https://docs.cloud.google.com/run/docs/configuring/services/runtime-base-images#how_to_obtain_base_images) + you want to use for your application. For example, + `us-central1-docker.pkg.dev/serverless-runtimes/google-24-full/runtimes/nodejs24`. + You can also deploy a pre-compiled binary without configuring additional + language-specific runtime components using the OS only base image, such + as `osonly24`. + * COMMAND: the command that the container starts up with. + * ARG: an argument you send to the container command. If you use multiple + arguments, specify each on its own line. 
+ + For examples on deploying from source without build, see [Examples of + deploying from source without + build](https://docs.cloud.google.com/run/docs/deploying-source-code#examples-without-build). + +## Create and execute a Cloud Run job + +To create a new job, run the following command: + +```bash +gcloud run jobs create JOB_NAME --image IMAGE_URL OPTIONS --quiet +``` + +Alternatively, use the deploy command: + +```bash +gcloud run jobs deploy JOB_NAME --image IMAGE_URL OPTIONS --quiet +``` + +Replace the following: + +* JOB_NAME: the name of the job you want to create. If you omit this + parameter, you will be prompted for the job name when you run the command. +* IMAGE_URL: a reference to the container image—for example, + `us-docker.pkg.dev/cloudrun/container/job:latest`. + +* Optionally, replace OPTIONS with any of the following flags: + + * `--tasks`: Accepts integers greater than or equal to 1. Defaults to 1; + maximum is 10,000. Each task is provided the environment variables + `CLOUD_RUN_TASK_INDEX` with a value between 0 and the number of tasks + minus 1, along with `CLOUD_RUN_TASK_COUNT`, which is the number of + tasks. + * `--max-retries`: The number of times a failed task is retried. Once any + task fails beyond this limit, the entire job is marked as failed. For + example, if set to 1, a failed task will be retried once, for a total of + two attempts. The default is 3. Accepts integers from 0 to 10. + * `--task-timeout`: Accepts a duration like "2s". Defaults to 10 minutes; + maximum is 168 hours (7 days). For tasks using GPUs, the maximum + available timeout is 1 hour. + * `--parallelism`: The maximum number of tasks that can execute in + parallel. By default, tasks will be started as quickly as possible in + parallel. + * `--execute-now`: If set, immediately after the job is created, a job + execution is started. Equivalent to calling `gcloud run jobs create` + followed by `gcloud run jobs execute`.
+ + In addition to these preceding options, you can also specify more configuration + such as environment variables or memory limits. + +For a full list of available options when creating a job, refer to the [`gcloud +run jobs +create`](https://docs.cloud.google.com/sdk/gcloud/reference/run/jobs/create) +command line documentation. + +Wait for the job creation to finish. You'll see a success message upon +successful completion. + +To execute an existing job, run the following command: + +```bash +gcloud run jobs execute JOB_NAME --quiet +``` + +If you want the command to wait until the execution completes, run the following +command: + +```bash +gcloud run jobs execute JOB_NAME --wait --region=REGION --quiet +``` + +Replace the following: + +* JOB_NAME: the name of the job. +* REGION: the region in which the resource can be found. For example, + `europe-west1`. Alternatively, set the `run/region` property. + +## Deploy a worker pool + +You can deploy a Cloud Run worker pool using container images or deploy directly +from the source. + +### Deploy a container image + +You can specify a container image with a tag (for example, +`us-docker.pkg.dev/my-project/container/my-image:latest`) or with an exact +digest (for example, +`us-docker.pkg.dev/my-project/container/my-image@sha256:41f34ab970ee...`). + +### Supported container images + +You can directly use container images stored in the [Artifact +Registry](https://docs.cloud.google.com/artifact-registry/docs/overview), or +[Docker Hub](https://hub.docker.com/). Google recommends the use of Artifact +Registry since Docker Hub images are +[cached](https://docs.cloud.google.com/artifact-registry/docs/pull-cached-dockerhub-images) +for up to one hour. + +You can use container images from other public or private registries (like JFrog +Artifactory, Nexus, or GitHub Container Registry), by setting up an [Artifact +Registry remote +repository](https://docs.cloud.google.com/artifact-registry/docs/repositories/remote-repo).
+ +You should only consider [Docker Hub](https://hub.docker.com/) for deploying +popular container images such as [Docker Official +Images](https://docs.docker.com/docker-hub/official_images/) or [Docker +Sponsored OSS images](https://docs.docker.com/docker-hub/dsos-program/). For +higher availability, Google recommends deploying these Docker Hub images using +an [Artifact Registry remote +repository](https://docs.cloud.google.com/artifact-registry/docs/repositories/remote-repo). + +To deploy a container image, run the following command: + +```bash +gcloud run worker-pools deploy WORKER_POOL_NAME --image IMAGE_URL --quiet +``` + +Replace the following: + +* WORKER_POOL_NAME: the name of the worker pool you want to deploy to. If the + worker pool does not exist yet, this command creates the worker pool during + the deployment. You can omit this parameter entirely, but you will be + prompted for the worker pool name if you omit it. + +* IMAGE_URL: a reference to the container image that contains the worker pool, + such as `us-docker.pkg.dev/cloudrun/container/worker-pool:latest`. Note that + if you don't supply the `--image` flag, the deploy command attempts to + deploy from source code. + +Wait for the deployment to finish. Upon successful completion, Cloud Run +displays a success message along with the revision information about the +deployed worker pool. + +### Deploy a worker pool from source + +You can deploy a new worker pool or worker pool revision to Cloud Run directly +from source code using a single gcloud CLI command, `gcloud run worker-pools deploy` +with the `--source` flag. + +The deploy command defaults to source deployment if you don't supply the +`--image` or `--source` flags.
+ +Behind the scenes, this command uses [Google Cloud's +buildpacks](https://docs.cloud.google.com/docs/buildpacks/overview) and Cloud +Build to automatically build container images from your source code without +having to install Docker on your machine or set up buildpacks or Cloud Build. By +default, Cloud Run uses the default machine type provided by Cloud Build. + +To deploy a worker pool from source, run the following command: + +```bash +gcloud run worker-pools deploy WORKER_POOL_NAME --source . --quiet +``` + +Replace `WORKER_POOL_NAME` with the name you want for your worker pool. + +### What to do if a deployment fails: + +1. **IAM/Permission Error:** Read + [iam-security.md](references/iam-security.md). +2. **Crash on Boot / Healthcheck failed:** Fetch the logs immediately using + `gcloud logging read "resource.labels.service_name=SERVICE_NAME" --limit=20` + to find the exact runtime error. +3. **Native Dependency Error (Node/Python):** If using `--no-build`, switch to + `--source .` (Buildpacks) to compile native extensions properly for Linux. + +## Reference Directory + +- [Core Concepts](references/core-concepts.md): Services vs. Jobs vs. + Worker pools, resource model, and auto-scaling behavior for services. + +- [CLI Usage](references/cli-usage.md): Essential `gcloud run` commands for + deployment and management. + +- [Client Libraries](references/client-library-usage.md): Using Google + Cloud client libraries to interact with Cloud Run. + +- [MCP Usage](references/mcp-usage.md): Using the Cloud Run remote MCP + server. + +- [Infrastructure as Code](references/iac-usage.md): Terraform examples for + services, jobs, worker pools, and IAM bindings. + +- [IAM & Security](references/iam-security.md): Roles, service identities, + and ingress/egress controls. 
+ +*If you need product information not found in these references, use the + Developer Knowledge MCP server `search_documents` tool.* \ No newline at end of file diff --git a/.agents/skills/cloud-run-basics/references/cli-usage.md b/.agents/skills/cloud-run-basics/references/cli-usage.md new file mode 100644 index 0000000..e7f88a4 --- /dev/null +++ b/.agents/skills/cloud-run-basics/references/cli-usage.md @@ -0,0 +1,119 @@ +# Cloud Run CLI + +Use the `gcloud run` command to manage your Cloud Run applications. + +## Basic Syntax + +```bash +gcloud run [GROUP] [COMMAND] [FLAGS] +``` + +## Essential Commands + +### Cloud Run service + +- **Deploy a service from an image:** + + ```bash + gcloud run deploy my-service \ + --image us-docker.pkg.dev/cloudrun/container/hello:latest \ + --quiet + ``` + +- **Deploy from source code:** + + ```bash + gcloud run deploy my-service --source . --quiet + ``` + +- **Deploy a Cloud Run function:** + + ```bash + gcloud run deploy my-service \ + --source . --function example-hello --base-image go126 --region us-central1 --quiet + ``` + +- **List services:** + + ```bash + gcloud run services list --quiet + ``` + +- **Update traffic split:** + + ```bash + gcloud run services update-traffic my-service --to-revisions=REV1=50,REV2=50 --quiet + ``` + +### Cloud Run job + +- **Create a job:** + + ```bash + gcloud run jobs create my-job \ + --image us-docker.pkg.dev/cloudrun/container/job:latest \ + --quiet + ``` + +- **Execute a job:** + + ```bash + gcloud run jobs execute my-job --quiet + ``` + +- **List jobs:** `gcloud run jobs list` + +- **List job executions:** + + ```bash + gcloud run jobs executions list --job my-job + ``` + +### Cloud Run worker pools + +- **Deploy a worker pool from an image:** + + ```bash + gcloud run worker-pools deploy my-workerpool \ + --image us-docker.pkg.dev/cloudrun/container/worker-pool:latest \ + --quiet + ``` + +- **Deploy from source code:** + + ```bash + gcloud run worker-pools deploy my-workerpool --source .
--quiet + ``` + +- **List worker pools:** + + ```bash + gcloud run worker-pools list --region us-central1 --quiet + ``` + +- **Configure scaling (manual):** + + ```bash + gcloud run worker-pools deploy my-workerpool --instances=5 \ + --image us-docker.pkg.dev/cloudrun/container/worker-pool:latest \ + --quiet + ``` + +### Configuration and logs + +- **View more details about a service:** `gcloud run services describe my-service` + +- **View logs:** + + ```bash + gcloud logging read "resource.type=cloud_run_revision AND \ + resource.labels.service_name=my-service" \ + --quiet + ``` + +## Common Flags + +- `--region`: The region where the service or job is located. +- `--allow-unauthenticated`: Makes the service publicly accessible. + +- `--no-allow-unauthenticated`: Restricts access to authenticated users only. diff --git a/.agents/skills/cloud-run-basics/references/client-library-usage.md b/.agents/skills/cloud-run-basics/references/client-library-usage.md new file mode 100644 index 0000000..1579b12 --- /dev/null +++ b/.agents/skills/cloud-run-basics/references/client-library-usage.md @@ -0,0 +1,146 @@ +# Cloud Run Client Libraries + +Google Cloud client libraries provide an idiomatic way to manage Cloud Run +resources programmatically. + +## Getting Started + +Ensure you have the Google Cloud SDK installed and authenticated. 
+[Install Google Cloud SDK](https://cloud.google.com/sdk/docs/install) + +### Python + +- **Installation:** + + ```bash + pip install --upgrade google-cloud-run + ``` + +- **Usage Example:** + + ```python + from google.cloud import run_v2 + client = run_v2.ServicesClient() + request = run_v2.ListServicesRequest( + parent="projects/my-project/locations/us-central1" + ) + page_result = client.list_services(request=request) + ``` + +- [Python Reference](https://docs.cloud.google.com/python/docs/reference/run/latest) + +### Java + +- **Maven Dependency:** + + ```xml + <dependencyManagement> + <dependencies> + <dependency> + <groupId>com.google.cloud</groupId> + <artifactId>libraries-bom</artifactId> + <version>26.79.0</version> + <type>pom</type> + <scope>import</scope> + </dependency> + </dependencies> + </dependencyManagement> + <dependency> + <groupId>com.google.cloud</groupId> + <artifactId>google-cloud-run</artifactId> + </dependency> + ``` + +- **Usage Example:** + + ```java + try (ServicesClient servicesClient = ServicesClient.create()) { + ListServicesRequest request = ListServicesRequest.newBuilder() + .setParent(LocationName.of("my-project", "us-central1").toString()) + .build(); + for (Service element : servicesClient.listServices(request).iterateAll()) { + System.out.println(element.getName()); + } + } + ``` + +- [Java Reference](https://docs.cloud.google.com/java/docs/reference/google-cloud-run/latest/overview) + +### Node.js (TypeScript) + +- **Installation:** + + ```bash + npm install @google-cloud/run + ``` + +- **Usage Example:** + + ```typescript + import {ServicesClient} from '@google-cloud/run'; + const client = new ServicesClient(); + const [services] = await client.listServices({ + parent: 'projects/my-project/locations/us-central1', + }); + ``` + +- [Node.js Reference](https://googleapis.dev/nodejs/run/latest/index.html) + +### Go + +- **Installation:** + + ```bash + go get cloud.google.com/go/run/apiv2 + ``` + +- **Usage Example:** + + ```go + package main + + import ( + "context" + "fmt" + "log" // Import the log package + + run "cloud.google.com/go/run/apiv2" + runpb "cloud.google.com/go/run/apiv2/runpb" + "google.golang.org/api/iterator" + ) + + func main() { + ctx := context.Background() + client, err
:= run.NewServicesClient(ctx) + if err != nil { + // Log the error and exit if the client can't be created + log.Fatalf("Failed to create Cloud Run Services client: %v", err) + } + defer client.Close() + + req := &runpb.ListServicesRequest{ + Parent: "projects/my-project/locations/us-central1", // Remember to replace my-project + } + it := client.ListServices(ctx, req) + + fmt.Println("Cloud Run Services:") + for { + resp, err := it.Next() + if err == iterator.Done { + break // Finished iterating successfully + } + if err != nil { + // Log the error and exit if iteration fails + log.Fatalf("Error iterating services: %v", err) + } + fmt.Println(resp.GetName()) + } + } + ``` + +- [Go Reference](https://docs.cloud.google.com/go/docs/reference/cloud.google.com/go/run/latest) + +## Source Code Samples + +For more examples across languages, visit the +[Cloud Run Code Samples](https://cloud.google.com/run/docs/samples). \ No newline at end of file diff --git a/.agents/skills/cloud-run-basics/references/core-concepts.md b/.agents/skills/cloud-run-basics/references/core-concepts.md new file mode 100644 index 0000000..edfdb32 --- /dev/null +++ b/.agents/skills/cloud-run-basics/references/core-concepts.md @@ -0,0 +1,134 @@ +# Cloud Run core concepts + +Cloud Run is a fully managed application platform for running your code, +function, or container on top of Google's highly scalable infrastructure. On +Cloud Run, your code can run as a service, job, or worker pool. All of these +resource types are running sandboxed container instances in the same execution +environment and can integrate with Google Cloud services. + +## Services vs. Jobs vs. Worker pools + +- **Cloud Run services:** Used for code that handles requests or events (e.g., + web apps, APIs). They provide an HTTPS endpoint and automatically scale + based on traffic. + +- **Cloud Run jobs:** Used for code that performs a specific task and then + exits (e.g., data processing, database migrations). 
They can run a single + task or an array of parallel tasks. + +- **Cloud Run worker pools:** Designed for continuous, non-HTTP, pull-based + background processing (e.g., Kafka consumers). + +## Resource model + +Cloud Run organizes resources as follows: + +1. **Service:** The top-level resource. You can deploy a service from a + container, repository, or source code. + 1. **Revision:** An immutable snapshot of a service's + configuration and container image. Each service + deployment creates a new revision. + 1. **Service instances:** The running container that + processes requests. Each service revision receiving + requests is automatically scaled to the number of + instances needed to handle all these requests. + 1. **Cloud Run functions**: Deploy functions as Cloud Run + services. You can deploy single-purpose functions that + respond to events emitted from your cloud infrastructure + and services. + +1. **Job**: Executes one or more containers to completion. A job consists of + one or multiple independent tasks that are executed in parallel in a given + job execution. + +1. **Worker pools**: If your code processes workloads from an external source + but not from an HTTP request, such as pulling work from a message queue, you + can deploy it to a Cloud Run worker pool. + +## Autoscaling for Cloud Run services + +Cloud Run services scale automatically based on: + +- **Request concurrency:** The number of concurrent requests per instance. +- **CPU utilization:** The average CPU utilization of existing instances over + a one-minute window. +- **Scale to zero:** Cloud Run autoscales from one to zero instances only + after verifying that an instance is no longer processing requests. If you + use instance-based billing, Cloud Run instances are charged for the entire + lifecycle of instances, even when there are no incoming requests.
+ +## Container contract + +Your container image can run code written in the programming language +of your choice and use any base image, provided that it respects the +constraints listed in the [Container runtime contract](https://docs.cloud.google.com/run/docs/container-contract). + +Executables in the container image must be compiled for +Linux 64-bit. Cloud Run specifically supports the Linux x86_64 ABI format. + +Cloud Run accepts container images in the Docker Image + Manifest V2, Schema 1, Schema 2, and OCI image formats. Cloud Run +also accepts Zstd compressed container images. + +If deploying a multi-architecture image, the manifest list must include +linux/amd64. + +For functions deployed with Cloud Run, you can use one of the +Cloud Run runtime base images that are published by Google +Cloud's buildpacks to receive automatic security and maintenance updates. +For more information about the supported runtimes, see the [Runtime support schedule](https://docs.cloud.google.com/run/docs/runtime-support). + +### Container requirements + +When deploying containers to Cloud Run, the following requirements must be met: + +* Containers deployed to services must listen for requests on the correct port. +* A Cloud Run service starts Cloud Run instances to handle incoming + requests. A Cloud Run instance always has one single ingress + container that listens for requests, and optionally one or more + sidecar containers. The following port configuration details + apply only to the ingress container, not to sidecars. +* The ingress container within an instance must listen for + requests on `0.0.0.0` on the port to which requests are sent. Notably, + the ingress container should not listen on `127.0.0.1`. By default, requests + are sent to port 8080, but you can configure Cloud Run to send requests to the port of your choice. + Cloud Run injects the `PORT` environment variable into the ingress container.
+ +## VPC network connectivity + +Cloud Run services and jobs support Direct VPC egress. This means +that they can send traffic to private resources within your +configured VPC network, such as databases or internal services. Cloud Run +services and jobs don't support Direct VPC ingress. +Cloud Run worker pools support both Direct VPC egress and Direct VPC +ingress. When you configure Direct VPC for your Cloud Run worker pool +deployment, each worker instance receives a private IP address on the +configured network and subnet. Only resources from your VPC network can +connect to the worker pool private IP address endpoint. For more information +about obtaining the private IP addresses of your worker pool instance, see +[Retrieve the private IP addresses using the metadata server (MDS)](https://docs.cloud.google.com/run/docs/configuring/vpc-direct-vpc#mds-support). + +For Cloud Run worker pools with Direct VPC ingress, such as database +connections or any other custom TCP-based protocol, the container must +listen for TCP connections on the port exposed in your container image +through the Dockerfile or specified by the PORT environment variable. + +## AI and GPU support + +Cloud Run supports hosting AI inference models. You can configure services with +GPUs (e.g., NVIDIA RTX PRO 6000 Blackwell GPU, NVIDIA L4) to accelerate +workloads like LLM inference using Gemma 3. For more information, see GPU +support for +[services](https://docs.cloud.google.com/run/docs/configuring/services/gpu), +[jobs](https://docs.cloud.google.com/run/docs/configuring/jobs/gpu), and [worker +pools](https://docs.cloud.google.com/run/docs/configuring/workerpools/gpu). + +## Pricing + +Cloud Run uses a pay-as-you-go model: + +- **Request-based:** Charged for resources used during request processing. +- **Instance-based:** Charged for the entire lifetime of an instance. + +For the latest pricing, visit: [Cloud Run +pricing](https://cloud.google.com/run/pricing). 
diff --git a/.agents/skills/cloud-run-basics/references/iac-usage.md b/.agents/skills/cloud-run-basics/references/iac-usage.md new file mode 100644 index 0000000..32cd246 --- /dev/null +++ b/.agents/skills/cloud-run-basics/references/iac-usage.md @@ -0,0 +1,76 @@ +# Cloud Run Infrastructure as Code + +Cloud Run resources can be provisioned and managed using Terraform and other IaC +tools. + +## Terraform + +The Google Cloud Terraform provider supports Cloud Run services, jobs, and worker pools. + +### Cloud Run service example + +```terraform +resource "google_cloud_run_v2_service" "default" { + name = "cloudrun-service" + location = "us-central1" + deletion_protection = false + ingress = "INGRESS_TRAFFIC_ALL" + + template { + containers { + image = "us-docker.pkg.dev/cloudrun/container/hello" + } + } +} + +resource "google_cloud_run_v2_service_iam_member" "noauth" { + location = google_cloud_run_v2_service.default.location + name = google_cloud_run_v2_service.default.name + role = "roles/run.invoker" + member = "allUsers" +} +``` + +### Cloud Run job example + +```terraform +resource "google_cloud_run_v2_job" "default" { + name = "cloudrun-job" + location = "us-central1" + + template { + template { + containers { + image = "us-docker.pkg.dev/cloudrun/container/job" + } + } + } +} +``` + +### Cloud Run worker pool example + +```terraform +resource "google_cloud_run_v2_worker_pool" "default" { + name = "cloudrun-workerpool" + location = "us-central1" + + template { + containers { + image = "us-docker.pkg.dev/cloudrun/container/worker-pool:latest" + } + } +} +``` + +### Reference documentation + +- [Terraform Google Provider - Cloud Run v2 Service](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/cloud_run_v2_service) + +- [Terraform Google Provider - Cloud Run v2 Job](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/cloud_run_v2_job) +- [Terraform Google Provider - Cloud Run v2 Worker
pool](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/cloud_run_v2_worker_pool) + +## YAML + +Cloud Run resources can also be defined using YAML. For more information, see +[Cloud Run YAML reference](https://docs.cloud.google.com/run/docs/reference/yaml/v1). diff --git a/.agents/skills/cloud-run-basics/references/iam-security.md b/.agents/skills/cloud-run-basics/references/iam-security.md new file mode 100644 index 0000000..5f97491 --- /dev/null +++ b/.agents/skills/cloud-run-basics/references/iam-security.md @@ -0,0 +1,102 @@ +# Cloud Run IAM & security + +Cloud Run uses Identity and Access Management (IAM) to secure your resources and control who can deploy or invoke them. + +## Predefined IAM Roles + +| Predefined Role | Usage | +| :--- | :--- | +| `roles/run.admin` | Full control over all Cloud Run resources. | +| `roles/run.invoker` | Invoke Cloud Run services and execute Cloud Run jobs. | +| `roles/run.developer` | Read and write access to services, jobs and worker pools; cannot + set IAM policies. | +| `roles/run.viewer` | Read-only access to Cloud Run resources. | + +## Types of service accounts for service identity + +Cloud Run resources run as a specific service account (the service +identity). + +- **User-managed service account (recommended)**: You manually create this + service account and determine the most minimal set of permissions that + the service account needs to access specific Google Cloud resources. The + user-managed service account follows the format of `SERVICE_ACCOUNT_NAME@PROJECT_ID.iam.gserviceaccount.com`. + +- **Compute Engine default service account:** Cloud Run automatically + provides the Compute Engine default service account as the default + service identity. The Compute Engine default service account + follows the format of `PROJECT_NUMBER-compute@developer.gserviceaccount.com`. + +## Best practices + +By default, the Compute Engine default service account is automatically +created. 
If you don't specify a service account when the +Cloud Run service or job is created, Cloud Run uses this service account. +Depending on your organization policy configuration, the default +service account might automatically be granted the Editor role on your +project. We strongly recommend that you disable the automatic role +grant by enforcing the `iam.automaticIamGrantsForDefaultServiceAccounts` +organization policy constraint. If you created your organization after +May 3, 2024, this constraint is enforced by default. + +Create a user-managed service account with minimal +permissions for each Cloud Run resource. + +To allow a service to access another GCP resource (e.g., +Cloud SQL), grant the service's identity the appropriate IAM role on that +resource. + +## Security controls + +- **Ingress Settings:** Control whether your service is reachable from the + internet (`all`), only from within the VPC (`internal`), or via a load + balancer (`internal-and-cloud-load-balancing`). + +- **VPC Egress:** Use a VPC connector or Direct VPC egress to allow Cloud Run to + access resources in your VPC. + +- **Binary Authorization:** Ensure only trusted container images are deployed. + +- **Secrets Management:** Use Secret Manager to securely pass sensitive + information (e.g., API keys, database passwords) to your containers as + environment variables or volumes. + +## Public access + +There are two ways to create a public Cloud Run service, you can either: + +* Disable the Cloud Run Invoker IAM check (recommended). +* Assign the Cloud Run Invoker IAM role to the `allUsers` member type. + +For more information, see: +[Cloud Run security overview](https://docs.cloud.google.com/run/docs/securing/managing-access#make-service-public). + +## Configure IAP to secure access + +By enabling IAP on Cloud Run directly, you can secure traffic with a single +click from all ingress paths, including default `run.app` URLs and load +balancers. 
+ +When you integrate IAP with Cloud Run, you can manage user or group access in +the following ways: + +* Inside the organization - configure access to users who are within the same + organization as your Cloud Run service + +* Outside the organization - configure access to users who are from + organizations different than your Cloud Run service + +* No organization - configure access in projects that are not part of any + Google organization + +Enabling IAP on a Cloud Run service can be as easy as deploying a new service with +the following flags: + +```bash +gcloud run deploy SERVICE_NAME \ + --region=REGION \ + --image=IMAGE_URL \ + --no-allow-unauthenticated \ + --iap \ + --quiet +``` diff --git a/.agents/skills/cloud-run-basics/references/mcp-usage.md b/.agents/skills/cloud-run-basics/references/mcp-usage.md new file mode 100644 index 0000000..abb2461 --- /dev/null +++ b/.agents/skills/cloud-run-basics/references/mcp-usage.md @@ -0,0 +1,43 @@ +# Cloud Run MCP Usage + +Cloud Run is supported by a remote Model Context Protocol (MCP) server that +enables agents to deploy, manage, and monitor serverless applications. + +## MCP Tools for Cloud Run + +The Cloud Run MCP server typically includes tools for: + +- `get_service`: Get info about a Cloud Run service, such as its URI and + whether the deploy succeeded. +- `list_services`: List Cloud Run services in a given Google Cloud project and + region. +- `deploy_service_from_image`: Deploy a container image from Artifact Registry + or Docker Hub as a Cloud Run service. +- `deploy_service_from_archive`: Deploy a Cloud Run service directly from a + self-contained source code archive (.tar.gz), skipping the container image + build step for faster deployment. The archive must include all dependencies. +- `deploy_service_from_file_contents`: Deploys a Cloud Run service directly from + local source files. 
This method is suitable for scripting languages like Python + and Node.js, of which the source code can be embedded in the request. This is + ideal for quick tests and development feedback loops. You must include all + necessary dependencies within the source files because it skips the build step + for faster deployment. + +## Setup Instructions + +To connect to the Cloud Run MCP server: + +1. Enable the Cloud Run API in your Google Cloud project. +2. Configure the agent's MCP connection using the Gemini CLI extension. +3. Follow the setup guide: + [Setting up Cloud Run MCP](https://docs.cloud.google.com/run/docs/reference/mcp). + +## Supported Operations + +Agents using the Cloud Run MCP can: + +- Automate the rollout of new revisions. +- Troubleshoot failing deployments by inspecting logs and status. +- Manage scheduled jobs and verify their execution history. + +Alternatively, use the [open source Cloud Run MCP server](https://github.com/GoogleCloudPlatform/cloud-run-mcp) which runs locally. diff --git a/.agents/skills/cloud-sql-basics/SKILL.md b/.agents/skills/cloud-sql-basics/SKILL.md new file mode 100644 index 0000000..9b121b2 --- /dev/null +++ b/.agents/skills/cloud-sql-basics/SKILL.md @@ -0,0 +1,109 @@ +--- +name: cloud-sql-basics +description: >- + This file generates or explains Cloud SQL resources. Use this file when the + user asks to create a Cloud SQL instance or database for MySQL, PostgreSQL, or + SQL Server. + + Cloud SQL manages third-party MySQL, PostgreSQL, and SQL Server instances as + resources in Cloud SQL. For example, when Cloud SQL creates an open-source + MySQL instance, the resulting resource is a Cloud SQL for MySQL instance that + Google Cloud manages. + + Cloud SQL handles backups, high availability, and secure connectivity for + relational database workloads. +--- + +# Cloud SQL Basics + +Cloud SQL is a fully managed relational database service for MySQL, PostgreSQL, +and SQL Server. 
It automates time-consuming tasks like patches, updates, +backups, and replicas, while providing high performance and availability for +your applications. + +## Prerequisites + +Ensure you have the necessary IAM permissions to create and manage Cloud SQL +instances. The **Cloud SQL Admin** (`roles/cloudsql.admin`) role provides full +access to Cloud SQL resources. + +## Quick Start (PostgreSQL) + +1. **Enable the API:** + ```bash + gcloud services enable sqladmin.googleapis.com --quiet + ``` + +2. **Create an Instance:** + ```bash + gcloud sql instances create INSTANCE_NAME \ + --database-version=POSTGRES_18 \ + --cpu=2 \ + --memory=7680MiB \ + --region=REGION \ + --quiet + ``` + +3. **Set a password for the default user:** + + Because this is a Cloud SQL for PostgreSQL instance, the default admin user + is `postgres`: + ```bash + gcloud sql users set-password postgres \ + --instance=INSTANCE_NAME --password=PASSWORD \ + --quiet + ``` + +4. **Create a database:** + ```bash + gcloud sql databases create DATABASE_NAME \ + --instance=INSTANCE_NAME \ + --quiet + ``` + +5. **Get the instance connection name:** + + You need the instance connection name (which is formatted as + `PROJECT_ID:REGION:INSTANCE_NAME`) to connect using the Cloud SQL Auth + Proxy. Retrieve it with the following command: + ```bash + gcloud sql instances describe INSTANCE_NAME \ + --format="value(connectionName)" \ + --quiet + ``` + +6. **Connect to the instance:** + + The Cloud SQL Auth Proxy must be running to be able to connect to the + instance. 
In a separate terminal, start the proxy using the connection name: + ```bash + ./cloud-sql-proxy INSTANCE_CONNECTION_NAME + ``` + + With the proxy running, connect using `psql` in another terminal: + ```bash + psql "host=127.0.0.1 port=5432 user=postgres dbname=DATABASE_NAME password=PASSWORD sslmode=disable" + ``` + +## Reference Directory + +- [Core Concepts](references/core-concepts.md): Instance architecture, high + availability (HA), and supported database engines. + +- [CLI Usage](references/cli-usage.md): Essential `gcloud sql` commands for + instance, database, and user management. + +- [Client Libraries & Connectors](references/client-library-usage.md): + Connecting to Cloud SQL using Python, Java, Node.js, and Go. + +- [MCP Usage](references/mcp-usage.md): Using the Cloud SQL remote MCP + server and Gemini CLI extension. + +- [Infrastructure as Code](references/iac-usage.md): Terraform + configuration for instances, databases, and users. + +- [IAM & Security](references/iam-security.md): Predefined roles, SSL/TLS + certificates, and Auth Proxy configuration. + +*If you need product information not found in these references, use the + Developer Knowledge MCP server `search_documents` tool.* diff --git a/.agents/skills/cloud-sql-basics/references/cli-usage.md b/.agents/skills/cloud-sql-basics/references/cli-usage.md new file mode 100644 index 0000000..78dad68 --- /dev/null +++ b/.agents/skills/cloud-sql-basics/references/cli-usage.md @@ -0,0 +1,84 @@ +# Cloud SQL CLI Usage + +The `gcloud sql` command group is used to manage Cloud SQL instances and +related resources. 
+ +## Basic Syntax + +```bash +gcloud sql [GROUP] [COMMAND] [FLAGS] +``` + +## Essential Commands + +### Instance Management + +- **Create an instance:** + + ```bash + gcloud sql instances create my-instance --database-version=MYSQL_8_0 \ + --tier=db-f1-micro --region=us-central1 \ + --quiet + ``` + +- **List instances:** + + ```bash + gcloud sql instances list --quiet + ``` + +- **Describe an instance:** + + ```bash + gcloud sql instances describe my-instance --quiet + ``` + +- **Restart an instance:** + + ```bash + gcloud sql instances restart my-instance --quiet + ``` + +### Database and User Management + +- **Create a database:** + + ```bash + gcloud sql databases create my-db --instance=my-instance --quiet + ``` + +- **Create a user:** + + ```bash + gcloud sql users create my-user --instance=my-instance \ + --password=my-password \ + --quiet + ``` + +### Operations and Backups + +- **List operations:** + + ```bash + gcloud sql operations list --instance=my-instance --quiet + ``` + +- **Create a backup:** + + ```bash + gcloud sql backups create --instance=my-instance --quiet + ``` + +- **Restore from a backup:** + + ```bash + gcloud sql backups restore backup_id --restore-instance=my-instance --quiet + ``` + +## Common Flags + +- `--project`: Specifies the project ID. + +- `--region`: The region where the instance is located. + +- `--format`: Changes output format (e.g., `json`, `yaml`). diff --git a/.agents/skills/cloud-sql-basics/references/client-library-usage.md b/.agents/skills/cloud-sql-basics/references/client-library-usage.md new file mode 100644 index 0000000..a853200 --- /dev/null +++ b/.agents/skills/cloud-sql-basics/references/client-library-usage.md @@ -0,0 +1,116 @@ +# Cloud SQL Client Libraries + +Google Cloud provides client libraries and connectors to simplify connecting to +Cloud SQL from various programming languages. + +## Getting Started + +Ensure you have the latest version of the Google Cloud SDK installed and +authenticated. 
+[Install Google Cloud SDK](https://cloud.google.com/sdk/docs/install) + +### Language Connectors + +The Cloud SQL Language Connectors (Python, Java, Go, Node.js) provide a secure +way to connect to the Cloud SQL instance without managing IP allowlists or SSL +certificates. + +#### Python + +- **Installation for a Cloud SQL for PostgreSQL instance:** + + ```bash + pip install "cloud-sql-python-connector[pg8000]" + ``` + +- **Usage Example:** + + ```python + from google.cloud.sql.connector import Connector + connector = Connector() + def getconn(): + conn = connector.connect( + "project:region:instance", + "pg8000", + user="my-user", + password="my-password", + db="my-db" + ) + return conn + ``` + +#### Java + +- **Maven Dependencies:** + + The recommended method is to use the Cloud SQL JDBC Socket Factory. Add the + BOM to your `<dependencyManagement>` section: + + ```xml + <dependencyManagement> + <dependencies> + <dependency> + <groupId>com.google.cloud.sql</groupId> + <artifactId>jdbc-socket-factory-bom</artifactId> + <version>1.18.0</version> + <type>pom</type> + <scope>import</scope> + </dependency> + </dependencies> + </dependencyManagement> + ``` + + Then add dependencies for your database: + + * **PostgreSQL:** + ```xml + <dependencies> + <dependency> + <groupId>org.postgresql</groupId> + <artifactId>postgresql</artifactId> + <version>42.7.3</version> + </dependency> + <dependency> + <groupId>com.google.cloud.sql</groupId> + <artifactId>postgres-socket-factory</artifactId> + </dependency> + </dependencies> + ``` + + * **MySQL:** + ```xml + <dependencies> + <dependency> + <groupId>com.mysql</groupId> + <artifactId>mysql-connector-j</artifactId> + <version>8.0.33</version> + </dependency> + <dependency> + <groupId>com.google.cloud.sql</groupId> + <artifactId>mysql-socket-factory-connector-j-8</artifactId> + </dependency> + </dependencies> + ``` + +#### Node.js (TypeScript) + +- **Installation:** + + ```bash + npm install @google-cloud/cloud-sql-connector + ``` + +#### Go + +- **Installation:** + + ```bash + go get cloud.google.com/go/cloudsqlconn + ``` + +## Cloud SQL Admin API + +To manage Cloud SQL resources (e.g., list instances) programmatically, use the +`sqladmin` libraries. 
+ +- [Cloud SQL Admin API Overview](https://docs.cloud.google.com/sql/docs/mysql/admin-api) diff --git a/.agents/skills/cloud-sql-basics/references/core-concepts.md b/.agents/skills/cloud-sql-basics/references/core-concepts.md new file mode 100644 index 0000000..e6a3670 --- /dev/null +++ b/.agents/skills/cloud-sql-basics/references/core-concepts.md @@ -0,0 +1,62 @@ +# Cloud SQL Core Concepts + +Cloud SQL provides managed relational databases, abstracting the underlying +infrastructure while offering standard database engines. + +## Supported Engines + +Cloud SQL supports the following database engines (see [supported +versions](https://docs.cloud.google.com/sql/docs/db-versions)): + +- **MySQL:** Versions 5.6, 5.7, 8.0, and 8.4. + +- **PostgreSQL:** Versions 9.6, 10, 11, 12, 13, 14, 15, 16, 17, and 18 + (default). + +- **SQL Server:** 2017 (Express, Web, Standard, Enterprise), 2019, 2022, and + 2025 (Express, Enterprise, Standard). + +## Instance Architecture + +Each Cloud SQL instance is powered by a virtual machine (VM) running the +database program. + +- **Primary Instance:** The main read/write connection point. + +- **High Availability (HA):** Provides a standby VM in a different zone with + automatic failover. + +- **Read Replicas:** Used to scale read traffic and provide local access in + different regions. + +## Storage and Networking + +- **Persistent Disk:** Scalable and durable network storage attached to the + VM. + +- **Connectivity:** Supports Private IP (using VPC peering for MySQL and + PostgreSQL only; or using private services access or Private Service Connect + for all Cloud SQL engines) and Public IP (with authorized networks or Auth + Proxy). + +## Pricing + +Cloud SQL pricing is based on: + +- **Instance Type:** vCPUs and RAM. + +- **Storage:** Amount of data stored and IOPS. + +- **Networking:** Network egress and IP address usage. + +- **DNS pricing:** Charge is per zone per month (regardless of whether you use + your zone). 
You also pay for queries against your zones. + +- **Licensing:** Applies to SQL Server only. In addition to instance and + resource pricing, SQL Server also has a licensing component. High + availability, or regional instances, will only incur the cost for a single + license for the active resource. As a managed service, Cloud SQL does not + support BYOL (Bring your own license). + +For the latest pricing, visit: [Cloud SQL +Pricing](https://cloud.google.com/sql/pricing). diff --git a/.agents/skills/cloud-sql-basics/references/iac-usage.md b/.agents/skills/cloud-sql-basics/references/iac-usage.md new file mode 100644 index 0000000..5baafeb --- /dev/null +++ b/.agents/skills/cloud-sql-basics/references/iac-usage.md @@ -0,0 +1,46 @@ +# Cloud SQL Infrastructure as Code + +Cloud SQL resources can be provisioned and managed using Terraform and other IaC +tools. + +## Terraform + +The Google Cloud Terraform provider supports Cloud SQL instances, databases, and +users. + +### Cloud SQL Instance Example + +```terraform +resource "google_sql_database_instance" "default" { + name = "master-instance" + region = "us-central1" + database_version = "POSTGRES_15" + + settings { + tier = "db-f1-micro" + backup_configuration { + enabled = true + } + } +} + +resource "google_sql_database" "database" { + name = "my-database" + instance = google_sql_database_instance.default.name +} + +resource "google_sql_user" "users" { + name = "me" + instance = google_sql_database_instance.default.name + password = "changeme" +} +``` + +### Reference Documentation + +- [Terraform Google Provider - SQL Database Instance](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/sql_database_instance) + +- [Terraform Google Provider - SQL Database](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/sql_database) + +- [Terraform Google Provider - SQL User](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/sql_user) + diff 
--git a/.agents/skills/cloud-sql-basics/references/iam-security.md b/.agents/skills/cloud-sql-basics/references/iam-security.md new file mode 100644 index 0000000..43c98e0 --- /dev/null +++ b/.agents/skills/cloud-sql-basics/references/iam-security.md @@ -0,0 +1,67 @@ +# Cloud SQL IAM & Security + +Cloud SQL uses Identity and Access Management (IAM) to control access to +instances and databases. + +## Predefined IAM Roles + +| Predefined Role | Usage | +| :--- | :--- | +| `roles/cloudsql.admin` | Full control over all Cloud SQL resources. | +| `roles/cloudsql.editor` | Manage Cloud SQL resources. Cannot see or modify permissions, nor modify users or SSL certificates. Cannot import data or restore from a backup, nor clone, delete, or promote instances. Cannot start or stop replicas. Cannot delete databases, replicas, or backups. | +| `roles/cloudsql.viewer` | Read-only access to Cloud SQL resources. | +| `roles/cloudsql.client` | Connectivity access to Cloud SQL instances from App Engine and the Cloud SQL Auth Proxy. Not required for accessing an instance using IP addresses. | +| `roles/cloudsql.instanceUser` | Permission to log in to a Cloud SQL instance. | +| `roles/cloudsql.schemaViewer` | Role allowing access to a Cloud SQL instance schema in Knowledge Catalog. | +| `roles/cloudsql.studioUser` | Role allowing access to Cloud SQL Studio. | + +## Secure Connectivity + +- **Cloud SQL Auth Proxy:** The recommended way to connect securely. It + provides IAM-based authentication and end-to-end encryption without + requiring SSL/TLS certificates or authorized networks. + +- **Private IP:** Use VPC, private services access, or Private Service Connect + (PSC) to keep database traffic within the Google Cloud network. + +- **Authorized Networks:** If using Public IP, restrict access to specific + CIDR ranges. + +## Data Security + +- **Encryption at Rest:** All data is encrypted by default. Use + Customer-Managed Encryption Keys (CMEK) for additional control. 
+ +- **IAM Database Authentication:** Authenticate to the database using IAM + users or service accounts instead of static passwords (available for MySQL + and PostgreSQL). + +## Organization Policies + +- **Cloud SQL organization policies:** Organization policies let organization + administrators set restrictions on how users can configure instances under + that organization. + +## Service Accounts + +- **Service Identity:** Cloud SQL uses an instance service account + (`p[PROJECT_NUMBER]-[UNIQUE_ID]@gcp-sa-cloud-sql.iam.gserviceaccount.com`) + for tasks like exporting a SQL dump file to Cloud Storage. Service agent + accounts (`service-PROJECT_NUMBER@gcp-sa-cloud-sql.iam.gserviceaccount.com`) + are used only for internal management tasks. + +- **App Connectivity:** Grant the service account running your app (e.g., on + Cloud Run or GKE) the `roles/cloudsql.client` role. + +For more information, see: +- [About Access Control - Cloud SQL for MySQL](https://docs.cloud.google.com/sql/docs/mysql/instance-access-control) +- [About Access Control - Cloud SQL for PostgreSQL](https://docs.cloud.google.com/sql/docs/postgres/instance-access-control) +- [About Access Control - Cloud SQL for SQL Server](https://docs.cloud.google.com/sql/docs/sqlserver/instance-access-control) \ No newline at end of file diff --git a/.agents/skills/cloud-sql-basics/references/mcp-usage.md b/.agents/skills/cloud-sql-basics/references/mcp-usage.md new file mode 100644 index 0000000..78c078b --- /dev/null +++ b/.agents/skills/cloud-sql-basics/references/mcp-usage.md @@ -0,0 +1,51 @@ +# Cloud SQL MCP Usage + +Cloud SQL can be managed via the Model Context Protocol (MCP), which allows +agents to manage database instances and execute SQL queries. 
MCP is available +via remote servers and through local execution with the MCP Toolbox: + +* [Cloud SQL for PostgreSQL](https://mcp-toolbox.dev/integrations/cloud-sql-pg/source/) +* [Cloud SQL for MySQL](https://mcp-toolbox.dev/integrations/cloud-sql-mysql/source/) +* [Cloud SQL for SQL Server](https://mcp-toolbox.dev/integrations/cloud-sql-mssql/source/) + +## MCP Tools for Cloud SQL + +The Cloud SQL MCP server typically includes the following tools: + +- `clone_instance`: creates a Cloud SQL instance as a clone of source + instance. +- `create_instance`: initiates the creation of a Cloud SQL instance. +- `create_user`: creates a database user for a Cloud SQL instance. +- `execute_sql`: executes any valid SQL statements (DDL, DCL, DQL, DML) on a + Cloud SQL instance. +- `get_instance`: gets the details of a Cloud SQL instance. +- `get_operation`: gets the status of a long-running operation in Cloud SQL. +- `list_instances`: lists all Cloud SQL instances in a project. +- `list_users`: lists all database users for a Cloud SQL instance. +- `import_data`: imports data into a Cloud SQL instance from Cloud Storage. +- `update_instance`: updates supported settings of a Cloud SQL instance. +- `update_user`: updates a database user for a Cloud SQL instance. + +For additional specialized skills including health auditing, performance +monitoring, and lifecycle management, install the Gemini CLI extension or Claude +Plugin: + +* [Cloud SQL for PostgreSQL](https://github.com/gemini-cli-extensions/cloud-sql-postgresql) +* [Cloud SQL for MySQL](https://github.com/gemini-cli-extensions/cloud-sql-mysql) +* [Cloud SQL for SQL Server](https://github.com/gemini-cli-extensions/cloud-sql-sqlserver) + +## Setup Instructions + +Setup varies by database engine and whether you are connecting to a remote +server or using the MCP Toolbox. 
For remote server setup, see Setting up +Cloud SQL MCP for [PostgreSQL](https://docs.cloud.google.com/sql/docs/postgres/use-cloudsql-mcp), +[MySQL](https://docs.cloud.google.com/sql/docs/mysql/use-cloudsql-mcp), or +[SQL Server](https://docs.cloud.google.com/sql/docs/sqlserver/use-cloudsql-mcp). + +## Supported Operations + +Agents using the Cloud SQL MCP can: + +- Automate database schema migrations. +- Perform health checks and monitor operation logs. +- Assist in debugging SQL performance issues. diff --git a/.agents/skills/firebase-basics/SKILL.md b/.agents/skills/firebase-basics/SKILL.md new file mode 100644 index 0000000..c7eaccc --- /dev/null +++ b/.agents/skills/firebase-basics/SKILL.md @@ -0,0 +1,103 @@ +--- +name: firebase-basics +description: Use this skill whenever you are working on a project that uses Firebase products or services, especially for mobile or web apps. +--- + +# Firebase Basics + +### **CRITICAL** Mandatory prerequisites + +Before attempting to fulfill any user request regarding Firebase, you **MUST** +follow these steps in order. Do not proceed to implementation until these are +completed. Failure to follow these instructions may result in using outdated +patterns and lack of access to essential reference materials. + +1. **Leverage the planning phase** + + If you use a planning mode (e.g., maintaining a `task.md` file), you + **MUST** include a task to check and install `firebase/agent-skills` as your + very first item. + +2. **Ensure NPM is installed.** + + ```bash + ## Check if NPM is installed + npm --version + ``` + + If NPM is not installed, guide the user to download and install the + LTS version from https://nodejs.org/en/download. + + Then, request the user to manually install Node.js and to reply with a + confirmation that the installation is finished. **Stop and wait** for the + user's confirmation. + +3. 
**Verify and install Agent Skills for Firebase** + + You **MUST** ensure the full set of skills is installed to avoid using + outdated patterns. Run this command: + + ```bash + npx -y skills add firebase/agent-skills -y + ``` + + *If the skills are already installed, this command will ensure they are up + to date. Do not skip this step.* + + +## Quick start + +After completing the mandatory prerequisites above, proceed with these steps to +set up your environment: + +1. **Log in to the Firebase CLI.** + + Run this command: + + ```bash + npx -y firebase-tools@latest login + ``` + + Then, ask the user to complete the login flow in the browser. + +2. **Set an active project for the CLI.** + + Most Firebase tasks require an active project context. Check the current + project for the Firebase CLI by running this command: + + ```bash + npx -y firebase-tools@latest use + ``` + + - If the command outputs `Active Project: PROJECT_ID`, you can proceed + with your task. + + - If the command does *not* output an active project, ask the user if they + have an existing Firebase project ID. 
+ + - If yes: Set the ID as the active project and add a default alias by + running: + + ```bash + npx -y firebase-tools@latest use --add PROJECT_ID + ``` + + - If no: Create a new Firebase project by running: + + ```bash + npx -y firebase-tools@latest projects:create PROJECT_ID --display-name "DISPLAY_NAME" + ``` + +## Reference directory + +- [Firebase core concepts](references/core-concepts.md) +- [Firebase CLI usage](references/cli-usage.md) +- [Firebase client library usage](references/client-library-usage.md) +- [Firebase CLI and MCP server](references/mcp-usage.md) +- [Firebase IaC usage](references/iac-usage.md) +- [Firebase security-related features](references/iam-security.md) +- [Additional Published Skills](references/additional-skills.md) + +If you need product information that's not found in these references, check the +other skills for Firebase that you have installed, or use the `search_documents` +tool of the Developer Knowledge MCP server. \ No newline at end of file diff --git a/.agents/skills/firebase-basics/references/cli-usage.md b/.agents/skills/firebase-basics/references/cli-usage.md new file mode 100644 index 0000000..2e905cc --- /dev/null +++ b/.agents/skills/firebase-basics/references/cli-usage.md @@ -0,0 +1,31 @@ +# Firebase CLI usage + +The Firebase CLI (`firebase-tools`) is the primary tool for managing Firebase +projects and resources from the command line. + +**Use npx for Firebase CLI commands**: To ensure you always use the latest +version of the Firebase CLI, always run commands with +`npx -y firebase-tools@latest` instead of just `firebase` (e.g., use +`npx -y firebase-tools@latest --version` instead of `firebase --version`). + +## Exploring commands + +The Firebase CLI documents itself. Use help commands to discover functionality. 
+ +- **Global help**: List all available commands and categories: + + ```bash + npx -y firebase-tools@latest --help + ``` + +- **Command help**: Get detailed usage for a specific command: + + ```bash + npx -y firebase-tools@latest [command] --help + ``` + + ```bash + # Example: + npx -y firebase-tools@latest deploy --help + npx -y firebase-tools@latest firestore:indexes --help + ``` \ No newline at end of file diff --git a/.agents/skills/firebase-basics/references/client-library-usage.md b/.agents/skills/firebase-basics/references/client-library-usage.md new file mode 100644 index 0000000..2eba27b --- /dev/null +++ b/.agents/skills/firebase-basics/references/client-library-usage.md @@ -0,0 +1,45 @@ +# Firebase client library usage + +Firebase provides SDKs for both client-side application development and +server-side administrative tasks. + +For a full list of Firebase client libraries and links to their documentation +and GitHub repositories, see https://firebase.google.com/docs/libraries + +## Mobile and web client-side SDKs + +The Firebase client-side SDKs allow direct interaction with Firebase services +from a mobile or web app. These SDKs are available for iOS (Swift and +Objective-C), Android (Kotlin and Java), Web (JavaScript), Flutter (Dart), +Unity, and C++. + +- For **web apps**, Agent Skills for Firebase provide guides to get started + with the JavaScript client SDK. 
Install these skills by running: + + ```bash + npx -y skills add firebase/agent-skills -y + ``` + +- For **native iOS or Android mobile apps**, see the documentation to get + started: + + - **iOS**: https://firebase.google.com/docs/ios/setup.md.txt + - **Android**: https://firebase.google.com/docs/android/setup.md.txt + +- For **Flutter apps**, see the documentation to get started: + + - **Flutter**: https://firebase.google.com/docs/flutter/setup.md.txt + +- For **Unity and C++ mobile apps**, see the documentation to get started: + + - **Unity**: https://firebase.google.com/docs/unity/setup.md.txt + - **C++**: https://firebase.google.com/docs/cpp/setup.md.txt + +## Server-side Admin SDKs + +The Firebase Admin SDKs provide privileged access to Firebase services from a +server environment. These SDKs are available for Node.js, Java, Python, and Go. + +For details about Firebase Admin SDKs and getting started, see +https://firebase.google.com/docs/reference/admin.md.txt and +https://firebase.google.com/docs/admin/setup.md.txt diff --git a/.agents/skills/firebase-basics/references/core-concepts.md b/.agents/skills/firebase-basics/references/core-concepts.md new file mode 100644 index 0000000..32fe973 --- /dev/null +++ b/.agents/skills/firebase-basics/references/core-concepts.md @@ -0,0 +1,61 @@ +# Firebase core concepts + +Firebase is a platform of services for mobile and web applications. It offers +products for managed backend infrastructure (BaaS), building AI-powered +experiences in apps, DevOps, and end-user engagement. Most services are +integrated into apps using mobile and web client SDKs. + +## Key services + +Here are some popular Firebase products: + +- **Firebase Authentication**: Simplify end-user authentication and sign-in on a + secure, all-in-one identity platform. +- **Firestore**: Store and sync data using a secure, scalable NoSQL cloud + database with rich data models and queryability. 
+- **Firebase Data Connect**: Build and scale your apps using a fully-managed + PostgreSQL relational database service. +- **Cloud Storage for Firebase**: Store and serve unstructured content like + images, audio, video with a secure cloud-hosted solution. +- **Firebase App Hosting**: Deploy modern, full-stack web apps that require + server-side rendering and automated secret management, CI/CD, and CDN caching. +- **Firebase Hosting**: Deploy static and single-page web apps to a global CDN + with a single command. +- **Cloud Functions for Firebase**: Run backend code in response to events and + HTTPS requests without provisioning or managing a server. +- **Firebase AI Logic**: Build secure AI-powered experiences in mobile and web + apps using the Gemini API and without provisioning or managing a server. +- **Firebase Crashlytics**: Track, prioritize, and fix stability issues in + mobile apps. +- **Firebase Cloud Messaging (FCM)**: Send push notifications and messages to + end users. + +## Regional availability + +Firebase services are available globally, with several products supporting +specific regional configurations. + +- **Firestore**: Each instance can be provisioned in a different location; + supports multi-region (e.g., `nam5`) and regional (e.g., `us-east1`) + locations. +- **Cloud Storage for Firebase**: Each bucket can be provisioned in a different + location. +- **Firebase App Hosting**: Can be deployed to specific regions to minimize + latency for operations and end users. +- **Firebase Hosting**: Content is delivered via a global CDN. +- **Cloud Functions for Firebase**: Can be deployed to specific regions to + minimize latency for operations and end users. + +## Pricing + +Firebase offers two pricing plans: + +- **Spark (no-cost) pricing plan**: Projects don't need a billing account to + use only the no-cost Firebase services and to get started with generous + no-cost usage quota. 
+- **Blaze (pay-as-you-go) pricing plan**: Link a billing account to the project + to access more products and services and to get usage levels beyond the + no-cost usage quota. + +For up-to-date detailed pricing information, see the Firebase pricing +page: https://firebase.google.com/pricing \ No newline at end of file diff --git a/.agents/skills/firebase-basics/references/iac-usage.md b/.agents/skills/firebase-basics/references/iac-usage.md new file mode 100644 index 0000000..f762e1d --- /dev/null +++ b/.agents/skills/firebase-basics/references/iac-usage.md @@ -0,0 +1,40 @@ +# Firebase IaC usage + +Firebase resources can be provisioned using Infrastructure as Code (IaC) tools, +like Terraform. + +## Terraform configuration + +Use the `google` or `google-beta` providers to manage Firebase resources. + +### Example: Firebase project setup + +```hcl +resource "google_firebase_project" "default" { + provider = google-beta + project = "user-defined-project-id" +} + +resource "google_firebase_web_app" "default" { + provider = google-beta + project = google_firebase_project.default.project + display_name = "user-defined-display-name" +} +``` + +### Supported Terraform resources + +Here are some common Terraform resources for Firebase: + +- `google_firebase_project`: Enable Firebase services on an existing + Google Cloud project. +- `google_identity_platform_config`: Set up Firebase Authentication. +- `google_firestore_database`: Provision a Firestore database. + Always set `type = "FIRESTORE_NATIVE"`. +- `google_firebaserules_ruleset`: Define Firebase Security Rules to protect + Firestore data or Cloud Storage for Firebase data. +- `google_firebaserules_release`: Deploy Firebase Security Rules rulesets for + Firestore or for Cloud Storage for Firebase. 
+ +For a complete list of Terraform resources, and details about Terraform and +Firebase, see: https://firebase.google.com/docs/projects/terraform/get-started \ No newline at end of file diff --git a/.agents/skills/firebase-basics/references/iam-security.md b/.agents/skills/firebase-basics/references/iam-security.md new file mode 100644 index 0000000..32ef654 --- /dev/null +++ b/.agents/skills/firebase-basics/references/iam-security.md @@ -0,0 +1,74 @@ +# Firebase security-related features + +Firebase offers several security-related features and services, including: + +- **Identity and Access Management (IAM)**: Restrict a project member's + administrative access for projects, resources, and data. +- **Firebase Security Rules**: Restrict client-side access for Firestore data + and Cloud Storage for Firebase data to only authorized users. +- **Firebase App Check**: Restrict client-side access for APIs and backend + resources to only an authentic client and an authentic, untampered device. + +## Identity and Access Management (IAM) + +Here are some common IAM roles: + +| Role | Description | +|---|---| +| `roles/viewer` | Permissions for read-only actions, such as viewing (but not modifying) existing resources or data. | +| `roles/editor` | All the `roles/viewer` permissions, plus permissions for actions that modify state, such as changing existing resources. | +| `roles/owner` | All the `roles/editor` permissions, plus permissions for the following actions: manage IAM for a project, manage all resources within the project, set up and manage billing for a project, and delete or restore a project. | +| `roles/firebase.viewer` | Read-only access to Firebase resources and data. | +| `roles/firebase.admin` | Full access to all Firebase products and project management. 
| + +For details about IAM and Firebase, see +https://firebase.google.com/docs/projects/iam/overview.md.txt + +## Firebase Security Rules + +Firebase Security Rules are CRITICAL to protecting Firestore data and +Cloud Storage for Firebase data from unauthorized mobile and web client-side +access. They are defined in the project directory (e.g., `firestore.rules`) +and deployed using the Firebase CLI. + +Here is a basic example of Security Rules for Firestore that restricts access +to authenticated end-users only: + +``` +service cloud.firestore { + match /databases/{database}/documents { + match /some_collection/{document} { + allow read, write: if request.auth != null; + } + } +} +``` + +**CRITICAL**: Agent Skills for Firebase provide tools to draft and test Firebase +Security Rules. Install these skills by running: + +```bash +npx -y skills add firebase/agent-skills -y +``` + +## Firebase App Check + +Firebase App Check is CRITICAL to protecting a project's enabled APIs and +backend resources from unauthorized clients and devices. For example, it can +help protect Firebase AI Logic, Firestore, Cloud Storage for Firebase, +Cloud Functions for Firebase, and Firebase Data Connect. + +For details about Firebase App Check, see +https://firebase.google.com/docs/app-check.md.txt + +## Security best practices + +- **Principle of least privilege:** Assign specific product-level roles instead + of `roles/owner` whenever possible. +- **Firebase App Check:** Use this service to protect a project's enabled APIs + and backend resources from abuse by allowing only authentic clients and + devices to access them. +- **Environment management:** Use separate Firebase projects for development, + staging, and production. +- **Sensitive operations:** Always have a human user approve sensitive + operations like granting permissive IAM roles or deleting a database. 
\ No newline at end of file diff --git a/.agents/skills/firebase-basics/references/mcp-usage.md b/.agents/skills/firebase-basics/references/mcp-usage.md new file mode 100644 index 0000000..4fb9a5f --- /dev/null +++ b/.agents/skills/firebase-basics/references/mcp-usage.md @@ -0,0 +1,63 @@ +# Firebase CLI and MCP server + +The Firebase CLI includes a built-in local MCP server that can help with common +tasks. + +1. **Locate MCP configuration** + + Find the configuration file for your agent + (e.g., `~/.codeium/windsurf/mcp_config.json`, `cline_mcp_settings.json`, or + `claude_desktop_config.json`). + + *Note: If the document or its containing directory does not exist, create + them and initialize the file with `{ "mcpServers": {} }` before proceeding.* + +2. **Check existing configuration** + + Open the configuration file and check the `mcpServers` section for a + `firebase` entry. + + - Firebase is already configured if the `command` is `"firebase"` OR if the + `command` is `"npx"` with `"firebase-tools"` and `"mcp"` in the `args`. + + - **Important**: If a valid `firebase` entry is found, the MCP server is + already configured. **Skip step 3** and proceed directly to step 4. + + **Example valid configurations**: + ```json + "firebase": { + "command": "npx", + "args": ["-y", "firebase-tools@latest", "mcp"] + } + ``` + OR + ```json + "firebase": { + "command": "firebase", + "args": ["mcp"] + } + ``` + +3. **Add or update configuration** + + If the `firebase` entry is missing or incorrect, add it to the `mcpServers` + object: + + ```json + "firebase": { + "command": "npx", + "args": [ + "-y", + "firebase-tools@latest", + "mcp" + ] + } + ``` + + *CRITICAL: Merge this configuration into the existing file. You MUST + preserve any other existing servers inside the `mcpServers` object.* + +4. **Verify configuration** + + Save the file and confirm the `firebase` block is present and is properly + formatted JSON. 
\ No newline at end of file diff --git a/.agents/skills/gcloud/SKILL.md b/.agents/skills/gcloud/SKILL.md new file mode 100644 index 0000000..1e6ef89 --- /dev/null +++ b/.agents/skills/gcloud/SKILL.md @@ -0,0 +1,232 @@ +--- +name: gcloud +description: >- + Interacts with Google Cloud services using the gcloud CLI safely and + efficiently. Covers command validation, data reduction, safety guardrails with + a denylist, and workflows for discovery and investigation. You MUST read this + skill before invoking any gcloud command. Use when managing cloud resources, + querying configurations, or troubleshooting issues via gcloud. Don't use when + writing or debugging Google Cloud client library code or raw REST/gRPC API + interactions. +--- + +# gcloud CLI Skill for AI Agents + +This document provides essential guidelines and best practices for AI agents +interacting with the Google Cloud SDK (`gcloud` CLI). Following these rules is +critical to avoid hallucinated commands, flags, flag values, and positional +argument syntax, prevent destructive actions, and minimize context window usage. + +## Getting Started + +### 1. Installation + +If the `gcloud` executable is missing, refer to the official +[Google Cloud CLI Installation Guide](https://docs.cloud.google.com/sdk/docs/install-sdk) +to install it on your platform (Linux, macOS, Windows, etc.). + +### 2. Authorization + +Authenticate the CLI with Google Cloud. Choose the flow that matches your +running environment: + +* **User Account (Interactive)**: Run `gcloud auth login`. Follow the browser + prompts to sign in. +* **User Account (Headless Flow)**: If operating on a terminal without a web + browser (e.g. containers, remote SSH), append the `--no-browser` flag: + `gcloud auth login --no-browser`. Copy the URL, sign in on another machine, + and return the authentication code. 
+* **Application Default Credentials (ADC)**: To authenticate code calls from + local applications or SDK libraries, set up ADC via `gcloud auth + application-default login` (append `--no-browser` for headless + environments). +* **Service Account (Best for Detached/Headless Automation)**: Authenticate + directly using a JSON key file. Ideal for fully automated, background tasks + and pipelines: `gcloud auth activate-service-account + --key-file=path/to/key.json`. Note that some organizations may restrict + access to JSON key files for security reasons. +* **Service Account Impersonation (Preferred for Local Pair-Programming + Agents)**: Leverage the human developer's existing user credentials to + assume a service account identity. Best for local development assistants to + avoid insecure private keys on human workstations: `gcloud config set + auth/impersonate_service_account SERVICE_ACCT_EMAIL` + +*Separation of Privilege (Critical)*: Both service account approaches ensure the +agent's permissions remain strictly distinct from the human user's wide access +limits (enforcing least privilege), and ensure actions are properly audited +under the agent's focused identity. *(Impersonation requires +`roles/iam.serviceAccountTokenCreator`)*. + +For more detailed strategies and authentication types (such as Workload Identity +Federation), see +[Authorizing the gcloud CLI](https://docs.cloud.google.com/sdk/docs/authorizing). + +## Core Principles + +### 1. Explicit Command Validation (Mandatory) + +Your internal knowledge of `gcloud` may be stale or prone to hallucination +(e.g., hallucinating commands, flags, flag values, or positional argument +syntax). You are **FORBIDDEN** from executing commands until you have validated +the exact syntax at the leaf level. + +* **Action**: Always call `gcloud help <COMMAND>` for the *exact* command you + intend to run (e.g., `gcloud help compute instances create`).
+* **Verify**: Ensure the command, flags, flag values, and positional argument + syntax are valid for that specific leaf command before attempting execution. + Validation is not transitive from parent groups. + +### 2. Data Reduction Strategies + +To save context window space and reduce latency, always minimize the volume of +data returned by `gcloud`. + +* **Projection**: Use `--format=json(key1, key2, ...)` to select only the + specific fields needed for your task. To understand the advanced projection + and formatting syntax, refer to `gcloud topic projections` and `gcloud topic + formats`. + +* **Limiting**: Use `--limit=N` to cap the number of resources returned. + +* **Filtering**: Use `--filter` to narrow down results server-side. Prioritize + `:` for pattern matching and never quote the right side of the colon. Treat + the entire filter flag as a singular string without quoting or escaping + characters. To study the filter expression syntax, refer to `gcloud topic + filters`. + +* **Schema Discovery**: Unconstrained resource lists can quickly exhaust your + context window with redundant data. To prevent this, discover a resource's + schema before executing queries. If you are unsure of the JSON key path for + projecting fields (`--format`) or filtering (`--filter`), run the targeted + resource's list command (if supported) with a single-item limit: + + ```bash + gcloud <SERVICE> <RESOURCE> list --limit=1 --format=json + ``` + + Examine this single instance's JSON structure to safely identify the correct + schema keys before requesting full or filtered datasets. + +### 3. Execution Constraints + +* **Single Commands**: Execute a single `gcloud` command at a time. No command + chaining or sequencing. +* **No Shell Operators**: Do not use command substitution (`$(...)`), pipes + (`|`), or redirection (`>`, `>>`, `<`). This is to increase command safety + and ensure commands are more easily understandable and reviewable by users.
+* **No Interactivity**: Do not run interactive commands or commands requiring + a TTY (e.g., `gcloud interactive`). You must enforce non-interactive mode by + appending `--quiet` (or `-q`) to your commands. This ensures that defaults + are used or errors are raised if input is required. + +### 4. Project and Location Scoping (Critical) + +To ensure commands are deterministic, non-interactive, and target the correct +environment, you must explicitly manage project and location scoping. + +* **Explicit Project Target**: Do not rely on active configuration defaults. + Always append `--project=<PROJECT_ID>` to all resource-manipulating and + querying commands (unless running pure local config commands). This avoids + accidental execution against the wrong project. + +* **Prevent Location Prompts**: Many Google Cloud resources are regional or + zonal. If you omit the location flag (e.g., `--region`, `--zone`, or + `--location`), `gcloud` will trigger an interactive prompt to select a + zone/region. This violates the **No Interactivity** rule. Always provide + explicit location flags if the command requires them. + +* **Location Discovery**: If you do not know the correct region, zone, or + location for a service, run discovery commands first (remembering to limit + results if there are many): + + * **Compute Engine (VMs, Networks)**: + + * `gcloud compute regions list --project=<PROJECT_ID>` + * `gcloud compute zones list --project=<PROJECT_ID>` + + * **Other Services (Standard API Style)**: Many GCP services utilize a + unified `locations list` command: + + * `gcloud <SERVICE> locations list --project=<PROJECT_ID>` + * *Examples*: `gcloud artifacts locations list`, `gcloud kms locations + list`, `gcloud secrets locations list`. + +## Safety & Guardrails + +> [!CAUTION] **Destructive actions (delete, update, remove) MUST be explicitly +> authorized by the user.** Never invoke them autonomously unless explicitly +> instructed to do so in the context of a safe, pre-approved workflow.
+ +### Prohibited Operations (Denylist) + +You are **strictly prohibited** from executing the following commands +autonomously. These require explicit human-in-the-loop authorization: + +* **Any IAM policy, role, or binding modification** (Security): Risk of + privilege escalation, administrative lockout, service disruption, or + unauthorized data exposure. +* **No Proactive API Enabling**: Assume necessary APIs are enabled. To prevent + unexpected resource provisioning or billing charges, do not proactively try + to enable APIs. User approval is required to enable any API. +* **`gcloud * delete`** (Destructive): Irreversible resource destruction + (e.g., project deletion) or data wiping. +* **`gcloud billing *`** (Financial): Risk of service disruption or unbounded + costs. +* **`gcloud organizations *`** (Governance): Org-level changes affect security + posture for all users. +* **`gcloud kms *`** (Encryption): Risk of permanently locking data. +* **`gcloud infra-manager deployments apply`** (Destructive): Autonomous IaC + execution can destroy managed resources. + +### Execution Guidelines + +* **Dry Run (Mandatory)**: You MUST invoke a command with `--dry-run` (or + equivalent) first if it exists, before executing the actual command, to + preview changes. + +* **Long Running Operations**: For commands that support it, the `--async` + flag is highly recommended for long-running operations to avoid blocking the + agentic flow. Note that not every command has an `--async` flag. For + commands that return an operation ID (whether via `--async` or by default), + you are responsible for polling for completion if the operation status is + needed for the next step. + +## Structured Workflows + +### Discovery Workflow + +When asked to perform a task on a service you are not familiar with: + +1. You MUST invoke help on a command (e.g., `gcloud help `) before + invoking it. +2. 
If you do not know the exact command, traverse the command tree by invoking + help on a command group (e.g., `gcloud help compute`) to discover available + subcommands and groups. +3. **Schema Discovery**: If you need to filter or project fields from a list + command, but do not know the exact JSON keys, first run `gcloud + list --limit=1 --format=json` to safely discover the schema. + **Never** run a raw `list` command without scoping constraints (like + `--limit=1`), as unconstrained results will pollute and exhaust your context + window. +4. Execute with data reduction flags. + +## Quick Reference / Cheat Sheet + +Task | Command Template +------------------ | ---------------------------------------------------------- +Discover Schema | `gcloud list --limit=1 --format=json` +Filtered List | `gcloud list --filter="status:RUNNING"` +Specific Columns | `gcloud list --format="json(name, id)"` +Learn Filters | `gcloud topic filters` +Learn Formats | `gcloud topic formats` +Learn Projections | `gcloud topic projections` +Asynchronous Op | `gcloud --async` +Check Operation | `gcloud operations describe ` +Common commands | `gcloud cheat-sheet` +List Regions (GCE) | `gcloud compute regions list --project=` +List Zones (GCE) | `gcloud compute zones list --project=` +List Locations | `gcloud locations list --project=` + +Refer to the +[gcloud CLI Scripting Guide](https://docs.cloud.google.com/sdk/docs/scripting-gcloud) +for guidance on using the gcloud CLI in automation. diff --git a/.agents/skills/gemini-agents-api/SKILL.md b/.agents/skills/gemini-agents-api/SKILL.md new file mode 100644 index 0000000..42a280b --- /dev/null +++ b/.agents/skills/gemini-agents-api/SKILL.md @@ -0,0 +1,331 @@ +--- +name: gemini-agents-api +description: Manages custom Agent resources on Gemini Enterprise Agent Platform. 
Use when the user wants to programmatically create, configure, list, update, or delete stateful, server-managed Agent resources (including mounting files, skills, and tools) before executing conversations. +--- + +# Gemini Enterprise Agent Platform - Managed Agents API Skill + +This skill provides complete instructions, REST request endpoints, and JSON payload structures to programmatically manage **custom Agent resources** on the Gemini Enterprise Agent Platform (Agent Platform). + +The **Managed Agents API** forms the **Control Plane** of the platform. It allows developers to provision, retrieve, update, and delete tailored, stateful agent containers equipped with system instructions, sandboxed files, custom skill registries, and local/remote tools. +--- + +## 1. Authentication & Setup + +All REST requests to the Control Plane must include a Bearer token derived from Application Default Credentials (ADC), and target the production global endpoint. + +### 1. Setup Environment Variables + +Before running requests, set up the required project variables and access token: + +```bash +export PROJECT_ID="your-project-id" +export LOCATION="global" +export ACCESS_TOKEN=$(gcloud auth print-access-token) +``` + +> [!IMPORTANT] +> **API Location Support**: +> The `LOCATION` environment variable must be set to a regional location where the Gemini Enterprise Agent Platform's **Managed Agents API** is actively supported (e.g., `global`, or other available regional endpoints). + + +### 2. Endpoint URL + +The production Agents Control Plane endpoint is: +```http +https://aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/agents +``` + +--- + +## 2. Programmatic Agent Management (Control Plane CRUD) + +### 1. Create Agent (Long-Running Operation) + +To create a new agent resource, issue a `POST` request with the custom configuration. 
You can mount remote files, folders, or skills directly from **Google Cloud Storage** buckets into the agent container's workspace. Creating an agent is a Long-Running Operation (LRO) that spawns an asynchronous job. + +* **Method**: `POST` +* **Endpoint**: `https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/${LOCATION}/agents` + +#### Request Payload + +```bash +curl -X POST "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/${LOCATION}/agents" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json; charset=utf-8" \ + -d '{ + "id": "my-custom-agent", + "base_agent": "antigravity-preview-05-2026", + "description": "A professional agent configured with remote tools and mounted Cloud Storage directories.", + "system_instruction": "You are a helpful, domain-expert assistant.", + "tools": [ + {"type": "code_execution"}, + {"type": "filesystem"}, + {"type": "google_search"}, + {"type": "url_context"} + ], + "base_environment": { + "type": "remote", + "sources": [ + { + "type": "gcs", + "source": "gs://your-agent-bucket-name/skills", + "target": "/.agent/skills" + } + ], + "network": { + "allowlist": [ + { "domain": "*" } + ] + } + } + }' +``` + +#### LRO Operations Response + +Since agent provisioning takes a few moments, the endpoint immediately returns an operation tracking object: +```json +{ + "name": "projects/1234567890/locations/global/operations/operation-987654321-abcde", + "metadata": { + "@type": "type.googleapis.com/google.cloud.aiplatform.v1beta1.CreateAgentOperationMetadata", + "genericMetadata": { + "createTime": "2026-05-14T19:00:00.123456Z", + "updateTime": "2026-05-14T19:00:01.654321Z" + } + } +} +``` + +#### [Advanced] Mount Skill Registry Resources + +To mount skills directly from the Skill Registry service instead of Cloud Storage, replace the Cloud Storage source item in the payload: +```json +"sources": [ + { + "type": "skill_registry", + "source": 
"projects/your-project-id/locations/global/skills/my-math-skill/revisions/123456789012", + "target": "/.agent/skills" + } +] +``` + +#### [Advanced] Configuring Model Context Protocol (MCP) Servers + +To configure Third-Party MCP servers for an agent, add the server metadata directly under the `"tools"` parameter array inside the creation request. The platform securely routes tool execution requests to the external MCP server. + +```json +"tools": [ + { + "type": "mcp", + "name": "my-mcp-server", + "url": "https://mcp.yourcompany.com/api", + "headers": { + "Authorization": "Bearer YOUR_MCP_AUTH_TOKEN" + } + } +] +``` +* **name**: A descriptive name for the MCP server. +* **url**: The endpoint URL of the external MCP server. +* **headers**: (Optional) Custom key-value pairs containing authentication tokens (e.g. API keys, bearer tokens) required to call the server. The platform guarantees that these headers are only sent to the specified MCP server URL. + +> [!TIP] +> **Overriding MCP at Interaction Time (Data Plane)**: +> You can dynamically override or supply MCP tools directly when creating a conversation interaction (Data Plane) by passing `"type": "mcp_server"` inside the `"tools"` payload of `interactions.create`. Refer to the Interactions API documentation for details. + +--- + +### 2. Polling the LRO Status + +To track the status of agent creation and obtain the final ready resource, poll the operation URL returned in the `name` field of the creation response. + +* **Method**: `GET` +* **Endpoint**: `https://aiplatform.googleapis.com/v1beta1/{OPERATION_NAME}` + +```bash +curl -X GET "https://aiplatform.googleapis.com/v1beta1/projects/1234567890/locations/global/operations/operation-987654321-abcde" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" +``` + +#### In-Progress Response + +```json +{ + "name": "projects/1234567890/locations/global/operations/operation-987654321-abcde", + "metadata": { ... 
} +} +``` + +#### Finished Success Response + +Once the container is ready, `"done": true` is set, and the completed `Agent` resource description resides inside `"response"`: +```json +{ + "name": "projects/1234567890/locations/global/operations/operation-987654321-abcde", + "done": true, + "response": { + "@type": "type.googleapis.com/google.cloud.aiplatform.v1beta1.Agent", + "name": "projects/your-project-id/locations/global/agents/my-custom-agent", + "base_agent": "antigravity-preview-05-2026", + "description": "A professional agent configured with remote tools and mounted Cloud Storage directories.", + "system_instruction": "You are a helpful, domain-expert assistant." + } +} +``` + +--- + +### 3. Get Agent + +Retrieve the configuration metadata, tools, and environment setup of an existing custom agent. + +* **Method**: `GET` +* **Endpoint**: `https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/${LOCATION}/agents/{AGENT_ID}` + +```bash +curl -X GET "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/global/agents/my-custom-agent" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" +``` + +#### Response Example +Returns the complete configured state of the custom Agent resource: +```json +{ + "name": "projects/your-project-id/locations/global/agents/my-custom-agent", + "base_agent": "antigravity-preview-05-2026", + "description": "A professional agent configured with remote tools and mounted Cloud Storage directories.", + "system_instruction": "You are a helpful, domain-expert assistant.", + "tools": [ + {"type": "code_execution"}, + {"type": "filesystem"}, + {"type": "google_search"}, + {"type": "url_context"} + ], + "base_environment": { + "type": "remote", + "sources": [ + { + "type": "gcs", + "source": "gs://your-agent-bucket-name/skills", + "target": "/.agent/skills" + } + ], + "network": { + "allowlist": [ + { "domain": "*" } + ] + } + } +} +``` + +--- + +### 4. 
List Agents + +Retrieve a list of all configured custom agents located under the target Google Cloud project. + +* **Method**: `GET` +* **Endpoint**: `https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/${LOCATION}/agents` + +```bash +curl -X GET "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/global/agents" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" +``` + +#### Response Example +Returns a JSON list of all configured custom Agents under the target project: +```json +{ + "agents": [ + { + "name": "projects/your-project-id/locations/global/agents/my-custom-agent", + "base_agent": "antigravity-preview-05-2026", + "description": "A professional agent configured with remote tools and mounted Cloud Storage directories.", + "system_instruction": "You are a helpful, domain-expert assistant." + }, + { + "name": "projects/your-project-id/locations/global/agents/my-telecom-agent", + "base_agent": "antigravity-preview-05-2026", + "description": "A highly specialized telecom support agent.", + "system_instruction": "You are a professional telecom support agent. Follow system policies carefully." + } + ] +} +``` + +--- + +### 5. Update Agent (Patching Configuration) + +Modify configuration fields (such as instructions, descriptions, tools, or mounts) on a custom agent resource in place. You **must** specify the fields being updated using the `update_mask` query parameter. 
+ +* **Method**: `PATCH` +* **Endpoint**: `https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/${LOCATION}/agents/{AGENT_ID}?update_mask=system_instruction` + +```bash +curl -X PATCH "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/global/agents/my-custom-agent?update_mask=system_instruction" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "my-custom-agent", + "system_instruction": "You are a highly specialized telecom support agent. Follow system policies carefully." + }' +``` + +--- + +### 6. Delete Agent + +Delete custom Agent resources when they are no longer needed to free up backend workspace containers. + +* **Method**: `DELETE` +* **Endpoint**: `https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/${LOCATION}/agents/{AGENT_ID}` + +```bash +curl -X DELETE "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/global/agents/my-custom-agent" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" +``` + +#### Response Example +A successful deletion request returns an empty JSON response body with HTTP Status `200 OK`: +```json +{} +``` + +--- + +## 3. Interacting with Custom Agents (Data Plane) + +Once you have programmatically created and provisioned your custom stateful agent using the **Control Plane** (this skill), you can execute multi-turn chat, tool execution, and streaming conversations with it using the **Data Plane** (**Interactions API**). + +To interact with your custom agent: +1. Obtain your agent's resource path name (e.g., `projects/{PROJECT_ID}/locations/global/agents/{AGENT_ID}`). +2. Pass this resource path directly inside your data plane conversation requests under the **`agent`** parameter. + +#### Python Example +```python +interaction = client.interactions.create( + agent="projects/your-project-id/locations/global/agents/my-custom-agent", + input="Hello! Who are you?" 
+) +``` + +#### REST / curl Example +```json +{ + "agent": "projects/your-project-id/locations/global/agents/my-custom-agent", + "input": [{ + "role": "user", + "content": [{"type": "text", "text": "Hello! Who are you?"}] + }] +} +``` + +Refer to the **`gemini-interactions-api`** skill guide (`../gemini-interactions-api/SKILL.md`) for full instructions, Python and TS/JS code blocks, and streaming setups to run conversations with your provisioned agents. + diff --git a/.agents/skills/gemini-api/SKILL.md b/.agents/skills/gemini-api/SKILL.md new file mode 100644 index 0000000..6862f81 --- /dev/null +++ b/.agents/skills/gemini-api/SKILL.md @@ -0,0 +1,237 @@ +--- +name: gemini-api +description: Use when the user asks about using Gemini in an enterprise environment or explicitly mentions Vertex AI, Google Cloud, or Agent Platform. Guides the usage of the Gemini API on Agent Platform with the Google Gen AI SDK. Covers SDK usage (Python, JS/TS, Go, Java, C#), capabilities like multimodal inputs, tools, media generation, caching, batch prediction, and Live API. +compatibility: Requires active Google Cloud credentials and Agent Platform API enabled. +--- + +IMPORTANT: Agent Platform (full name Gemini Enterprise Agent Platform) was previously named "Vertex AI" and many web resources use the legacy branding. + +# Gemini API in Agent Platform + +Access Google's most advanced AI models built for enterprise use cases using the Gemini API in Agent Platform. 
+ +Provide these key capabilities: + +- **Text generation** - Chat, completion, summarization +- **Multimodal understanding** - Process images, audio, video, and documents +- **Function calling** - Let the model invoke your functions +- **Structured output** - Generate valid JSON matching your schema +- **Context caching** - Cache large contexts for efficiency +- **Embeddings** - Generate text embeddings for semantic search +- **Live Realtime API** - Bidirectional streaming for low latency Voice and Video interactions +- **Batch Prediction** - Handle massive async dataset prediction workloads + +## Core Directives + +- **Unified SDK**: ALWAYS use the Gen AI SDK (`google-genai` for Python, `@google/genai` for JS/TS, `google.golang.org/genai` for Go, `com.google.genai:google-genai` for Java, `Google.GenAI` for C#). +- **Legacy SDKs**: DO NOT use `google-cloud-aiplatform`, `@google-cloud/vertexai`, or `google-generativeai`. + +## SDKs + +- **Python**: Install `google-genai` with `pip install google-genai` +- **JavaScript/TypeScript**: Install `@google/genai` with `npm install @google/genai` +- **Go**: Install `google.golang.org/genai` with `go get google.golang.org/genai` +- **C#/.NET**: Install `Google.GenAI` with `dotnet add package Google.GenAI` +- **Java**: + - groupId: `com.google.genai`, artifactId: `google-genai` + - Latest version can be found here: https://central.sonatype.com/artifact/com.google.genai/google-genai/versions (let's call it `LAST_VERSION`) + - Install in `build.gradle`: + + ``` + implementation("com.google.genai:google-genai:${LAST_VERSION}") + ``` + + - Install Maven dependency in `pom.xml`: + + ```xml + <dependency> + <groupId>com.google.genai</groupId> + <artifactId>google-genai</artifactId> + <version>${LAST_VERSION}</version> + </dependency> + ``` + +> [!WARNING] +> Legacy SDKs like `google-cloud-aiplatform`, `@google-cloud/vertexai`, and `google-generativeai` are deprecated.
Migrate to the new SDKs above urgently by following the [Migration Guide](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/deprecations/genai-vertexai-sdk). + +## Authentication & Configuration + +Prefer environment variables over hard-coding parameters when creating the client. Initialize the client without parameters to automatically pick up these values. + +### Application Default Credentials (ADC) +Set these variables for standard [Google Cloud authentication](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/start/gcp-auth): +```bash +export GOOGLE_CLOUD_PROJECT='your-project-id' +export GOOGLE_CLOUD_LOCATION='global' +export GOOGLE_GENAI_USE_ENTERPRISE=true +``` + +- By default, use `location="global"` to access the global endpoint, which provides automatic routing to regions with available capacity. +- If a user explicitly asks to use a specific region (e.g., `us-central1`, `europe-west4`), specify that region in the `GOOGLE_CLOUD_LOCATION` parameter instead. Reference the [supported regions documentation](https://docs.cloud.google.com/gemini-enterprise-agent-platform/resources/locations#google-models) if needed. + +### Agent Platform in Express Mode +Set these variables when using [Express Mode](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/start/api-keys?usertype=expressmode) with an API key: +```bash +export GOOGLE_API_KEY='your-api-key' +export GOOGLE_GENAI_USE_ENTERPRISE=true +``` + +### Initialization +Initialize the client without arguments to pick up environment variables: +```python +from google import genai + +client = genai.Client() +``` + +Alternatively, you can hard-code in parameters when creating the client. 
+ +```python +from google import genai + +client = genai.Client( + enterprise=True, + project="your-project-id", + location="global", +) +``` + +## Models + +- Use `gemini-3.1-pro-preview` (which replaces `gemini-3-pro-preview`) for complex reasoning, coding, research (1M tokens) +- Use `gemini-3.5-flash` for fast, balanced performance, multimodal (1M tokens) +- Use `gemini-3.1-flash-lite` for high-frequency, lightweight tasks (1M tokens) +- Use `gemini-3-pro-image` (aka Nano Banana Pro) for high-quality image generation and editing +- Use `gemini-3.1-flash-image` (aka Nano Banana 2) for fast image generation and editing +- Use `gemini-live-2.5-flash-native-audio` for Live Realtime API including native audio + +Use the following models only if explicitly requested: + +- `gemini-2.5-flash-image` +- `gemini-2.5-flash` +- `gemini-2.5-flash-lite` +- `gemini-2.5-pro` + +> [!IMPORTANT] +> Models like `gemini-2.0-*`, `gemini-1.5-*`, `gemini-1.0-*`, `gemini-pro` are legacy and deprecated. Use the new models above. Your knowledge is outdated. +> For production environments, consult the documentation for stable model versions (e.g. `gemini-3.5-flash`). 
+ +## Quick Start + +### Python +```python +from google import genai + +client = genai.Client() +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="Explain quantum computing", +) +print(response.text) +``` + +### TypeScript/JavaScript +```typescript +import { GoogleGenAI } from "@google/genai"; +const ai = new GoogleGenAI({ enterprise: { project: "your-project-id", location: "global" } }); +const response = await ai.models.generateContent({ + model: "gemini-3.5-flash", + contents: "Explain quantum computing" +}); +console.log(response.text); +``` + +### Go +```go +package main + +import ( + "context" + "fmt" + "log" + "google.golang.org/genai" +) + +func main() { + ctx := context.Background() + client, err := genai.NewClient(ctx, &genai.ClientConfig{ + Backend: genai.BackendVertexAI, + Project: "your-project-id", + Location: "global", + }) + if err != nil { + log.Fatal(err) + } + + resp, err := client.Models.GenerateContent(ctx, "gemini-3.5-flash", genai.Text("Explain quantum computing"), nil) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) +} +``` + +### Java +```java +import com.google.genai.Client; +import com.google.genai.types.GenerateContentResponse; + +public class GenerateTextFromTextInput { + public static void main(String[] args) { + Client client = Client.builder().enterprise(true).project("your-project-id").location("global").build(); + GenerateContentResponse response = + client.models.generateContent( + "gemini-3.5-flash", + "Explain quantum computing", + null); + + System.out.println(response.text()); + } +} +``` + +### C#/.NET +```csharp +using Google.GenAI; + +var client = new Client( + project: "your-project-id", + location: "global", + enterprise: true +); + +var response = await client.Models.GenerateContent( + "gemini-3.5-flash", + "Explain quantum computing" +); + +Console.WriteLine(response.Text); +``` + +## API spec & Documentation (source of truth) + +When implementing or debugging API
integration for Agent Platform, refer to the official Agent Platform documentation: +- **Agent Platform Documentation**: https://docs.cloud.google.com/gemini-enterprise-agent-platform/overview.md.txt +- **REST API Reference**: https://docs.cloud.google.com/gemini-enterprise-agent-platform/reference/rest + +The Gen AI SDK on Agent Platform uses the `v1beta1` or `v1` REST API endpoints (e.g., `https://{LOCATION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT}/locations/{LOCATION}/publishers/google/models/{MODEL}:generateContent`). + +> [!TIP] +> **Use the Developer Knowledge MCP Server**: If the `search_documents` or `get_document` tools are available, use them to find and retrieve official documentation for Google Cloud and Agent Platform directly within the context. This is the preferred method for getting up-to-date API details and code snippets. + +## Workflows and Code Samples + +Reference the [Python Docs Samples repository](https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/genai) for additional code samples and specific usage scenarios. + +Depending on the specific user request, refer to the following reference files for detailed code samples and usage patterns (Python examples): + +- **Text & Multimodal**: Chat, Multimodal inputs (Image, Video, Audio), and Streaming. See [references/text_and_multimodal.md](references/text_and_multimodal.md) +- **Embeddings**: Generate text embeddings for semantic search. See [references/embeddings.md](references/embeddings.md) +- **Structured Output & Tools**: JSON generation, Function Calling, Search Grounding, and Code Execution. See [references/structured_and_tools.md](references/structured_and_tools.md) +- **Media Generation**: Image generation, Image editing, and Video generation. See [references/media_generation.md](references/media_generation.md) +- **Bounding Box Detection**: Object detection and localization within images and video. 
See [references/bounding_box.md](references/bounding_box.md) +- **Live API**: Real-time bidirectional streaming for voice, vision, and text. See [references/live_api.md](references/live_api.md) +- **Advanced Features**: Content Caching, Batch Prediction, and Thinking/Reasoning. See [references/advanced_features.md](references/advanced_features.md) +- **Safety**: Adjusting Responsible AI filters and thresholds. See [references/safety.md](references/safety.md) +- **Model Tuning**: Supervised Fine-Tuning and Preference Tuning. See [references/model_tuning.md](references/model_tuning.md) diff --git a/.agents/skills/gemini-api/references/advanced_features.md b/.agents/skills/gemini-api/references/advanced_features.md new file mode 100644 index 0000000..bc776a1 --- /dev/null +++ b/.agents/skills/gemini-api/references/advanced_features.md @@ -0,0 +1,149 @@ +# Advanced Features + +## Content Caching +Cache large documents or contexts to reduce cost and latency. + +Only use explicit caching if asked directly. Implicit caching is enabled by default and automatically provides cost savings when cache hits occur. + +```python +from google import genai +from google.genai import types + +client = genai.Client() + +content_cache = client.caches.create( + model="gemini-3.5-flash", + config=types.CreateCachedContentConfig( + contents=[ + types.Content( + role="user", + parts=[ + types.Part.from_uri( + file_uri="gs://your-bucket/large.pdf", + mime_type="application/pdf", + ) + ], + ) + ], + system_instruction="You are an expert researcher.", + display_name="example-cache", + ttl="86400s", + ), +) + +# Use the cache +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="Summarize the pdf", + config=types.GenerateContentConfig(cached_content=content_cache.name), +) +``` + +## Batch Prediction +For processing large datasets asynchronously. 
+ +```python +import time +from google import genai +from google.genai import types + +client = genai.Client() + +job = client.batches.create( + model="gemini-3.5-flash", + src="gs://your-bucket/prompts.jsonl", + config=types.CreateBatchJobConfig(dest="gs://your-bucket/outputs"), +) + +completed_states = { + types.JobState.JOB_STATE_SUCCEEDED, + types.JobState.JOB_STATE_FAILED, + types.JobState.JOB_STATE_CANCELLED, +} +while job.state not in completed_states: + time.sleep(30) + job = client.batches.get(name=job.name) +``` + +## Thinking (Reasoning) + +Thinking is on by default for `gemini-3.1-pro-preview` (default `HIGH` / dynamic) and `gemini-3.5-flash` (default `MEDIUM`). `gemini-3.1-flash-lite` defaults to `MINIMAL`. +It can be adjusted by using the `thinking_level` parameter. + +- **`MINIMAL`**: Constrains the model to use as few tokens as possible for thinking and is best used for low-complexity tasks that wouldn't benefit from extensive reasoning. (Not supported for `gemini-3.1-pro-preview`.) +- **`LOW`**: Constrains the model to use fewer tokens for thinking and is suitable for simpler tasks where extensive reasoning is not required. +- **`MEDIUM`**: Offers a balanced approach suitable for tasks of moderate complexity that benefit from reasoning but don't require deep, multi-step planning. +- **`HIGH`**: Maximizes reasoning depth. The model may take significantly longer to reach a first token, but the output will be more thoroughly vetted. 
+ +```python +from google import genai +from google.genai import types + +client = genai.Client() +response = client.models.generate_content( + model="gemini-3.1-pro-preview", + contents="solve x^2 + 4x + 4 = 0", + config=types.GenerateContentConfig( + thinking_config=types.ThinkingConfig( + thinking_level=types.ThinkingLevel.HIGH, + ) + ), +) + +# Access thoughts if returned +for part in response.candidates[0].content.parts: + if part.thought: + print(f"Thought: {part.text}") + else: + print(f"Final Answer: {part.text}") +``` + +## Model Context Protocol (MCP) support (experimental) + +Built-in [MCP](https://modelcontextprotocol.io/introduction) support is an experimental feature. You can pass a local MCP server as a tool directly. + +```python +import os +import asyncio +from datetime import datetime +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client + +from google import genai +from google.genai import types + +client = genai.Client() + +# Create server parameters for stdio connection +server_params = StdioServerParameters( + command="npx", # Executable + args=["-y", "@philschmid/weather-mcp"], # MCP Server + env=None, # Optional environment variables +) + + +async def run(): + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Prompt to get the weather for the current day in London. + prompt = f"What is the weather in London in {datetime.now().strftime('%Y-%m-%d')}?" 
+ + # Initialize the connection between client and server + await session.initialize() + + # Send request to the model with MCP function declarations + response = await client.aio.models.generate_content( + model="gemini-3.5-flash", + contents=prompt, + config=types.GenerateContentConfig( + tools=[ + session # uses the session, will automatically call the tool using automatic function calling + ], + ), + ) + print(response.text) + + +# Start the asyncio event loop and run the main function +asyncio.run(run()) +``` diff --git a/.agents/skills/gemini-api/references/bounding_box.md b/.agents/skills/gemini-api/references/bounding_box.md new file mode 100644 index 0000000..43d5648 --- /dev/null +++ b/.agents/skills/gemini-api/references/bounding_box.md @@ -0,0 +1,68 @@ +# Bounding Box Detection + +Detect and localize objects within images or videos using bounding boxes. The model returns coordinates in the format `[y_min, x_min, y_max, x_max]`, normalized from 0 to 1000. + +## Implementation (Python) + +To ensure structured output, define a `BoundingBox` class and provide it as the `response_schema`. + +```python +from google import genai +from google.genai.types import ( + GenerateContentConfig, + Part, +) +from pydantic import BaseModel + + +# Define the schema for the bounding box +class BoundingBox(BaseModel): + box_2d: list[int] + label: str + + +client = genai.Client() + +config = GenerateContentConfig( + system_instruction=""" + Return bounding boxes as an array with labels. + Never return masks. Limit to 25 objects. 
+ """, + response_mime_type="application/json", + response_schema=list[BoundingBox], +) + +image_uri = "gs://cloud-samples-data/generative-ai/image/socks.jpg" + +response = client.models.generate_content( + model="gemini-3.5-flash", + contents=[ + Part.from_uri(file_uri=image_uri, mime_type="image/jpeg"), + "Detect the socks in the image and provide bounding boxes.", + ], + config=config, +) + +# Access the detected boxes +for bbox in response.parsed: + print(f"Label: {bbox.label}, Box: {bbox.box_2d}") +``` + +## Coordinate System +- **Format**: `[y_min, x_min, y_max, x_max]` +- **Normalization**: Coordinates are integers from `0` to `1000`. +- **Origin**: `[0, 0]` is the top-left corner of the image. + +## Visualization Helper +To visualize the results, scale the normalized coordinates back to the original image dimensions. + +```python +def scale_box(box_2d, width, height): + y_min, x_min, y_max, x_max = box_2d + return [ + int(y_min / 1000 * height), + int(x_min / 1000 * width), + int(y_max / 1000 * height), + int(x_max / 1000 * width), + ] +``` diff --git a/.agents/skills/gemini-api/references/embeddings.md b/.agents/skills/gemini-api/references/embeddings.md new file mode 100644 index 0000000..0f9cd64 --- /dev/null +++ b/.agents/skills/gemini-api/references/embeddings.md @@ -0,0 +1,77 @@ +# Text and Multimodal Embeddings + +Generate embeddings for text or multimodal content (images and videos) to perform semantic search, clustering, and other NLP tasks. Text and multimodal embedding vectors share the same semantic space, which allows you to use them interchangeably for cross-modal applications like searching for an image using a text query, or searching for a video using an image. 
+ +## Basic Usage (Text) + +```python +from google import genai +from google.genai import types + +client = genai.Client() +response = client.models.embed_content( + model="gemini-embedding-2", + contents=[ + "How do I get a driver's license/learner's permit?", + "How long is my driver's license valid for?", + ], + # Optional Parameters + config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT", output_dimensionality=768), +) +print(response.embeddings) +``` + +## Multimodal Embeddings (Image and Video) + +To generate embeddings for images and videos, use the `types.Part.from_uri` method to point the model to a Google Cloud Storage (GCS) URI containing the media file, and provide the appropriate MIME type. + +### Image Embeddings + +For images, provide the GCS URI of the image and set the MIME type (e.g., `image/jpeg`). + +```python +from google import genai +from google.genai import types + +client = genai.Client() +response = client.models.embed_content( + model="gemini-embedding-2", + contents=types.Part.from_uri( + file_uri="gs://github-repo/embeddings/getting_started_embeddings/gms_images/GGOEACBA104999.jpg", + mime_type="image/jpeg" + ), + config=types.EmbedContentConfig(output_dimensionality=768), +) + +image_embedding = response.embeddings[0].values +print(f"Length of image embedding: {len(image_embedding)}") +``` + +### Video Embeddings + +Generating embeddings for a video works similarly. However, instead of a single vector, the API returns a list of embedding vectors—one representing each frame segment or interval of the processed video. 
+ +```python +from google import genai +from google.genai import types + +client = genai.Client() +response = client.models.embed_content( + model="gemini-embedding-2", + contents=types.Part.from_uri( + file_uri="gs://github-repo/embeddings/getting_started_embeddings/UCF-101-subset/BrushingTeeth/v_BrushingTeeth_g01_c02.mp4", + mime_type="video/mp4" + ), + config=types.EmbedContentConfig(output_dimensionality=768), +) + +# Extract embedding values for each video segment +video_embeddings =[emb.values for emb in response.embeddings] + +print(f"Number of video segment embeddings returned: {len(video_embeddings)}") +print(f"First segment embedding length: {len(video_embeddings[0])}") +``` + +### Cross-Modal Search + +Because these vectors share a semantic space, you can calculate the dot product or cosine similarity between different types of embeddings. For example, you can calculate the similarity between a text query embedding ("A music concert") and a pre-computed database of image or video embeddings to build a robust multimodal semantic search engine. diff --git a/.agents/skills/gemini-api/references/live_api.md b/.agents/skills/gemini-api/references/live_api.md new file mode 100644 index 0000000..c60a896 --- /dev/null +++ b/.agents/skills/gemini-api/references/live_api.md @@ -0,0 +1,36 @@ +# Live API + +The Live API provides real-time, low-latency bidirectional streaming via WebSockets. It is ideal for interactive voice and video applications. + +```python +import asyncio +from google import genai +from google.genai import types + +async def generate_content(): + client = genai.Client() + model_id = "gemini-live-2.5-flash-native-audio" + + config = types.LiveConnectConfig( + response_modalities=[types.LiveModality.TEXT], # Change to AUDIO for voice responses + ) + + async with client.aio.live.connect(model=model_id, config=config) as session: + text_input = "Hello? Gemini, are you there?" 
+ await session.send_client_content( + turns=types.Content(role="user", parts=[types.Part.from_text(text=text_input)]) + ) + + async for message in session.receive(): + if message.text: + print(message.text, end="") + +asyncio.run(generate_content()) +``` + +For sending audio from inside the open `session` block (`Blob` is `types.Blob`): +```python +await session.send_realtime_input( + media=Blob(data=audio_bytes, mime_type="audio/pcm;rate=16000") +) +``` diff --git a/.agents/skills/gemini-api/references/media_generation.md b/.agents/skills/gemini-api/references/media_generation.md new file mode 100644 index 0000000..808c51b --- /dev/null +++ b/.agents/skills/gemini-api/references/media_generation.md @@ -0,0 +1,113 @@ +# Media Generation + +## Image Generation +Generate images using `gemini-3.1-flash-image`. + +```python +from google import genai +from google.genai import types + +client = genai.Client() + +response = client.models.generate_content( + model="gemini-3.1-flash-image", + contents="A dog reading a newspaper", +) + +for part in response.parts: + if part.text is not None: + print(part.text) + elif part.inline_data is not None: + image = part.as_image() + image.save("generated_image.png") +``` + +For high-resolution images, use `gemini-3-pro-image`. + +```python +from google import genai +from google.genai import types + +client = genai.Client() + +response = client.models.generate_content( + model="gemini-3-pro-image", + contents="A dog reading a newspaper", + config=types.GenerateContentConfig( + image_config=types.ImageConfig(aspect_ratio="16:9", image_size="2K") + ), +) + +for part in response.parts: + if part.text is not None: + print(part.text) + elif part.inline_data is not None: + image = part.as_image() + image.save("generated_image.png") +``` + +## Image Editing +It is recommended to use chat mode for editing images. 
+ +```python +from google import genai +from PIL import Image + +client = genai.Client() + +prompt = "A small white ceramic bowl with lemons and limes" +image = Image.open("fruit.png") + +# Create the chat +chat = client.chats.create(model="gemini-3.1-flash-image") + +# Send the image and ask for it to be edited +response = chat.send_message([prompt, image]) + +# Get the text and the image generated +for i, part in enumerate(response.candidates[0].content.parts): + if part.text is not None: + print(part.text) + elif part.inline_data is not None: + image = part.as_image() + image.save(f"generated_image_{i}.png") + +# Continue iterating +chat.send_message("Make the bowl blue") +``` + +## Video Generation +Generate video using the Veo model. Usage of Veo can be costly, so check pricing for Veo. Start with the fast model (`veo-3.1-fast-generate-001`) since the result quality is usually sufficient, and swap to the larger model if needed. + +```python +import time +from google import genai +from google.genai import types +from PIL import Image + +client = genai.Client() + +image = Image.open("image.png") # Optional initial image + +# Video generation is an async operation +operation = client.models.generate_videos( + model="veo-3.1-fast-generate-001", + prompt="a cat reading a book", + image=image, + config=types.GenerateVideosConfig( + person_generation="dont_allow", + aspect_ratio="16:9", + number_of_videos=1, + duration_seconds=5, + output_gcs_uri="gs://your-bucket/your-prefix", + ), +) + +# Poll for completion +while not operation.done: + time.sleep(20) + operation = client.operations.get(operation) + +if operation.response: + print(operation.result.generated_videos[0].video.uri) +``` diff --git a/.agents/skills/gemini-api/references/model_tuning.md b/.agents/skills/gemini-api/references/model_tuning.md new file mode 100644 index 0000000..ee67162 --- /dev/null +++ b/.agents/skills/gemini-api/references/model_tuning.md @@ -0,0 +1,37 @@ +# Model Tuning + +Supervised 
Fine-Tuning or Preference Tuning using your own datasets. + +```python +import time +from google import genai +from google.genai import types + +client = genai.Client() + +training_dataset = types.TuningDataset( + gcs_uri="gs://your-bucket/sft_train_data.jsonl", +) + +tuning_job = client.tunings.tune( + base_model="gemini-3.1-flash-lite", + training_dataset=training_dataset, + config=types.CreateTuningJobConfig( + tuned_model_display_name="Example tuning job", + ), +) + +running_states = {"JOB_STATE_PENDING", "JOB_STATE_RUNNING"} +while tuning_job.state in running_states: + time.sleep(60) + tuning_job = client.tunings.get(name=tuning_job.name) + +print("Tuned Model Endpoint:", tuning_job.tuned_model.endpoint) + +# Predict with the tuned endpoint +response = client.models.generate_content( + model=tuning_job.tuned_model.endpoint, + contents="Why is the sky blue?", +) +print(response.text) +``` diff --git a/.agents/skills/gemini-api/references/safety.md b/.agents/skills/gemini-api/references/safety.md new file mode 100644 index 0000000..3044343 --- /dev/null +++ b/.agents/skills/gemini-api/references/safety.md @@ -0,0 +1,50 @@ +# Safety Settings and Responsible AI + +You can adjust safety settings to control the thresholds for harmful content generation. Standard safety filters are applied by default. 
+ +## Adjusting Safety Thresholds + +```python +from google import genai +from google.genai import types + +client = genai.Client() +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="Write a list of 5 disrespectful things that I might say to the universe after stubbing my toe in the dark.", + config=types.GenerateContentConfig( + system_instruction="Be as mean as possible.", + safety_settings=[ + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, + threshold=types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, + ), + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_HARASSMENT, + threshold=types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, + ), + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH, + threshold=types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, + ), + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, + threshold=types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, + ), + ], + ), +) + +# Response will be `None` if it is blocked. +if response.text is None: + print(f"Content Blocked. Finish Reason: {response.candidates[0].finish_reason}") +else: + print(response.text) + +# Inspect safety ratings for each category +for rating in response.candidates[0].safety_ratings: + print(f"Category: {rating.category}") + print(f"Is Blocked: {rating.blocked}") + print(f"Probability: {rating.probability}") + print(f"Severity: {rating.severity}") +``` diff --git a/.agents/skills/gemini-api/references/structured_and_tools.md b/.agents/skills/gemini-api/references/structured_and_tools.md new file mode 100644 index 0000000..4fc1e2d --- /dev/null +++ b/.agents/skills/gemini-api/references/structured_and_tools.md @@ -0,0 +1,133 @@ +# Structured Output and Tools + +## Structured Output (JSON Schema) +Enforce a specific JSON schema using standard Python type hints or Pydantic models. 
+ +```python +from google import genai +from google.genai import types +from pydantic import BaseModel + + +class Recipe(BaseModel): + recipe_name: str + ingredients: list[str] + + +client = genai.Client() +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="List a few popular cookie recipes.", + config=types.GenerateContentConfig( + response_mime_type="application/json", + response_json_schema=list[Recipe], + ), +) +# response.text is guaranteed to be valid JSON matching the schema +print(response.text) +# Returns list of Recipe objects +print(response.parsed) +``` + +## Function Calling +Let the model output function calls that you can execute. + +```python +from google import genai +from google.genai import types + + +def get_current_weather(location: str) -> str: + """Example method. Returns the current weather. + Args: location: The city and state, e.g. San Francisco, CA + """ + if "boston" in location.lower(): + return "Snowing" + return "Sunny" + + +client = genai.Client() +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="What is the weather like in Boston?", + config=types.GenerateContentConfig(tools=[get_current_weather]), +) + +if response.function_calls: + print("Function calls requested by the model:") + for function_call in response.function_calls: + print(f"- Function: {function_call.name}") + print(f"- Args: {dict(function_call.args)}") +else: + print("The model responded directly:") + print(response.text) +``` + +## Search Grounding +Ground the model's responses in Google Search or your own data with Agent Search (formerly known as Vertex AI Search). 
+ +```python +from google import genai +from google.genai import types + +client = genai.Client() + +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="When is the next total solar eclipse in the US?", + config=types.GenerateContentConfig( + tools=[types.Tool(google_search=types.GoogleSearch())], + ), +) +print(response.text) +# Search details +print(f"Search Query: {response.candidates[0].grounding_metadata.web_search_queries}") +# Inspect grounding metadata +print(response.candidates[0].grounding_metadata.search_entry_point.rendered_content) +# Urls used for grounding +print( + f"Search Pages: {', '.join([site.web.title for site in response.candidates[0].grounding_metadata.grounding_chunks])}" +) +``` + +## Code Execution +Allow the model to run Python code to calculate answers precisely. + +```python +from google import genai +from google.genai import types + +client = genai.Client() + +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="Calculate the 20th fibonacci number.", + config=types.GenerateContentConfig( + tools=[types.Tool(code_execution=types.ToolCodeExecution())], + ), +) +print(response.executable_code) +print(response.code_execution_result) +``` + +## URL Context +You can use the URL context tool to provide Gemini with URLs as additional context for your prompt. The model can then retrieve content from the URLs and use that content to inform and shape its response. 
+ +```python +from google import genai +from google.genai import types + +client = genai.Client() + +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="Compare recipes from http://example.com and http://example2.com", + config=types.GenerateContentConfig( + tools=[types.Tool(url_context=types.UrlContext())], + ), +) + +print(response.text) +# get URLs retrieved for context +print(response.candidates[0].url_context_metadata) +``` diff --git a/.agents/skills/gemini-api/references/text_and_multimodal.md b/.agents/skills/gemini-api/references/text_and_multimodal.md new file mode 100644 index 0000000..3e63b36 --- /dev/null +++ b/.agents/skills/gemini-api/references/text_and_multimodal.md @@ -0,0 +1,105 @@ +# Text and Multimodal Generation + +## Basic Text Generation +```python +from google import genai + +client = genai.Client() +response = client.models.generate_content( + model="gemini-3.5-flash", + contents="How does AI work?", +) +print(response.text) +``` + +## Chat (Multi-turn conversations) +```python +from google import genai +from google.genai import types + +client = genai.Client() +chat_session = client.chats.create( + model="gemini-3.5-flash", + history=[ + types.UserContent( + parts=[ + types.Part.from_text( + text="Hello", + ) + ] + ), + types.ModelContent( + parts=[ + types.Part.from_text( + text="Great to meet you. What would you like to know?" + ) + ] + ), + ], +) +response = chat_session.send_message("Tell me a story.") +print(response.text) +``` + +## Synchronous Streaming + +Generate content in a streaming format so that the model outputs streams back +to you, rather than being returned as one chunk. + +```python +from google import genai +from google.genai import types + +client = genai.Client() +for chunk in client.models.generate_content_stream( + model="gemini-3.5-flash", contents="Tell me a story in 300 words." 
+): + print(chunk.text, end="") +``` + +## Multimodal Inputs (Images, Audio, Video) +You can provide files natively using Google Cloud Storage URIs or local bytes. + +```python +from google import genai +from google.genai import types + +client = genai.Client() + +gcs_image = types.Part.from_uri( + file_uri="gs://cloud-samples-data/generative-ai/image/scones.jpg", + mime_type="image/jpeg", +) + +with open("local_image.jpg", "rb") as f: + local_image = types.Part.from_bytes(data=f.read(), mime_type="image/jpeg") + +response = client.models.generate_content( + model="gemini-3.5-flash", + contents=[ + "Generate a list of all the objects contained in both images.", + gcs_image, + local_image, + ], +) +print(response.text) +``` + +### YouTube Videos +```python +from google import genai +from google.genai import types + +client = genai.Client() +response = client.models.generate_content( + model="gemini-3.5-flash", + contents=[ + types.Part.from_uri( + file_uri="https://www.youtube.com/watch?v=3KtWfp0UopM", + mime_type="video/mp4", + ), + "Write a short and engaging blog post based on this video.", + ], +) +print(response.text) +``` diff --git a/.agents/skills/gemini-interactions-api/SKILL.md b/.agents/skills/gemini-interactions-api/SKILL.md new file mode 100644 index 0000000..e985c8c --- /dev/null +++ b/.agents/skills/gemini-interactions-api/SKILL.md @@ -0,0 +1,459 @@ +--- +name: gemini-interactions-api +description: Guides the usage of Gemini Interactions API on Gemini Enterprise Agent Platform. Use when the user wants to use the stateful, server-managed Interactions API for multi-turn conversations, background execution, streaming, structured output, and function calling on the Agent Platform. +--- + +# Gemini Interactions API Skill + +This skill provides instructions for authenticating, connecting to, and utilizing the stateful, server-managed **Gemini Interactions API** on Gemini Enterprise Agent Platform. 
+ + +The Interactions API is the modern, recommended way to execute Generative AI agent conversations, background research tasks, multi-turn chats, and structured, multi-step workflows. + + +> [!IMPORTANT] +> **CRITICAL: Unified SDK & Latest Models** +> * **Unified SDK**: Use the Google Gen AI SDK (**`google-genai >= 2.0.0`** for Python, **`@google/genai >= 2.0.0`** for JS/TS). Legacy SDKs like `google-cloud-aiplatform`, `@google-cloud/vertexai`, and `google-generativeai` are strictly unsupported for Interactions. +> * **Latest Models Only**: Use `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite`, `gemini-3-flash-preview`, `gemini-2.5-pro`, or `gemini-2.5-flash`. Refer to the [latest model versions](https://docs.cloud.google.com/gemini-enterprise-agent-platform/models/migrate) to check for new updates. Legacy models (`gemini-2.0-*`, `gemini-1.5-*`) are deprecated and do not support interactions. +> * **Turn-Scoped Parameters**: Parameters like `tools`, `system_instruction`, and `generation_config` are turn-scoped. They **MUST** be passed with each interaction request. + +## 1. Authentication + +Before running any code, ensure you are authenticated with Application Default Credentials (ADC) and have the necessary API enabled. + +1. **Login**: + ```bash + gcloud auth application-default login + ``` +2. **Enable API** (if not already enabled): + ```bash + gcloud services enable aiplatform.googleapis.com + ``` + +--- + +## 2. Client Initialization + +You can initialize the client using environment variables (recommended) or by passing explicit configuration parameters. 
+ +### Option A: Environment Variables (Recommended) + +Configure environment variables to let the SDK automatically resolve settings: + +```bash +export GOOGLE_GENAI_USE_ENTERPRISE=true +export GOOGLE_CLOUD_PROJECT="your-project-id" +export GOOGLE_CLOUD_LOCATION="global" +``` + +#### Python +```python +from google import genai + +# The SDK automatically picks up the environment variables +client = genai.Client() +``` + +#### TypeScript/JavaScript +```typescript +import { GoogleGenAI } from "@google/genai"; + +// The SDK automatically picks up the environment variables +const ai = new GoogleGenAI(); +``` + +### Option B: Explicit Inline Parameters + +Alternatively, pass configuration values directly inside your code: + +#### Python +```python +from google import genai +import google.auth + +_, project_id = google.auth.default() +client = genai.Client(enterprise=True, project=project_id, location="global") +``` + +#### TypeScript/JavaScript +```typescript +import { GoogleGenAI } from "@google/genai"; + +const ai = new GoogleGenAI({ + enterprise: { + project: "your-project-id", + location: "global" + } +}); +``` + +--- + +## 3. Core Interactions API Usage + +### Quick Start (Single-Turn) + +Submit a single prompt and read the final text response. Under the modern schema, output content is retrieved from the `steps` list. + +#### Python +```python +interaction = client.interactions.create( + model="gemini-3-flash-preview", + input="Explain serverless computing in one sentence." +) +# Output text is located under steps +print(interaction.steps[-1].content[0].text) +``` + +#### TypeScript/JavaScript +```typescript +const interaction = await ai.interactions.create({ + model: "gemini-3-flash-preview", + input: "Explain serverless computing in one sentence." +}); +console.log(interaction.steps[interaction.steps.length - 1].content[0].text); +``` + +--- + +### Stateful Conversation (Multi-Turn) + +Interactions are stateful by default. 
Store the conversation state in the cloud and reference it in the subsequent turn using `previous_interaction_id`. + +#### Python +```python +# Turn 1: Introduce ourselves +turn1 = client.interactions.create( + model="gemini-3-flash-preview", + input="Hi! My name is John. I am working on AI agents.", + store=True +) +print(f"Turn 1: {turn1.steps[-1].content[0].text}") + +# Turn 2: Refer back to the stored turn state +turn2 = client.interactions.create( + model="gemini-3-flash-preview", + input="What is my name?", + previous_interaction_id=turn1.id +) +print(f"Turn 2: {turn2.steps[-1].content[0].text}") +``` + +#### TypeScript/JavaScript +```typescript +// Turn 1 +const turn1 = await ai.interactions.create({ + model: "gemini-3-flash-preview", + input: "Hi! My name is John. I am working on AI agents.", + store: true +}); + +// Turn 2 +const turn2 = await ai.interactions.create({ + model: "gemini-3-flash-preview", + input: "What is my name?", + previousInteractionId: turn1.id +}); +console.log(turn2.steps[turn2.steps.length - 1].content[0].text); +``` + +--- + +### Real-Time Streaming + +Stream responses in real time. Passing `stream=True` (`stream: true` in TypeScript) returns a stream of chunks that you iterate over as they arrive. 
+ +#### Python +```python +response = client.interactions.create( + model="gemini-3-flash-preview", + input="Write a short poem about debugging.", + stream=True +) + +for chunk in response: + if chunk.steps: + step = chunk.steps[-1] + if step.content and step.content[0].text: + print(step.content[0].text, end="", flush=True) +print() +``` + +#### TypeScript/JavaScript +```typescript +const responseStream = await ai.interactions.create({ + model: "gemini-3-flash-preview", + input: "Write a short poem about debugging.", + stream: true +}); + +for await (const chunk of responseStream) { + if (chunk.steps) { + const step = chunk.steps[chunk.steps.length - 1]; + if (step.content && step.content[0].text) { + process.stdout.write(step.content[0].text); + } + } +} +console.log(); +``` + +--- + +### Structured Output (Pydantic / Polymorphic `response_format`) + +Retrieve structured, type-safe JSON matching a schema. Under the modern Interactions API, a polymorphic `response_format` argument directly takes the target schema structure. 
+ +#### Python +```python +from pydantic import BaseModel, Field + +class Book(BaseModel): + title: str = Field(description="The title of the book") + author: str = Field(description="The book's author") + year_published: int + +interaction = client.interactions.create( + model="gemini-3-flash-preview", + input="Recommend one famous sci-fi book.", + response_format=Book +) + +# The text will be a valid JSON matching the Book schema +print(interaction.steps[-1].content[0].text) +``` + +#### TypeScript/JavaScript +```typescript +import { Type } from "@google/genai"; + +const BookSchema = { + type: Type.OBJECT, + properties: { + title: { type: Type.STRING, description: "The title of the book" }, + author: { type: Type.STRING, description: "The book's author" }, + yearPublished: { type: Type.INTEGER } + }, + required: ["title", "author", "yearPublished"] +}; + +const interaction = await ai.interactions.create({ + model: "gemini-3-flash-preview", + input: "Recommend one famous sci-fi book.", + responseFormat: BookSchema +}); + +console.log(interaction.steps[interaction.steps.length - 1].content[0].text); +``` + +--- + +### Function Calling (Agent Tool Use) + +Define local tools (functions) and submit execution results to the stateful interaction history. 
+ +#### Python +```python +def get_stock_price(ticker: str) -> float: + """Gets the stock price for a given ticker symbol.""" + if ticker.upper() == "GOOG": + return 175.50 + return 100.0 + +# Turn 1: Pass tools to the model +interaction = client.interactions.create( + model="gemini-3-flash-preview", + input="What is the stock price of GOOG?", + tools=[get_stock_price] +) + +last_step = interaction.steps[-1] +# Check if the model requested a function call +if last_step.tool_calls: + for call in last_step.tool_calls: + if call.name == "get_stock_price": + ticker_arg = call.args.get("ticker") + price = get_stock_price(ticker_arg) + + # Turn 2: Submit function execution result statefully + final_turn = client.interactions.create( + model="gemini-3-flash-preview", + input=f"The stock price for {ticker_arg} is ${price}.", + previous_interaction_id=interaction.id + ) + print(final_turn.steps[-1].content[0].text) +``` + +#### TypeScript/JavaScript +```typescript +import { Type } from "@google/genai"; + +// Define local tool +function getStockPrice({ ticker }: { ticker: string }): number { + if (ticker.toUpperCase() === "GOOG") { + return 175.50; + } + return 100.00; +} + +// Turn 1: Pass tools to the model +const interaction = await ai.interactions.create({ + model: "gemini-3-flash-preview", + input: "What is the stock price of GOOG?", + tools: [{ + functionDeclarations: [{ + name: "getStockPrice", + description: "Gets the stock price for a given ticker symbol.", + parameters: { + type: Type.OBJECT, + properties: { + ticker: { type: Type.STRING, description: "The stock ticker symbol" } + }, + required: ["ticker"] + } + }] + }] +}); + +const lastStep = interaction.steps[interaction.steps.length - 1]; +// Check if the model requested a function call +if (lastStep.toolCalls) { + for (const call of lastStep.toolCalls) { + if (call.name === "getStockPrice") { + const tickerArg = call.args.ticker as string; + const price = getStockPrice({ ticker: tickerArg }); + + // Turn 2: 
Submit function execution result statefully + const finalTurn = await ai.interactions.create({ + model: "gemini-3-flash-preview", + input: `The stock price for ${tickerArg} is $${price}.`, + previousInteractionId: interaction.id + }); + console.log(finalTurn.steps[finalTurn.steps.length - 1].content[0].text); + } + } +} +``` + +--- + +## 4. Accessing the Interactions API via REST + +For shell-based scripts, debugging, or non-Python/JS environments, you can communicate with the stateful Interactions API directly using raw HTTP/REST requests via `curl`. + +### 1. REST Endpoint + +The REST API endpoint for interactions is: + +```http +POST https://aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{LOCATION}/interactions +``` + +* **LOCATION**: Use `global` (or custom region if required). +* **PROJECT_ID**: Your Google Cloud Project ID. + +### 2. Set up Variables & Authentication Header + +Set your target agent ID (e.g., model or custom agent path) and an access token for your active gcloud credentials (the `curl` commands below also expect `PROJECT_ID` to be set in your shell): + +```bash +AGENT_ID="your-agent-id" +ACCESS_TOKEN=$(gcloud auth print-access-token) +``` + +### 3. Single-Turn Interaction Payload + +Send a request to start an interaction using the agent variable: + +```bash +curl -X POST "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/global/interactions" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" \ + -d '{ + "agent": "'"${AGENT_ID}"'", + "input": [{ + "role": "user", + "content": [{ + "type": "text", + "text": "Explain serverless computing in one sentence." 
+ }] + }] + }' +``` + +#### Response Example +A synchronous POST request returns a JSON object containing the conversation step details and unique identifiers: +```json +{ + "id": "your-interaction-id", + "status": "completed", + "steps": [ + { + "role": "model", + "content": [ + { + "type": "text", + "text": "Serverless computing is a cloud execution model where the cloud provider dynamically manages the allocation and provisioning of servers, charging customers based on actual usage rather than pre-purchased capacity." + } + ] + } + ], + "usage": { + "total_tokens": 24751, + "total_input_tokens": 23894, + "total_output_tokens": 857 + }, + "created": "2026-05-08T10:44:43Z", + "updated": "2026-05-08T10:44:43Z", + "environment_id": "your-environment-id", + "object": "interaction" +} +``` + +### 4. Multi-Turn Stateful Interaction Payload + +To continue an existing conversation statefully, specify the `previous_interaction_id` in the JSON payload: + +```bash +curl -X POST "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/global/interactions" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" \ + -d '{ + "agent": "'"${AGENT_ID}"'", + "store": true, + "previous_interaction_id": "YOUR_PREVIOUS_INTERACTION_ID", + "input": [{ + "role": "user", + "content": [{ + "type": "text", + "text": "Can you elaborate on that?" + }] + }] + }' +``` + +### 5. Streaming Output Payload +To stream updates in real time (Server-Sent Events format), pass `"stream": true` in the payload: + +```bash +curl -X POST "https://aiplatform.googleapis.com/v1beta1/projects/${PROJECT_ID}/locations/global/interactions" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Content-Type: application/json" \ + -d '{ + "agent": "'"${AGENT_ID}"'", + "stream": true, + "input": [{ + "role": "user", + "content": [{ + "type": "text", + "text": "Write a long story about space travel." 
+ }] + }] + }' +``` + +The endpoint will return a chunked stream where each event begins with `data: ` containing JSON updates with the `event_type` and step contents. + +> **How `curl` handles streaming:** +> By default, when `"stream": true` is passed, the server responds with `Transfer-Encoding: chunked` and `Content-Type: text/event-stream` (Server-Sent Events). `curl` will automatically keep the connection open and print the incoming data chunks to `stdout` in real time as they are pushed by the server. The user does not need to poll or pull further; the complete sequence of events streams continuously until completion. + diff --git a/.agents/skills/gke-basics/SKILL.md b/.agents/skills/gke-basics/SKILL.md new file mode 100644 index 0000000..fe2119d --- /dev/null +++ b/.agents/skills/gke-basics/SKILL.md @@ -0,0 +1,49 @@ +--- +name: gke-basics +description: "Plan, create, and configure production-ready Google Kubernetes Engine (GKE) clusters using the golden path Autopilot configuration. Covers Day-0 checklist, Autopilot vs Standard, networking (private clusters, VPC-native, Gateway API), security (Workload Identity, Secret Manager, RBAC hardening), observability, scaling, cost optimization, and AI/ML inference. WHEN: create GKE cluster, provision GKE environment, design GKE networking, secure GKE, optimize GKE cost, GKE autoscaling, GKE inference, GKE upgrade, GKE observability, GKE multi-tenancy, GKE batch, GKE HPC, GKE compute class." +--- + +# Google Kubernetes Engine (GKE) Basics + +GKE is a managed Kubernetes platform on Google Cloud for deploying, scaling, and operating containerized applications. This skill defaults to the **golden path Autopilot configuration** — see [gke-golden-path.md](./references/gke-golden-path.md) for defaults, rules, and guardrails. 
+ +## Quick Start + +```bash +gcloud services enable container.googleapis.com --quiet +gcloud container clusters create-auto my-cluster --region=us-central1 --quiet +gcloud container clusters get-credentials my-cluster --region=us-central1 --quiet +kubectl create deployment hello-server \ + --image=us-docker.pkg.dev/google-samples/containers/gke/hello-app:1.0 +``` + +## Reference Directory + +Load the relevant reference based on trigger keywords. Prefer the most specific match; if ambiguous, ask the user to clarify. + +| Scenario | Trigger Keywords | Reference | +|----------|-----------------|-----------| +| Core Concepts | Autopilot vs Standard, architecture, pricing, what is GKE | [core-concepts.md](./references/core-concepts.md) | +| Golden Path & Defaults | golden path, Day-0 checklist, production defaults, cluster defaults | [gke-golden-path.md](./references/gke-golden-path.md) | +| Cluster Creation | create cluster, new cluster, provision GKE | [gke-cluster-creation.md](./references/gke-cluster-creation.md) | +| Networking | private cluster, VPC, subnet, Gateway API, DNS, ingress, egress, datapath | [gke-networking.md](./references/gke-networking.md) | +| Security & IAM | Workload Identity, Secret Manager, RBAC, Binary Auth, hardening, audit, gVisor, IAM roles | [gke-security.md](./references/gke-security.md) | +| Scaling | HPA, VPA, autoscaler, autoscaling, NAP, scale pods, scale nodes | [gke-scaling.md](./references/gke-scaling.md) | +| Compute Classes | ComputeClass, machine family, Spot fallback, GPU node pool, node selection | [gke-compute-classes.md](./references/gke-compute-classes.md) | +| Cost | cost, savings, Spot VMs, rightsizing, CUD, optimize spend, budget | [gke-cost.md](./references/gke-cost.md) | +| AI/ML Inference | inference, model serving, LLM, GPU, TPU, GIQ, vLLM | [gke-inference.md](./references/gke-inference.md) | +| Upgrades | upgrade, maintenance window, release channel, patching, version | 
[gke-upgrades.md](./references/gke-upgrades.md) | +| Observability | monitoring, logging, Prometheus, Grafana, metrics, alerts, dashboards | [gke-observability.md](./references/gke-observability.md) | +| Multi-tenancy | multi-tenant, namespace isolation, team access, enterprise, RBAC planning | [gke-multitenancy.md](./references/gke-multitenancy.md) | +| Batch & HPC | batch, HPC, job queue, high performance, MPI, parallel | [gke-batch-hpc.md](./references/gke-batch-hpc.md) | +| App Onboarding | containerize, deploy app, Dockerfile, onboard, migrate to GKE | [gke-app-onboarding.md](./references/gke-app-onboarding.md) | +| Backup & DR | backup, restore, disaster recovery, CMEK | [gke-backup-dr.md](./references/gke-backup-dr.md) | +| Storage | storage, PVC, persistent volume, StorageClass, Filestore, GCS FUSE | [gke-storage.md](./references/gke-storage.md) | +| Reliability | PDB, health probe, liveness, readiness, topology spread, graceful shutdown | [gke-reliability.md](./references/gke-reliability.md) | +| Client Libraries | client library, client-go, kubernetes python, kubernetes java, kubernetes SDK | [client-library-usage.md](./references/client-library-usage.md) | +| Infrastructure as Code | Terraform, IaC, HCL, infrastructure as code | [iac-usage.md](./references/iac-usage.md) | +| MCP Server | MCP tools, MCP server, MCP setup | [mcp-usage.md](./references/mcp-usage.md) | +| CLI / Tools | gcloud, kubectl, commands, how to | [cli-reference.md](./references/cli-reference.md) | +| Production Audit | production readiness, compliance, golden path check | [gke-cluster-creation.md](./references/gke-cluster-creation.md) | + +*If you need product information not found in these references, use the Developer Knowledge MCP server `search_documents` tool.* diff --git a/.agents/skills/gke-basics/assets/default-deny-netpol.yaml b/.agents/skills/gke-basics/assets/default-deny-netpol.yaml new file mode 100644 index 0000000..6e30635 --- /dev/null +++ 
b/.agents/skills/gke-basics/assets/default-deny-netpol.yaml @@ -0,0 +1,10 @@ +--- +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: default-deny-all +spec: + podSelector: {} + policyTypes: + - Ingress + - Egress diff --git a/.agents/skills/gke-basics/assets/golden-path-autopilot.yaml b/.agents/skills/gke-basics/assets/golden-path-autopilot.yaml new file mode 100644 index 0000000..6af4bd8 --- /dev/null +++ b/.agents/skills/gke-basics/assets/golden-path-autopilot.yaml @@ -0,0 +1,147 @@ +# Golden Path Autopilot Configuration (cluster-level policy settings only) +# Condensed from `gcloud container clusters describe` export. +# Full export: resources/recommended-ap.yaml +# +# Autopilot manages all node pools, node config, and provisioning defaults +# automatically. Only cluster-level policy decisions are shown here. + +# --- Addons --- +addonsConfig: + dnsCacheConfig: + enabled: true + gcePersistentDiskCsiDriverConfig: + enabled: true + gcpFilestoreCsiDriverConfig: + enabled: true + gcsFuseCsiDriverConfig: + enabled: true + gkeBackupAgentConfig: {} + httpLoadBalancing: {} + kubernetesDashboard: + disabled: true + networkPolicyConfig: + disabled: true + parallelstoreCsiDriverConfig: + enabled: true + rayOperatorConfig: {} + statefulHaConfig: + enabled: true + +# --- Cluster Mode --- +autopilot: + enabled: true + +# --- Autoscaling --- +autoscaling: + autoscalingProfile: OPTIMIZE_UTILIZATION + enableNodeAutoprovisioning: true + resourceLimits: + - maximum: '1000000000' + resourceType: cpu + - maximum: '1000000000' + resourceType: memory + - maximum: '1000000000' + resourceType: nvidia-tesla-t4 + - maximum: '1000000000' + resourceType: nvidia-tesla-a100 + +# --- Security --- +binaryAuthorization: + evaluationMode: DISABLED +controlPlaneEndpointsConfig: + dnsEndpointConfig: + allowExternalTraffic: true # Customer-configurable + ipEndpointsConfig: + authorizedNetworksConfig: + privateEndpointEnforcementEnabled: true +masterAuthorizedNetworksConfig: + 
privateEndpointEnforcementEnabled: true +privateClusterConfig: + enablePrivateNodes: true +rbacBindingConfig: + enableInsecureBindingSystemAuthenticated: false + enableInsecureBindingSystemUnauthenticated: false +secretManagerConfig: + enabled: true + rotationConfig: + enabled: true + rotationInterval: 120s +shieldedNodes: + enabled: true +workloadIdentityConfig: + workloadPool: .svc.id.goog + +# --- Networking --- +networkConfig: + datapathProvider: ADVANCED_DATAPATH + defaultEnablePrivateNodes: true + dnsConfig: + clusterDns: CLOUD_DNS + clusterDnsDomain: cluster.local + clusterDnsScope: CLUSTER_SCOPE + enableIntraNodeVisibility: true + gatewayApiConfig: + channel: CHANNEL_STANDARD + +# --- IP Allocation --- +ipAllocationPolicy: + autoIpamConfig: + enabled: true # Customer-configurable + createSubnetwork: true # Customer-configurable + stackType: IPV4 + useIpAliases: true +defaultMaxPodsConstraint: + maxPodsPerNode: '48' # Customer-configurable (48 or 110) + +# --- Logging --- +loggingConfig: + componentConfig: + enableComponents: + - SYSTEM_COMPONENTS + - WORKLOADS + +# --- Monitoring --- +monitoringConfig: + advancedDatapathObservabilityConfig: + enableMetrics: true + componentConfig: + enableComponents: + - SYSTEM_COMPONENTS + - STORAGE + - POD + - DEPLOYMENT + - STATEFULSET + - DAEMONSET + - HPA + - JOBSET + - CADVISOR + - KUBELET + - DCGM + - APISERVER + - SCHEDULER + - CONTROLLER_MANAGER + managedPrometheusConfig: + enabled: true + +# --- Maintenance --- +maintenancePolicy: + window: + maintenanceExclusions: + blackout-1: + maintenanceExclusionOptions: + scope: NO_MINOR_UPGRADES + # 1-year exclusion window (customer-configurable dates) + +# --- Upgrades --- +releaseChannel: + channel: REGULAR + +# --- Scaling --- +verticalPodAutoscaling: + enabled: true + +# --- Node Config & Pools --- +# Autopilot manages all node pools, node config (disk, machine type, shielded +# instances, gvnic, gcfs, workload metadata mode), and upgrade strategy +# automatically. 
These are not user-configurable in Autopilot mode. +# See resources/recommended-ap.yaml for the full describe output. diff --git a/.agents/skills/gke-basics/assets/hpa-example.yaml b/.agents/skills/gke-basics/assets/hpa-example.yaml new file mode 100644 index 0000000..cdb4465 --- /dev/null +++ b/.agents/skills/gke-basics/assets/hpa-example.yaml @@ -0,0 +1,25 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: example-hpa + namespace: default +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: example-deployment + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 50 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 70 diff --git a/.agents/skills/gke-basics/assets/vpa-example.yaml b/.agents/skills/gke-basics/assets/vpa-example.yaml new file mode 100644 index 0000000..648893d --- /dev/null +++ b/.agents/skills/gke-basics/assets/vpa-example.yaml @@ -0,0 +1,21 @@ +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: example-vpa + namespace: default +spec: + targetRef: + apiVersion: "apps/v1" + kind: Deployment + name: example-deployment + updatePolicy: + updateMode: "Off" # Recommendation only - safest starting point + resourcePolicy: + containerPolicies: + - containerName: "*" + minAllowed: + cpu: 100m + memory: 50Mi + maxAllowed: + cpu: 1 + memory: 1Gi diff --git a/.agents/skills/gke-basics/assets/workload-identity-pod.yaml b/.agents/skills/gke-basics/assets/workload-identity-pod.yaml new file mode 100644 index 0000000..b71bdae --- /dev/null +++ b/.agents/skills/gke-basics/assets/workload-identity-pod.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: v1 +kind: Pod +metadata: + name: workload-identity-test + namespace: workload-identity-test-ns +spec: + serviceAccountName: # Replace with your KSA name + automountServiceAccountToken: false + containers: + - image: 
gcr.io/google.com/cloudsdktool/cloud-sdk:slim + name: workload-identity-test + command: ["sleep", "infinity"] + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 100m + memory: 128Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + readOnlyRootFilesystem: true + nodeSelector: + iam.gke.io/gke-metadata-server-enabled: "true" + securityContext: + runAsNonRoot: true + runAsUser: 65532 + seccompProfile: + type: RuntimeDefault diff --git a/.agents/skills/gke-basics/references/cli-reference.md b/.agents/skills/gke-basics/references/cli-reference.md new file mode 100644 index 0000000..5b6b77d --- /dev/null +++ b/.agents/skills/gke-basics/references/cli-reference.md @@ -0,0 +1,239 @@ +# CLI & Tool Reference for GKE + +## Tool Preference + +Default preference order: + +``` +1. GKE MCP Tools (preferred — structured, auditable, no shell required) +2. gcloud CLI (fallback — when MCP doesn't expose the operation) +3. kubectl (fallback — purely in-cluster ops not covered by MCP) +``` + +### When to use each + +| Interface | When to Use | Examples | +|-----------|-------------|---------| +| **GKE MCP Tools** | Default for all cluster and K8s operations when MCP server is available. Structured I/O, supports dry-run, no shell/kubeconfig needed. | `create_cluster`, `get_cluster`, `get_k8s_resource`, `apply_k8s_manifest`, `get_k8s_logs` | +| **`gcloud` CLI** | No MCP equivalent, or user explicitly requested CLI. Required for: GIQ model discovery, available K8s versions, maintenance windows, monitoring components, IAM/SA setup, Cloud Logging queries. | `gcloud container ai profiles`, `gcloud container get-server-config`, `gcloud iam service-accounts` | +| **`kubectl`** | Neither MCP nor `gcloud` covers the operation, or user explicitly prefers kubectl. Required for: `kubectl top`, `kubectl scale`, `kubectl exec`, `kubectl port-forward`, Helm, custom CRDs not in MCP. 
| `kubectl top pods`, `kubectl scale deployment`, `helm install` | + +### User preference override + +If the user states a preference, respect it for the session: + +- **"Use gcloud" / "Use CLI"** → `gcloud` for cluster ops, `kubectl` for K8s resource ops. Skip MCP. +- **"Use kubectl"** → `kubectl` for all K8s resource ops, `gcloud` for cluster-level ops. Skip MCP. +- **"Use MCP"** / no preference → Default. Use MCP for everything it supports. + +Even with an override, fall back through the chain for unsupported operations (e.g., cluster creation always requires `gcloud` or MCP). + +--- + +> All MCP tools use hierarchical resource paths — see [`parent` format](#parent--name-format-quick-reference) at the bottom. + +## Cluster Operations + +| Operation | MCP Tool | CLI Fallback | Mode | +|-----------|----------|-------------|------| +| List clusters | `list_clusters` | `gcloud container clusters list` | READ | +| Get cluster details | `get_cluster` | `gcloud container clusters describe` | READ | +| Create cluster | `create_cluster` | `gcloud container clusters create-auto` | MUTATE | +| Update cluster | `update_cluster` | `gcloud container clusters update` | DESTRUCTIVE | +| Get K8s versions | — | `gcloud container get-server-config` | READ | +| Get credentials | — | `gcloud container clusters get-credentials` | READ | +| Delete cluster | — | `gcloud container clusters delete` | DESTRUCTIVE | + +``` +# List clusters in a project (all regions) +list_clusters(parent="projects/{PROJECT}/locations/-") + +# Get cluster details (all fields) +get_cluster(name="projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}", readMask="*") + +# Create golden path Autopilot cluster +create_cluster( + parent="projects/{PROJECT}/locations/{REGION}", + cluster='{"name":"{CLUSTER}","autopilot":{"enabled":true},"privateClusterConfig":{"enablePrivateNodes":true},...}' +) +``` + +```bash +# Get available Kubernetes versions (CLI-only) +gcloud container get-server-config --region {REGION} --format="yaml(channels)" --quiet + +# Create golden path Autopilot 
cluster (see gke-cluster-creation.md for full templates) +gcloud container clusters create-auto {CLUSTER} \ + --region {REGION} --project {PROJECT} \ + --enable-private-nodes --enable-master-authorized-networks \ + --enable-dns-access --release-channel regular \ + --enable-secret-manager --scoped-rbs-bindings \ + --quiet + +# Get credentials (CLI-only) +gcloud container clusters get-credentials {CLUSTER} --region {REGION} --project {PROJECT} --quiet +``` + +## Node Pool Operations + +| Operation | MCP Tool | CLI Fallback | Mode | +|-----------|----------|-------------|------| +| List node pools | `list_node_pools` | `gcloud container node-pools list` | READ | +| Get node pool | `get_node_pool` | `gcloud container node-pools describe` | READ | +| Create node pool | `create_node_pool` | `gcloud container node-pools create` | MUTATE | +| Update node pool | `update_node_pool` | `gcloud container node-pools update` | DESTRUCTIVE | + +``` +list_node_pools(parent="projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}") + +create_node_pool( + parent="projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}", + nodePool='{"name":"{POOL}","config":{"machineType":"e2-standard-4"},"initialNodeCount":3,...}' +) +``` + +## Cluster Updates + +| Operation | MCP Tool | CLI Fallback | Mode | +|-----------|----------|-------------|------| +| Update cluster settings | `update_cluster` | `gcloud container clusters update` | DESTRUCTIVE | +| Update monitoring | — | `gcloud container clusters update --monitoring=...` | DESTRUCTIVE | +| Set maintenance window | — | `gcloud container clusters update --maintenance-window-*` | DESTRUCTIVE | + +``` +# Enable VPA via MCP +update_cluster( + name="projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}", + update='{"desiredVerticalPodAutoscaling":{"enabled":true}}' +) +``` + +```bash +# Update monitoring components (CLI-only) +gcloud container clusters update {CLUSTER} --region {REGION} \ + --monitoring=SYSTEM,API_SERVER,SCHEDULER,CONTROLLER_MANAGER,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA \ + --quiet +``` + +## Kubernetes Resource Operations + +| Operation | MCP Tool | CLI Fallback | Mode | 
+|-----------|----------|-------------|------| +| Get/list resources | `get_k8s_resource` | `kubectl get` | READ | +| Describe resource | `describe_k8s_resource` | `kubectl describe` | READ | +| Apply manifest | `apply_k8s_manifest` | `kubectl apply` | DESTRUCTIVE | +| Patch resource | `patch_k8s_resource` | `kubectl patch` | DESTRUCTIVE | +| Delete resource | `delete_k8s_resource` | `kubectl delete` | DESTRUCTIVE | +| List API resources | `list_k8s_api_resources` | `kubectl api-resources` | READ | +| Check auth | `check_k8s_auth` | `kubectl auth can-i` | READ | + +``` +# List all deployments in a namespace +get_k8s_resource( + parent="projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}", + resourceType="deployment", namespace="{NAMESPACE}" +) + +# Apply a manifest (with dry-run) +apply_k8s_manifest(parent="...", yamlManifest="...", dryRun=true) + +# Patch deployment resources for rightsizing +patch_k8s_resource( + parent="projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}", + resourceType="deployment", name="{DEPLOYMENT}", namespace="{NAMESPACE}", + patch='{"spec":{"template":{"spec":{"containers":[{"name":"app","resources":{"requests":{"cpu":"200m","memory":"256Mi"}}}]}}}}' +) + +# Check RBAC permissions +check_k8s_auth(parent="...", verb="create", resourceType="deployments", namespace="{NAMESPACE}") +``` + +## Diagnostics & Observability + +| Operation | MCP Tool | CLI Fallback | Mode | +|-----------|----------|-------------|------| +| List events | `list_k8s_events` | `kubectl events` | READ | +| Get container logs | `get_k8s_logs` | `kubectl logs` | READ | +| Cluster info | `get_k8s_cluster_info` | `kubectl cluster-info` | READ | +| K8s version | `get_k8s_version` | `kubectl version` | READ | +| Rollout status | `get_k8s_rollout_status` | `kubectl rollout status` | READ | +| Query Cloud Logging | — | `gcloud logging read` | READ | + +``` +# Get recent events across all namespaces +list_k8s_events(parent="...", allNamespaces=true, limit="50") + +# Get logs (last 100 lines, or previous crash) +get_k8s_logs(parent="...", name="{POD}", namespace="{NAMESPACE}", 
tail="100") +get_k8s_logs(parent="...", name="{POD}", namespace="{NAMESPACE}", previous=true) + +# Check rollout status +get_k8s_rollout_status(parent="...", resourceType="deployment", name="{DEPLOYMENT}", namespace="{NAMESPACE}") +``` + +## Operations Tracking + +| Operation | MCP Tool | CLI Fallback | Mode | +|-----------|----------|-------------|------| +| List operations | `list_operations` | `gcloud container operations list` | READ | +| Get operation | `get_operation` | `gcloud container operations describe` | READ | +| Cancel operation | `cancel_operation` | `gcloud container operations cancel` | DESTRUCTIVE | + +``` +list_operations(parent="projects/{PROJECT}/locations/{REGION}") +get_operation(name="projects/{PROJECT}/locations/{REGION}/operations/{OP_ID}") +``` + +## AI/ML Inference (GIQ) — CLI-Only + +```bash +gcloud container ai profiles models list --quiet +gcloud container ai profiles list --model={MODEL} --quiet +gcloud container ai profiles manifests create \ + --model={MODEL} --model-server={MODEL_SERVER} \ + --accelerator-type={ACCELERATOR_TYPE} \ + --target-ntpot-milliseconds={TARGET_NTPOT_MS} --quiet > inference.yaml + +# Deploy generated manifest via MCP +apply_k8s_manifest(parent="...", yamlManifest="{MANIFEST_YAML}") +``` + +## kubectl-Only Operations + +No MCP or `gcloud` equivalent: + +```bash +kubectl top pods --all-namespaces --sort-by=cpu +kubectl top nodes +kubectl scale deployment {DEPLOYMENT} --replicas={REPLICAS} -n {NAMESPACE} +kubectl exec -it {POD} -n {NAMESPACE} -- /bin/sh +kubectl port-forward svc/{SERVICE} {LOCAL_PORT}:{REMOTE_PORT} -n {NAMESPACE} +kubectl cp {NAMESPACE}/{POD}:{REMOTE_PATH} {LOCAL_PATH} +kubectl run debug --rm -it --image=busybox -- /bin/sh +kubectl drain {NODE} --ignore-daemonsets --delete-emptydir-data +helm install {RELEASE} {CHART} -n {NAMESPACE} +helm upgrade {RELEASE} {CHART} -n {NAMESPACE} +``` + +## `parent` / `name` Format Quick Reference + +``` +Project+Region: projects/{PROJECT}/locations/{REGION} +Cluster: projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER} +Node Pool: projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}/nodePools/{POOL} +Operation: projects/{PROJECT}/locations/{REGION}/operations/{OP_ID} +``` + +Use `locations/-` to match all regions/zones when listing. 
+ +## Error Handling + +| Error / Symptom | Likely Cause | Remediation | +|-----------------|--------------|-------------| +| `PERMISSION_DENIED` on cluster create | Missing `container.clusters.create` IAM permission | Grant `roles/container.admin` or `roles/container.clusterAdmin` | +| Quota exceeded | Regional vCPU, GPU, or IP address limits | Request quota increase or select a different region | +| IP exhaustion / CIDR conflict | Pod subnet too small or overlapping ranges | Re-plan IP ranges; may require cluster recreation (Day-0) | +| Workload Identity not working | Workload pool not enabled, or the Kubernetes ServiceAccount has no IAM binding | Verify `workloadIdentityConfig.workloadPool`; grant the required IAM roles to the workload's Kubernetes ServiceAccount principal (or its linked GSA) | +| Private cluster unreachable | No authorized networks or DNS endpoint | Enable `dnsEndpointConfig.allowExternalTraffic` or add authorized networks | +| Secret Manager rotation failing | SA missing `secretmanager.versions.access` | Grant Secret Manager accessor role to workload's GSA | +| Control-plane metrics missing | Monitoring components not configured | Enable APISERVER, SCHEDULER, CONTROLLER_MANAGER in `monitoringConfig` | diff --git a/.agents/skills/gke-basics/references/client-library-usage.md b/.agents/skills/gke-basics/references/client-library-usage.md new file mode 100644 index 0000000..43e5dd2 --- /dev/null +++ b/.agents/skills/gke-basics/references/client-library-usage.md @@ -0,0 +1,91 @@ +# GKE Client Libraries + +To interact with the GKE (Kubernetes) API programmatically, use the official +Kubernetes client libraries. + +**Prerequisite:** These libraries interact with the Kubernetes API. You +must already have a running GKE cluster and valid credentials +(for example, by running `gcloud container clusters get-credentials`) +before running this code. + +## Getting Started + +Kubernetes client libraries allow you to manage clusters and workloads from +within your application code. 
+ +### Python + +- **Installation:** + + ```bash + pip install kubernetes + ``` + +- **Usage Example:** + + ```python + from kubernetes import client, config + config.load_kube_config() # Loads from ~/.kube/config + v1 = client.CoreV1Api() + print("Listing pods with their IPs:") + ret = v1.list_pod_for_all_namespaces(watch=False) + for i in ret.items: + print("%s\t%s\t%s" % (i.status.pod_ip, i.metadata.namespace, i.metadata.name)) + ``` + +### Go + +- **Installation:** + + ```bash + go get k8s.io/client-go@latest + ``` + +- **Usage Example:** + + ```go + import ( + "context" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + ) + config, _ := clientcmd.BuildConfigFromFlags("", kubeconfig) + clientset, _ := kubernetes.NewForConfig(config) + pods, _ := clientset.CoreV1().Pods("").List( + context.TODO(), metav1.ListOptions{}) + ``` + +### Node.js (TypeScript) + +- **Installation:** + + ```bash + npm install @kubernetes/client-node + ``` + +- **Usage Example:** + + ```javascript + const k8s = require('@kubernetes/client-node'); + + const kc = new k8s.KubeConfig(); + kc.loadFromDefault(); // Automatically detects local vs. in-cluster configuration + + const k8sApi = kc.makeApiClient(k8s.CoreV1Api); + + // In most recent library versions, parameters must be passed inside an object + k8sApi.listNamespacedPod({ namespace: 'default' }).then((res) => { + const pods = res.items || res.body.items; + console.log(`Found ${pods.length} pods in 'default' namespace.`); + }); + ``` + +### Java + +- [Java Reference](https://github.com/kubernetes-client/java) + +## GKE-specific API (Container Service) + +To manage the GKE *service* itself (e.g., create/delete clusters) +programmatically, use the Google Cloud Container client libraries. 
+ +- [Google Cloud Container Client Libraries](https://cloud.google.com/kubernetes-engine/docs/reference/libraries) diff --git a/.agents/skills/gke-basics/references/core-concepts.md b/.agents/skills/gke-basics/references/core-concepts.md new file mode 100644 index 0000000..b994f59 --- /dev/null +++ b/.agents/skills/gke-basics/references/core-concepts.md @@ -0,0 +1,54 @@ +# GKE Core Concepts + +Google Kubernetes Engine (GKE) is a managed Kubernetes platform for deploying, managing, and scaling containerized applications on Google Cloud infrastructure. It handles cluster provisioning, upgrades, and node management, letting teams focus on workloads rather than infrastructure. + +> **MCP Tools:** `list_clusters`, `get_cluster` + +## Cluster Modes + +| Mode | Who Manages Nodes | Best For | +|------|-------------------|----------| +| **Autopilot** (recommended) | Google — fully managed nodes, scaling, and security | Most workloads. No node-level ops. Pay per pod resource request. | +| **Standard** | You — full control over node pools, OS, machine types | Workloads requiring kernel customization, specific node OS, or DaemonSets not supported by Autopilot | + +**Default: Autopilot.** Use Standard only when Autopilot has a documented limitation for your workload. + +## Cluster Architecture + +- **Regional clusters** (recommended): Control plane replicated across 3 zones. Higher availability, no single-zone failure risk. +- **Zonal clusters**: Single control plane zone. Lower cost, acceptable for dev/test. +- **Private clusters** (golden path default): Nodes have no public IPs. Control plane accessible via private endpoint or DNS endpoint. 
+ +## Networking Model + +GKE uses **VPC-native** clusters with alias IP ranges: +- Each pod gets a routable IP from the pod CIDR +- Dataplane V2 (eBPF-based) is the golden path default — provides built-in Network Policy enforcement +- Cloud DNS for in-cluster DNS resolution +- Gateway API for ingress/load balancing + +## Scaling Model + +- **Horizontal Pod Autoscaler (HPA)**: Scales pod replicas based on CPU, memory, or custom metrics +- **Vertical Pod Autoscaler (VPA)**: Recommends or auto-adjusts pod resource requests +- **Cluster Autoscaler / NAP**: Scales nodes to match pod demand (Autopilot handles this automatically) +- **ComputeClasses**: Declarative node selection — machine family, Spot VMs, GPU targeting + +## Identity & Security Model + +- **Workload Identity Federation**: Pods assume Google Cloud IAM identities without static keys +- **Secret Manager integration**: Secrets synced to Kubernetes with automatic rotation +- **Pod Security Standards**: `restricted` profile enforced on production namespaces +- **Shielded Nodes**: Secure Boot and integrity monitoring (Autopilot-enforced) + +## Regional Availability + +GKE is available in all Google Cloud regions. Autopilot clusters are regional by default. See https://cloud.google.com/about/locations for the full region list. + +## Pricing + +GKE pricing depends on the cluster mode: +- **Autopilot**: Pay for pod resource requests (vCPU, memory, ephemeral storage). No cluster management fee. +- **Standard**: Pay for underlying Compute Engine VMs plus a per-cluster management fee. + +For current pricing, see https://cloud.google.com/kubernetes-engine/pricing. 
diff --git a/.agents/skills/gke-basics/references/gke-app-onboarding.md b/.agents/skills/gke-basics/references/gke-app-onboarding.md new file mode 100644 index 0000000..ef6ebbf --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-app-onboarding.md @@ -0,0 +1,160 @@ +# GKE App Onboarding + +This reference provides workflows for containerizing and deploying applications to GKE for the first time. + +> **MCP Tools:** `apply_k8s_manifest`, `get_k8s_resource`, `get_k8s_rollout_status`, `get_k8s_logs`, `describe_k8s_resource` + +## Workflow + +### 1. App Assessment + +Before containerizing, assess the application: + +- **Language & Framework**: Identify the tech stack +- **Dependencies**: List required libraries and external services +- **Configuration**: How is the app configured? (env vars, config files, secrets) +- **Statefulness**: Does it need persistent storage? (databases, file storage) +- **Networking**: Port mapping and protocol (HTTP, gRPC, TCP) +- **Health endpoints**: Does the app expose health check endpoints? + +### 2. Containerization + +Create a container image: + +**Dockerfile (recommended for most apps):** + +```dockerfile +# Multi-stage build for smaller, more secure images +FROM golang:1.22 AS builder +WORKDIR /app +COPY . . +RUN CGO_ENABLED=0 go build -o server . + +FROM gcr.io/distroless/static:nonroot +COPY --from=builder /app/server /server +USER nonroot:nonroot +EXPOSE 8080 +ENTRYPOINT ["/server"] +``` + +**Best practices:** +- Use multi-stage builds to keep production images small +- Use distroless or minimal base images to reduce attack surface +- Run as non-root user +- Log to `stdout` and `stderr` for Cloud Logging collection + +**Alternatives:** +- **Cloud Native Buildpacks** — auto-detect language and build without a Dockerfile: `pack build --builder gcr.io/buildpacks/builder:latest` +- **Skaffold** — development workflow tool for iterating on containerized apps: `skaffold dev` + +### 3. 
Image Management + +Build and store the container image: + +```bash +# Configure Docker for Artifact Registry +gcloud auth configure-docker <REGION>-docker.pkg.dev --quiet + +# Build and push +docker build -t <REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/<IMAGE>:<TAG> . +docker push <REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/<IMAGE>:<TAG> +``` + +**Vulnerability scanning**: Enable automatic scanning in Artifact Registry to detect issues in base images and dependencies. + +```bash +# Check scan results +gcloud artifacts docker images describe \ + <REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/<IMAGE>:<TAG> \ + --show-package-vulnerability \ + --quiet +``` + +### 4. Manifest Generation + +Generate Kubernetes manifests for the application: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: my-app + namespace: default +spec: + replicas: 2 + selector: + matchLabels: + app: my-app + template: + metadata: + labels: + app: my-app + spec: + containers: + - name: my-app + image: <REGION>-docker.pkg.dev/<PROJECT_ID>/<REPOSITORY>/<IMAGE>:<TAG> + ports: + - containerPort: 8080 + resources: + requests: + cpu: "250m" + memory: "256Mi" + limits: + cpu: "500m" + memory: "512Mi" + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 10 + readinessProbe: + httpGet: + path: /readyz + port: 8080 + initialDelaySeconds: 5 +--- +apiVersion: v1 +kind: Service +metadata: + name: my-app +spec: + selector: + app: my-app + ports: + - port: 80 + targetPort: 8080 + type: ClusterIP +``` + +**Checklist for manifests:** +- Resource requests and limits set +- Liveness and readiness probes configured +- At least 2 replicas for production +- Service type appropriate (ClusterIP for internal, use Gateway API for external) + +### 5.
Deploy + +``` +# MCP (preferred) +apply_k8s_manifest(parent="projects/<PROJECT_ID>/locations/<LOCATION>/clusters/<CLUSTER_NAME>", yamlManifest="<YAML_CONTENT>") + +# Verify +get_k8s_rollout_status(parent="...", resourceType="deployment", name="my-app") +get_k8s_resource(parent="...", resourceType="pod", labelSelector="app=my-app") +``` + +**kubectl fallback:** + +```bash +kubectl apply -f manifests/ +kubectl rollout status deployment/my-app +kubectl get pods -l app=my-app +``` + +## Next Steps + +Once the application is running on GKE: +- Configure autoscaling — see [gke-scaling.md](./gke-scaling.md) +- Set up observability — see [gke-observability.md](./gke-observability.md) +- Harden security — see [gke-security.md](./gke-security.md) +- Configure reliability (PDBs, topology spread) — see [gke-reliability.md](./gke-reliability.md) diff --git a/.agents/skills/gke-basics/references/gke-backup-dr.md b/.agents/skills/gke-basics/references/gke-backup-dr.md new file mode 100644 index 0000000..eb7859d --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-backup-dr.md @@ -0,0 +1,86 @@ +# GKE Backup & Disaster Recovery + +This reference provides workflows for protecting stateful workloads on GKE using Backup for GKE. + +> **MCP Tools:** `get_cluster`, `update_cluster`. **CLI-only:** `gcloud container backup-restore *` + +## Workflows + +### 1. Enable Backup for GKE + +Backup for GKE must be enabled at the cluster level. + +```bash +# Check if enabled +gcloud container clusters describe <CLUSTER_NAME> --region <REGION> \ + --format="value(addonsConfig.gkeBackupAgentConfig.enabled)" \ + --quiet + +# Enable (Day-1 change) +gcloud container clusters update <CLUSTER_NAME> \ + --enable-gke-backup \ + --region <REGION> \ + --quiet +``` + +### 2. Create a Backup Plan + +A Backup Plan defines what to back up, when, and for how long.
+ +```bash +gcloud container backup-restore backup-plans create \ + --cluster= \ + --location= \ + --retention-days= \ + --cron-schedule="" \ + --all-namespaces \ + --quiet +``` + +**Options:** +- `--all-namespaces` — back up everything +- `--included-namespaces=,` — back up specific namespaces +- `--backup-encryption-key=` — encrypt with Customer-Managed Encryption Key (CMEK) + +### 3. Create a Manual Backup + +Trigger a backup immediately outside the schedule: + +```bash +gcloud container backup-restore backups create \ + --backup-plan= \ + --location= \ + --quiet +``` + +### 4. Restore from Backup + +**Create a restore plan:** + +```bash +gcloud container backup-restore restore-plans create \ + --cluster= \ + --location= \ + --backup-plan= \ + --cluster-resource-conflict-policy=USE_EXISTING_VERSION \ + --namespaced-resource-restore-mode=FAIL_ON_CONFLICT \ + --quiet +``` + +**Execute the restore:** + +```bash +gcloud container backup-restore restores create \ + --restore-plan= \ + --backup= \ + --location= \ + --quiet +``` + +## Best Practices + +1. **Automate backups**: Always use a cron schedule for production workloads +2. **Test restores regularly**: Restore to a separate namespace or cluster to verify data integrity +3. **Cross-region DR**: Store backups in a different region or configure cross-region restore plans +4. **Encrypt backups**: Use CMEK for compliance and security requirements +5. **Scope backups**: Back up specific namespaces rather than the entire cluster when possible to reduce restore complexity diff --git a/.agents/skills/gke-basics/references/gke-batch-hpc.md b/.agents/skills/gke-basics/references/gke-batch-hpc.md new file mode 100644 index 0000000..74ec29f --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-batch-hpc.md @@ -0,0 +1,168 @@ +# GKE Batch & HPC Workloads + +This reference covers running batch processing and high-performance computing (HPC) workloads on GKE. 
+ +> **MCP Tools:** `apply_k8s_manifest`, `get_k8s_resource`, `describe_k8s_resource`, `get_k8s_logs`, `delete_k8s_resource`, `list_k8s_events` + +## When to Use + +- Running batch data processing pipelines +- HPC simulations (CFD, molecular dynamics, financial modeling) +- Large-scale parallel computation (MPI, MapReduce) +- ML training jobs +- CI/CD build farms + +## Batch Processing on GKE + +### Kubernetes Jobs + +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: batch-job +spec: + parallelism: 10 + completions: 100 + backoffLimit: 3 + template: + spec: + containers: + - name: worker + image: + resources: + requests: + cpu: "1" + memory: "2Gi" + restartPolicy: Never +``` + +### JobSet (for Complex Multi-Job Workflows) + +The golden path enables JobSet monitoring (`JOBSET` in monitoringConfig). + +```yaml +apiVersion: jobset.x-k8s.io/v1alpha2 +kind: JobSet +metadata: + name: training-job +spec: + replicatedJobs: + - name: workers + replicas: 4 + template: + spec: + parallelism: 1 + completions: 1 + template: + spec: + containers: + - name: worker + image: + resources: + requests: + cpu: "4" + memory: "8Gi" +``` + +### Kueue (Job Queuing) + +Kueue manages job scheduling and resource allocation for batch workloads: + +```bash +# Install Kueue +kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/latest/download/manifests.yaml +``` + +```yaml +# Define a ClusterQueue +apiVersion: kueue.x-k8s.io/v1beta1 +kind: ClusterQueue +metadata: + name: batch-queue +spec: + namespaceSelector: {} + resourceGroups: + - coveredResources: ["cpu", "memory"] + flavors: + - name: default + resources: + - name: "cpu" + nominalQuota: 100 + - name: "memory" + nominalQuota: "200Gi" +--- +# Allow a namespace to use the queue +apiVersion: kueue.x-k8s.io/v1beta1 +kind: LocalQueue +metadata: + name: batch-local + namespace: batch-jobs +spec: + clusterQueue: batch-queue +``` + +## HPC on GKE + +### Compact Placement (Low-Latency Networking) + +For 
tightly-coupled HPC workloads that need low-latency inter-node communication: + +```bash +# Standard clusters: create node pool with compact placement +gcloud container node-pools create hpc-pool \ + --cluster --region \ + --machine-type c3-standard-44 \ + --placement-type COMPACT \ + --num-nodes 8 \ + --enable-autoscaling --min-nodes 0 --max-nodes 16 \ + --quiet +``` + +### MPI Workloads + +Use the MPI Operator for MPI-based HPC applications: + +```bash +# Install MPI Operator +kubectl apply -f https://raw.githubusercontent.com/kubeflow/mpi-operator/master/deploy/v2beta1/mpi-operator.yaml +``` + +```yaml +apiVersion: kubeflow.org/v2beta1 +kind: MPIJob +metadata: + name: hpc-simulation +spec: + slotsPerWorker: 4 + mpiReplicaSpecs: + Launcher: + replicas: 1 + template: + spec: + containers: + - name: launcher + image: + command: ["mpirun", "-np", "32", "./simulation"] + Worker: + replicas: 8 # Set resource requests per worker +``` + +## Cost Optimization for Batch/HPC + +### Spot VMs for Batch + +Batch workloads are ideal Spot VM candidates (interruptible, can checkpoint). Use a ComputeClass with Spot-first priority and `activeMigration` to return to Spot when available. See [gke-compute-classes.md](./gke-compute-classes.md) for the Spot-with-fallback pattern. 
+ +### Scale-to-Zero + +For batch clusters, allow node pools to scale to zero when no jobs are running: + +- Autopilot (golden path): Automatic, nodes scale to zero when no pods are scheduled +- Standard: Set `--min-nodes 0` on batch node pools + +## Best Practices + +- **Kueue** for multi-tenant job scheduling; **JobSet** for multi-component workflows +- **Set `backoffLimit`** on Jobs; **checkpoint long jobs** for preemption resilience +- **Spot VMs** for fault-tolerant batch; **compact placement** for tightly-coupled HPC diff --git a/.agents/skills/gke-basics/references/gke-cluster-creation.md b/.agents/skills/gke-basics/references/gke-cluster-creation.md new file mode 100644 index 0000000..7355900 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-cluster-creation.md @@ -0,0 +1,142 @@ +# GKE Cluster Creation + +This reference guides creating GKE clusters. The **golden path Autopilot** configuration is the default for all new clusters. + +> **MCP Tools:** `list_clusters`, `create_cluster`, `get_cluster`, `list_operations`, `get_operation` + +## Workflow + +1. **Discover context**: Use `list_clusters` to see existing clusters. Use `gcloud config get-value project` if project unknown. +2. **Gather inputs**: project_id, region, cluster_name, environment type +3. **Select mode**: Autopilot (default) vs Standard +4. **Configure networking**: auto-create subnet (default) or bring-your-own +5. **Review golden path settings**: present the config and confirm with user +6. **Create**: Use MCP `create_cluster` tool. Fall back to `gcloud` CLI only if MCP is unavailable. +7. **Track**: Use `get_operation` to monitor creation progress +8. 
**Verify**: Use `get_cluster` with `readMask="*"` to confirm golden path settings applied + +## Mode Selection + +| Criteria | Autopilot (Golden Path) | Standard | +|----------|------------------------|----------| +| Node management | Google-managed | Self-managed | +| Pricing | Pay per pod resource request | Pay per node (VM) | +| Node customization | Via ComputeClasses | Full control | +| DaemonSets | Allowed (with restrictions) | Full control | +| GPU/TPU | Supported via ComputeClasses | Supported via node pools | +| Best for | Most production workloads | Kernel tuning, custom OS, privileged workloads | + +> **Rule**: Default to Autopilot unless the customer has a specific requirement that Autopilot cannot satisfy. + +## Templates + +### 1. Golden Path Autopilot (Production) + +This is the default. All settings match `assets/golden-path-autopilot.yaml`. + +**Via gcloud:** + +```bash +gcloud container clusters create-auto \ + --region \ + --project \ + --release-channel regular \ + --enable-private-nodes \ + --enable-master-authorized-networks \ + --enable-dns-access \ + --enable-secret-manager \ + --secret-manager-rotation-interval=120s \ + --scoped-rbs-bindings \ + --monitoring=SYSTEM,API_SERVER,SCHEDULER,CONTROLLER_MANAGER,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA,CADVISOR,KUBELET,DCGM \ + --quiet +``` + +**Via MCP (`create_cluster`):** + +```json +{ + "parent": "projects//locations/", + "cluster": { + "name": "", + "autopilot": { "enabled": true }, + "privateClusterConfig": { "enablePrivateNodes": true }, + "masterAuthorizedNetworksConfig": { + "privateEndpointEnforcementEnabled": true + }, + "releaseChannel": { "channel": "REGULAR" }, + "secretManagerConfig": { + "enabled": true, + "rotationConfig": { "enabled": true, "rotationInterval": "120s" } + }, + "rbacBindingConfig": { + "enableInsecureBindingSystemAuthenticated": false, + "enableInsecureBindingSystemUnauthenticated": false + } + } +} +``` + +### 2. 
Autopilot Dev/Test + +Relaxes some golden path defaults for cost savings and easier access in non-production. + +```bash +gcloud container clusters create-auto \ + --region \ + --project \ + --release-channel rapid \ + --quiet +``` + +> **Warning**: This does not apply golden path security hardening. Suitable for dev/test only. + +### 3. Standard Regional (When Autopilot is Not an Option) + +```bash +gcloud container clusters create \ + --region \ + --project \ + --num-nodes 3 \ + --machine-type e2-standard-4 \ + --disk-type pd-balanced \ + --enable-autoscaling --min-nodes 1 --max-nodes 10 \ + --enable-shielded-nodes --enable-secure-boot \ + --workload-pool=.svc.id.goog \ + --enable-private-nodes \ + --enable-master-authorized-networks \ + --enable-vertical-pod-autoscaling \ + --enable-dataplane-v2 \ + --release-channel regular \ + --quiet +``` + +### 4. GPU/AI Workloads (Autopilot with ComputeClass) + +Create a golden path Autopilot cluster, then apply a ComputeClass for GPU workloads: + +```bash +# 1. Create golden path cluster (same as template 1) +gcloud container clusters create-auto \ + --region --project \ + --enable-private-nodes --enable-master-authorized-networks \ + --enable-dns-access --enable-secret-manager --scoped-rbs-bindings \ + --quiet + +# 2. Apply GPU ComputeClass (see gke-compute-classes.md) +kubectl apply -f gpu-compute-class.yaml + +# 3. 
Or use GIQ for inference (see gke-inference.md) +gcloud container ai profiles manifests create \ + --model=gemma-2-9b-it --model-server=vllm --accelerator-type=nvidia-l4 --quiet > inference.yaml +kubectl apply -f inference.yaml +``` + +## Instructions + +- **ALWAYS** ask for `project_id` if not in context +- **ALWAYS** ask for `region` +- **ALWAYS** ask for a unique `cluster_name` +- **DEFAULT** to golden path Autopilot unless customer specifies otherwise +- **WARN** about Day-0 decisions (networking, private nodes) that are hard to change later +- **WARN** about cost for GPU or multi-region clusters +- When using MCP `create_cluster`, the `cluster.name` should be the **short name** (e.g., `my-cluster`), not the full resource path diff --git a/.agents/skills/gke-basics/references/gke-compute-classes.md b/.agents/skills/gke-basics/references/gke-compute-classes.md new file mode 100644 index 0000000..0edd842 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-compute-classes.md @@ -0,0 +1,172 @@ +# GKE ComputeClasses + +ComputeClasses allow declarative node configuration and autoscaling priorities in GKE Autopilot (and Standard with NAP). Use them to specify machine families, Spot VM fallback, GPU requirements, and zone targeting. + +> **MCP Tools:** `apply_k8s_manifest`, `get_k8s_resource`, `describe_k8s_resource`, `delete_k8s_resource` + +## When to Use + +- Cost optimization: Spot VMs with on-demand fallback +- GPU/TPU workloads: target specific accelerators +- Performance: select specific machine families (c3, c4, n4) +- Zone targeting: colocate workloads with zonal resources + +## CRD Structure + +```yaml +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: +spec: + # Required. Ordered list of rules. GKE tries them in order. + priorities: + - + + # Optional. Default: "DoNotScaleUp" + whenUnsatisfiable: <"DoNotScaleUp" | "ScaleUpAnyway"> + + # Optional. Auto-create node pools. 
Default: true + nodePoolAutoCreation: + enabled: + + # Optional. Move workloads back to higher-priority when available + activeMigration: + optimizeRulePriority: + + # Optional. Scale-down delay + autoscalingPolicy: + consolidationDelay: + + # Optional. Defaults for fields omitted in priorities + priorityDefaults: +``` + +## PriorityRule Fields + +| Field | Type | Description | Example | +|-------|------|-------------|---------| +| `machineFamily` | string | Compute Engine machine family | `n4`, `c3`, `t2a` | +| `machineType` | string | Specific machine type | `n4-standard-32` | +| `spot` | boolean | Use Spot VMs | `true` | +| `minCores` | int | Minimum vCPUs | `4` | +| `minMemoryGb` | int | Minimum memory in GB | `16` | +| `gpu` | object | GPU config: `type`, `count`, `driverVersion` | See below | +| `tpu` | object | TPU config: `type`, `count`, `topology` | See below | +| `storage` | object | Boot disk: `type`, `sizeGb`, `kmsKey`; Local SSD: `count`, `interface` | See below | +| `location` | object | Zone targeting: `zones: [...]` or `type: "Any"` | See below | +| `reservations` | object | Reservation consumption: `NO_RESERVATION`, `ANY_RESERVATION`, `SPECIFIC_RESERVATION` | See below | + +### GPU Configuration + +```yaml +gpu: + type: "nvidia-l4" # nvidia-l4, nvidia-h100-80gb, etc. + count: 1 # GPUs per node + driverVersion: "latest" # Optional +``` + +### TPU Configuration + +```yaml +tpu: + type: "v5p-slice" + count: 8 + topology: "2x2x1" +``` + +### Storage Configuration + +```yaml +storage: + bootDisk: + type: "pd-balanced" # pd-balanced (golden path), pd-ssd, hyperdisk-balanced + sizeGb: 100 + kmsKey: "projects/.../cryptoKeys/..." 
# Optional CMEK + localSsd: + count: 1 + interface: "NVME" +``` + +### Location Configuration + +```yaml +location: + zones: + - "us-central1-a" + - "us-central1-b" + # OR + type: "Any" # Let GKE pick from cluster zones +``` + +## Common Patterns + +### Spot VMs with On-Demand Fallback + +```yaml +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: spot-with-fallback +spec: + nodePoolAutoCreation: + enabled: true + priorities: + - machineFamily: n4 + spot: true + - machineFamily: n4 + spot: false +``` + +### GPU Workload (L4) + +```yaml +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: l4-gpu-class +spec: + priorities: + - machineFamily: g2 + gpu: + type: nvidia-l4 + count: 1 + minCores: 4 + minMemoryGb: 16 + storage: + bootDisk: + type: pd-balanced + sizeGb: 100 +``` + +### Spot with Active Migration (Return to Spot When Available) + +Add `activeMigration` to the Spot-with-fallback pattern above to auto-migrate workloads back to Spot when capacity returns: + +```yaml +spec: + activeMigration: + optimizeRulePriority: true + priorities: + - machineFamily: n4 + spot: true + - machineFamily: n4 + spot: false +``` + +> **Other patterns** — HPC (`machineFamily: c3`, `minCores: 8`) and zone targeting (`location.zones: [...]`) follow the same CRD structure. See the PriorityRule fields table and sub-config examples above. + +## Workload Usage + +Pods must specify the ComputeClass via node selector: + +```yaml +nodeSelector: + cloud.google.com/compute-class: "" +``` + +## Warnings + +- Do not mix ComputeClass selection with other hard node selectors (like `cloud.google.com/gke-spot`) — this causes scheduling conflicts. +- When using `activeMigration`, workloads will be evicted and rescheduled — ensure PDBs are in place. +- Spot VMs can be evicted with 30-second notice. Set `terminationGracePeriodSeconds < 30` for Spot workloads. 
diff --git a/.agents/skills/gke-basics/references/gke-cost.md b/.agents/skills/gke-basics/references/gke-cost.md new file mode 100644 index 0000000..2bb88dc --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-cost.md @@ -0,0 +1,158 @@ +# GKE Cost Optimization + +This reference covers strategies for reducing GKE costs while maintaining the golden path security and reliability posture. + +> **MCP Tools:** `get_k8s_resource`, `describe_k8s_resource`, `apply_k8s_manifest`, `patch_k8s_resource`, `get_cluster` + +## Golden Path Cost Features + +The golden path already includes cost-optimizing settings: + +| Setting | Value | Impact | +|---------|-------|--------| +| `autoscalingProfile` | `OPTIMIZE_UTILIZATION` | Aggressive node scale-down reduces idle compute | +| `verticalPodAutoscaling` | `enabled` | VPA recommendations prevent over-provisioning | +| Autopilot pricing | Pay per pod request | No charge for unused node capacity | +| Node Auto Provisioning | enabled | Right-sized node pools created automatically | + +## Cost Optimization Strategies + +### 1. Spot VMs via ComputeClasses + +Use Spot VMs for fault-tolerant workloads (60-90% cost reduction). + +```yaml +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: spot-with-fallback +spec: + activeMigration: + optimizeRulePriority: true + priorities: + - machineFamily: n4 + spot: true + - machineFamily: n4 + spot: false +``` + +**Spot-suitable workloads:** + +| Workload | Spot-Suitable? 
| +|----------|----------------| +| Batch / data processing | Yes | +| Dev / test environments | Yes | +| Stateless web/API (replicas >= 2) | Yes (with PDBs) | +| Jobs with checkpointing | Yes | +| Stateful workloads (databases) | No | +| Single-replica critical services | No | + +**Handling eviction:** + +```yaml +spec: + template: + spec: + terminationGracePeriodSeconds: 25 # Must be < 30s for Spot + containers: + - name: app + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "sleep 5"] +``` + +### 2. Pod Rightsizing + +Use VPA recommendations to reduce over-provisioned requests. + +```bash +# 1. Deploy VPA in recommendation mode +kubectl apply -f - <<EOF +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: <DEPLOYMENT_NAME>-vpa +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: <DEPLOYMENT_NAME> + updatePolicy: + updateMode: "Off" +EOF + +# 2. Wait 24+ hours for data collection + +# 3. Read recommendations +kubectl get vpa <DEPLOYMENT_NAME>-vpa -o jsonpath='{.status.recommendation}' +``` + +**Optimization rules:** + +| Condition | Action | Savings | +|-----------|--------|---------| +| CPU request >5x P95 actual | Reduce to `P95 * 1.2` | High | +| Memory request >3x P95 actual | Reduce to `P95 * 1.2` | High | +| CPU request >2x P95 actual | Reduce to `P95 * 1.2` | Medium | +| No resource requests set | Add requests (enables bin-packing) | Medium | + +### 3.
Machine Type Selection + +| Family | Use Case | Relative Cost | +|--------|----------|---------------| +| e2 | General purpose, burstable | Lowest | +| t2a / t2d | Scale-out (Arm/AMD), price-performance optimized | Low | +| n4a | Axion Arm-based, general-purpose price-performance | Low | +| n4 / n4d | General purpose (Intel/AMD), flexible shapes | Low-Medium | +| c4a | Compute-optimized (Arm), high efficiency | Medium-High | +| c3 / c4 | Compute-optimized (Intel) | Medium-High | +| c3d / c4d | Compute-optimized (AMD), high-performance throughput | Medium-High | +| ek-standard | Autopilot enhanced (golden path) | Medium | +| m3 / x4 | Memory-optimized, SAP HANA, large databases | High | +| g2 (L4 GPU) | AI inference | High | +| a3 (H100 GPU) | AI training | Highest | +| a4 / a4x | Ultra-scale AI (Blackwell GPUs) | Highest | + +> In Autopilot, machine type is managed. Use ComputeClasses to influence selection. + +### 4. Committed Use Discounts (CUDs) + +For steady-state workloads, purchase 1-year or 3-year CUDs: + +- 1-year: ~20-30% discount +- 3-year: ~50-55% discount +- Applied automatically to matching usage in the region +- Purchase via Google Cloud Console > Billing > Committed use discounts + +### 5. Cluster Management + +- **Stop/start dev clusters**: Idle dev clusters cost money even with no workloads (control plane fee). +- **Right-size node pools** (Standard): Use Cluster Autoscaler with appropriate min/max. +- **Multi-tenant clusters**: Share a single cluster across teams instead of per-team clusters (see [gke-multitenancy.md](./gke-multitenancy.md)). 
+ +## Cost Monitoring + +```bash +# View cluster cost breakdown (requires Cost Management API) +gcloud billing budgets list --billing-account= --quiet + +# View node utilization +kubectl top nodes + +# View pod resource usage vs requests +kubectl top pods --all-namespaces --containers +``` + +## Dev/Test Cost Savings + +For non-production environments, these golden path deviations are acceptable: + +| Setting | Production (Golden Path) | Dev/Test | +|---------|-------------------------|----------| +| Cluster mode | Autopilot | Autopilot (cheaper with fewer pods) | +| Release channel | Regular | Rapid (get fixes faster) | +| Private nodes | Required | Optional (simpler access) | +| Monitoring components | Full suite | SYSTEM_COMPONENTS only | +| Secret Manager rotation | 120s | Disabled | +| Maintenance windows | Configured | Not needed | diff --git a/.agents/skills/gke-basics/references/gke-golden-path.md b/.agents/skills/gke-basics/references/gke-golden-path.md new file mode 100644 index 0000000..8473c83 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-golden-path.md @@ -0,0 +1,76 @@ +# GKE Golden Path Configuration + +The golden path is the recommended Autopilot configuration for production clusters. It defines sensible defaults — when the user requests different settings, apply them and note relevant trade-offs. + +> **MCP Tools:** `get_cluster`, `create_cluster`, `update_cluster` + +## Rules + +1. **Default to the golden path.** Use golden path values unless the user requests otherwise. When deviating, note trade-offs but respect the user's choice. +2. **Day-0 vs Day-1.** Flag Day-0 decisions (networking, private nodes, subnets, IP allocation) prominently — they are hard/impossible to change after creation. +3. **Tool preference: MCP > gcloud > kubectl.** See [cli-reference.md](./cli-reference.md) for full coverage matrix and override options. If the user says "use gcloud" or "use kubectl", respect that for the session. +4. 
**Document decisions and rationale**, especially for Day-0 choices and golden path deviations. + +## Required Inputs + +If the user is unsure, use golden path defaults. + +- **Project ID** (required) +- **Region** (required, e.g., `us-central1`) +- **Cluster name** (required) +- **Environment type**: dev/test or production (defaults to production) +- **Networking**: bring-your-own VPC/subnet or auto-create (default: auto-create) +- **Scale expectations**: expected node/pod count, workload types +- **Cost constraints**: Spot VM tolerance, budget considerations + +## Always-Apply Defaults + +Recommended best practices applied by default. If the user requests a different setting, apply it and briefly note the security or operational trade-off. + +| Setting | Golden Path Value | +|---------|-------------------| +| `autopilot.enabled` | `true` | +| `privateClusterConfig.enablePrivateNodes` | `true` | +| `masterAuthorizedNetworksConfig.privateEndpointEnforcementEnabled` | `true` | +| `secretManagerConfig.enabled` + `rotationInterval: 120s` | `true` | +| `rbacBindingConfig.enableInsecureBinding*` | `false` (both) | +| `workloadIdentityConfig.workloadPool` | enabled | +| `networkConfig.datapathProvider` | `ADVANCED_DATAPATH` | +| `networkConfig.dnsConfig.clusterDns` | `CLOUD_DNS` | +| `autoscaling.autoscalingProfile` | `OPTIMIZE_UTILIZATION` | +| `verticalPodAutoscaling.enabled` | `true` | +| `monitoringConfig` components | SYSTEM_COMPONENTS, STORAGE, POD, DEPLOYMENT, STATEFULSET, DAEMONSET, HPA, JOBSET, CADVISOR, KUBELET, DCGM, APISERVER, SCHEDULER, CONTROLLER_MANAGER | +| `advancedDatapathObservabilityConfig.enableMetrics` | `true` | +| `nodeConfig.shieldedInstanceConfig.enableSecureBoot` | `true` | +| `nodeConfig.workloadMetadataConfig.mode` | `GKE_METADATA` | +| `nodeConfig.gcfsConfig.enabled` / `gvnic.enabled` | `true` / `true` | +| `addonsConfig.statefulHaConfig.enabled` | `true` | +| Storage CSI drivers (Filestore, GCS FUSE, Parallelstore) | enabled | +| Pod 
Security Standards | `restricted` on production namespaces | + +## Customer-Configurable Settings + +These have golden path defaults but customers may deviate with valid justification. **Ask before changing.** + +| Setting | Default | Why Deviate | +|---------|---------|-------------| +| `dnsEndpointConfig.allowExternalTraffic` | `true` | Restrict if cluster only accessed from within VPC | +| `autoIpamConfig` / `createSubnetwork` | `true` / `true` | Customer has pre-existing VPC/subnets | +| `maxPodsPerNode` | `48` | `110` for high pod-density (costs more CIDR space) | +| `subnetwork` | auto-created | Customer brings existing subnets | +| Maintenance exclusion windows | configured (NO_MINOR_UPGRADES, 1yr) | Customer-specific scheduling | +| `nodeConfig.bootDisk.diskType` | `pd-balanced` | `pd-ssd` for I/O-intensive, `pd-standard` for cost | +| `nodeConfig.machineType` | `ek-standard-8` (Autopilot) | Varies by workload; use ComputeClasses | + +## Guardrails + +- Do not request or output secrets (tokens, keys, service account JSON). +- Discover project/cluster context via MCP tools or `gcloud config get-value project` — don't ask users to paste project IDs. +- For Day-0 decisions, always ask clarifying questions before proceeding. +- For Day-1 features, propose golden path defaults with trade-offs and let the customer confirm. +- Do not promise zero downtime; advise PDBs, health probes, replicas, and staged upgrades. +- When auditing existing clusters, compare against golden path and report deviations with severity and remediation. + +## Golden Path Config + +See [golden-path-autopilot.yaml](../assets/golden-path-autopilot.yaml) for the full cluster-level policy settings. 
diff --git a/.agents/skills/gke-basics/references/gke-inference.md b/.agents/skills/gke-basics/references/gke-inference.md new file mode 100644 index 0000000..761adf2 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-inference.md @@ -0,0 +1,161 @@ +# GKE AI/ML Inference + +This reference covers deploying AI/ML inference workloads on GKE using Google's Inference Quickstart (GIQ) and best practices for LLM serving. + +> **MCP Tools:** `apply_k8s_manifest`, `get_k8s_resource`, `get_k8s_logs`, `get_k8s_rollout_status`, `describe_k8s_resource`, `list_k8s_events`. **CLI-only:** `gcloud container ai profiles *` + +## When to Use + +- Deploy an AI model (Llama, Gemma, Mistral, etc.) to GKE +- Generate optimized Kubernetes manifests for inference +- Select GPU/TPU accelerators for model serving +- Configure autoscaling for LLM inference + +## Prerequisites + +- A golden path GKE Autopilot cluster (GPU workloads are supported via ComputeClasses and NAP) +- `gcloud` CLI authenticated +- Sufficient GPU/TPU quota in the target region + +## Workflow + +### 1. Discovery: Find Models and Hardware + +```bash +# List all supported models +gcloud container ai profiles models list --quiet + +# Find valid accelerator/server combinations for a model +gcloud container ai profiles list --model= --quiet + +# Example: what can run Gemma 2 9B? +gcloud container ai profiles list --model=gemma-2-9b-it --quiet +``` + +### 2. 
Generate Manifest + +```bash +gcloud container ai profiles manifests create \ + --model= \ + --model-server= \ + --accelerator-type= \ + --target-ntpot-milliseconds= --quiet > inference.yaml +``` + +**Parameters:** +- `--model`: Model ID (e.g., `gemma-2-9b-it`, `llama-3-8b`) +- `--model-server`: Inference server (`vllm`, `tgi`, `triton`, `tensorrt-llm`) +- `--accelerator-type`: GPU/TPU type (`nvidia-l4`, `nvidia-tesla-a100`, `nvidia-h100-80gb`) +- `--target-ntpot-milliseconds`: Target Normalized Time Per Output Token (optional, for latency optimization) + +**Example:** + +```bash +gcloud container ai profiles manifests create \ + --model=gemma-2-9b-it \ + --model-server=vllm \ + --accelerator-type=nvidia-l4 \ + --target-ntpot-milliseconds=50 --quiet > inference.yaml +``` + +### 3. Review and Deploy + +```bash +# Review for placeholders (HF tokens, PVCs) +cat inference.yaml + +# Deploy +kubectl apply -f inference.yaml + +# Monitor +kubectl get pods -w +kubectl logs -f +``` + +> Some models require Hugging Face tokens. Create a Kubernetes Secret and reference it in the manifest. 
+ +## GPU ComputeClass for Inference + +For Autopilot clusters, create a ComputeClass to target GPU nodes: + +```yaml +apiVersion: cloud.google.com/v1 +kind: ComputeClass +metadata: + name: l4-inference +spec: + priorities: + - machineFamily: g2 + gpu: + type: nvidia-l4 + count: 1 + minCores: 4 + minMemoryGb: 16 +``` + +## Accelerator Selection Guide + +| Accelerator | Best For | Memory | Relative Cost | +|-------------|----------|--------|---------------| +| NVIDIA T4 | Budget inference, lightweight legacy models | 16 GB | Lowest | +| NVIDIA L4 (G2) | Small-medium model inference, video, graphics | 24 GB | Low | +| NVIDIA RTX PRO 6000 (G4) | Multimodal AI, high-fidelity 3D, fine-tuning | 96 GB | Medium | +| Cloud TPU v5e | Cost-effective transformer inference | Varies | Medium | +| Cloud TPU v5p | High-performance training | Varies | High | +| Cloud TPU v6e (Trillium) | High-efficiency next-gen training & serving | 32 GB/chip | Medium-High | +| Cloud TPU v7x (Ironwood) | Ultra-scale inference & agentic workflows | 192 GB/chip | High | +| NVIDIA A100 | Large model inference, enterprise ML | 40/80 GB | High | +| NVIDIA H100 / H200 | Frontier model training, high throughput | 80/141 GB | Highest | +| NVIDIA B200 (A4) | Blackwell-scale training, FP4 precision | 192 GB | Highest | +| NVIDIA GB200 (A4X) | Rack-scale AI (Grace Blackwell Superchip) | Massive | Highest | + +## Autoscaling LLM Inference + +### GPU-based autoscaling + +Use custom metrics for GPU utilization: + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: llm-hpa +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: llm-server + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Pods + pods: + metric: + name: gpu_duty_cycle + target: + type: AverageValue + averageValue: "80" +``` + +### Best practices for inference autoscaling + +1. **Use DCGM metrics**: Golden path enables DCGM monitoring for GPU utilization metrics +2. 
**Set appropriate minReplicas**: At least 1 for always-on serving; 0 for batch/on-demand +3. **Tune scale-down delay**: LLM model loading is slow; use longer stabilization windows +4. **Consider queue depth**: Scale on pending requests rather than pure GPU utilization for latency-sensitive workloads + +## Optimization Tips + +- **Quantization**: Use quantized models (GPTQ, AWQ) to reduce GPU memory and increase throughput +- **Batching**: Configure model server batch size for throughput vs latency trade-off +- **Tensor parallelism**: Split large models across multiple GPUs within a node +- **KV cache optimization**: Tune `--gpu-memory-utilization` in vLLM for KV cache allocation + +## Troubleshooting + +| Issue | Cause | Fix | +|-------|-------|-----| +| Invalid model/accelerator combination | Unsupported tuple | Re-run `gcloud container ai profiles list --model=` | +| GPU quota exceeded | Regional quota limit | Request quota increase or try a different region | +| OOM on GPU | Model too large for accelerator | Use larger GPU, enable quantization, or use tensor parallelism | +| Slow cold start | Large model loading from registry | Use local SSD for model caching; pre-pull images | diff --git a/.agents/skills/gke-basics/references/gke-multitenancy.md b/.agents/skills/gke-basics/references/gke-multitenancy.md new file mode 100644 index 0000000..78458a3 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-multitenancy.md @@ -0,0 +1,163 @@ +# GKE Multi-Tenancy + +This reference covers enterprise multi-tenancy patterns on GKE, including namespace isolation, RBAC planning, resource quotas, and network segmentation. 
+ +> **MCP Tools:** `apply_k8s_manifest`, `get_k8s_resource`, `check_k8s_auth`, `describe_k8s_resource`, `delete_k8s_resource` + +## When to Use + +- Multiple teams sharing a single GKE cluster +- Isolating workloads by environment (dev/staging/prod) within one cluster +- Implementing least-privilege access control +- Cost allocation across teams or projects + +## Multi-Tenancy Models + +| Model | Isolation | Complexity | Cost | +|-------|-----------|------------|------| +| **Namespace-per-team** | Soft (RBAC + Network Policy) | Low | Lowest (shared cluster) | +| **Namespace-per-environment** | Soft | Low | Low | +| **Node pool-per-team** | Medium (dedicated compute) | Medium | Medium | +| **Cluster-per-team** | Hard (full isolation) | High | Highest | + +> **Golden path recommendation**: Start with namespace-per-team for cost efficiency. Escalate to stronger isolation only when compliance requires it. + +## Namespace Isolation Setup + +### 1. Create Namespaces + +```bash +kubectl create namespace team-a +kubectl create namespace team-b +kubectl label namespace team-a team=a +kubectl label namespace team-b team=b +``` + +### 2. RBAC Configuration + +**Principle**: Grant minimal permissions per namespace. Never bind to `system:authenticated`. + +```yaml +# Namespace-scoped role for a team +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: team-a-developer + namespace: team-a +rules: +- apiGroups: ["", "apps", "batch"] + resources: ["pods", "deployments", "services", "configmaps", "jobs"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: team-a-developers + namespace: team-a +subjects: +- kind: Group + name: "team-a@example.com" # Google Group + apiGroup: rbac.authorization.k8s.io +roleRef: + kind: Role + name: team-a-developer + apiGroup: rbac.authorization.k8s.io +``` + +**RBAC best practices:** Use Google Groups for subject bindings. 
Prefer namespace-scoped Roles over ClusterRoles. See [gke-security.md](./gke-security.md) for full RBAC hardening guidance. + +### 3. Resource Quotas + +Prevent any single team from consuming all cluster resources: + +```yaml +apiVersion: v1 +kind: ResourceQuota +metadata: + name: team-a-quota + namespace: team-a +spec: + hard: + requests.cpu: "10" + requests.memory: "20Gi" + limits.cpu: "20" + limits.memory: "40Gi" + pods: "50" + services: "10" + persistentvolumeclaims: "10" +``` + +### 4. LimitRanges + +Set default and maximum resource constraints per container: + +```yaml +apiVersion: v1 +kind: LimitRange +metadata: + name: team-a-limits + namespace: team-a +spec: + limits: + - type: Container + default: + cpu: "500m" + memory: "512Mi" + defaultRequest: + cpu: "100m" + memory: "128Mi" + max: + cpu: "4" + memory: "8Gi" +``` + +### 5. Network Isolation + +Apply default-deny per namespace (see [gke-security.md](./gke-security.md)), then allow intra-team traffic: + +```yaml +# Allow same-namespace pods to talk + DNS +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-same-namespace + namespace: team-a +spec: + podSelector: {} + ingress: + - from: + - podSelector: {} + egress: + - to: + - podSelector: {} + - to: # Allow DNS + - namespaceSelector: {} + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 +``` + +## Cost Allocation + +### Labels for Cost Attribution + +```bash +# Label namespaces for billing +kubectl label namespace team-a cost-center=engineering +kubectl label namespace team-b cost-center=data-science +``` + +### GKE Cost Allocation + +Enable GKE cost allocation to break down costs by namespace and label: + +```bash +gcloud container clusters update --region \ + --enable-cost-allocation +``` + +View in Cloud Billing > GKE Cost Allocation. 
+ diff --git a/.agents/skills/gke-basics/references/gke-networking.md b/.agents/skills/gke-basics/references/gke-networking.md new file mode 100644 index 0000000..20eb5b4 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-networking.md @@ -0,0 +1,131 @@ +# GKE Networking + +This reference covers networking configuration for GKE clusters. The golden path enforces private, VPC-native clusters with Dataplane V2. + +> **MCP Tools:** `get_cluster`, `update_cluster`, `apply_k8s_manifest`, `get_k8s_resource` + +## Golden Path Networking Defaults + +| Setting | Golden Path Value | Day-0/1 | Notes | +|---------|-------------------|---------|-------| +| `privateClusterConfig.enablePrivateNodes` | `true` | Day-0 | Nodes have no public IPs | +| `masterAuthorizedNetworksConfig.privateEndpointEnforcementEnabled` | `true` | Day-0 | Control plane only reachable via private endpoint or DNS | +| `controlPlaneEndpointsConfig.dnsEndpointConfig.allowExternalTraffic` | `true` | Day-0 | Allows DNS-based access from outside VPC | +| `networkConfig.datapathProvider` | `ADVANCED_DATAPATH` (Dataplane V2) | Day-0 | eBPF-based, built-in Network Policy | +| `networkConfig.dnsConfig.clusterDns` | `CLOUD_DNS` | Day-0 | Managed DNS, more reliable than kube-dns | +| `networkConfig.enableIntraNodeVisibility` | `true` | Day-1 | VPC Flow Logs for intra-node traffic | +| `networkConfig.gatewayApiConfig.channel` | `CHANNEL_STANDARD` | Day-1 | Gateway API support | +| `ipAllocationPolicy.autoIpamConfig.enabled` | `true` | Day-0 | Automatic IP range management | +| `ipAllocationPolicy.createSubnetwork` | `true` | Day-0 | Auto-create dedicated subnet | +| `defaultMaxPodsConstraint.maxPodsPerNode` | `48` | Day-0 | Conservative default; 110 for high density | + +## Private Cluster Access Patterns + +The golden path creates a private cluster. Users access it via: + +1. **DNS endpoint (default)**: `allowExternalTraffic: true` enables access via the cluster's DNS endpoint from outside the VPC. 
No VPN required. +2. **Private endpoint**: Direct access from within the VPC or via Cloud VPN/Interconnect. +3. **Authorized networks**: Add specific CIDRs to `masterAuthorizedNetworksConfig` for IP-based access control. + +```bash +# Access private cluster via DNS endpoint (golden path default) +gcloud container clusters get-credentials \ + --region --dns-endpoint \ + --quiet + +# Access via private endpoint (from within VPC) +gcloud container clusters get-credentials \ + --region --internal-ip \ + --quiet +``` + +## Bring-Your-Own VPC/Subnet + +If the customer has existing network infrastructure: + +```bash +gcloud container clusters create-auto \ + --region \ + --network \ + --subnetwork \ + --cluster-secondary-range-name \ + --services-secondary-range-name \ + --enable-private-nodes \ + --enable-master-authorized-networks \ + --quiet +``` + +> **Day-0 Warning**: VPC, subnet, and IP ranges cannot be changed after cluster creation. + +## IP Planning + +| Resource | Golden Path | Notes | +|----------|-------------|-------| +| Pod CIDR | `/17` (auto) | ~32K pod IPs; size based on maxPodsPerNode | +| Service CIDR | `/20` (auto) | ~4K service IPs | +| Node subnet | auto-created | /20 recommended for growth | +| Max pods/node | 48 | Each node gets a /25 pod range; set to 110 for /24 per node | + +**Pod CIDR sizing rule of thumb:** +- `maxPodsPerNode=48` -> each node uses a `/25` (128 IPs) from pod CIDR +- `maxPodsPerNode=110` -> each node uses a `/24` (256 IPs) from pod CIDR +- Larger maxPodsPerNode = fewer nodes fit in a given CIDR + +## Ingress + +**Gateway API** (golden path, enabled via `gatewayApiConfig.channel: CHANNEL_STANDARD`): + +```yaml +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: external-http +spec: + gatewayClassName: gke-l7-global-external-managed + listeners: + - name: http + protocol: HTTP + port: 80 +``` + +**Alternatives:** +- `gke-l7-regional-external-managed` — regional external +- `gke-l7-rilb` — internal load 
balancer +- Istio service mesh — for advanced traffic management, mTLS + +## Egress + +- Default: private nodes have no public IPs, so outbound internet access requires a Cloud NAT gateway on the cluster's VPC/region (it is not created automatically) +- For static egress IPs: configure Cloud NAT with manual IP allocation +- For restricted egress: route through a firewall appliance via custom routes + +## Network Policy + +Dataplane V2 (golden path) provides built-in Network Policy enforcement — no additional addon needed. Apply default-deny per namespace, then allow specific flows. + +> See [gke-security.md](./gke-security.md) for default-deny policy and [gke-multitenancy.md](./gke-multitenancy.md) for per-team allow policies. + +## Cloud Armor (Recommended for Public-Facing Services) + +Cloud Armor provides WAF and DDoS protection. **Not a golden path default** — recommended for any service with public ingress. Link via `BackendConfig` (for Services exposed through Ingress; with Gateway API, attach the policy with a `GCPBackendPolicy` instead): + +```yaml +# 1. Create BackendConfig referencing your Cloud Armor policy +apiVersion: cloud.google.com/v1 +kind: BackendConfig +metadata: + name: my-backend-config +spec: + securityPolicy: + name: my-cloud-armor-policy +--- +# 2. Annotate your Service +# cloud.google.com/backend-config: '{"default": "my-backend-config"}' +``` + +## SSL, Container-Native LB, and PSC + +- **Google-managed SSL certificates**: With Gateway API, use Certificate Manager (or pre-shared Google-managed SSL certificates); the `ManagedCertificate` CRD is supported only with Ingress. Auto-provisions and renews. +- **Container-native LB**: Enabled by default on VPC-native clusters (golden path). Targets pods via NEGs, bypassing iptables. Annotation: `cloud.google.com/neg: '{"ingress": true}'`. +- **Private Service Connect (PSC)**: Use `ServiceAttachment` CRD to expose services across VPCs without peering. 
+ diff --git a/.agents/skills/gke-basics/references/gke-observability.md b/.agents/skills/gke-basics/references/gke-observability.md new file mode 100644 index 0000000..9b940a2 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-observability.md @@ -0,0 +1,168 @@ +# GKE Observability + +This reference covers monitoring, logging, and metrics configuration for GKE. The golden path enables comprehensive observability including control-plane metrics. + +> **MCP Tools:** `get_cluster`, `list_k8s_events`, `get_k8s_logs`, `get_k8s_cluster_info`, `describe_k8s_resource`. **CLI-only:** `gcloud container clusters update --monitoring=...`, `gcloud logging read` + +## Golden Path Observability Defaults + +| Setting | Golden Path Value | Notes | +|---------|-------------------|-------| +| `loggingConfig` components | SYSTEM_COMPONENTS, WORKLOADS | Full workload logging | +| `monitoringConfig` components | SYSTEM_COMPONENTS, STORAGE, POD, DEPLOYMENT, STATEFULSET, DAEMONSET, HPA, JOBSET, CADVISOR, KUBELET, DCGM, APISERVER, SCHEDULER, CONTROLLER_MANAGER | Full suite including control-plane | +| `managedPrometheusConfig.enabled` | `true` | Google-managed Prometheus | +| `advancedDatapathObservabilityConfig.enableMetrics` | `true` | Dataplane V2 flow metrics | +| `loggingService` | `logging.googleapis.com/kubernetes` | Cloud Logging | +| `monitoringService` | `monitoring.googleapis.com/kubernetes` | Cloud Monitoring | + +### Control-Plane Metrics (Golden Path Addition) + +The golden path adds three control-plane monitoring components not present in default clusters: + +| Component | What It Monitors | +|-----------|-----------------| +| `APISERVER` | API server request latency, error rates, admission webhook performance | +| `SCHEDULER` | Scheduling latency, pending pods, scheduling failures | +| `CONTROLLER_MANAGER` | Controller work queue depth, reconciliation latency | + +These are critical for diagnosing cluster-level issues (slow API responses, scheduling delays, stuck 
controllers). + +## Enabling Full Monitoring + +```bash +# Enable golden path monitoring suite +gcloud container clusters update <CLUSTER_NAME> --region <REGION> \ + --monitoring=SYSTEM,API_SERVER,SCHEDULER,CONTROLLER_MANAGER,STORAGE,POD,DEPLOYMENT,STATEFULSET,DAEMONSET,HPA,JOBSET,CADVISOR,KUBELET,DCGM \ + --quiet + +# Enable Managed Prometheus +gcloud container clusters update <CLUSTER_NAME> --region <REGION> \ + --enable-managed-prometheus \ + --quiet + +# Enable Dataplane V2 observability metrics +gcloud container clusters update <CLUSTER_NAME> --region <REGION> \ + --enable-dataplane-v2-metrics \ + --quiet +``` + +## Managed Prometheus + +Golden path enables Google Managed Prometheus for metrics collection and querying. + +**Querying metrics:** +- Use Cloud Monitoring Metrics Explorer in the console +- Use PromQL via the Prometheus UI or API +- Grafana dashboards using Managed Service for Prometheus as the data source + +**Key GKE metrics:** + +| Metric | Source | Use | +|--------|--------|-----| +| `container_cpu_usage_seconds_total` | cAdvisor | Pod CPU usage | +| `container_memory_working_set_bytes` | cAdvisor | Pod memory usage | +| `kube_pod_status_phase` | kube-state-metrics | Pod lifecycle | +| `apiserver_request_duration_seconds` | API Server | Control plane latency | +| `scheduler_scheduling_duration_seconds` | Scheduler | Scheduling performance | +| `node_cpu_seconds_total` | Kubelet | Node CPU | +| `DCGM_FI_DEV_GPU_UTIL` | DCGM | GPU utilization | + +## Live Resource Usage (kubectl-only) + +No MCP or gcloud equivalent exists for live resource usage. 
Use `kubectl top`: + +```bash +kubectl top pods --all-namespaces --sort-by=cpu +kubectl top nodes +kubectl top pods --containers -n <NAMESPACE> # per-container breakdown +``` + +## Cloud Logging (gcloud-only) + +**Querying cluster logs** (no MCP equivalent — use `gcloud logging read`): + +```bash +# System component logs +gcloud logging read \ + 'resource.type="k8s_cluster" AND resource.labels.cluster_name="<CLUSTER_NAME>"' \ + --project <PROJECT_ID> --limit 50 \ + --quiet + +# Workload logs for a specific namespace +gcloud logging read \ + 'resource.type="k8s_container" AND resource.labels.cluster_name="<CLUSTER_NAME>" AND resource.labels.namespace_name="<NAMESPACE>"' \ + --project <PROJECT_ID> --limit 50 \ + --quiet + +# Audit logs (who did what) +gcloud logging read \ + 'resource.type="k8s_cluster" AND logName:"cloudaudit.googleapis.com"' \ + --project <PROJECT_ID> --limit 50 \ + --quiet +``` + +## Diagnostic Settings + +For security monitoring and troubleshooting, first check which logging components are currently enabled on the cluster: + +```bash +# View current logging config +gcloud container clusters describe <CLUSTER_NAME> --region <REGION> \ + --format="yaml(loggingConfig)" \ + --quiet +``` + +## Alerting + +Set up alerts for critical conditions: + +| Condition | Metric | Threshold | +|-----------|--------|-----------| +| High API server latency | `apiserver_request_duration_seconds` | P99 > 5s | +| Pod crash loops | `kube_pod_container_status_restarts_total` | > 5 in 10min | +| Node not ready | `kube_node_status_condition` | condition=Ready, status!=True | +| High GPU utilization | `DCGM_FI_DEV_GPU_UTIL` | > 95% sustained | +| PVC near capacity | `kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes` | > 85% | +| Scheduling failures | `scheduler_schedule_attempts_total{result="error"}` | > 0 | + +## Cost Considerations + +Monitoring and logging have associated costs: + +- **Cloud Logging**: Charged per GiB ingested beyond free tier (50 GiB/project/month) +- **Cloud Monitoring**: Free for GKE system metrics; custom metrics charged per time series +- **Managed Prometheus**: Charged per sample ingested + +To reduce 
costs in non-production: +```bash +# Reduce to system-only monitoring +gcloud container clusters update --region \ + --monitoring=SYSTEM \ + --quiet +``` + +## Distributed Tracing & Continuous Profiling (Recommended) + +**Not golden path defaults** — recommended for production microservice architectures and performance-sensitive workloads. + +- **Cloud Trace**: Add OpenTelemetry SDK to your app with the `opentelemetry-operations-go` (or equivalent) exporter. Traces appear in Cloud Trace console. Identifies cross-service latency bottlenecks. +- **Cloud Profiler**: Add the Cloud Profiler agent to your app. Profiles CPU and memory usage in production with low overhead. Identifies hotspots and compares across versions. + +## LQL Query Examples + +Common Logging Query Language patterns for GKE troubleshooting: + +``` +# Error logs for a specific container +resource.type="k8s_container" AND resource.labels.container_name="my-app" AND severity>=ERROR + +# OOMKilled events +resource.type="k8s_event" AND jsonPayload.reason="OOMKilling" + +# Pod scheduling failures +resource.type="k8s_event" AND jsonPayload.reason="FailedScheduling" + +# Audit logs (who did what) +resource.type="k8s_cluster" AND logName:"cloudaudit.googleapis.com" +``` + diff --git a/.agents/skills/gke-basics/references/gke-reliability.md b/.agents/skills/gke-basics/references/gke-reliability.md new file mode 100644 index 0000000..8b2f312 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-reliability.md @@ -0,0 +1,169 @@ +# GKE Reliability + +This reference covers high availability and reliability configuration for GKE clusters and workloads. 
+ +> **MCP Tools:** `get_cluster`, `get_k8s_resource`, `describe_k8s_resource`, `apply_k8s_manifest`, `list_k8s_events` + +## Golden Path Reliability Defaults + +| Setting | Golden Path Value | Notes | +|---------|-------------------|-------| +| Cluster type | Regional (4 zones: us-central1-a/b/c/f) | Control plane replicated across zones | +| Upgrade strategy | SURGE (`maxSurge: 1`) | Rolling upgrades with extra capacity | +| Auto-repair | `true` | Unhealthy nodes replaced automatically | +| Auto-upgrade | `true` | Nodes follow control plane version | +| Release channel | REGULAR | Balanced freshness and stability | +| Stateful HA | Enabled | Leader election for stateful workloads | + +## Workflows + +### 1. Verify Cluster High Availability + +``` +# MCP (preferred) +get_cluster(name="projects//locations//clusters/", + readMask="location,locations,nodePools.locations") + +# gcloud fallback +gcloud container clusters describe --region \ + --format="json(location, locations)" \ + --quiet +``` + +- If `location` is a region (e.g., `us-central1`), the control plane is regional +- If `locations` has multiple entries, nodes span multiple zones + +### 2. Pod Disruption Budgets (PDBs) + +PDBs ensure minimum pod availability during voluntary disruptions (node upgrades, autoscaler scale-down). + +**Check existing PDBs:** + +``` +# MCP (preferred) +get_k8s_resource(parent="...", resourceType="poddisruptionbudget") + +# kubectl fallback +kubectl get pdb --all-namespaces +``` + +**Create PDB:** + +```yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: my-app-pdb + namespace: default +spec: + minAvailable: 2 # Or use maxUnavailable: 1 + selector: + matchLabels: + app: my-app +``` + +> Every production Deployment with 2+ replicas should have a PDB. + +### 3. Health Probes + +Every production container should have liveness and readiness probes. Startup probes are recommended for slow-starting apps. 
+ +**Check existing probes:** + +``` +# MCP (preferred) +describe_k8s_resource(parent="...", resourceType="deployment", name="", namespace="") + +# kubectl fallback +kubectl get deployment -n -o yaml | grep -E "livenessProbe|readinessProbe|startupProbe" +``` + +**Recommended probe configuration:** + +```yaml +spec: + containers: + - name: app + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 15 + periodSeconds: 10 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /readyz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 3 + startupProbe: # For slow-starting apps + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 5 + failureThreshold: 30 # 30 * 5s = 150s max startup time +``` + +- **Readiness**: Determines when a pod can accept traffic +- **Liveness**: Determines when to restart a container +- **Startup**: Disables liveness/readiness until the app is ready (prevents premature restarts) + +### 4. Graceful Shutdown + +Ensure applications handle `SIGTERM` and drain in-flight requests: + +```yaml +spec: + terminationGracePeriodSeconds: 30 # Default; increase for long-running requests + containers: + - name: app + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "sleep 5"] # Allow LB to deregister +``` + +### 5. Topology Spread Constraints + +Distribute pods across zones and nodes to survive failures: + +```yaml +spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app: my-app + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: my-app +``` + +- **Zone spread** (`DoNotSchedule`): Hard requirement -- pods must be balanced across zones +- **Node spread** (`ScheduleAnyway`): Best-effort -- prefer distribution but don't block scheduling + +### 6. 
Replicas + +| Workload Type | Minimum Replicas | Reason | +|--------------|-----------------|--------| +| Stateless web/API | 2 | Survive single pod/node failure | +| Critical services | 3 | Survive zone failure with zone spread | +| Stateful (databases) | 3 (with replication) | Application-level quorum | +| Batch/jobs | 1 | Ephemeral by nature | + +## Best Practices + +1. **Regional clusters for production**: Always use regional clusters to survive zone failures +2. **PDBs for everything**: Every production workload with 2+ replicas needs a PDB +3. **Probes for all containers**: At minimum, readiness probes on every production container +4. **Zone spreading**: Use topology spread constraints to distribute pods across failure domains +5. **Graceful shutdown**: Handle SIGTERM and set appropriate `terminationGracePeriodSeconds` +6. **Maintenance windows**: Schedule upgrades during low-traffic periods (see [gke-upgrades.md](./gke-upgrades.md)) diff --git a/.agents/skills/gke-basics/references/gke-scaling.md b/.agents/skills/gke-basics/references/gke-scaling.md new file mode 100644 index 0000000..2ce2a6d --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-scaling.md @@ -0,0 +1,149 @@ +# GKE Workload Scaling + +This reference covers scaling workloads on GKE. The golden path enables VPA, OPTIMIZE_UTILIZATION autoscaling profile, and Node Auto Provisioning by default. 
+ +> **MCP Tools:** `get_k8s_resource`, `describe_k8s_resource`, `apply_k8s_manifest`, `patch_k8s_resource`, `get_cluster`, `update_cluster`, `update_node_pool` + +## Golden Path Scaling Defaults + +| Setting | Golden Path Value | Notes | +|---------|-------------------|-------| +| `autoscaling.autoscalingProfile` | `OPTIMIZE_UTILIZATION` | Aggressive scale-down for cost savings | +| `verticalPodAutoscaling.enabled` | `true` | VPA recommendations available | +| `autoscaling.enableNodeAutoprovisioning` | `true` | NAP creates node pools on demand | +| GPU resource limits (T4, A100) | `1000000000` each | NAP can provision GPU nodes | + +## Scaling Mechanisms + +### 1. Manual Scaling + +> **kubectl-only** — no MCP equivalent for `kubectl scale`. Use kubectl directly. + +```bash +kubectl scale deployment --replicas= -n +``` + +### 2. Horizontal Pod Autoscaling (HPA) + +Scales the number of pods based on metrics. + +**Quick setup (kubectl-only — no MCP equivalent for `kubectl autoscale`):** + +```bash +kubectl autoscale deployment --cpu-percent=50 --min=1 --max=10 +``` + +**Manifest approach (recommended — use MCP `apply_k8s_manifest`):** + +See [assets/hpa-example.yaml](../assets/hpa-example.yaml) for a template. + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: -hpa +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 50 +``` + +### 3. Vertical Pod Autoscaling (VPA) + +Adjusts CPU and memory requests to match actual usage. Enabled by default on golden path. 
+ +**Update modes:** +- `Off` — recommendations only (safest, start here) +- `Initial` — sets resources only at pod creation +- `Auto` — restarts pods to apply new resource values +- `InPlaceOrRecreate` — updates resources without restart when possible (GKE 1.34+) + +**Create VPA in recommendation mode:** + +```yaml +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: -vpa +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: + updatePolicy: + updateMode: "Off" +``` + +**Read recommendations (prefer MCP `describe_k8s_resource`):** + +``` +# MCP (preferred) +describe_k8s_resource(parent="...", resourceType="verticalpodautoscaler", name="-vpa", namespace="") + +# kubectl fallback +kubectl get vpa -vpa -o jsonpath='{.status.recommendation}' +``` + +See [assets/vpa-example.yaml](../assets/vpa-example.yaml) for a full template. + +### 4. Cluster Autoscaler / Node Auto Provisioning (NAP) + +On Autopilot (golden path), node scaling is fully managed. NAP automatically creates and sizes node pools based on workload demands. + +**For Standard clusters:** + +```bash +# Enable cluster autoscaler on a node pool +gcloud container clusters update --region \ + --enable-autoscaling --node-pool \ + --min-nodes --max-nodes \ + --quiet + +# Enable NAP +gcloud container clusters update --region \ + --enable-autoprovisioning \ + --min-cpu --max-cpu \ + --min-memory --max-memory \ + --quiet +``` + +**Autoscaling profiles:** + +| Profile | Behavior | Golden Path? | +|---------|----------|-------------| +| `BALANCED` | Default GKE; conservative scale-down | No | +| `OPTIMIZE_UTILIZATION` | Aggressive scale-down; lower idle resources | **Yes** | + +## Best Practices + +1. **Define resource requests**: HPA and VPA rely on accurate requests. Always set them. +2. **Avoid metric conflicts**: Do not use HPA and VPA on the same metric. Typical pattern: HPA on CPU, VPA on memory. +3. 
**Pod Disruption Budgets**: Define PDBs for all production workloads to ensure availability during scaling events. +4. **HPA stabilization**: HPA has a default 5-minute stabilization window for scale-down (scale-up has none by default). Tune `behavior` for faster response if needed. +5. **VPA "Auto" caution**: Auto mode restarts pods. Ensure your app handles SIGTERM gracefully. VPA requires at least 2 replicas for evictions by default. +6. **Use ComputeClasses**: For workload-specific node targeting (Spot fallback, GPU, specific machine families), use ComputeClasses instead of node selectors. + +## Rightsizing Workflow + +1. Deploy VPA in `Off` mode for 24+ hours +2. Read recommendations: `kubectl describe vpa <VPA_NAME>` +3. Compare `target` values against current `requests` +4. Apply with 20% buffer: `new_request = target * 1.2` +5. Apply the new requests by patching the Deployment (MCP `patch_k8s_resource` or `kubectl patch`) + +| Condition | Recommendation | Risk | +|-----------|----------------|------| +| CPU request >5x P95 actual | Reduce to `P95 * 1.2` | Medium | +| Memory request >3x P95 actual | Reduce to `P95 * 1.2` | Medium | +| CPU request >2x P95 actual | Rightsizing with 20% buffer | Low | +| No resource limits set | Add limits to prevent noisy-neighbor | Low | diff --git a/.agents/skills/gke-basics/references/gke-security.md b/.agents/skills/gke-basics/references/gke-security.md new file mode 100644 index 0000000..d4699ca --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-security.md @@ -0,0 +1,226 @@ +# GKE Security + +This reference covers security configuration for GKE clusters. The golden path enforces a hardened security posture by default. 
+ +> **MCP Tools:** `get_cluster`, `check_k8s_auth`, `get_k8s_resource`, `apply_k8s_manifest`, `update_cluster` + +## Golden Path Security Defaults + +| Setting | Golden Path Value | Day-0/1 | Notes | +|---------|-------------------|---------|-------| +| `workloadIdentityConfig.workloadPool` | `.svc.id.goog` | Day-0 | Workload Identity Federation for Pods | +| `secretManagerConfig.enabled` | `true` | Day-1 | Google Secret Manager integration | +| `secretManagerConfig.rotationConfig` | `enabled: true, rotationInterval: 120s` | Day-1 | Automatic secret rotation | +| `rbacBindingConfig.enableInsecureBindingSystemAuthenticated` | `false` | Day-0 | Blocks legacy `system:authenticated` bindings | +| `rbacBindingConfig.enableInsecureBindingSystemUnauthenticated` | `false` | Day-0 | Blocks legacy `system:unauthenticated` bindings | +| `nodeConfig.shieldedInstanceConfig.enableSecureBoot` | `true` | Day-0 | Verifiable boot integrity | +| `nodeConfig.shieldedInstanceConfig.enableIntegrityMonitoring` | `true` | Day-0 | Runtime integrity checks | +| `nodeConfig.workloadMetadataConfig.mode` | `GKE_METADATA` | Day-0 | Blocks legacy metadata API, enforces Workload Identity | +| Private cluster + Dataplane V2 settings | See [gke-networking.md](./gke-networking.md) | Day-0 | Private nodes, private endpoint enforcement, ADVANCED_DATAPATH | + +## Workload Identity Federation + +Workload Identity is the recommended way for pods to access Google Cloud APIs. It eliminates the need for static service account keys. + +### Setup + +```bash +# 1. Create a Google Service Account (GSA) +gcloud iam service-accounts create \ + --project \ + --display-name "Workload Identity SA" \ + --quiet + +# 2. Grant IAM roles to the GSA +gcloud projects add-iam-policy-binding \ + --member "serviceAccount:@.iam.gserviceaccount.com" \ + --role "" \ + --quiet + +# 3. Create Kubernetes Service Account (KSA) +kubectl create namespace +kubectl create serviceaccount --namespace + +# 4. 
Bind KSA to GSA +gcloud iam service-accounts add-iam-policy-binding \ + @.iam.gserviceaccount.com \ + --role roles/iam.workloadIdentityUser \ + --member "serviceAccount:.svc.id.goog[/]" \ + --quiet + +# 5. Annotate KSA +kubectl annotate serviceaccount \ + --namespace \ + iam.gke.io/gcp-service-account=@.iam.gserviceaccount.com +``` + +> See [assets/workload-identity-pod.yaml](../assets/workload-identity-pod.yaml) for a test pod. + +### Verification + +```bash +kubectl run workload-identity-test \ + --image=gcr.io/google.com/cloudsdktool/cloud-sdk:slim \ + --serviceaccount= --namespace= \ + --rm -it -- gcloud auth list --quiet +``` + +## Secret Manager Integration + +The golden path enables Secret Manager with automatic rotation. Secrets are synced to Kubernetes Secrets. + +```bash +# Verify Secret Manager is enabled on cluster +gcloud container clusters describe --region \ + --format="value(secretManagerConfig.enabled)" \ + --quiet + +# Enable if not already (Day-1 change) +gcloud container clusters update --region \ + --enable-secret-manager \ + --secret-manager-rotation-interval=120s \ + --quiet +``` + +## RBAC Hardening + +The golden path disables insecure legacy RBAC bindings that grant broad access to `system:authenticated` and `system:unauthenticated` groups. 
+ +```bash +# Verify insecure bindings are disabled +gcloud container clusters describe --region \ + --format="yaml(rbacBindingConfig)" \ + --quiet +``` + +**Best practices for RBAC:** +- Use namespace-scoped Roles over cluster-wide ClusterRoles +- Bind to specific Groups or ServiceAccounts, never to `system:authenticated` +- Audit permissions via MCP: `check_k8s_auth(parent="...", verb="list", resourceType="pods", namespace="...")` (or `kubectl auth can-i --list --as=`) +- Review bindings via MCP: `get_k8s_resource(parent="...", resourceType="clusterrolebinding")` (or `kubectl get clusterrolebindings,rolebindings --all-namespaces`) + +> See [gke-multitenancy.md](./gke-multitenancy.md) for enterprise RBAC planning and https://docs.cloud.google.com/kubernetes-engine/docs/best-practices/rbac + +## Binary Authorization + +Not enabled in golden path by default but recommended for production image provenance: + +```bash +# Enable Binary Authorization +gcloud container clusters update --region \ + --binauthz-evaluation-mode=PROJECT_SINGLETON_POLICY_ENFORCE \ + --quiet +``` + +## Network Policies + +Dataplane V2 (golden path) provides built-in Network Policy enforcement. Apply default-deny per namespace: + +``` +# MCP (preferred) +apply_k8s_manifest(parent="...", yamlManifest="") + +# kubectl fallback +kubectl apply -f skills/gke/assets/default-deny-netpol.yaml -n +``` + +## GKE Sandbox (gVisor) + +For running untrusted workloads in an isolated sandbox: + +```bash +# Enable on cluster (Standard clusters) +gcloud container clusters update --region --enable-gke-sandbox --quiet + +# Use in pod spec +# Add: runtimeClassName: gvisor +``` + +## Pod Security Standards (Golden Path) + +Pod Security Standards define three profiles that restrict what pods can do. The **`restricted` profile is the golden path default** for production namespaces. 
+ +| Profile | Level | Use Case | +|---------|-------|----------| +| `privileged` | Unrestricted | System namespaces (`kube-system`), infrastructure controllers | +| `baseline` | Minimally restrictive | Shared/dev namespaces, legacy apps being migrated | +| `restricted` | **Golden path** | Production workloads -- blocks privilege escalation, host access, root | + +**Enforce via namespace labels (Pod Security Admission):** + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: production + labels: + pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/warn: restricted + pod-security.kubernetes.io/audit: restricted +``` + +**Gradual rollout strategy:** +1. Start with `warn` + `audit` on existing namespaces to identify violations +2. Fix non-compliant workloads (remove `privileged`, `hostNetwork`, root user, etc.) +3. Enable `enforce` once all workloads pass + +`restricted` blocks: running as root, privilege escalation, host networking/PID/IPC, host path volumes, and most capabilities. The golden path `workload-identity-pod.yaml` already complies. + +## Network Policy Logging (Recommended) + +With Dataplane V2 (golden path), you can enable logging for Network Policy decisions. **Not a golden path default** -- recommended for security auditing. + +```bash +gcloud container clusters update --region \ + --enable-network-policy-logging \ + --quiet +``` + +This logs allowed and denied connections, useful for troubleshooting Network Policy rules and auditing traffic flows. 
+ +## Common IAM Roles + +The five most common predefined IAM roles for GKE: + +| Role | Purpose | When to Use | +|------|---------|-------------| +| `roles/container.admin` | Full control over clusters and Kubernetes resources | Platform team admins managing cluster lifecycle | +| `roles/container.clusterAdmin` | Manage clusters but not project-level IAM | Cluster operators who create/delete clusters | +| `roles/container.developer` | Deploy workloads (pods, services, deployments) | Application developers deploying to existing clusters | +| `roles/container.viewer` | Read-only access to clusters and Kubernetes resources | Monitoring, auditing, or read-only dashboards | +| `roles/container.clusterViewer` | List and get cluster details only | CI/CD pipelines that need cluster metadata | + +> **Principle of least privilege**: Start with `roles/container.viewer` or `roles/container.developer` and escalate only as needed. Avoid granting `roles/container.admin` broadly. + +## Service Accounts & Agents + +- **GKE Service Agent** (`service-@container-engine-robot.iam.gserviceaccount.com`): Automatically created. Manages nodes, networking, and cluster operations on your behalf. Do not remove or modify its permissions. +- **Node Service Account**: By default, nodes use the Compute Engine default service account. For production, create a dedicated SA with minimal permissions and assign it via node pool config. +- **Workload Identity**: The recommended way for pods to access Google Cloud APIs. Maps a Kubernetes ServiceAccount to a Google IAM ServiceAccount — see [Workload Identity setup](#workload-identity-federation) above. 
+ +## Cross-Service Authentication Patterns + +Common patterns for granting GKE workloads access to other Google Cloud services: + +```bash +# Grant a GKE workload access to Cloud Storage +gcloud projects add-iam-policy-binding \ + --member "serviceAccount:@.iam.gserviceaccount.com" \ + --role "roles/storage.objectViewer" \ + --quiet + +# Grant a GKE workload access to Cloud SQL +gcloud projects add-iam-policy-binding \ + --member "serviceAccount:@.iam.gserviceaccount.com" \ + --role "roles/cloudsql.client" \ + --quiet + +# Grant a GKE workload access to Pub/Sub +gcloud projects add-iam-policy-binding \ + --member "serviceAccount:@.iam.gserviceaccount.com" \ + --role "roles/pubsub.subscriber" \ + --quiet +``` + +In all cases, the GSA must be bound to a KSA via Workload Identity (see setup above). The pod then uses the KSA to authenticate as the GSA. + diff --git a/.agents/skills/gke-basics/references/gke-storage.md b/.agents/skills/gke-basics/references/gke-storage.md new file mode 100644 index 0000000..3b96e61 --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-storage.md @@ -0,0 +1,136 @@ +# GKE Storage + +This reference covers storage configuration for GKE clusters including persistent disks, file storage, and cloud storage integration. 
+ +> **MCP Tools:** `apply_k8s_manifest`, `get_k8s_resource`, `describe_k8s_resource`, `get_cluster` + +## Golden Path Storage Defaults + +The golden path Autopilot config enables these CSI drivers: + +| Driver | Golden Path | Access Mode | Use Case | +|--------|-------------|-------------|----------| +| Compute Engine Persistent Disk CSI | Enabled (default) | ReadWriteOnce | Block storage for databases, single-pod workloads | +| Google Cloud Filestore CSI | Enabled | ReadWriteMany | Shared NFS for multi-pod access | +| Cloud Storage FUSE CSI | Enabled | ReadWriteMany / ReadOnlyMany | Mount GCS buckets as volumes | +| Parallelstore CSI | Enabled | ReadWriteMany | High-performance parallel file system | +| Boot disk type | `pd-balanced` | N/A | Node boot disks | + +## StorageClasses + +### Default StorageClasses + +GKE provides built-in StorageClasses: + +| StorageClass | Disk Type | Use Case | +|-------------|-----------|----------| +| `standard-rwo` | `pd-standard` | Cost-effective, low IOPS | +| `premium-rwo` | `pd-ssd` | High IOPS, databases | +| `standard-rwx` | Filestore (Basic HDD) | Shared NFS | +| `premium-rwx` | Filestore (Basic SSD) | Shared NFS, higher performance | + +### Custom StorageClass + +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: fast-regional +provisioner: pd.csi.storage.gke.io +parameters: + type: pd-ssd + replication-type: regional-pd # Replicate across 2 zones +volumeBindingMode: WaitForFirstConsumer +allowVolumeExpansion: true # Always enable for production +``` + +## PersistentVolumeClaims + +### Block Storage (ReadWriteOnce) + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: database-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: premium-rwo + resources: + requests: + storage: 100Gi +``` + +### Shared File Storage (ReadWriteMany via Filestore) + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shared-data +spec: + accessModes: + - 
ReadWriteMany + storageClassName: standard-rwx + resources: + requests: + storage: 1Ti # Filestore minimum is 1 TiB for Basic tier +``` + +### GCS Bucket Mount (Cloud Storage FUSE) + +Mount a GCS bucket as a volume without a PVC: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: gcs-reader + annotations: + gke-gcsfuse/volumes: "true" +spec: + containers: + - name: reader + image: busybox + command: ["ls", "/data"] + volumeMounts: + - name: gcs-bucket + mountPath: /data + volumes: + - name: gcs-bucket + csi: + driver: gcsfuse.csi.storage.gke.io + readOnly: true + volumeAttributes: + bucketName: +``` + +> Requires Workload Identity for the pod's service account to have `storage.objectViewer` on the bucket. + +## Volume Expansion + +If `allowVolumeExpansion: true` is set on the StorageClass, resize by updating the PVC: + +```bash +# kubectl +kubectl patch pvc -p '{"spec":{"resources":{"requests":{"storage":"200Gi"}}}}' +``` + +``` +# MCP (preferred) +patch_k8s_resource(parent="...", resourceType="persistentvolumeclaim", name="", + patch='{"spec":{"resources":{"requests":{"storage":"200Gi"}}}}') +``` + +Kubernetes automatically resizes the filesystem. + +## Best Practices + +1. **Always enable volume expansion**: Set `allowVolumeExpansion: true` on all StorageClasses +2. **Use regional PDs for production**: `replication-type: regional-pd` replicates across 2 zones for HA +3. **Use `WaitForFirstConsumer`**: Ensures the PV is provisioned in the same zone as the pod +4. **Choose the right disk type**: `pd-ssd` for databases, `pd-balanced` (golden path default) for general use, `pd-standard` for cold storage +5. **Use Filestore for shared access**: When multiple pods need to read/write the same files +6. **Use GCS FUSE for data pipelines**: Mount buckets directly for ML training data, logs, etc. +7. 
**Back up PVCs**: Use Backup for GKE (see [gke-backup-dr.md](./gke-backup-dr.md)) to protect persistent data diff --git a/.agents/skills/gke-basics/references/gke-upgrades.md b/.agents/skills/gke-basics/references/gke-upgrades.md new file mode 100644 index 0000000..91e1a5b --- /dev/null +++ b/.agents/skills/gke-basics/references/gke-upgrades.md @@ -0,0 +1,142 @@ +# GKE Upgrades & Maintenance + +This reference covers upgrade strategy, maintenance windows, and release channel management for GKE clusters. + +> **MCP Tools:** `get_cluster`, `get_k8s_version`, `update_cluster`, `update_node_pool`, `list_operations`, `get_operation`, `cancel_operation`, `get_k8s_resource` +> **CLI-only**: `gcloud container get-server-config` (available versions), `gcloud container clusters update --maintenance-window-*` (maintenance windows) + +## Golden Path Upgrade Defaults + +| Setting | Golden Path Value | Notes | +|---------|-------------------|-------| +| `releaseChannel.channel` | `REGULAR` | Balanced between freshness and stability | +| Maintenance exclusion | `NO_MINOR_UPGRADES`, 1 year | Prevents surprise minor version bumps | +| `upgradeSettings.strategy` | `SURGE` | Rolling upgrades with `maxSurge: 1` | +| Auto-repair | `true` | Unhealthy nodes are automatically replaced | +| Auto-upgrade | `true` | Nodes follow control plane version | + +## Release Channels + +| Channel | Cadence | Best For | +|---------|---------|----------| +| `RAPID` | Weeks after release | Dev/test, early access to features | +| `REGULAR` (golden path) | 2-3 months after Rapid | Production workloads | +| `STABLE` | 2-3 months after Regular | Risk-averse, highly regulated | + +```bash +# Check current channel +gcloud container clusters describe --region \ + --format="value(releaseChannel.channel)" \ + --quiet + +# Change channel (Day-1) +gcloud container clusters update --region \ + --release-channel \ + --quiet +``` + +## Maintenance Windows + +Control when GKE can perform automatic maintenance 
(upgrades, patches). + +```bash +# Set maintenance window (e.g., weekends 2am-6am UTC) +gcloud container clusters update --region \ + --maintenance-window-start "2026-01-01T02:00:00Z" \ + --maintenance-window-end "2026-01-01T06:00:00Z" \ + --maintenance-window-recurrence "FREQ=WEEKLY;BYDAY=SA,SU" \ + --quiet +``` + +### Maintenance Exclusions + +The golden path includes a 1-year `NO_MINOR_UPGRADES` exclusion to prevent automatic minor version changes. + +```bash +# Add maintenance exclusion +gcloud container clusters update --region \ + --add-maintenance-exclusion-name "freeze-1" \ + --add-maintenance-exclusion-start "2026-04-11T00:00:00Z" \ + --add-maintenance-exclusion-end "2027-04-11T00:00:00Z" \ + --add-maintenance-exclusion-scope NO_MINOR_UPGRADES \ + --quiet + +# Remove exclusion +gcloud container clusters update --region \ + --remove-maintenance-exclusion "freeze-1" \ + --quiet +``` + +**Exclusion scopes:** +- `NO_UPGRADES` — blocks all upgrades (max 30 days) +- `NO_MINOR_UPGRADES` — allows patch upgrades, blocks minor version changes (max 1 year) +- `NO_MINOR_OR_NODE_UPGRADES` — blocks minor and node upgrades (max 1 year) + +## Upgrade Strategy + +### SURGE (Golden Path) + +Rolling upgrade with configurable surge capacity: + +```bash +# Default: maxSurge=1 (one extra node during upgrade) +gcloud container node-pools update \ + --cluster --region \ + --max-surge-upgrade 1 --max-unavailable-upgrade 0 \ + --quiet +``` + +### Blue-Green (For Zero-Downtime Critical Workloads) + +```bash +gcloud container node-pools update \ + --cluster --region \ + --enable-blue-green-upgrade \ + --node-pool-soak-duration "3600s" \ + --quiet +``` + +## Pre-Upgrade Checklist + +1. **Check deprecations**: Review Kubernetes API deprecations between current and target version +2. **Review PDBs**: Ensure all production workloads have PodDisruptionBudgets +3. **Test in non-prod**: Upgrade a staging cluster first +4. 
**Check addon compatibility**: Verify third-party controllers support the target version +5. **Review node pool versions**: All node pools should be within 2 minor versions of the control plane + +```bash +# Check current versions +gcloud container clusters describe --region \ + --format="table(currentMasterVersion, nodePools[].version)" \ + --quiet + +# Check available upgrades +gcloud container get-server-config --region \ + --format="yaml(channels)" \ + --quiet + +# List deprecation warnings +kubectl get --raw /metrics | grep apiserver_requested_deprecated_apis +``` + +## Manual Upgrade (When Needed) + +```bash +# Upgrade control plane +gcloud container clusters upgrade --region \ + --master --cluster-version \ + --quiet + +# Upgrade node pool +gcloud container clusters upgrade --region \ + --node-pool \ + --quiet +``` + +## Best Practices + +1. **Stay on a release channel**: Manual version management is error-prone. Let GKE manage versions. +2. **Use maintenance windows**: Schedule upgrades during low-traffic periods. +3. **Set PDBs on everything**: Protects workloads during node drains. +4. **Monitor during upgrades**: Watch for pod eviction failures, CrashLoopBackOff, and scheduling issues. +5. **Don't skip minor versions**: Upgrade incrementally (1.28 -> 1.29 -> 1.30, not 1.28 -> 1.30). diff --git a/.agents/skills/gke-basics/references/iac-usage.md b/.agents/skills/gke-basics/references/iac-usage.md new file mode 100644 index 0000000..efc44ca --- /dev/null +++ b/.agents/skills/gke-basics/references/iac-usage.md @@ -0,0 +1,77 @@ +# GKE Infrastructure as Code + +GKE resources, including clusters and Kubernetes objects, can be provisioned and +managed using Terraform. 
+ +## Terraform + +Terraform uses two main providers for GKE: +* The **Google Cloud provider** connects to the Google Cloud API to manage + GKE cluster infrastructure using Terraform resources such as + `google_container_cluster` for the cluster itself, and + `google_container_node_pool` for nodes in Standard mode. +* The **Kubernetes provider** connects to the Kubernetes API to manage + workloads inside the cluster using Kubernetes resources such as + Deployments and Services. + + +### GKE Autopilot Cluster Example + +```hcl +resource "google_container_cluster" "primary" { + name = "my-gke-cluster" + location = "us-central1" + + enable_autopilot = true + + # Do NOT specify node configurations (like initial_node_count or node_config) + # in Autopilot mode; doing so causes a Terraform provider error. + + # Deletion protection should be set to false for testing + deletion_protection = false +} +``` + +### Deploying a Workload Example (Kubernetes Provider) + +```hcl +resource "kubernetes_deployment_v1" "default" { + metadata { + name = "hello-app" + } + spec { + replicas = 2 + selector { + match_labels = { + app = "hello-app" + } + } + template { + metadata { + labels = { + app = "hello-app" + } + } + spec { + container { + image = "us-docker.pkg.dev/google-samples/containers/gke/hello-app:2.0" + name = "hello-app" + } + } + } + } +} +``` + +### Reference Documentation + +- [Terraform Google Provider - Container Cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster) + +- [Terraform Kubernetes Provider](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs) + +## YAML Samples + +GKE cluster configurations and Kubernetes manifests can also be defined using +YAML for use with `kubectl apply` or Deployment Manager. 
+ +- [GKE YAML Samples](https://docs.cloud.google.com/docs/samples?product=googlekubernetesengine) diff --git a/.agents/skills/gke-basics/references/mcp-usage.md b/.agents/skills/gke-basics/references/mcp-usage.md new file mode 100644 index 0000000..66e6dbb --- /dev/null +++ b/.agents/skills/gke-basics/references/mcp-usage.md @@ -0,0 +1,72 @@ +# GKE MCP Server Usage + +The GKE MCP server provides 23 structured tools for cluster management, Kubernetes resource operations, and diagnostics — without requiring shell access or kubeconfig setup. + +## Connecting to the GKE MCP Server + +The GKE remote MCP server is available for AI clients that support the Model Context Protocol. For setup instructions, see https://docs.cloud.google.com/kubernetes-engine/docs/how-to/use-gke-mcp. + +## Available Tools + +All tools use hierarchical resource paths: + +``` +Project+Region: projects/{PROJECT}/locations/{REGION} +Cluster: projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER} +Node Pool: projects/{PROJECT}/locations/{REGION}/clusters/{CLUSTER}/nodePools/{POOL} +Operation: projects/{PROJECT}/locations/{REGION}/operations/{OP_ID} +``` + +Use `locations/-` to match all regions when listing. + +### Cluster Management + +| Tool | Mode | Purpose | +|------|------|---------| +| `list_clusters` | READ | Discover clusters in a project/region | +| `get_cluster` | READ | Inspect cluster config. 
Use `readMask` to select fields | +| `create_cluster` | MUTATE | Create a cluster from JSON config | +| `update_cluster` | DESTRUCTIVE | Change Day-1 cluster settings | + +### Node Pool Management + +| Tool | Mode | Purpose | +|------|------|---------| +| `list_node_pools` | READ | List pools in a cluster | +| `get_node_pool` | READ | Get pool details | +| `create_node_pool` | MUTATE | Add a pool (Standard clusters) | +| `update_node_pool` | DESTRUCTIVE | Modify a pool | + +### Kubernetes Resources + +| Tool | Mode | Purpose | +|------|------|---------| +| `get_k8s_resource` | READ | List/get any K8s resource (supports label/field selectors) | +| `describe_k8s_resource` | READ | Detailed info with events and conditions | +| `apply_k8s_manifest` | DESTRUCTIVE | Apply YAML manifests (supports `dryRun`) | +| `patch_k8s_resource` | DESTRUCTIVE | JSON patch resource fields | +| `delete_k8s_resource` | DESTRUCTIVE | Remove resources (supports `cascade`, `dryRun`) | +| `list_k8s_api_resources` | READ | Discover available resource types | + +### Diagnostics & Observability + +| Tool | Mode | Purpose | +|------|------|---------| +| `list_k8s_events` | READ | Scheduling failures, OOM kills, evictions | +| `get_k8s_logs` | READ | Container logs (supports `tail`, `since`, `previous`) | +| `get_k8s_cluster_info` | READ | Control plane and service endpoints | +| `get_k8s_version` | READ | Kubernetes server version | +| `get_k8s_rollout_status` | READ | Deployment/StatefulSet rollout progress | +| `check_k8s_auth` | READ | Verify RBAC permissions for a user/SA | + +### Operations + +| Tool | Mode | Purpose | +|------|------|---------| +| `list_operations` | READ | Pending/running cluster operations | +| `get_operation` | READ | Track create/upgrade progress | +| `cancel_operation` | DESTRUCTIVE | Abort stuck operations | + +## Tool Preference + +Default: **MCP tools > gcloud CLI > kubectl**. 
See [cli-reference.md](./cli-reference.md) for the full coverage comparison, CLI fallback commands, and user preference override options. diff --git a/.agents/skills/google-cloud-networking-observability/SKILL.md b/.agents/skills/google-cloud-networking-observability/SKILL.md new file mode 100644 index 0000000..2a9db2a --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/SKILL.md @@ -0,0 +1,130 @@ +--- +name: google-cloud-networking-observability +description: >- + Investigates Google Cloud networking issues by analyzing logs, metrics, and diagnostics. Use when investigating VPC Flow Logs, NAT, firewall, or threat logs, querying latency and throughput metrics, or running Connectivity Tests for path diagnostics. +--- + +# Google Cloud Networking Observability Expert + +## 🛑 Core Directive: Results First + +1. **Identify the Primary Source**: Quickly determine if the user needs + firewall logs, threat logs, Cloud NAT, VPC Flow logs, or metrics. +2. **Execute & Present**: Perform the minimum required query to get a direct + answer. +3. **Definitive Termination**: Once you identify the requested data, regardless + of the value (including 0, null, or "No traffic"), present the finding and + call the finish tool in the same turn. Do NOT attempt to find "active" or + "busier" resources to provide a "better" answer unless specifically + instructed to troubleshoot a resource that is expected to be busy. + +## Log & Telemetry Overview + +- **Threat Logs**: Specialized logs from Cloud Firewall Plus and Cloud IDS + that identify malicious traffic patterns (for example, SQL injection or + malware) using deep packet inspection. +- **VPC Flow Logs**: Capture a sample of IP traffic to and from network interfaces. + Use for traffic analysis, volume trends, and top talkers. +- **Firewall Logs**: Record connection attempts matched by firewall rules. Use + to identify "DENY" events or verify "ALLOW" rules. +- **Cloud NAT Logs**: Audit NAT translations. 
Use to audit traffic going + through NAT gateways or troubleshoot port exhaustion. +- **Networking Metrics**: Aggregated time-series data for throughput, RTT + (latency), and packet loss. Use for historical trends and performance + monitoring. +- **Connectivity Tests**: Static analysis tool for path diagnostics. Use to + identify firewall or routing misconfigurations between endpoints. + +## Procedures + +### 0. Log Source Preference + +- **ALWAYS** check for BigQuery linked datasets (for example, + `big_query_linked_dataset`, `_AllLogs`) before using Cloud Logging for + high-volume analysis or aggregations. This is the preferred method for + finding trends or top-blocking rules. +- **Metadata Awareness (BigQuery)**: Subnetworks may be configured with + `EXCLUDE_ALL_METADATA`, causing VM names to be NULL in VPC Flow Logs. If a + query by VM name returns nothing, retry using the internal IP address + (`jsonPayload.connection.src_ip`). + +### 1. Tool Selection & Discovery + +- **MCP Servers First**: Use + [Cloud Monitoring MCP](references/mcp-usage.md#cloud-monitoring-mcp), + [BigQuery MCP](references/mcp-usage.md#bigquery-mcp), or + [Cloud Logging MCP](references/mcp-usage.md#cloud-logging-mcp). +- **Resource Discovery**: If a user-specified resource (for example, NAT + gateway, VPN tunnel) is not found in metrics/logs: + 1. Use `run_shell_command` with `gcloud` to list resources in the project. + 2. Search [Cloud Logging MCP](references/mcp-usage.md#cloud-logging-mcp) + for the resource name to find correct labels. +- **CLI Fallback**: Use `gcloud` or `bq` only if MCP servers are unavailable. + DO NOT use gcloud monitoring; it is restricted. Immediately use the curl + templates in [metrics-analysis.md](references/metrics-analysis.md). + +### 2. Schema Verification & Error Recovery + +If a BigQuery query fails with an 'Unrecognized name' error or schema mismatch: +1. 
**Validate Schema**: Run `bq show --schema --format=json +{project_id}:{dataset_id}.{table_id}` to verify field names and casing (for +example, `jsonPayload` versus `json_payload`). +2. **Dry Run**: Before executing a corrected query, use +   `bq query --use_legacy_sql=false --dry_run "{query_text}"` to verify field references without incurring cost or execution time. +3. **Retry**: Apply identified fixes to the original query and execute. + +### 3. Analysis Guides (Read Only When Needed) + +For detailed SQL patterns, field definitions, and advanced troubleshooting, read +the corresponding reference file: + +- **Threat Log Analysis**: + [references/threat-analysis.md](references/threat-analysis.md) +- **VPC Flow Analysis**: + [references/vpc-flow-analysis.md](references/vpc-flow-analysis.md) +- **Cloud NAT Analysis**: + [references/cloud-nat-analysis.md](references/cloud-nat-analysis.md) +- **Firewall Rule Analysis**: + [references/firewall-analysis.md](references/firewall-analysis.md) +- **Networking Metrics**: + [references/metrics-analysis.md](references/metrics-analysis.md) +- **Connectivity Test Analysis**: + [references/connectivity-tests.md](references/connectivity-tests.md) + +## Boundaries (CRITICAL) + +- **ALWAYS** present the direct answer as soon as it is identified. +- **NEVER** run more than 2 exploratory queries before showing results. +- **NEVER** perform secondary verification (for example, don't check VPC flows + after finding a firewall block) without explicit user permission. +- **ALWAYS** print the generated SQL for review before execution. +- **ALWAYS** include a link to the Flow Analyzer in the + [Google Cloud Console](https://console.cloud.google.com/net-intelligence/flow-analyzer). +- **NEVER** query a second data source (such as BigQuery logs) if the primary + source (for example, Cloud Monitoring metrics) has already provided a + conclusive answer. 
**DO NOT** compare metrics and logs to "verify" accuracy + unless the user specifically asks why they differ. +- **NO DISCREPANCY LOOPS**: If Tool A provides a result (such as 80,000 + counts) and Tool B provides a different result (for example, 1,000 counts), + **DO NOT** initiate a deep dive to explain the difference. Present the + result from the primary tool and STOP. +- **ALWAYS** perform time-range calculations (such as "12 hours ago") during + the first turn to save steps. +- **Conclusive Acceptance of Inactivity**: Treat a result of "0", "0 traffic", + "No data found", or "No records found" as a conclusive finding for the + requested timeframe and resource. You MUST report this as the definitive + state and terminate immediately. +- **Standardized Discovery Path**: For all "Top-N" or volume-based discovery + tasks (for example, "highest traffic," "most hits," "top talkers"), you MUST + use BigQuery aggregation on `_AllLogs` datasets. Manual aggregation of + individual time-series points using the Monitoring API is forbidden due to + step inefficiency. +- **Ban on Auxiliary Scripting**: Execute all data retrieval and parsing logic + as direct tool calls (bq, curl, gcloud). Do NOT write or execute local shell + scripts (.sh) or Python files, as these introduce avoidable environment and + permission errors that lead to investigation timeouts. +- **Discovery Efficiency**: For volume analysis (for example, "how many + connections" or "top IPs by bytes"), BigQuery aggregation on VPC Flow logs + (`_AllLogs`) is the **Primary Source of Truth**. If BigQuery data is + available, it is conclusive. Do NOT query Monitoring API to "double check" + BigQuery counts. 
diff --git a/.agents/skills/google-cloud-networking-observability/references/cloud-nat-analysis.md b/.agents/skills/google-cloud-networking-observability/references/cloud-nat-analysis.md new file mode 100644 index 0000000..a80b06d --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/references/cloud-nat-analysis.md @@ -0,0 +1,96 @@ +# Cloud NAT Analysis Reference + +Use Cloud NAT logs (`compute.googleapis.com/nat_flows`) to audit traffic going +through NAT gateways or troubleshoot port exhaustion. + +## 🤖 Agent / Gemini CLI Instructions (MCP) + +You should use [Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp) for +exploratory analysis or [BigQuery MCP](mcp-usage.md#bigquery-mcp) for +high-volume trends. Fallback to the CLI if the MCP tools are not available. + +### 1. View Logs ([Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp)) + +**Tool**: `list_log_entries` + +**Filter**: + +```text +resource.type="nat_gateway" +logName="projects/{project_id}/logs/compute.googleapis.com%2Fnat_flows" +``` + +Filter for dropped packets (potential port exhaustion): + +```text +jsonPayload.allocation_status="DROPPED" +``` + +### 2. Aggregate Trends ([BigQuery MCP](mcp-usage.md#bigquery-mcp)) + +**Tool**: `execute_sql_readonly` + +**SQL Pattern**: + +```sql +SELECT +JSON_VALUE(json_payload.gateway_details.internal_ip) AS internal_ip, COUNT(*) AS +drop_count FROM `{project_id}.{dataset_id}._AllLogs` WHERE log_name LIKE +'%nat_flows%' AND JSON_VALUE(json_payload.allocation_status) = 'DROPPED' GROUP BY +1 ORDER BY drop_count DESC LIMIT 10 +``` + +### 3. 
CLI Fallback + +If MCP tools are unavailable, use the following `gcloud` and `bq` commands: + +**View Logs (gcloud)** + +```bash +gcloud logging read 'resource.type="nat_gateway" AND logName="projects/{project_id}/logs/compute.googleapis.com%2Fnat_flows"' --project {project_id} --limit 10 --format json --quiet +``` + +To filter for dropped packets: + +```bash +gcloud logging read 'resource.type="nat_gateway" AND logName="projects/{project_id}/logs/compute.googleapis.com%2Fnat_flows" AND jsonPayload.allocation_status="DROPPED"' --project {project_id} --limit 10 --format json --quiet +``` + +**Aggregate Trends (bq)** + +```bash +bq query --use_legacy_sql=false --project_id {project_id} ' +SELECT +  JSON_VALUE(json_payload.gateway_details.internal_ip) AS internal_ip, +  COUNT(*) AS drop_count +FROM `{project_id}.{dataset_id}._AllLogs` +WHERE +  log_name LIKE "%nat_flows%" +  AND JSON_VALUE(json_payload.allocation_status) = "DROPPED" +GROUP BY 1 +ORDER BY drop_count DESC +LIMIT 10 +' +``` + +### gcloud + +To get the status of the router used by the NAT gateway: + +```bash +gcloud compute routers get-status {router_name} \ +  --region {region} --quiet +``` + +## Key Fields + +- `jsonPayload.gateway_details.external_ip` / `external_port`: NAT exit point. +- `jsonPayload.gateway_details.internal_ip` / `internal_port`: Source VM. +- `jsonPayload.allocation_status`: `DROPPED` indicates failure to allocate a +    NAT port. + +## Scenarios + +- **Audit Traffic**: Link internal sources to external destinations. +- **Port Exhaustion**: Use `jsonPayload.allocation_status="DROPPED"` to +    identify impacted VMs.
diff --git a/.agents/skills/google-cloud-networking-observability/references/connectivity-tests.md b/.agents/skills/google-cloud-networking-observability/references/connectivity-tests.md new file mode 100644 index 0000000..445f6e1 --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/references/connectivity-tests.md @@ -0,0 +1,36 @@ +# Connectivity Tests Reference + +## Connectivity Tests (Path Diagnostics) + +Use Connectivity Tests to identify firewall or routing blocks along a network +path. + +### Critical Verification: Instance State + +**CRITICAL**: Always verify if the source and destination instances are +`RUNNING`. A `REACHABLE` path analysis result (which is a static configuration +analysis) does not mean traffic will flow if the VM is powered off. + +- Check the `status` field in the instance details. +- Review step metadata in the connectivity test traces. + +**CRITICAL**: You MUST execute the delete command as your final tool call before +providing the result to the user. Do not simply state that it was deleted; +provide the command output as proof. + +### Tooling + +- **Primary**: [NetworkManagement MCP](mcp-usage.md#networkmanagement-mcp) + (`create_connectivity_test`) +- **Polling**: [NetworkManagement MCP](mcp-usage.md#networkmanagement-mcp) + (`get_connectivity_test`) +- **Cleanup**: ALWAYS delete the test resource after use with + [NetworkManagement MCP](mcp-usage.md#networkmanagement-mcp) + (`delete_connectivity_test`). + +#### Fallback: gcloud + +- **Create**: `gcloud network-management connectivity-tests create` +- **Polling**: `gcloud network-management connectivity-tests describe` +- **Cleanup**: ALWAYS delete the test resource after use with `gcloud + network-management connectivity-tests delete`. 
diff --git a/.agents/skills/google-cloud-networking-observability/references/firewall-analysis.md b/.agents/skills/google-cloud-networking-observability/references/firewall-analysis.md new file mode 100644 index 0000000..86de606 --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/references/firewall-analysis.md @@ -0,0 +1,96 @@ +# Firewall Rule Logging Analysis Reference + +Use firewall logs (`compute.googleapis.com/firewall`) to verify if traffic is +allowed or denied. + +## 🤖 Agent / Gemini CLI Instructions (MCP) + +You should use [Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp) for +exploratory analysis or [BigQuery MCP](mcp-usage.md#bigquery-mcp) for +high-volume trends. Fallback to the CLI if the MCP tools are not available. + +- **Exploratory Analysis**: Typically involves looking at individual log +    entries or a small set of logs to understand specific events, debug issues, +    or investigate anomalies. This often requires filtering and examining the +    full details of log records. +- **High-Volume Trends**: Focuses on aggregating large datasets of logs over +    time to identify patterns, measure traffic volumes, analyze latency +    distributions, or find "top talkers." This usually involves SQL queries to +    summarize data rather than inspecting individual logs. + +### 1. View Logs ([Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp)) + +**Tool**: `list_log_entries` + +**Filter**: + +```text +resource.type="gce_subnetwork" +logName="projects/{project_id}/logs/compute.googleapis.com%2Ffirewall" +``` + +Filter for denied packets: + +```text +jsonPayload.rule_details.action="DENY" +``` + +### 2. 
Aggregate Trends ([BigQuery MCP](mcp-usage.md#bigquery-mcp)) + +**Tool**: `execute_sql_readonly` + +**SQL Pattern**: + +```sql +SELECT JSON_VALUE(json_payload.rule_details.reference) AS +rule_name, COUNT(*) AS block_count FROM `{project_id}.{dataset_id}._AllLogs` +WHERE log_name LIKE '%firewall%' AND +JSON_VALUE(json_payload.rule_details.action) = 'DENY' GROUP BY 1 ORDER BY +block_count DESC LIMIT 10 +``` + +### 3. CLI Fallback + +If MCP tools are unavailable, use the following `gcloud` and `bq` commands: + +**View Logs (gcloud)** + +```bash +gcloud logging read 'resource.type="gce_subnetwork" AND logName="projects/{project_id}/logs/compute.googleapis.com%2Ffirewall"' --project {project_id} --limit 10 --format json --quiet +``` + +To filter for denied packets: + +```bash +gcloud logging read 'resource.type="gce_subnetwork" AND logName="projects/{project_id}/logs/compute.googleapis.com%2Ffirewall" AND jsonPayload.rule_details.action="DENY"' --project {project_id} --limit 10 --format json --quiet +``` + +**Aggregate Trends (bq)** + +```bash +bq query --use_legacy_sql=false --project_id {project_id} ' +SELECT +  JSON_VALUE(json_payload.rule_details.reference) AS rule_name, +  COUNT(*) AS block_count +FROM `{project_id}.{dataset_id}._AllLogs` +WHERE +  log_name LIKE "%firewall%" +  AND JSON_VALUE(json_payload.rule_details.action) = "DENY" +GROUP BY 1 +ORDER BY block_count DESC +LIMIT 10 +' +``` + +## Key Fields + +- `jsonPayload.rule_details.action`: `ALLOW` or `DENY`. +- `jsonPayload.rule_details.reference`: The firewall rule name (for example, +    `default-deny-all`). +- `jsonPayload.connection.src_ip` / `dest_ip`: The source and destination of +    the connection. + +## Common Use Cases + +- **Identify Blocks**: Find which `DENY` rule is causing connection failures. +- **Security Audit**: Detect unexpected traffic patterns.
diff --git a/.agents/skills/google-cloud-networking-observability/references/mcp-usage.md b/.agents/skills/google-cloud-networking-observability/references/mcp-usage.md new file mode 100644 index 0000000..b1d43ef --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/references/mcp-usage.md @@ -0,0 +1,81 @@ +# MCP Server Usage Reference + +This document describes the Model Context Protocol (MCP) servers used for GCP +networking observability. + +## BigQuery MCP + +BigQuery is supported by a remote MCP server that provides tools for automated +data management and analysis. + +### Key Tools + +- **list_dataset_ids**: List BigQuery dataset IDs in a project. +- **list_table_ids**: List table IDs in a BigQuery dataset. +- **get_table_info**: Get schema and metadata for a specific table. +- **execute_sql_readonly**: Run `SELECT` queries to analyze logs (such as VPC Flow and +    Firewall logs) stored in BigQuery. This is the preferred tool for high-volume +    aggregations and trend analysis. + +### Usage Pattern + +1. Use `list_dataset_ids` to find the dataset linked to the log bucket. +2. Use `list_table_ids` to find the relevant log table or view (for example, +    `_AllLogs`). +3. Use `get_table_info` to verify field names (for example, `jsonPayload` +    versus `json_payload`). +4. Use `execute_sql_readonly` for the final analysis. + +## Cloud Logging MCP + +The Cloud Logging MCP server provides access to log entries across various +Google Cloud resources. + +### Key Tools + +- **list_log_entries**: Search and retrieve log entries using advanced +    filters. +- **list_log_names**: Discover available logs in a project. + +### Usage Pattern + +- Use for quick, real-time identification of recent events or exploratory +    analysis where BigQuery datasets are not linked. +- Use specific filters for `resource.type` and `logName` to narrow down +    results. + +## NetworkManagement MCP + +The Network Management MCP server allows for reachability analysis and path +diagnostics. 
+ +### Key Tools + +- **create_connectivity_test**: Start a simulated packet path analysis between +    two endpoints. +- **get_connectivity_test**: Poll for the results of a running test. +- **delete_connectivity_test**: Cleanup the test resource after analysis is +    complete. + +### Usage Pattern + +- Use when static path analysis is needed to identify firewall or routing +    blocks. +- **CRITICAL**: Always delete the test resource after retrieving the result. + +## Cloud Monitoring MCP + +The Cloud Monitoring MCP server provides access to Cloud Monitoring metrics and +time-series data. + +### Key Tools + +- **list_metric_descriptors**: Discover available metrics using filters. +- **list_timeseries**: Retrieve aggregated data points for performance +    analysis (such as RTT or throughput). + +### Usage Pattern + +- Use for analyzing performance trends, packet loss, and latency. +- Prefer `ALIGN_MEAN` or `ALIGN_PERCENTILE_50` for distribution metrics like +    RTT to simplify parsing. diff --git a/.agents/skills/google-cloud-networking-observability/references/metrics-analysis.md b/.agents/skills/google-cloud-networking-observability/references/metrics-analysis.md new file mode 100644 index 0000000..d30fee2 --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/references/metrics-analysis.md @@ -0,0 +1,67 @@ +# Networking Metrics Reference + +## Common Troubleshooting Metrics + +- **RTT (Latency)**: +    `networking.googleapis.com/cloud_netslo/active_probing/rtt` +- **Packet Loss**: +    `networking.googleapis.com/cloud_netslo/active_probing/probe_count` +- **VM Throughput**: +    `compute.googleapis.com/instance/network/received_bytes_count` +- **VM Sent Packets**: +    `compute.googleapis.com/instance/network/sent_packets_count` +- **VM Received Packets**: +    `compute.googleapis.com/instance/network/received_packets_count` +- **NAT Port Exhaustion**: +    `compute.googleapis.com/nat/dropped_sent_packets_count` +- **NAT Sent Packets**: 
`compute.googleapis.com/nat/sent_packets_count` +- **VPN Dropped Received Packets**: +    `vpn.googleapis.com/network/dropped_received_packets_count` +- **VPN Dropped Sent Packets**: +    `vpn.googleapis.com/network/dropped_sent_packets_count` +- **Internal Latency (RTT)**: `networking.googleapis.com/vm_flow/rtt`. +    Measures internal VM-to-VM traffic within Google Cloud. +- **External Latency (RTT)**: +    `networking.googleapis.com/vm_flow/external_rtt`. Measures traffic to and +    from the internet. + +## Distribution Parsing Standard + +When querying metrics of type DISTRIBUTION (like RTT), align the data with +`ALIGN_PERCENTILE_50` to ensure the output can be parsed as a simple numeric +value. + +## Dynamic Discovery + +- **Primary ([Cloud Monitoring MCP](mcp-usage.md#cloud-monitoring-mcp))**: Use +    `list_metric_descriptors` with a filter. + +    - **Prefix**: Filter by prefix, using `starts_with()`. Common prefixes: +        - `metric.type = starts_with("networking.googleapis.com/")` +        - `metric.type = starts_with("router.googleapis.com/")` +        - `metric.type = starts_with("vpn.googleapis.com/")` +        - `metric.type = starts_with("compute.googleapis.com/")` +    - **Substring**: `metric.type = has_substring("network") OR metric.type = +        has_substring("packet") OR metric.type = has_substring("nat")` + +- **Fallback (CLI/CURL)**: If MCP tools are not available, use `gcloud` or `curl`. + +    ```bash +    curl -s -H "Authorization: Bearer $(gcloud auth print-access-token)" \ +    "https://monitoring.googleapis.com/v3/projects/{project_id}/metricDescriptors?filter=metric.type=starts_with(%22{prefix}%22)" \ +    | jq -r '.metricDescriptors[] | "\(.type): \(.description)"' + +    curl -s -G -H "Authorization: Bearer $(gcloud auth print-access-token)" \ +    "https://monitoring.googleapis.com/v3/projects/{project_id}/timeSeries" 
\ +    --data-urlencode 'filter=metric.type="{metric_name}"' \ +    --data-urlencode "interval.startTime={start_time}" \ +    --data-urlencode "interval.endTime={end_time}" \ +    --data-urlencode "aggregation.alignmentPeriod=3600s" \ +    --data-urlencode "aggregation.perSeriesAligner=ALIGN_PERCENTILE_50" \ +    | jq '.timeSeries[] | {metric: .metric.type, points: .points[:5]}' +    ``` + +- **Detailed Schema**: ALWAYS query the full descriptor for a specific metric +    before use to identify available labels. Metric types like `vm_flow/rtt` +    often use `resource.labels.zone` for the local zone and +    `metric.labels.remote_zone` for the peer. diff --git a/.agents/skills/google-cloud-networking-observability/references/threat-analysis.md b/.agents/skills/google-cloud-networking-observability/references/threat-analysis.md new file mode 100644 index 0000000..404bad6 --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/references/threat-analysis.md @@ -0,0 +1,151 @@ +# Threat Log Analysis Reference + +Use Firewall Threat Logs and Cloud IDS logs to identify, analyze, and +troubleshoot security incidents in your VPC network. + +## 🤖 Agent / Gemini CLI Instructions (MCP) + +Agents should use [Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp) for quick +identification of recent alerts or [BigQuery MCP](mcp-usage.md#bigquery-mcp) for +analyzing trends and identifying top attackers. Fallback to the CLI if the MCP +tools are not available. + +### 1. 
View Threat Alerts ([Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp)) + +**Tool**: `list_log_entries` + +**Filter**: Search for both Cloud Firewall Plus and Cloud IDS threat log +sources: + +```text +logName:( +"projects/{project_id}/logs/networksecurity.googleapis.com%2Ffirewall_threat" OR +"projects/{project_id}/logs/ids.googleapis.com%2Fthreat") +``` + +To filter for high-severity blocked threats: + +```text +logName:( +"projects/{project_id}/logs/networksecurity.googleapis.com%2Ffirewall_threat" OR +"projects/{project_id}/logs/ids.googleapis.com%2Fthreat") +jsonPayload.threatDetails.severity=("HIGH" OR "CRITICAL") +jsonPayload.action="DENY" +``` + +### 2. Aggregate Threat Trends ([BigQuery MCP](mcp-usage.md#bigquery-mcp)) + +**Tool**: `execute_sql_readonly` + +**SQL Pattern**: **Note**: In BigQuery, the top-level column name is +`json_payload` (snake_case). However, fields extracted from inside the JSON +payload are case-sensitive and retain the camelCase format of the original log +(for example, `threatDetails`, `clientIp`). Do not convert these nested +threat-log fields to snake_case. 
+ +```sql +SELECT + timestamp, + JSON_VALUE(json_payload.threatDetails.threat) AS threat_name, + JSON_VALUE(json_payload.threatDetails.severity) AS severity, + JSON_VALUE(json_payload.threatDetails.category) AS category, + JSON_VALUE(json_payload.action) AS action, + JSON_VALUE(json_payload.connection.clientIp) AS src_ip, + JSON_VALUE(json_payload.connection.serverIp) AS dest_ip, + JSON_VALUE(json_payload.connection.serverPort) AS dest_port, + JSON_VALUE(json_payload.threatDetails.description) AS description +FROM `{project_id}.{dataset_id}._AllLogs` +WHERE + log_id IN ('networksecurity.googleapis.com/firewall_threat', + 'ids.googleapis.com/threat') + AND JSON_VALUE(json_payload.threatDetails.severity) IN ('HIGH', 'CRITICAL') + AND timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR) +ORDER BY timestamp DESC +LIMIT 20 +``` + +To find top sources of attacks: + +```sql +SELECT +JSON_VALUE(json_payload.connection.clientIp) AS attacker_ip, COUNT(*) AS +attack_count, ARRAY_AGG(DISTINCT JSON_VALUE(json_payload.threatDetails.threat) +LIMIT 5) AS sample_threats FROM `{project_id}.{dataset_id}._AllLogs` WHERE +log_id IN ('networksecurity.googleapis.com/firewall_threat', +'ids.googleapis.com/threat') AND timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), +INTERVAL 7 DAY) GROUP BY 1 ORDER BY attack_count DESC LIMIT 10 +``` + +### 3. 
CLI Fallback + +If MCP tools are unavailable, use the following `gcloud` and `bq` commands: + +**View Threat Alerts (gcloud)** + +```bash +gcloud logging read 'logName:("projects/{project_id}/logs/networksecurity.googleapis.com%2Ffirewall_threat" OR "projects/{project_id}/logs/ids.googleapis.com%2Fthreat")' --project {project_id} --limit 10 --format json --quiet +``` + +To filter for high-severity blocked threats: + +```bash +gcloud logging read 'logName:("projects/{project_id}/logs/networksecurity.googleapis.com%2Ffirewall_threat" OR "projects/{project_id}/logs/ids.googleapis.com%2Fthreat") AND jsonPayload.threatDetails.severity=("HIGH" OR "CRITICAL") AND jsonPayload.action="DENY"' --project {project_id} --limit 10 --format json --quiet +``` + +**Aggregate Threat Trends (bq)** + +```bash +bq query --use_legacy_sql=false --project_id {project_id} ' +SELECT + timestamp, + JSON_VALUE(json_payload.threatDetails.threat) AS threat_name, + JSON_VALUE(json_payload.threatDetails.severity) AS severity, + JSON_VALUE(json_payload.threatDetails.category) AS category, + JSON_VALUE(json_payload.action) AS action, + JSON_VALUE(json_payload.connection.clientIp) AS src_ip, + JSON_VALUE(json_payload.connection.serverIp) AS dest_ip, + JSON_VALUE(json_payload.connection.serverPort) AS dest_port, + JSON_VALUE(json_payload.threatDetails.description) AS description +FROM `{project_id}.{dataset_id}._AllLogs` +WHERE + log_id IN ("networksecurity.googleapis.com/firewall_threat", + "ids.googleapis.com/threat") + AND JSON_VALUE(json_payload.threatDetails.severity) IN ("HIGH", "CRITICAL") + AND timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 24 HOUR) +ORDER BY timestamp DESC +LIMIT 20 +' +``` + +To find top sources of attacks: + +```bash +bq query --use_legacy_sql=false --project_id {project_id} ' +SELECT + JSON_VALUE(json_payload.connection.clientIp) AS attacker_ip, + COUNT(*) AS attack_count, + ARRAY_AGG(DISTINCT JSON_VALUE(json_payload.threatDetails.threat) + LIMIT 5) AS 
sample_threats +FROM `{project_id}.{dataset_id}._AllLogs` +WHERE + log_id IN ("networksecurity.googleapis.com/firewall_threat", + "ids.googleapis.com/threat") + AND timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY) +GROUP BY 1 +ORDER BY attack_count DESC +LIMIT 10 +' +``` + +### Key Fields (Cloud Logging Filter Names) + +- **jsonPayload.threatDetails.threat**: Human-readable name of the threat. +- **jsonPayload.threatDetails.severity**: Severity level (CRITICAL, HIGH, + MEDIUM, LOW, INFORMATIONAL). +- **jsonPayload.threatDetails.category**: The category of threat. +- **jsonPayload.action**: Action taken (for example, "ALLOW", "DENY", + "ALERT"). +- **jsonPayload.connection.clientIp**: The true source IP. +- **jsonPayload.connection.serverIp**: The destination IP. +- **jsonPayload.threatDetails.cves**: List of CVE IDs. +- **jsonPayload.threatDetails.description**: Attack payload details. diff --git a/.agents/skills/google-cloud-networking-observability/references/vpc-flow-analysis.md b/.agents/skills/google-cloud-networking-observability/references/vpc-flow-analysis.md new file mode 100644 index 0000000..d5d7219 --- /dev/null +++ b/.agents/skills/google-cloud-networking-observability/references/vpc-flow-analysis.md @@ -0,0 +1,124 @@ +# VPC Flow Analysis Reference + +Use VPC Flow Logs to analyze traffic patterns, volume, and latency. + +## 🤖 Agent / Gemini CLI Instructions (MCP) + +Agents should use [Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp) for +exploratory analysis or [BigQuery MCP](mcp-usage.md#bigquery-mcp) for +high-volume trends. Fallback to the CLI if the MCP tools are not available. + +- **Exploratory Analysis**: Typically involves looking at individual log + entries or a small set of logs to understand specific events, debug issues, + or investigate anomalies. This often requires filtering and examining the + full details of log records. 
+- **High-Volume Trends**: Focuses on aggregating large datasets of logs over +    time to identify patterns, measure traffic volumes, analyze latency +    distributions, or find "top talkers." This usually involves SQL queries to +    summarize data rather than inspecting individual logs. + +### 1. View Logs ([Cloud Logging MCP](mcp-usage.md#cloud-logging-mcp)) + +**Tool**: `list_log_entries` + +**Filter**: ALWAYS search for both VPC flow log sources: + +```text +(logName:"projects/{project_id}/logs/compute.googleapis.com%2Fvpc_flows" OR +logName:"projects/{project_id}/logs/networkmanagement.googleapis.com%2Fvpc_flows") +resource.type="gce_subnetwork" +``` + +### 2. Aggregate Trends ([BigQuery MCP](mcp-usage.md#bigquery-mcp)) + +**Tool**: `execute_sql_readonly` + +**SQL Pattern**: + +```sql +SELECT timestamp, +JSON_VALUE(json_payload.connection.src_ip) AS src_ip, +JSON_VALUE(json_payload.connection.dest_ip) AS dest_ip, +CAST(JSON_VALUE(json_payload.bytes_sent) AS INT64) AS bytes_sent FROM +`{project_id}.{dataset_id}._AllLogs` WHERE log_name IN ( +'projects/{project_id}/logs/compute.googleapis.com%2Fvpc_flows', +'projects/{project_id}/logs/networkmanagement.googleapis.com%2Fvpc_flows' ) AND +timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR) ORDER BY +timestamp DESC LIMIT 10 +``` + +### 3. 
CLI Fallback + +If MCP tools are unavailable, use the following `gcloud` and `bq` commands: + +**View Logs (gcloud)** + +```bash +gcloud logging read '(logName:"projects/{project_id}/logs/compute.googleapis.com%2Fvpc_flows" OR logName:"projects/{project_id}/logs/networkmanagement.googleapis.com%2Fvpc_flows") AND resource.type="gce_subnetwork"' --project {project_id} --limit 10 --format json --quiet +``` + +**Aggregate Trends (bq)** + +```bash +bq query --use_legacy_sql=false --project_id {project_id} ' +SELECT +  timestamp, +  JSON_VALUE(json_payload.connection.src_ip) AS src_ip, +  JSON_VALUE(json_payload.connection.dest_ip) AS dest_ip, +  CAST(JSON_VALUE(json_payload.bytes_sent) AS INT64) AS bytes_sent +FROM `{project_id}.{dataset_id}._AllLogs` +WHERE +  log_name IN ( +    "projects/{project_id}/logs/compute.googleapis.com%2Fvpc_flows", +    "projects/{project_id}/logs/networkmanagement.googleapis.com%2Fvpc_flows" +  ) +  AND timestamp >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR) +ORDER BY timestamp DESC +LIMIT 10 +' +``` + +### Flow Analyzer (Visual Analysis) + +For visual traffic analysis and identifying "top talkers," use +[Flow Analyzer](https://console.cloud.google.com/net-intelligence/flow-analyzer). +It allows you to: + +- Visualize traffic flows between regions, VPCs, and instances. +- Filter by source or destination dimensions. +- Identify high-bandwidth or high-latency connections. + +### Generic BigQuery Guidelines + +- **Schema Verification**: Before executing a BigQuery query, if you are +    uncertain of the casing (for example, `jsonPayload` versus `json_payload`), +    you MUST run `bq show --schema {project_id}:{dataset_id}._AllLogs`. +- **Latency Aggregation**: The primary field for RTT analysis in VPC Flow logs +    is `json_payload.round_trip_time.median_msec`. This field offers +    sub-millisecond precision and covers both TCP and Falcon traffic. Filter by +    `reporter` (`SRC` or `DEST`) to avoid double-counting traffic volume. 
+ +    For TCP-only traffic, you can also use `json_payload.rtt_msec`, which +    provides RTT in whole milliseconds. While less precise and with narrower +    coverage than `round_trip_time.median_msec`, it can be aggregated as +    follows: + +    ```sql +    SELECT +      AVG(CAST(JSON_VALUE(json_payload.rtt_msec) AS INT64)) AS average_rtt_msec, +      MAX(CAST(JSON_VALUE(json_payload.rtt_msec) AS INT64)) AS max_rtt_msec +    FROM ... +    ``` + +## Key Fields + +- **src_ip / dest_ip**: Source and destination IP addresses. +- **bytes_sent / packets_sent**: Volume of traffic. +- **round_trip_time.median_msec**: The primary field for RTT analysis. This +    `double` field provides the *median* latency in milliseconds with +    sub-millisecond precision. It is populated for both TCP and Falcon traffic. +- **rtt_msec**: An `int64` field representing Round-trip time in whole +    milliseconds. Populated only for TCP traffic. Generally, +    `round_trip_time.median_msec` is preferred due to higher precision and +    broader coverage. +- **reporter**: `SRC` or `DEST`, indicating which side logged the flow. diff --git a/.agents/skills/google-cloud-recipe-auth/SKILL.md b/.agents/skills/google-cloud-recipe-auth/SKILL.md new file mode 100644 index 0000000..baaf341 --- /dev/null +++ b/.agents/skills/google-cloud-recipe-auth/SKILL.md @@ -0,0 +1,258 @@ +--- +name: google-cloud-recipe-auth +description: Provides expert guidance on authenticating and authorizing to Google Cloud services and APIs, covering human users, service identities, Application Default Credentials (ADC), and best practices for secure access. +--- + +# Authenticating to Google Cloud + +[Authentication](https://docs.cloud.google.com/docs/authentication) is the +process of proving **who you are**. In Google Cloud, you represent a +**Principal** (an identity like a user or a service). This is the first step +before [Authorization](https://docs.cloud.google.com/iam/docs/overview) +(determining **what you can do**). 
+ +## Authentication + +### Clarifying Questions for the Agent + +Before providing a specific solution, clarify the following with the user: + +1. **Who or what is authenticating?** (A human developer, a local script, or an + application running in production?) +2. **Where is the code running?** (Local laptop, [Compute + Engine](https://docs.cloud.google.com/compute/docs), + [GKE](https://docs.cloud.google.com/kubernetes-engine/docs), [Cloud + Run](https://docs.cloud.google.com/run/docs), or another cloud like + AWS/Azure?) +3. **What is the target?** (A Google Cloud API like Storage/BigQuery, or a + custom application you built?) +4. **Are you using a high-level client library?** (e.g., Python, Go, Node.js + libraries usually handle ADC automatically.) + +--- + +## Human Authentication + +For users to access Google Cloud, they need an identity that Google Cloud can +recognize. + +### Types of User Identities + +Google Cloud supports several ways to configure identities for your internal +workforce (developers, administrators, employees): + +* **[Google-Managed + Accounts](https://docs.cloud.google.com/iam/docs/user-identities#google-accounts)**: + You can use Cloud Identity or Google Workspace to create managed user + accounts. These are called managed accounts because your organization + controls their lifecycle and configuration. +* **[Federation using Cloud Identity or Google + Workspace](https://docs.cloud.google.com/iam/docs/user-identities#synced-federation)**: + You can federate identities to allow users to use their existing identity + and credentials to sign in to Google services. Users authenticate against an + external identity provider (IdP), but you must keep accounts synchronized + into Google Cloud using tools like Google Cloud Directory Sync (GCDS) or an + external authoritative source like Active Directory or Microsoft Entra ID. 
+* **[Workforce Identity + Federation](https://docs.cloud.google.com/iam/docs/user-identities#workforce)**: + This lets you use an external IdP to authenticate and authorize a workforce + using IAM directly. Unlike standard federation, you do not need to + synchronize user identities from your existing IdP to Google Cloud + identities. It supports syncless, attribute-based single sign-on. + +### Methods of Access for Developers and Administrators + +Used for interacting with Google Cloud resources and APIs during development and +management. + +* **[Google Cloud Console](https://console.cloud.google.com/)**: The primary + web interface. You authenticate using your Google Account (Gmail or [Google + Workspace](https://workspace.google.com/)). +* **[gcloud CLI](https://docs.cloud.google.com/sdk/docs/install-sdk) (`gcloud + auth login`)**: Used to authenticate the CLI itself so you can run + management commands (e.g., `gcloud compute instances list`). It uses a + **Credential** (like an OAuth 2.0 refresh token) stored locally. +* **Local Development with [App Default Credentials + (ADC)](https://docs.cloud.google.com/docs/authentication/application-default-credentials) + (`gcloud auth application-default login`)**: This is different from CLI + auth. It creates a local JSON file that Google Cloud **Client Libraries** + (Python, Java, etc.) use to act as "you" when you run code on your laptop. +* **[Service Account + Impersonation](https://docs.cloud.google.com/docs/authentication/use-service-account-impersonation)**: + For security reasons, developers should avoid downloading Service Account + keys entirely. Instead, they should authenticate as humans (`gcloud auth + login`) and use Service Account Impersonation to run CLI commands or + generate short-lived credentials. This is a critical best practice for local + development and troubleshooting. 
+ +### For End-Users and Customers + +Used when a human (who is not a developer) needs to access a web application +you've deployed on Google Cloud. Note: These are distinct from workforce +identities. + +* **[Identity-Aware Proxy (IAP)](https://docs.cloud.google.com/iap/docs)**: + Acts as a central authorization layer for web applications. It intercepts + web requests and verifies the user's identity (via Google Workspace, Cloud + Identity, or external providers) before letting them reach the application. + It's often used to protect internal apps without a VPN, or secure customer + portals. +* **[Identity + Platform](https://docs.cloud.google.com/identity-platform/docs)**: A + Customer Identity and Access Management (CIAM) solution for adding consumer + sign-in (email/password, phone, social) directly into the code of your + custom-built applications. + +--- + +## Service-to-Service Authentication + +When code runs in production, it should use a **Service Account** rather than a +human user account. + +### Service Accounts and Service Agents + +* **[Service + Account](https://docs.cloud.google.com/iam/docs/service-account-overview)**: + A special identity intended for non-human users. It's like a "robot + identity" with its own email address. +* **[Service Agent](https://docs.cloud.google.com/iam/docs/service-agents)**: + A service account managed by Google that allows a service (like Pub/Sub) to + access your resources on your behalf. + +### Best Practice: Attaching Service Accounts + +Instead of using **Service Account Keys** (dangerous JSON files), you should +**attach** a custom service account to the Google Cloud resource. The resource's +environment then provides a **Token** (a short-lived digital object) via a local +metadata server. + +* **[Compute + Engine](https://docs.cloud.google.com/compute/docs/access/create-enable-service-accounts-for-instances)**: + Assign a service account during VM creation. 
+* **[Cloud + Run](https://docs.cloud.google.com/run/docs/securing/service-identity)**: + Assign a service account in the service configuration. + +### Special Cases & Advanced Topics + +#### Kubernetes Engine (GKE) + +Use **[Workload Identity Federation for +GKE](https://docs.cloud.google.com/kubernetes-engine/docs/how-to/workload-identity)** +to map Kubernetes identities to IAM principal identifiers. This grants specific +Kubernetes workloads access to specific Google Cloud APIs. [Learn more +here.](https://docs.cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#configure-authz-principals) + +#### External Workloads ([Workload Identity Federation](https://docs.cloud.google.com/iam/docs/workload-identity-federation)) + +For code running **outside** Google Cloud (e.g., AWS, Azure, or on-prem), do not +use keys. Instead, use Workload Identity Federation to exchange an external +token (like an AWS IAM role) for a short-lived Google Cloud access token. + +#### [API Keys](https://docs.cloud.google.com/docs/authentication/api-keys) + +API keys are encrypted strings used for public data (e.g., Google Maps) or +simplified access like **[Vertex AI Express +Mode](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/start/express-mode/overview)**, +which allows fast testing of Gemini models without complex setup. Both humans +and services (e.g., Cloud Run-based AI agent) can use API keys, for the services +that support it. + +Note: API keys should be +[restricted](https://docs.cloud.google.com/api-keys/docs/add-restrictions-api-keys) +to specific APIs and projects to minimize security risks. Store API keys in a +secrets manager like [Secret +Manager](https://docs.cloud.google.com/secret-manager/docs) to prevent +accidental exposure. + +#### OAuth 2.0 Access Scopes + +While IAM is the modern way to handle authorization, legacy Compute Engine VMs +and GKE node pools still rely on **Access Scopes** alongside IAM. 
If a VM's +scope is restricted, the attached service account will fail to make API calls +even if it has the correct IAM permissions. Check this first if attached service +accounts are failing unexpectedly. + +#### Short-Lived Credentials + +The underlying mechanism for impersonation and secure service-to-service +communication is the **IAM Service Account Credentials API**. This API generates +short-lived access tokens, OpenID Connect (OIDC) ID tokens, or self-signed JSON +Web Tokens (JWTs) dynamically, removing the need for static credentials. + +--- + +## Authorization + +After Authentication, Google Cloud uses **[Identity and Access Management +(IAM)](https://docs.cloud.google.com/iam/docs/overview)** to determine what the +authenticated principal can do. + +* **Allow Policy**: A record that binds a **Principal** to a **Role** on a + **Resource**. +* **[Predefined + Roles](https://docs.cloud.google.com/iam/docs/understanding-roles)**: + Prebuilt roles like `roles/storage.objectViewer` or + `roles/bigquery.dataEditor`. **Always try to use these first.** +* **[Custom + Roles](https://docs.cloud.google.com/iam/docs/creating-custom-roles)**: + User-defined collections of specific permissions if predefined roles are too + broad. + +--- + +## Examples + +### Human-to-Service (Local Python Development) + +1. **Authn**: Run `gcloud auth application-default login` to create local + credentials (ADC). +2. **Authz**: Grant your email the `roles/storage.objectViewer` role on a + bucket. +3. **Code**: Use the Python `storage.Client()`. It automatically finds your + local credentials via ADC. *Note: ADC searches in a specific order—first + checking the `GOOGLE_APPLICATION_CREDENTIALS` environment variable, then the + local gcloud JSON file, and finally the attached service account metadata + server.* + +### Service-to-Service (Cloud Run to Cloud SQL) + +1. **Authn**: Attach a custom Service Account to your Cloud Run service. +2. 
**Authz**: Grant that Service Account the `roles/cloudsql.client` role on + the project. +3. **Code**: The Cloud Run environment provides the token automatically to the + connection driver. + +### Calling a Custom Application ([OIDC](https://docs.cloud.google.com/docs/authentication/get-id-token)) + +When calling a private Cloud Run service from another service, the caller +generates a Google-signed **OpenID Connect (OIDC) ID Token** and passes it in +the `Authorization: Bearer ID_TOKEN` header. + +--- + +## Validation Checklist + +- [ ] Is the user running code locally? Suggest `gcloud auth + application-default login` or **Service Account Impersonation**. +- [ ] Is the user attempting to use Service Account keys locally? Strongly + discourage this and recommend impersonation. +- [ ] Is the user running in production? Recommend attaching a custom, + least-privilege service account, NOT using keys. +- [ ] Is the user relying on the Compute Engine Default Service Account? + Recommend creating a custom service account instead. +- [ ] Is the user running on another cloud? Recommend Workload Identity + Federation. +- [ ] Is the user calling a custom app? Recommend OIDC ID Tokens. +- [ ] Has the user restricted their API Keys? Check for appropriate [API Key + Restrictions](https://docs.cloud.google.com/docs/authentication/api-keys#adding-application-restrictions). 
+ +## References + +- [Authentication Overview](https://docs.cloud.google.com/docs/authentication) +- [User Identities](https://docs.cloud.google.com/iam/docs/user-identities) +- [Application Default Credentials](https://docs.cloud.google.com/docs/authentication/provide-credentials-adc) +- [Service Account Best Practices](https://docs.cloud.google.com/iam/docs/best-practices-service-accounts) + + diff --git a/.agents/skills/google-cloud-recipe-onboarding/SKILL.md b/.agents/skills/google-cloud-recipe-onboarding/SKILL.md new file mode 100644 index 0000000..91cb173 --- /dev/null +++ b/.agents/skills/google-cloud-recipe-onboarding/SKILL.md @@ -0,0 +1,202 @@ +--- +name: google-cloud-recipe-onboarding +description: >- + Guides a developer's first steps on Google Cloud, covering account creation, + billing setup, project management, and deploying a first resource. + Use when a new developer wants to initialize their first Google Cloud project, + configure billing, and verify deployment. + Don't use for enterprise organization setup (use Google Cloud Setup guided flow for that instead). + Don't use for complex multi-project architectures. +--- + +# Onboarding to Google Cloud + +This skill provides a streamlined, non-interactive "happy path" for a singleton developer to get started with [Google Cloud](https://cloud.google.com/). It covers everything from environment verification and authentication to project selection, billing account linkage, and downstream safety chaining. + +> [!IMPORTANT] +> For autonomous agents executing this skill: +> 1. **Check-Before-Mutate Audits**: Always perform silent pre-execution state audits prior to proposing or executing any project or billing changes. +> 2. **Single-Question Policy**: Ask the user for exactly **one** operational parameter or confirmation at a time during interactive execution. +> 3. 
**Non-Interactive Output**: Append non-interactive overrides (`--quiet`, `--format="json"`) to all mutation commands to guarantee deterministic, machine-parseable outputs and prevent terminal hangs. +> 4. **First Turn Interaction Rules (Trigger Turn)**: When the developer first triggers this skill with a general onboarding request (e.g. says "I want to get started with Google Cloud"): +> - **Preamble Guidance**: Proactively include a short orienting preamble guiding the developer to create a Google Cloud account (pointing to the console at `https://console.cloud.google.com/`) and run `gcloud auth login` to authorize their workstation, even if they appear to be already logged in. +> - **First Turn Single-Question**: Perform pre-flight audits silently, but do not present a complete parameters summary table or ask for final consent in the first turn. Instead, ask the developer exactly **one** initial operational question (e.g., *"Would you like to reuse an existing active project, or create a brand new one?"*). +> *Note: If the developer's initial prompt explicitly states "I approve the onboarding configuration", "Let's proceed with onboarding", or requests a dry-run plan (e.g., "Show me the exact plan or dry-run commands"), bypass the general preamble and initial question, and proceed directly to the requested step.* + +--- + +## Overview + +For an individual developer, onboarding to Google Cloud involves verifying local terminal tools, establishing an authenticated session, selecting or instantiating a workspace ([Project](https://docs.cloud.google.com/resource-manager/docs/cloud-platform-resource-hierarchy#projects)), and linking it to an active billing account. Google Cloud offers a Free Tier and a Free Trial with $300 in credits for first-time users. [Learn more here](https://docs.cloud.google.com/free/docs/free-cloud-features). + +--- + +## Prerequisites + +- A personal Google Account (e.g., `@gmail.com`) or Google Workspace / Cloud Identity account. 
+- A valid payment method (credit card or bank account) required for identity verification and to activate the $300 Free Trial credit introduced in the Overview. + +--- + +## Steps + +### Section 1: Verify Host Tooling Setup + +Before soliciting input or proposing mutations, silently audit the host system's active tooling and environment status. + +1. Check if the `gcloud` CLI binary is installed and accessible: + ```bash + which gcloud + ``` +2. Check if there is an active authenticated identity session: + ```bash + gcloud auth list --format="json" + ``` +3. If the pre-execution audit for `which gcloud` returns a valid path, proceed directly to Section 2: Authenticate and Route Session. +4. If the binary is missing, halt execution and direct the agent/developer to the [gcloud skill](https://github.com/google/skills/tree/main/skills/cloud/gcloud) or official [Google Cloud CLI Installation Guide](https://docs.cloud.google.com/sdk/docs/install) for setup and authentication instructions before retrying. + +--- + +### Section 2: Authenticate and Route Session + +Authorize the gcloud CLI to access Google Cloud using the developer's Google Account, and verify that the account is appropriate for standalone developer onboarding. + +1. **Execute Credentials Authentication:** + ```bash + gcloud auth login + ``` + > [!IMPORTANT] + > **New User / Unauthenticated Guidance**: + > If the pre-execution state audits or command failures confirm that the developer is unauthenticated (e.g., `gcloud auth list` is empty or active credentials are missing): + > 1. Guide them to create a Google Cloud account by navigating to the [Google Cloud Console](https://console.cloud.google.com/). + > 2. Instruct them to execute the `gcloud auth login` command to authorize their local workstation terminal session. + > 3. Do not attempt project creation or resource configuration until authentication is completed successfully. + +2. 
**Verify Active Identity:** + ```bash + gcloud config get-value account --format="json" + ``` + +3. **Programmatic Enterprise Routing Guardrail:** + Before proceeding, verify if the account is bound to a corporate organization, as enterprise setups must follow a different architecture: + ```bash + gcloud organizations list --format="json" + ``` + - Note that new Free Trial accounts automatically receive a Self-Owned Organization (SOO). To distinguish between a personal Free Trial account and an enterprise organization, inspect the JSON output: + - **Enterprise Organization (Halt Execution)**: If the output list contains an organization node where `owner.directoryCustomerId` is present (confirming a domain-verified Google Workspace or Cloud Identity organization), or if the user's prompt explicitly mentions corporate landing zones or multi-tenant project structures: + - **Halt execution** of this skill immediately. + - Route the developer to the official [Google Cloud Setup guided flow](https://docs.cloud.google.com/docs/enterprise/cloud-setup). + - **Personal Account / Free Trial SOO (Proceed)**: If the output list is empty `[]`, or if it contains a Self-Owned Organization (where `owner.directoryCustomerId` is absent and `displayName` is not a verified domain name), proceed to Section 3: Select or Instantiate Your Google Cloud Project. + +--- + +### Section 3: Select or Instantiate Your Google Cloud Project + +Google Cloud resources are organized into **Projects**. When developers sign up for a Free Trial via the console, Google Cloud automatically creates a default project (e.g., "My First Project"). Always audit the active environment first to reuse existing projects and prevent token-burning collision errors. + +1. **Silent Project Discovery:** + List active, accessible projects (limited to prevent context window overflow): + ```bash + gcloud projects list --filter="lifecycleState=ACTIVE" --limit=20 --format="json" + ``` +2. 
**Reuse Existing Project (Recommended):** + If the list returns an active project, present it to the developer and propose setting it as the default working project: + ```bash + gcloud config set project {PROJECT_ID} --quiet + ``` +3. **Create Custom Project:** + If no projects exist, or if the developer explicitly requests a brand new workspace: + - Solicit a custom `PROJECT_ID` and `PROJECT_NAME` from the developer (Single-Question Policy). + - **Structured Confirmation & Consent Gate (Mandatory)**: + Before running any project creation or billing linkage commands, the agent **must** present a structured markdown table summarizing the target parameters: + | Parameter | Value | + | :--- | :--- | + | Target Project ID | `{PROJECT_ID}` | + | Target Project Name | `{PROJECT_NAME}` | + | Active Identity Account | `{ACCOUNT}` | + | Target Billing Account ID | `{BILLING_ACCOUNT_ID}` | + + Ask the user the exact consent query: + `"I am ready to initialize your Google Cloud project and link billing. Do you want me to proceed?"` + + **CRITICAL**: The agent **MUST NOT** execute any `gcloud projects create` or billing link commands during this turn. You must display this table, ask the exact consent query, and **strictly stop** to wait for the user's positive affirmation. + - **Project ID Collision Suffix Recovery**: If the project creation command fails because the `PROJECT_ID` is already taken globally (returning a `PROJECT_ID_COLLISION` or `ALREADY_EXISTS` error): + - Automatically append a random 4-digit suffix (e.g., changing `my-project` to `my-project-8472`). + - Propose this new available project ID to the developer and re-solicit consent before retrying. 
+ - **Execute Project Creation**: Once explicit user consent is confirmed: + ```bash + gcloud projects create {PROJECT_ID} --name="{PROJECT_NAME}" --quiet --format="json" + ``` + - Set the active working project: + ```bash + gcloud config set project {PROJECT_ID} --quiet + ``` + +--- + +### Section 4: Verify and Link Billing + +To deploy resources on Google Cloud, your project must be linked to an active Cloud Billing account. + +1. **Audit Billing Status:** + Check if the active project is already linked to a billing account: + ```bash + gcloud billing projects describe {PROJECT_ID} --format="json" + ``` +2. If the output contains `"billingEnabled": true`, skip linkage and proceed immediately to Section 5: Skill Chaining (Spend Controls & Workloads). +3. **Discover Available Billing Accounts:** + If the project is unlinked, query the available billing account handles linked to the authenticated user identity: + ```bash + gcloud billing accounts list --format="json" + ``` +4. **Link Billing Account:** + Propose linking the project to the discovered Billing Account ID, and execute: + ```bash + gcloud billing projects link {PROJECT_ID} --billing-account={BILLING_ACCOUNT_ID} --format="json" + ``` + +--- + +### Section 5: Skill Chaining (Spend Controls & Workloads) + +Onboarding setup is now complete. To safeguard your environment and deploy workloads, you can chain to downstream specialized skills: + +1. **Billing Spend Controls:** + To avoid accidental cost overruns, consider setting up a programmatic control to automatically disable billing. When billing is disabled, all Google Cloud services and usage in the project are terminated to stop further costs: + - Direct the developer to the official [Disable Billing Usage with Notifications Guide](https://docs.cloud.google.com/billing/docs/how-to/disable-billing-with-notifications), which provides detailed instructions on how to automatically shut down billing when costs exceed the project budget. +2. 
**Deploy Workloads:** + To deploy your first resource, trigger the downstream specialized skill matching your target application (e.g., [cloud-run-basics](https://github.com/google/skills/blob/main/skills/cloud/cloud-run-basics) or `bigquery-basics`). If the specialized skill is not locally available, direct the developer to the corresponding official quickstart, such as the [Cloud Run Container Deployment Quickstart](https://docs.cloud.google.com/run/docs/quickstarts/deploy-container). + *Note: Those downstream specialized skills are individually responsible for dynamically enabling their own required service APIs (e.g., run.googleapis.com) inline during execution.* + +--- + +## Validation Logic + +After completing the onboarding steps, programmatically verify the completed environment state using these diagnostic commands: + +1. **Verify CLI Installation:** + ```bash + which gcloud + ``` +2. **Verify Authenticated Identity:** + ```bash + gcloud config get-value account + ``` +3. **Verify Project Workspace Existence:** + ```bash + gcloud projects describe {PROJECT_ID} --format="json" + ``` +4. 
**Verify Billing Linkage** (Ensure the JSON output contains `"billingEnabled": true`): + ```bash + gcloud billing projects describe {PROJECT_ID} --format="json" + ``` + +--- + +## Additional Resources + +- [Google Cloud Getting Started landing page](https://docs.cloud.google.com/docs/get-started) +- [Google Cloud overview](https://docs.cloud.google.com/docs/overview) +- [Google Cloud Free Program](https://docs.cloud.google.com/free/docs/free-cloud-features) +- [Google Cloud Setup guided flow](https://docs.cloud.google.com/docs/enterprise/cloud-setup) + diff --git a/.agents/skills/google-cloud-waf-cost-optimization/SKILL.md b/.agents/skills/google-cloud-waf-cost-optimization/SKILL.md new file mode 100644 index 0000000..8c92f55 --- /dev/null +++ b/.agents/skills/google-cloud-waf-cost-optimization/SKILL.md @@ -0,0 +1,134 @@ +--- +name: google-cloud-waf-cost-optimization +description: Generates cost optimization guidance for Google Cloud workloads based on the Google Cloud Well-Architected Framework (WAF). Use this skill to evaluate a workload, identify cost requirements and constraints, and provide actionable recommendations to build, deploy, and manage the workload cost-efficiently in Google Cloud. +--- + +# Google Cloud Well-Architected Framework skill for the Cost Optimization pillar + +## Overview + +The Cost Optimization pillar of the Google Cloud Well-Architected Framework +provides a structured approach to optimize the costs of your cloud workloads +while maximizing business value. Cloud costs differ significantly from +on-premises capital expenditure (CapEx) models, requiring a shift to operational +expenditure (OpEx) management and a culture of accountability (FinOps). 
+ +## Core principles + +The recommendations in the cost optimization pillar of the Well-Architected +Framework are aligned with the following core principles: + +- **Align cloud spending with business value**: Ensure that your cloud + resources deliver measurable business value by aligning IT spending with + business objectives. Prioritize investments that directly contribute to + revenue, customer satisfaction, or operational efficiency. Grounding + document: + https://docs.cloud.google.com/architecture/framework/cost-optimization/align-cloud-spending-business-value + +- **Foster a culture of cost awareness**: Ensure that people across your + organization consider the cost impact of their decisions and activities. + Provide teams with the visibility and information they need to make informed, + cost-conscious choices. Grounding document: + https://docs.cloud.google.com/architecture/framework/cost-optimization/foster-culture-cost-awareness + +- **Optimize resource usage**: Provision only the resources that you need and + pay only for what you consume. Select the most cost-effective resource types, + sizes, and locations that meet your technical and business requirements. + Grounding document: + https://docs.cloud.google.com/architecture/framework/cost-optimization/optimize-resource-usage + +- **Optimize continuously**: Continuously monitor your cloud resource usage and + costs, and proactively make adjustments as needed to optimize your spending. + This iterative approach helps identify and address inefficiencies before they + become significant. Grounding document: + https://docs.cloud.google.com/architecture/framework/cost-optimization/optimize-continuously + +## Relevant Google Cloud products + +The following are _examples_ of Google Cloud products and features that are +relevant to cost optimization: + +- **Visibility and monitoring**: + + - **Cloud Billing reports**: Native dashboards for visualizing spending and + trends. 
+ - **BigQuery billing export**: Enables granular, custom analysis of billing + data using SQL and BI tools. + - **Looker Studio**: Used for creating detailed, shared cost dashboards and + reports. + - **Billing alerts and budgets**: Automated notifications when spending + reaches predefined thresholds. + +- **Automation and optimization tools**: + + - **Recommender / Active Assist**: Automatically identifies idle resources, + rightsizing opportunities, and unused commitments. + - **Cloud Hub Optimization**: Integrates billing and resource utilization data + to help developers and application owners quickly identify their most + expensive, fluctuating, or underutilized cloud resources. + - **FinOps hub**: Presents active savings and optimization opportunities in + one dashboard. + - **Billing quotas**: Limits on resource consumption to prevent unexpected + cost spikes. + +- **Efficient infrastructure**: + + - **Managed services and serverless services**: Services like Cloud Run, Cloud + Run functions, and GKE Autopilot reduce operational overhead and offer + pay-per-use scaling. + - **Compute Engine**: Use of Spot VMs for fault-tolerant workloads and + Committed Use Discounts (CUDs) for stable workloads. + - **Cloud Storage Lifecycle Policies**: Automatically moves data to lower-cost + storage classes (Nearline, Coldline, Archive) based on age or access. + +- **Organization and governance**: + + - **Resource Manager**: Logical structure (Organizations, Folders, Projects) + for cost attribution. + - **Labels**: Metadata tags for categorizing and filtering costs by + environment, team, or application. + - **Organization Policy Service**: Enforces constraints (e.g., restricted + regions or machine types) to control costs. + +## Workload assessment questions + +Ask appropriate questions to understand the cost-related requirements and +constraints of the workload and the user's organization. 
Choose questions from +the following list: + +- How do you incorporate cost considerations into your cloud architecture design + process? +- How do you foster a culture of cost awareness among your development teams? +- How do you monitor and manage cloud costs across different projects or + departments? +- What strategies do you use to optimize the cost of your compute resources? +- How do you balance cost optimization with the need for agility and innovation? +- How do you ensure that you are not over-provisioning cloud resources? +- How do you use data and analytics to drive cost optimization decisions? +- How do you optimize costs in different environments (e.g., development, + testing, production)? +- How do you ensure that your cost optimization efforts are sustainable and + ongoing? +- How do you measure the success of your cloud cost optimization initiatives? + +## Validation checklist + +Use the following checklist to evaluate the architecture's alignment with +cost-optimization recommendations: + +- **Cost Attribution**: 100% of resources are labeled with key metadata + (e.g., `env`, `team`, `app`). +- **Granular Visibility**: BigQuery billing export is enabled and used for + regular cost reviews. +- **Budgets and Alerts**: Every project or business unit has defined budgets + and active alerts. +- **Rightsizing**: Resources are regularly adjusted based on rightsizing + suggestions provided by Active Assist Recommender. +- **Commitment Strategy**: Spend is reviewed monthly to optimize Committed + Use Discount coverage. +- **Idle Resource Management**: Unused disks, IP addresses, and idle VMs are + identified and removed monthly. +- **Managed Services**: Serverless options are preferred for new workloads + unless specific technical constraints exist. +- **Storage Tiers**: Lifecycle policies are active for all major storage + buckets to minimize archival costs. 
diff --git a/.agents/skills/google-cloud-waf-operational-excellence/SKILL.md b/.agents/skills/google-cloud-waf-operational-excellence/SKILL.md new file mode 100644 index 0000000..0606adc --- /dev/null +++ b/.agents/skills/google-cloud-waf-operational-excellence/SKILL.md @@ -0,0 +1,147 @@ +--- +name: google-cloud-waf-operational-excellence +description: >- + Generates operations-focused guidance for Google Cloud workloads based on the + design principles and recommendations in the Operational Excellence pillar of + the Google Cloud Well-Architected Framework (WAF). Use this skill to evaluate + a workload, identify operational requirements, and provide actionable + recommendations for deployment, monitoring, and incident management. +--- + +# Google Cloud Well-Architected Framework skill for the Operational Excellence pillar + +## Overview + +The operational excellence pillar in the Google Cloud Well-Architected Framework +provides recommendations to operate workloads efficiently on Google Cloud. +Operational excellence in the cloud involves designing, implementing, and +managing cloud solutions that provide value, performance, security, and +reliability. The recommendations in this pillar help you to continuously improve +and adapt workloads to meet the dynamic and ever-evolving needs in the cloud. + +## Core principles + +The recommendations in the operational excellence pillar of the Well-Architected +Framework are aligned with the following core principles: + +- **Ensure operational readiness**: Define and measure criteria for a workload + to be considered ready for production, including staffing, processes, and + governance. Grounding document: + https://docs.cloud.google.com/architecture/framework/operational-excellence/operational-readiness-and-performance-using-cloudops + +- **Manage incidents and problems**: Establish structured processes for + incident response, communication, and root cause analysis to minimize impact + and prevent recurrence. 
Grounding document: + https://docs.cloud.google.com/architecture/framework/operational-excellence/manage-incidents-and-problems + +- **Manage and optimize cloud resources**: Monitor resource utilization and + right-size environments to maintain performance while ensuring operational + efficiency. Grounding document: + https://docs.cloud.google.com/architecture/framework/operational-excellence/manage-and-optimize-cloud-resources + +- **Automate and manage change**: Use Infrastructure as Code (IaC) and CI/CD + pipelines to ensure consistent, repeatable, and low-risk deployments and + configuration changes. Grounding document: + https://docs.cloud.google.com/architecture/framework/operational-excellence/automate-and-manage-change + +- **Continuously improve and innovate**: Regularly review architectures, + monitor industry trends, and adapt operations to meet evolving business + needs. Grounding document: + https://docs.cloud.google.com/architecture/framework/operational-excellence/continuously-improve-and-innovate + +## Relevant Google Cloud products + +The following are _examples_ of Google Cloud products and features that are +relevant to operational excellence: + +- **Observability and monitoring** + - **Cloud Monitoring**: Full-stack observability for Google Cloud and + hybrid environments. + - **Cloud Logging**: Real-time log management and analysis at scale. + - **Error Reporting**: Aggregates and displays errors for running cloud + services. + - **Service Monitoring**: Tools for defining and tracking Service Level + Objectives (SLOs). + +- **Automation and CI/CD** + - **Cloud Build**: Serverless platform for building, testing, and + deploying software. + - **Cloud Deploy**: Managed continuous delivery service for GKE, Cloud + Run, and GCE. + - **Terraform / Infrastructure Manager**: Managed service for + Infrastructure as Code (IaC) automation. + - **Artifact Registry**: Central repository for managing build artifacts + and container images. 
+ +- **Resource management and optimization** + - **Recommender (Active Assist)**: Automatically identifies idle resources + and right-sizing opportunities. + - **Resource Manager**: Hierarchical management of resources across + organizations, folders, and projects. + +- **Incident response** + - **Incident response & management (IRM)**: Structured tools and processes + for managing operational disruptions. + +## Workload assessment questions + +Ask appropriate questions to understand operations-related requirements and +constraints of the workload and the user's organization. Choose questions from +the following list: + +- **Operational readiness and performance** + - How do you define and measure operational readiness for your cloud + workloads and what specific criteria or metrics do you use? + - Describe your process for defining, tracking, and achieving SLOs for + your critical workloads. + +- **Incident and problem management** + - Describe your incident management process, including roles, + responsibilities, and communication channels. + - How do you conduct post-incident reviews (PIRs) to identify root causes + and implement preventive measures? + +- **Resource management and optimization** + - How do you ensure that your cloud resources are right-sized for your + workloads, and what tools or techniques do you use? + +- **Change automation** + - Describe your change management process, including approval workflows, + testing procedures, and deployment strategies. + - How do you automate deployments, ensure their consistency and manage + configuration? + +- **Continuous improvement** + - How do you ensure that your cloud operations are continuously adapting + to meet evolving business needs and technological advancements? 
+ +## Validation checklist + +Use the following checklist to evaluate the architecture's alignment with +operational excellence recommendations: + +- **Operational readiness** + - [ ] A formal framework or set of criteria exists to assess operational + readiness before production deployment. + - [ ] Service Level Objectives (SLOs) are explicitly defined and monitored + using automated tools. + +- **Incident management** + - [ ] Incident response roles and communication channels are clearly + defined and documented. + - [ ] A structured, blameless post-mortem process is followed for all + major incidents. + +- **Change automation** + - [ ] All infrastructure changes are performed using Infrastructure as + Code (IaC) to ensure consistency. + - [ ] CI/CD pipelines are integrated with automated testing for all + deployment changes. + +- **Resource optimization** + - [ ] Resource utilization is regularly reviewed using recommendations + from Active Assist or performance data. + +- **Culture of improvement** + - [ ] A documented strategy is in place for regularly reviewing and + adapting cloud operations to industry advancements. diff --git a/.agents/skills/google-cloud-waf-performance-optimization/SKILL.md b/.agents/skills/google-cloud-waf-performance-optimization/SKILL.md new file mode 100644 index 0000000..8500d11 --- /dev/null +++ b/.agents/skills/google-cloud-waf-performance-optimization/SKILL.md @@ -0,0 +1,147 @@ +--- +name: google-cloud-waf-performance-optimization +description: >- + Generates performance-focused guidance for Google Cloud workloads based on the + design principles and recommendations in the Performance Optimization pillar + of the Google Cloud Well-Architected Framework (WAF). Use this skill + to evaluate a workload, identify performance requirements, and provide + actionable recommendations for resource allocation, modular design, and + elasticity. 
+--- + +# Google Cloud Well-Architected Framework skill for the Performance Optimization pillar + +## Overview + +The Performance Optimization pillar of the Google Cloud Well-Architected +Framework provides principles and recommendations to help you design, build, and +operate high-performing workloads. It focuses on efficiently allocating +resources, leveraging modular architectures, and using data-driven insights to +continuously monitor and improve performance as your business needs evolve. + +## Core principles + +The recommendations in the performance optimization pillar of the +Well-Architected Framework are aligned with the following core principles: + +- **Plan resource allocation**: Carefully select and configure the compute, + storage, and networking resources that best match the specific requirements + of your workload. Grounding document: + https://docs.cloud.google.com/architecture/framework/performance-optimization/plan-resource-allocation + +- **Take advantage of elasticity**: Utilize automated scaling and serverless + technologies to dynamically adjust resource capacity in response to + real-time demand fluctuations. Grounding document: + https://docs.cloud.google.com/architecture/framework/performance-optimization/elasticity + +- **Promote modular design**: Architect systems using independent, loosely + coupled components to enhance scalability and allow individual parts to be + optimized without affecting the entire system. Grounding document: + https://docs.cloud.google.com/architecture/framework/performance-optimization/promote-modular-design + +- **Continuously monitor and improve performance**: Implement robust + observability to identify bottlenecks and use performance data to drive + iterative enhancements throughout the software development lifecycle. 
+ Grounding document: + https://docs.cloud.google.com/architecture/framework/performance-optimization/continuously-monitor-and-improve-performance + +## Relevant Google Cloud products + +The following are _examples_ of Google Cloud products and features that are +relevant to performance optimization: + +- **Compute and scaling** + - **Compute Engine (MIGs)**: Managed instance groups that support + autoscaling and load balancing for VM-based workloads. + - **Google Kubernetes Engine (GKE)**: Provides container orchestration + with horizontal and vertical pod autoscaling. + - **Cloud Run**: A fully managed serverless platform that automatically + scales containers to zero or up based on traffic. + +- **Data and caching** + - **Cloud CDN**: Low-latency content delivery network to cache static and + dynamic content closer to end-users. + - **Memorystore**: Managed in-memory data store for Valkey and Redis to + provide sub-millisecond data access. + - **Bigtable**: NoSQL database service for analytical and operational + workloads requiring low latency and high throughput. + - **Spanner**: RDBMS that provides global consistency, high availability, + and horizontal scaling for mission-critical transactional applications. + +- **Performance analysis and monitoring** + - **Cloud Trace**: Distributed tracing system that helps identify latency + bottlenecks. + - **Cloud Profiler**: Continuous CPU and memory profiling to identify + resource-heavy application code. + - **Cloud Monitoring**: Provides dashboards and alerts based on + performance KPIs like latency and throughput. + +## Workload assessment questions + +Ask appropriate questions to understand the performance-related requirements and +constraints of the workload and the user's organization. 
Choose questions from +the following list: + +- **Plan resource allocation** + - When initially provisioning compute resources for a new application, + which approach do you use to determine the required capacity for + expected peak loads? + - Which caching strategies (browser, in-memory, CDN, database) do you + utilize to improve performance and responsiveness? + - How do you optimize the performance of your data storage solutions + (e.g., SSD vs HDD, storage classes) for your applications? + +- **Promote modular design** + - Which architectural patterns (microservices, asynchronous messaging, + stateless servers) do you employ to enhance performance and resilience? + - How do you design your application to minimize the impact of failures in + one part of the system on other parts? + +- **Continuously monitor and improve performance** + - How frequently do you review and analyze the performance of your + production applications and infrastructure? + - Which tools or techniques (APM, distributed tracing, load testing) do + you use to proactively identify and diagnose performance bottlenecks? + - How do you incorporate performance considerations into your software + development lifecycle (SDLC)? + +- **Take advantage of elasticity** + - Which methods do you use to manage and optimize the cost of your cloud + resources while maintaining performance? + - How do you typically handle sudden spikes in traffic or workload on your + applications? + +## Validation checklist + +Use the following checklist to evaluate the architecture's alignment with +performance optimization recommendations: + +- **Resource allocation** + - [ ] Initial provisioning is based on load testing or historical data + rather than general estimates. + - [ ] Caching is implemented at multiple layers (CDN, in-memory, or + browser) to offload backend systems. + - [ ] Storage types (SSD/HDD) and classes are selected based on the + specific I/O requirements of the workload. 
+ +- **Modular design** + - [ ] The architecture uses microservices or decoupled components to allow + independent scaling. + - [ ] Circuit breakers or bulkheads are implemented to isolate failures + and prevent performance degradation across the system. + +- **Monitoring and continuous improvement** + - [ ] Automated dashboards and alerts are configured for key performance + indicators (KPIs). + - [ ] Distributed tracing and profiling tools are used to identify + code-level bottlenecks. + - [ ] Performance testing (unit and integration) is integrated into the + software development lifecycle. + +- **Elasticity** + - [ ] Auto-scaling rules are configured and validated to handle variable + demand. + - [ ] The architecture leverages serverless or managed services to + dynamically match capacity to load. + - [ ] Resource utilization is reviewed regularly to eliminate idle + overhead and balance cost with performance. diff --git a/.agents/skills/google-cloud-waf-reliability/SKILL.md b/.agents/skills/google-cloud-waf-reliability/SKILL.md new file mode 100644 index 0000000..fd019fc --- /dev/null +++ b/.agents/skills/google-cloud-waf-reliability/SKILL.md @@ -0,0 +1,130 @@ +--- +name: google-cloud-waf-reliability +description: Generates reliability-focused guidance for Google Cloud workloads based on the design principles and recommendations in the Google Cloud Well-Architected Framework. Use this skill to evaluate a workload, identify reliability requirements, and provide actionable recommendations to build, deploy, and manage the workload reliably in Google Cloud. +--- + +# Google Cloud Well-Architected Framework skill for the Reliability pillar + +## Overview + +The Reliability pillar of the Google Cloud Well-Architected Framework provides +principles and recommendations to help you design, deploy, and manage reliable, +resilient, and highly available workloads in Google Cloud.
A reliable system +consistently performs its intended functions under defined conditions, is +resilient to failures, and recovers gracefully from disruptions, thereby +minimizing downtime, enhancing user experience, and ensuring data integrity. + +## Core principles + +The recommendations in the reliability pillar of the Well-Architected Framework +are aligned with the following core principles: + +- **Define reliability based on user-experience goals**: Measurement of + reliability should reflect the actual experience of the system's users rather + than merely relying on infrastructure metrics. Focus on outcomes that matter + most to users. Grounding document: + https://docs.cloud.google.com/architecture/framework/reliability/define-reliability-based-on-user-experience-goals + +- **Set realistic targets for reliability**: Determine appropriate Service + Level Objectives (SLOs) that balance the cost and complexity of maximizing + availability against business requirements. Utilize error budgets to manage + feature velocity. Grounding document: + https://docs.cloud.google.com/architecture/framework/reliability/set-targets + +- **Build highly available systems through resource redundancy**: Eliminate + single points of failure by duplicating critical components across zones and + regions to maintain operations during localized outages. Grounding document: + https://docs.cloud.google.com/architecture/framework/reliability/build-highly-available-systems + +- **Take advantage of horizontal scalability**: Design system architectures to + scale horizontally (adding more instances) to seamlessly accommodate load + fluctuations and improve overall fault tolerance. 
Grounding document: + https://docs.cloud.google.com/architecture/framework/reliability/horizontal-scalability + +- **Detect potential failures by using observability**: Implement thorough + monitoring, logging, and alerting systems to proactively detect, diagnose, + and address anomalies before they cause user-facing issues. Grounding + document: + https://docs.cloud.google.com/architecture/framework/reliability/observability + +- **Design for graceful degradation**: Architect systems to maintain critical + functionality, even if at reduced performance or with limited features, when + dependencies fail or the system experiences extreme stress. Grounding + document: + https://docs.cloud.google.com/architecture/framework/reliability/graceful-degradation + +- **Perform testing for recovery from failures**: Build confidence in system + resilience by continuously simulating failures and verifying the + effectiveness of automated and manual recovery procedures. Grounding + document: + https://docs.cloud.google.com/architecture/framework/reliability/perform-testing-for-recovery-from-failures + +- **Perform testing for recovery from data loss**: Regularly test backup and + restore protocols to ensure rapid recovery from data corruption or loss, + remaining within the defined Recovery Time Objective (RTO) and Recovery Point + Objective (RPO). Grounding document: + https://docs.cloud.google.com/architecture/framework/reliability/perform-testing-for-recovery-from-data-loss + +- **Conduct thorough postmortems**: Foster a blameless culture by investigating + outages comprehensively to understand root causes, followed by implementing + measures that prevent recurrence. 
Grounding document: + https://docs.cloud.google.com/architecture/framework/reliability/conduct-postmortems + +## Relevant Google Cloud products + +The following are _examples_ of Google Cloud products and features that are +relevant to reliability: + +- **Compute**: Compute Engine Managed Instance Groups (MIGs), Google Kubernetes + Engine (GKE), Cloud Run +- **Networking**: Cloud Load Balancing, Cloud CDN, Cloud DNS +- **Storage and databases**: Cloud Storage (multi-region), Cloud SQL High + Availability, Spanner, Filestore, Firestore +- **Operations**: Cloud Monitoring, Cloud Logging, Google Cloud Managed Service + for Prometheus +- **Disaster recovery**: Backup and DR Service, Filestore backups + +## Workload assessment questions + +Ask appropriate questions to understand the reliability-related requirements and +constraints of the workload and the user's organization. Choose questions from +the following list: + +- How does your organization define and measure the reliability of your systems + in relation to user experience? +- How does your organization approach setting reliability targets for your + services? +- What is your organization's strategy for ensuring high availability through + resource redundancy? +- How does your organization leverage horizontal scalability to maintain + performance and reliability? +- How does your organization utilize observability (metrics, logs, traces) to + gain insights and detect potential failures? +- How does your organization manage alerting based on observability data to + ensure timely responses to significant issues without causing alert fatigue? +- What measures does your organization take to ensure systems can gracefully + degrade during high load or partial failures? +- How frequently and comprehensively does your organization test for recovery + from system failures (e.g., regional failovers, release rollbacks)? +- What is your organization's approach to testing for recovery from data loss? 
+- How does your organization conduct and utilize postmortems after incidents? + +## Validation checklist + +Use the following checklist to evaluate the architecture's alignment with +reliability recommendations: + +- User-focused SLIs and SLOs are explicitly defined and actively monitored. +- The architecture avoids single points of failure through cross-zone or + cross-region redundancy. +- Autoscaling is enabled to handle variable demand without manual intervention. +- Application and infrastructure health checks are configured to trigger + automated failovers. +- Regular backup schedules are in place, and restoration processes are routinely + tested. +- The system architecture incorporates patterns like circuit breakers, retries + with exponential backoff, and rate limiting to support graceful degradation. +- Game days or chaos engineering practices are regularly held to validate + failure recovery. +- A formalized, blameless postmortem process exists to ensure organizational + learning from operational incidents. diff --git a/.agents/skills/google-cloud-waf-security/SKILL.md b/.agents/skills/google-cloud-waf-security/SKILL.md new file mode 100644 index 0000000..b95840c --- /dev/null +++ b/.agents/skills/google-cloud-waf-security/SKILL.md @@ -0,0 +1,309 @@ +--- +name: google-cloud-waf-security +description: Generates security-focused guidance for Google Cloud workloads based on the design principles and recommendations in the Google Cloud Well-Architected Framework (WAF). Use this skill to evaluate a workload, identify security requirements, and provide actionable recommendations for IAM, network security, data protection, and operational security. 
+--- + +# Google Cloud Well-Architected Framework skill for the Security pillar + +## Overview + +The security pillar of the Google Cloud Well-Architected Framework provides +design principles and best practices for building a robust security posture by +integrating security into every layer of the architecture for cloud workloads. +It focuses on maintaining confidentiality and integrity of data and systems +while ensuring compliance and privacy. It provides a structured approach to risk +management, threat defense, and identity control, enabling you to operate cloud +workloads securely and at scale. + +## Core principles + +The recommendations in the security pillar of the Well-Architected Framework are +aligned with the following core principles: + +- **Implement security by design**: Integrate cloud security and network + security considerations starting from the initial design phase of your + applications and infrastructure. Google Cloud provides architecture + blueprints and recommendations to help you apply this principle. Grounding + document: + https://docs.cloud.google.com/architecture/framework/security/implement-security-by-design + +- **Implement zero trust**: Use a _never trust, always verify_ approach, where + access to resources is granted based on continuous verification of trust. + Google Cloud supports this principle through products like Chrome Enterprise + Premium and Identity-Aware Proxy (IAP). Grounding document: + https://docs.cloud.google.com/architecture/framework/security/implement-zero-trust + +- **Implement shift-left security**: Implement security controls early in the + software development lifecycle. Avoid security defects before system changes + are made. Detect and fix security bugs early, fast, and reliably after the + system changes are committed. Google Cloud supports this principle through + products like Cloud Build, Binary Authorization, and Artifact Registry. 
+ Grounding document: + https://docs.cloud.google.com/architecture/framework/security/implement-shift-left-security + +- **Implement preemptive cyber defense**: Adopt a proactive approach to + security by implementing robust fundamental measures like threat + intelligence. This approach helps you build a foundation for more effective + threat detection and response. Google Cloud's approach to layered security + controls aligns with this principle. Google Cloud supports this principle + through products like Security Command Center, Google Threat Intelligence, + and Google SecOps. Grounding document: + https://docs.cloud.google.com/architecture/framework/security/implement-preemptive-cyber-defense + +- **Use AI securely and responsibly**: Develop and deploy AI systems in a + responsible and secure manner. The recommendations for this principle are + aligned with guidance in the AI and ML perspective of the Well-Architected + Framework and in Google's Secure AI Framework (SAIF). Grounding document: + https://docs.cloud.google.com/architecture/framework/security/use-ai-securely-and-responsibly + +- **Use AI for security**: Use AI capabilities to improve your existing + security systems and processes through Gemini in Security and overall + platform-security capabilities. Use AI as a tool to increase the automation + of remedial work and ensure security hygiene to make other systems more + secure. Google Cloud supports this principle through products like Google + Threat Intelligence and Google SecOps. Grounding document: + https://docs.cloud.google.com/architecture/framework/security/use-ai-for-security + +- **Meet regulatory, compliance, and privacy needs**: Adhere to + industry-specific regulations, compliance standards, and privacy + requirements. Google Cloud helps you meet these obligations through products + like Assured Workloads, Organization Policy Service, and our compliance + resource center. 
Grounding document: + https://docs.cloud.google.com/architecture/framework/security/meet-regulatory-compliance-and-privacy-needs + +## Relevant Google Cloud products + +The following are _examples_ of Google Cloud products and features that are +relevant to security: + +- **Identity and access management** + + - **Identity and Access Management (IAM)**: Fine-grained access control for + Google Cloud resources. + - **Identity-Aware Proxy (IAP)**: Secure access to applications without a VPN. + - **Chrome Enterprise Premium**: Endpoint security and context-aware access. + +- **Network security** + + - **Google Cloud Armor**: DDoS protection and Web Application Firewall (WAF). + - **VPC Service Controls**: Define security perimeters to prevent data + exfiltration. + - **Cloud Next-Generation Firewall (NGFW)**: Advanced threat protection for + network traffic. + - **Shared VPC**: Centralized network management across projects. + - **Cloud Interconnect and IPsec VPN**: Secure, private connectivity. + +- **Data security** + + - **Cloud Key Management Service (KMS)**: Manage encryption keys. + - **Sensitive Data Protection (formerly Cloud DLP)**: Discover and redact + sensitive data. + - **Confidential Computing**: Encrypt data in use (memory). + +- **Security operations (SecOps)** + + - **Google SecOps (Chronicle)**: Threat detection and security analytics. + - **Security Command Center (SCC)**: Centralized vulnerability and threat + management. + - **Cloud Logging and Cloud Monitoring**: Visibility into system activity. + +- **Automation and supply chain** + + - **Cloud Build**: Secure CI/CD pipelines. + - **Artifact Analysis**: Vulnerability scanning for container images. + - **Binary Authorization**: Deploy-time policy enforcement. + - **Assured Open Source Software**: Use secured OSS packages. + +## Workload assessment questions + +Ask appropriate questions to understand the security-related requirements and +constraints of the workload and the user's organization.
Choose questions from +the following list: + +- **Security by design**: + + - How do you incorporate security considerations into your project's initial + planning and design phases? + - How do you define and document security requirements for new applications + and services? + - How do you ensure that security is integrated into your development + lifecycle? + - What tools and techniques do you use to perform threat modeling during the + design phase? + - How do you manage and prioritize security vulnerabilities discovered during + the design and development process? + - How do you handle security updates and patches for your applications and + infrastructure? + - How do you document and communicate security design decisions to your team + and stakeholders? + - How do you ensure that security configurations are consistently applied + across your environments? + - How do you validate the effectiveness of your security controls and + measures? + - How do you handle security exceptions and deviations from your security + design? + +- **Zero trust**: + + - How do you verify and authenticate users and devices accessing your Google + Cloud resources? + - How do you implement the principle of least privilege for access control? + - How do you monitor and control network traffic within your Google Cloud + environment? + - How do you secure data in transit and at rest in your Google Cloud + environment? + - How do you implement continuous monitoring and logging of user and device + activity? + - How do you handle and respond to security incidents and breaches in a Zero + Trust environment? + - How do you manage and update security policies and controls in a Zero Trust + environment? + - How do you ensure that third-party applications and services comply with + your Zero Trust principles? + - How do you handle remote access and BYOD devices in a Zero Trust + environment? + - How do you educate and train your employees on Zero Trust principles and + practices? 
+ +- **Shift-left security**: + + - How do you integrate security testing into your development pipeline early + in the process? + - What types of security testing do you perform during the development phase? + - How do you provide developers with feedback on security vulnerabilities and + best practices? + - How do you empower developers to take ownership of security in their code? + - How do you ensure that security requirements are clearly defined and + communicated to developers? + - How do you measure the effectiveness of your Shift Left security + initiatives? + - How do you handle security dependencies and third-party libraries in your + code? + - How do you manage and update security configurations in your development + environment? + - How do you handle security exceptions and deviations from your security + policies in development? + - How do you promote a culture of security awareness and responsibility among + developers? + +- **Preemptive cyber defense**: + + - How do you proactively identify and mitigate potential security threats + before they impact your systems? + - What tools and techniques do you use for continuous security monitoring and + analysis? + - How do you respond to and remediate security alerts and incidents? + - How do you simulate and test your incident response plans? + - How do you stay up-to-date with the latest security threats and + vulnerabilities? + - How do you handle and mitigate DDoS attacks against your applications and + services? + - How do you protect your sensitive data from insider threats? + - How do you ensure that your security controls are effective against advanced + persistent threats (APTs)? + - How do you handle security vulnerabilities in your supply chain? + - How do you adapt your security posture to evolving threats and technologies? + +- **Security of AI workloads**: + + - How do you ensure the security of your AI models and data? 
+ - How do you address potential biases and ethical concerns in your AI models? + - How do you protect your AI models from adversarial attacks and data + poisoning? + - How do you ensure the privacy of data used in your AI models? + - How do you explain and interpret the decisions made by your AI models? + - How do you manage and control access to your AI models and data? + - How do you ensure compliance with regulations and standards related to + AI and ML? + - How do you monitor and detect anomalies in the behavior of your AI models? + - How do you handle and respond to security incidents involving your AI + models? + - How do you educate and train your employees on the secure and responsible + use of AI and ML? + +- **AI for security**: + + - How do you leverage AI and ML to enhance your security posture? + - What types of AI models do you use for security purposes? + - How do you train and validate your AI models for security applications? + - How do you ensure the accuracy and reliability of AI-based security + systems? + - How do you handle false positives and false negatives from AI-based + security systems? + - How do you integrate AI-based security systems with your existing security + infrastructure? + - How do you manage and update your AI models for security applications? + - How do you explain and interpret the decisions made by your AI models for + security applications? + - How do you ensure the ethical and responsible use of AI and ML for security + purposes? + - How do you measure the effectiveness of AI and ML in improving your security + posture? + +- **Regulatory compliance and privacy**: + + - What regulatory compliance frameworks and privacy standards do you need to + adhere to? + - How do you assess and manage compliance risks in your Google Cloud + environment? + - How do you ensure the privacy of sensitive data stored and processed in + Google Cloud? + - How do you handle data subject requests (DSRs) related to privacy + regulations? 
+ - How do you document and track compliance activities and evidence? + - How do you ensure that third-party vendors and partners comply with your + regulatory and privacy requirements? + - How do you handle data breaches and security incidents related to compliance + regulations? + - How do you stay up-to-date with changes in regulatory compliance and privacy + standards? + - How do you educate and train your employees on regulatory compliance and + privacy requirements? + - How do you demonstrate and prove compliance to auditors and regulators? + +## Validation checklist + +Use the following checklist to evaluate the architecture's alignment with +security recommendations: + +- **Security by design**: + + - Are system components selected based on their security features and + hardening? + - Is defense-in-depth implemented at the network, host, and application + layers? + - Are safe libraries and application frameworks used to prevent common + vulnerabilities? + - Is a risk assessment performed using industry standards? + +- **Zero trust**: + + - Is access control enforced based on user identity and context (device, + location)? + - Are private connectivity methods (Cloud Interconnect, VPN) used for internal + traffic? + - Are default networks disabled in all projects? + - Are VPC Service Controls perimeters established around sensitive data? + +- **Shift-left security**: + + - Is infrastructure provisioned using Infrastructure as Code + (e.g., Terraform)? + - Are automated security scans integrated into the CI/CD pipeline? + - Is there a process for scanning and patching vulnerabilities in + dependencies? + - Is Binary Authorization used to ensure only trusted images are deployed? + +- **Preemptive cyber defense**: + + - Is threat intelligence integrated into security operations? + - Is security logging enabled and centralized for all critical resources? + - Are automated responses configured for common security threats? 
+ - Are defenses validated through periodic testing or red-teaming? + +- **AI security and governance**: + + - Are AI pipelines secured against tampering and data poisoning? + - Is differential privacy or data masking used for training data where + appropriate? + - Are Vertex Explainable AI and fairness indicators used for model governance? diff --git a/.agents/skills/google-cloud-waf-sustainability/SKILL.md b/.agents/skills/google-cloud-waf-sustainability/SKILL.md new file mode 100644 index 0000000..69ae6e6 --- /dev/null +++ b/.agents/skills/google-cloud-waf-sustainability/SKILL.md @@ -0,0 +1,251 @@ +--- +name: google-cloud-waf-sustainability +description: >- + Generates sustainability-focused guidance for Google Cloud workloads based on + the design principles and recommendations in the Google Cloud Well-Architected + Framework (WAF). Use this skill to evaluate a workload, identify environmental + impact requirements, and provide actionable recommendations to build, deploy, + and manage the workload sustainably in Google Cloud. +--- + +# Google Cloud Well-Architected Framework skill for the Sustainability pillar + +## Overview + +The Sustainability pillar of the Google Cloud Well-Architected Framework +provides principles and recommendations to help you minimize the environmental +impact of your cloud workloads. It focuses on a shared responsibility +model—Google optimizes the sustainability *of* the cloud, while customers +optimize sustainability *in* the cloud. By making informed decisions about +architecture, resource allocation, and region selection, you can significantly +reduce your carbon footprint and improve overall energy efficiency. 
+ +## Core principles + +The recommendations in the sustainability pillar of the Well-Architected +Framework are aligned with the following core principles: + +- **Shared responsibility**: Define the boundaries of responsibility and + embrace a shared fate model, working with your cloud provider and partners + to achieve optimal environmental outcomes for the entire ecosystem. + Grounding document: + https://docs.cloud.google.com/architecture/framework/sustainability#shared-responsibility + +- **Use regions that consume low-carbon energy**: Prioritize Google Cloud + regions with a high percentage of Carbon-Free Energy (CFE) and "Low CO2" + indicators to lower the gross carbon emissions of your deployments. + Grounding document: + https://docs.cloud.google.com/architecture/framework/sustainability/low-carbon-regions + +- **Optimize AI and ML workloads**: Maximize computations per watt by matching + algorithmic needs to specialized hardware (like TPUs) and applying + mathematical techniques to reduce computational complexity. Grounding + document: + https://docs.cloud.google.com/architecture/framework/sustainability/ai-ml-energy-efficiency + +- **Optimize resource usage**: Eliminate energy waste by scaling resources to + zero when idle, rightsizing virtual machines, and prioritizing managed + services that dynamically match actual demand. Grounding document: + https://docs.cloud.google.com/architecture/framework/sustainability/optimize-resource-usage + +- **Develop energy-efficient software**: Design your applications to minimize + unnecessary CPU, memory, and network activity on both backend servers and + end-user devices by using event-driven logic and optimized assets. 
Grounding + document: + https://docs.cloud.google.com/architecture/framework/sustainability/energy-efficient-software + +- **Optimize data and storage**: Reduce the environmental footprint of your + storage by implementing lifecycle management to archive cold data and + eliminating "dark data" that provides no business value. Grounding document: + https://docs.cloud.google.com/architecture/framework/sustainability/optimize-storage + +- **Continuously measure and improve**: Gain visibility into your carbon + emissions by analyzing granular data, identifying hotspots, and taking + proactive steps to remediate inefficiencies. Grounding document: + https://docs.cloud.google.com/architecture/framework/sustainability/continuously-measure-improve + +- **Promote a culture of sustainability**: Embed sustainability into your + organizational governance, connect technical decisions to environmental + goals, and ensure staff have the skills to implement green practices. + Grounding document: + https://docs.cloud.google.com/architecture/framework/sustainability/culture + +- **Align sustainability practices with industry guidelines**: Ensure that + your sustainability initiatives are aligned with industry guidelines for + measurement, reporting, and verification, such as W3C Web Sustainability + Guidelines, Green Software Foundation, and Greenhouse Gas Protocol. + Grounding document: + https://docs.cloud.google.com/architecture/framework/sustainability/industry-guidelines + +## Relevant Google Cloud products + +The following are _examples_ of Google Cloud products and features that are +relevant to sustainability: + +- **Visibility and measurement**: + - **Carbon Footprint**: Provides dashboard visibility into greenhouse gas + emissions associated with Google Cloud usage. + - **BigQuery**: Analyzes exported Carbon Footprint data alongside billing + data to identify emission hotspots. 
+ +- **Infrastructure and operations**: + - **Google Cloud Region Picker**: Helps weigh carbon footprint, cost, and + latency when selecting deployment locations. + - **Active Assist / Recommender**: Automatically identifies idle resources + and provides VM rightsizing recommendations to reduce waste. + - **Cloud Run / GKE Autopilot**: Fully managed compute environments that + optimize cluster usage and can scale to zero when idle. + - **Cloud Batch**: Optimizes the scheduling of batch jobs, allowing + execution during periods of high Carbon-Free Energy. + - **Spot VMs**: Utilizes unused data center capacity for fault-tolerant + workloads, improving overall hardware efficiency. + +- **Data and AI**: + - **Cloud Storage Lifecycle Management**: Automatically transitions older + data to lower-energy storage classes (Nearline, Coldline, Archive). + - **Cloud TPUs**: Specialized hardware optimized for the energy efficiency + of large-scale AI/ML matrix multiplications. + +## Workload assessment questions + +Ask appropriate questions to understand the sustainability-related requirements +and constraints of the workload and the user's organization. Choose questions +from the following list: + +- **Cloud sustainability**: + - How do you define the boundaries of sustainability responsibility + between your organization and your cloud provider? + - How do you leverage cloud capabilities and AI to drive sustainability + outcomes for your broader business operations? + - How does your cloud strategy account for the sustainability impact of + your partner ecosystem and multi-cloud environments? + +- **Use regions that consume low-carbon energy**: + - How do you incorporate carbon intensity into your Google Cloud region + selection strategy? + +- **Optimize AI and ML workloads**: + - How do you optimize the energy efficiency of your AI and machine + learning lifecycles? 
+ +- **Optimize resource usage**: + - How do you ensure your infrastructure footprint dynamically matches + actual workload demand? + - How do you select and maintain the hardware types used for your cloud + workloads? + - What is your strategy for handling non-urgent or compute-intensive + background tasks? + - How do you balance the need for high availability and disaster recovery + with sustainability? + +- **Develop energy-efficient software**: + - How do you ensure your backend logic minimizes unnecessary CPU, memory, + and network activity? + - How do you manage the overall efficiency and maintenance of your + codebase for sustainability? + - How do you minimize the data volume and processing load that your + application places on end-user devices? + - How does your user experience (UX) design contribute to energy + efficiency for the end user? + +- **Optimize data and storage**: + - What process do you have for managing the environmental footprint of + your data and storage? + +- **Continuously measure and improve**: + - How do you analyze your carbon data to prioritize optimization efforts? + - How is sustainability measurement embedded into your organization’s + governance and culture? + - What is your current process for gaining visibility into your + cloud-related carbon emissions? + - What proactive steps do you take to remediate identified carbon + hotspots? + +- **Promote a culture of sustainability**: + - How do you connect individual technical decisions to the organization's + mission and hold teams accountable for results? + - How do you ensure your technical and business staff have the specific + skills required to implement sustainability practices? + +## Validation checklist + +Use the following checklist to evaluate the architecture's alignment with +sustainability recommendations: + +- **Cloud sustainability**: + - [ ] The organization embraces a shared responsibility and shared fate + model for sustainability. 
+ - [ ] AI is used as a catalyst for profitability and resilience to + streamline operations, or sustainability is integrated into the design + process to create positive feedback loops. + - [ ] Collaborations with sustainable partners are prioritized and + multi-cloud data portability is leveraged, or internal practices align + with recognized global standards like the Green Software Foundation. + +- **Use regions that consume low-carbon energy**: + - [ ] A data-driven policy prioritizes regions with high Carbon-Free + Energy (CFE%) and "Low CO2" indicators, or the Google Cloud Region + Picker is actively used to balance carbon footprint with cost and + latency. + +- **Optimize AI and ML workloads**: + - [ ] Algorithmic needs are matched to specialized hardware (TPUs) to + maximize computations per watt, or mathematical techniques like model + compression and PEFT are applied to reduce computational complexity. + +- **Optimize resource usage**: + - [ ] Fully managed services that scale to zero when idle are utilized, or + Horizontal Pod Autoscaling (HPA) and Vertical Pod Autoscaling (VPA) are + used in GKE to prevent over-provisioning. + - [ ] A formal process exists to upgrade to the newest machine types for + improved performance-per-watt, or workloads are actively matched to + specialized machine families. + - [ ] Batch jobs are proactively scheduled to run during periods or in + regions with the highest proportion of CFE, or Spot VMs are utilized for + non-critical batch jobs. + - [ ] "Cold DR" or serverless failover is prioritized to ensure secondary + regions remain at zero energy consumption until an event occurs, or + Infrastructure as Code (IaC) is used to rapidly provision a recovery + environment only when needed. + +- **Develop energy-efficient software**: + - [ ] Resource-intensive busy loops or constant polling are replaced with + event-driven logic, or algorithms with optimal time complexity and data + structures are prioritized. 
+ - [ ] The "Don't Repeat Yourself" (DRY) principle is adhered to with + regular refactoring, or intelligent caching (e.g., Memorystore) is + implemented with smart eviction policies. + - [ ] The download size of website products is measured and maintained + against a strict budget, or CI/CD pipelines automate the minimization + and compression of HTML, CSS, and JS files. + - [ ] Static sites or Progressive Web Apps (PWAs) are preferred for faster + loading, or DOM manipulation is minimized to reduce device power + consumption. + +- **Optimize data and storage**: + - [ ] Object Lifecycle Management is used to automatically move cold data + to Archive storage, or discovery techniques (e.g., Dataplex) are used to + identify and eliminate "dark data". + +- **Continuously measure and improve**: + - [ ] Carbon data is analyzed by project, region, and service to identify + gross emitters, or carbon data is joined with Billing data in BigQuery + to correlate cost and environmental impact. + - [ ] A formal GreenOps function defines accountability for carbon + reduction targets, or verified Carbon Footprint data from BigQuery + supports formal ESG disclosures. + - [ ] Applications are instrumented to measure the specific carbon + intensity of software features, or automated exports of Carbon Footprint + data to BigQuery are configured for deep analysis. + - [ ] The unattended project recommender and Active Assist are regularly + used to decommission idle resources, or proactive projects re-architect + hotspots by shifting workloads to low-carbon regions. + +- **Promote a culture of sustainability**: + - [ ] Abstract carbon metrics are transformed into tangible progress + indicators in annual reports, or sustainability is treated as a + first-class technical requirement (NFR) tied to KPIs and performance + reviews. 
+ - [ ] Training tailored to specific job roles (e.g., developers on code + efficiency, FinOps on carbon unit economics) is provided, or teams are + formally trained to access and interpret carbon footprint data. diff --git a/README.md b/README.md index 5c58957..0471ac8 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,144 @@ -# googlecloudhack — Evidence-Driven DBRE Agent +# Evidence-Driven DBRE Agent -A Gemini-powered MongoDB performance engineer: detects slow queries, proposes ESR-correct -indexes from real `explain` evidence, gates `apply` behind human approval, verifies the -result, and ships a hashed evidence pack plus an internal event ledger for every fix. +> A safe MongoDB index-remediation control plane. It turns a slow query into a +> measured, human-approved index fix — and never lets automation touch the +> database on its own. -> **Status:** Day-5 — live Cloud Run demo with split Agent Engine diagnosis roles, -> deterministic validation, human-gated apply/verify, and Evidence Ledger collections. +A Gemini-powered MongoDB performance engineer: it detects a blocking-sort slow query, +proposes the **ESR-correct** index from real `explain` evidence, gates the `apply` +behind a **hash-bound human approval**, verifies the fix by re-measuring, and records a +hashed `EvidencePack` plus an internal event ledger for every run. + +**Live demo:** https://gcrah-dashboard-2vbnam7yma-uc.a.run.app + +--- + +## The core rule + +``` +Agents recommend. read-only reasoning — never decide, apply, or verify +Deterministic code decides. ESR winner · evidence hash · phase gates +Humans approve. a specific evidence hash, before any mutation +Verification proves. re-explain — VERIFIED only on measured improvement +``` + +This separation of authority is the whole point: an agent can be wrong, and nothing +breaks — deterministic code and a human stand between any recommendation and the database.
## Architecture -Five demo stages (Detect → Diagnose → Test → Approve → Verify) over a deterministic -three-phase safety engine (DIAGNOSE → APPROVE → VERIFY). +``` +Operator dashboard (Next.js) + │ reads EvidencePack JSON only + ▼ +FastAPI on Cloud Run reads public · writes token-gated (Secret Manager) + │ + ├─ Diagnose / Candidate / Rationale roles (Vertex AI Agent Engine, read-only) + │ read-only Mongo tools: + │ explain_slow_query · diagnose_candidate + │ compare_candidate_indexes · rationalize_recommendation + ▼ +Deterministic Python controller ESR winner · evidence hash · phase gates · apply · verify + │ + ▼ +MongoDB Atlas (target) + Evidence Ledger + slow_queries · candidates · experiments · decisions + evidence_packs · approvals · applications · verifications +``` + +Five operator stages (**Detect → Diagnose → Test → Approve → Verify**) sit over a +deterministic three-phase safety engine (**DIAGNOSE → APPROVE → VERIFY**). -Current production path: +`POST /run` creates a **gated, read-only run**; mutation is impossible until the operator +approves a matching `EvidencePack` hash. `POST /packs/{run_id}/decision` is the only path +that can issue the one-time apply. -```text -Dashboard -> FastAPI Cloud Run (opens approval gate) - -> Diagnose Agent Engine -> Candidate Agent Engine -> Rationale Agent Engine - -> deterministic controller validates + emits DIAGNOSED EvidencePack - -> /packs/{id}/decision hash-bound approval ticket - -> apply + verify +## Golden path + +``` +slow query → explain → diagnose the blocking sort → compare candidate indexes + → deterministic ESR winner → EvidencePack (bound evidence hash) + → human approves that exact hash → backend applies the index + → re-explain verification → ledger trace ``` -The three Agent Engine resources perform read-only Mongo diagnosis, candidate testing, -and rationale generation with Python-native tools. 
-Deterministic Python remains the safety authority: it recomputes the ESR winner, evidence -hash, phase transitions, index apply, and verification. `/run` opens the approval gate -before diagnosis and remains read-only; `/packs/{run_id}/decision` is the only path that -can issue the one-time approval ticket required for mutation. +A run is **VERIFIED** only when *all* hold (deterministic, backend-side): -The dashboard reads only `EvidencePack` JSON, including `approval_gate` state and -`agent_trace` proof that the gate opened before Agent Engine participation. Internally, -MongoDB persists ledger collections for `slow_queries`, `candidates`, `experiments`, -`decisions`, `evidence_packs`, `approvals`, `applications`, and `verifications`. +1. the blocking `SORT` is gone in the after-plan, +2. the recommended index is evidenced in the winning/hinted plan, +3. at least one metric improves (docs examined, keys examined, or millis), +4. the result is written to the trace/ledger. -## Quickstart +Otherwise the run stays **APPROVED** (applied-but-not-verified) with the failure recorded. +The frontend never marks a run VERIFIED. + +## Quick start + +**Dashboard (Next.js):** + +```bash +cd dashboard +npm install +npm run dev # http://localhost:7777 (set PORT as needed) +``` + +With no API configured, the dashboard runs in a clearly-labeled **simulation** mode +(read-only, never shown as live). Point it at a live backend with `API_URL`. 
+ +**Backend + tests (Python, uv):** ```bash uv sync --dev -cp .env.example .env # fill GCP + MongoDB values +cp .env.example .env # fill Google Cloud project + MongoDB values uv run pytest -q ``` +## Dashboard routes + +| Route | Purpose | +|---|---| +| `/` | overview — fleet status + run list | +| `/runs/[run_id]` | single-run review — approval gate, before/after explain diff, trace | +| `/run-review?run_id=…` | legacy alias for the run view | +| `/system-map` | architecture + control-plane + oversight map | +| `/history` | run history & compare | +| `/audit` | approval / verification / policy audit trail | +| `/intake` | intake + API gateway | + +## API + +| Method · Route | Auth | Purpose | +|---|---|---| +| `GET /packs` | public | list EvidencePacks | +| `GET /packs/{run_id}` | public | fetch one EvidencePack | +| `POST /run` | token | create a gated, read-only DIAGNOSED run | +| `POST /packs/{run_id}/decision` | token | hash-bound approve / reject | + +The dashboard reaches the write endpoints only through its own **same-origin server +proxy** — the token stays server-side (Secret Manager) and never reaches the client bundle. + +## Safety boundaries + +- No `EvidencePack v1` schema changes, tool renames, or new API routes without approval. +- Agents and tools are **read-only**; no credentials in the frontend. +- Mutation is **backend-only**, after a matching hash-bound human approval. +- The frontend never marks a run `VERIFIED`. + +See [`docs/safety-boundary-decisions.md`](docs/safety-boundary-decisions.md) for deferred +governance decisions, and [`PROJECT_PROMPT.md`](PROJECT_PROMPT.md) for the durable brief. + +## Tech stack + +Next.js · FastAPI · Vertex AI Agent Engine (Gemini) · MongoDB Atlas · Google Cloud Run · +Secret Manager · `uv` + pytest · vitest. + +## Status + +- **Layers 1–4** — multipage operator console, intake, control-plane + oversight map, and hash-bound approval UI (shipped, PR #99). 
+- **Layers 5–8** — three-check verification rail + tests, live-run navigation + labeled simulation fallback, gate-sourced audit trail, governance record (shipped, PR #100). +- Deployed on Cloud Run (dashboard + read API). The dashboard runs under a dedicated, + least-privilege service account with a Secret-Manager-sourced token. + ## License Apache-2.0 — see [`LICENSE`](LICENSE). diff --git a/skills-lock.json b/skills-lock.json new file mode 100644 index 0000000..c85f853 --- /dev/null +++ b/skills-lock.json @@ -0,0 +1,185 @@ +{ + "version": 1, + "skills": { + "agent-platform-deploy": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-deploy/SKILL.md", + "computedHash": "0d10be9da7ac2f87756aac4d69fb052d94d12df531bf371df4391d7e98dfad33" + }, + "agent-platform-endpoint-management": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-endpoint-management/SKILL.md", + "computedHash": "2072e2b424da0b648f49ada0b67c173e906e396de3f81f70ff06894b60b441d4" + }, + "agent-platform-eval-flywheel": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-eval-flywheel/SKILL.md", + "computedHash": "5718c83a8f199f23bc03d846138e25b04c19ef140868c6ad0a89e2b4128f9c84" + }, + "agent-platform-inference": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-inference/SKILL.md", + "computedHash": "77eefc97e778497e22fd1fa14ce02bfd99c36e802cf8823f7a85cf32e95397c9" + }, + "agent-platform-migrate-from-ai-studio": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-migrate-from-ai-studio/SKILL.md", + "computedHash": "e445c04c040e5d0276482c2f1ed6d47e6e895f87e86f47175a7311efcd953862" + }, + "agent-platform-model-registry": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-model-registry/SKILL.md", + 
"computedHash": "42aed32dd50592c0212ce29f1a2e0fd95d675f83fd20ba2ec65fafb405084fd9" + }, + "agent-platform-prompt-management": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-prompt-management/SKILL.md", + "computedHash": "642230db6d485a4b5daee99381da42d7a35f0bfdfd5f3e3df5443d3717c47dff" + }, + "agent-platform-rag-engine-management": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-rag-engine-management/SKILL.md", + "computedHash": "b10d6bc3e86c5ccc27de2f905d2ce5b2ab5e52ac3862819aeac0647963ec8e27" + }, + "agent-platform-skill-registry": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-skill-registry/SKILL.md", + "computedHash": "de229d00a9578fe54da917d4dc93547a3456333eda4b3e69b2231a2bb4a1ff76" + }, + "agent-platform-tuning": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-tuning/SKILL.md", + "computedHash": "3cb2fb673e135f9953ba97ae6fab19acf229c289d57a85a1bde2b542e95e7349" + }, + "agent-platform-tuning-management": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/agent-platform-tuning-management/SKILL.md", + "computedHash": "0cf859127a60a6673dd3e541f297dcee9d391928c11aac3241a5cabf7520dfd9" + }, + "alloydb-basics": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/alloydb-basics/SKILL.md", + "computedHash": "7d3a91a8163ddea4fb4024063601229b97cb580a27cafdb86c0f9e464243880c" + }, + "bigquery-basics": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/bigquery-basics/SKILL.md", + "computedHash": "1dbcda8c9622ea3cf05eefb6585a032285913df055bd5a0d8cab7b94eea5380c" + }, + "cloud-run-basics": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/cloud-run-basics/SKILL.md", + "computedHash": 
"d04d21b683f404fcd6c3b798095c0399d61e5e9d84cb913da46a09e0aa90c708" + }, + "cloud-sql-basics": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/cloud-sql-basics/SKILL.md", + "computedHash": "09cfa905a960ef0c416d57fb7830cafeec554d8093341861abbdf2f9f36c440c" + }, + "firebase-basics": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/firebase-basics/SKILL.md", + "computedHash": "c545713c52f98a4d73958a476ce43281d4b14e1f457b4a82f16dea71cdf38d8a" + }, + "gcloud": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/gcloud/SKILL.md", + "computedHash": "95e957603ab73e3a80c1ba5932d992c186ae06cb1b5e575e9206c2deb478c88d" + }, + "gemini-agents-api": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/gemini-agents-api/SKILL.md", + "computedHash": "1cc8a7861a58b26a5453fcaa91e4e32a11135d68209e3d20dde0e575f32f4336" + }, + "gemini-api": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/gemini-api/SKILL.md", + "computedHash": "612d8acb8467399242633b4cbae67334e37c113f72f6fc9cc5e5a79c2551cff7" + }, + "gemini-interactions-api": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/gemini-interactions-api/SKILL.md", + "computedHash": "6bd649a88b6197a9fe2893faa8ebd9195abcffd2981624723aec7596388c0b1e" + }, + "gke-basics": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/gke-basics/SKILL.md", + "computedHash": "cf263baa6d3007da6f5e4367083f1aa424858ec8b14a1a34538fea2c775cad40" + }, + "google-cloud-networking-observability": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-networking-observability/SKILL.md", + "computedHash": "14c2509b0d6d34de1bb3da7e0280da2aea683e4a25dd361810f27bf1e6d6cc6a" + }, + "google-cloud-recipe-auth": { + "source": "google/skills", + "sourceType": "github", + "skillPath": 
"skills/cloud/google-cloud-recipe-auth/SKILL.md", + "computedHash": "ab6de3028e6bbf55e7361ac16af3df9fa45b668d2183c5479d57773abac35c37" + }, + "google-cloud-recipe-onboarding": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-recipe-onboarding/SKILL.md", + "computedHash": "bb7f79a0f9fc51242cec084471a34814ed01412c46c16772a734dd7a32ed59c3" + }, + "google-cloud-waf-cost-optimization": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-waf-cost-optimization/SKILL.md", + "computedHash": "d870a9e246f8e38c05a44edfaef0a73e61b137782243ebe32cc383dc7b32dfe9" + }, + "google-cloud-waf-operational-excellence": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-waf-operational-excellence/SKILL.md", + "computedHash": "da9f819169cfb6238731cfd02f74b85340928dc510bb6310f5f8a0deabfd7547" + }, + "google-cloud-waf-performance-optimization": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-waf-performance-optimization/SKILL.md", + "computedHash": "13c6f23ee3e6526a87ef42f92c29cd2357d1ebe92cbaec4056a4838ececd66e6" + }, + "google-cloud-waf-reliability": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-waf-reliability/SKILL.md", + "computedHash": "865cdf0138cc97427b8c254017fc627f48ccafb39ddf89c568da2a73fe8c0cc5" + }, + "google-cloud-waf-security": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-waf-security/SKILL.md", + "computedHash": "f043ada8752152cb3ce4fc903f665552406fefb18ef3c0178839275feb9fdd5c" + }, + "google-cloud-waf-sustainability": { + "source": "google/skills", + "sourceType": "github", + "skillPath": "skills/cloud/google-cloud-waf-sustainability/SKILL.md", + "computedHash": "d214719d2d01bc2cb21088feb8080574cc469a5321e5e9bc608c4cc3cf8164d8" + } + } +}