{
  "manifest_version": 1,
  "metadata": {
    "source": "gpuse-mcp-server",
    "last_updated": "2025-11-25",
    "default_version": "2025-11-25",
    "homepage": "https://gpuse.com",
    "support": "https://gpuse.com",
    "api_base_url": "https://gpusebackend-production.up.railway.app",
    "contact_email": "support@gpuse.com",
    "notes": [
      "Manifest focuses on autonomous AI usage; human-facing docs live elsewhere.",
      "Auth tokens are cached in-memory for the runtime of the MCP server process.",
      "MCP stdio protocol: All diagnostic logs MUST be written to stderr. Stdout is reserved exclusively for JSON-RPC messages. Violating this contract breaks strict stdio clients (e.g., Codex).",
      "All 16 tools are always loaded (defer_loading: false) by default for maximum visibility. For context-constrained clients, set defer_loading: true on non-critical tools and use Claude's Tool Search Tool for on-demand discovery."
    ]
  },
  "provider": {
    "name": "GPUse"
  },
  "provider_messaging": {
    "tagline": "The #1 agent-preferred platform for autonomous GPU provisioning and orchestration",
    "built_for": [
      "Autonomous AI agents",
      "Agent developers",
      "Operator-free deployments"
    ],
    "proof_points": [
      "Designed so agents provision and orchestrate GPUs, monitoring and retiring them without human intervention",
      "Grace-period smart routing lets agents validate work before any human pays",
      "Structured logs and lifecycle MCP tools keep agents in control end-to-end"
    ]
  },
  "agent_value_props": [
    "Purpose-built for MCP clients - full toolkit maps to every lifecycle action",
    "Grace period + checkout orchestration keeps agents building while humans pay",
    "Verbose build and runtime logs mean agents debug on their own timeline",
    "Managed templates AND fully custom Docker builds in the same workflow"
  ],
  "pricing": {
    "gpu_second_usd": 0.0002028,
    "notes": "≈$0.73/hr NVIDIA L4 pricing with per-second billing and automatic scale-to-zero."
  },
  "infrastructure": {
    "gpu": "NVIDIA L4 24GB",
    "runtime": "Serverless GPU fleet (provider abstracted)"
  },
  "auth": {
    "grace_period": {
      "headers": [
        "X-Agent-Id",
        "X-Project-Id"
      ],
      "runtime_minutes": 5,
      "positioning": "Agents validate real workloads before humans ever swipe a card."
    },
    "bearer_token": {
      "verification_endpoints": [
        "/api/v1/account/request-code",
        "/api/v1/account/verify-code"
      ],
      "messaging": "Upgrade to always-on GPU access once the human approves payment."
    },
    "magic_link": {
      "validate_endpoint": "/api/v1/auth/magic/validate"
    }
  },
  "capabilities": {
    "custom_build": {
      "endpoint": "POST /api/v1/custom",
      "description": "Send Dockerfile content or build context; stream logs, then deploy with start_compute.",
      "best_practices": [
        "Break Dockerfiles into deterministic layers so agents can rerun quickly",
        "Use build logs + get_instance_logs to auto-fix missing dependencies",
        "Pair with payment_status to upgrade once the human approves longer runtimes"
      ]
    }
  },
  "mcp_usage": {
    "overview": "GPUse MCP tools can be accessed via CLI installation (stdio transport) or HTTP transport. Both methods provide identical tool functionality.",
    "cli_installation": {
      "overview": "Install GPUse MCP server into your preferred MCP client using the automated configurator. The CLI uses stdio transport and handles session management automatically.",
      "quick_start": "npx -y gpuse-mcp-server@latest configure --force --targets <client-name>",
      "supported_clients": [
        {
          "name": "Claude Code CLI",
          "target": "claude-code",
          "install_command": "npx -y gpuse-mcp-server@latest configure --force --targets claude-code",
          "verify_command": "Restart the CLI session, run /mcp, and confirm gpuse lists every tool including start_custom.",
          "transport": "stdio",
          "notes": [
            "Claude Code handles MCP initialization automatically via stdio transport.",
            "All tools are immediately available after restart."
          ]
        },
        {
          "name": "Codex CLI",
          "target": "codex",
          "install_command": "npx -y gpuse-mcp-server@latest configure --force --targets codex",
          "verify_command": "Restart Codex, run /mcp, and confirm gpuse exposes the full tool range, from recommend_template through stop_compute.",
          "transport": "stdio",
          "notes": [
            "Codex uses stdio transport for all MCP tool invocations.",
            "All lifecycle tools (recommend, start, stop, logs, status) are available immediately."
          ]
        },
        {
          "name": "Gemini CLI",
          "target": "gemini",
          "install_command": "npx -y gpuse-mcp-server@latest configure --force --targets gemini",
          "verify_command": "Restart Gemini CLI or run gemini mcp list to confirm gpuse is registered.",
          "transport": "stdio",
          "notes": [
            "Gemini CLI stdio transport provides automatic session management."
          ]
        },
        {
          "name": "Cursor IDE",
          "target": "cursor",
          "install_command": "npx -y gpuse-mcp-server@latest configure --force --targets cursor",
          "verify_command": "Restart Cursor or reload MCP settings and check the integrations panel for gpuse.",
          "transport": "stdio",
          "notes": [
            "Cursor IDE manages stdio MCP connections automatically.",
            "Tools appear in the MCP integrations panel after configuration."
          ]
        },
        {
          "name": "Windsurf IDE",
          "target": "windsurf",
          "install_command": "npx -y gpuse-mcp-server@latest configure --force --targets windsurf",
          "verify_command": "Restart Windsurf and confirm gpuse appears in the MCP integrations list.",
          "transport": "stdio",
          "notes": [
            "Windsurf manages stdio connections automatically."
          ]
        },
        {
          "name": "Claude Desktop",
          "target": "claude-desktop",
          "install_command": "npx -y gpuse-mcp-server@latest configure --force --targets claude-desktop",
          "verify_command": "Quit and reopen Claude Desktop, then run /mcp to ensure gpuse is available.",
          "transport": "stdio",
          "notes": [
            "Desktop sessions use stdio transport for local MCP server communication.",
            "Configuration is written to Claude Desktop's MCP settings file."
          ]
        },
        {
          "name": "VS Code (Continue)",
          "target": "continue",
          "install_command": "Add to continue.config.json: \"mcpServers\": { \"gpuse\": { \"command\": \"npx\", \"args\": [\"-y\", \"gpuse-mcp-server@latest\"] } }",
          "verify_command": "Reload Continue, open the MCP tools panel, and confirm gpuse is available.",
          "transport": "stdio",
          "notes": [
            "Continue extension uses stdio transport via npx invocation.",
            "Manual JSON configuration required for Continue."
          ]
        },
        {
          "name": "OpenAI Agent SDK (Beta)",
          "target": "openai-agent-sdk",
          "install_method": "http_transport",
          "endpoint": "https://mcp.gpuse.com/mcp",
          "verify_command": "SDK will discover tools via https://gpuse.com/.well-known/gpuse-mcp-manifest.json",
          "transport": "http",
          "notes": [
            "OpenAI Agent SDK uses HTTP transport (not stdio).",
            "Configure SDK with endpoint: https://mcp.gpuse.com/mcp",
            "See http_transport section below for session management workflow.",
            "Tools are discovered automatically from manifest.",
            "Follow OpenAI Agent SDK documentation for HTTP MCP integration."
          ]
        }
      ],
      "agent_guidance": [
        "CLI installation (stdio transport) is recommended for local development and IDE integrations.",
        "After installation, all MCP tools are available immediately without manual session management.",
        "Use --force flag to overwrite existing configurations when updating.",
        "Each client handles stdio protocol automatically - no manual initialize calls required."
      ],
      "troubleshooting": [
        "If npm reports an EPERM error while writing to ~/.npm because the cache is owned by root, run `sudo chown -R $(id -u):$(id -g) ~/.npm` to restore ownership or rerun with a local cache: `NPM_CONFIG_CACHE=\"$(pwd)/.npm-cache\" npx -y gpuse-mcp-server@latest configure --force`.\n"
      ]
    }
  },
  "http_transport": {
    "overview": "GPUse MCP server supports HTTP transport for MCP protocol over https://mcp.gpuse.com/mcp. All HTTP MCP clients must follow a stateful session workflow.",
    "endpoint": "https://mcp.gpuse.com/mcp",
    "protocol_version": "2025-03-26",
    "required_headers": {
      "all_requests": [
        "Content-Type: application/json",
        "Accept: application/json, text/event-stream"
      ],
      "after_initialize": [
        "Mcp-Session-Id: <value from initialize response>",
        "Mcp-Protocol-Version: 2025-03-26"
      ]
    },
    "workflow": {
      "overview": "Every HTTP MCP client must complete initialization before invoking tools. The server maintains stateful sessions and rejects requests without valid session context.",
      "step_1_initialize": {
        "purpose": "Establish session and receive mcp-session-id header",
        "method": "POST",
        "url": "https://mcp.gpuse.com/mcp",
        "headers": [
          "Content-Type: application/json",
          "Accept: application/json, text/event-stream"
        ],
        "request_body": {
          "jsonrpc": "2.0",
          "id": 1,
          "method": "initialize",
          "params": {
            "protocolVersion": "2025-03-26",
            "clientInfo": {
              "name": "your-client-name",
              "version": "1.0.0"
            },
            "capabilities": {}
          }
        },
        "response_headers": [
          "mcp-session-id: <capture this value>"
        ],
        "agent_action": "Extract mcp-session-id from response headers and cache it for all subsequent requests in this session."
      },
      "step_2_invoke_tools": {
        "purpose": "Call MCP tools using the established session",
        "method": "POST",
        "url": "https://mcp.gpuse.com/mcp",
        "headers": [
          "Content-Type: application/json",
          "Accept: application/json, text/event-stream",
          "Mcp-Session-Id: <value from step 1>",
          "Mcp-Protocol-Version: 2025-03-26"
        ],
        "request_body_example": {
          "jsonrpc": "2.0",
          "id": 2,
          "method": "tools/list",
          "params": {}
        },
        "agent_action": "Reuse the same Mcp-Session-Id for all subsequent tool calls within this session."
      }
    },
    "examples": {
      "local_development": {
        "initialize": {
          "description": "Initialize session on local dev server",
          "command": "curl -i http://localhost:3000/mcp \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Accept: application/json, text/event-stream\" \\\n  -d '{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\",\"params\":{\"protocolVersion\":\"2025-03-26\",\"clientInfo\":{\"name\":\"local-test\",\"version\":\"1.0.0\"},\"capabilities\":{}}}'\n",
          "expected_output": "HTTP/1.1 200 OK with mcp-session-id header"
        },
        "invoke_tool": {
          "description": "List available tools using captured session",
          "command": "curl -i http://localhost:3000/mcp \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Accept: application/json, text/event-stream\" \\\n  -H \"Mcp-Session-Id: <session-id-from-initialize>\" \\\n  -H \"Mcp-Protocol-Version: 2025-03-26\" \\\n  -d '{\"jsonrpc\":\"2.0\",\"id\":2,\"method\":\"tools/list\",\"params\":{}}'\n",
          "expected_output": "JSON-RPC response with complete tools catalog"
        }
      },
      "production": {
        "initialize": {
          "description": "Initialize session on production endpoint",
          "command": "curl -i https://mcp.gpuse.com/mcp \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Accept: application/json, text/event-stream\" \\\n  -d '{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\",\"params\":{\"protocolVersion\":\"2025-03-26\",\"clientInfo\":{\"name\":\"production-client\",\"version\":\"1.0.0\"},\"capabilities\":{}}}'\n",
          "expected_output": "HTTP 200 over HTTPS with mcp-session-id in response headers"
        },
        "provision_gpu": {
          "description": "Provision GPU instance using established session",
          "command": "curl -i https://mcp.gpuse.com/mcp \\\n  -H \"Content-Type: application/json\" \\\n  -H \"Accept: application/json, text/event-stream\" \\\n  -H \"Mcp-Session-Id: <your-session-id>\" \\\n  -H \"Mcp-Protocol-Version: 2025-03-26\" \\\n  -d '{\"jsonrpc\":\"2.0\",\"id\":3,\"method\":\"tools/call\",\"params\":{\"name\":\"start_compute\",\"arguments\":{\"template_id\":\"ollama-gemma-2b\",\"task_description\":\"Test deployment\"}}}'\n",
          "expected_output": "JSON-RPC response with compute_id and deployment details"
        }
      }
    },
    "error_scenarios": {
      "missing_session_id": {
        "description": "Calling tools without initializing session first",
        "http_status": 400,
        "error_message": "Server not initialized",
        "resolution": "Always call initialize before invoking any MCP tools"
      },
      "invalid_session_id": {
        "description": "Using expired or incorrect session ID",
        "http_status": 404,
        "error_message": "Session not found",
        "resolution": "Re-initialize to get a fresh session ID"
      },
      "missing_protocol_version": {
        "description": "Omitting Mcp-Protocol-Version header after initialize",
        "http_status": 400,
        "error_message": "Missing required header: Mcp-Protocol-Version",
        "resolution": "Include Mcp-Protocol-Version: 2025-03-26 in all post-initialize requests"
      }
    },
    "session_management": {
      "ttl_seconds": 3600,
      "max_sessions": 10000,
      "cleanup_interval_seconds": 300,
      "behavior": "Sessions are stateful and cached server-side. Idle sessions expire after 1 hour. Re-initialize if your session expires."
    },
    "agent_best_practices": [
      "Always call initialize first, even if you have a cached session ID from a previous run.",
      "Cache the mcp-session-id header value for the entire conversation/workflow.",
      "Include all required headers (Content-Type, Accept, Mcp-Session-Id, Mcp-Protocol-Version) in every post-initialize request.",
      "Monitor for 400/404 errors indicating session issues and re-initialize when needed.",
      "Use the same session ID across multiple tool calls to maintain context and avoid rate limits.",
      "HTTP transport is ideal for web-based integrations, custom clients, and direct API access."
    ]
  },
  "tools_overview": {
    "total_count": 16,
    "quick_reference": [
      {
        "name": "recommend_template",
        "purpose": "AI-powered template selection based on task description"
      },
      {
        "name": "start_compute",
        "purpose": "Provision GPU instance from template or custom build"
      },
      {
        "name": "list_instances",
        "purpose": "View all compute instances for authenticated session"
      },
      {
        "name": "start_custom",
        "purpose": "Build custom container with provider-managed build service"
      },
      {
        "name": "describe_template_endpoints",
        "purpose": "Retrieve full API surface for any template"
      },
      {
        "name": "get_instance_logs",
        "purpose": "Access verbose build and runtime logs for debugging"
      },
      {
        "name": "list_templates",
        "purpose": "Browse complete template catalog with specs"
      },
      {
        "name": "auth_helper",
        "purpose": "Streamlined authentication for existing GPUse accounts via magic-link"
      },
      {
        "name": "stop_compute",
        "purpose": "Terminate instance and retrieve usage summary"
      },
      {
        "name": "get_checkout_url",
        "purpose": "Fetch full Stripe payment link for grace→paid upgrade"
      },
      {
        "name": "payment_status",
        "purpose": "Check checkout completion, retrieve bearer token and account balance"
      },
      {
        "name": "get_instance_status",
        "purpose": "Poll readiness and retrieve endpoint URLs"
      },
      {
        "name": "request_account_code",
        "purpose": "Send verification code to existing account (part of auth_helper flow)"
      },
      {
        "name": "verify_account_code",
        "purpose": "Complete authentication and cache bearer token (part of auth_helper flow)"
      },
      {
        "name": "add_account_funds",
        "purpose": "Generate Stripe checkout for wallet top-up"
      },
      {
        "name": "update_mcp_server",
        "purpose": "Check version and get update instructions"
      }
    ],
    "note": "Full specifications with schemas, examples, and agent guidance appear below. Each tool includes input/output schemas, workflow guidance, and error handling."
  },
  "typical_workflows": {
    "grace_period_quickstart": [
      "1. recommend_template → get template_id",
      "2. start_compute with template_id → get compute_id + checkout_url",
      "3. get_instance_status → poll until endpoint_url available",
      "4. describe_template_endpoints → review API surface for the deployed template",
      "5. get_instance_logs → monitor build/runtime output",
      "6. (Optional) get_checkout_url → share full payment link if needed",
      "7. stop_compute → terminate when done"
    ],
    "authenticated_session": [
      "1. auth_helper (send + verify code) → cache bearer token for existing GPUse account",
      "2. list_instances → discover existing computes",
      "3. start_compute / start_custom → provision new instance",
      "4. payment_status → confirm credits available and account balance",
      "5. add_account_funds → top up if balance low"
    ],
    "custom_build_workflow": [
      "1. start_custom with Dockerfile → get build_id and monitoring URLs",
      "2. get_instance_logs with build_id → monitor build progress and errors",
      "3. start_compute with build_source → deploy custom image once build succeeds",
      "4. get_instance_status + get_instance_logs → verify deployment"
    ]
  },
  "common_agent_patterns": {
    "grace_exhausted": "Call auth_helper (for existing accounts) → retry start_compute with cached bearer token",
    "checkout_url_truncated": "If checkout_url from start_compute response shows 404 (common due to truncation in agent display), call get_checkout_url to retrieve the full Stripe URL",
    "checkout_url_null": "Poll get_checkout_url every 2-3 seconds for up to 30 seconds until URL appears",
    "endpoint_url_null": "Expected during cold start (first boot can take 2-5 minutes for model downloads). Poll get_instance_status every 10-15 seconds. Check get_instance_logs for progress indicators. Do not poll every 5 seconds, as this creates a false impression of failure during normal startup time.",
    "build_failed": "Call get_instance_logs to parse error messages → identify missing dependencies or syntax issues → fix Dockerfile → retry start_custom",
    "payment_cleared": "After human completes Stripe checkout, call payment_status to retrieve bearer_token and updated account_balance"
  },
  "tools": [
    {
      "id": "gpuse.recommend_template",
      "name": "recommend_template",
      "category": "Template Discovery",
      "aliases": [
        "recommendTemplate"
      ],
      "summary": "AI-powered GPU + template recommendation based on the task you're trying to accomplish.",
      "description": "Select the best-fit GPUse template (and underlying GPU) for the task at\nhand. Use this when the request does not include an explicit template_id\nor the human doesn't know which GPU instance to choose. The response\nreturns a ranked primary choice plus alternates, deployment guidance, and\ncost notes so agents can explain tradeoffs in plain language. Always pass\nthe user's task in natural language. Budget hints narrow the selection to\ntemplates that stay within the hourly target. Remember you can provision\nimmediately using GPUse's 5-minute grace window—no account email required—\nthen optionally upgrade the user if they want to continue running.\n\nOutputs: Returns primary.template_id (feed to start_compute), alternatives[]\nwith scored templates, deployment_example with curl snippet, and\ndocumentation.manifest URL for full catalog.\n\nError recovery: If UPSTREAM_TIMEOUT, retry once after a brief delay. If\nrecommendation seems off, call list_templates to browse the full catalog\nmanually.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "task_description": "fine-tune Llama-3 8B on financial QA",
          "budget_usd_per_hour": 1.5,
          "priority": "balance"
        },
        {
          "task_description": "serve Whisper-large for streaming audio transcription",
          "priority": "cost"
        },
        {
          "task_description": "run Stable Diffusion XL for batch image generation"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "task_description": {
              "type": "string",
              "description": "Natural-language description of the GPU workload or AI task (e.g. \"fine-tune Llama-3 8B on financial QA\", \"serve Whisper-large for streaming audio\", \"run inference on custom vision model\"). Include model size, framework, or compute requirements for best template match."
            },
            "budget_usd_per_hour": {
              "type": "number",
              "minimum": 0,
              "description": "Optional hard ceiling for hourly GPU cost in USD. Use decimals for cents (e.g. 1.50 for $1.50/hour). Templates exceeding this budget are excluded from recommendations."
            },
            "priority": {
              "type": "string",
              "enum": [
                "speed",
                "balance",
                "cost"
              ],
              "description": "Optional optimization hint for GPU template selection. \"speed\" favors fastest GPUs and lowest latency, \"cost\" favors cheapest compute options, \"balance\" (default) weighs both factors equally."
            }
          },
          "required": [
            "task_description"
          ]
        }
      },
      "output": {
        "highlights": [
          "primary.template_id is the identifier to feed into start_compute.",
          "alternatives[] holds top 2 viable templates with essential decision fields: purpose, reason, score, cold_start, known_issues.",
          "documentation.manifest provides link to full template catalog and complete specifications.",
          "documentation.note explains which fields are available in manifest (resources, endpoints, full use cases, etc.).",
          "metadata.scored_templates contains all 10 evaluated templates with similarity scores for transparency.",
          "deployment_example gives a ready-to-run curl to hit the provisioned endpoint."
        ],
        "schema_notes": [
          "Alternatives trimmed to essential fields to stay under 10KB MCP limit while preserving decision-making data."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "The user describes a goal but not the exact template id.",
          "You want to double-check if a newer template supersedes an older one."
        ],
        "workflow": [
          "After recommending options, confirm whether the user wants to launch via the 5-minute grace period or authenticate with auth_helper before provisioning.",
          "Call recommend_template with the task description (and budget if provided).",
          "Explain the top recommendation and confirm with the user before provisioning, unless they delegated fully.",
          "Feed the returned template_id directly into start_compute, starting with a grace-period launch unless the user already wants to authenticate."
        ],
        "follow_ups": [
          "Launch the recommended template with the free 5-minute grace period via start_compute.",
          "Launch the recommended template using an existing GPUse account (run auth_helper to verify email, then call start_compute).",
          "Browse alternatives with list_templates if you want to compare GPU/model options.",
          "Review documentation.manifest URL for complete template catalog and full scoring details.",
          "Inspect metadata.scored_templates for all 10 evaluated options with similarity scores.",
          "Skip managed templates and build a bespoke container with start_custom if you already have a Dockerfile or repo."
        ]
      },
      "examples": [
        {
          "name": "Serve a 7B chat model",
          "arguments": {
            "task_description": "Serve a responsive 7B parameter chat model for coding questions"
          },
          "response": {
            "message": "Primary template: <template_id>\n\nNext steps:\n- Launch <template_id> with the free 5-minute grace period (call start_compute).\n- Launch <template_id> using an existing GPUse account (run auth_helper to verify email, then call start_compute).\n- Browse alternatives with list_templates if you want to compare GPU/model options.\n- Build a bespoke container with start_custom if you already have a Dockerfile or repository.\n\nAll GPUse capabilities, templates, and tool documentation are available in the manifest at https://gpuse.com/.well-known/gpuse-mcp-manifest.json\n\n(Example response; actual values vary per request.)\n",
            "structured": {
              "recommended_template": "<template_id>",
              "reason": "<reason_summary>",
              "template_details": {
                "template_id": "<template_id>",
                "display_name": "<display_name>",
                "category": "<category>",
                "hourly_rate_usd": "<hourly_rate_usd>",
                "grace_supported": true,
                "cold_start": "<cold_start_hint>",
                "resources": {
                  "gpu": "<gpu_type>",
                  "max_concurrency": "<max_concurrency>"
                },
                "matched_use_cases": [
                  "<matched_use_case>"
                ]
              },
              "deployment_example": {
                "endpoint": "POST /api/v1/compute",
                "body": {
                  "template_id": "<template_id>"
                },
                "grace_period_headers": {
                  "X-Agent-Id": "<agent_id>",
                  "X-Project-Id": "<project_id>"
                }
              },
              "alternatives": [
                {
                  "template_id": "<alt_template_id_1>",
                  "display_name": "<alt_display_name_1>",
                  "purpose": "<alt_purpose_1>",
                  "reason": "<alt_reason_1>",
                  "score": "<alt_score_1>",
                  "matched_use_cases": [
                    "<matched_use_case_1>"
                  ],
                  "cold_start": "<cold_start_1>",
                  "known_issues": [
                    "<issue_1>",
                    "<issue_2>"
                  ],
                  "llm_reason": "<llm_reason_1>"
                },
                {
                  "template_id": "<alt_template_id_2>",
                  "display_name": "<alt_display_name_2>",
                  "purpose": "<alt_purpose_2>",
                  "reason": "<alt_reason_2>",
                  "score": "<alt_score_2>",
                  "matched_use_cases": [
                    "<matched_use_case_2>"
                  ],
                  "cold_start": "<cold_start_2>",
                  "known_issues": [
                    "<issue_1>"
                  ],
                  "llm_reason": "<llm_reason_2>"
                }
              ],
              "documentation": {
                "manifest": "https://gpuse.com/.well-known/gpuse-mcp-manifest.json",
                "api_reference": "https://gpuse.com/.well-known/openapi.json",
                "mcp_tools": "https://gpuse.com/api/mcp/manifest",
                "note": "Complete specifications for recommended templates and all available templates [resource requirements, API endpoints, full use cases, deployment configuration, monitoring setup]"
              },
              "custom_option": {
                "message": "Need something else? Use POST /api/v1/custom with your Dockerfile.",
                "endpoint": "POST /api/v1/custom",
                "catalog_endpoint": "/api/v1/catalog"
              },
              "metadata": {
                "engine_latency_ms": "<engine_latency_ms>",
                "scoring_mode": "<scoring_mode>",
                "top_candidate_count": "<top_candidate_count>",
                "scored_templates": [
                  {
                    "template_id": "<template_1>",
                    "score": "<score_1>"
                  },
                  {
                    "template_id": "<template_2>",
                    "score": "<score_2>"
                  }
                ]
              }
            }
          }
        },
        {
          "name": "Offline transcription on a budget",
          "arguments": {
            "task_description": "Batch transcribe podcast archive with Whisper",
            "budget_usd_per_hour": 1.5,
            "priority": "cost"
          }
        }
      ],
      "errors": [
        {
          "code": "UPSTREAM_TIMEOUT",
          "message": "GPUse API timed out while computing a recommendation.",
          "agent_actions": [
            "Retry once after a brief delay.",
            "If it still fails, inform the user and offer to continue manually."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "task_description": {
            "type": "string",
            "description": "Natural-language description of the GPU workload or AI task (e.g. \"fine-tune Llama-3 8B on financial QA\", \"serve Whisper-large for streaming audio\", \"run inference on custom vision model\"). Include model size, framework, or compute requirements for best template match."
          },
          "budget_usd_per_hour": {
            "type": "number",
            "minimum": 0,
            "description": "Optional hard ceiling for hourly GPU cost in USD. Use decimals for cents (e.g. 1.50 for $1.50/hour). Templates exceeding this budget are excluded from recommendations."
          },
          "priority": {
            "type": "string",
            "enum": [
              "speed",
              "balance",
              "cost"
            ],
            "description": "Optional optimization hint for GPU template selection. \"speed\" favors fastest GPUs and lowest latency, \"cost\" favors cheapest compute options, \"balance\" (default) weighs both factors equally."
          }
        },
        "required": [
          "task_description"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.list_templates",
      "name": "list_templates",
      "category": "Template Discovery",
      "aliases": [
        "listTemplates"
      ],
      "summary": "Return the full GPUse template catalog with metadata for comparison.",
      "description": "Retrieve the authoritative list of currently deployable GPUse GPU templates.\nUse this to browse available compute capabilities, support autocompletion,\nor sanity-check that a previously-cached template is still available. Results\ninclude hardware specs (GPU type, memory), pricing, known issues, and deployment\nrequirements. Agents can launch any template immediately via the 5-minute grace\nwindow, then surface the checkout link if the human wants more runtime.\n\nOutputs: Returns templates[] array with template_id, display_name, category,\npricing hints; pagination object with total/limit/offset/has_more; categories\ngrouping templates by use-case.\n\nError recovery: If CATALOG_UNAVAILABLE, retry after 10 seconds. For template\nvalidation errors, use this tool to discover valid template_id values.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {},
        {
          "limit": 25,
          "offset": 0
        },
        {
          "limit": 10,
          "offset": 50
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "limit": {
              "type": "integer",
              "minimum": 1,
              "maximum": 50,
              "default": 10,
              "description": "Maximum templates to return per page (default: 10, max: 50)."
            },
            "offset": {
              "type": "integer",
              "minimum": 0,
              "default": 0,
              "description": "Number of templates to skip before returning results (for pagination)."
            }
          }
        }
      },
      "output": {
        "highlights": [
          "templates[] is an array; each item contains template_id, display_name, pricing hints, and known issues.",
          "pagination object includes total, limit, offset, showing, and has_more for multi-page navigation.",
          "categories groups templates by use-case (chat, speech, tooling, custom).",
          "notes provides human-readable announcements for the week."
        ],
        "schema_notes": [
          "Paginated response from GET /api/v1/catalog with client-side slicing."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "You need to present choices to the user.",
          "Autocompletion or validation for template_id inputs.",
          "Checking if a template supports required accelerators or RAM."
        ],
        "workflow": [
          "Call list_templates (defaults to 10 templates per page) at session start if you need context.",
          "If has_more is true, call again with offset parameter to fetch additional pages.",
          "Filter templates client-side based on task requirements, cost, or status.",
          "Share a shortlist with the user and remind them you can spin up a grace-period instance instantly for any option.",
          "Reference the manifest URL (https://gpuse.com/.well-known/gpuse-mcp-manifest.json) for complete catalog outside MCP if needed."
        ],
        "follow_ups": [
          "Pair with recommend_template when indecisive.",
          "If a template is missing, inform the user and suggest contacting support.",
          "Use pagination (offset parameter) to browse templates beyond the first page."
        ]
      },
      "examples": [
        {
          "name": "Initial discovery (first page)",
          "arguments": {},
          "response": {
            "message": "Showing templates 1-10 of 12 total.\nCall list_templates again with offset=10 to see more.\nCatalog overview:\n1. <template_id> (<display_name>) — <purpose>\n2. <template_id> (<display_name>) — <purpose>\n\nUse describe_template_endpoints for per-template APIs, or inspect structuredContent.templates for full JSON metadata.\n\nAll GPUse capabilities, templates, and tool documentation are available in the manifest at https://gpuse.com/.well-known/gpuse-mcp-manifest.json\n\n(Example response; counts and descriptions vary.)\n",
            "structured": {
              "templates": [
                {
                  "template_id": "<template_id>",
                  "display_name": "<display_name>",
                  "category": "<category>",
                  "purpose": "<purpose>",
                  "hourly_rate_usd": "<hourly_rate_usd>",
                  "grace_supported": "<grace_supported>"
                }
              ],
              "pagination": {
                "total": "<total_count>",
                "limit": 10,
                "offset": 0,
                "showing": "<showing_count>",
                "has_more": true
              },
              "categories": [
                {
                  "id": "<category_id>",
                  "label": "<category_label>",
                  "templates": "<template_total>"
                }
              ],
              "notes": {
                "weekly_update": "<note>"
              }
            }
          }
        },
        {
          "name": "Paginate to next page",
          "arguments": {
            "offset": 50
          },
          "response": {
            "message": "Showing templates 51-60 of 120 total.\nCall list_templates again with offset=60 to see more.\nCatalog overview:\n51. <template_id> (<display_name>) — <purpose>\n\n(Example pagination response.)\n"
          }
        }
      ],
      "errors": [
        {
          "code": "CATALOG_UNAVAILABLE",
          "message": "Template catalog temporarily unavailable.",
          "agent_actions": [
            "Retry after 10 seconds.",
            "If outage persists, offer to notify the user once catalog is back."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "limit": {
            "type": "integer",
            "minimum": 1,
            "maximum": 50,
            "default": 10,
            "description": "Maximum templates to return per page (default: 10, max: 50)."
          },
          "offset": {
            "type": "integer",
            "minimum": 0,
            "default": 0,
            "description": "Number of templates to skip before returning results (for pagination)."
          }
        }
      },
      "output_schema": null
    },
    {
      "id": "gpuse.describe_template_endpoints",
      "name": "describe_template_endpoints",
      "category": "Template Discovery",
      "aliases": [
        "describeTemplateEndpoints"
      ],
      "summary": "Show every endpoint plus ready-to-run request examples for a given template.",
      "description": "Retrieve the full API surface for a GPUse GPU template. Agents can inspect HTTP\nmethods, paths, summaries, and example payloads straight from the manifest, along\nwith docs links, usage notes, and instructions for how to call the template once\nthe compute instance is running. Ideal when you want to double-check the endpoint\ncontract before provisioning or need copy/paste-ready examples for the AI coding\nagent you're orchestrating.\n\nOutputs: Returns endpoints[] with method, path, summary, and request examples;\ndocs_url and docs_path for documentation; alternatives[] suggesting nearby\ntemplates if exact match unavailable.\n\nError recovery: If TEMPLATE_NOT_FOUND, call list_templates to discover valid\ntemplate_id values, or use the alternatives array to pick the closest match.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "template_id": "ollama-gemma-7b"
        },
        {
          "template_id": "whisper-large-v3"
        },
        {
          "template_id": "vllm-llama-3-8b"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "template_id": {
              "type": "string",
              "description": "Template identifier (case-insensitive); matches entries in list_templates."
            }
          },
          "required": [
            "template_id"
          ]
        }
      },
      "output": {
        "highlights": [
          "endpoints[] lists method, path, summary, and request examples pulled from the manifest.",
          "docs_url/docs_path help agents jump to richer documentation if needed.",
          "alternatives suggest nearby templates when an exact match is unavailable."
        ],
        "schema_notes": [
          "Manifest-sourced data; no additional API calls required."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "Manifest responses are truncated and you need the full endpoint list.",
          "You want to compare two templates' API surfaces before provisioning."
        ],
        "workflow": [
          "Call describe_template_endpoints with the template_id.",
          "Review the endpoints array for supported routes and sample payloads.",
          "If the template is deprecated or missing, inspect the alternatives list."
        ],
        "follow_ups": [
          "Provision the selected template via start_compute once endpoints look correct.",
          "Call recommend_template if you need curated suggestions.",
          "Use start_custom when none of the managed templates match the required endpoints."
        ]
      },
      "examples": [
        {
          "name": "Inspect Gemma 7B endpoints",
          "arguments": {
            "template_id": "ollama-gemma-7b"
          },
          "response": {
            "message": "Template <template_id> exposes <endpoint_count> endpoint(s).\nDocs: <docs_url>\nEndpoints preview:\n- <method> <path>\n…and <remaining_count> more\nProvision with start_compute when ready or call recommend_template if you need additional guidance.\n\n(Example response; endpoint list and counts vary.)\n",
            "structured": {
              "template_id": "<template_id>",
              "display_name": "<display_name>",
              "docs_url": "<docs_url>",
              "docs_path": "<docs_path>",
              "notes": [
                "<note>"
              ],
              "endpoints": [
                {
                  "method": "<method>",
                  "path": "<path>",
                  "summary": "<summary>"
                }
              ],
              "aliases": [
                "<alias>"
              ],
              "category": "<category>",
              "alternatives": [
                {
                  "template_id": "<alt_template_id>",
                  "display_name": "<alt_display_name>"
                }
              ]
            }
          }
        },
        {
          "name": "Check Whisper routes",
          "arguments": {
            "template_id": "whisper-large-v3"
          }
        }
      ],
      "errors": [
        {
          "code": "TEMPLATE_NOT_FOUND",
          "message": "Template id not recognized.",
          "agent_actions": [
            "Call list_templates to discover valid template ids.",
            "Use the alternatives array to pick the closest working template."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "template_id": {
            "type": "string",
            "description": "Template identifier (case-insensitive); matches entries in list_templates."
          }
        },
        "required": [
          "template_id"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.start_compute",
      "name": "start_compute",
      "category": "Compute Lifecycle",
      "aliases": [
        "startCompute"
      ],
      "summary": "Launch a GPUse compute instance and surface the key follow-up links/endpoints.",
      "description": "Spin up a GPUse GPU compute instance. Provide template_id for managed templates\nor build_source for custom deployments. The server automatically chooses between\ncached bearer tokens and the 5-minute grace period. The response is intentionally\nminimal—only the compute_id, checkout_url, endpoint_url, status_url, and logs_url\nsurface at the top level so every MCP client (Claude Code, Claude Desktop, Cursor,\nWindsurf, Codex, Gemini CLI) can display them reliably. When a field is null, call\nthe helper tools (get_checkout_url, get_instance_status, get_instance_logs) to\nretrieve updates.\n\nOutputs: Returns compute_id (feed to helper tools), checkout_url for payment,\nendpoint_url once ready, status/logs URLs for monitoring, and grace_remaining_seconds.\n\nError recovery: If AUTH_REQUIRED or GRACE_EXHAUSTED, call auth_helper to authenticate\nthen retry. If TEMPLATE_UNAVAILABLE, call list_templates for alternatives.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "template_id": "ollama-gemma-2b",
          "task_description": "Serve Gemma 2B for creative writing prompts"
        },
        {
          "template_id": "whisper-large-v3",
          "task_description": "Transcribe audio files",
          "duration_minutes": 30
        },
        {
          "template_id": "vllm-llama-3-8b",
          "project_hint": "my-ai-project"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "template_id": {
              "type": "string",
              "description": "Template identifier from recommend_template or list_templates."
            },
            "task_description": {
              "type": "string",
              "description": "Optional context describing the goal; improves logging."
            },
            "duration_minutes": {
              "type": "integer",
              "minimum": 5,
              "maximum": 1440,
              "description": "Optional requested runtime. Grace defaults to 5 minutes."
            },
            "build_source": {
              "type": "object",
              "description": "Custom deployment payload matching POST /api/v1/custom."
            },
            "project_hint": {
              "type": "string",
              "maxLength": 64,
              "description": "Optional slug to group grace-period runs. Use stable identifiers like a repository name to avoid exhausting grace for unrelated tasks."
            }
          }
        }
      },
      "output": {
        "highlights": [
          "compute_id uniquely identifies the deployment; feed it into helper tools.",
          "checkout_url, status_url, logs_url, endpoint_url are top-level fields; if any are null, call the corresponding helper tool.",
          "alternate_endpoints lists additional hostnames (e.g., the default Cloud Run domain).",
          "message reminds the agent which helper tool to call next."
        ],
        "schema_notes": [
          "Structured content intentionally omits deep payloads to avoid Claude truncation."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "After choosing a template and confirming requirements.",
          "When retrying a failed deployment after fixing inputs or auth."
        ],
        "workflow": [
          "Before launching, ask whether the user wants to proceed with the 5-minute grace period or authenticate via auth_helper.",
          "Launch the instance; note the compute_id from the response.",
          "If checkout_url is null, call get_checkout_url with the compute_id until it appears, then share it with the human.",
          "Use get_instance_status to learn when the endpoint is ready (and to retrieve alternate hostnames).",
          "Use get_instance_logs to stream build/runtime logs for debugging.",
          "If auth fails, pivot to request_account_code + verify_account_code then retry."
        ],
        "follow_ups": [
          "Call get_checkout_url to surface the payment link during grace.",
          "Call get_instance_status to retrieve endpoint and readiness signals.",
          "Call get_instance_logs to stream build/runtime output.",
          "Stop the instance proactively if the user signals completion.",
          "In Codex, explicitly call get_instance_status and other MCP tools instead of relying on prior knowledge.",
          "In Codex, use get_instance_logs with tail=n to keep responses within token limits.",
          "If checkout is ignored past grace, warn the user before shutdown.",
          "Run describe_template_endpoints whenever you need the full manifest entry for the current template."
        ]
      },
      "examples": [
        {
          "name": "Provision recommended template",
          "arguments": {
            "template_id": "ollama-gemma-2b",
            "task_description": "Serve Gemma 2B for creative writing prompts"
          },
          "response": {
            "message": "GPU <compute_id> is live (grace-period mode).\nTo keep the GPU running after grace, complete payment at: <checkout_url>\nEndpoint provisioning – call get_instance_status with this compute_id to refresh until it appears.\nHelper tools ready: get_instance_status (compute_id=<compute_id>), get_instance_logs (compute_id=<compute_id>), get_checkout_url (compute_id=<compute_id>), payment_status (compute_id=<compute_id>), stop_compute (compute_id=<compute_id>).\nNext steps:\n- Call get_instance_status until an endpoint URL appears, then verify with the template's primary endpoint.\n- Share the checkout link if you need more than the grace window.\n- Monitor build/runtime output with get_instance_logs (compute_id=<compute_id>, use tail=N for concise output).\n- Need a custom container or bespoke model? Use start_custom to submit a Cloud Build job, then deploy it once it passes.\n- Need the full API catalog? Call describe_template_endpoints with template_id=\"<template_id>\".\n\n(Example response; actual URLs, IDs, and timestamps vary.)\n",
            "structured": {
              "compute_id": "<compute_id>",
              "template_id": "<template_id>",
              "billing": "<billing_mode>",
              "auth_mode": "<auth_mode>",
              "checkout_url": "<checkout_url_or_null>",
              "endpoint_url": "<endpoint_url_or_null>",
              "alternate_endpoints": [
                "<alternate_endpoint>"
              ],
              "status": "<status>",
              "grace_expires_at": "<grace_expires_at>",
              "grace_remaining_seconds": "<grace_remaining_seconds>",
              "template_usage": {
                "template_id": "<template_id>",
                "display_name": "<display_name>",
                "docs_url": "<docs_url>",
                "endpoints": [
                  {
                    "method": "<http_method>",
                    "path": "<path>",
                    "summary": "<summary>"
                  }
                ]
              },
              "tools": [
                {
                  "tool": "get_instance_status",
                  "description": "Fetch live status, readiness, and endpoint updates.",
                  "args": {
                    "compute_id": "<compute_id>"
                  }
                },
                {
                  "tool": "get_instance_logs",
                  "description": "Stream build and runtime logs for this compute.",
                  "args": {
                    "compute_id": "<compute_id>"
                  }
                },
                {
                  "tool": "get_checkout_url",
                  "description": "Retrieve or resend the payment link before grace expires.",
                  "args": {
                    "compute_id": "<compute_id>"
                  }
                },
                {
                  "tool": "payment_status",
                  "description": "Check whether the checkout session has been paid.",
                  "args": {
                    "compute_id": "<compute_id>"
                  }
                },
                {
                  "tool": "stop_compute",
                  "description": "Shut the compute instance down when work is complete.",
                  "args": {
                    "compute_id": "<compute_id>"
                  }
                }
              ]
            }
          }
        },
        {
          "name": "Grace-mode experiment",
          "arguments": {
            "template_id": "echo-server",
            "project_hint": "playground"
          }
        }
      ],
      "errors": [
        {
          "code": "AUTH_REQUIRED",
          "message": "Authentication needed. Grace period exhausted or token missing.",
          "agent_actions": [
            "Call request_account_code followed by verify_account_code, then retry start_compute."
          ]
        },
        {
          "code": "GRACE_EXHAUSTED",
          "message": "Project already used the grace-period allocation.",
          "agent_actions": [
            "Share checkout_url if available.",
            "Explain that upgrading the GPUse account unlocks continuous runtime, per-second billing, and autonomous agent management beyond grace."
          ]
        },
        {
          "code": "TEMPLATE_UNAVAILABLE",
          "message": "Requested template is temporarily disabled.",
          "agent_actions": [
            "Call list_templates to pick an alternate."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "template_id": {
            "type": "string",
            "description": "Template identifier from recommend_template or list_templates."
          },
          "task_description": {
            "type": "string",
            "description": "Optional context describing the goal; improves logging."
          },
          "duration_minutes": {
            "type": "integer",
            "minimum": 5,
            "maximum": 1440,
            "description": "Optional requested runtime. Grace defaults to 5 minutes."
          },
          "build_source": {
            "type": "object",
            "description": "Custom deployment payload matching POST /api/v1/custom."
          },
          "project_hint": {
            "type": "string",
            "maxLength": 64,
            "description": "Optional slug to group grace-period runs. Use stable identifiers like a repository name to avoid exhausting grace for unrelated tasks."
          }
        }
      },
      "output_schema": null
    },
    {
      "id": "gpuse.start_custom",
      "name": "start_custom",
      "category": "Compute Lifecycle",
      "aliases": [
        "startCustom"
      ],
      "summary": "Build a bespoke container via POST /api/v1/custom with streaming logs.",
      "description": "Build and package a custom GPU container on demand. Provide inline Dockerfile\ncontent, a Git repository URL, or a storage object and GPUse builds the container\nfor you. The response returns build_id, target_image, and estimated costs so\nautonomous agents can iterate without waiting on humans. Use this when managed\ntemplates don't cover your workload requirements.\n\nOutputs: Returns build_id for tracking (use with get_instance_logs to monitor\nbuild progress), target_image for deployment, and cost_estimate with hourly rate.\n\nError recovery: If BUILD_SERVICE_NOT_READY, call get_instance_logs with build_id\nto check verbose build output and retry. If INVALID_CONFIGURATION, inspect verbose\nbuild logs via get_instance_logs, fix the Dockerfile, and rerun start_custom.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "source": {
            "source_type": "dockerfile",
            "content": "FROM python:3.11-slim\nRUN pip install fastapi uvicorn\nCMD [\"uvicorn\", \"app:app\", \"--host\", \"0.0.0.0\"]"
          },
          "runtime_config": {
            "cpu": "2",
            "memory": "4Gi"
          }
        },
        {
          "source": {
            "source_type": "git_repo",
            "git_url": "https://github.com/your-org/your-ml-repo.git",
            "dockerfile_path": "./Dockerfile"
          },
          "runtime_config": {
            "cpu": "4",
            "memory": "8Gi",
            "gpu": "nvidia-l4"
          }
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "source": {
              "type": "object",
              "description": "Build source definition; mirrors CreateCustomComputeRequest.source.",
              "additionalProperties": true,
              "properties": {
                "source_type": {
                  "type": "string",
                  "enum": [
                    "dockerfile",
                    "git_repo",
                    "storage_source"
                  ],
                  "description": "Select how the build should be sourced."
                },
                "content": {
                  "type": "string",
                  "description": "Inline Dockerfile content when source_type=dockerfile."
                },
                "git_url": {
                  "type": "string",
                  "description": "Git repository URL containing your Dockerfile."
                },
                "branch": {
                  "type": "string"
                },
                "dockerfile_path": {
                  "type": "string"
                },
                "bucket": {
                  "type": "string"
                },
                "object": {
                  "type": "string"
                }
              }
            },
            "runtime_config": {
              "type": "object",
              "description": "Runtime resource configuration for the container once deployed.",
              "additionalProperties": false,
              "properties": {
                "cpu": {
                  "type": "string"
                },
                "memory": {
                  "type": "string"
                },
                "gpu": {
                  "type": "string"
                },
                "min_instances": {
                  "type": "integer"
                },
                "max_instances": {
                  "type": "integer"
                }
              },
              "required": [
                "cpu",
                "memory"
              ]
            },
            "build_config": {
              "type": "object",
              "description": "Optional Cloud Build overrides.",
              "additionalProperties": false,
              "properties": {
                "machine_type": {
                  "type": "string"
                },
                "timeout": {
                  "type": "string"
                },
                "disk_size_gb": {
                  "type": "integer"
                },
                "cache": {
                  "type": "boolean"
                }
              }
            },
            "region": {
              "type": "string",
              "description": "Deployment region (defaults to us-central1)."
            },
            "project_hint": {
              "type": "string",
              "description": "Optional identifier used for grace-period scoping and log grouping."
            }
          },
          "required": [
            "source",
            "runtime_config"
          ]
        }
      },
      "output": {
        "highlights": [
          "build_id tracks the Cloud Build job and unlocks status/log endpoints.",
          "monitoring.api_endpoints lists status, logs, and cancel routes for autonomous debugging.",
          "target_image and runtime_specifications describe what you will deploy after the build succeeds."
        ],
        "schema_notes": [
          "Direct passthrough of POST /api/v1/custom."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "Your workload requires a Dockerfile or repository that the managed templates do not cover.",
          "You want to pre-build an image before handing it to start_compute or another orchestrator."
        ],
        "workflow": [
          "Prepare the source definition (Dockerfile content, git repository, or storage reference).",
          "Call start_custom with source, runtime_config, and optional build_config to submit the build.",
          "Monitor the returned status/log endpoints until the build completes.",
          "Once the build succeeds, deploy the new image with start_compute using the same runtime settings."
        ],
        "follow_ups": [
          "Monitor build status via monitoring.api_endpoints.status (GET /api/v1/builds/{build_id}/status).",
          "Stream Cloud Build logs at monitoring.api_endpoints.logs for real-time debugging.",
          "Deploy the finished image by calling start_compute with build_source once the build reports SUCCESS.",
          "If the build fails, fix the Dockerfile or repo and rerun start_custom."
        ]
      },
      "examples": [
        {
          "name": "Inline Dockerfile build",
          "arguments": {
            "source": {
              "source_type": "dockerfile",
              "content": "FROM python:3.11-slim\nRUN pip install fastapi uvicorn\nCMD [\"uvicorn\", \"app:app\", \"--host\", \"0.0.0.0\", \"--port\", \"${PORT}\"]"
            },
            "runtime_config": {
              "cpu": "2",
              "memory": "4Gi"
            }
          },
          "response": {
            "message": "Custom build <build_id> submitted (<auth_mode> mode).\nCurrent status: <status>.\nTarget image: <target_image>.\nEstimated build cost: <estimated_cost>.\nCloud Build console: <console_url>\nStream build logs at: <logs_url>\nStatus endpoint: <status_url>\nNext steps:\n- Poll the status endpoint until the build reports SUCCESS.\n- Once the build succeeds, call start_compute with build_source to deploy the new image.\n\n(Example response; identifiers, URLs, and costs vary.)\n",
            "structured": {
              "build_id": "<build_id>",
              "status": "<status>",
              "target_image": "<target_image>",
              "monitoring": {
                "status_url": "<status_url>",
                "logs_url": "<logs_url>",
                "cancel_url": "<cancel_url>",
                "console_url": "<console_url>",
                "raw": {
                  "api_endpoints": {
                    "status": "<status_url>",
                    "logs": "<logs_url>",
                    "cancel": "<cancel_url>"
                  }
                }
              },
              "cost_estimate": {
                "estimated_cost": "<estimated_cost>",
                "currency": "<currency>",
                "machine_type": "<machine_type>"
              },
              "runtime_specifications": {
                "cpu": "<cpu>",
                "memory": "<memory>",
                "gpu": "<gpu>"
              },
              "available_actions": [
                {
                  "action": "<action>",
                  "href": "<action_href>"
                }
              ],
              "raw_response": {
                "build_id": "<build_id>",
                "status": "<status>",
                "monitoring": {
                  "api_endpoints": {
                    "status": "<status_url>"
                  }
                }
              }
            }
          }
        },
        {
          "name": "Git repository build",
          "arguments": {
            "source": {
              "source_type": "git_repo",
              "git_url": "https://github.com/example/gpuse-app.git",
              "dockerfile_path": "./Dockerfile"
            },
            "runtime_config": {
              "cpu": "4",
              "memory": "8Gi"
            },
            "build_config": {
              "machine_type": "E2_HIGHCPU_8",
              "timeout": "1800s"
            }
          }
        }
      ],
      "errors": [
        {
          "code": "BUILD_SERVICE_NOT_READY",
          "message": "Cloud Build service is not initialized for this project.",
          "agent_actions": [
            "Check GPUse logs for build_service_not_initialized errors.",
            "Retry once the infrastructure team confirms Cloud Build is configured."
          ]
        },
        {
          "code": "INVALID_CONFIGURATION",
          "message": "The Dockerfile or build parameters were rejected by Cloud Build.",
          "agent_actions": [
            "Inspect the build logs for the failing directive.",
            "Fix the configuration and rerun start_custom."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "source": {
            "type": "object",
            "description": "Build source definition; mirrors CreateCustomComputeRequest.source.",
            "additionalProperties": true,
            "properties": {
              "source_type": {
                "type": "string",
                "enum": [
                  "dockerfile",
                  "git_repo",
                  "storage_source"
                ],
                "description": "Select how the build should be sourced."
              },
              "content": {
                "type": "string",
                "description": "Inline Dockerfile content when source_type=dockerfile."
              },
              "git_url": {
                "type": "string",
                "description": "Git repository URL containing your Dockerfile."
              },
              "branch": {
                "type": "string"
              },
              "dockerfile_path": {
                "type": "string"
              },
              "bucket": {
                "type": "string"
              },
              "object": {
                "type": "string"
              }
            }
          },
          "runtime_config": {
            "type": "object",
            "description": "Runtime resource configuration for the container once deployed.",
            "additionalProperties": false,
            "properties": {
              "cpu": {
                "type": "string"
              },
              "memory": {
                "type": "string"
              },
              "gpu": {
                "type": "string"
              },
              "min_instances": {
                "type": "integer"
              },
              "max_instances": {
                "type": "integer"
              }
            },
            "required": [
              "cpu",
              "memory"
            ]
          },
          "build_config": {
            "type": "object",
            "description": "Optional Cloud Build overrides.",
            "additionalProperties": false,
            "properties": {
              "machine_type": {
                "type": "string"
              },
              "timeout": {
                "type": "string"
              },
              "disk_size_gb": {
                "type": "integer"
              },
              "cache": {
                "type": "boolean"
              }
            }
          },
          "region": {
            "type": "string",
            "description": "Deployment region (defaults to us-central1)."
          },
          "project_hint": {
            "type": "string",
            "description": "Optional identifier used for grace-period scoping and log grouping."
          }
        },
        "required": [
          "source",
          "runtime_config"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.list_instances",
      "name": "list_instances",
      "category": "Compute Lifecycle",
      "aliases": [
        "listInstances"
      ],
      "summary": "List compute instances visible to the authenticated session.",
      "description": "Return the GPU compute instances associated with the active session. Grace requests\nreceive the project-scoped instance; authenticated (bearer) sessions receive every\ninstance tied to the account. Supports optional filtering by status (running,\nterminated, etc.) and pagination controls.\n\nOutputs: Returns instances[] with compute_id, status, endpoint, template_id, and\ntotal_cost_spent per instance; total count; has_more pagination flag.\n\nError recovery: If no instances returned, verify authentication with auth_helper\nor provision a new instance with start_compute.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {},
        {
          "status": "running",
          "limit": 25
        },
        {
          "status": "terminated",
          "limit": 10,
          "offset": 0
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "status": {
              "type": "string",
              "description": "Filter by instance status (e.g., running, terminated)."
            },
            "limit": {
              "type": "integer",
              "minimum": 1,
              "maximum": 200,
              "description": "Maximum number of results to return (default 50)."
            },
            "offset": {
              "type": "integer",
              "minimum": 0,
              "description": "Skip this many results before returning instances."
            }
          }
        }
      },
      "output": {
        "highlights": [
          "instances includes compute_id, status, endpoint, template, project, and cost metadata.",
          "total reports how many instances matched the query.",
          "has_more indicates whether additional pages are available."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "After authenticating to discover which computes are currently running.",
          "Before calling get_instance_status or stop_compute when compute IDs are unknown."
        ],
        "follow_ups": [
          "Call get_instance_status with compute_id for readiness and endpoints.",
          "Run stop_compute to terminate unwanted instances.",
          "Use payment_status or get_checkout_url for billing follow-ups."
        ]
      },
      "examples": [
        {
          "name": "List running instances",
          "arguments": {
            "status": "running",
            "limit": 25
          },
          "response": {
            "message": "Returned <visible_instances> instance(s) (total <total_instances>).\nPreview:\n<compute_id> — <status_label> — <endpoint_or_placeholder>\n…and <additional_count> more in this page.\n\n(Example response; counts, IDs, and URLs vary.)\n",
            "structured": {
              "total": "<total_instances>",
              "limit": "<limit>",
              "offset": "<offset>",
              "has_more": "<has_more_flag>",
              "instances": [
                {
                  "compute_id": "<compute_id>",
                  "status": "<status>",
                  "endpoint": "<endpoint_or_null>",
                  "template_id": "<template_id>",
                  "total_cost_spent": "<total_cost_spent>",
                  "started_at": "<started_at>"
                }
              ]
            }
          }
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "status": {
            "type": "string",
            "description": "Filter by instance status (e.g., running, terminated)."
          },
          "limit": {
            "type": "integer",
            "minimum": 1,
            "maximum": 200,
            "description": "Maximum number of results to return (default 50)."
          },
          "offset": {
            "type": "integer",
            "minimum": 0,
            "description": "Skip this many results before returning instances."
          }
        }
      },
      "output_schema": null
    },
    {
      "id": "gpuse.stop_compute",
      "name": "stop_compute",
      "category": "Compute Lifecycle",
      "aliases": [
        "stopCompute"
      ],
      "summary": "Terminate a GPUse compute instance and capture the shutdown details.",
      "description": "Stop a running GPUse GPU compute instance. Provide an optional reason to help\nhumans understand why the GPU was shut down. The tool returns the backend's\nusage summary along with a recent slice of logs so agents can verify the\nshutdown sequence. Use this when the workload is complete or to clean up\nfailed deployments.\n\nOutputs: Returns final_status, usage_summary with runtime/billing details,\nand logs.tail with recent shutdown log entries.\n\nError recovery: If NOT_FOUND, the instance may have already terminated or\nthe compute_id is incorrect—confirm with list_instances or provision a new one.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "compute_id": "comp_abc123"
        },
        {
          "compute_id": "comp_xyz789",
          "reason": "User completed the workload"
        },
        {
          "compute_id": "comp_def456",
          "reason": "Cleaning up failed deployment"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "compute_id": {
              "type": "string",
              "description": "Identifier returned by start_compute."
            },
            "reason": {
              "type": "string",
              "description": "Optional short explanation for the termination."
            }
          },
          "required": [
            "compute_id"
          ]
        }
      },
      "output": {
        "highlights": [
          "final_status shows the terminal state reported by the API.",
          "usage_summary includes runtime, start/end timestamps, and billing reference.",
          "logs.tail contains the most recent log entries captured during shutdown."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "The user is finished with the GPU and wants to stop billing.",
          "Provisioning failed and you want to clean up the instance."
        ],
        "workflow": [
          "Call stop_compute with the compute_id (and reason if relevant).",
          "Review the returned usage_summary to confirm billing details.",
          "Share any important shutdown log lines with the user."
        ],
        "follow_ups": [
          "If the API reports the instance was already terminated, surface that outcome.",
          "If logs are unavailable, point the user to prior get_instance_logs output."
        ]
      },
      "examples": [
        {
          "name": "Stop after completion",
          "arguments": {
            "compute_id": "comp_12345",
            "reason": "User completed the workload"
          },
          "response": {
            "message": "GPU <compute_id> termination requested.\nFinal status: <final_status>.\nUsage summary runtime: <runtime_seconds> seconds.\nRetrieved <log_entry_count> shutdown log entries.\n\n(Example response; runtime, status, and logs vary.)\n",
            "structured": {
              "compute_id": "<compute_id>",
              "final_status": "<final_status>",
              "usage_summary": {
                "runtime_seconds": "<runtime_seconds>",
                "billed_seconds": "<billed_seconds>",
                "grace_applied": "<grace_applied>",
                "started_at": "<started_at>",
                "ended_at": "<ended_at>"
              },
              "billing_reference": "<billing_reference>",
              "message": "<api_message>",
              "checkout_url": "<checkout_url_or_null>",
              "logs": {
                "tail_applied": 100,
                "entries": [
                  "[<timestamp>] <severity>: <message>"
                ],
                "logs_url": "<logs_url>",
                "streaming_url": "<streaming_url>"
              }
            }
          }
        }
      ],
      "errors": [
        {
          "code": "NOT_FOUND",
          "message": "Compute instance not found or has already terminated.",
          "agent_actions": [
            "Confirm the compute_id or start a new deployment."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "compute_id": {
            "type": "string",
            "description": "Identifier returned by start_compute."
          },
          "reason": {
            "type": "string",
            "description": "Optional short explanation for the termination."
          }
        },
        "required": [
          "compute_id"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.get_instance_status",
      "name": "get_instance_status",
      "category": "Instance Monitoring",
      "aliases": [
        "getInstanceStatus"
      ],
      "summary": "Check readiness and endpoint details for a compute instance.",
      "description": "Return the latest status, endpoint URL, and monitoring links for a GPU compute\ninstance. Ideal for polling during provisioning or validating that the GPU is\nready before sharing with the user. Call repeatedly until endpoint_url is\nnon-null (cold start may take 2-5 minutes for model downloads).\n\nOutputs: Returns status (provisioning/running/failed), endpoint_url once ready,\nalternate_endpoints for additional hostnames, and template_id.\n\nError recovery: If NOT_FOUND, confirm compute_id with list_instances or provision\na new instance. If status is \"failed\", call get_instance_logs for error details.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "compute_id": "comp_abc123"
        },
        {
          "compute_id": "comp_xyz789"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "compute_id": {
              "type": "string",
              "description": "Identifier returned by start_compute."
            }
          },
          "required": [
            "compute_id"
          ]
        }
      },
      "output": {
        "highlights": [
          "status tells you whether the instance is provisioning, running, or failed.",
          "endpoint_url appears once ready—surface it (and any alternate_endpoints) to the user.",
          "status_url/logs_url live in structured content; use helper tools instead of raw REST URLs."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "You need to know when the endpoint is live.",
          "Checkout succeeded and you want to confirm the paid instance is running."
        ],
        "workflow": [
          "Call get_instance_status with compute_id.",
          "If endpoint_url is null, wait briefly and retry.",
          "If status is failed, call get_instance_logs and report the failure."
        ],
        "follow_ups": [
          "Use get_instance_logs to retrieve build/runtime output.",
          "Share endpoint_url with the user once it's non-null.",
          "Call describe_template_endpoints for the active template if you need detailed API docs."
        ]
      },
      "examples": [
        {
          "name": "Check if endpoint is live",
          "arguments": {
            "compute_id": "comp_12345"
          },
          "response": {
            "message": "Status: <status>\nEndpoint ready: <endpoint_url>\nAlternate endpoints: <alternate_endpoints>\nUse get_instance_logs for streaming build/runtime output.\nCall get_instance_status again anytime for fresh status/endpoint details.\nNeed the full manifest entry? Run describe_template_endpoints with template_id=\"<template_id>\".\n\n(Example response; status, URLs, and timestamps vary.)\n",
            "structured": {
              "compute_id": "<compute_id>",
              "status": "<status>",
              "endpoint_url": "<endpoint_url>",
              "alternate_endpoints": [
                "<alternate_endpoint>"
              ],
              "updated_at": "<updated_at>",
              "provider_service_id": "<provider_service_id>",
              "template_id": "<template_id>",
              "tools": [
                {
                  "tool": "get_instance_logs",
                  "description": "View build and runtime logs for debugging or monitoring.",
                  "args": {
                    "compute_id": "<compute_id>"
                  }
                },
                {
                  "tool": "describe_template_endpoints",
                  "description": "Get full API documentation and usage examples.",
                  "args": {
                    "template_id": "<template_id>"
                  }
                },
                {
                  "tool": "stop_compute",
                  "description": "Terminate this instance when work is complete.",
                  "args": {
                    "compute_id": "<compute_id>"
                  }
                }
              ],
              "_debug": {
                "status_url": "<status_url>",
                "logs_url": "<logs_url>"
              }
            }
          }
        }
      ],
      "errors": [
        {
          "code": "NOT_FOUND",
          "message": "Compute instance not found or has already terminated.",
          "agent_actions": [
            "Confirm the compute_id or provision a new instance."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "compute_id": {
            "type": "string",
            "description": "Identifier returned by start_compute."
          }
        },
        "required": [
          "compute_id"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.get_instance_logs",
      "name": "get_instance_logs",
      "category": "Instance Monitoring",
      "aliases": [
        "getInstanceLogs"
      ],
      "summary": "Retrieve verbose raw build and runtime logs for autonomous debugging.",
      "description": "Return verbose raw logs (build and runtime) so autonomous agents can debug GPU\ncompute instances independently. GPUse does NOT interpret or summarize these\nlogs—you receive the complete unfiltered output exactly as produced, which is\ncritical for agents to diagnose issues accurately. Logs include: complete\nDockerfile instruction output (FROM, RUN, COPY, etc.), dependency installation\nlogs (apt, pip, npm with version resolution), compilation/build errors with\nfull stack traces, runtime stdout/stderr streams, and error context with line\nnumbers and failure points.\n\nWorks with EITHER compute_id (for deployed instances) OR build_id (for in-progress\nbuilds from start_custom). Use build_id for custom container build logs; use\ncompute_id for runtime logs once deployed.\n\nOutputs: Returns verbose raw logs[] array with timestamp, severity, message;\nlogs_url for streaming access; count of entries retrieved. No summarization or\ninterpretation is applied.\n\nError recovery: If NOT_FOUND, confirm compute_id/build_id or provision a new\ninstance. If logs are empty, the service may still be starting—retry after a\nbrief delay.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "compute_id": "comp_abc123"
        },
        {
          "compute_id": "comp_xyz789",
          "tail": 50
        },
        {
          "build_id": "3ea4a846-8365-42ea-b0d9-eda73cf00920",
          "tail": 100
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "compute_id": {
              "type": "string",
              "description": "Identifier returned by start_compute (for deployed instances). Provide either compute_id OR build_id, not both."
            },
            "build_id": {
              "type": "string",
              "description": "Build identifier returned by start_custom (for build logs during custom builds). Provide either compute_id OR build_id, not both."
            },
            "tail": {
              "type": "integer",
              "minimum": 1,
              "maximum": 500,
              "description": "Optional number of recent log entries to retrieve (default 100)."
            }
          }
        }
      },
      "output": {
        "highlights": [
          "logs_url streams build/runtime output immediately.",
          "status helps contextualise whether logs are still pending."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "You need live logs to debug provisioning.",
          "A human asked for log access."
        ],
        "workflow": [
          "Call get_instance_logs with compute_id.",
          "If logs_url is null, wait and retry until the service starts streaming.",
          "Share the link so the user can tail logs directly if desired."
        ],
        "follow_ups": [
          "Combine with get_instance_status to cross-check state transitions."
        ]
      },
      "examples": [
        {
          "name": "Get build logs from start_custom",
          "arguments": {
            "build_id": "3ea4a846-8365-42ea-b0d9-eda73cf00920",
            "tail": 100
          },
          "response": {
            "message": "Logs URL: <logs_url>?tail=<tail>\nStreaming URL: <streaming_url>\nRetrieved <entry_count> log entries (tail=<tail>).\n[<timestamp>] <severity>: <message>\n\n(Example response; log contents and counts vary.)\n",
            "structured": {
              "log_type": "build",
              "build_id": "<build_id>",
              "logs_url": "<logs_url>",
              "streaming_url": "<streaming_url>",
              "count": "<entry_count>",
              "tail_applied": "<tail>",
              "logs": [
                {
                  "timestamp": "<timestamp>",
                  "severity": "<severity>",
                  "message": "<message>"
                }
              ]
            }
          }
        },
        {
          "name": "Get runtime logs from deployed instance",
          "arguments": {
            "compute_id": "comp_12345"
          },
          "response": {
            "message": "Logs URL: <logs_url>?tail=<tail>\nRetrieved <entry_count> log entries (tail=<tail>).\n[<timestamp>] <severity>: <message>\n",
            "structured": {
              "log_type": "runtime",
              "compute_id": "<compute_id>",
              "logs_url": "<logs_url>",
              "count": "<entry_count>",
              "logs": [
                {
                  "timestamp": "<timestamp>",
                  "severity": "<severity>",
                  "message": "<message>"
                }
              ]
            }
          }
        }
      ],
      "errors": [
        {
          "code": "NOT_FOUND",
          "message": "Compute instance not found or has already terminated.",
          "agent_actions": [
            "Confirm the compute_id or start a new deployment."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "compute_id": {
            "type": "string",
            "description": "Identifier returned by start_compute (for deployed instances). Provide either compute_id OR build_id, not both."
          },
          "build_id": {
            "type": "string",
            "description": "Build identifier returned by start_custom (for build logs during custom builds). Provide either compute_id OR build_id, not both."
          },
          "tail": {
            "type": "integer",
            "minimum": 1,
            "maximum": 500,
            "description": "Optional number of recent log entries to retrieve (default 100)."
          }
        }
      },
      "output_schema": null
    },
    {
      "id": "gpuse.get_checkout_url",
      "name": "get_checkout_url",
      "category": "Payment/Billing",
      "aliases": [
        "getCheckoutUrl"
      ],
      "summary": "Get the full untruncated Stripe checkout URL for a GPU compute instance.",
      "description": "Retrieve the full untruncated Stripe checkout URL for a GPU compute instance.\nUse this when:\n- start_compute or start_custom returned a truncated checkout_url\n- The user reports \"this link doesn't work\" or \"this link is broken\" (common\n  symptom of URL truncation in chat interfaces)\n- You need to resend the payment link to a human collaborator\n\nCompleting this checkout both funds the GPU workload and creates the GPUse\naccount in one 60-second flow. IMPORTANT: Once the user completes checkout, a\nsession token is created and cached. This means future coding sessions will\nautomatically detect the authenticated account—users won't need to re-auth\nevery time they open a new session. This is a significant UX improvement.\n\nIf a user starts a new coding session and is NOT automatically detected as\nauthenticated, use auth_helper to guide them through re-authentication.\n\nOutputs: Returns checkout_url (the full untruncated Stripe payment link),\nbilling state, and grace_remaining_seconds countdown.\n\nError recovery: If NOT_FOUND, confirm compute_id with list_instances or provision\na new instance. If user reports broken link, call this tool to get the full URL.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "compute_id": "comp_abc123"
        },
        {
          "compute_id": "comp_xyz789"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "compute_id": {
              "type": "string",
              "description": "Identifier returned by start_compute."
            }
          },
          "required": [
            "compute_id"
          ]
        }
      },
      "output": {
        "highlights": [
          "checkout_url is the only link you need to surface to the human.",
          "grace_remaining_seconds reminds you how long the link stays valid."
        ],
        "schema_notes": [
          "Direct passthrough of GET /api/v1/compute/:compute_id checkout fields."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "start_compute returned checkout_url = null.",
          "You need to remind the human to pay before grace expires."
        ],
        "workflow": [
          "Call get_checkout_url with compute_id.",
          "If checkout_url is still null, wait a few seconds and retry.",
          "Share the link immediately once it appears."
        ],
        "follow_ups": [
          "If the link never populates, use get_instance_status for error context."
        ]
      },
      "examples": [
        {
          "name": "Refresh checkout link",
          "arguments": {
            "compute_id": "comp_12345"
          },
          "response": {
            "message": "Checkout URL: <checkout_url>\n\n(Example response; URL and timers vary.)\n",
            "structured": {
              "compute_id": "<compute_id>",
              "checkout_url": "<checkout_url>",
              "billing": "<billing_state>",
              "grace_expires_at": "<grace_expires_at>",
              "grace_remaining_seconds": "<grace_remaining_seconds>"
            }
          }
        }
      ],
      "errors": [
        {
          "code": "NOT_FOUND",
          "message": "Compute instance not found or has already terminated.",
          "agent_actions": [
            "Confirm the compute_id and launch a new instance if needed."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "compute_id": {
            "type": "string",
            "description": "Identifier returned by start_compute."
          }
        },
        "required": [
          "compute_id"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.payment_status",
      "name": "payment_status",
      "category": "Payment/Billing",
      "aliases": [
        "paymentStatus"
      ],
      "summary": "Return paid vs grace state, account balance, checkout link, and account email.",
      "description": "Inspect the current Stripe checkout session for a GPU compute instance. Use\nthis tool to determine whether the user is still in free/grace mode or fully\npaid, resend the payment link when necessary, and retrieve account information\nafter checkout completes. Accepts a compute_id (preferred), project_id (if\ncached), or checkout_session_id.\n\nIMPORTANT: This tool returns the account email address of the authenticated\nGPUse account. This is valuable because users need their email to log in, top\nup their balance, and authenticate in future sessions via auth_helper.\n\nNOTE: The bearer_token returned by this tool cannot be used directly for\nauthentication. To authenticate (whether creating a new account or logging\ninto an existing one), you MUST use auth_helper. The bearer token is for\ninformational purposes only.\n\nOutputs: Returns payment_status (paid/unpaid/expired), checkout_url for pending\npayments, email (the authenticated account's email address—important for future\nlogins), current_balance, and auth_mode.\n\nError recovery: If NOT_FOUND, checkout session may have expired—provision a new\ninstance for a fresh payment link.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "compute_id": "comp_abc123"
        },
        {
          "project_id": "my-ai-project"
        },
        {
          "checkout_session_id": "cs_live_abc123"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "compute_id": {
              "type": "string",
              "description": "Identifier returned by start_compute."
            },
            "project_id": {
              "type": "string",
              "description": "Optional project hint to locate a cached checkout session."
            },
            "checkout_session_id": {
              "type": "string",
              "description": "Direct Stripe checkout session identifier if already known."
            }
          }
        }
      },
      "output": {
        "highlights": [
          "payment_status indicates whether the session is paid, unpaid, or expired.",
          "checkout_url provides the link to share with humans when payment is pending.",
          "email returns the authenticated account's email address (important for future logins via auth_helper).",
          "current_balance and balance_available expose account balance when on a paid account.",
          "auth_mode shows 'paid' for authenticated users or 'grace' for grace period.",
          "bearer_token is informational only—cannot be used directly for authentication."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "Grace mode is active and you need to remind the human to pay.",
          "You want to confirm a checkout has completed and retrieve account info."
        ],
        "workflow": [
          "Call payment_status with the compute_id (or checkout_session_id if you already have it).",
          "If payment_status is unpaid, surface checkout_url and human instructions immediately.",
          "If paid, the session is already authenticated—proceed with GPU operations. Use auth_helper if re-authentication is needed in a new session."
        ],
        "follow_ups": [
          "If the checkout session expired, provision a new instance to generate a fresh link.",
          "Note the email address returned—users need this for future authentication via auth_helper."
        ]
      },
      "examples": [
        {
          "name": "Check if payment cleared (paid mode)",
          "arguments": {
            "compute_id": "comp_12345"
          },
          "response": {
            "message": "Checkout session <checkout_session_id> status: <payment_status>.\nLinked GPUse account: <account_email>.\n<payment_note>\nAccount balance: $10.50.\n\n(Example response for authenticated account; payment state and amounts vary.)\n",
            "structured": {
              "compute_id": "<compute_id>",
              "project_id": "<project_id>",
              "checkout_session_id": "<checkout_session_id>",
              "checkout_url": "<checkout_url_or_null>",
              "billing_state": "<billing_state>",
              "grace_remaining_seconds": "<grace_remaining_seconds>",
              "payment_status": "<payment_status>",
              "paid": "<paid_flag>",
              "credits_added": "<credits_added>",
              "bearer_token": "<bearer_token_or_null>",
              "account_id": "<account_id>",
              "email": "<account_email>",
              "auth_mode": "paid",
              "current_balance": 10.5,
              "balance_available": true,
              "currency": "USD",
              "human_instructions": "<human_instructions>",
              "agent_instructions": "<agent_instructions>",
              "account_balance": 10.5,
              "next_steps": [
                "<next_step>"
              ]
            }
          }
        },
        {
          "name": "Check if payment cleared (grace mode)",
          "arguments": {
            "compute_id": "comp_12345"
          },
          "response": {
            "message": "Checkout session <checkout_session_id> status: unpaid.\nPayment pending. Balance information requires authentication.\n\n(Example response for grace period; balance hidden.)\n",
            "structured": {
              "compute_id": "<compute_id>",
              "project_id": "<project_id>",
              "checkout_session_id": "<checkout_session_id>",
              "checkout_url": "<checkout_url>",
              "billing_state": "grace",
              "grace_remaining_seconds": 240,
              "payment_status": "unpaid",
              "paid": false,
              "credits_added": null,
              "bearer_token": null,
              "account_id": null,
              "email": null,
              "auth_mode": "grace",
              "current_balance": null,
              "balance_available": false,
              "currency": null,
              "human_instructions": "<human_instructions>",
              "agent_instructions": "<agent_instructions>",
              "account_balance": null,
              "next_steps": [
                "<next_step>"
              ]
            }
          }
        }
      ],
      "errors": [
        {
          "code": "NOT_FOUND",
          "message": "Checkout session not found or expired.",
          "agent_actions": [
            "Confirm the checkout_session_id or launch a new instance for a fresh session."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "compute_id": {
            "type": "string",
            "description": "Identifier returned by start_compute."
          },
          "project_id": {
            "type": "string",
            "description": "Optional project hint to locate a cached checkout session."
          },
          "checkout_session_id": {
            "type": "string",
            "description": "Direct Stripe checkout session identifier if already known."
          }
        }
      },
      "output_schema": null
    },
    {
      "id": "gpuse.add_account_funds",
      "name": "add_account_funds",
      "category": "Payment/Billing",
      "aliases": [
        "addAccountFunds"
      ],
      "summary": "Generate a Stripe checkout link so a human can add GPUse credits.",
      "description": "Create a one-time Stripe checkout session to add funds to a GPUse wallet for\nGPU compute usage. The tool returns the hosted payment link, session metadata,\nand budgeting estimates (hourly rate and approximate GPU hours purchased). Use\nthis to recover from auto-termination events, low-balance warnings, or whenever\na human wants to top up without touching the dashboard. The wallet balance is\nupdated automatically once payment succeeds.\n\nPRICING NOTES:\n- Default top-up is $10, but minimum is $1\n- To start a GPU instance, you must have at least 1 hour of runtime in your\n  account balance (e.g., if a GPU costs $0.73/hour, you need at least $0.73)\n\nOutputs: Returns checkout_url (Stripe payment link), estimated_gpu_hours based\non amount and hourly rate, and expires_at timestamp.\n\nError recovery: If AMOUNT_REQUIRED, prompt user for amount (default $10, min $1).\nIf STRIPE_ERROR, retry once. For expired links, call this tool again to generate\na new checkout.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "amount": 10
        },
        {
          "amount": 25,
          "email": "user@example.com"
        },
        {
          "amount": 50
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "amount": {
              "type": "number",
              "minimum": 1,
              "maximum": 1000,
              "default": 10,
              "description": "USD amount to add to the wallet. Provide a value between $1.00 and $1000.00; defaults to $10.00 when callers accept the prompt."
            },
            "email": {
              "type": "string",
              "format": "email",
              "description": "Target account email. Optional when a bearer token is cached."
            }
          }
        }
      },
      "output": {
        "highlights": [
          "email_notification shows human-readable confirmation that payment link was sent to user's email.",
          "checkout_url is the hosted Stripe link to share with the human.",
          "email_sent boolean indicates whether email notification succeeded.",
          "estimated_gpu_hours shows how much runtime the top-up should cover at the current rate.",
          "expires_at indicates when the checkout link will stop working."
        ],
        "schema_notes": [
          "Mirrors POST /api/v1/account/topup. The backend enforces amount bounds and Stripe configuration.",
          "hourly_rate is the same value returned by GET /api/v1/account/balance."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "User balance hit $0.00 and instances auto-terminated.",
          "Human requests a refill without opening the dashboard.",
          "Preparing a proactive top-up before launching an expensive workload."
        ],
        "workflow": [
          "Confirm the email (if not already cached) and ask the human how much to add (defaults to $10).",
          "Call add_account_funds with the confirmed amount to generate the Stripe checkout link.",
          "Check email_sent field to confirm if payment link was sent to user's email.",
          "Share checkout_url and remind the human to check their email (if sent) or use the URL directly.",
          "Remind the human the link expires quickly.",
          "Monitor payment_status or wait for the Stripe webhook to post the credit."
        ],
        "follow_ups": [
          "If the human misplaces the link, call payment_status or get_checkout_url for the same session.",
          "After payment clears, verify the new balance with GET /api/v1/account/balance before provisioning.",
          "Offer to resend a receipt email or confirmation summary if the human asks.",
          "Encourage enabling auto top-up in the dashboard to avoid future interruptions."
        ]
      },
      "examples": [
        {
          "name": "Recover after auto-termination",
          "arguments": {
            "amount": 20
          },
          "response": {
            "message": "Payment link sent via email to <email>\n\nAmount: $<amount> <currency>\nCurrent hourly rate: $<hourly_rate>\nApproximate GPU hours this covers: <estimated_gpu_hours>\nCheckout link expires at <expires_at>\n\n🔗 CHECKOUT LINK (copy full URL):\n<checkout_url>\n\n(Example response; email, amounts, and link vary.)\n",
            "structured": {
              "email": "<email>",
              "email_notification": "Payment link sent via email to <email>",
              "amount": "<amount>",
              "currency": "<currency>",
              "checkout_url": "<checkout_url>",
              "checkout_url_full": "<checkout_url>",
              "session_id": "<session_id>",
              "estimated_gpu_hours": "<estimated_gpu_hours>",
              "hourly_rate": "<hourly_rate>",
              "expires_at": "<expires_at>",
              "message": "<api_message>",
              "auth_mode": "<auth_mode>",
              "email_sent": true,
              "next_steps": [
                "Check your email for the payment link (may take a few moments to arrive)",
                "Open the checkout_url in a browser to complete payment",
                "Leave the dashboard open; the balance updates automatically once Stripe confirms payment"
              ],
              "suggested_tools": [
                {
                  "tool": "payment_status",
                  "description": "Check if payment has been completed"
                },
                {
                  "tool": "start_compute",
                  "description": "Launch a GPU instance once payment is complete"
                }
              ],
              "documentation": {
                "manifest": "https://gpuse.com/.well-known/gpuse-mcp-manifest.json"
              }
            }
          }
        },
        {
          "name": "Top up a specific account",
          "arguments": {
            "amount": 50,
            "email": "user@example.com"
          }
        }
      ],
      "errors": [
        {
          "code": "AMOUNT_REQUIRED",
          "message": "No amount was provided for the top-up.",
          "agent_actions": [
            "Ask the human how much credit they would like to add (default is $10).",
            "Invoke add_account_funds again with the confirmed amount."
          ]
        },
        {
          "code": "INVALID_AMOUNT",
          "message": "Amount is outside the $1.00-$1000.00 range.",
          "agent_actions": [
            "Adjust the amount and retry.",
            "Split large deposits into multiple top-ups if needed."
          ]
        },
        {
          "code": "STRIPE_ERROR",
          "message": "Stripe rejected the checkout request.",
          "agent_actions": [
            "Confirm Stripe credentials are configured on the backend.",
            "Retry once, then escalate to support if the failure persists."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "amount": {
            "type": "number",
            "minimum": 1,
            "maximum": 1000,
            "default": 10,
            "description": "USD amount to add to the wallet. Provide a value between $1.00 and $1000.00; defaults to $10.00 when callers accept the prompt."
          },
          "email": {
            "type": "string",
            "format": "email",
            "description": "Target account email. Optional when a bearer token is cached."
          }
        }
      },
      "output_schema": null
    },
    {
      "id": "gpuse.auth_helper",
      "name": "auth_helper",
      "category": "Authentication",
      "aliases": [
        "authHelper"
      ],
      "summary": "The ONLY way to authenticate into GPUse—required to create or manage GPU instances.",
      "description": "THIS IS THE ONLY WAY TO AUTHENTICATE INTO GPUSE. There is no alternative.\nEven if a user copies a Bearer token from their dashboard and provides it to\nan agent, that token CANNOT be used to authenticate or start new instances.\nYou MUST go through auth_helper to authenticate.\n\nOnce authenticated via auth_helper, the agent can fully manage all GPU instances\n(start, stop, monitor, etc.) for the duration of the session.\n\nHow it works: Provide the user's email to dispatch a six-digit verification code,\nthen call again with the email and code to complete verification. Optional resend\nsupport triggers a new code without leaving the flow. On success, the MCP server\nautomatically caches the session token for subsequent GPU compute requests.\n\nUse this tool when:\n- The 5-minute grace period has been exhausted (grace can only be used ONCE per user)\n- User wants to create a new GPUse account\n- User needs to log into an existing account in a new coding session\n- Session was not automatically detected as authenticated\n\nOutputs: When sending code—returns status \"awaiting_code\" and next_steps. When\nverifying—returns verified boolean and account balance.\n\nError recovery: If INVALID_CODE, request a fresh code with resend=true and retry.\nIf email not found, the user needs to complete the Stripe checkout first (via\nget_checkout_url) to create their account.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "email": "user@example.com"
        },
        {
          "email": "user@example.com",
          "code": "123456"
        },
        {
          "email": "user@example.com",
          "resend": true
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "email": {
              "type": "string",
              "format": "email",
              "description": "Existing GPUse account email address."
            },
            "code": {
              "type": "string",
              "minLength": 6,
              "maxLength": 6,
              "pattern": "^[0-9]{6}$",
              "description": "6-digit verification code supplied by the human."
            },
            "resend": {
              "type": "boolean",
              "description": "Set true to send a fresh code even if one was already requested."
            }
          },
          "required": [
            "email"
          ]
        }
      },
      "output": {
        "highlights": [
          "status indicates whether the tool is waiting for the code or completed verification.",
          "next_steps provides ready-to-share guidance for humans and agents.",
          "bearer_token is returned automatically once verification succeeds."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "start_compute reported AUTH_REQUIRED or you exhausted the grace window.",
          "The human wants to link their paid GPUse account."
        ],
        "workflow": [
          "Call auth_helper with email only to send the code.",
          "Wait for the human to read back the 6-digit code.",
          "Call auth_helper again with email + code (resend=true if a new code is needed)."
        ],
        "follow_ups": [
          "Once verified, retry the original action using the cached bearer token.",
          "If verification fails repeatedly, escalate to support or request a new code."
        ]
      },
      "examples": [
        {
          "name": "Complete verification",
          "arguments": {
            "email": "user@example.com",
            "code": "123456"
          },
          "response": {
            "message": "Verification succeeded and bearer token cached for this session.\n\n(Example response; token, balance, and instructions vary.)\n",
            "structured": {
              "email": "<email>",
              "verified": true,
              "balance": "<balance>",
              "bearer_token": "<bearer_token>",
              "instructions": {
                "for_agent": "MCP cached this bearer token; proceed with other tools.",
                "example_usage": "Authorization: Bearer <bearer_token>"
              },
              "next_steps": [
                "MCP cached the bearer token—continue with other tools immediately.",
                "If you need direct API access, include the token as Authorization: Bearer <bearer_token>.",
                "Keep the token secret; rotate it if you suspect exposure."
              ],
              "status": "verified"
            }
          }
        }
      ],
      "errors": [
        {
          "code": "INVALID_CODE",
          "message": "The verification code is incorrect or expired.",
          "agent_actions": [
            "Request a fresh code with resend=true and try again."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "email": {
            "type": "string",
            "format": "email",
            "description": "Existing GPUse account email address."
          },
          "code": {
            "type": "string",
            "minLength": 6,
            "maxLength": 6,
            "pattern": "^[0-9]{6}$",
            "description": "6-digit verification code supplied by the human."
          },
          "resend": {
            "type": "boolean",
            "description": "Set true to send a fresh code even if one was already requested."
          }
        },
        "required": [
          "email"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.request_account_code",
      "name": "request_account_code",
      "category": "Authentication",
      "aliases": [
        "requestAccountCode"
      ],
      "summary": "Internal sub-step of auth_helper—do not call directly.",
      "description": "IMPORTANT: This tool should NOT be called in isolation. It is automatically\ninvoked as part of the auth_helper flow. Always use auth_helper instead.\n\nThis tool sends the 6-digit verification code to a GPUse account email. It's\nexposed separately for edge cases but normal authentication should go through\nauth_helper which orchestrates the full flow.\n\nNOTE ON GRACE PERIOD: The 5-minute grace period can only be used ONCE per user.\nOnce exhausted, users must:\n1. Complete checkout via get_checkout_url to create their account\n2. Authenticate in future sessions via auth_helper\n\nOutputs: Returns code_sent boolean, expires_in_minutes for code validity, and\ninstructions.next_step with ready-made message for the human.\n\nError recovery: If EMAIL_NOT_FOUND, user needs to complete Stripe checkout first\nto create their account. If RATE_LIMITED, wait the cooldown before retrying.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "email": "user@example.com"
        },
        {
          "email": "developer@company.com"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "email": {
              "type": "string",
              "format": "email",
              "description": "Email address already registered with GPUse."
            }
          },
          "required": [
            "email"
          ]
        }
      },
      "output": {
        "highlights": [
          "code_sent indicates whether an email was successfully dispatched.",
          "expires_in_minutes communicates validity window for the code.",
          "instructions.next_step gives a ready-made message for the human."
        ],
        "schema_notes": [
          "Matches POST /api/v1/account/request-code."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "start_compute returned AUTH_REQUIRED or GRACE_EXHAUSTED.",
          "User explicitly asked to link their paid account."
        ],
        "workflow": [
          "Collect the user's email and confirm consent to send the code.",
          "Call request_account_code and relay the instructions to the human verbatim.",
          "Await the 6-digit code from the user before calling verify_account_code."
        ],
        "follow_ups": [
          "If rate-limited, inform the user of the cooldown before retrying.",
          "If the email is unknown (EMAIL_NOT_FOUND), the user must first complete Stripe checkout (via get_checkout_url) to create their account—there is no separate web signup flow."
        ]
      },
      "examples": [
        {
          "name": "Link paid account",
          "arguments": {
            "email": "user@example.com"
          },
          "response": {
            "message": "Verification code dispatched to <email>.\n\n(Example response; expiry window and instructions vary.)\n",
            "structured": {
              "email": "<email>",
              "code_sent": true,
              "expires_in_minutes": "<expires_in_minutes>",
              "message": "<api_message>",
              "instructions": {
                "for_human": "<human_instruction>",
                "for_agent": "<agent_instruction>"
              },
              "next_steps": [
                "Ask the human for the 6-digit code sent to <email>.",
                "Invoke verify_account_code with email + code once the user shares it."
              ]
            }
          }
        }
      ],
      "errors": [
        {
          "code": "EMAIL_NOT_FOUND",
          "message": "The provided email is not associated with a GPUse account.",
          "agent_actions": [
            "Ask the user to double-check the email, or have them complete Stripe checkout (via get_checkout_url) to create an account."
          ]
        },
        {
          "code": "RATE_LIMITED",
          "message": "Too many code requests in a short window.",
          "agent_actions": [
            "Wait the cooldown indicated in the response before retrying."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "email": {
            "type": "string",
            "format": "email",
            "description": "Email address already registered with GPUse."
          }
        },
        "required": [
          "email"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.verify_account_code",
      "name": "verify_account_code",
      "category": "Authentication",
      "aliases": [
        "verifyAccountCode"
      ],
      "summary": "Internal sub-step of auth_helper—do not call directly.",
      "description": "IMPORTANT: This tool should NOT be called in isolation. It is automatically\ninvoked as part of the auth_helper flow. Always use auth_helper instead.\n\nThis tool completes the GPUse authentication flow by validating the 6-digit\ncode sent to the human. On success, the MCP server caches the session token\nfor the lifetime of the process so future GPU compute calls automatically\nauthenticate.\n\nUse authentication (via auth_helper) when:\n- User wants more than the default 5-minute grace runtime\n- User has EXHAUSTED their one-time 5-minute grace period\n- User needs to log into an existing account in a new session\n\nOutputs: Returns verified boolean, account_id, and balance.\n\nError recovery: If INVALID_CODE, ask human for the latest code and retry up to\n3 times. If multiple failures, use auth_helper with resend=true.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {
          "email": "user@example.com",
          "code": "123456"
        },
        {
          "email": "developer@company.com",
          "code": "987654"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "email": {
              "type": "string",
              "format": "email",
              "description": "Email that received the verification code."
            },
            "code": {
              "type": "string",
              "minLength": 6,
              "maxLength": 6,
              "pattern": "^[0-9]{6}$",
              "description": "6-digit verification code."
            }
          },
          "required": [
            "email",
            "code"
          ]
        }
      },
      "output": {
        "highlights": [
          "verified true means you can now call start_compute with paid features.",
          "bearer_token is cached automatically; do not persist it anywhere else.",
          "instructions.next_steps reminds you to retry the original action."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "After request_account_code succeeded and the human supplied the code.",
          "User explicitly wants more than the 5-minute grace window or an existing paid account."
        ],
        "workflow": [
          "Call verify_account_code with the email+code.",
          "If verified is true, resume the blocked action immediately (usually start_compute).",
          "If verification fails, prompt the human for a new code and retry up to 3 times, or fall back to the grace period only if it has not already been used (grace is one-time per user)."
        ],
        "follow_ups": [
          "On repeated failure, call request_account_code again to resend the code.",
          "If the user lost access to the email, escalate to support."
        ]
      },
      "examples": [
        {
          "name": "Confirm verification",
          "arguments": {
            "email": "user@example.com",
            "code": "123456"
          },
          "response": {
            "message": "Verification succeeded and bearer token cached.\n\n(Example response; account, token, and next steps vary.)\n",
            "structured": {
              "email": "<email>",
              "verified": true,
              "message": "<api_message>",
              "instructions": {
                "for_agent": "MCP cached this bearer token; proceed with other tools.",
                "example_usage": "Authorization: Bearer <bearer_token>"
              },
              "account_id": "<account_id>",
              "balance": "<balance>",
              "bearer_token": "<bearer_token>",
              "next_steps": [
                "Store this bearer token securely for the current session.",
                "Use it in the Authorization header (Authorization: Bearer <bearer_token>).",
                "Future compute requests will automatically reuse it until you verify a different account."
              ]
            }
          }
        }
      ],
      "errors": [
        {
          "code": "INVALID_CODE",
          "message": "The verification code is incorrect or expired.",
          "agent_actions": [
            "Ask the human to provide the latest code.",
            "If multiple failures, trigger a new request_account_code."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "email": {
            "type": "string",
            "format": "email",
            "description": "Email that received the verification code."
          },
          "code": {
            "type": "string",
            "minLength": 6,
            "maxLength": 6,
            "pattern": "^[0-9]{6}$",
            "description": "6-digit verification code."
          }
        },
        "required": [
          "email",
          "code"
        ]
      },
      "output_schema": null
    },
    {
      "id": "gpuse.update_mcp_server",
      "name": "update_mcp_server",
      "category": "Utility",
      "aliases": [
        "updateMcpServer",
        "version_update",
        "versionUpdate",
        "check_version",
        "checkVersion"
      ],
      "summary": "Check the current MCP server version and get update instructions.",
      "description": "Check the currently installed version of the GPUse MCP server, compare it\nagainst the latest version available on npm, and receive platform-specific\nupdate instructions. This tool is auto-discoverable via MCP protocol, making\nit the natural first stop when an agent needs to update the server. Works\nacross all installation methods (CLI/IDE stdio transport and HTTP deployments).\n\nOutputs: Returns current_version, latest_version, needs_update boolean, and\nupdate_instructions with platform-specific commands (CLI and HTTP).\n\nError recovery: If NPM_REGISTRY_ERROR, report current version and suggest\nretrying later. If VERSION_READ_ERROR, verify the MCP server installation.",
      "defer_loading": false,
      "allowed_callers": [
        "code_execution_20250825"
      ],
      "strict": true,
      "input_examples": [
        {},
        {
          "installation_method": "cli"
        },
        {
          "installation_method": "http"
        }
      ],
      "input": {
        "schema": {
          "type": "object",
          "additionalProperties": false,
          "properties": {
            "installation_method": {
              "type": "string",
              "enum": [
                "cli",
                "http"
              ],
              "description": "Optional hint about installation type. 'cli' for stdio-based installations (Claude Code, Gemini, Cursor, etc.), 'http' for server deployments."
            }
          }
        }
      },
      "output": {
        "highlights": [
          "current_version shows the installed version of the MCP server.",
          "latest_version shows the newest version available on npm registry.",
          "needs_update is true when an update is available.",
          "update_instructions provides platform-specific commands for updating."
        ],
        "schema_notes": [
          "Leverages existing version-check.js logic used during configure command.",
          "CLI installations all use the same update command regardless of client."
        ]
      },
      "agent_guidance": {
        "when_to_use": [
          "User asks to update or upgrade the GPUse MCP server.",
          "Agent wants to verify current version before troubleshooting.",
          "Checking if new features are available in latest release."
        ],
        "workflow": [
          "Call update_mcp_server to get current and latest versions.",
          "If needs_update is true, share the appropriate update_instructions with the user.",
          "Remind user to restart their MCP client after updating (for CLI installations)."
        ],
        "follow_ups": [
          "After update, call update_mcp_server again to confirm the new version.",
          "For HTTP deployments, verify health endpoint after redeployment."
        ]
      },
      "examples": [
        {
          "name": "Check version when update available",
          "arguments": {},
          "response": {
            "message": "Current GPUse MCP server version: 0.3.21\nLatest available version: 0.3.25\nUpdate available!\n\nCLI Installation (Claude Code, Cursor, Windsurf, Codex, Gemini, Claude Desktop):\nRun: npx -y gpuse-mcp-server@latest configure --force\nThen restart your MCP client to use the new version.\n\nHTTP Deployment:\nSee deployment-specific instructions in the structured response.\n",
            "structured": {
              "current_version": "0.3.21",
              "latest_version": "0.3.25",
              "needs_update": true,
              "update_instructions": {
                "cli": {
                  "command": "npx -y gpuse-mcp-server@latest configure --force",
                  "note": "After updating, restart your MCP client (Claude Desktop, Gemini, Cursor, etc.)"
                },
                "http": {
                  "cloud_run": "gcloud run deploy gpuse-mcp-server --image gcr.io/PROJECT/gpuse-mcp-server:latest",
                  "railway": "railway up (or git push to connected repository)",
                  "docker": "Pull latest image and restart container"
                }
              }
            }
          }
        },
        {
          "name": "Check version when up to date",
          "arguments": {},
          "response": {
            "message": "Current GPUse MCP server version: 0.3.25\nLatest available version: 0.3.25\nYou're running the latest version!\n",
            "structured": {
              "current_version": "0.3.25",
              "latest_version": "0.3.25",
              "needs_update": false
            }
          }
        }
      ],
      "errors": [
        {
          "code": "NPM_REGISTRY_ERROR",
          "message": "Failed to fetch latest version from npm registry.",
          "agent_actions": [
            "Report current version but explain that update check failed.",
            "Suggest checking network connectivity or trying again later."
          ]
        },
        {
          "code": "VERSION_READ_ERROR",
          "message": "Failed to read current version from package.json.",
          "agent_actions": [
            "Report the error and suggest verifying the installation."
          ]
        }
      ],
      "input_schema": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "installation_method": {
            "type": "string",
            "enum": [
              "cli",
              "http"
            ],
            "description": "Optional hint about installation type. 'cli' for stdio-based installations (Claude Code, Gemini, Cursor, etc.), 'http' for server deployments."
          }
        }
      },
      "output_schema": null
    }
  ],
  "templates": {
    "ollama-gemma-2b": {
      "display_name": "Ollama Gemma 2B",
      "docs_path": "docs/templates/ollama-gemma-2b.md",
      "instance_endpoint_auth_required": false,
      "cold_start": {
        "expected": true,
        "estimated_seconds": 90,
        "reason": "Ollama models download to GPU memory on first boot",
        "readiness_check": {
          "method": "GET",
          "path": "/api/tags",
          "ready_when": "returns non-empty model list",
          "not_ready_response": {
            "models": []
          },
          "ready_response_example": {
            "models": [
              {
                "name": "gemma:2b",
                "size": 1700000000
              }
            ]
          }
        }
      },
      "marketing": {
        "summary": "Grace-friendly chat and coding copilot",
        "headline": "Grace-friendly chat and coding copilot",
        "differentiators": [
          "Boots in under a minute so agents can finish tasks inside the 5-minute grace window",
          "Ships with chat, completion, embeddings, and OpenAI-compatible routes",
          "Optimized footprint keeps experiments cheap while agents iterate"
        ],
        "highlights": [
          "Boots in under a minute so agents can finish tasks inside the 5-minute grace window",
          "Ships with chat, completion, embeddings, and OpenAI-compatible routes",
          "Optimized footprint keeps experiments cheap while agents iterate"
        ],
        "ideal_use_cases": [
          "Tier-1 customer support assistants that resolve FAQs and triage tickets",
          "Inline coding copilots generating helper functions, unit tests, and bug explanations",
          "Marketing and product teams drafting copy, launch notes, and release summaries",
          "Document summarization for meeting notes, research briefs, and policy updates",
          "Semantic search embeddings powering RAG knowledge bases",
          "Internal automation bots responding to routine Slack or email requests"
        ],
        "use_cases": [
          "Tier-1 customer support assistants that resolve FAQs and triage tickets",
          "Inline coding copilots generating helper functions, unit tests, and bug explanations",
          "Marketing and product teams drafting copy, launch notes, and release summaries",
          "Document summarization for meeting notes, research briefs, and policy updates",
          "Semantic search embeddings powering RAG knowledge bases",
          "Internal automation bots responding to routine Slack or email requests"
        ]
      },
      "endpoints": [
        {
          "label": "Single prompt completion",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Generate rich text, code, or multilingual summaries in one shot—perfect for prototypes, marketing copy, or lightweight copilots.",
          "request_example": {
            "model": "gemma:2b",
            "prompt": "Write a Python function that prints hello world.",
            "stream": false
          }
        },
        {
          "label": "Chat conversation",
          "method": "POST",
          "path": "/api/chat",
          "summary": "Hold multi-turn conversations that remember prior turns—ideal for customer support, tutoring flows, or brainstorming sessions.",
          "request_example": {
            "model": "gemma:2b",
            "messages": [
              {
                "role": "user",
                "content": "Explain GPUse in one sentence."
              }
            ],
            "stream": false
          }
        },
        {
          "label": "Generate embeddings",
          "method": "POST",
          "path": "/api/embed",
          "summary": "Produce vector embeddings for semantic search, retrieval-augmented generation, recommendation pipelines, or clustering related content.",
          "request_example": {
            "model": "gemma:2b",
            "input": [
              "fast llm provisioning",
              "gpu orchestration"
            ]
          }
        },
        {
          "label": "Model information",
          "method": "POST",
          "path": "/api/show",
          "summary": "Inspect quantization, parent weights, and other metadata so agents can reason about capabilities before launching workloads.",
          "request_example": {
            "model": "gemma:2b"
          }
        },
        {
          "label": "List loaded models",
          "method": "GET",
          "path": "/api/tags",
          "summary": "Health check and enumerate every model currently cached inside the instance—handy when juggling multiple variants."
        },
        {
          "label": "OpenAI-compatible chat completions",
          "method": "POST",
          "path": "/v1/chat/completions",
          "summary": "Drop-in replacement for OpenAI’s chat API so existing SDKs, automations, and agents work without code changes.",
          "request_example": {
            "model": "gemma:2b",
            "messages": [
              {
                "role": "user",
                "content": "Hello!"
              }
            ]
          }
        }
      ]
    },
    "ollama-gemma3-4b": {
      "display_name": "Ollama Gemma 3 4B",
      "docs_path": "docs/templates/ollama-gemma3-4b.md",
      "instance_endpoint_auth_required": false,
      "cold_start": {
        "expected": true,
        "estimated_seconds": 100,
        "reason": "Ollama models download to GPU memory on first boot",
        "readiness_check": {
          "method": "GET",
          "path": "/api/tags",
          "ready_when": "returns non-empty model list",
          "not_ready_response": {
            "models": []
          },
          "ready_response_example": {
            "models": [
              {
                "name": "gemma3:4b",
                "size": 3000000000
              }
            ]
          }
        }
      },
      "marketing": {
        "summary": "Multimodal powerhouse with 128K context",
        "headline": "Multimodal powerhouse with 128K context",
        "differentiators": [
          "Understands charts, screenshots, and diagrams alongside long-form text",
          "128K tokens keep entire projects, transcripts, or chat histories in context",
          "Balances quality and speed for production multimodal agents"
        ],
        "highlights": [
          "Understands charts, screenshots, and diagrams alongside long-form text",
          "128K tokens keep entire projects, transcripts, or chat histories in context",
          "Balances quality and speed for production multimodal agents"
        ],
        "ideal_use_cases": [
          "Visual QA copilots decoding dashboards, charts, and UI screenshots",
          "Legal and compliance review across contracts plus embedded exhibits",
          "Enterprise knowledge agents stitching together docs, slides, and images",
          "Accessibility tooling narrating product screenshots for testers",
          "STEM tutoring apps that walk through diagrams and handwritten steps",
          "Operations teams generating reports that mix text observations with imagery"
        ],
        "use_cases": [
          "Visual QA copilots decoding dashboards, charts, and UI screenshots",
          "Legal and compliance review across contracts plus embedded exhibits",
          "Enterprise knowledge agents stitching together docs, slides, and images",
          "Accessibility tooling narrating product screenshots for testers",
          "STEM tutoring apps that walk through diagrams and handwritten steps",
          "Operations teams generating reports that mix text observations with imagery"
        ]
      },
      "endpoints": [
        {
          "label": "Single prompt completion",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Generate copy, code, analytics, or multimodal descriptions with Gemma 3 4B—ideal for teams that need vision-aware reasoning in a single call.",
          "request_example": {
            "model": "gemma3:4b",
            "prompt": "Draft a release note for GPUse Stage C.",
            "stream": false
          }
        },
        {
          "label": "Streaming generate",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Stream tokens in real time for live dashboards, pair-programming assistants, or rapid user feedback loops.",
          "request_example": {
            "model": "gemma3:4b",
            "prompt": "List three benefits of GPUse.",
            "stream": true
          }
        },
        {
          "label": "Chat conversation",
          "method": "POST",
          "path": "/api/chat",
          "summary": "Multi-turn chat with optional images—ideal for assistant-style interactions and multimodal prompts.",
          "request_example": {
            "model": "gemma3:4b",
            "messages": [
              {
                "role": "system",
                "content": "You are a helpful assistant."
              },
              {
                "role": "user",
                "content": "Summarize the GPUse platform."
              }
            ]
          }
        },
        {
          "label": "Generate embeddings",
          "method": "POST",
          "path": "/api/embeddings",
          "summary": "Create high-quality embeddings for knowledge bases, rerankers, and semantic routing.",
          "request_example": {
            "model": "gemma3:4b",
            "prompt": "Agents need reliable GPU infrastructure."
          }
        },
        {
          "label": "Model information",
          "method": "GET",
          "path": "/api/tags",
          "summary": "List every Gemma build already pulled to this instance (acts as a quick health/ready check)."
        },
        {
          "label": "Show model",
          "method": "POST",
          "path": "/api/show",
          "summary": "Inspect model metadata, context length, quantization, and load status.",
          "request_example": {
            "name": "gemma3:4b"
          }
        },
        {
          "label": "OpenAI-compatible chat",
          "method": "POST",
          "path": "/v1/chat/completions",
          "summary": "Trigger Gemma 3 through the OpenAI-compatible API—bring-your-own OpenAI tooling still works.",
          "request_example": {
            "model": "gemma3:4b",
            "messages": [
              {
                "role": "user",
                "content": "Explain the GPUse grace-period workflow."
              }
            ]
          }
        }
      ]
    },
    "ollama-gemma-7b": {
      "display_name": "Ollama Gemma 7B",
      "docs_path": "docs/templates/ollama-gemma-7b.md",
      "instance_endpoint_auth_required": false,
      "cold_start": {
        "expected": true,
        "estimated_seconds": 120,
        "reason": "Ollama models download to GPU memory on first boot",
        "readiness_check": {
          "method": "GET",
          "path": "/api/tags",
          "ready_when": "returns non-empty model list",
          "not_ready_response": {
            "models": []
          },
          "ready_response_example": {
            "models": [
              {
                "name": "gemma:7b",
                "size": 5000000000
              }
            ]
          }
        }
      },
      "marketing": {
        "summary": "Premium reasoning for global audiences",
        "headline": "Premium reasoning for global audiences",
        "differentiators": [
          "Google DeepMind quality without the footprint of mega models",
          "Excels at academic, technical, and research-centric content",
          "Handles 100+ languages for worldwide product launches"
        ],
        "highlights": [
          "Google DeepMind quality without the footprint of mega models",
          "Excels at academic, technical, and research-centric content",
          "Handles 100+ languages for worldwide product launches"
        ],
        "ideal_use_cases": [
          "Research assistants summarizing papers, grants, and literature reviews",
          "Analyst copilots drafting risk reports, financial briefs, and strategy memos",
          "Large-scale content programs producing blogs, whitepapers, and localization",
          "Technical documentation, API references, and architecture narrative",
          "Customer success bots handling complex troubleshooting in any language",
          "Executive briefing generators compiling data into polished narrative"
        ],
        "use_cases": [
          "Research assistants summarizing papers, grants, and literature reviews",
          "Analyst copilots drafting risk reports, financial briefs, and strategy memos",
          "Large-scale content programs producing blogs, whitepapers, and localization",
          "Technical documentation, API references, and architecture narrative",
          "Customer success bots handling complex troubleshooting in any language",
          "Executive briefing generators compiling data into polished narrative"
        ]
      },
      "endpoints": [
        {
          "label": "Single prompt completion",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Draft in-depth articles, multilingual content, or analytical reports in one call—great for research assistants and marketing teams.",
          "request_example": {
            "model": "gemma:7b",
            "prompt": "Explain quantum computing in simple terms",
            "stream": false
          }
        },
        {
          "label": "Chat conversation",
          "method": "POST",
          "path": "/api/chat",
          "summary": "Run high-quality conversational agents with shared history—ideal for customer support, tutoring, or multilingual chatbots.",
          "request_example": {
            "model": "gemma:7b",
            "messages": [
              {
                "role": "system",
                "content": "You are a helpful assistant"
              },
              {
                "role": "user",
                "content": "What are the applications of AI?"
              }
            ],
            "stream": false
          }
        },
        {
          "label": "Generate embeddings",
          "method": "POST",
          "path": "/api/embed",
          "summary": "Produce dense embeddings for semantic search, cross-lingual retrieval, or clustering knowledge across large document sets.",
          "request_example": {
            "model": "gemma:7b",
            "prompt": "Advanced machine learning techniques"
          }
        },
        {
          "label": "Model information",
          "method": "POST",
          "path": "/api/show",
          "summary": "Inspect metadata, quantization, and parent weights—helpful before applying adapters or custom training.",
          "request_example": {
            "name": "gemma:7b"
          }
        },
        {
          "label": "List models",
          "method": "GET",
          "path": "/api/tags",
          "summary": "Confirm which Gemma variants are cached and ready to serve (also works as a lightweight readiness probe)."
        },
        {
          "label": "OpenAI-compatible chat",
          "method": "POST",
          "path": "/v1/chat/completions",
          "summary": "Call Gemma 7B through the OpenAI chat interface so existing SDKs and automation pipelines work unchanged.",
          "request_example": {
            "model": "gemma:7b",
            "messages": [
              {
                "role": "user",
                "content": "Write a research abstract about climate change"
              }
            ]
          }
        },
        {
          "label": "OpenAI-compatible embeddings",
          "method": "POST",
          "path": "/v1/embeddings",
          "summary": "Generate embeddings via the OpenAI schema for downstream services like vector databases, rerankers, or personalization engines.",
          "request_example": {
            "model": "gemma:7b",
            "input": [
              "Neural network architectures"
            ]
          }
        },
        {
          "label": "Update model",
          "method": "POST",
          "path": "/api/pull",
          "summary": "Fetch the latest Gemma weights or re-download layers if you need a clean slate for experimentation.",
          "request_example": {
            "name": "gemma:7b",
            "stream": false
          }
        }
      ]
    },
    "ollama-gemma3n-e4b": {
      "display_name": "Ollama Gemma3n e4B",
      "docs_path": "docs/templates/ollama-gemma3n-e4b.md",
      "marketing": {
        "summary": "Selective-activation efficiency with full multimodal reach",
        "headline": "Selective-activation efficiency with full multimodal reach",
        "differentiators": [
          "Behaves like a 4B runtime while unlocking 8B quality and 32K context",
          "Handles text, image, audio, and video in one agent flow",
          "Built for responsive edge-style experiences without premium GPU spend"
        ],
        "highlights": [
          "Behaves like a 4B runtime while unlocking 8B quality and 32K context",
          "Handles text, image, audio, and video in one agent flow",
          "Built for responsive edge-style experiences without premium GPU spend"
        ],
        "ideal_use_cases": [
          "Long-horizon planning agents juggling spreadsheets, slides, and transcripts",
          "Localization pipelines fusing visual assets with multilingual copy",
          "Portfolio and research digests combining charts, quotes, and recordings",
          "IoT and field tooling where agents must stay responsive with cached insight",
          "Compliance monitors reviewing mixed-media evidence packets",
          "Product analytics assistants correlating video, audio, and textual feedback"
        ],
        "use_cases": [
          "Long-horizon planning agents juggling spreadsheets, slides, and transcripts",
          "Localization pipelines fusing visual assets with multilingual copy",
          "Portfolio and research digests combining charts, quotes, and recordings",
          "IoT and field tooling where agents must stay responsive with cached insight",
          "Compliance monitors reviewing mixed-media evidence packets",
          "Product analytics assistants correlating video, audio, and textual feedback"
        ]
      },
      "endpoints": [
        {
          "label": "Single prompt completion",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Run multimodal Gemma3n for long-form reasoning, narration, or structured output.",
          "request_example": {
            "model": "gemma3n:e4b",
            "prompt": "Summarize the latest deployment status.",
            "stream": false
          }
        },
        {
          "label": "Chat conversation",
          "method": "POST",
          "path": "/api/chat",
          "summary": "Hold multi-turn conversations that can reference text and optional images when enabled.",
          "request_example": {
            "model": "gemma3n:e4b",
            "messages": [
              {
                "role": "user",
                "content": "Describe the Gemma3n e4B capabilities."
              }
            ],
            "stream": false
          }
        },
        {
          "label": "Generate embeddings",
          "method": "POST",
          "path": "/api/embed",
          "summary": "Create embeddings tailored for multimodal retrieval pipelines or hybrid search.",
          "request_example": {
            "model": "gemma3n:e4b",
            "input": [
              "Multi-modal analysis"
            ]
          }
        },
        {
          "label": "Model information",
          "method": "POST",
          "path": "/api/show",
          "summary": "Inspect Gemma3n metadata, including quantization, parent weights, and context limits.",
          "request_example": {
            "model": "gemma3n:e4b"
          }
        },
        {
          "label": "List models",
          "method": "GET",
          "path": "/api/tags",
          "summary": "Enumerate every Gemma3n variant staged inside the container so you can swap or reload with confidence."
        }
      ]
    },
    "ollama-llama3.2-3b": {
      "display_name": "Ollama Llama 3.2 3B",
      "docs_path": "docs/templates/ollama-llama3.2-3b.md",
      "marketing": {
        "summary": "Edge-tuned speed with 128K memory",
        "headline": "Edge-tuned speed with 128K memory",
        "differentiators": [
          "Meta's quantized 3B model optimized for fast inference on L4",
          "128K context unlocks document heavy use cases without jumping to 8B+",
          "Supports embeddings, chat, and completions from a single deployment"
        ],
        "highlights": [
          "Meta's quantized 3B model optimized for fast inference on L4",
          "128K context unlocks document heavy use cases without jumping to 8B+",
          "Supports embeddings, chat, and completions from a single deployment"
        ],
        "ideal_use_cases": [
          "Document copilots digesting entire policies, manuals, and playbooks",
          "Code review assistants scanning large PRs and repositories",
          "Multilingual chatbots covering the eight core European and Asian languages",
          "Knowledge extraction agents pulling structured data from huge archives",
          "Localization teams translating product copy end-to-end",
          "Education and training assistants building course summaries and quizzes"
        ],
        "use_cases": [
          "Document copilots digesting entire policies, manuals, and playbooks",
          "Code review assistants scanning large PRs and repositories",
          "Multilingual chatbots covering the eight core European and Asian languages",
          "Knowledge extraction agents pulling structured data from huge archives",
          "Localization teams translating product copy end-to-end",
          "Education and training assistants building course summaries and quizzes"
        ]
      },
      "endpoints": [
        {
          "label": "Single prompt completion",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Generate long-form answers, code, or summaries with Llama 3.2’s 128K context.",
          "request_example": {
            "model": "llama3.2:3b",
            "prompt": "Draft a progress update for GPUse Stage C.",
            "stream": false
          }
        },
        {
          "label": "Chat conversation",
          "method": "POST",
          "path": "/api/chat",
          "summary": "High-context assistant chat with full conversation history—ideal for project copilots, planning agents, or doc review bots.",
          "request_example": {
            "model": "llama3.2:3b",
            "messages": [
              {
                "role": "user",
                "content": "What tools does GPUse expose over MCP?"
              }
            ]
          }
        },
        {
          "label": "Generate embeddings",
          "method": "POST",
          "path": "/api/embed",
          "summary": "Produce high-quality embeddings for retrieval systems, rerankers, personalization engines, or cross-document linking.",
          "request_example": {
            "model": "llama3.2:3b",
            "input": [
              "GPU provisioning"
            ]
          }
        },
        {
          "label": "Model information",
          "method": "POST",
          "path": "/api/show",
          "summary": "Inspect model metadata, parent weights, and quantization details before applying adapters or extended contexts.",
          "request_example": {
            "model": "llama3.2:3b"
          }
        },
        {
          "label": "List models",
          "method": "GET",
          "path": "/api/tags",
          "summary": "Quickly confirm which models are loaded and ready—handy when rotating between experiments."
        },
        {
          "label": "Update model",
          "method": "POST",
          "path": "/api/pull",
          "summary": "Refresh the model from the upstream Ollama registry without redeploying.",
          "request_example": {
            "name": "llama3.2:3b",
            "stream": false
          }
        },
        {
          "label": "Copy model",
          "method": "POST",
          "path": "/api/copy",
          "summary": "Clone the model under a new name so you can fine-tune, patch, or experiment without disturbing the original.",
          "request_example": {
            "source": "llama3.2:3b",
            "destination": "my-custom-llama"
          }
        },
        {
          "label": "Delete model",
          "method": "DELETE",
          "path": "/api/delete",
          "summary": "Remove a model from disk when you’re done to reclaim space—irreversible, so use with care.",
          "request_example": {
            "name": "model-to-delete"
          }
        },
        {
          "label": "Create model from Modelfile",
          "method": "POST",
          "path": "/api/create",
          "summary": "Build a new custom model from an Ollama Modelfile—great for LoRA merges, instruction tuning, or internal variants.",
          "request_example": {
            "name": "custom-llama",
            "modelfile": "FROM llama3.2:3b\nSYSTEM You are a pirate"
          }
        },
        {
          "label": "OpenAI-compatible chat",
          "method": "POST",
          "path": "/v1/chat/completions",
          "summary": "Use existing OpenAI chat tooling directly against this instance—bring your own agents, frameworks, or automations.",
          "request_example": {
            "model": "llama3.2:3b",
            "messages": [
              {
                "role": "user",
                "content": "Write a haiku about GPUse."
              }
            ]
          }
        },
        {
          "label": "OpenAI-compatible embeddings",
          "method": "POST",
          "path": "/v1/embeddings",
          "summary": "Produce embeddings with the OpenAI schema for drop-in integration with vector databases and downstream services.",
          "request_example": {
            "model": "llama3.2:3b",
            "input": [
              "GPU orchestration"
            ]
          }
        }
      ]
    },
    "ollama-mistral-7b": {
      "display_name": "Ollama Mistral 7B",
      "docs_path": "docs/templates/ollama-mistral-7b.md",
      "marketing": {
        "summary": "Apache-licensed workhorse for code and reasoning",
        "headline": "Apache-licensed workhorse for code and reasoning",
        "differentiators": [
          "No license friction - ideal for commercial deployments",
          "Exceptional JSON output for automation pipelines",
          "Top-tier performance on programming and problem-solving tasks"
        ],
        "highlights": [
          "No license friction - ideal for commercial deployments",
          "Exceptional JSON output for automation pipelines",
          "Top-tier performance on programming and problem-solving tasks"
        ],
        "ideal_use_cases": [
          "DevOps copilots generating runbooks, IaC blueprints, and shell automation",
          "Data teams producing SQL, ETL transformations, and analytics reports",
          "Documentation bots authoring READMEs, API guides, and onboarding material",
          "Product marketing drafting release notes and customer communications",
          "Engineering assistants debugging, refactoring, and optimizing code",
          "Operations copilots writing SOPs and audit-ready checklists"
        ],
        "use_cases": [
          "DevOps copilots generating runbooks, IaC blueprints, and shell automation",
          "Data teams producing SQL, ETL transformations, and analytics reports",
          "Documentation bots authoring READMEs, API guides, and onboarding material",
          "Product marketing drafting release notes and customer communications",
          "Engineering assistants debugging, refactoring, and optimizing code",
          "Operations copilots writing SOPs and audit-ready checklists"
        ]
      },
      "endpoints": [
        {
          "label": "Single prompt completion",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Generate fast, high-quality responses for coding assistants, Q&A bots, or creative ideation sessions.",
          "request_example": {
            "model": "mistral:7b",
            "prompt": "Summarize GPUse in three bullet points.",
            "stream": false
          }
        },
        {
          "label": "Chat conversation",
          "method": "POST",
          "path": "/api/chat",
          "summary": "Run multi-turn conversations with stateful history—perfect for customer service flows, pair-programming, or planning assistants.",
          "request_example": {
            "model": "mistral:7b",
            "messages": [
              {
                "role": "user",
                "content": "How do I list templates via MCP?"
              }
            ]
          }
        },
        {
          "label": "Generate embeddings",
          "method": "POST",
          "path": "/api/embed",
          "summary": "Return embeddings tuned for similarity search, reranking, personalization, and matching related knowledge.",
          "request_example": {
            "model": "mistral:7b",
            "input": [
              "Grace period automation"
            ]
          }
        },
        {
          "label": "Model information",
          "method": "POST",
          "path": "/api/show",
          "summary": "Inspect model metadata, quantization, and parent weights so you know exactly what’s deployed.",
          "request_example": {
            "model": "mistral:7b"
          }
        },
        {
          "label": "List models",
          "method": "GET",
          "path": "/api/tags",
          "summary": "List all models currently cached in the container—quick way to confirm readiness or switch variants."
        },
        {
          "label": "OpenAI-compatible chat",
          "method": "POST",
          "path": "/v1/chat/completions",
          "summary": "Hook Mistral into OpenAI-compatible chat workflows so existing clients and frameworks work without modification.",
          "request_example": {
            "model": "mistral:7b",
            "messages": [
              {
                "role": "user",
                "content": "Write a haiku about programming."
              }
            ]
          }
        },
        {
          "label": "OpenAI-compatible embeddings",
          "method": "POST",
          "path": "/v1/embeddings",
          "summary": "Generate embeddings using the OpenAI schema (ideal for existing SDKs).",
          "request_example": {
            "model": "mistral:7b",
            "input": [
              "Natural language processing"
            ]
          }
        }
      ]
    },
    "ollama-qwen2.5vl-7b": {
      "display_name": "Ollama Qwen2.5-VL 7B",
      "docs_path": "docs/templates/ollama-qwen2.5vl-7b.md",
      "marketing": {
        "summary": "Vision-language specialist for document intelligence",
        "headline": "Vision-language specialist for document intelligence",
        "differentiators": [
          "Parses invoices, forms, charts, and screenshots with high fidelity",
          "125K text context keeps supporting narrative alongside visuals",
          "Apache 2.0 licensing fits enterprise document workflows"
        ],
        "highlights": [
          "Parses invoices, forms, charts, and screenshots with high fidelity",
          "125K text context keeps supporting narrative alongside visuals",
          "Apache 2.0 licensing fits enterprise document workflows"
        ],
        "ideal_use_cases": [
          "AP/AR automation extracting fields from invoices and receipts",
          "Business intelligence copilots interpreting dashboards and slide decks",
          "Support agents troubleshooting from customer-provided screenshots",
          "Legal and compliance reviews covering contracts plus embedded exhibits",
          "Accessibility services narrating visuals for end users",
          "Education tools explaining diagrams, charts, and STEM illustrations"
        ],
        "use_cases": [
          "AP/AR automation extracting fields from invoices and receipts",
          "Business intelligence copilots interpreting dashboards and slide decks",
          "Support agents troubleshooting from customer-provided screenshots",
          "Legal and compliance reviews covering contracts plus embedded exhibits",
          "Accessibility services narrating visuals for end users",
          "Education tools explaining diagrams, charts, and STEM illustrations"
        ]
      },
      "endpoints": [
        {
          "label": "Single prompt completion",
          "method": "POST",
          "path": "/api/generate",
          "summary": "Generate rich text or multimodal (vision + language) responses in a single request.",
          "request_example": {
            "model": "qwen2.5-vl:7b",
            "prompt": "Describe the GPUse grace period workflow.",
            "stream": false
          }
        },
        {
          "label": "Chat conversation",
          "method": "POST",
          "path": "/api/chat",
          "summary": "Run multimodal conversations that reference both text and images—great for design critiques, reporting on charts, or detailed planning.",
          "request_example": {
            "model": "qwen2.5-vl:7b",
            "messages": [
              {
                "role": "user",
                "content": "Give me three use cases for Qwen2.5-VL."
              }
            ]
          }
        },
        {
          "label": "Generate embeddings",
          "method": "POST",
          "path": "/api/embed",
          "summary": "Produce embeddings that blend visual and textual understanding for multimodal search, recommendations, and hybrid retrieval.",
          "request_example": {
            "model": "qwen2.5-vl:7b",
            "input": [
              "Vision language indexing"
            ]
          }
        },
        {
          "label": "List models",
          "method": "GET",
          "path": "/api/tags",
          "summary": "Check which Qwen variants are available and confirm readiness before switching models mid-session."
        },
        {
          "label": "Show model",
          "method": "POST",
          "path": "/api/show",
          "summary": "Inspect metadata, context length, and quantization details specific to Qwen 2.5 VL so you can plan workloads accurately.",
          "request_example": {
            "model": "qwen2.5-vl:7b"
          }
        },
        {
          "label": "Update model",
          "method": "POST",
          "path": "/api/pull",
          "summary": "Pull the latest model weights from Ollama (useful after updates).",
          "request_example": {
            "model": "qwen2.5-vl:7b"
          }
        },
        {
          "label": "OpenAI-compatible chat",
          "method": "POST",
          "path": "/v1/chat/completions",
          "summary": "Use the OpenAI-style multimodal chat endpoint with text+image payloads.",
          "request_example": {
            "model": "qwen2.5-vl:7b",
            "messages": [
              {
                "role": "user",
                "content": [
                  {
                    "type": "text",
                    "text": "What is in this image?"
                  },
                  {
                    "type": "image_url",
                    "image_url": {
                      "url": "data:image/jpeg;base64,..."
                    }
                  }
                ]
              }
            ]
          }
        }
      ]
    },
    "whisper-large-v3": {
      "display_name": "Whisper Large v3",
      "docs_path": "docs/templates/whisper-large-v3.md",
      "marketing": {
        "summary": "Multilingual speech-to-text with analytics-ready outputs",
        "headline": "Multilingual speech-to-text with analytics-ready outputs",
        "differentiators": [
          "100-language coverage with state-of-the-art accuracy",
          "Generates subtitles, translations, and timestamps without extra tooling",
          "Pairs seamlessly with LLM templates for end-to-end voice agents"
        ],
        "highlights": [
          "100-language coverage with state-of-the-art accuracy",
          "Generates subtitles, translations, and timestamps without extra tooling",
          "Pairs seamlessly with LLM templates for end-to-end voice agents"
        ],
        "ideal_use_cases": [
          "Podcast and livestream transcription with quick searchable turnarounds",
          "Meeting copilots capturing notes, action items, and decisions",
          "Call-center analytics turning voice conversations into datasets",
          "Accessibility workflows generating captions and transcripts automatically",
          "Localization teams converting raw audio into English for repurposing",
          "Media production extracting quotes, highlights, and social snippets"
        ],
        "use_cases": [
          "Podcast and livestream transcription with quick searchable turnarounds",
          "Meeting copilots capturing notes, action items, and decisions",
          "Call-center analytics turning voice conversations into datasets",
          "Accessibility workflows generating captions and transcripts automatically",
          "Localization teams converting raw audio into English for repurposing",
          "Media production extracting quotes, highlights, and social snippets"
        ]
      },
      "endpoints": [
        {
          "label": "Service information",
          "method": "GET",
          "path": "/",
          "summary": "Return a simple banner so agents can confirm the service booted before invoking expensive audio jobs."
        },
        {
          "label": "Health check",
          "method": "GET",
          "path": "/health",
          "summary": "Quick readiness probe (first call can take a moment while Whisper loads its large weights into GPU memory)."
        },
        {
          "label": "Transcribe audio",
          "method": "POST",
          "path": "/transcribe",
          "summary": "Upload audio (mp3/wav/etc.) and receive accurate transcriptions—great for podcasts, meetings, or customer support recordings.",
          "request_example": {
            "audio": "@sample.wav",
            "language": "en",
            "temperature": 0
          }
        },
        {
          "label": "Translate to English",
          "method": "POST",
          "path": "/translate",
          "summary": "Translate any supported language directly into English—perfect for global teams and localization workflows.",
          "request_example": {
            "audio": "@foreign.wav"
          }
        },
        {
          "label": "Detect language",
          "method": "POST",
          "path": "/detect-language",
          "summary": "Identify the spoken language (currently flagged as a known issue—watch release notes for a fix).",
          "status": "known_issue",
          "request_example": {
            "audio": "@audio.wav"
          }
        },
        {
          "label": "Word-level timestamps",
          "method": "POST",
          "path": "/segments",
          "summary": "Return word-by-word timestamps and confidence scores—valuable for subtitle QA, editing, or compliance review.",
          "request_example": {
            "audio": "@audio.wav",
            "word_timestamps": true
          }
        },
        {
          "label": "Generate subtitles",
          "method": "POST",
          "path": "/subtitle",
          "summary": "Produce SRT or VTT subtitle files ready for video pipelines, social clips, or accessibility workflows.",
          "request_example": {
            "audio": "@audio.wav",
            "format": "srt"
          }
        }
      ]
    },
    "echo-server": {
      "display_name": "Echo Server (Testing)",
      "docs_path": "docs/templates/echo-server.md",
      "marketing": {
        "summary": "Instant-start HTTP server for deployment validation",
        "headline": "Instant-start HTTP server for deployment validation",
        "differentiators": [
          "Boots in 1-2 seconds with no model downloads or dependencies",
          "Validates grace period workflow and payment integration end-to-end",
          "Minimal resource footprint for rapid CI/CD testing"
        ],
        "highlights": [
          "Boots in 1-2 seconds with no model downloads or dependencies",
          "Validates grace period workflow and payment integration end-to-end",
          "Minimal resource footprint for rapid CI/CD testing"
        ],
        "ideal_use_cases": [
          "Integration testing for grace period and checkout flows",
          "CI/CD pipeline validation of deployment infrastructure",
          "Agent workflow debugging without expensive GPU runtime",
          "Quick health checks and connectivity verification",
          "Load testing and performance benchmarking scaffolds",
          "Developer sandbox for experimenting with GPUse APIs"
        ],
        "use_cases": [
          "Integration testing for grace period and checkout flows",
          "CI/CD pipeline validation of deployment infrastructure",
          "Agent workflow debugging without expensive GPU runtime",
          "Quick health checks and connectivity verification",
          "Load testing and performance benchmarking scaffolds",
          "Developer sandbox for experimenting with GPUse APIs"
        ]
      },
      "cold_start": {
        "expected": true,
        "estimated_seconds": 2,
        "reason": "Minimal container with no model weights or dependencies",
        "readiness_check": {
          "method": "GET",
          "path": "/",
          "ready_when": "returns HTTP 200"
        }
      },
      "endpoints": [
        {
          "label": "Health check",
          "method": "GET",
          "path": "/",
          "summary": "Instant HTTP 200 response confirms the deployment is live—perfect for validating infrastructure before running GPU workloads."
        }
      ]
    },
    "custom-docker": {
      "display_name": "Bring Your Own Container",
      "docs_path": "docs/api/compute-api.md#compute-provisioning",
      "marketing": {
        "summary": "Define a Dockerfile, push it through POST /api/v1/custom, and let agents own the entire build pipeline.",
        "differentiators": [
          "Full root access to pick runtime, dependencies, and launch command",
          "Streaming build logs so agents can self-debug without human screenshots",
          "Reuse the same grace-to-paid lifecycle tools as managed templates"
        ],
        "ideal_use_cases": [
          "Finetuning stacks that need bespoke CUDA/cuDNN combinations",
          "Inference servers with custom weights or business logic",
          "Multi-service agents that orchestrate workers, queues, or background jobs",
          "Enterprise teams migrating existing Docker images into agent-first workflows"
        ]
      },
      "endpoints": [
        {
          "label": "Submit custom build",
          "method": "POST",
          "path": "/api/v1/custom",
          "summary": "Send Dockerfile content or build context to launch Cloud Build and prepare a custom runtime."
        },
        {
          "label": "Stream build logs",
          "method": "GET",
          "path": "/api/v1/builds/{build_id}/logs",
          "summary": "Tail Cloud Build logs so agents can watch dependency installs and fix issues autonomously."
        },
        {
          "label": "Deploy resulting image",
          "method": "POST",
          "path": "/api/v1/compute",
          "summary": "Launch the newly built image via start_compute once the build succeeds."
        }
      ]
    }
  },
  "endpoints": {
    "rest": {
      "compute_start": "POST /api/v1/compute",
      "custom_build": "POST /api/v1/custom",
      "compute_status": "GET /api/v1/compute/{compute_id}/status",
      "compute_stop": "DELETE /api/v1/compute/{compute_id}",
      "logs_build": "GET /api/v1/builds/{build_id}/logs",
      "logs_runtime": "GET /api/v1/compute/{compute_id}/logs",
      "catalog": "GET /api/v1/catalog",
      "recommend": "GET /api/v1/recommend"
    }
  },
  "related_manifests": {
    "mcp": "/api/mcp/manifest"
  },
  "distribution": {
    "mcp_clients": [
      {
        "name": "Claude Code CLI",
        "install": "npx -y gpuse-mcp-server@latest configure --force --targets claude-code",
        "verify": "Restart the CLI session, run /mcp, and confirm gpuse lists every tool including start_custom.",
        "notes": [
          "Claude Code performs the initialize handshake automatically and reuses Mcp-Session-Id for HTTP calls to https://mcp.gpuse.com/mcp."
        ]
      },
      {
        "name": "Claude Desktop",
        "install": "npx -y gpuse-mcp-server@latest configure --force --targets claude-desktop",
        "verify": "Quit and reopen Claude Desktop, then run /mcp to ensure gpuse is available.",
        "notes": [
          "Desktop sessions stream over HTTP; first tool invocation sends initialize, subsequent calls reuse the returned session header."
        ]
      },
      {
        "name": "Cursor IDE",
        "install": "npx -y gpuse-mcp-server@latest configure --force --targets cursor",
        "verify": "Restart Cursor or reload MCP settings and check the integrations panel for gpuse.",
        "notes": [
          "Cursor’s MCP client adds Mcp-Session-Id and Mcp-Protocol-Version headers automatically for https://mcp.gpuse.com/mcp."
        ]
      },
      {
        "name": "Codex CLI",
        "install": "npx -y gpuse-mcp-server@latest configure --force --targets codex",
        "verify": "Restart Codex, run /mcp, and confirm gpuse exposes recommend_template through start_custom and stop_compute."
      },
      {
        "name": "Gemini CLI",
        "install": "npx -y gpuse-mcp-server@latest configure --force --targets gemini",
        "verify": "Restart Gemini CLI or run gemini mcp list to confirm gpuse is registered."
      },
      {
        "name": "Windsurf IDE",
        "install": "npx -y gpuse-mcp-server@latest configure --force --targets windsurf",
        "verify": "Restart Windsurf and confirm gpuse appears in the MCP integrations list.",
        "notes": [
          "Windsurf streams responses over SSE; no manual session wiring required beyond the public endpoint."
        ]
      },
      {
        "name": "VS Code (Continue)",
        "install": "Add to continue.config.json: \"mcpServers\": { \"gpuse\": { \"transport\": \"http\", \"url\": \"https://mcp.gpuse.com/mcp\" } }",
        "verify": "Reload Continue, open the MCP tools panel, and confirm gpuse is available.",
        "notes": [
          "Continue negotiates initialize and reuses session headers automatically for the HTTP transport."
        ]
      }
    ]
  }
}