Python SDK and CLI for the YottaML public API.
Requirements:

- Python 3.9 or higher

Install with pip:

```bash
pip install yottaml
```

This installs both the `yottaml` Python package and the `yotta` CLI.
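To confirm the install, the conventional `--help` flag should print the available commands (an assumption based on standard CLI behavior, not verified against this release):

```bash
# Assumption: yotta follows the usual --help convention
yotta --help
```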
Set your API key as an environment variable (recommended):

```bash
export YOTTA_API_KEY=your-api-key
```

Or pass it directly to any command:

```bash
yotta --api-key your-api-key pods list
```

Global options:

| Option | Default | Description |
|---|---|---|
| `--api-key` | `$YOTTA_API_KEY` | API key |
| `--base-url` | `https://api.yottalabs.ai` | API base URL |
| `--debug` | off | Enable debug logging |
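For example, to run a command against a different API host with debug logging (the URL below is a placeholder, not a real endpoint):

```bash
# Global options go before the subcommand; the host here is a placeholder
yotta --base-url https://api.example.com --debug pods list
```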
Pods:

```bash
# List all pods
yotta pods list
# Filter by region and/or status
yotta pods list --region us-east-3 --status 1
# Get pod details
yotta pods get <pod_id>
# Create a pod
yotta pods create \
--image nvidia/cuda:12.1.0-base-ubuntu22.04 \
--gpu-type NVIDIA_RTX_4090_24G \
--gpu-count 1 \
--region us-east-3 \
--name my-pod
# Create with environment variables and exposed ports
yotta pods create \
--image myrepo/myimage:latest \
--gpu-type NVIDIA_RTX_4090_24G \
--gpu-count 1 \
--env MODEL_PATH=/models \
--env DEBUG=1 \
--expose '[{"port":8080,"protocol":"http"}]'
# Pause / resume / delete
yotta pods pause <pod_id>
yotta pods resume <pod_id>
yotta pods delete <pod_id>
```

`pods create` options:

| Option | Required | Description |
|---|---|---|
| `--image` | Yes | Container image |
| `--gpu-type` | Yes | GPU type, e.g. `NVIDIA_RTX_4090_24G` |
| `--gpu-count` | No (default 1) | Number of GPUs (must be a power of 2) |
| `--region` | No | Acceptable region(s), repeatable |
| `--name` | No | Pod name |
| `--resource-type` | No | `GPU` (default) or `CPU` |
| `--container-volume` | No | Container volume in GB |
| `--persistent-volume` | No | Persistent volume in GB |
| `--persistent-mount-path` | No | Persistent volume mount path |
| `--image-registry` | No | Docker registry URL for private images |
| `--credential-id` | No | Stored registry credential ID |
| `--image-public-type` | No | `PUBLIC` or `PRIVATE` |
| `--init-cmd` | No | Initialization command |
| `--env KEY=VALUE` | No | Environment variable, repeatable |
| `--expose` | No | JSON array of ports, e.g. `'[{"port":22,"protocol":"SSH"}]'` |
| `--min-vram` | No | Minimum single-card VRAM in GB |
| `--min-ram` | No | Minimum single-card RAM in GB |
| `--min-vcpu` | No | Minimum single-card vCPU count |
| `--shm` | No | Shared memory in GB |
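A sketch combining the storage and hardware-filter flags; the image name, sizes, and mount path are illustrative, not required values:

```bash
# Illustrative values throughout: 2 GPUs (a power of 2), a 100 GB persistent
# volume mounted at /data, at least 24 GB of VRAM per card, 16 GB shared memory
yotta pods create \
  --image myrepo/trainer:latest \
  --gpu-type NVIDIA_RTX_4090_24G \
  --gpu-count 2 \
  --persistent-volume 100 \
  --persistent-mount-path /data \
  --min-vram 24 \
  --shm 16
```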
Serverless:

```bash
# List all deployments
yotta serverless list
# Filter by status
yotta serverless list --status RUNNING --status STOPPED
# Get deployment details
yotta serverless get <deployment_id>
# Create a deployment
yotta serverless create \
--name my-endpoint \
--image yottalabsai/pytorch:latest \
--resources '[{"region":"us-east","gpuType":"NVIDIA_RTX_4090_24G","gpuCount":1}]' \
--workers 1 \
--service-mode QUEUE \
--volume 256
# List workers
yotta serverless workers <deployment_id>
# Scale workers
yotta serverless scale <deployment_id> --workers 3
# Stop / start / delete
yotta serverless stop <deployment_id>
yotta serverless start <deployment_id>
yotta serverless delete <deployment_id>
```

`serverless create` options:

| Option | Required | Description |
|---|---|---|
| `--name` | Yes | Endpoint name (max 20 chars) |
| `--image` | Yes | Container image |
| `--resources` | Yes | JSON array: `[{"region":"...","gpuType":"...","gpuCount":N}]` |
| `--workers` | Yes | Initial worker count |
| `--service-mode` | Yes | `ALB`, `QUEUE`, or `CUSTOM` |
| `--volume` | Yes | Container volume in GB (min 20) |
| `--image-registry` | No | Image registry URL |
| `--credential-id` | No | Registry credential ID |
| `--init-cmd` | No | Initialization command |
| `--env KEY=VALUE` | No | Environment variable, repeatable |
| `--expose-port` | No | Port to expose |
| `--expose-protocol` | No | Expose protocol (e.g. HTTP) |
| `--webhook` | No | Webhook URL for worker status notifications |
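A sketch of a deployment that pulls a private image and reports worker status to a webhook; the image name, registry URL, credential ID, and webhook address are placeholders:

```bash
# Placeholders: image, registry URL, credential ID, and webhook address;
# --volume must be at least 20 GB
yotta serverless create \
  --name my-private-ep \
  --image myrepo/private-image:latest \
  --image-registry https://index.docker.io/v1/ \
  --credential-id <credential_id> \
  --resources '[{"region":"us-east","gpuType":"NVIDIA_RTX_4090_24G","gpuCount":1}]' \
  --workers 2 \
  --service-mode QUEUE \
  --volume 40 \
  --webhook https://example.com/worker-status
```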
Tasks:

```bash
# Submit a task
yotta tasks create \
--endpoint-id <deployment_id> \
--worker-port 8080 \
--process-uri /process \
--input '{"prompt":"hello"}'
# Get task details
yotta tasks get --endpoint-id <deployment_id> <task_id>
# List tasks (paginated)
yotta tasks list --endpoint-id <deployment_id>
yotta tasks list --endpoint-id <deployment_id> --status SUCCESS --page 2 --page-size 20
# Get queued/processing task count
yotta tasks count --endpoint-id <deployment_id>
```

GPUs:

```bash
yotta gpus list
```

Credentials:

```bash
# List credentials
yotta credentials list
# Get a credential
yotta credentials get <credential_id>
# Create a credential
yotta credentials create \
--name my-registry \
--type DOCKER_HUB \
--username myuser \
--password mytoken
# Update a credential
yotta credentials update <credential_id> --password newtoken
# Delete a credential
yotta credentials delete <credential_id>
```

The same operations are available from Python.

Pod API:

```python
from yottaml.pod import PodApi
client = PodApi(api_key="your-api-key")
# List pods
client.get_pods()
client.get_pods(region_list=["us-east-3"])
# Get a pod
client.get_pod("12345")
# Create a pod
client.new_pod(
image="nvidia/cuda:12.1.0-base-ubuntu22.04",
gpu_type="NVIDIA_RTX_4090_24G",
gpu_count=1,
regions=["us-east-3"],
name="my-pod",
expose=[{"port": 22, "protocol": "SSH"}],
)
# Pause / resume / delete
client.pause_pod("12345")
client.resume_pod("12345")
client.delete_pod("12345")
```

Serverless API:

```python
from yottaml.elastic import ElasticApi
client = ElasticApi(api_key="your-api-key")
# List deployments
client.get_deployments()
client.get_deployments(status_list=["RUNNING"])
# Get one deployment
client.get_deployment_detail(42)
# Create a deployment
client.create_deployment(
name="my-endpoint",
image="yottalabsai/pytorch:latest",
resources=[{"region": "us-east", "gpuType": "NVIDIA_RTX_4090_24G", "gpuCount": 1}],
workers=1,
service_mode="QUEUE",
container_volume_in_gb=256,
)
# Scale workers
client.scale_workers(42, workers=3)
# Stop / start / delete
client.stop_deployment(42)
client.start_deployment(42)
client.delete_deployment(42)
# List workers
client.get_workers(42)
```

Task API:

```python
from yottaml.skywalker import SkywalkerTaskApi
client = SkywalkerTaskApi(api_key="your-api-key")
# Submit a task
client.create_task(
endpoint_id=42,
worker_port=8080,
process_uri="/process",
input={"prompt": "hello"},
webhook="https://example.com/hook",
)
# Get task detail
client.get_task(endpoint_id=42, task_id="abc123")
# List tasks
client.list_tasks(endpoint_id=42, status="SUCCESS", page=1, page_size=20)
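
# Hedged sketch: poll until the task reaches a terminal state. The response
# shape (a dict with a "status" key) and the "FAILED" value are assumptions;
# "SUCCESS" matches the status filter shown above. Adjust to the real payload.
import time
while True:
    task = client.get_task(endpoint_id=42, task_id="abc123")
    if task.get("status") in ("SUCCESS", "FAILED"):
        break
    time.sleep(5)
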
# Get processing count
client.get_processing_count(endpoint_id=42)
```

GPU API:

```python
from yottaml.gpu import GpuApi
client = GpuApi(api_key="your-api-key")
client.get_gpus()
```

Credential API:

```python
from yottaml.credential import CredentialApi
client = CredentialApi(api_key="your-api-key")
client.create_credential(name="my-reg", type="DOCKER_HUB", username="user", password="token")
client.get_credentials()
client.get_credential("1")
client.update_credential("1", password="newtoken")
client.delete_credential("1")
```

All API clients accept these keyword arguments:

```python
client = PodApi(
    api_key="your-api-key",               # or set YOTTA_API_KEY env var
    base_url="https://api.yottalabs.ai",  # optional override
    timeout=30,                           # request timeout in seconds
    debug=True,                           # log requests and responses
)
```

Error handling:

```python
from yottaml.error import ClientError, ServerError
try:
    client.get_pod("99999")
except ClientError as e:
    print(e.status_code)    # HTTP status code (4xx)
    print(e.error_code)     # API error code
    print(e.error_message)  # API error message
except ServerError as e:
    print(e)                # 5xx server error
```
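A sketch of retrying transient failures with these exceptions: client errors (4xx) are raised immediately, while server errors (5xx) are retried with backoff. The retry policy is illustrative, not part of the SDK:

```python
import time

from yottaml.error import ClientError, ServerError
from yottaml.pod import PodApi

client = PodApi(api_key="your-api-key")

# Illustrative policy: up to 3 attempts with exponential backoff on 5xx
# errors; 4xx errors indicate a bad request, so retrying will not help
def get_pod_with_retry(pod_id, attempts=3):
    for attempt in range(attempts):
        try:
            return client.get_pod(pod_id)
        except ClientError:
            raise  # caller error: fail fast
        except ServerError:
            if attempt == attempts - 1:
                raise
            time.sleep(2 ** attempt)  # 1s, 2s, ... between attempts

pod = get_pod_with_retry("12345")
```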