Workloads are trainings, workspaces, and deployments that are fully controlled by Run:ai. Workloads can be native, third-party integrations, or typical Kubernetes workload types. For more information, see Workloads overview.
Retrieve a list of active workloads with details.
Executed successfully.
Unauthorized
Forbidden
unexpected error
unexpected error
{
  "next": 1,
  "workloads": [
    {
      "tenantId": 1001,
      "runningPods": 1,
      "phaseUpdatedAt": "2022-06-08T11:28:24.131Z",
      "k8sPhaseUpdatedAt": "2022-06-08T11:28:24.131Z",
      "updatedAt": "2022-06-08T11:28:24.131Z",
      "source": "CLI",
      "deletedAt": "2022-08-12T19:28:24.131Z",
      "type": "runai-job",
      "name": "very-important-job",
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
      "priority": 50,
      "priorityClassName": "high-priority",
      "submittedBy": "researcher@run.ai",
      "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
      "projectName": "proj-1",
      "projectId": "1",
      "departmentName": "department-1",
      "departmentId": "1",
      "namespace": "runai-proj-1",
      "createdAt": "2022-01-01T03:49:52.531Z",
      "workloadRequestedResources": {
        "gpuRequestType": "portion",
        "gpu": {
          "limit": 1.5,
          "request": 1
        },
        "gpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "cpu": {
          "limit": 1.5,
          "request": 1
        },
        "cpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "migProfile": [
          "1g.5gb"
        ],
        "extendedResources": [
          {
            "resource": "hardware-vendor.example/foo",
            "quantity": 2,
            "exclude": false
          }
        ]
      },
      "podsRequestedResources": {
        "gpuRequestType": "portion",
        "gpu": {
          "limit": 1.5,
          "request": 1
        },
        "gpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "cpu": {
          "limit": 1.5,
          "request": 1
        },
        "cpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "migProfile": [
          "1g.5gb"
        ],
        "extendedResources": [
          {
            "resource": "hardware-vendor.example/foo",
            "quantity": 2,
            "exclude": false
          }
        ]
      },
      "allocatedResources": {
        "gpu": 1.5,
        "migProfile": [
          "1g.5gb"
        ],
        "gpuMemory": "200Mi",
        "cpu": 0.5,
        "cpuMemory": "0B",
        "extendedResources": [
          {
            "resource": "hardware-vendor.example/foo",
            "quantity": 2,
            "exclude": false
          }
        ]
      },
      "actionsSupport": {
        "delete": true,
        "suspend": true
      },
      "phase": "Creating",
      "conditions": [
        {
          "type": "Ready",
          "status": "False",
          "message": "Resource validation failed: ...",
          "reason": "ErrorConfig",
          "lastTransitionTime": "2022-01-01T03:49:52.531Z"
        }
      ],
      "phaseMessage": "Not enough resources in the requested nodepool",
      "k8sPhase": "Pending",
      "requestedPods": {
        "number": 1,
        "min": 2,
        "max": 5,
        "parallelism": 3,
        "completions": 5
      },
      "requestedNodePools": [
        "default"
      ],
      "currentNodePools": [
        "default"
      ],
      "completedAt": "2022-01-01T03:49:52.531Z",
      "images": [
        "busybox:latest"
      ],
      "urls": [
        "string"
      ],
      "datasources": [
        {
          "type": "pvc",
          "name": "my-pvc-datasource-1",
          "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"
        }
      ],
      "environments": [
        {
          "connections": [
            {
              "name": "my-pytorch-env",
              "toolType": "pytorch",
              "connectionType": "ExternalUrl",
              "authorizationType": "public",
              "authorizedUsers": [
                "user@company.ai",
                "another@company.ai"
              ],
              "authorizedGroups": [
                "group-a",
                "group-b"
              ],
              "containerPort": 8080
            }
          ],
          "name": "pytorch",
          "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
          "replicaType": "Master"
        }
      ],
      "externalConnections": [
        {
          "name": "my-pytorch-env",
          "toolType": "pytorch",
          "connectionType": "ExternalUrl",
          "authorizationType": "public",
          "authorizedUsers": [
            "user@company.ai",
            "another@company.ai"
          ],
          "authorizedGroups": [
            "group-a",
            "group-b"
          ],
          "containerPort": 8080
        }
      ],
      "distributedFramework": "Pytorch",
      "additionalFields": {},
      "preemptible": true,
      "environmentVariables": {
        "property1": "string",
        "property2": "string"
      },
      "command": "sleep",
      "arguments": "1000",
      "phaseReason": {},
      "idleGpus": 3,
      "idleAllocatedGpus": 1
    }
  ]
}
Retrieve workload data using a workloadId.
Executed successfully.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "tenantId": 1001,
  "runningPods": 1,
  "phaseUpdatedAt": "2022-06-08T11:28:24.131Z",
  "k8sPhaseUpdatedAt": "2022-06-08T11:28:24.131Z",
  "updatedAt": "2022-06-08T11:28:24.131Z",
  "source": "CLI",
  "deletedAt": "2022-08-12T19:28:24.131Z",
  "type": "runai-job",
  "name": "very-important-job",
  "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
  "priority": 50,
  "priorityClassName": "high-priority",
  "submittedBy": "researcher@run.ai",
  "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
  "projectName": "proj-1",
  "projectId": "1",
  "departmentName": "department-1",
  "departmentId": "1",
  "namespace": "runai-proj-1",
  "createdAt": "2022-01-01T03:49:52.531Z",
  "workloadRequestedResources": {
    "gpuRequestType": "portion",
    "gpu": {
      "limit": 1.5,
      "request": 1
    },
    "gpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "cpu": {
      "limit": 1.5,
      "request": 1
    },
    "cpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "migProfile": [
      "1g.5gb"
    ],
    "extendedResources": [
      {
        "resource": "hardware-vendor.example/foo",
        "quantity": 2,
        "exclude": false
      }
    ]
  },
  "podsRequestedResources": {
    "gpuRequestType": "portion",
    "gpu": {
      "limit": 1.5,
      "request": 1
    },
    "gpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "cpu": {
      "limit": 1.5,
      "request": 1
    },
    "cpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "migProfile": [
      "1g.5gb"
    ],
    "extendedResources": [
      {
        "resource": "hardware-vendor.example/foo",
        "quantity": 2,
        "exclude": false
      }
    ]
  },
  "allocatedResources": {
    "gpu": 1.5,
    "migProfile": [
      "1g.5gb"
    ],
    "gpuMemory": "200Mi",
    "cpu": 0.5,
    "cpuMemory": "0B",
    "extendedResources": [
      {
        "resource": "hardware-vendor.example/foo",
        "quantity": 2,
        "exclude": false
      }
    ]
  },
  "actionsSupport": {
    "delete": true,
    "suspend": true
  },
  "phase": "Creating",
  "conditions": [
    {
      "type": "Ready",
      "status": "False",
      "message": "Resource validation failed: ...",
      "reason": "ErrorConfig",
      "lastTransitionTime": "2022-01-01T03:49:52.531Z"
    }
  ],
  "phaseMessage": "Not enough resources in the requested nodepool",
  "k8sPhase": "Pending",
  "requestedPods": {
    "number": 1,
    "min": 2,
    "max": 5,
    "parallelism": 3,
    "completions": 5
  },
  "requestedNodePools": [
    "default"
  ],
  "currentNodePools": [
    "default"
  ],
  "completedAt": "2022-01-01T03:49:52.531Z",
  "images": [
    "busybox:latest"
  ],
  "urls": [
    "string"
  ],
  "datasources": [
    {
      "type": "pvc",
      "name": "my-pvc-datasource-1",
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"
    }
  ],
  "environments": [
    {
      "connections": [
        {
          "name": "my-pytorch-env",
          "toolType": "pytorch",
          "connectionType": "ExternalUrl",
          "authorizationType": "public",
          "authorizedUsers": [
            "user@company.ai",
            "another@company.ai"
          ],
          "authorizedGroups": [
            "group-a",
            "group-b"
          ],
          "containerPort": 8080
        }
      ],
      "name": "pytorch",
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
      "replicaType": "Master"
    }
  ],
  "externalConnections": [
    {
      "name": "my-pytorch-env",
      "toolType": "pytorch",
      "connectionType": "ExternalUrl",
      "authorizationType": "public",
      "authorizedUsers": [
        "user@company.ai",
        "another@company.ai"
      ],
      "authorizedGroups": [
        "group-a",
        "group-b"
      ],
      "containerPort": 8080
    }
  ],
  "distributedFramework": "Pytorch",
  "additionalFields": {},
  "preemptible": true,
  "environmentVariables": {
    "property1": "string",
    "property2": "string"
  },
  "command": "sleep",
  "arguments": "1000",
  "phaseReason": {},
  "idleGpus": 3,
  "idleAllocatedGpus": 1,
  "pendingSchedulingMessages": [
    {
      "nodePool": "default",
      "phaseReason": "NonPreemptibleOverQuota",
      "reason": "Non-preemptible over quota",
      "orgType": {},
      "userMessage": "You have reached the limit of non-preemptible resources"
    }
  ]
}
Retrieve the number of workloads.
Executed successfully.
Unauthorized
Forbidden
unexpected error
unexpected error
{
  "count": 1
}
Retrieves workload data by telemetry type.
Executed successfully.
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "type": "ALLOCATION_RATIO",
  "timestamp": "2023-06-06 12:09:18.211",
  "values": [
    {
      "value": "85",
      "groups": [
        {
          "key": "department",
          "value": "1",
          "name": "department-A"
        }
      ]
    }
  ]
}
Retrieves workload metrics data from the metrics database. Use it in reporting and analysis tools.
Executed successfully.
Partial success.
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "measurements": [
    {
      "type": "ALLOCATED_GPU",
      "labels": "{'gpu': '3'}",
      "values": [
        {
          "value": "85",
          "timestamp": "2023-06-06 12:09:18.211"
        }
      ]
    }
  ]
}