Workloads are trainings, workspaces, and inferences that are fully controlled by NVIDIA Run:ai. Workloads can be native, third-party integrations, or typical Kubernetes workload types. For more information, see Workloads overview.
Retrieve a list of active workloads with details.
Executed successfully.
Unauthorized
Forbidden
unexpected error
unexpected error
{
  "next": 1,
  "workloads": [
    {
      "tenantId": 1001,
      "runningPods": 1,
      "phaseUpdatedAt": "2022-06-08T11:28:24.131Z",
      "k8sPhaseUpdatedAt": "2022-06-08T11:28:24.131Z",
      "updatedAt": "2022-06-08T11:28:24.131Z",
      "source": "CLI",
      "deletedAt": "2022-08-12T19:28:24.131Z",
      "type": "runai-job",
      "name": "very-important-job",
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
      "priority": 50,
      "priorityClassName": "high-priority",
      "submittedBy": "researcher@run.ai",
      "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
      "projectName": "proj-1",
      "projectId": "1",
      "departmentName": "department-1",
      "departmentId": "1",
      "namespace": "runai-proj-1",
      "createdAt": "2022-01-01T03:49:52.531Z",
      "workloadRequestedResources": {
        "gpuRequestType": "portion",
        "gpu": {
          "limit": 1.5,
          "request": 1
        },
        "gpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "cpu": {
          "limit": 1.5,
          "request": 1
        },
        "cpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "migProfile": [
          "1g.5gb"
        ],
        "extendedResources": [
          {
            "resource": "hardware-vendor.example/foo",
            "quantity": 2,
            "exclude": false
          }
        ]
      },
      "podsRequestedResources": {
        "gpuRequestType": "portion",
        "gpu": {
          "limit": 1.5,
          "request": 1
        },
        "gpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "cpu": {
          "limit": 1.5,
          "request": 1
        },
        "cpuMemory": {
          "limit": "2G",
          "request": "200M"
        },
        "migProfile": [
          "1g.5gb"
        ],
        "extendedResources": [
          {
            "resource": "hardware-vendor.example/foo",
            "quantity": 2,
            "exclude": false
          }
        ]
      },
      "allocatedResources": {
        "gpu": 1.5,
        "migProfile": [
          "1g.5gb"
        ],
        "gpuMemory": "200Mi",
        "cpu": 0.5,
        "cpuMemory": "0B",
        "extendedResources": [
          {
            "resource": "hardware-vendor.example/foo",
            "quantity": 2,
            "exclude": false
          }
        ]
      },
      "actionsSupport": {
        "delete": true,
        "suspend": true
      },
      "phase": "Creating",
      "conditions": [
        {
          "type": "Ready",
          "status": "False",
          "message": "Resource validation failed: ...",
          "reason": "ErrorConfig",
          "lastTransitionTime": "2022-01-01T03:49:52.531Z"
        }
      ],
      "phaseMessage": "Not enough resources in the requested nodepool",
      "k8sPhase": "Pending",
      "requestedPods": {
        "number": 1,
        "min": 2,
        "max": 5,
        "parallelism": 3,
        "completions": 5
      },
      "requestedNodePools": [
        "default"
      ],
      "currentNodePools": [
        "default"
      ],
      "completedAt": "2022-01-01T03:49:52.531Z",
      "images": [
        "alpine:latest"
      ],
      "urls": [
        "string"
      ],
      "datasources": [
        {
          "type": "pvc",
          "name": "my-pvc-datasource-1",
          "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"
        }
      ],
      "environments": [
        {
          "connections": [
            {
              "name": "my-pytorch-env",
              "toolType": "pytorch",
              "connectionType": "ExternalUrl",
              "authorizationType": "public",
              "authorizedUsers": [
                "user@company.ai",
                "another@company.ai"
              ],
              "authorizedGroups": [
                "group-a",
                "group-b"
              ],
              "containerPort": 8080
            }
          ],
          "name": "pytorch",
          "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
          "replicaType": "Master"
        }
      ],
      "externalConnections": [
        {
          "name": "my-pytorch-env",
          "toolType": "pytorch",
          "connectionType": "ExternalUrl",
          "authorizationType": "public",
          "authorizedUsers": [
            "user@company.ai",
            "another@company.ai"
          ],
          "authorizedGroups": [
            "group-a",
            "group-b"
          ],
          "containerPort": 8080
        }
      ],
      "distributedFramework": "Pytorch",
      "additionalFields": {},
      "preemptible": true,
      "environmentVariables": {
        "property1": "string",
        "property2": "string"
      },
      "command": "sleep",
      "arguments": "1000",
      "phaseReason": "NonPreemptibleOverQuota",
      "idleGpus": 3,
      "idleAllocatedGpus": 1,
      "category": "Train"
    }
  ]
}
Retrieves a list of workload categories. These categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.
List of categories retrieved successfully.
Bad request.
Unauthorized
Forbidden
unexpected error
{
  "categories": [
    {
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
      "name": "string",
      "description": "string",
      "createdAt": "2019-08-24T14:15:22Z",
      "createdBy": "string"
    }
  ]
}
Retrieves a specific workload category by its ID. Workload categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.
Category retrieved successfully
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
{
  "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
  "name": "string",
  "description": "string",
  "createdAt": "2019-08-24T14:15:22Z",
  "createdBy": "string"
}
Retrieves a list of workload types along with their configurations, that is, their corresponding workload categories and priorities.
List of types retrieved successfully
Bad request.
Unauthorized
Forbidden
unexpected error
{
  "types": [
    {
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
      "categoryName": "Build",
      "priorityName": "medium",
      "updatedAt": "2019-08-24T14:15:22Z",
      "updatedBy": "string"
    }
  ]
}
Retrieves a specific workload type by its ID.
Type retrieved successfully
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
{
  "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
  "categoryName": "Build",
  "priorityName": "medium",
  "updatedAt": "2019-08-24T14:15:22Z",
  "updatedBy": "string"
}
Update the default category or priority assigned to a workload type.
Workload type to update.
Updated successfully
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
{
  "categoryId": "337f5e5d-288b-40d5-be14-901cc3acacc0",
  "priorityId": "a57eab25-838b-40cc-a576-57e4056f1d6c"
}
{
  "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
  "categoryName": "Build",
  "priorityName": "medium",
  "updatedAt": "2019-08-24T14:15:22Z",
  "updatedBy": "string"
}
Retrieve workload data using a workloadId.
Executed successfully.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "tenantId": 1001,
  "runningPods": 1,
  "phaseUpdatedAt": "2022-06-08T11:28:24.131Z",
  "k8sPhaseUpdatedAt": "2022-06-08T11:28:24.131Z",
  "updatedAt": "2022-06-08T11:28:24.131Z",
  "source": "CLI",
  "deletedAt": "2022-08-12T19:28:24.131Z",
  "type": "runai-job",
  "name": "very-important-job",
  "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
  "priority": 50,
  "priorityClassName": "high-priority",
  "submittedBy": "researcher@run.ai",
  "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
  "projectName": "proj-1",
  "projectId": "1",
  "departmentName": "department-1",
  "departmentId": "1",
  "namespace": "runai-proj-1",
  "createdAt": "2022-01-01T03:49:52.531Z",
  "workloadRequestedResources": {
    "gpuRequestType": "portion",
    "gpu": {
      "limit": 1.5,
      "request": 1
    },
    "gpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "cpu": {
      "limit": 1.5,
      "request": 1
    },
    "cpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "migProfile": [
      "1g.5gb"
    ],
    "extendedResources": [
      {
        "resource": "hardware-vendor.example/foo",
        "quantity": 2,
        "exclude": false
      }
    ]
  },
  "podsRequestedResources": {
    "gpuRequestType": "portion",
    "gpu": {
      "limit": 1.5,
      "request": 1
    },
    "gpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "cpu": {
      "limit": 1.5,
      "request": 1
    },
    "cpuMemory": {
      "limit": "2G",
      "request": "200M"
    },
    "migProfile": [
      "1g.5gb"
    ],
    "extendedResources": [
      {
        "resource": "hardware-vendor.example/foo",
        "quantity": 2,
        "exclude": false
      }
    ]
  },
  "allocatedResources": {
    "gpu": 1.5,
    "migProfile": [
      "1g.5gb"
    ],
    "gpuMemory": "200Mi",
    "cpu": 0.5,
    "cpuMemory": "0B",
    "extendedResources": [
      {
        "resource": "hardware-vendor.example/foo",
        "quantity": 2,
        "exclude": false
      }
    ]
  },
  "actionsSupport": {
    "delete": true,
    "suspend": true
  },
  "phase": "Creating",
  "conditions": [
    {
      "type": "Ready",
      "status": "False",
      "message": "Resource validation failed: ...",
      "reason": "ErrorConfig",
      "lastTransitionTime": "2022-01-01T03:49:52.531Z"
    }
  ],
  "phaseMessage": "Not enough resources in the requested nodepool",
  "k8sPhase": "Pending",
  "requestedPods": {
    "number": 1,
    "min": 2,
    "max": 5,
    "parallelism": 3,
    "completions": 5
  },
  "requestedNodePools": [
    "default"
  ],
  "currentNodePools": [
    "default"
  ],
  "completedAt": "2022-01-01T03:49:52.531Z",
  "images": [
    "alpine:latest"
  ],
  "urls": [
    "string"
  ],
  "datasources": [
    {
      "type": "pvc",
      "name": "my-pvc-datasource-1",
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08"
    }
  ],
  "environments": [
    {
      "connections": [
        {
          "name": "my-pytorch-env",
          "toolType": "pytorch",
          "connectionType": "ExternalUrl",
          "authorizationType": "public",
          "authorizedUsers": [
            "user@company.ai",
            "another@company.ai"
          ],
          "authorizedGroups": [
            "group-a",
            "group-b"
          ],
          "containerPort": 8080
        }
      ],
      "name": "pytorch",
      "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
      "replicaType": "Master"
    }
  ],
  "externalConnections": [
    {
      "name": "my-pytorch-env",
      "toolType": "pytorch",
      "connectionType": "ExternalUrl",
      "authorizationType": "public",
      "authorizedUsers": [
        "user@company.ai",
        "another@company.ai"
      ],
      "authorizedGroups": [
        "group-a",
        "group-b"
      ],
      "containerPort": 8080
    }
  ],
  "distributedFramework": "Pytorch",
  "additionalFields": {},
  "preemptible": true,
  "environmentVariables": {
    "property1": "string",
    "property2": "string"
  },
  "command": "sleep",
  "arguments": "1000",
  "phaseReason": "NonPreemptibleOverQuota",
  "idleGpus": 3,
  "idleAllocatedGpus": 1,
  "category": "Train",
  "pendingSchedulingMessages": [
    {
      "nodePool": "default",
      "phaseReason": "NonPreemptibleOverQuota",
      "reason": "Non-preemptible over quota",
      "orgType": "PROJECT",
      "userMessage": "You have reached the limit of non-preemptible resources"
    }
  ]
}
Retrieve the number of workloads.
Executed successfully.
Unauthorized
Forbidden
unexpected error
unexpected error
{
  "count": 1
}
Retrieves workload data by telemetry type.
Executed successfully.
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "type": "ALLOCATION_RATIO",
  "timestamp": "2023-06-06 12:09:18.211",
  "values": [
    {
      "value": "85",
      "groups": [
        {
          "key": "department",
          "value": "1",
          "name": "department-A"
        }
      ]
    }
  ]
}
Retrieves workload metrics data from the metrics database. Use this endpoint in reporting and analysis tools.
Executed successfully.
Partial success.
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "measurements": [
    {
      "type": "ALLOCATED_GPU",
      "labels": "{'gpu': '3'}",
      "values": [
        {
          "value": "85",
          "timestamp": "2023-06-06 12:09:18.211"
        }
      ]
    }
  ]
}