The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform. Each request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.
Create an NVIDIA NIM service
metadata | required | object (WorkloadV2MetadataCreateParams) |
spec | required | object or null (NimServiceSpec) |
Workload creation accepted
Bad request.
Unauthorized
Forbidden
The specified resource already exists
unexpected error
unexpected error
{- "metadata": {
- "name": "my-workload-name",
- "projectId": 1,
- "priority": "string",
- "category": "string"
}, - "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string"
}, - "storage": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
], - "ngcAuthSecret": "string",
- "replicas": 2
}
}
{- "metadata": {
- "name": "my-workload-name",
- "projectId": 1,
- "priority": "string",
- "category": "string",
- "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
- "gvk": {
- "group": "apps",
- "version": "v1",
- "kind": "Deployment"
}, - "projectName": "project-a",
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "tenantId": 1001,
- "departmentId": 2,
- "departmentName": "default",
- "createdAt": "2024-01-15T10:30:00Z",
- "createdBy": "user@run.ai",
- "updatedAt": "2024-01-15T10:35:00Z",
- "updatedBy": "user@run.ai",
- "deletedAt": "2024-01-15T10:35:00Z",
- "deletedBy": "user@run.ai"
}, - "desiredPhase": "Running",
- "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string"
}, - "storage": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
], - "ngcAuthSecret": "string",
- "replicas": 2
}
}
Retrieve details of a specific NVIDIA NIM service by ID
Successfully retrieved the workload
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{- "metadata": {
- "name": "my-workload-name",
- "projectId": 1,
- "priority": "string",
- "category": "string",
- "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
- "gvk": {
- "group": "apps",
- "version": "v1",
- "kind": "Deployment"
}, - "projectName": "project-a",
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "tenantId": 1001,
- "departmentId": 2,
- "departmentName": "default",
- "createdAt": "2024-01-15T10:30:00Z",
- "createdBy": "user@run.ai",
- "updatedAt": "2024-01-15T10:35:00Z",
- "updatedBy": "user@run.ai",
- "deletedAt": "2024-01-15T10:35:00Z",
- "deletedBy": "user@run.ai"
}, - "desiredPhase": "Running",
- "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string"
}, - "storage": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
], - "ngcAuthSecret": "string",
- "replicas": 2
}
}