The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform. Each request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.
Create an NVIDIA NIM service
metadata | required | object (NIMServiceMetadataCreateParams) |
spec | required | object or null (NimServiceSpec) |
Workload creation accepted
Bad submission request.
Unauthorized
Forbidden
The specified resource already exists
unexpected error
unexpected error
{- "metadata": {
- "name": "my-workload-name",
- "useGivenNameAsPrefix": true,
- "projectId": 1
}, - "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "autoscaling": {
- "maxReplicas": 1,
- "metric": "http_requests_total",
- "metricThreshold": 0,
- "minReplicas": 1,
- "scaleWindowSeconds": 60
}, - "category": "string",
- "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "userCredential": {
- "name": "my_postgres_user_and_password",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "modelStore": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "multiNode": {
- "workers": 3
}, - "ngcAuthSecret": "string",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "preemptibility": "preemptible",
- "priorityClass": "string",
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "replicas": 2,
- "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string",
- "exposedProtocol": "http"
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
]
}
}
{- "metadata": {
- "name": "my-workload-name",
- "projectId": 1,
- "priority": "string",
- "category": "string",
- "preemptibility": "preemptible",
- "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
- "gvk": {
- "group": "apps",
- "version": "v1",
- "kind": "Deployment"
}, - "projectName": "project-a",
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "tenantId": 1001,
- "departmentId": 2,
- "departmentName": "default",
- "createdAt": "2024-01-15T10:30:00Z",
- "createdBy": "user@run.ai",
- "updatedAt": "2024-01-15T10:35:00Z",
- "updatedBy": "user@run.ai",
- "deletedAt": "2024-01-15T10:35:00Z",
- "deletedBy": "user@run.ai"
}, - "desiredPhase": "Running",
- "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "autoscaling": {
- "maxReplicas": 1,
- "metric": "http_requests_total",
- "metricThreshold": 0,
- "minReplicas": 1,
- "scaleWindowSeconds": 60
}, - "category": "string",
- "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "userCredential": {
- "name": "my_postgres_user_and_password",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "modelStore": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "multiNode": {
- "workers": 3
}, - "ngcAuthSecret": "string",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "preemptibility": "preemptible",
- "priorityClass": "string",
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "replicas": 2,
- "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string",
- "exposedProtocol": "http"
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
]
}
}
Retrieve details of a specific NVIDIA NIM service by ID.
Successfully retrieved the workload
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{- "metadata": {
- "name": "my-workload-name",
- "projectId": 1,
- "priority": "string",
- "category": "string",
- "preemptibility": "preemptible",
- "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
- "gvk": {
- "group": "apps",
- "version": "v1",
- "kind": "Deployment"
}, - "projectName": "project-a",
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "tenantId": 1001,
- "departmentId": 2,
- "departmentName": "default",
- "createdAt": "2024-01-15T10:30:00Z",
- "createdBy": "user@run.ai",
- "updatedAt": "2024-01-15T10:35:00Z",
- "updatedBy": "user@run.ai",
- "deletedAt": "2024-01-15T10:35:00Z",
- "deletedBy": "user@run.ai"
}, - "desiredPhase": "Running",
- "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "autoscaling": {
- "maxReplicas": 1,
- "metric": "http_requests_total",
- "metricThreshold": 0,
- "minReplicas": 1,
- "scaleWindowSeconds": 60
}, - "category": "string",
- "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "userCredential": {
- "name": "my_postgres_user_and_password",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "modelStore": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "multiNode": {
- "workers": 3
}, - "ngcAuthSecret": "string",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "preemptibility": "preemptible",
- "priorityClass": "string",
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "replicas": 2,
- "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string",
- "exposedProtocol": "http"
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
]
}
}
Update the specification of an existing NVIDIA NIM service.
Workload update request accepted
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{- "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "autoscaling": {
- "maxReplicas": 1,
- "metric": "http_requests_total",
- "metricThreshold": 0,
- "minReplicas": 1,
- "scaleWindowSeconds": 60
}, - "category": "string",
- "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "userCredential": {
- "name": "my_postgres_user_and_password",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "modelStore": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "multiNode": {
- "workers": 3
}, - "ngcAuthSecret": "string",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "preemptibility": "preemptible",
- "priorityClass": "string",
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "replicas": 2,
- "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string",
- "exposedProtocol": "http"
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
]
}
}
{- "metadata": {
- "name": "my-workload-name",
- "projectId": 1,
- "priority": "string",
- "category": "string",
- "preemptibility": "preemptible",
- "id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
- "gvk": {
- "group": "apps",
- "version": "v1",
- "kind": "Deployment"
}, - "projectName": "project-a",
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "tenantId": 1001,
- "departmentId": 2,
- "departmentName": "default",
- "createdAt": "2024-01-15T10:30:00Z",
- "createdBy": "user@run.ai",
- "updatedAt": "2024-01-15T10:35:00Z",
- "updatedBy": "user@run.ai",
- "deletedAt": "2024-01-15T10:35:00Z",
- "deletedBy": "user@run.ai"
}, - "desiredPhase": "Running",
- "spec": {
- "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "autoscaling": {
- "maxReplicas": 1,
- "metric": "http_requests_total",
- "metricThreshold": 0,
- "minReplicas": 1,
- "scaleWindowSeconds": 60
}, - "category": "string",
- "compute": {
- "cpuCoreLimit": 2,
- "cpuCoreRequest": 0.5,
- "cpuMemoryLimit": "30M",
- "cpuMemoryRequest": "20M",
- "gpuDevicesRequest": 1,
- "gpuMemoryLimit": "10M",
- "gpuMemoryRequest": "10M",
- "gpuPortionLimit": 0.5,
- "gpuPortionRequest": 0.5,
- "gpuRequestType": "portion"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "configMap": {
- "name": "my-config-map",
- "key": "MY_POSTGRES_SCHEMA"
}, - "podFieldRef": {
- "path": "metadata.name"
}, - "userCredential": {
- "name": "my_postgres_user_and_password",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false,
- "description": "Home directory of the user."
}
], - "image": "python:3.8",
- "imagePullPolicy": "Always",
- "imagePullSecrets": [
- {
- "name": "string",
- "userCredential": true,
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "modelStore": {
- "nimCache": {
- "name": "nim-cache-a",
- "profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
}, - "pvc": {
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem",
- "addedAttrValues": [
- {
- "key": "dnsname",
- "value": "my.dns.com"
}
]
}
}
}, - "multiNode": {
- "workers": 3
}, - "ngcAuthSecret": "string",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "preemptibility": "preemptible",
- "priorityClass": "string",
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "replicas": 2,
- "security": {
- "runAsGid": 30,
- "runAsUid": 500
}, - "servingPort": {
- "serviceType": "ClusterIP",
- "port": 8000,
- "grpcPort": 8001,
- "metricsPort": 8002,
- "exposeExternally": true,
- "exposedUrl": "string",
- "exposedProtocol": "http"
}, - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
]
}
}