NVIDIA NIM

The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform. Each request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.

Create an NVIDIA NIM service. [Experimental]

Create an NVIDIA NIM service.

Security: bearerAuth

Request

Request Body schema: application/json
required

metadata	required	object (WorkloadV2MetadataCreateParams)
spec	required	object or null (NimServiceSpec)

Responses

202

Workload creation accepted

400

Bad request.

401

Unauthorized

403

Forbidden

409

The specified resource already exists

500

Unexpected error

503

Unexpected error

POST /api/v2/workloads/nim-services

Request samples

Payload

application/json

{"metadata": {"name": "my-workload-name",
"projectId": 1,
"priority": "string",
"category": "string"
},
"spec": {"annotations": [{"name": "billing",
"value": "my-billing-unit",
"exclude": false
}
],
"compute": {"cpuCoreLimit": 2,
"cpuCoreRequest": 0.5,
"cpuMemoryLimit": "30M",
"cpuMemoryRequest": "20M",
"gpuDevicesRequest": 1,
"gpuMemoryLimit": "10M",
"gpuMemoryRequest": "10M",
"gpuPortionLimit": 0.5,
"gpuPortionRequest": 0.5,
"gpuRequestType": "portion"
},
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"image": "python:3.8",
"imagePullPolicy": "Always",
"imagePullSecrets": [{"name": "string",
"userCredential": true,
"exclude": false
}
],
"labels": [{"name": "stage",
"value": "initial-research",
"exclude": false
}
],
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"security": {"runAsGid": 30,
"runAsUid": 500
},
"servingPort": {"serviceType": "ClusterIP",
"port": 8000,
"grpcPort": 8001,
"metricsPort": 8002,
"exposeExternally": true,
"exposedUrl": "string"
},
"storage": {"nimCache": {"name": "nim-cache-a",
"profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
},
"pvc": {"existingPvc": false,
"claimName": "my-claim",
"readOnly": false,
"claimInfo": {"size": "1G",
"storageClass": "my-storage-class",
"accessModes": {"readWriteOnce": true,
"readOnlyMany": false,
"readWriteMany": false
},
"volumeMode": "Filesystem",
"addedAttrValues": [{"key": "dnsname",
"value": "my.dns.com"
}
]
}
}
},
"tolerations": [{"name": "string",
"operator": "Equal",
"key": "string",
"value": "string",
"effect": "NoSchedule",
"seconds": 1,
"exclude": false
}
],
"ngcAuthSecret": "string",
"replicas": 2
}
}

Response samples

application/json

{"metadata": {"name": "my-workload-name",
"projectId": 1,
"priority": "string",
"category": "string",
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
"gvk": {"group": "apps",
"version": "v1",
"kind": "Deployment"
},
"projectName": "project-a",
"clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
"tenantId": 1001,
"departmentId": 2,
"departmentName": "default",
"createdAt": "2024-01-15T10:30:00Z",
"createdBy": "user@run.ai",
"updatedAt": "2024-01-15T10:35:00Z",
"updatedBy": "user@run.ai",
"deletedAt": "2024-01-15T10:35:00Z",
"deletedBy": "user@run.ai"
},
"desiredPhase": "Running",
"spec": {"annotations": [{"name": "billing",
"value": "my-billing-unit",
"exclude": false
}
],
"compute": {"cpuCoreLimit": 2,
"cpuCoreRequest": 0.5,
"cpuMemoryLimit": "30M",
"cpuMemoryRequest": "20M",
"gpuDevicesRequest": 1,
"gpuMemoryLimit": "10M",
"gpuMemoryRequest": "10M",
"gpuPortionLimit": 0.5,
"gpuPortionRequest": 0.5,
"gpuRequestType": "portion"
},
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"image": "python:3.8",
"imagePullPolicy": "Always",
"imagePullSecrets": [{"name": "string",
"userCredential": true,
"exclude": false
}
],
"labels": [{"name": "stage",
"value": "initial-research",
"exclude": false
}
],
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"security": {"runAsGid": 30,
"runAsUid": 500
},
"servingPort": {"serviceType": "ClusterIP",
"port": 8000,
"grpcPort": 8001,
"metricsPort": 8002,
"exposeExternally": true,
"exposedUrl": "string"
},
"storage": {"nimCache": {"name": "nim-cache-a",
"profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
},
"pvc": {"existingPvc": false,
"claimName": "my-claim",
"readOnly": false,
"claimInfo": {"size": "1G",
"storageClass": "my-storage-class",
"accessModes": {"readWriteOnce": true,
"readOnlyMany": false,
"readWriteMany": false
},
"volumeMode": "Filesystem",
"addedAttrValues": [{"key": "dnsname",
"value": "my.dns.com"
}
]
}
}
},
"tolerations": [{"name": "string",
"operator": "Equal",
"key": "string",
"value": "string",
"effect": "NoSchedule",
"seconds": 1,
"exclude": false
}
],
"ngcAuthSecret": "string",
"replicas": 2
}
}

Get an NVIDIA NIM service. [Experimental]

Retrieve the details of a specific NVIDIA NIM service by ID.

Security: bearerAuth

Request

path Parameters

WorkloadV2Id

required

string <uuid>

The ID of the workload.

Responses

200

Successfully retrieved the workload

401

Unauthorized

403

Forbidden

404

The specified resource was not found

500

Unexpected error

503

Unexpected error

GET /api/v2/workloads/nim-services/{WorkloadV2Id}

Response samples

application/json

{"metadata": {"name": "my-workload-name",
"projectId": 1,
"priority": "string",
"category": "string",
"id": "497f6eca-6276-4993-bfeb-53cbbbba6f08",
"gvk": {"group": "apps",
"version": "v1",
"kind": "Deployment"
},
"projectName": "project-a",
"clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
"tenantId": 1001,
"departmentId": 2,
"departmentName": "default",
"createdAt": "2024-01-15T10:30:00Z",
"createdBy": "user@run.ai",
"updatedAt": "2024-01-15T10:35:00Z",
"updatedBy": "user@run.ai",
"deletedAt": "2024-01-15T10:35:00Z",
"deletedBy": "user@run.ai"
},
"desiredPhase": "Running",
"spec": {"annotations": [{"name": "billing",
"value": "my-billing-unit",
"exclude": false
}
],
"compute": {"cpuCoreLimit": 2,
"cpuCoreRequest": 0.5,
"cpuMemoryLimit": "30M",
"cpuMemoryRequest": "20M",
"gpuDevicesRequest": 1,
"gpuMemoryLimit": "10M",
"gpuMemoryRequest": "10M",
"gpuPortionLimit": 0.5,
"gpuPortionRequest": 0.5,
"gpuRequestType": "portion"
},
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"image": "python:3.8",
"imagePullPolicy": "Always",
"imagePullSecrets": [{"name": "string",
"userCredential": true,
"exclude": false
}
],
"labels": [{"name": "stage",
"value": "initial-research",
"exclude": false
}
],
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"security": {"runAsGid": 30,
"runAsUid": 500
},
"servingPort": {"serviceType": "ClusterIP",
"port": 8000,
"grpcPort": 8001,
"metricsPort": 8002,
"exposeExternally": true,
"exposedUrl": "string"
},
"storage": {"nimCache": {"name": "nim-cache-a",
"profile": "tensorrt_llm-b200-fp8-tp2-pp1-latency-2901:10de-2"
},
"pvc": {"existingPvc": false,
"claimName": "my-claim",
"readOnly": false,
"claimInfo": {"size": "1G",
"storageClass": "my-storage-class",
"accessModes": {"readWriteOnce": true,
"readOnlyMany": false,
"readWriteMany": false
},
"volumeMode": "Filesystem",
"addedAttrValues": [{"key": "dnsname",
"value": "my.dns.com"
}
]
}
}
},
"tolerations": [{"name": "string",
"operator": "Equal",
"key": "string",
"value": "string",
"effect": "NoSchedule",
"seconds": 1,
"exclude": false
}
],
"ngcAuthSecret": "string",
"replicas": 2
}
}

➔ Next to Workspaces

NVIDIA NIM

Create an NVIDIA NIM service. [Experimental]

Request Body schema: application/json (required)

Get an NVIDIA NIM service. [Experimental]

path Parameters

Request Body schema: application/json
required