Inferences

Inference workloads deploy trained models into a production environment to generate predictions from live data. These workloads are prioritized over Trainings and Workspaces during scheduling. NVIDIA Run:ai Inference workloads support auto-scaling to maintain service-level agreements (SLAs) by dynamically adjusting resources as demand changes.

Create an inference.

Create an inference using container-related fields.

Security: bearerAuth

Request

Request Body schema: application/json

name required	string (WorkloadName) non-empty The name of the workload.
useGivenNameAsPrefix	boolean Default: false When true, the requested name will be treated as a prefix. The final name of the workload will be composed of the name followed by a random set of characters.
projectId required	string (ProjectId2) The id of the project.
clusterId required	string <uuid> (ClusterId) The id of the cluster.
spec	object or null (InferenceSpecSpec)

Responses

202

Request completed successfully.

400

Bad request.

401

Unauthorized

403

Forbidden

503

unexpected error

POST /api/v1/workloads/inferences

Request samples

Payload

application/json

{"name": "my-workload-name",
"useGivenNameAsPrefix": true,
"projectId": "1",
"clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
"spec": {"command": "python",
"args": "-x my-script.py",
"image": "python:3.8",
"imagePullPolicy": "Always",
"workingDir": "/home/myfolder",
"createHomeDir": true,
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"nodeType": "my-node-type",
"nodeAffinityRequired": {"nodeSelectorTerms": [{"matchExpressions": [{"key": "string",
"operator": "In",
"values": [null
]
}
]
}
]
},
"podAffinity": {"type": "Required",
"key": "string"
},
"category": "string",
"priorityClass": "string",
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"annotations": [{"name": "billing",
"value": "my-billing-unit",
"exclude": false
}
],
"labels": [{"name": "stage",
"value": "initial-research",
"exclude": false
}
],
"imagePullSecrets": [{"name": "string",
"userCredential": true,
"exclude": false
}
],
"tolerations": [{"name": "string",
"operator": "Equal",
"key": "string",
"value": "string",
"effect": "NoSchedule",
"seconds": 1,
"exclude": false
}
],
"ports": [{"container": 8080,
"serviceType": "LoadBalancer",
"external": 30080,
"toolType": "pytorch",
"toolName": "my-pytorch",
"name": "port-instance-a",
"exclude": false
}
],
"exposedUrls": [{"container": 8080,
"url": "https://my-url.com",
"authorizedUsers": ["user-a",
"user-b"
],
"authorizedGroups": ["group-a",
"group-b"
],
"toolType": "jupyter",
"toolName": "my-pytorch",
"name": "url-instance-a",
"exclude": false
}
],
"relatedUrls": [{"url": "https://my-url.com",
"type": "wandb",
"name": "url-instance-a",
"exclude": false
}
],
"compute": {"gpuDevicesRequest": 1,
"gpuRequestType": "portion",
"gpuPortionRequest": 0.5,
"gpuPortionLimit": 0.5,
"gpuMemoryRequest": "10M",
"gpuMemoryLimit": "10M",
"migProfile": "1g.5gb",
"cpuCoreRequest": 0.5,
"cpuCoreLimit": 2,
"cpuMemoryRequest": "20M",
"cpuMemoryLimit": "30M",
"largeShmRequest": false,
"extendedResources": [{"resource": "hardware-vendor.example/foo",
"quantity": 2,
"exclude": false
}
]
},
"servingPort": {"container": 8080,
"protocol": "http",
"authorizationType": "public",
"authorizedUsers": ["user.a@example.com",
"user.b@example.com"
],
"authorizedGroups": ["group-a",
"group-b"
],
"clusterLocalAccessOnly": true
},
"storage": {"configMapVolume": [{"name": "storage-instance-a",
"configMap": "string",
"mountPath": "string",
"subPath": "string",
"defaultMode": "0644",
"exclude": false
}
],
"dataVolume": [{"id": "123e4567-e89b-12d3-a456-426614174000",
"mountPath": "/mnt/data",
"exclude": false
}
],
"emptyDirVolume": [{"name": "storage-instance-a",
"path": "/mnt/emptydir",
"medium": "string",
"sizeLimit": "1G",
"exclude": false
}
],
"git": [{"name": "storage-instance-a",
"repository": "https://github.com/my-git/my-repo",
"branch": "main",
"revision": "string",
"path": "/container/my-repository",
"passwordSecret": "my-password-secret",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password",
"exclude": false,
"secretRef": {"name": "my-password-secret",
"authenticatingMethod": "password",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password"
}
}
],
"hostPath": [{"name": "storage-instance-a",
"path": "/container/directory",
"readOnly": true,
"mountPath": "/local/directory",
"mountPropagation": "None",
"exclude": false
}
],
"nfs": [{"name": "storage-instance-a",
"path": "/container/nfs",
"readOnly": true,
"server": "my.nfs.com",
"mountPath": "/local/nfs",
"exclude": false
}
],
"pvc": [{"name": "storage-instance-a",
"path": "/container/my-claim",
"existingPvc": false,
"claimName": "my-claim",
"readOnly": false,
"ephemeral": false,
"claimInfo": {"size": "1G",
"storageClass": "my-storage-class",
"accessModes": {"readWriteOnce": true,
"readOnlyMany": false,
"readWriteMany": false
},
"volumeMode": "Filesystem",
"addedAttrValues": [{"key": null,
"value": null
}
]
},
"dataSharing": false,
"exclude": false
}
],
"secretVolume": [{"name": "storage-instance-a",
"mountPath": "string",
"defaultMode": "0644",
"secret": "string",
"exclude": false
}
]
},
"security": {"uidGidSource": "fromTheImage",
"capabilities": ["CHOWN",
"KILL"
],
"seccompProfileType": "RuntimeDefault",
"runAsNonRoot": true,
"readOnlyRootFilesystem": false,
"runAsUid": 500,
"runAsGid": 30,
"supplementalGroups": "2,3,5,8"
},
"autoscaling": {"metricThresholdPercentage": 1,
"minReplicas": 0,
"maxReplicas": 1,
"initialReplicas": 0,
"activationReplicas": 1,
"concurrencyHardLimit": 0,
"scaleToZeroRetentionSeconds": 3600,
"scaleDownDelaySeconds": 3600,
"initializationTimeoutSeconds": 1,
"metric": "http_requests_total",
"metricThreshold": 0
},
"servingConfiguration": {"initializationTimeoutSeconds": 1,
"requestTimeoutSeconds": 1
}
}
}

Response samples

application/json

{"name": "my-workload-name",
"requestedName": "string",
"workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
"projectId": 1,
"departmentId": 2,
"clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
"createdBy": "test@lab.com",
"createdAt": "2022-01-01T03:49:52.531Z",
"deletedAt": "2022-01-01T03:49:52.531Z",
"desiredPhase": "Running",
"actualPhase": "Creating",
"spec": {"command": "python",
"args": "-x my-script.py",
"image": "python:3.8",
"imagePullPolicy": "Always",
"workingDir": "/home/myfolder",
"createHomeDir": true,
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"nodeType": "my-node-type",
"nodeAffinityRequired": {"nodeSelectorTerms": [{"matchExpressions": [{"key": "string",
"operator": "In",
"values": [null
]
}
]
}
]
},
"podAffinity": {"type": "Required",
"key": "string"
},
"category": "string",
"priorityClass": "string",
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"annotations": [{"name": "billing",
"value": "my-billing-unit",
"exclude": false
}
],
"labels": [{"name": "stage",
"value": "initial-research",
"exclude": false
}
],
"imagePullSecrets": [{"name": "string",
"userCredential": true,
"exclude": false
}
],
"tolerations": [{"name": "string",
"operator": "Equal",
"key": "string",
"value": "string",
"effect": "NoSchedule",
"seconds": 1,
"exclude": false
}
],
"ports": [{"container": 8080,
"serviceType": "LoadBalancer",
"external": 30080,
"toolType": "pytorch",
"toolName": "my-pytorch",
"name": "port-instance-a",
"exclude": false
}
],
"exposedUrls": [{"container": 8080,
"url": "https://my-url.com",
"authorizedUsers": ["user-a",
"user-b"
],
"authorizedGroups": ["group-a",
"group-b"
],
"toolType": "jupyter",
"toolName": "my-pytorch",
"name": "url-instance-a",
"exclude": false
}
],
"relatedUrls": [{"url": "https://my-url.com",
"type": "wandb",
"name": "url-instance-a",
"exclude": false
}
],
"compute": {"gpuDevicesRequest": 1,
"gpuRequestType": "portion",
"gpuPortionRequest": 0.5,
"gpuPortionLimit": 0.5,
"gpuMemoryRequest": "10M",
"gpuMemoryLimit": "10M",
"migProfile": "1g.5gb",
"cpuCoreRequest": 0.5,
"cpuCoreLimit": 2,
"cpuMemoryRequest": "20M",
"cpuMemoryLimit": "30M",
"largeShmRequest": false,
"extendedResources": [{"resource": "hardware-vendor.example/foo",
"quantity": 2,
"exclude": false
}
]
},
"servingPort": {"container": 8080,
"protocol": "http",
"authorizationType": "public",
"authorizedUsers": ["user.a@example.com",
"user.b@example.com"
],
"authorizedGroups": ["group-a",
"group-b"
],
"clusterLocalAccessOnly": true
},
"storage": {"configMapVolume": [{"name": "storage-instance-a",
"configMap": "string",
"mountPath": "string",
"subPath": "string",
"defaultMode": "0644",
"exclude": false
}
],
"dataVolume": [{"id": "123e4567-e89b-12d3-a456-426614174000",
"mountPath": "/mnt/data",
"exclude": false
}
],
"emptyDirVolume": [{"name": "storage-instance-a",
"path": "/mnt/emptydir",
"medium": "string",
"sizeLimit": "1G",
"exclude": false
}
],
"git": [{"name": "storage-instance-a",
"repository": "https://github.com/my-git/my-repo",
"branch": "main",
"revision": "string",
"path": "/container/my-repository",
"passwordSecret": "my-password-secret",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password",
"exclude": false,
"secretRef": {"name": "my-password-secret",
"authenticatingMethod": "password",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password"
}
}
],
"hostPath": [{"name": "storage-instance-a",
"path": "/container/directory",
"readOnly": true,
"mountPath": "/local/directory",
"mountPropagation": "None",
"exclude": false
}
],
"nfs": [{"name": "storage-instance-a",
"path": "/container/nfs",
"readOnly": true,
"server": "my.nfs.com",
"mountPath": "/local/nfs",
"exclude": false
}
],
"pvc": [{"name": "storage-instance-a",
"path": "/container/my-claim",
"existingPvc": false,
"claimName": "my-claim",
"readOnly": false,
"ephemeral": false,
"claimInfo": {"size": "1G",
"storageClass": "my-storage-class",
"accessModes": {"readWriteOnce": true,
"readOnlyMany": false,
"readWriteMany": false
},
"volumeMode": "Filesystem",
"addedAttrValues": [{"key": null,
"value": null
}
]
},
"dataSharing": false,
"exclude": false
}
],
"secretVolume": [{"name": "storage-instance-a",
"mountPath": "string",
"defaultMode": "0644",
"secret": "string",
"exclude": false
}
]
},
"security": {"uidGidSource": "fromTheImage",
"capabilities": ["CHOWN",
"KILL"
],
"seccompProfileType": "RuntimeDefault",
"runAsNonRoot": true,
"readOnlyRootFilesystem": false,
"runAsUid": 500,
"runAsGid": 30,
"supplementalGroups": "2,3,5,8"
},
"autoscaling": {"metricThresholdPercentage": 1,
"minReplicas": 0,
"maxReplicas": 1,
"initialReplicas": 0,
"activationReplicas": 1,
"concurrencyHardLimit": 0,
"scaleToZeroRetentionSeconds": 3600,
"scaleDownDelaySeconds": 3600,
"initializationTimeoutSeconds": 1,
"metric": "http_requests_total",
"metricThreshold": 0
},
"servingConfiguration": {"initializationTimeoutSeconds": 1,
"requestTimeoutSeconds": 1
}
}
}

Delete an inference.

Delete an inference using a workload id.

Security: bearerAuth

Request

path Parameters

workloadId

required

string <uuid>

The Universally Unique Identifier (UUID) of the workload.

Responses

202

Accepted.

401

Unauthorized

403

Forbidden

404

The specified resource was not found

500

unexpected error

503

unexpected error

DELETE /api/v1/workloads/inferences/{workloadId}

Response samples

application/json

{"code": 202,
"message": "Request has been accepted."
}

Get inference data.

Retrieve inference details using a workload id.

Security: bearerAuth

Request

path Parameters

workloadId

required

string <uuid>

The Universally Unique Identifier (UUID) of the workload.

Responses

200

Executed successfully.

401

Unauthorized

403

Forbidden

404

The specified resource was not found

500

unexpected error

503

unexpected error

GET /api/v1/workloads/inferences/{workloadId}

Response samples

application/json

{"name": "my-workload-name",
"requestedName": "string",
"workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
"projectId": 1,
"departmentId": 2,
"clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
"createdBy": "test@lab.com",
"createdAt": "2022-01-01T03:49:52.531Z",
"deletedAt": "2022-01-01T03:49:52.531Z",
"desiredPhase": "Running",
"actualPhase": "Creating",
"spec": {"command": "python",
"args": "-x my-script.py",
"image": "python:3.8",
"imagePullPolicy": "Always",
"workingDir": "/home/myfolder",
"createHomeDir": true,
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"nodeType": "my-node-type",
"nodeAffinityRequired": {"nodeSelectorTerms": [{"matchExpressions": [{"key": "string",
"operator": "In",
"values": [null
]
}
]
}
]
},
"podAffinity": {"type": "Required",
"key": "string"
},
"category": "string",
"priorityClass": "string",
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"annotations": [{"name": "billing",
"value": "my-billing-unit",
"exclude": false
}
],
"labels": [{"name": "stage",
"value": "initial-research",
"exclude": false
}
],
"imagePullSecrets": [{"name": "string",
"userCredential": true,
"exclude": false
}
],
"tolerations": [{"name": "string",
"operator": "Equal",
"key": "string",
"value": "string",
"effect": "NoSchedule",
"seconds": 1,
"exclude": false
}
],
"ports": [{"container": 8080,
"serviceType": "LoadBalancer",
"external": 30080,
"toolType": "pytorch",
"toolName": "my-pytorch",
"name": "port-instance-a",
"exclude": false
}
],
"exposedUrls": [{"container": 8080,
"url": "https://my-url.com",
"authorizedUsers": ["user-a",
"user-b"
],
"authorizedGroups": ["group-a",
"group-b"
],
"toolType": "jupyter",
"toolName": "my-pytorch",
"name": "url-instance-a",
"exclude": false
}
],
"relatedUrls": [{"url": "https://my-url.com",
"type": "wandb",
"name": "url-instance-a",
"exclude": false
}
],
"compute": {"gpuDevicesRequest": 1,
"gpuRequestType": "portion",
"gpuPortionRequest": 0.5,
"gpuPortionLimit": 0.5,
"gpuMemoryRequest": "10M",
"gpuMemoryLimit": "10M",
"migProfile": "1g.5gb",
"cpuCoreRequest": 0.5,
"cpuCoreLimit": 2,
"cpuMemoryRequest": "20M",
"cpuMemoryLimit": "30M",
"largeShmRequest": false,
"extendedResources": [{"resource": "hardware-vendor.example/foo",
"quantity": 2,
"exclude": false
}
]
},
"servingPort": {"container": 8080,
"protocol": "http",
"authorizationType": "public",
"authorizedUsers": ["user.a@example.com",
"user.b@example.com"
],
"authorizedGroups": ["group-a",
"group-b"
],
"clusterLocalAccessOnly": true
},
"storage": {"configMapVolume": [{"name": "storage-instance-a",
"configMap": "string",
"mountPath": "string",
"subPath": "string",
"defaultMode": "0644",
"exclude": false
}
],
"dataVolume": [{"id": "123e4567-e89b-12d3-a456-426614174000",
"mountPath": "/mnt/data",
"exclude": false
}
],
"emptyDirVolume": [{"name": "storage-instance-a",
"path": "/mnt/emptydir",
"medium": "string",
"sizeLimit": "1G",
"exclude": false
}
],
"git": [{"name": "storage-instance-a",
"repository": "https://github.com/my-git/my-repo",
"branch": "main",
"revision": "string",
"path": "/container/my-repository",
"passwordSecret": "my-password-secret",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password",
"exclude": false,
"secretRef": {"name": "my-password-secret",
"authenticatingMethod": "password",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password"
}
}
],
"hostPath": [{"name": "storage-instance-a",
"path": "/container/directory",
"readOnly": true,
"mountPath": "/local/directory",
"mountPropagation": "None",
"exclude": false
}
],
"nfs": [{"name": "storage-instance-a",
"path": "/container/nfs",
"readOnly": true,
"server": "my.nfs.com",
"mountPath": "/local/nfs",
"exclude": false
}
],
"pvc": [{"name": "storage-instance-a",
"path": "/container/my-claim",
"existingPvc": false,
"claimName": "my-claim",
"readOnly": false,
"ephemeral": false,
"claimInfo": {"size": "1G",
"storageClass": "my-storage-class",
"accessModes": {"readWriteOnce": true,
"readOnlyMany": false,
"readWriteMany": false
},
"volumeMode": "Filesystem",
"addedAttrValues": [{"key": null,
"value": null
}
]
},
"dataSharing": false,
"exclude": false
}
],
"secretVolume": [{"name": "storage-instance-a",
"mountPath": "string",
"defaultMode": "0644",
"secret": "string",
"exclude": false
}
]
},
"security": {"uidGidSource": "fromTheImage",
"capabilities": ["CHOWN",
"KILL"
],
"seccompProfileType": "RuntimeDefault",
"runAsNonRoot": true,
"readOnlyRootFilesystem": false,
"runAsUid": 500,
"runAsGid": 30,
"supplementalGroups": "2,3,5,8"
},
"autoscaling": {"metricThresholdPercentage": 1,
"minReplicas": 0,
"maxReplicas": 1,
"initialReplicas": 0,
"activationReplicas": 1,
"concurrencyHardLimit": 0,
"scaleToZeroRetentionSeconds": 3600,
"scaleDownDelaySeconds": 3600,
"initializationTimeoutSeconds": 1,
"metric": "http_requests_total",
"metricThreshold": 0
},
"servingConfiguration": {"initializationTimeoutSeconds": 1,
"requestTimeoutSeconds": 1
}
}
}

Update inference spec. [Experimental]

Update the specification of an existing inference workload.

Security: bearerAuth

Request

path Parameters

workloadId

required

string <uuid>

The Universally Unique Identifier (UUID) of the workload.

Request Body schema: application/json

spec	object or null (CommonFlatFields)

Responses

202

Executed successfully.

401

Unauthorized

403

Forbidden

404

The specified resource was not found

500

unexpected error

503

unexpected error

PATCH /api/v1/workloads/inferences/{workloadId}

Request samples

Payload

application/json

{"spec": {"command": "python",
"args": "-x my-script.py",
"image": "python:3.8",
"imagePullPolicy": "Always",
"workingDir": "/home/myfolder",
"createHomeDir": true,
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"nodeType": "my-node-type",
"nodeAffinityRequired": {"nodeSelectorTerms": [{"matchExpressions": [{"key": "string",
"operator": "In",
"values": [null
]
}
]
}
]
},
"podAffinity": {"type": "Required",
"key": "string"
},
"category": "string",
"priorityClass": "string",
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"compute": {"gpuDevicesRequest": 1,
"gpuRequestType": "portion",
"gpuPortionRequest": 0.5,
"gpuPortionLimit": 0.5,
"gpuMemoryRequest": "10M",
"gpuMemoryLimit": "10M",
"migProfile": "1g.5gb",
"cpuCoreRequest": 0.5,
"cpuCoreLimit": 2,
"cpuMemoryRequest": "20M",
"cpuMemoryLimit": "30M",
"largeShmRequest": false,
"extendedResources": [{"resource": "hardware-vendor.example/foo",
"quantity": 2,
"exclude": false
}
]
},
"autoscaling": {"metricThresholdPercentage": 1,
"minReplicas": 0,
"maxReplicas": 1,
"initialReplicas": 0,
"activationReplicas": 1,
"concurrencyHardLimit": 0,
"scaleToZeroRetentionSeconds": 3600,
"scaleDownDelaySeconds": 3600,
"initializationTimeoutSeconds": 1,
"metric": "http_requests_total",
"metricThreshold": 0
},
"servingConfiguration": {"initializationTimeoutSeconds": 1,
"requestTimeoutSeconds": 1
}
}
}

Response samples

application/json

{"name": "my-workload-name",
"requestedName": "string",
"workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
"projectId": 1,
"departmentId": 2,
"clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
"createdBy": "test@lab.com",
"createdAt": "2022-01-01T03:49:52.531Z",
"deletedAt": "2022-01-01T03:49:52.531Z",
"desiredPhase": "Running",
"actualPhase": "Creating",
"spec": {"command": "python",
"args": "-x my-script.py",
"image": "python:3.8",
"imagePullPolicy": "Always",
"workingDir": "/home/myfolder",
"createHomeDir": true,
"probes": {"readiness": {"initialDelaySeconds": 0,
"periodSeconds": 1,
"timeoutSeconds": 1,
"successThreshold": 1,
"failureThreshold": 1,
"handler": {"httpGet": {"path": "/",
"port": 1,
"host": "example.com",
"scheme": "HTTP"
}
}
}
},
"nodeType": "my-node-type",
"nodeAffinityRequired": {"nodeSelectorTerms": [{"matchExpressions": [{"key": "string",
"operator": "In",
"values": [null
]
}
]
}
]
},
"podAffinity": {"type": "Required",
"key": "string"
},
"category": "string",
"priorityClass": "string",
"nodePools": ["my-node-pool-a",
"my-node-pool-b"
],
"environmentVariables": [{"name": "HOME",
"value": "/home/my-folder",
"secret": {"name": "postgress_secret",
"key": "POSTGRES_PASSWORD"
},
"configMap": {"name": "my-config-map",
"key": "MY_POSTGRES_SCHEMA"
},
"podFieldRef": {"path": "metadata.name"
},
"exclude": false,
"description": "Home directory of the user."
}
],
"annotations": [{"name": "billing",
"value": "my-billing-unit",
"exclude": false
}
],
"labels": [{"name": "stage",
"value": "initial-research",
"exclude": false
}
],
"imagePullSecrets": [{"name": "string",
"userCredential": true,
"exclude": false
}
],
"tolerations": [{"name": "string",
"operator": "Equal",
"key": "string",
"value": "string",
"effect": "NoSchedule",
"seconds": 1,
"exclude": false
}
],
"ports": [{"container": 8080,
"serviceType": "LoadBalancer",
"external": 30080,
"toolType": "pytorch",
"toolName": "my-pytorch",
"name": "port-instance-a",
"exclude": false
}
],
"exposedUrls": [{"container": 8080,
"url": "https://my-url.com",
"authorizedUsers": ["user-a",
"user-b"
],
"authorizedGroups": ["group-a",
"group-b"
],
"toolType": "jupyter",
"toolName": "my-pytorch",
"name": "url-instance-a",
"exclude": false
}
],
"relatedUrls": [{"url": "https://my-url.com",
"type": "wandb",
"name": "url-instance-a",
"exclude": false
}
],
"compute": {"gpuDevicesRequest": 1,
"gpuRequestType": "portion",
"gpuPortionRequest": 0.5,
"gpuPortionLimit": 0.5,
"gpuMemoryRequest": "10M",
"gpuMemoryLimit": "10M",
"migProfile": "1g.5gb",
"cpuCoreRequest": 0.5,
"cpuCoreLimit": 2,
"cpuMemoryRequest": "20M",
"cpuMemoryLimit": "30M",
"largeShmRequest": false,
"extendedResources": [{"resource": "hardware-vendor.example/foo",
"quantity": 2,
"exclude": false
}
]
},
"servingPort": {"container": 8080,
"protocol": "http",
"authorizationType": "public",
"authorizedUsers": ["user.a@example.com",
"user.b@example.com"
],
"authorizedGroups": ["group-a",
"group-b"
],
"clusterLocalAccessOnly": true
},
"storage": {"configMapVolume": [{"name": "storage-instance-a",
"configMap": "string",
"mountPath": "string",
"subPath": "string",
"defaultMode": "0644",
"exclude": false
}
],
"dataVolume": [{"id": "123e4567-e89b-12d3-a456-426614174000",
"mountPath": "/mnt/data",
"exclude": false
}
],
"emptyDirVolume": [{"name": "storage-instance-a",
"path": "/mnt/emptydir",
"medium": "string",
"sizeLimit": "1G",
"exclude": false
}
],
"git": [{"name": "storage-instance-a",
"repository": "https://github.com/my-git/my-repo",
"branch": "main",
"revision": "string",
"path": "/container/my-repository",
"passwordSecret": "my-password-secret",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password",
"exclude": false,
"secretRef": {"name": "my-password-secret",
"authenticatingMethod": "password",
"secretKeyOfUser": "User",
"secretKeyOfPassword": "Password"
}
}
],
"hostPath": [{"name": "storage-instance-a",
"path": "/container/directory",
"readOnly": true,
"mountPath": "/local/directory",
"mountPropagation": "None",
"exclude": false
}
],
"nfs": [{"name": "storage-instance-a",
"path": "/container/nfs",
"readOnly": true,
"server": "my.nfs.com",
"mountPath": "/local/nfs",
"exclude": false
}
],
"pvc": [{"name": "storage-instance-a",
"path": "/container/my-claim",
"existingPvc": false,
"claimName": "my-claim",
"readOnly": false,
"ephemeral": false,
"claimInfo": {"size": "1G",
"storageClass": "my-storage-class",
"accessModes": {"readWriteOnce": true,
"readOnlyMany": false,
"readWriteMany": false
},
"volumeMode": "Filesystem",
"addedAttrValues": [{"key": null,
"value": null
}
]
},
"dataSharing": false,
"exclude": false
}
],
"secretVolume": [{"name": "storage-instance-a",
"mountPath": "string",
"defaultMode": "0644",
"secret": "string",
"exclude": false
}
]
},
"security": {"uidGidSource": "fromTheImage",
"capabilities": ["CHOWN",
"KILL"
],
"seccompProfileType": "RuntimeDefault",
"runAsNonRoot": true,
"readOnlyRootFilesystem": false,
"runAsUid": 500,
"runAsGid": 30,
"supplementalGroups": "2,3,5,8"
},
"autoscaling": {"metricThresholdPercentage": 1,
"minReplicas": 0,
"maxReplicas": 1,
"initialReplicas": 0,
"activationReplicas": 1,
"concurrencyHardLimit": 0,
"scaleToZeroRetentionSeconds": 3600,
"scaleDownDelaySeconds": 3600,
"initializationTimeoutSeconds": 1,
"metric": "http_requests_total",
"metricThreshold": 0
},
"servingConfiguration": {"initializationTimeoutSeconds": 1,
"requestTimeoutSeconds": 1
}
}
}

Get inference metrics data.

Retrieve inference metrics data by id. Supported from control-plane version 2.18 or later.

Security: bearerAuth

Request

path Parameters

workloadId

required

string <uuid>

The Universally Unique Identifier (UUID) of the workload.

query Parameters

metricType required	Array of strings (InferenceWorkloadMetricType) Specify which data to request. Items Enum: "THROUGHPUT" "LATENCY"
start required	string <date-time> Start date of time range to fetch data in ISO 8601 timestamp format. Example: start=2023-06-06T12:09:18.211Z
end required	string <date-time> End date of time range to fetch data in ISO 8601 timestamp format. Example: end=2023-06-07T12:09:18.211Z
numberOfSamples	integer [ 0 .. 1000 ] Default: 20 The number of samples to take in the specified time range. Example: numberOfSamples=20

Responses

200

Executed successfully.

207

Partial success.

400

Bad request.

401

Unauthorized

403

Forbidden

404

The specified resource was not found

500

unexpected error

503

unexpected error

GET /api/v1/workloads/inferences/{workloadId}/metrics

Response samples

{"measurements": [{"type": "ALLOCATED_GPU",
"labels": "{'gpu': '3'}",
"values": [{"value": "85",
"timestamp": "2023-06-06 12:09:18.211"
}
]
}
]
}

Get an inference pod's metrics data.

Retrieve metrics data for a specific pod of an inference workload, identified by workload id and pod id. Supported from control-plane version 2.18 or later.

Security: bearerAuth

Request

path Parameters

workloadId required	string <uuid> The Universally Unique Identifier (UUID) of the workload.
podId required	string <uuid> The requested pod id.

query Parameters

metricType required	Array of strings (InferencePodMetricType) Specifies metrics data to request. Inference metrics are only available for inference workloads. Items Enum: "THROUGHPUT" "LATENCY"
start required	string <date-time> Start date of time range to fetch data in ISO 8601 timestamp format. Example: start=2023-06-06T12:09:18.211Z
end required	string <date-time> End date of time range to fetch data in ISO 8601 timestamp format. Example: end=2023-06-07T12:09:18.211Z
numberOfSamples	integer [ 0 .. 1000 ] Default: 20 The number of samples to take in the specified time range. Example: numberOfSamples=20

Responses

200

Executed successfully.

207

Partial success.

400

Bad request.

401

Unauthorized

403

Forbidden

404

The specified resource was not found

500

unexpected error

503

unexpected error

GET /api/v1/workloads/inferences/{workloadId}/pods/{podId}/metrics

Response samples

{"measurements": [{"type": "ALLOCATED_GPU",
"labels": "{'gpu': '3'}",
"values": [{"value": "85",
"timestamp": "2023-06-06 12:09:18.211"
}
]
}
]
}

➔ Next to Revisions