Retrieves various information about a given Hugging Face model.
Request completed successfully.
Bad submission request.
Unauthorized
Forbidden
unexpected error
unexpected error
{
  "modelName": "meta-llama/Llama-2-7b-chat-hf",
  "hfToken": "hf_aabbccDDEEFFgghhiiKKLLMMnnooPP1234",
  "requestMinMemoryRequirements": true
}
{
  "modelName": "meta-llama/Llama-2-7b-chat-hf",
  "pipelineTag": "text-generation",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "vllmSupportingVersions": [
    "0.5.2"
  ],
  "minMemoryRequirementsMb": 0
}
Create an inference using container-related fields.
Request completed successfully.
Bad request.
Unauthorized
Forbidden
unexpected error
{- "name": "my-workload-name",
- "useGivenNameAsPrefix": true,
- "projectId": 1,
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "spec": {
- "command": "python",
- "args": "-x my-script.py",
- "image": "python:3.8",
- "imagePullPolicy": "Always",
- "workingDir": "/home/myfolder",
- "createHomeDir": true,
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "nodeType": "my-node-type",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "podAffinity": {
- "type": "Required",
- "key": "string"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false
}
], - "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
], - "ports": [
- {
- "container": 8080,
- "serviceType": "LoadBalancer",
- "external": 30080,
- "toolType": "pytorch",
- "toolName": "my-pytorch",
- "name": "port-instance-a"
}
], - "exposedUrls": [
- {
- "container": 8080,
- "authorizedUsers": [
- "user-a",
- "user-b"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "toolType": "jupyter",
- "toolName": "my-pytorch",
- "name": "url-instance-a"
}
], - "compute": {
- "gpuDevicesRequest": 1,
- "gpuRequestType": "portion",
- "gpuPortionRequest": 0.5,
- "gpuPortionLimit": 0.5,
- "gpuMemoryRequest": "10M",
- "gpuMemoryLimit": "10M",
- "migProfile": "1g.5gb",
- "cpuCoreRequest": 0.5,
- "cpuCoreLimit": 2,
- "cpuMemoryRequest": "20M",
- "cpuMemoryLimit": "30M",
- "largeShmRequest": false,
- "extendedResources": [
- {
- "resource": "hardware-vendor.example/foo",
- "quantity": 2,
- "exclude": false
}
]
}, - "storage": {
- "dataVolume": [
- {
- "id": "123e4567-e89b-12d3-a456-426614174000",
- "mountPath": "/mnt/data"
}
], - "pvc": [
- {
- "name": "storage-instance-a",
- "path": "/container/my-claim",
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "ephemeral": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem"
}
}
], - "hostPath": [
- {
- "name": "storage-instance-a",
- "path": "/container/directory",
- "readOnly": true,
- "mountPath": "/local/directory",
- "mountPropagation": "None"
}
], - "nfs": [
- {
- "name": "storage-instance-a",
- "path": "/container/nfs",
- "readOnly": true,
- "server": "my.nfs.com",
- "mountPath": "/local/nfs"
}
], - "git": [
- {
- "name": "storage-instance-a",
- "branch": "main",
- "revision": "string",
- "path": "/container/my-repository",
- "passwordSecret": "my-password-secret",
- "secretKeyOfUser": "User",
- "secretKeyOfPassword": "Password"
}
], - "configMapVolume": [
- {
- "name": "storage-instance-a",
- "configMap": "string",
- "mountPath": "string"
}
], - "secretVolume": [
- {
- "name": "storage-instance-a",
- "mountPath": "string",
- "secret": "string"
}
]
}, - "security": {
- "uidGidSource": "fromTheImage",
- "capabilities": [
- "CHOWN",
- "KILL"
], - "seccompProfileType": "RuntimeDefault",
- "runAsNonRoot": true,
- "readOnlyRootFilesystem": false,
- "runAsUid": 500,
- "runAsGid": 30,
- "supplementalGroups": "2,3,5,8"
}, - "servingPort": {
- "container": 8080,
- "protocol": "http",
- "authorizationType": "public",
- "authorizedUsers": [
- "user.a@example.com",
- "user.b@example.com"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "clusterLocalAccessOnly": true
}, - "autoscaling": {
- "minReplicas": 0,
- "maxReplicas": 1,
- "scaleToZeroRetentionSeconds": 3600,
- "metric": "throughput",
- "metricThreshold": 0
}
}
}
{- "name": "my-workload-name",
- "requestedName": "string",
- "workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
- "projectId": 1,
- "departmentId": 2,
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "createdBy": "test@lab.com",
- "createdAt": "2022-01-01T03:49:52.531Z",
- "desiredPhase": "Running",
- "actualPhase": "Creating",
- "spec": {
- "command": "python",
- "args": "-x my-script.py",
- "image": "python:3.8",
- "imagePullPolicy": "Always",
- "workingDir": "/home/myfolder",
- "createHomeDir": true,
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "nodeType": "my-node-type",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "podAffinity": {
- "type": "Required",
- "key": "string"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false
}
], - "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
], - "ports": [
- {
- "container": 8080,
- "serviceType": "LoadBalancer",
- "external": 30080,
- "toolType": "pytorch",
- "toolName": "my-pytorch",
- "name": "port-instance-a"
}
], - "exposedUrls": [
- {
- "container": 8080,
- "authorizedUsers": [
- "user-a",
- "user-b"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "toolType": "jupyter",
- "toolName": "my-pytorch",
- "name": "url-instance-a"
}
], - "compute": {
- "gpuDevicesRequest": 1,
- "gpuRequestType": "portion",
- "gpuPortionRequest": 0.5,
- "gpuPortionLimit": 0.5,
- "gpuMemoryRequest": "10M",
- "gpuMemoryLimit": "10M",
- "migProfile": "1g.5gb",
- "cpuCoreRequest": 0.5,
- "cpuCoreLimit": 2,
- "cpuMemoryRequest": "20M",
- "cpuMemoryLimit": "30M",
- "largeShmRequest": false,
- "extendedResources": [
- {
- "resource": "hardware-vendor.example/foo",
- "quantity": 2,
- "exclude": false
}
]
}, - "storage": {
- "dataVolume": [
- {
- "id": "123e4567-e89b-12d3-a456-426614174000",
- "mountPath": "/mnt/data"
}
], - "pvc": [
- {
- "name": "storage-instance-a",
- "path": "/container/my-claim",
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "ephemeral": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem"
}
}
], - "hostPath": [
- {
- "name": "storage-instance-a",
- "path": "/container/directory",
- "readOnly": true,
- "mountPath": "/local/directory",
- "mountPropagation": "None"
}
], - "nfs": [
- {
- "name": "storage-instance-a",
- "path": "/container/nfs",
- "readOnly": true,
- "server": "my.nfs.com",
- "mountPath": "/local/nfs"
}
], - "git": [
- {
- "name": "storage-instance-a",
- "branch": "main",
- "revision": "string",
- "path": "/container/my-repository",
- "passwordSecret": "my-password-secret",
- "secretKeyOfUser": "User",
- "secretKeyOfPassword": "Password"
}
], - "configMapVolume": [
- {
- "name": "storage-instance-a",
- "configMap": "string",
- "mountPath": "string"
}
], - "secretVolume": [
- {
- "name": "storage-instance-a",
- "mountPath": "string",
- "secret": "string"
}
]
}, - "security": {
- "uidGidSource": "fromTheImage",
- "capabilities": [
- "CHOWN",
- "KILL"
], - "seccompProfileType": "RuntimeDefault",
- "runAsNonRoot": true,
- "readOnlyRootFilesystem": false,
- "runAsUid": 500,
- "runAsGid": 30,
- "supplementalGroups": "2,3,5,8"
}, - "servingPort": {
- "container": 8080,
- "protocol": "http",
- "authorizationType": "public",
- "authorizedUsers": [
- "user.a@example.com",
- "user.b@example.com"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "clusterLocalAccessOnly": true
}, - "autoscaling": {
- "minReplicas": 0,
- "maxReplicas": 1,
- "scaleToZeroRetentionSeconds": 3600,
- "metric": "throughput",
- "metricThreshold": 0
}
}
}
Delete an inference using a workload id.
No Content.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "code": 401,
  "message": "Issuer is not familiar."
}
Retrieve inference details using a workload id.
Executed successfully.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{- "name": "my-workload-name",
- "requestedName": "string",
- "workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
- "projectId": 1,
- "departmentId": 2,
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "createdBy": "test@lab.com",
- "createdAt": "2022-01-01T03:49:52.531Z",
- "desiredPhase": "Running",
- "actualPhase": "Creating",
- "spec": {
- "command": "python",
- "args": "-x my-script.py",
- "image": "python:3.8",
- "imagePullPolicy": "Always",
- "workingDir": "/home/myfolder",
- "createHomeDir": true,
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "nodeType": "my-node-type",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "podAffinity": {
- "type": "Required",
- "key": "string"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false
}
], - "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
], - "ports": [
- {
- "container": 8080,
- "serviceType": "LoadBalancer",
- "external": 30080,
- "toolType": "pytorch",
- "toolName": "my-pytorch",
- "name": "port-instance-a"
}
], - "exposedUrls": [
- {
- "container": 8080,
- "authorizedUsers": [
- "user-a",
- "user-b"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "toolType": "jupyter",
- "toolName": "my-pytorch",
- "name": "url-instance-a"
}
], - "compute": {
- "gpuDevicesRequest": 1,
- "gpuRequestType": "portion",
- "gpuPortionRequest": 0.5,
- "gpuPortionLimit": 0.5,
- "gpuMemoryRequest": "10M",
- "gpuMemoryLimit": "10M",
- "migProfile": "1g.5gb",
- "cpuCoreRequest": 0.5,
- "cpuCoreLimit": 2,
- "cpuMemoryRequest": "20M",
- "cpuMemoryLimit": "30M",
- "largeShmRequest": false,
- "extendedResources": [
- {
- "resource": "hardware-vendor.example/foo",
- "quantity": 2,
- "exclude": false
}
]
}, - "storage": {
- "dataVolume": [
- {
- "id": "123e4567-e89b-12d3-a456-426614174000",
- "mountPath": "/mnt/data"
}
], - "pvc": [
- {
- "name": "storage-instance-a",
- "path": "/container/my-claim",
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "ephemeral": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem"
}
}
], - "hostPath": [
- {
- "name": "storage-instance-a",
- "path": "/container/directory",
- "readOnly": true,
- "mountPath": "/local/directory",
- "mountPropagation": "None"
}
], - "nfs": [
- {
- "name": "storage-instance-a",
- "path": "/container/nfs",
- "readOnly": true,
- "server": "my.nfs.com",
- "mountPath": "/local/nfs"
}
], - "git": [
- {
- "name": "storage-instance-a",
- "branch": "main",
- "revision": "string",
- "path": "/container/my-repository",
- "passwordSecret": "my-password-secret",
- "secretKeyOfUser": "User",
- "secretKeyOfPassword": "Password"
}
], - "configMapVolume": [
- {
- "name": "storage-instance-a",
- "configMap": "string",
- "mountPath": "string"
}
], - "secretVolume": [
- {
- "name": "storage-instance-a",
- "mountPath": "string",
- "secret": "string"
}
]
}, - "security": {
- "uidGidSource": "fromTheImage",
- "capabilities": [
- "CHOWN",
- "KILL"
], - "seccompProfileType": "RuntimeDefault",
- "runAsNonRoot": true,
- "readOnlyRootFilesystem": false,
- "runAsUid": 500,
- "runAsGid": 30,
- "supplementalGroups": "2,3,5,8"
}, - "servingPort": {
- "container": 8080,
- "protocol": "http",
- "authorizationType": "public",
- "authorizedUsers": [
- "user.a@example.com",
- "user.b@example.com"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "clusterLocalAccessOnly": true
}, - "autoscaling": {
- "minReplicas": 0,
- "maxReplicas": 1,
- "scaleToZeroRetentionSeconds": 3600,
- "metric": "throughput",
- "metricThreshold": 0
}
}
}
Update the specification of an existing inference workload.
object or null: Container overridable fields. In the context of assets, these are environment asset fields that can be overridden in the submit workload request.
Executed successfully.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "spec": {
    "command": "python",
    "args": "-x my-script.py",
    "image": "python:3.8",
    "imagePullPolicy": "Always",
    "workingDir": "/home/myfolder",
    "createHomeDir": true,
    "probes": {
      "readiness": {
        "initialDelaySeconds": 0,
        "periodSeconds": 1,
        "timeoutSeconds": 1,
        "successThreshold": 1,
        "failureThreshold": 1,
        "handler": {
          "httpGet": {
            "path": "/",
            "port": 1,
            "host": "example.com",
            "scheme": "HTTP"
          }
        }
      }
    },
    "environmentVariables": [
      {
        "name": "HOME",
        "value": "/home/my-folder",
        "secret": {
          "name": "postgress_secret",
          "key": "POSTGRES_PASSWORD"
        },
        "exclude": false
      }
    ],
    "autoscaling": {
      "minReplicas": 0,
      "maxReplicas": 1,
      "scaleToZeroRetentionSeconds": 3600,
      "metric": "throughput",
      "metricThreshold": 0
    }
  }
}
{- "name": "my-workload-name",
- "requestedName": "string",
- "workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
- "projectId": 1,
- "departmentId": 2,
- "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
- "createdBy": "test@lab.com",
- "createdAt": "2022-01-01T03:49:52.531Z",
- "desiredPhase": "Running",
- "actualPhase": "Creating",
- "spec": {
- "command": "python",
- "args": "-x my-script.py",
- "image": "python:3.8",
- "imagePullPolicy": "Always",
- "workingDir": "/home/myfolder",
- "createHomeDir": true,
- "probes": {
- "readiness": {
- "initialDelaySeconds": 0,
- "periodSeconds": 1,
- "timeoutSeconds": 1,
- "successThreshold": 1,
- "failureThreshold": 1,
- "handler": {
- "httpGet": {
- "path": "/",
- "port": 1,
- "host": "example.com",
- "scheme": "HTTP"
}
}
}
}, - "nodeType": "my-node-type",
- "nodePools": [
- "my-node-pool-a",
- "my-node-pool-b"
], - "podAffinity": {
- "type": "Required",
- "key": "string"
}, - "environmentVariables": [
- {
- "name": "HOME",
- "value": "/home/my-folder",
- "secret": {
- "name": "postgress_secret",
- "key": "POSTGRES_PASSWORD"
}, - "exclude": false
}
], - "annotations": [
- {
- "name": "billing",
- "value": "my-billing-unit",
- "exclude": false
}
], - "labels": [
- {
- "name": "stage",
- "value": "initial-research",
- "exclude": false
}
], - "tolerations": [
- {
- "name": "string",
- "operator": "Equal",
- "key": "string",
- "value": "string",
- "effect": "NoSchedule",
- "seconds": 1,
- "exclude": false
}
], - "ports": [
- {
- "container": 8080,
- "serviceType": "LoadBalancer",
- "external": 30080,
- "toolType": "pytorch",
- "toolName": "my-pytorch",
- "name": "port-instance-a"
}
], - "exposedUrls": [
- {
- "container": 8080,
- "authorizedUsers": [
- "user-a",
- "user-b"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "toolType": "jupyter",
- "toolName": "my-pytorch",
- "name": "url-instance-a"
}
], - "compute": {
- "gpuDevicesRequest": 1,
- "gpuRequestType": "portion",
- "gpuPortionRequest": 0.5,
- "gpuPortionLimit": 0.5,
- "gpuMemoryRequest": "10M",
- "gpuMemoryLimit": "10M",
- "migProfile": "1g.5gb",
- "cpuCoreRequest": 0.5,
- "cpuCoreLimit": 2,
- "cpuMemoryRequest": "20M",
- "cpuMemoryLimit": "30M",
- "largeShmRequest": false,
- "extendedResources": [
- {
- "resource": "hardware-vendor.example/foo",
- "quantity": 2,
- "exclude": false
}
]
}, - "storage": {
- "dataVolume": [
- {
- "id": "123e4567-e89b-12d3-a456-426614174000",
- "mountPath": "/mnt/data"
}
], - "pvc": [
- {
- "name": "storage-instance-a",
- "path": "/container/my-claim",
- "existingPvc": false,
- "claimName": "my-claim",
- "readOnly": false,
- "ephemeral": false,
- "claimInfo": {
- "size": "1G",
- "storageClass": "my-storage-class",
- "accessModes": {
- "readWriteOnce": true,
- "readOnlyMany": false,
- "readWriteMany": false
}, - "volumeMode": "Filesystem"
}
}
], - "hostPath": [
- {
- "name": "storage-instance-a",
- "path": "/container/directory",
- "readOnly": true,
- "mountPath": "/local/directory",
- "mountPropagation": "None"
}
], - "nfs": [
- {
- "name": "storage-instance-a",
- "path": "/container/nfs",
- "readOnly": true,
- "server": "my.nfs.com",
- "mountPath": "/local/nfs"
}
], - "git": [
- {
- "name": "storage-instance-a",
- "branch": "main",
- "revision": "string",
- "path": "/container/my-repository",
- "passwordSecret": "my-password-secret",
- "secretKeyOfUser": "User",
- "secretKeyOfPassword": "Password"
}
], - "configMapVolume": [
- {
- "name": "storage-instance-a",
- "configMap": "string",
- "mountPath": "string"
}
], - "secretVolume": [
- {
- "name": "storage-instance-a",
- "mountPath": "string",
- "secret": "string"
}
]
}, - "security": {
- "uidGidSource": "fromTheImage",
- "capabilities": [
- "CHOWN",
- "KILL"
], - "seccompProfileType": "RuntimeDefault",
- "runAsNonRoot": true,
- "readOnlyRootFilesystem": false,
- "runAsUid": 500,
- "runAsGid": 30,
- "supplementalGroups": "2,3,5,8"
}, - "servingPort": {
- "container": 8080,
- "protocol": "http",
- "authorizationType": "public",
- "authorizedUsers": [
- "user.a@example.com",
- "user.b@example.com"
], - "authorizedGroups": [
- "group-a",
- "group-b"
], - "clusterLocalAccessOnly": true
}, - "autoscaling": {
- "minReplicas": 0,
- "maxReplicas": 1,
- "scaleToZeroRetentionSeconds": 3600,
- "metric": "throughput",
- "metricThreshold": 0
}
}
}
Retrieve inference metrics data by id. Supported from control-plane version 2.18 or later.
Executed successfully.
Partial success.
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "measurements": [
    {
      "type": "ALLOCATED_GPU",
      "labels": "{'gpu': '3'}",
      "values": [
        {
          "value": "85",
          "timestamp": "2023-06-06 12:09:18.211"
        }
      ]
    }
  ]
}
Retrieve metrics data for an inference workload's pod by workload id and pod id. Supported from control-plane version 2.18 or later.
Executed successfully.
Partial success.
Bad request.
Unauthorized
Forbidden
The specified resource was not found
unexpected error
unexpected error
{
  "measurements": [
    {
      "type": "ALLOCATED_GPU",
      "labels": "{'gpu': '3'}",
      "values": [
        {
          "value": "85",
          "timestamp": "2023-06-06 12:09:18.211"
        }
      ]
    }
  ]
}