Trainings are workloads dedicated to training models.
They are preemptible by design because they run in unattended sessions, where
scientists and researchers do not need to be present. These workloads are allowed to use
resources beyond the project's quota.
Create a training workload using container-related fields.
Request completed successfully.
Bad submission request.
Unexpected error.
{
  "name": "my-workload-name",
  "useGivenNameAsPrefix": true,
  "projectId": 1,
  "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
  "spec": {
    "command": "python",
    "args": "-x",
    "image": "python:3.8",
    "imagePullPolicy": "Always",
    "workingDir": "/home/myfolder",
    "createHomeDir": true,
    "probes": {
      "readiness": {
        "initialDelaySeconds": 0,
        "periodSeconds": 1,
        "timeoutSeconds": 1,
        "successThreshold": 1,
        "failureThreshold": 1,
        "handler": {
          "httpGet": {
            "path": "/",
            "port": 1,
            "host": "",
            "scheme": "HTTP"
          }
        }
      }
    },
    "nodeType": "my-node-type",
    "nodePools": [
      "my-node-pool-a",
      "my-node-pool-b"
    ],
    "podAffinity": {
      "type": "Required",
      "key": "string"
    },
    "tty": true,
    "stdin": true,
    "environmentVariables": [
      {
        "name": "HOME",
        "value": "/home/my-folder",
        "secret": {
          "name": "postgress_secret"
        },
        "configMap": {
          "name": "my-config-map"
        },
        "exclude": false,
        "description": "Home directory of the user."
      }
    ],
    "annotations": [
      {
        "name": "billing",
        "value": "my-billing-unit",
        "exclude": false
      }
    ],
    "labels": [
      {
        "name": "stage",
        "value": "initial-research",
        "exclude": false
      }
    ],
    "tolerations": [
      {
        "name": "string",
        "operator": "Equal",
        "key": "string",
        "value": "string",
        "effect": "NoSchedule",
        "seconds": 1,
        "exclude": false
      }
    ],
    "terminateAfterPreemption": false,
    "autoDeletionTimeAfterCompletionSeconds": 15,
    "terminationGracePeriodSeconds": 20,
    "backoffLimit": 3,
    "ports": [
      {
        "container": 8080,
        "serviceType": "LoadBalancer",
        "external": 30080,
        "toolType": "pytorch",
        "toolName": "my-pytorch",
        "name": "port-instance-a"
      }
    ],
    "exposedUrls": [
      {
        "container": 8080,
        "authorizedUsers": [
          "user-a",
          "user-b"
        ],
        "authorizedGroups": [
          "group-a",
          "group-b"
        ],
        "toolType": "jupyter",
        "toolName": "my-pytorch",
        "name": "url-instance-a"
      }
    ],
    "priorityClass": "build",
    "completions": 1,
    "parallelism": 1,
    "compute": {
      "gpuDevicesRequest": 1,
      "gpuRequestType": "portion",
      "gpuPortionRequest": 0.5,
      "gpuPortionLimit": 0.5,
      "gpuMemoryRequest": "10M",
      "gpuMemoryLimit": "10M",
      "migProfile": "1g.5gb",
      "cpuCoreRequest": 0.5,
      "cpuCoreLimit": 2,
      "cpuMemoryRequest": "20M",
      "cpuMemoryLimit": "30M",
      "largeShmRequest": false,
      "extendedResources": [
        {
          "resource": "hardware-vendor.example/foo",
          "quantity": 2,
          "exclude": false
        }
      ]
    },
    "storage": {
      "dataVolume": [
        {
          "id": "123e4567-e89b-12d3-a456-426614174000",
          "mountPath": "/mnt/data"
        }
      ],
      "pvc": [
        {
          "name": "storage-instance-a",
          "path": "/container/my-claim",
          "existingPvc": false,
          "claimName": "my-claim",
          "readOnly": false,
          "ephemeral": false,
          "claimInfo": {
            "size": "1G",
            "storageClass": "my-storage-class",
            "accessModes": {
              "readWriteOnce": true,
              "readOnlyMany": false,
              "readWriteMany": false
            },
            "volumeMode": "Filesystem"
          }
        }
      ],
      "hostPath": [
        {
          "name": "storage-instance-a",
          "path": "/container/directory",
          "readOnly": true,
          "mountPath": "/local/directory",
          "mountPropagation": "None"
        }
      ],
      "nfs": [
        {
          "name": "storage-instance-a",
          "path": "/container/nfs",
          "readOnly": true,
          "server": "",
          "mountPath": "/local/nfs"
        }
      ],
      "git": [
        {
          "name": "storage-instance-a",
          "branch": "main",
          "revision": "string",
          "path": "/container/my-repository",
          "passwordSecret": "my-password-secret",
          "secretKeyOfUser": "User",
          "secretKeyOfPassword": "Password"
        }
      ],
      "configMapVolume": [
        {
          "name": "storage-instance-a",
          "configMap": "string",
          "mountPath": "string"
        }
      ],
      "secretVolume": [
        {
          "name": "storage-instance-a",
          "mountPath": "string",
          "secret": "string"
        }
      ],
      "s3": [
        {
          "name": "storage-instance-a",
          "bucket": "my-bucket",
          "path": "/container/my-bucket",
          "accessKeySecret": "my-access-key-secret",
          "secretKeyOfAccessKeyId": "AccessKeyId",
          "secretKeyOfSecretKey": "SecretKey"
        }
      ]
    },
    "security": {
      "uidGidSource": "fromTheImage",
      "capabilities": [
        "CHOWN",
        "KILL"
      ],
      "seccompProfileType": "RuntimeDefault",
      "runAsNonRoot": true,
      "readOnlyRootFilesystem": false,
      "runAsUid": 500,
      "runAsGid": 30,
      "supplementalGroups": "2,3,5,8",
      "allowPrivilegeEscalation": false,
      "hostIpc": false,
      "hostNetwork": false
    }
  }
}
{
  "name": "my-workload-name",
  "requestedName": "string",
  "workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
  "projectId": 1,
  "departmentId": 2,
  "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
  "createdBy": "",
  "createdAt": "2022-01-01T03:49:52.531Z",
  "desiredPhase": "Running",
  "actualPhase": "Creating",
  "spec": {
    "command": "python",
    "args": "-x",
    "image": "python:3.8",
    "imagePullPolicy": "Always",
    "workingDir": "/home/myfolder",
    "createHomeDir": true,
    "probes": {
      "readiness": {
        "initialDelaySeconds": 0,
        "periodSeconds": 1,
        "timeoutSeconds": 1,
        "successThreshold": 1,
        "failureThreshold": 1,
        "handler": {
          "httpGet": {
            "path": "/",
            "port": 1,
            "host": "",
            "scheme": "HTTP"
          }
        }
      }
    },
    "nodeType": "my-node-type",
    "nodePools": [
      "my-node-pool-a",
      "my-node-pool-b"
    ],
    "podAffinity": {
      "type": "Required",
      "key": "string"
    },
    "tty": true,
    "stdin": true,
    "environmentVariables": [
      {
        "name": "HOME",
        "value": "/home/my-folder",
        "secret": {
          "name": "postgress_secret"
        },
        "configMap": {
          "name": "my-config-map"
        },
        "exclude": false,
        "description": "Home directory of the user."
      }
    ],
    "annotations": [
      {
        "name": "billing",
        "value": "my-billing-unit",
        "exclude": false
      }
    ],
    "labels": [
      {
        "name": "stage",
        "value": "initial-research",
        "exclude": false
      }
    ],
    "tolerations": [
      {
        "name": "string",
        "operator": "Equal",
        "key": "string",
        "value": "string",
        "effect": "NoSchedule",
        "seconds": 1,
        "exclude": false
      }
    ],
    "terminateAfterPreemption": false,
    "autoDeletionTimeAfterCompletionSeconds": 15,
    "terminationGracePeriodSeconds": 20,
    "backoffLimit": 3,
    "ports": [
      {
        "container": 8080,
        "serviceType": "LoadBalancer",
        "external": 30080,
        "toolType": "pytorch",
        "toolName": "my-pytorch",
        "name": "port-instance-a"
      }
    ],
    "exposedUrls": [
      {
        "container": 8080,
        "authorizedUsers": [
          "user-a",
          "user-b"
        ],
        "authorizedGroups": [
          "group-a",
          "group-b"
        ],
        "toolType": "jupyter",
        "toolName": "my-pytorch",
        "name": "url-instance-a"
      }
    ],
    "priorityClass": "build",
    "completions": 1,
    "parallelism": 1,
    "compute": {
      "gpuDevicesRequest": 1,
      "gpuRequestType": "portion",
      "gpuPortionRequest": 0.5,
      "gpuPortionLimit": 0.5,
      "gpuMemoryRequest": "10M",
      "gpuMemoryLimit": "10M",
      "migProfile": "1g.5gb",
      "cpuCoreRequest": 0.5,
      "cpuCoreLimit": 2,
      "cpuMemoryRequest": "20M",
      "cpuMemoryLimit": "30M",
      "largeShmRequest": false,
      "extendedResources": [
        {
          "resource": "hardware-vendor.example/foo",
          "quantity": 2,
          "exclude": false
        }
      ]
    },
    "storage": {
      "dataVolume": [
        {
          "id": "123e4567-e89b-12d3-a456-426614174000",
          "mountPath": "/mnt/data"
        }
      ],
      "pvc": [
        {
          "name": "storage-instance-a",
          "path": "/container/my-claim",
          "existingPvc": false,
          "claimName": "my-claim",
          "readOnly": false,
          "ephemeral": false,
          "claimInfo": {
            "size": "1G",
            "storageClass": "my-storage-class",
            "accessModes": {
              "readWriteOnce": true,
              "readOnlyMany": false,
              "readWriteMany": false
            },
            "volumeMode": "Filesystem"
          }
        }
      ],
      "hostPath": [
        {
          "name": "storage-instance-a",
          "path": "/container/directory",
          "readOnly": true,
          "mountPath": "/local/directory",
          "mountPropagation": "None"
        }
      ],
      "nfs": [
        {
          "name": "storage-instance-a",
          "path": "/container/nfs",
          "readOnly": true,
          "server": "",
          "mountPath": "/local/nfs"
        }
      ],
      "git": [
        {
          "name": "storage-instance-a",
          "branch": "main",
          "revision": "string",
          "path": "/container/my-repository",
          "passwordSecret": "my-password-secret",
          "secretKeyOfUser": "User",
          "secretKeyOfPassword": "Password"
        }
      ],
      "configMapVolume": [
        {
          "name": "storage-instance-a",
          "configMap": "string",
          "mountPath": "string"
        }
      ],
      "secretVolume": [
        {
          "name": "storage-instance-a",
          "mountPath": "string",
          "secret": "string"
        }
      ],
      "s3": [
        {
          "name": "storage-instance-a",
          "bucket": "my-bucket",
          "path": "/container/my-bucket",
          "accessKeySecret": "my-access-key-secret",
          "secretKeyOfAccessKeyId": "AccessKeyId",
          "secretKeyOfSecretKey": "SecretKey"
        }
      ]
    },
    "security": {
      "uidGidSource": "fromTheImage",
      "capabilities": [
        "CHOWN",
        "KILL"
      ],
      "seccompProfileType": "RuntimeDefault",
      "runAsNonRoot": true,
      "readOnlyRootFilesystem": false,
      "runAsUid": 500,
      "runAsGid": 30,
      "supplementalGroups": "2,3,5,8",
      "allowPrivilegeEscalation": false,
      "hostIpc": false,
      "hostNetwork": false
    }
  }
}
Delete a training using a workload id.
No Content.
The specified resource was not found.
Unexpected error.
Unexpected error.
{
  "code": 401,
  "message": "Issuer is not familiar."
}
Retrieve training details using a workload id.
Executed successfully.
The specified resource was not found.
Unexpected error.
Unexpected error.
{
  "name": "my-workload-name",
  "requestedName": "string",
  "workloadId": "06d16c5d-4728-42fa-b573-3b11820d999f",
  "projectId": 1,
  "departmentId": 2,
  "clusterId": "71f69d83-ba66-4822-adf5-55ce55efd210",
  "createdBy": "",
  "createdAt": "2022-01-01T03:49:52.531Z",
  "desiredPhase": "Running",
  "actualPhase": "Creating",
  "spec": {
    "command": "python",
    "args": "-x",
    "image": "python:3.8",
    "imagePullPolicy": "Always",
    "workingDir": "/home/myfolder",
    "createHomeDir": true,
    "probes": {
      "readiness": {
        "initialDelaySeconds": 0,
        "periodSeconds": 1,
        "timeoutSeconds": 1,
        "successThreshold": 1,
        "failureThreshold": 1,
        "handler": {
          "httpGet": {
            "path": "/",
            "port": 1,
            "host": "",
            "scheme": "HTTP"
          }
        }
      }
    },
    "nodeType": "my-node-type",
    "nodePools": [
      "my-node-pool-a",
      "my-node-pool-b"
    ],
    "podAffinity": {
      "type": "Required",
      "key": "string"
    },
    "tty": true,
    "stdin": true,
    "environmentVariables": [
      {
        "name": "HOME",
        "value": "/home/my-folder",
        "secret": {
          "name": "postgress_secret"
        },
        "configMap": {
          "name": "my-config-map"
        },
        "exclude": false,
        "description": "Home directory of the user."
      }
    ],
    "annotations": [
      {
        "name": "billing",
        "value": "my-billing-unit",
        "exclude": false
      }
    ],
    "labels": [
      {
        "name": "stage",
        "value": "initial-research",
        "exclude": false
      }
    ],
    "tolerations": [
      {
        "name": "string",
        "operator": "Equal",
        "key": "string",
        "value": "string",
        "effect": "NoSchedule",
        "seconds": 1,
        "exclude": false
      }
    ],
    "terminateAfterPreemption": false,
    "autoDeletionTimeAfterCompletionSeconds": 15,
    "terminationGracePeriodSeconds": 20,
    "backoffLimit": 3,
    "ports": [
      {
        "container": 8080,
        "serviceType": "LoadBalancer",
        "external": 30080,
        "toolType": "pytorch",
        "toolName": "my-pytorch",
        "name": "port-instance-a"
      }
    ],
    "exposedUrls": [
      {
        "container": 8080,
        "authorizedUsers": [
          "user-a",
          "user-b"
        ],
        "authorizedGroups": [
          "group-a",
          "group-b"
        ],
        "toolType": "jupyter",
        "toolName": "my-pytorch",
        "name": "url-instance-a"
      }
    ],
    "priorityClass": "build",
    "completions": 1,
    "parallelism": 1,
    "compute": {
      "gpuDevicesRequest": 1,
      "gpuRequestType": "portion",
      "gpuPortionRequest": 0.5,
      "gpuPortionLimit": 0.5,
      "gpuMemoryRequest": "10M",
      "gpuMemoryLimit": "10M",
      "migProfile": "1g.5gb",
      "cpuCoreRequest": 0.5,
      "cpuCoreLimit": 2,
      "cpuMemoryRequest": "20M",
      "cpuMemoryLimit": "30M",
      "largeShmRequest": false,
      "extendedResources": [
        {
          "resource": "hardware-vendor.example/foo",
          "quantity": 2,
          "exclude": false
        }
      ]
    },
    "storage": {
      "dataVolume": [
        {
          "id": "123e4567-e89b-12d3-a456-426614174000",
          "mountPath": "/mnt/data"
        }
      ],
      "pvc": [
        {
          "name": "storage-instance-a",
          "path": "/container/my-claim",
          "existingPvc": false,
          "claimName": "my-claim",
          "readOnly": false,
          "ephemeral": false,
          "claimInfo": {
            "size": "1G",
            "storageClass": "my-storage-class",
            "accessModes": {
              "readWriteOnce": true,
              "readOnlyMany": false,
              "readWriteMany": false
            },
            "volumeMode": "Filesystem"
          }
        }
      ],
      "hostPath": [
        {
          "name": "storage-instance-a",
          "path": "/container/directory",
          "readOnly": true,
          "mountPath": "/local/directory",
          "mountPropagation": "None"
        }
      ],
      "nfs": [
        {
          "name": "storage-instance-a",
          "path": "/container/nfs",
          "readOnly": true,
          "server": "",
          "mountPath": "/local/nfs"
        }
      ],
      "git": [
        {
          "name": "storage-instance-a",
          "branch": "main",
          "revision": "string",
          "path": "/container/my-repository",
          "passwordSecret": "my-password-secret",
          "secretKeyOfUser": "User",
          "secretKeyOfPassword": "Password"
        }
      ],
      "configMapVolume": [
        {
          "name": "storage-instance-a",
          "configMap": "string",
          "mountPath": "string"
        }
      ],
      "secretVolume": [
        {
          "name": "storage-instance-a",
          "mountPath": "string",
          "secret": "string"
        }
      ],
      "s3": [
        {
          "name": "storage-instance-a",
          "bucket": "my-bucket",
          "path": "/container/my-bucket",
          "accessKeySecret": "my-access-key-secret",
          "secretKeyOfAccessKeyId": "AccessKeyId",
          "secretKeyOfSecretKey": "SecretKey"
        }
      ]
    },
    "security": {
      "uidGidSource": "fromTheImage",
      "capabilities": [
        "CHOWN",
        "KILL"
      ],
      "seccompProfileType": "RuntimeDefault",
      "runAsNonRoot": true,
      "readOnlyRootFilesystem": false,
      "runAsUid": 500,
      "runAsGid": 30,
      "supplementalGroups": "2,3,5,8",
      "allowPrivilegeEscalation": false,
      "hostIpc": false,
      "hostNetwork": false
    }
  }
}
Suspend a training from running using a workload id.
The specified resource was not found.
Unexpected error.
Unexpected error.
{
  "code": 202,
  "message": "Request has been accepted."
}
Resume a training that was suspended using a workload id.
The specified resource was not found.
Unexpected error.
Unexpected error.
{
  "code": 202,
  "message": "Request has been accepted."
}