mlrun.model#
- class mlrun.model.DataSource(name: Optional[str] = None, path: Optional[str] = None, attributes: Optional[Dict[str, str]] = None, key_field: Optional[str] = None, time_field: Optional[str] = None, schedule: Optional[str] = None, start_time: Optional[Union[datetime.datetime, str]] = None, end_time: Optional[Union[datetime.datetime, str]] = None)[source]#
Bases:
mlrun.model.ModelObj
online or offline data source spec
- class mlrun.model.DataTarget(kind: Optional[str] = None, name: str = '', path=None, online=None)[source]#
Bases:
mlrun.model.DataTargetBase
data target with extra status information (used in the feature-set/vector status)
- class mlrun.model.DataTargetBase(kind: Optional[str] = None, name: str = '', path=None, attributes: Optional[Dict[str, str]] = None, after_step=None, partitioned: bool = False, key_bucketing_number: Optional[int] = None, partition_cols: Optional[List[str]] = None, time_partitioning_granularity: Optional[str] = None, max_events: Optional[int] = None, flush_after_seconds: Optional[int] = None, after_state=None, storage_options: Optional[Dict[str, str]] = None)[source]#
Bases:
mlrun.model.ModelObj
data target spec, specify a destination for the feature set data
- class mlrun.model.FeatureSetProducer(kind=None, name=None, uri=None, owner=None, sources=None)[source]#
Bases:
mlrun.model.ModelObj
information about the task/job which produced the feature set data
- class mlrun.model.HyperParamOptions(param_file=None, strategy=None, selector: Optional[mlrun.model.HyperParamStrategies] = None, stop_condition=None, parallel_runs=None, dask_cluster_uri=None, max_iterations=None, max_errors=None, teardown_dask=None)[source]#
Bases:
mlrun.model.ModelObj
Hyper Parameter Options
- Parameters
param_file (str) – hyper params input file path/url, instead of inline
strategy (str) – hyper param strategy - grid, list or random
selector (str) – selection criteria for best result ([min|max.]<result>), e.g. max.accuracy
stop_condition (str) – early stop condition e.g. “accuracy > 0.9”
parallel_runs (int) – number of param combinations to run in parallel (over Dask)
dask_cluster_uri (str) – db uri for a deployed dask cluster function, e.g. db://myproject/dask
max_iterations (int) – max number of runs (in random strategy)
max_errors (int) – max number of child runs errors for the overall job to fail
teardown_dask (bool) – kill the dask cluster pods after the runs
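For the grid strategy, the hyper-param list values above are expanded into one run per parameter combination. A minimal sketch of that expansion (the function name is illustrative, not MLRun's internal API):

```python
from itertools import product

def grid_combinations(hyper_params):
    # Expand {"p1": [2, 4, 1], "p2": [10, 20]} into one dict per combination,
    # mirroring how a grid strategy enumerates runs. Illustrative only.
    keys = list(hyper_params)
    return [dict(zip(keys, values))
            for values in product(*(hyper_params[k] for k in keys))]

combos = grid_combinations({"p1": [2, 4, 1], "p2": [10, 20]})
# 3 values for p1 x 2 values for p2 -> 6 combinations
```

With `parallel_runs` set, such combinations would be dispatched concurrently (over Dask) rather than sequentially.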
- mlrun.model.NewTask(name=None, project=None, handler=None, params=None, hyper_params=None, param_file=None, selector=None, strategy=None, inputs=None, outputs=None, in_path=None, out_path=None, artifact_path=None, secrets=None, base=None)[source]#
Creates a new task - see new_task
- class mlrun.model.RunMetadata(uid=None, name=None, project=None, labels=None, annotations=None, iteration=None)[source]#
Bases:
mlrun.model.ModelObj
Run metadata
- class mlrun.model.RunObject(spec: Optional[mlrun.model.RunSpec] = None, metadata: Optional[mlrun.model.RunMetadata] = None, status: Optional[mlrun.model.RunStatus] = None)[source]#
Bases:
mlrun.model.RunTemplate
A run
- artifact(key) mlrun.datastore.base.DataItem [source]#
return artifact DataItem by key
- property outputs#
return a dict of outputs, result values and artifact uris
- property ui_url: str#
UI URL (for relevant runtimes)
- wait_for_completion(sleep=3, timeout=0, raise_on_failure=True, show_logs=None, logs_interval=None)[source]#
Wait for a remote run to complete. By default, waits until the run reaches a terminal state or the timeout passes; if timeout is 0, waits forever. The run status is pulled from the DB every sleep seconds. If show_logs is not False and logs_interval is None, the logs are printed once the run reaches a terminal state. If show_logs is not False and logs_interval is defined, the logs are printed every logs_interval seconds. If show_logs is False, the logs are not printed, but the run state is still pulled until it reaches a terminal state.
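The waiting behavior described above amounts to a polling loop. A sketch under stated assumptions: the function, the terminal state names, and the get_state callback are illustrative stand-ins, not MLRun's actual implementation.

```python
import time

def wait_for_run(get_state, sleep=3, timeout=0, raise_on_failure=True):
    # Poll get_state() (a stand-in for pulling the run status from the DB)
    # every `sleep` seconds until a terminal state or the timeout is reached.
    # State names are assumptions for illustration.
    terminal = {"completed", "error", "aborted"}
    start = time.monotonic()
    while True:
        state = get_state()
        if state in terminal:
            break
        if timeout and time.monotonic() - start >= timeout:
            raise TimeoutError("run did not reach a terminal state in time")
        time.sleep(sleep)
    if raise_on_failure and state != "completed":
        raise RuntimeError(f"run ended in state {state!r}")
    return state
```

A timeout of 0 skips the deadline check entirely, matching the "wait forever" default described above.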
- class mlrun.model.RunSpec(parameters=None, hyperparams=None, param_file=None, selector=None, handler=None, inputs=None, outputs=None, input_path=None, output_path=None, function=None, secret_sources=None, data_stores=None, strategy=None, verbose=None, scrape_metrics=None, hyper_param_options=None, allow_empty_resources=None)[source]#
Bases:
mlrun.model.ModelObj
Run specification
- class mlrun.model.RunStatus(state=None, error=None, host=None, commit=None, status_text=None, results=None, artifacts=None, start_time=None, last_update=None, iterations=None, ui_url=None)[source]#
Bases:
mlrun.model.ModelObj
Run status
- class mlrun.model.RunTemplate(spec: Optional[mlrun.model.RunSpec] = None, metadata: Optional[mlrun.model.RunMetadata] = None)[source]#
Bases:
mlrun.model.ModelObj
Run template
- with_hyper_params(hyperparams, selector=None, strategy: Optional[mlrun.model.HyperParamStrategies] = None, **options)[source]#
set hyper param values and configurations, see parameters in:
HyperParamOptions
example:
grid_params = {"p1": [2, 4, 1], "p2": [10, 20]}
task = mlrun.new_task("grid-search")
task.with_hyper_params(grid_params, selector="max.accuracy")
- with_input(key, path)[source]#
set task data input; path is an MLRun global DataItem URI
examples:
task.with_input("data", "/file-dir/path/to/file")
task.with_input("data", "s3://<bucket>/path/to/file")
task.with_input("data", "v3io://[<remote-host>]/<data-container>/path/to/file")
- with_param_file(param_file, selector=None, strategy: Optional[mlrun.model.HyperParamStrategies] = None, **options)[source]#
set hyper param values (from a file url) and configurations, see parameters in:
HyperParamOptions
example:
grid_params = "s3://<my-bucket>/path/to/params.json"
task = mlrun.new_task("grid-search")
task.with_param_file(grid_params, selector="max.accuracy")
- with_secrets(kind, source)[source]#
register a secrets source (file, env, or dict)
Read secrets from a source provider, to be used in workflows. Examples:
task.with_secrets('file', 'file.txt')
task.with_secrets('inline', {'key': 'val'})
task.with_secrets('env', 'ENV1,ENV2')
task.with_secrets('vault', ['secret1', 'secret2'...])

# If using with k8s secrets, the k8s secret is managed by MLRun, through the
# project-secrets mechanism. The secrets will be attached to the running pod
# as environment variables.
task.with_secrets('kubernetes', ['secret1', 'secret2'])

# If using an empty secrets list [] then all accessible secrets will be available.
task.with_secrets('vault', [])

# To use with Azure key vault, a k8s secret must be created with the following keys:
# kubectl -n <namespace> create secret generic azure-key-vault-secret \
#     --from-literal=tenant_id=<service principal tenant ID> \
#     --from-literal=client_id=<service principal client ID> \
#     --from-literal=secret=<service principal secret key>
task.with_secrets('azure_vault', {
    'name': 'my-vault-name',
    'k8s_secret': 'azure-key-vault-secret',
    # An empty secrets list may be passed ('secrets': []) to access all vault secrets.
    'secrets': ['secret1', 'secret2'...]
})
- Parameters
kind – secret type (file, inline, env, kubernetes, vault, azure_vault)
source – secret data or link (see example)
- Returns
The RunTemplate object
- class mlrun.model.TargetPathObject(base_path=None, run_id=None, is_single_file=False)[source]#
Bases:
object
Class configuring the target path. This class takes several parameters into consideration to build the correct final path:
- run_id
if run_id is provided, the target is considered to be in run_id mode, which requires the path to contain a {run_id} placeholder.
- is_single_file
if true, run_id must be the directory containing the output file, or must be generated before the file name (run_id/output.file).
- base_path
if it contains the {run_id} placeholder, run_id must not be None. If run_id is passed and the placeholder doesn't exist, the placeholder will be generated in the correct place.
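The placeholder rules above can be sketched as a small path-resolution helper. This is a hypothetical illustration of the described behavior, not the actual TargetPathObject implementation:

```python
def resolve_target_path(base_path, run_id=None, is_single_file=False):
    # Hypothetical sketch of the {run_id} placeholder rules described above.
    placeholder = "{run_id}"
    if run_id is None:
        # base_path with a placeholder but no run_id is invalid
        if placeholder in base_path:
            raise ValueError("base_path contains {run_id} but no run_id was given")
        return base_path
    if placeholder not in base_path:
        # generate the placeholder in the correct place
        if is_single_file:
            # the run_id directory goes just before the file name
            directory, _, filename = base_path.rpartition("/")
            base_path = f"{directory}/{placeholder}/{filename}"
        else:
            base_path = base_path.rstrip("/") + f"/{placeholder}/"
    return base_path.replace(placeholder, run_id)
```

For example, a single-file target like `v3io:///projects/x/out.parquet` with run_id `123` would resolve to `v3io:///projects/x/123/out.parquet` under these rules.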
- mlrun.model.new_task(name=None, project=None, handler=None, params=None, hyper_params=None, param_file=None, selector=None, hyper_param_options=None, inputs=None, outputs=None, in_path=None, out_path=None, artifact_path=None, secrets=None, base=None) mlrun.model.RunTemplate [source]#
Creates a new task
- Parameters
name – task name
project – task project
handler – code entry-point/handler name
params – input parameters (dict)
hyper_params – dictionary of hyper parameters and list values; each hyper param holds a list of values, and the run will be executed for every parameter combination (grid search)
param_file – a CSV file with parameter combinations; the first row holds the parameter names, and the following rows hold the param values
selector – selection criteria for hyper params e.g. “max.accuracy”
hyper_param_options – hyper parameter options, see:
HyperParamOptions
inputs – dictionary of input objects + optional paths (if path is omitted the path will be the in_path/key)
outputs – dictionary of output objects + optional paths (if path is omitted the path will be the out_path/key)
in_path – default input path/url (prefix) for inputs
out_path – default output path/url (prefix) for artifacts
artifact_path – default artifact output path
secrets – extra secrets specs, will be injected into the runtime e.g. [‘file=<filename>’, ‘env=ENV_KEY1,ENV_KEY2’]
base – task instance to use as a base instead of a fresh new task instance
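The selector format noted above ([min|max.]<result>, e.g. "max.accuracy") can be sketched as a small parser. The function and its default criterion are illustrative assumptions, not MLRun code:

```python
def parse_selector(selector):
    # Split a selector like "max.accuracy" into a criterion and a result field.
    # A bare field name defaults to "max", matching the [min|max.]<result>
    # format described above. Illustrative sketch only.
    op, sep, field = selector.partition(".")
    if not sep:
        return "max", selector
    if op not in ("min", "max"):
        raise ValueError(f"unsupported criterion: {op!r}")
    return op, field

parse_selector("max.accuracy")  # -> ('max', 'accuracy')
parse_selector("min.loss")      # -> ('min', 'loss')
```

The best child run would then be the one whose result field is minimal or maximal according to the parsed criterion.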