mlrun.projects#

class mlrun.projects.MlrunProject(name=None, description=None, params=None, functions=None, workflows=None, artifacts=None, artifact_path=None, conda=None, metadata=None, spec=None, default_requirements: Optional[Union[str, List[str]]] = None)[source]#

Bases: mlrun.model.ModelObj

property artifact_path: str#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

property artifacts: list#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

build_function(function: Union[str, mlrun.runtimes.base.BaseRuntime], with_mlrun: Optional[bool] = None, skip_deployed: bool = False, image=None, base_image=None, commands: Optional[list] = None, secret_name='', requirements: Optional[Union[str, List[str]]] = None, mlrun_version_specifier=None, builder_env: Optional[dict] = None, overwrite_build_params: bool = False) Union[mlrun.projects.operations.BuildStatus, kfp.dsl._container_op.ContainerOp][source]#

deploy ML function, build container with its dependencies
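example (a minimal sketch; “trainer” is assumed to be a function already added to the project with set_function(), and the extra packages are illustrative):

# build the image for a project function, adding extra pip packages
project.build_function(
    "trainer",
    base_image="mlrun/mlrun",
    commands=["pip install pandas"],
    requirements=["scikit-learn"],
)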

Parameters
  • function – name of the function (in the project) or function object

  • with_mlrun – add the current mlrun package to the container build

  • skip_deployed – skip the build if we already have an image for the function

  • image – target image name/path

  • base_image – base image name/path (commands and source code will be added to it)

  • commands – list of docker build (RUN) commands e.g. [‘pip install pandas’]

  • secret_name – k8s secret for accessing the docker registry

  • requirements – list of python packages or pip requirements file path, defaults to None

  • mlrun_version_specifier – which mlrun package version to include (if not current)

  • builder_env – Kaniko builder pod env vars dict (for config/credentials) e.g. builder_env={“GIT_TOKEN”: token}, does not work yet in KFP

  • overwrite_build_params – overwrite the function build parameters with the provided ones, or attempt to add to existing parameters

clear_context()[source]#

delete all files and clear the context dir

property context: str#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

create_remote(url, name='origin', branch=None)[source]#

create remote for the project git
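example (the repository url is illustrative; assumes the project context dir is a git repo):

project.create_remote("https://github.com/myorg/myproj.git", name="origin")
# after committing changes, project.push("main", "initial commit") can push them to the remote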

Parameters
  • url – remote git url

  • name – name for the remote (default is ‘origin’)

  • branch – Git branch to use as source

create_vault_secrets(secrets)[source]#
deploy_function(function: Union[str, mlrun.runtimes.base.BaseRuntime], dashboard: str = '', models: Optional[list] = None, env: Optional[dict] = None, tag: Optional[str] = None, verbose: Optional[bool] = None, builder_env: Optional[dict] = None, mock: Optional[bool] = None) Union[mlrun.projects.operations.DeployStatus, kfp.dsl._container_op.ContainerOp][source]#

deploy real-time (nuclio based) functions
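example (a hedged sketch; “serving” is assumed to be a nuclio/serving function set in the project, and the env var is illustrative):

deploy = project.deploy_function("serving", env={"LOG_LEVEL": "debug"}, tag="v1")
# inspect the returned DeployStatus (e.g. its outputs) for the function endpoint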

Parameters
  • function – name of the function (in the project) or function object

  • dashboard – url of the remote Nuclio dashboard (when not local)

  • models – list of model items

  • env – dict of extra environment variables

  • tag – extra version tag

  • verbose – add verbose prints/logs

  • builder_env – env vars dict for source archive config/credentials e.g. builder_env={“GIT_TOKEN”: token}

  • mock – deploy mock server vs a real Nuclio function (for local simulations)

property description: str#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

export(filepath=None, include_files: Optional[str] = None)[source]#

save the project object into a yaml file or zip archive (default to project.yaml)

By default the project object is exported to a yaml file. When the filepath suffix is ‘.zip’, the project context dir (code files) is also copied into the zip. The archive path can include DataItem urls (for remote object storage, e.g. s3://<bucket>/<path>).
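example (illustrative target paths; the glob filter value is an assumption about which files to pack):

project.export("project.yaml")                           # save only the project spec as yaml
project.export("project.zip", include_files="**/*.py")   # archive the spec plus the python code files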

Parameters
  • filepath – path to store project .yaml or .zip (with the project dir content)

  • include_files – glob filter string for selecting files to include in the zip archive

func(key, sync=False) mlrun.runtimes.base.BaseRuntime[source]#

get function object by name

Parameters

sync – will reload/reinit the function

Returns

function object

property functions: list#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

get_artifact(key, tag=None, iter=None)[source]#

Return an artifact object
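example (a minimal sketch; the key “results” and the tag are illustrative):

artifact = project.get_artifact("results", tag="latest")
print(artifact.uri)  # the store:// uri can be passed as an input to runs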

Parameters
  • key – artifact key

  • tag – version tag

  • iter – iteration number (for hyper-param tasks)

Returns

Artifact object

get_artifact_uri(key: str, category: str = 'artifact', tag: Optional[str] = None) str[source]#

return the project artifact uri (store://..) from the artifact key

example:

uri = project.get_artifact_uri("my_model", category="model", tag="prod")
Parameters
  • key – artifact key/name

  • category – artifact category (artifact, model, feature-vector, ..)

  • tag – artifact version tag, default to latest version

get_function(key, sync=False, enrich=False, ignore_cache=False, copy_function=True) mlrun.runtimes.base.BaseRuntime[source]#

get function object by name
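example (a hedged sketch; assumes “trainer” is a job-kind function added with set_function()):

fn = project.get_function("trainer", enrich=True)
fn.spec.image = "mlrun/mlrun"  # adjust the runtime spec before running
run = project.run_function(fn, params={"x": 1})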

Parameters
  • key – name of key for search

  • sync – will reload/reinit the function from the project spec

  • enrich – add project info/config/source info to the function object

  • ignore_cache – read the function object from the DB (ignore the local cache)

  • copy_function – return a copy of the function object

Returns

function object

get_function_objects() Dict[str, mlrun.runtimes.base.BaseRuntime][source]#

get a dict with all the project function objects, ready for use in a pipeline

get_param(key: str, default=None)[source]#

get project param by key
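example (the “model_tag” param is illustrative; params can be set in the project spec/yaml):

tag = project.get_param("model_tag", default="latest")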

get_run_status(run, timeout=None, expected_statuses=None, notifiers: Optional[mlrun.utils.notifications.notification_pusher.CustomNotificationPusher] = None)[source]#
get_secret(key: str)[source]#

get a key-based secret (e.g. DB password) from the context; secrets can be specified when invoking a run through files, env vars, etc.

get_store_resource(uri)[source]#

get store resource object by uri

get_vault_secrets(secrets=None, local=False)[source]#
import_artifact(item_path: str, new_key=None, artifact_path=None, tag=None)[source]#

Import an artifact object/package from .yaml, .json, or .zip file
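example (a hedged sketch; the package url is illustrative and is assumed to have been created earlier with artifact.export()):

model = project.import_artifact(
    "s3://my-bucket/exports/mymodel.zip", new_key="model", tag="prod"
)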

Parameters
  • item_path – dataitem url or file path to the file/package

  • new_key – overwrite the artifact key/name

  • artifact_path – target artifact path (when not using the default)

  • tag – artifact tag to set

Returns

artifact object

kind = 'project'#
list_artifacts(name=None, tag=None, labels=None, since=None, until=None, iter: Optional[int] = None, best_iteration: bool = False, kind: Optional[str] = None, category: Optional[Union[str, mlrun.api.schemas.artifact.ArtifactCategories]] = None) mlrun.lists.ArtifactList[source]#

List artifacts filtered by various parameters.

The returned result is an ArtifactList (list of dict), use .to_objects() to convert it to a list of artifact objects, .show() to view graphically in Jupyter, and .to_df() to convert to a DataFrame.

Examples:

# Get latest version of all artifacts in project
latest_artifacts = project.list_artifacts('', tag='latest')
# check different artifact versions for a specific artifact, return as objects list
result_versions = project.list_artifacts('results', tag='*').to_objects()
Parameters
  • name – Name of artifacts to retrieve. Name is used as a like query, and is not case-sensitive. This means that querying for ‘name’ may return artifacts named ‘my_Name_1’ or ‘surname’.

  • tag – Return artifacts assigned this tag.

  • labels – Return artifacts that have these labels.

  • since – Not in use in HTTPRunDB.

  • until – Not in use in HTTPRunDB.

  • iter – Return artifacts from a specific iteration (where iter=0 means the root iteration). If None (default) return artifacts from all iterations.

  • best_iteration – Returns the artifact which belongs to the best iteration of a given run, in the case of artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact from that iteration. If using best_iteration, the iter parameter must not be used.

  • kind – Return artifacts of the requested kind.

  • category – Return artifacts of the requested category.

list_functions(name=None, tag=None, labels=None)[source]#

Retrieve a list of functions, filtered by specific criteria.

example:

functions = project.list_functions(tag="latest")
Parameters
  • name – Return only functions with a specific name.

  • tag – Return function versions with specific tags.

  • labels – Return functions that have specific labels assigned to them.

Returns

List of function objects.

list_models(name=None, tag=None, labels=None, since=None, until=None, iter: Optional[int] = None, best_iteration: bool = False)[source]#

List models in project, filtered by various parameters.

Examples:

# Get latest version of all models in project
latest_models = project.list_models('', tag='latest')
Parameters
  • name – Name of artifacts to retrieve. Name is used as a like query, and is not case-sensitive. This means that querying for ‘name’ may return artifacts named ‘my_Name_1’ or ‘surname’.

  • tag – Return artifacts assigned this tag.

  • labels – Return artifacts that have these labels.

  • since – Not in use in HTTPRunDB.

  • until – Not in use in HTTPRunDB.

  • iter – Return artifacts from a specific iteration (where iter=0 means the root iteration). If None (default) return artifacts from all iterations.

  • best_iteration – Returns the artifact which belongs to the best iteration of a given run, in the case of artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact from that iteration. If using best_iteration, the iter parameter must not be used.

list_runs(name=None, uid=None, labels=None, state=None, sort=True, last=0, iter=False, start_time_from: Optional[datetime.datetime] = None, start_time_to: Optional[datetime.datetime] = None, last_update_time_from: Optional[datetime.datetime] = None, last_update_time_to: Optional[datetime.datetime] = None, **kwargs) mlrun.lists.RunList[source]#

Retrieve a list of runs, filtered by various options.

The returned result is a RunList (list of dict), use .to_objects() to convert it to a list of RunObjects, .show() to view graphically in Jupyter, .to_df() to convert to a DataFrame, and compare() to generate a comparison table and PCP plot.

Example:

# return a list of runs matching the name and label and compare
runs = project.list_runs(name='download', labels='owner=admin')
runs.compare()
# If running in Jupyter, can use the .show() function to display the results
project.list_runs(name='').show()
Parameters
  • name – Name of the run to retrieve.

  • uid – Unique ID of the run.

  • project – Project that the runs belong to.

  • labels – List runs that have a specific label assigned. Currently only a single label filter can be applied, otherwise result will be empty.

  • state – List only runs whose state is specified.

  • sort – Whether to sort the result according to their start time. Otherwise, results will be returned by their internal order in the DB (order will not be guaranteed).

  • last – Deprecated - currently not used.

  • iter – If True return runs from all iterations. Otherwise, return only runs whose iter is 0.

  • start_time_from – Filter by run start time in [start_time_from, start_time_to].

  • start_time_to – Filter by run start time in [start_time_from, start_time_to].

  • last_update_time_from – Filter by run last update time in (last_update_time_from, last_update_time_to).

  • last_update_time_to – Filter by run last update time in (last_update_time_from, last_update_time_to).

log_artifact(item, body=None, tag='', local_path='', artifact_path=None, format=None, upload=None, labels=None, target_path=None, **kwargs)[source]#

log an output artifact and optionally upload it to datastore

example:

project.log_artifact(
    "some-data",
    body=b"abc is 123",
    local_path="model.txt",
    labels={"framework": "xgboost"},
)
Parameters
  • item – artifact key or artifact object (Artifact class or subclass)

  • body – will use the body as the artifact content

  • local_path – path to the local file we upload, will also be used as the destination subpath (under “artifact_path”)

  • artifact_path – target artifact path (when not using the default) to define a subpath under the default location use: artifact_path=context.artifact_subpath(‘data’)

  • format – artifact file format: csv, png, ..

  • tag – version tag

  • target_path – absolute target path (instead of using artifact_path + local_path)

  • upload – upload to datastore (default is True)

  • labels – a set of key/value labels to tag the artifact with

Returns

artifact object

log_dataset(key, df, tag='', local_path=None, artifact_path=None, upload=None, labels=None, format='', preview=None, stats=None, target_path='', extra_data=None, label_column: Optional[str] = None, **kwargs) mlrun.artifacts.dataset.DatasetArtifact[source]#

log a dataset artifact and optionally upload it to datastore

example:

raw_data = {
    "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
    "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
    "age": [42, 52, 36, 24, 73],
    "testScore": [25, 94, 57, 62, 70],
}
df = pd.DataFrame(raw_data, columns=["first_name", "last_name", "age", "testScore"])
project.log_dataset("mydf", df=df, stats=True)
Parameters
  • key – artifact key

  • df – dataframe object

  • label_column – name of the label column (the one holding the target (y) values)

  • local_path – path to the local dataframe file that exists locally. The given file extension will be used to save the dataframe to a file. If the file exists, it will be uploaded to the datastore instead of the given df.

  • artifact_path – target artifact path (when not using the default). to define a subpath under the default location use: artifact_path=context.artifact_subpath(‘data’)

  • tag – version tag

  • format – optional, format to use (e.g. csv, parquet, ..)

  • target_path – absolute target path (instead of using artifact_path + local_path)

  • preview – number of lines to store as preview in the artifact metadata

  • stats – calculate and store dataset stats in the artifact metadata

  • extra_data – key/value list of extra files/charts to link with this dataset

  • upload – upload to datastore (default is True)

  • labels – a set of key/value labels to tag the artifact with

Returns

artifact object

log_model(key, body=None, framework='', tag='', model_dir=None, model_file=None, algorithm=None, metrics=None, parameters=None, artifact_path=None, upload=None, labels=None, inputs: Optional[List[mlrun.features.Feature]] = None, outputs: Optional[List[mlrun.features.Feature]] = None, feature_vector: Optional[str] = None, feature_weights: Optional[list] = None, training_set=None, label_column=None, extra_data=None, **kwargs)[source]#

log a model artifact and optionally upload it to datastore

example:

project.log_model("model", body=dumps(model),
                  model_file="model.pkl",
                  metrics=context.results,
                  training_set=training_df,
                  label_column='label',
                  feature_vector=feature_vector_uri,
                  labels={"app": "fraud"})
Parameters
  • key – artifact key or artifact object (Artifact class or subclass)

  • body – will use the body as the artifact content

  • model_file – path to the local model file we upload (see also model_dir) or to a model file data url (e.g. http://host/path/model.pkl)

  • model_dir – path to the local dir holding the model file and extra files

  • artifact_path – target artifact path (when not using the default) to define a subpath under the default location use: artifact_path=context.artifact_subpath(‘data’)

  • framework – name of the ML framework

  • algorithm – training algorithm name

  • tag – version tag

  • metrics – key/value dict of model metrics

  • parameters – key/value dict of model parameters

  • inputs – ordered list of model input features (name, type, ..)

  • outputs – ordered list of model output/result elements (name, type, ..)

  • upload – upload to datastore (default is True)

  • labels – a set of key/value labels to tag the artifact with

  • feature_vector – feature store feature vector uri (store://feature-vectors/<project>/<name>[:tag])

  • feature_weights – list of feature weights, one per input column

  • training_set – training set dataframe, used to infer inputs & outputs

  • label_column – which columns in the training set are the label (target) columns

  • extra_data – key/value list of extra files/charts to link with this dataset; value can be absolute path | relative path (to model dir) | bytes | artifact object

Returns

artifact object

property metadata: mlrun.projects.project.ProjectMetadata#
property mountdir: str#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

property name: str#

Project name, this is a property of the project metadata

property notifiers#
property params: str#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

pull(branch=None, remote=None)[source]#

pull/update sources from git or tar into the context dir
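example (assumes the project source is a git repo with a “development” branch and an “origin” remote):

project.pull(branch="development", remote="origin")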

Parameters
  • branch – git branch, if not the current one

  • remote – git remote, if other than origin

push(branch, message=None, update=True, remote=None, add: Optional[list] = None)[source]#

update spec and push updates to remote git repo
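example (branch, message, and file path are illustrative; requires a configured git remote, see create_remote()):

project.push("main", message="update workflow code", add=["src/mycode.py"])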

Parameters
  • branch – target git branch

  • message – git commit message

  • update – update files (git add update=True)

  • remote – git remote, default to origin

  • add – list of files to add

register_artifacts()[source]#

register the artifacts in the MLRun DB (under this project)

reload(sync=False, context=None) mlrun.projects.project.MlrunProject[source]#

reload the project and function objects from the project yaml/specs
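example (reload function objects after editing project.yaml or code files in the context dir):

project = project.reload(sync=True)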

Parameters
  • sync – set to True to load functions objects

  • context – context directory (where the yaml and code exist)

Returns

project object

remove_function(name)[source]#

remove a function from a project

Parameters

name – name of the function (under the project)

run(name: Optional[str] = None, workflow_path: Optional[str] = None, arguments: Optional[Dict[str, Any]] = None, artifact_path: Optional[str] = None, workflow_handler: Optional[Union[str, Callable]] = None, namespace: Optional[str] = None, sync: bool = False, watch: bool = False, dirty: bool = False, ttl: Optional[int] = None, engine: Optional[str] = None, local: Optional[bool] = None, schedule: Optional[Union[str, mlrun.api.schemas.schedule.ScheduleCronTrigger, bool]] = None, timeout: Optional[int] = None, overwrite: bool = False) mlrun.projects.pipelines._PipelineRunStatus[source]#

run a workflow using kubeflow pipelines
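example (a minimal sketch; the workflow “main” is assumed to be registered with set_workflow(), and the argument and bucket path are illustrative):

run_status = project.run(
    "main",
    arguments={"model_name": "my-model"},
    artifact_path="s3://my-bucket/{{workflow.uid}}",  # '{{workflow.uid}}' is replaced by the workflow id
    watch=True,
)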

Parameters
  • name – name of the workflow

  • workflow_path – url to a workflow file, if not a project workflow

  • arguments – kubeflow pipelines arguments (parameters)

  • artifact_path – target path/url for workflow artifacts, the string ‘{{workflow.uid}}’ will be replaced by workflow id

  • workflow_handler – workflow function handler (for running workflow function directly)

  • namespace – kubernetes namespace if other than default

  • sync – force functions sync before run

  • watch – wait for pipeline completion

  • dirty – allow running the workflow when the git repo is dirty

  • ttl – pipeline ttl in secs (after that the pods will be removed)

  • engine – workflow engine running the workflow. Supported values are ‘kfp’ (default), ‘local’ or ‘remote’. For setting the engine for remote running use ‘remote:local’ or ‘remote:kfp’.

  • local – run local pipeline with local functions (set local=True in function.run())

  • schedule – ScheduleCronTrigger class instance or a standard crontab expression string (which will be converted to the class using its from_crontab constructor), see this link for help: https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron for using the pre-defined workflow’s schedule, set schedule=True

  • timeout – timeout in seconds to wait for pipeline completion (used when watch=True)

  • overwrite – if a schedule already exists for a workflow with the same name, replace it with the new one.

Returns

run id

run_function(function: Union[str, mlrun.runtimes.base.BaseRuntime], handler: Optional[str] = None, name: str = '', params: Optional[dict] = None, hyperparams: Optional[dict] = None, hyper_param_options: Optional[mlrun.model.HyperParamOptions] = None, inputs: Optional[dict] = None, outputs: Optional[List[str]] = None, workdir: str = '', labels: Optional[dict] = None, base_task: Optional[mlrun.model.RunTemplate] = None, watch: bool = True, local: Optional[bool] = None, verbose: Optional[bool] = None, selector: Optional[str] = None, auto_build: Optional[bool] = None, schedule: Optional[Union[str, mlrun.api.schemas.schedule.ScheduleCronTrigger]] = None, artifact_path: Optional[str] = None) Union[mlrun.model.RunObject, kfp.dsl._container_op.ContainerOp][source]#

Run a local or remote task as part of a local/kubeflow pipeline

example (use with project):

# create a project with two functions (local and from marketplace)
project = mlrun.new_project(project_name, "./proj")
project.set_function("mycode.py", "myfunc", image="mlrun/mlrun")
project.set_function("hub://sklearn_classifier", "train")

# run functions (refer to them by name)
run1 = project.run_function("myfunc", params={"x": 7})
run2 = project.run_function("train", params={"data": run1.outputs["data"]})
Parameters
  • function – name of the function (in the project) or function object

  • handler – name of the function handler

  • name – execution name

  • params – input parameters (dict)

  • hyperparams – hyper parameters

  • selector – selection criteria for hyper params e.g. “max.accuracy”

  • hyper_param_options – hyper param options (selector, early stop, strategy, ..) see: HyperParamOptions

  • inputs – input objects (dict of key: path)

  • outputs – list of outputs that can be passed on within the workflow

  • workdir – default input artifacts path

  • labels – labels to tag the job/run with ({key:val, ..})

  • base_task – task object to use as base

  • watch – watch/follow run log, True by default

  • local – run the function locally vs on the runtime/cluster

  • verbose – add verbose prints/logs

  • auto_build – when set to True and the function requires a build, it will be built on the first function run; use only if you don’t plan on changing the build config between runs

  • schedule – ScheduleCronTrigger class instance or a standard crontab expression string (which will be converted to the class using its from_crontab constructor), see this link for help: https://apscheduler.readthedocs.io/en/v3.6.3/modules/triggers/cron.html#module-apscheduler.triggers.cron

  • artifact_path – path to store artifacts, when running in a workflow this will be set automatically

Returns

MLRun RunObject or KubeFlow containerOp

save(filepath=None, store=True)[source]#

export project to yaml file and save project in database

Parameters

store – if True, allow updating in case the project already exists

save_to_db(store=True)[source]#

save project to database

Parameters

store – if True, allow updating in case the project already exists

save_workflow(name, target, artifact_path=None, ttl=None)[source]#

create and save a workflow as a yaml or archive file
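example (the workflow name and target path are illustrative; the target can also end with .yaml):

project.save_workflow("main", "./main-workflow.zip")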

Parameters
  • name – workflow name

  • target – target file path (can end with .yaml or .zip)

  • artifact_path – target path/url for workflow artifacts, the string ‘{{workflow.uid}}’ will be replaced by workflow id

  • ttl – pipeline ttl (time to live) in secs (after that the pods will be removed)

set_artifact(key, artifact: Optional[Union[str, dict, mlrun.artifacts.base.Artifact]] = None, target_path: Optional[str] = None, tag: Optional[str] = None)[source]#

add/set an artifact in the project spec (will be registered on load)

example:

# register a simple file artifact
project.set_artifact('data', target_path=data_url)
# register a model artifact
project.set_artifact('model', ModelArtifact(model_file="model.pkl"), target_path=model_dir_url)

# register a path to artifact package (will be imported on project load)
# to generate such package use `artifact.export(target_path)`
project.set_artifact('model', 'https://mystuff.com/models/mymodel.zip')
Parameters
  • key – artifact key/name

  • artifact – mlrun Artifact object/dict (or its subclasses) or path to artifact file to import (yaml/json/zip), relative paths are relative to the context path

  • target_path – absolute target path url (point to the artifact content location)

  • tag – artifact tag

set_function(func: Optional[Union[str, mlrun.runtimes.base.BaseRuntime]] = None, name: str = '', kind: str = '', image: Optional[str] = None, handler=None, with_repo: Optional[bool] = None, tag: Optional[str] = None, requirements: Optional[Union[str, List[str]]] = None) mlrun.runtimes.base.BaseRuntime[source]#

update or add a function object to the project

function can be provided as an object (func) or a .py/.ipynb/.yaml url; supported url prefixes:

object (s3://, v3io://, ..)
MLRun DB e.g. db://project/func:ver
functions hub/market: e.g. hub://sklearn_classifier:master

examples:

proj.set_function(func_object)
proj.set_function('./src/mycode.py', 'ingest',
                  image='myrepo/ing:latest', with_repo=True)
proj.set_function('http://.../mynb.ipynb', 'train')
proj.set_function('./func.yaml')
proj.set_function('hub://get_toy_data', 'getdata')
Parameters
  • func – function object or spec/code url, None refers to current Notebook

  • name – name of the function (under the project)

  • kind – runtime kind e.g. job, nuclio, spark, dask, mpijob (default: job)

  • image – docker image to be used, can also be specified in the function object/yaml

  • handler – default function handler to invoke (can only be set with .py/.ipynb files)

  • with_repo – add (clone) the current repo to the build source

  • requirements – list of python packages or pip requirements file path

  • tag – function version tag (none for ‘latest’, can only be set with .py/.ipynb files)

Returns

function object

set_model_monitoring_credentials(access_key: str)[source]#

Set the credentials that will be used by the project’s model monitoring infrastructure functions. The supplied credentials must have data access

Parameters

access_key – Model Monitoring access key for managing user permissions.

set_secrets(secrets: Optional[dict] = None, file_path: Optional[str] = None, provider: Optional[Union[str, mlrun.api.schemas.secret.SecretProviderName]] = None)[source]#

set project secrets from a dict or a secrets env file. When using a secrets file it should have lines in the form KEY=VALUE, and comment lines start with “#”. V3IO paths/credentials and the MLRun service API address are dropped from the secrets

example secrets file:

# this is an env file
AWS_ACCESS_KEY_ID=XXXX
AWS_SECRET_ACCESS_KEY=YYYY

usage:

# read env vars from dict or file and set as project secrets
project.set_secrets({"SECRET1": "value"})
project.set_secrets(file_path="secrets.env")
Parameters
  • secrets – dict with secrets key/value

  • file_path – path to secrets file

  • provider – MLRun secrets provider

set_source(source, pull_at_runtime=False, workdir=None)[source]#

set the project source code path (can be a git/tar/zip archive)
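example (the git url and branch suffix are illustrative; pull_at_runtime=True loads the code when the job starts instead of at build time):

project.set_source("git://github.com/mlrun/project-demo.git#main", pull_at_runtime=True)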

Parameters
  • source – valid path to a git, zip, or tar file (or None for current), e.g. git://github.com/mlrun/something.git, http://some/url/file.zip

  • pull_at_runtime – load the archive into the container at job runtime vs on build/deploy

  • workdir – the relative workdir path (under the context dir)

set_workflow(name, workflow_path: str, embed=False, engine=None, args_schema: Optional[List[mlrun.model.EntrypointParam]] = None, handler=None, schedule: Optional[Union[str, mlrun.api.schemas.schedule.ScheduleCronTrigger]] = None, ttl=None, **args)[source]#

add or update a workflow, specify a name and the code path
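example (a minimal sketch; “./workflow.py” is assumed to hold a kfp pipeline handler named my_pipe):

project.set_workflow("main", "./workflow.py", engine="kfp", handler="my_pipe")
project.save()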

Parameters
  • name – name of the workflow

  • workflow_path – url/path for the workflow file

  • embed – add the workflow code into the project.yaml

  • engine – workflow processing engine (“kfp” or “local”)

  • args_schema – list of arg schema definitions (:py:class`~mlrun.model.EntrypointParam`)

  • handler – workflow function handler

  • schedule – ScheduleCronTrigger class instance or a standard crontab expression string (which will be converted to the class using its from_crontab constructor), see this link for help: https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron

  • ttl – pipeline ttl in secs (after that the pods will be removed)

  • args – argument values (key=value, ..)

property source: str#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

property spec: mlrun.projects.project.ProjectSpec#
property status: mlrun.projects.project.ProjectStatus#
sync_functions(names: Optional[list] = None, always=True, save=False)[source]#

reload function objects from specs and files

with_secrets(kind, source, prefix='')[source]#

register a secrets source (file, env or dict)

read secrets from a source provider to be used in workflows, example:

proj.with_secrets('file', 'file.txt')
proj.with_secrets('inline', {'key': 'val'})
proj.with_secrets('env', 'ENV1,ENV2', prefix='PFX_')

Vault secret source has several options:

proj.with_secrets('vault', {'user': <user name>, 'secrets': ['secret1', 'secret2' ...]})
proj.with_secrets('vault', {'project': <proj.name>, 'secrets': ['secret1', 'secret2' ...]})
proj.with_secrets('vault', ['secret1', 'secret2' ...])

The 2nd option uses the current project name as context. Can also use empty secret list:

proj.with_secrets('vault', [])

This will enable access to all secrets in vault registered to the current project.

Parameters
  • kind – secret type (file, inline, env, vault)

  • source – secret data or link (see example)

  • prefix – add a prefix to the keys in this source

Returns

project object

property workflows: list#

This is a property of the spec; see the spec for documentation. Kept here for backwards compatibility with user code that used MlrunProjectLegacy.

class mlrun.projects.ProjectMetadata(name=None, created=None, labels=None, annotations=None)[source]#

Bases: mlrun.model.ModelObj

property name: str#

Project name

static validate_project_name(name: str, raise_on_failure: bool = True) bool[source]#
class mlrun.projects.ProjectSpec(description=None, params=None, functions=None, workflows=None, artifacts=None, artifact_path=None, conda=None, source=None, subpath=None, origin_url=None, goals=None, load_source_on_run=None, default_requirements: Optional[Union[str, List[str]]] = None, desired_state='online', owner=None, disable_auto_mount=None, workdir=None)[source]#

Bases: mlrun.model.ModelObj

property artifacts: list#

list of artifacts used in this project

property functions: list#

list of function object/specs used in this project

get_code_path()[source]#

Get the path to the code root/workdir

property mountdir: str#

specify to mount the context dir inside the function container; use ‘.’ to use the same path as in the client (e.g. Jupyter)

remove_artifact(key)[source]#
remove_function(name)[source]#
remove_workflow(name)[source]#
set_artifact(key, artifact)[source]#
set_function(name, function_object, function_dict)[source]#
set_workflow(name, workflow)[source]#
property source: str#

source url or git repo

property workflows: List[dict]#

list of workflow spec dicts used in this project


class mlrun.projects.ProjectStatus(state=None)[source]#

Bases: mlrun.model.ModelObj

mlrun.projects.build_function(function: Union[str, mlrun.runtimes.base.BaseRuntime], with_mlrun: Optional[bool] = None, skip_deployed: bool = False, image=None, base_image=None, commands: Optional[list] = None, secret_name='', requirements: Optional[Union[str, List[str]]] = None, mlrun_version_specifier=None, builder_env: Optional[dict] = None, project_object=None, overwrite_build_params: bool = False) Union[mlrun.projects.operations.BuildStatus, kfp.dsl._container_op.ContainerOp][source]#

deploy ML function, build container with its dependencies

Parameters
  • function – name of the function (in the project) or function object

  • with_mlrun – add the current mlrun package to the container build

  • skip_deployed – skip the build if we already have an image for the function

  • image – target image name/path

  • base_image – base image name/path (commands and source code will be added to it)

  • commands – list of docker build (RUN) commands e.g. [‘pip install pandas’]

  • secret_name – k8s secret for accessing the docker registry

  • requirements – list of python packages or pip requirements file path, defaults to None

  • mlrun_version_specifier – which mlrun package version to include (if not current)

  • builder_env – Kaniko builder pod env vars dict (for config/credentials) e.g. builder_env={“GIT_TOKEN”: token}, does not work yet in KFP

  • project_object – override the project object to use, will default to the project set in the runtime context.

  • overwrite_build_params – overwrite the function build parameters with the provided ones, or attempt to add to existing parameters

mlrun.projects.deploy_function(function: Union[str, mlrun.runtimes.base.BaseRuntime], dashboard: str = '', models: Optional[list] = None, env: Optional[dict] = None, tag: Optional[str] = None, verbose: Optional[bool] = None, builder_env: Optional[dict] = None, project_object=None, mock: Optional[bool] = None) Union[mlrun.projects.operations.DeployStatus, kfp.dsl._container_op.ContainerOp][source]#

deploy real-time (nuclio based) functions

Parameters
  • function – name of the function (in the project) or function object

  • dashboard – url of the remote Nuclio dashboard (when not local)

  • models – list of model items

  • env – dict of extra environment variables

  • tag – extra version tag

  • verbose – add verbose prints/logs

  • builder_env – env vars dict for source archive config/credentials e.g. builder_env={“GIT_TOKEN”: token}

  • mock – deploy mock server vs a real Nuclio function (for local simulations)

  • project_object – override the project object to use, will default to the project set in the runtime context.

mlrun.projects.get_or_create_project(name: str, context: str = './', url: Optional[str] = None, secrets: Optional[dict] = None, init_git=False, subpath: Optional[str] = None, clone: bool = False, user_project: bool = False, from_template: Optional[str] = None, save: bool = True) mlrun.projects.project.MlrunProject[source]#

Load a project from the MLRun DB, or create/import it if it doesn’t exist

example:

# load the project from the DB (if it exists) or from the source repo
project = get_or_create_project("myproj", "./", "git://github.com/mlrun/demo-xgb-project.git")
project.pull("development")  # pull the latest code from git
project.run("main", arguments={'data': data_url})  # run the workflow "main"
Parameters
  • name – project name

  • context – project local directory path (Default value = “./”)

  • url – name (in DB) or git or tar.gz or .zip sources archive path e.g.: git://github.com/mlrun/demo-xgb-project.git http://mysite/archived-project.zip

  • secrets – key:secret dict or SecretsStore used to download sources

  • init_git – if True, will git init the context dir

  • subpath – project subpath (within the archive/context)

  • clone – if True, always clone (delete any existing content)

  • user_project – add the current user name to the project name (for db:// prefixes)

  • from_template – path to a project YAML file that will be used as a template (for new projects)

  • save – whether to save the created project in the DB

Returns

project object

mlrun.projects.load_project(context: str = './', url: Optional[str] = None, name: Optional[str] = None, secrets: Optional[dict] = None, init_git: bool = False, subpath: Optional[str] = None, clone: bool = False, user_project: bool = False, save: bool = True) mlrun.projects.project.MlrunProject[source]#

Load an MLRun project from git or tar or dir

example:

# Load the project and run the 'main' workflow.
# When using git as the url source the context directory must be an empty or
# non-existent folder as the git repo will be cloned there
project = load_project("./demo_proj", "git://github.com/mlrun/project-demo.git")
project.run("main", arguments={'data': data_url})
Parameters
  • context – project local directory path

  • url – name (in DB) or git or tar.gz or .zip sources archive path, e.g.: git://github.com/mlrun/demo-xgb-project.git, http://mysite/archived-project.zip, <project-name>. The git project should include the project yaml file. If the project yaml file is in a sub-directory, the sub-directory must be specified.

  • name – project name

  • secrets – key:secret dict or SecretsStore used to download sources

  • init_git – if True, will git init the context dir

  • subpath – project subpath (within the archive)

  • clone – if True, always clone (delete any existing content)

  • user_project – add the current user name to the project name (for db:// prefixes)

  • save – whether to save the created project and artifact in the DB

Returns

project object

mlrun.projects.new_project(name, context: str = './', init_git: bool = False, user_project: bool = False, remote: Optional[str] = None, from_template: Optional[str] = None, secrets: Optional[dict] = None, description: Optional[str] = None, subpath: Optional[str] = None, save: bool = True, overwrite: bool = False) mlrun.projects.project.MlrunProject[source]#

Create a new MLRun project, optionally load it from a yaml/zip/git template

example:

# create a project with local and marketplace functions, a workflow, and an artifact
project = mlrun.new_project("myproj", "./", init_git=True, description="my new project")
project.set_function('prep_data.py', 'prep-data', image='mlrun/mlrun', handler='prep_data')
project.set_function('hub://sklearn_classifier', 'train')
project.set_artifact('data', Artifact(target_path=data_url))
project.set_workflow('main', "./myflow.py")
project.save()

# run the "main" workflow (watch=True to wait for run completion)
project.run("main", watch=True)

example (load from template):

# create a new project from a zip template (can also use yaml/git templates)
# initialize a local git, and register the git remote path
project = mlrun.new_project("myproj", "./", init_git=True,
                            remote="git://github.com/mlrun/project-demo.git",
                            from_template="http://mysite/proj.zip")
project.run("main", watch=True)
Parameters
  • name – project name

  • context – project local directory path

  • init_git – if True, will git init the context dir

  • user_project – add the current user name to the provided project name (making it unique per user)

  • remote – remote Git url

  • from_template – path to project YAML/zip file that will be used as a template

  • secrets – key:secret dict or SecretsStore used to download sources

  • description – text describing the project

  • subpath – project subpath (relative to the context dir)

  • save – whether to save the created project in the DB

  • overwrite – overwrite project using ‘cascade’ deletion strategy (deletes project resources) if project with name exists

Returns

project object

mlrun.projects.run_function(function: Union[str, mlrun.runtimes.base.BaseRuntime], handler: Optional[str] = None, name: str = '', params: Optional[dict] = None, hyperparams: Optional[dict] = None, hyper_param_options: Optional[mlrun.model.HyperParamOptions] = None, inputs: Optional[dict] = None, outputs: Optional[List[str]] = None, workdir: str = '', labels: Optional[dict] = None, base_task: Optional[mlrun.model.RunTemplate] = None, watch: bool = True, local: Optional[bool] = None, verbose: Optional[bool] = None, selector: Optional[str] = None, project_object=None, auto_build: Optional[bool] = None, schedule: Optional[Union[str, mlrun.api.schemas.schedule.ScheduleCronTrigger]] = None, artifact_path: Optional[str] = None) Union[mlrun.model.RunObject, kfp.dsl._container_op.ContainerOp][source]#

Run a local or remote task as part of a local/kubeflow pipeline

run_function() allows you to execute a function locally, on a remote cluster, or as part of an automated workflow. The function can be specified as an object or by name (str); when the function is specified by name it is looked up in the current project, eliminating the need to redefine/edit functions.

when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level, e.g. local=True will run all the functions locally, and setting artifact_path will direct all outputs to the same path. Project runs provide additional notifications/reporting and exception handling. Inside a Kubeflow pipeline (KFP), run_function() generates KFP “ContainerOps” which are used to form a DAG; some behavior may differ between regular runs and deferred KFP runs.

example (use with function object):

function = mlrun.import_function("hub://sklearn_classifier")
run1 = run_function(function, params={"data": url})

example (use with project):

# create a project with two functions (local and from marketplace)
project = mlrun.new_project(project_name, "./proj")
project.set_function("mycode.py", "myfunc", image="mlrun/mlrun")
project.set_function("hub://sklearn_classifier", "train")

# run functions (refer to them by name)
run1 = run_function("myfunc", params={"x": 7})
run2 = run_function("train", params={"data": run1.outputs["data"]})

example (use in pipeline):

@dsl.pipeline(name="test pipeline", description="test")
def my_pipe(url=""):
    run1 = run_function("loaddata", params={"url": url})
    run2 = run_function("train", params={"data": run1.outputs["data"]})

project.run(workflow_handler=my_pipe, arguments={"param1": 7})
Parameters
  • function – name of the function (in the project) or function object

  • handler – name of the function handler

  • name – execution name

  • params – input parameters (dict)

  • hyperparams – hyper parameters

  • selector – selection criteria for hyper params e.g. “max.accuracy”

  • hyper_param_options – hyper param options (selector, early stop, strategy, ..) see: HyperParamOptions

  • inputs – input objects (dict of key: path)

  • outputs – list of outputs that can be passed on within the workflow

  • workdir – default input artifacts path

  • labels – labels to tag the job/run with ({key:val, ..})

  • base_task – task object to use as base

  • watch – watch/follow run log, True by default

  • local – run the function locally vs on the runtime/cluster

  • verbose – add verbose prints/logs

  • project_object – override the project object to use, will default to the project set in the runtime context.

  • auto_build – when set to True and the function requires a build, it will be built on the first function run; use only if you don’t plan on changing the build config between runs

  • schedule – ScheduleCronTrigger class instance or a standard crontab expression string (which will be converted to the class using its from_crontab constructor), see this link for help: https://apscheduler.readthedocs.io/en/v3.6.3/modules/triggers/cron.html#module-apscheduler.triggers.cron

  • artifact_path – path to store artifacts, when running in a workflow this will be set automatically

Returns

MLRun RunObject or KubeFlow containerOp