class mlrun.artifacts.dataset.DatasetArtifact(key: str | None = None, df=None, preview: int | None = None, format: str = '', stats: bool | None = None, target_path: str | None = None, extra_data: dict | None = None, column_metadata: dict | None = None, ignore_preview_limits: bool = False, label_column: str | None = None, **kwargs)[source]#

Bases: Artifact

SUPPORTED_FORMATS = ['csv', 'parquet', 'pq', 'tsdb', 'kv']#
property column_metadata#
property df: DataFrame#

Get the dataset in this artifact.

Returns:

The dataset as a DataFrame.

property header#
static is_format_supported(fmt: str) → bool[source]#

Check whether the given dataset format is supported by the DatasetArtifact.

Parameters:

fmt -- The format string to check.

Returns:

True if the format is supported and False if not.

kind = 'dataset'#
property preview#
resolve_dataframe_target_hash_path(dataframe, artifact_path: str)[source]#
property schema#
property spec: DatasetArtifactSpec#
property stats#
static update_preview_fields_from_df(artifact, df, stats=None, preview_rows_length=None, ignore_preview_limits=False)[source]#
upload(artifact_path: str | None = None)[source]#

Internal: upload to the target store.

Parameters:

artifact_path -- required only when generating target_path from the artifact hash

class mlrun.artifacts.dataset.DatasetArtifactSpec[source]#

Bases: ArtifactSpec

class mlrun.artifacts.dataset.TableArtifact(key=None, body=None, df=None, viewer=None, visible=False, inline=False, format=None, header=None, schema=None)[source]#

Bases: Artifact

get_body()[source]#

Get the artifact body when inline.

kind = 'table'#
property spec: TableArtifactSpec#
class mlrun.artifacts.dataset.TableArtifactSpec[source]#

Bases: ArtifactSpec

mlrun.artifacts.dataset.get_df_stats(df)[source]#
mlrun.artifacts.dataset.update_dataset_meta(artifact, from_df=None, schema: dict | None = None, header: list | None = None, preview: list | None = None, stats: dict | None = None, extra_data: dict | None = None, column_metadata: dict | None = None, labels: dict | None = None, ignore_preview_limits: bool = False)[source]#

Update dataset object attributes/metadata.

This method will edit or add metadata to a dataset object.

Example

update_dataset_meta(dataset, from_df=df,
                    extra_data={'histogram': 's3://mybucket/..'})

Parameters:
  • from_df -- read metadata (schema, preview, ..) from provided df

  • artifact -- dataset artifact object or path (store://..) or DataItem

  • schema -- dataset schema, see pandas build_table_schema

  • header -- column headers

  • preview -- list of rows and row values (from df.values.tolist())

  • stats -- dict of column names and their stats (cleaned df.describe(include='all'))

  • extra_data -- extra data items (key: path string | artifact)

  • column_metadata -- dict of metadata per column

  • labels -- metadata labels

  • ignore_preview_limits -- whether to ignore the preview size limits

mlrun.artifacts.dataset.upload_dataframe(df, target_path, format, src_path=None, **kw) tuple[Optional[int], Optional[str]][source]#