class mlrun.artifacts.dataset.DatasetArtifact(key: str | None = None, df=None, preview: int | None = None, format: str = '', stats: bool | None = None, target_path: str | None = None, extra_data: dict | None = None, column_metadata: dict | None = None, ignore_preview_limits: bool = False, label_column: str | None = None, **kwargs)[source]#

Bases: Artifact

SUPPORTED_FORMATS = ['csv', 'parquet', 'pq', 'tsdb', 'kv']#
property column_metadata#
property df: DataFrame#

Get the dataset in this artifact.

Returns:

The dataset as a DataFrame.

property header#
static is_format_supported(fmt: str) → bool[source]#

Check whether the given dataset format is supported by the DatasetArtifact.

Parameters:

fmt -- The format string to check.

Returns:

True if the format is supported and False if not.

kind = 'dataset'#
property preview#
resolve_dataframe_target_hash_path(dataframe, artifact_path: str)[source]#
property schema#
property spec: DatasetArtifactSpec#
property stats#
static update_preview_fields_from_df(artifact, df, stats=None, preview_rows_length=None, ignore_preview_limits=False)[source]#
upload(artifact_path: str | None = None)[source]#

Internal: upload to the target store.

Parameters:

artifact_path -- required only when generating target_path from the artifact hash

class mlrun.artifacts.dataset.DatasetArtifactSpec[source]#

Bases: ArtifactSpec

class mlrun.artifacts.dataset.TableArtifact(key=None, body=None, df=None, viewer=None, visible=False, inline=False, format=None, header=None, schema=None)[source]#

Bases: Artifact

get_body()[source]#

Get the artifact body when inline.

kind = 'table'#
property spec: TableArtifactSpec#
class mlrun.artifacts.dataset.TableArtifactSpec[source]#

Bases: ArtifactSpec

mlrun.artifacts.dataset.get_df_stats(df)[source]#
mlrun.artifacts.dataset.update_dataset_meta(artifact, from_df=None, schema: dict | None = None, header: list | None = None, preview: list | None = None, stats: dict | None = None, extra_data: dict | None = None, column_metadata: dict | None = None, labels: dict | None = None, ignore_preview_limits: bool = False)[source]#

Update dataset object attributes/metadata.

This method will edit or add metadata to a dataset object.

Example

update_dataset_meta(dataset, from_df=df,
                    extra_data={'histogram': 's3://mybucket/..'})

Parameters:
  • from_df -- read metadata (schema, preview, ..) from provided df

  • artifact -- dataset artifact object or path (store://..) or DataItem

  • schema -- dataset schema, see pandas build_table_schema

  • header -- column headers

  • preview -- list of rows and row values (from df.values.tolist())

  • stats -- dict of column names and their stats (cleaned df.describe(include='all'))

  • extra_data -- extra data items (key: path string | artifact)

  • column_metadata -- dict of metadata per column

  • labels -- metadata labels

  • ignore_preview_limits -- whether to ignore the preview size limits

mlrun.artifacts.dataset.upload_dataframe(df, target_path, format, src_path=None, **kw) tuple[Optional[int], Optional[str]][source]#