mlrun.alerts.alert

mlrun.alerts.alert#

class mlrun.alerts.alert.AlertConfig(project: str | None = None, name: str | None = None, template: AlertTemplate | str = None, description: str | None = None, summary: str | None = None, severity: AlertSeverity = None, trigger: AlertTrigger = None, criteria: AlertCriteria = None, reset_policy: ResetPolicy = None, cooldown_period: str | None = None, notifications: list[AlertNotification] | None = None, entities: EventEntities = None, id: int | None = None, state: AlertActiveState = None, created: str | None = None, count: int | None = None, updated: str | None = None, **kwargs)[source]#

Bases: ModelObj

Alert config object

Example:

# Create an alert on endpoint_id that sends a Slack notification when a "data_drift_detected" event
# occurs 3 times within one hour.

from mlrun.alerts import AlertConfig
import mlrun.common.schemas.alert as alert_objects

entity_kind = alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT
entity_id = get_default_result_instance_fqn(endpoint_id)
event_name = alert_objects.EventKind.DATA_DRIFT_DETECTED
notification = mlrun.model.Notification(
    kind="slack",
    name="slack_notification",
    message="drift was detected",
    severity="warning",
    when=["now"],
    condition="failed",
    secret_params={
        "webhook": "https://hooks.slack.com/",
    },
).to_dict()

alert_data = AlertConfig(
    project="my-project",
    name="drift-alert",
    summary="a drift was detected",
    severity=alert_objects.AlertSeverity.LOW,
    entities=alert_objects.EventEntities(
        kind=entity_kind, project="my-project", ids=[entity_id]
    ),
    trigger=alert_objects.AlertTrigger(events=[event_name]),
    criteria=alert_objects.AlertCriteria(count=3, period="1h"),
    notifications=[
        alert_objects.AlertNotification(notification=notification)
    ],
)
project.store_alert_config(alert_data)

Parameters:

project -- Name of the project to associate the alert with
name -- Name of the alert
template -- Optional parameter that allows creating an alert based on a predefined template. You can pass either an AlertTemplate object or a string (the template name). If a template is used, many fields of the alert will be auto-generated based on the template. However, you still need to provide the following fields: name, project, entity, notifications
description -- Description of the alert
summary -- Summary of the alert, will be sent in the generated notifications
severity -- Severity of the alert
trigger -- The events that will trigger this alert. May be a simple trigger based on events, or a complex trigger based on a Prometheus alert
criteria -- Defines when the alert is triggered, based on the specified number of events within the defined time period.
reset_policy --
When to clear the alert.
- manual: the alert stays active after triggering and must be reset explicitly.
- auto: the alert is reset automatically after triggering and sending notifications (immediately if cooldown_period is not set, or after the cooldown_period elapses if it is set).
cooldown_period -- Period during which the alert remains active after being triggered before it is automatically reset. Only applicable when reset_policy=auto, cooldown_period > 0, and cooldown_period >= the server's cooldown_reset_interval (15s by default, modifiable by Support). If not set or set to zero, the alert resets immediately. Format: e.g. 1d, 3h, 5m, 15s.
notifications -- List of notifications to invoke once the alert is triggered
entities -- Entities that the event relates to. The entity object will contain fields that uniquely identify a given entity in the system
id -- Internal id of the alert (user should not supply it)
state -- State of the alert, may be active/inactive (user should not supply it)
created -- When the alert is created (user should not supply it)
count -- Internal counter of the alert (user should not supply it)
updated -- The last update time of the alert (user should not supply it)

property created: datetime#: Get the created field as a datetime object.

classmethod from_dict(struct=None, fields=None, deprecated_fields: dict | None = None)[source]#: Create an object from a Python dictionary

list_activations(since: datetime | None = None, until: datetime | None = None, from_last_update: bool = False) → list[AlertActivation][source]#

Retrieve a list of all alert activations.

Parameters:

since -- Filters for alert activations occurring after this timestamp.
until -- Filters for alert activations occurring before this timestamp.
from_last_update -- If set to True, retrieves alert activations since the alert's last update time. If both since and from_last_update=True are provided, from_last_update takes precedence and the since value will be overridden by the alert's last update timestamp.

Returns:

A list of alert activations matching the provided filters.

paginated_list_activations(*args, page: int | None = None, page_size: int | None = None, page_token: str | None = None, from_last_update: bool = False, **kwargs) → tuple[AlertActivation, str | None][source]#

List alert activations with support for pagination and various filtering options.

This method retrieves a paginated list of alert activations based on the specified filter parameters. Pagination is controlled using the page, page_size, and page_token parameters. The method will return a list of alert activations that match the filtering criteria provided.

For detailed information about the parameters, refer to the list_activations method. See list_activations() for more details.

Examples:

# Fetch first page of alert activations with page size of 5
alert_activations, token = alert_config.paginated_list_activations(
    page_size=5
)
# Fetch next page using the pagination token from the previous response
alert_activations, token = alert_config.paginated_list_activations(
    page_token=token
)
# Fetch alert activations for a specific page (e.g., page 3)
alert_activations, token = alert_config.paginated_list_activations(
    page=3, page_size=5
)

# Automatically iterate over all pages without explicitly specifying the page number
alert_activations = []
token = None
while True:
    page_alert_activations, token = alert_config.paginated_list_activations(
        page_token=token, page_size=5
    )
    alert_activations.extend(page_alert_activations)

    # If token is None and page_alert_activations is empty, we've reached the end (no more activations).
    # If token is None and page_alert_activations is not empty, we've fetched the last page of activations.
    if not token:
        break
print(f"Total alert activations retrieved: {len(alert_activations)}")

Parameters:

page -- The page number to retrieve. If not provided, the next page will be retrieved.
page_size -- The number of items per page to retrieve. Up to page_size responses are expected.
page_token -- A pagination token used to retrieve the next page of results. Should not be provided for the first request.
from_last_update -- If set to True, retrieves alert activations since the alert's last update time.

Returns:

A tuple containing the list of alert activations and an optional page_token for pagination.

to_dict(fields: list | None = None, exclude: list | None = None, strip: bool = False)[source]#

property updated: datetime#: Get the updated field as a datetime object.

validate_required_fields()[source]#

with_entities(entities: EventEntities)[source]#

with_notifications(notifications: list[AlertNotification])[source]#

mlrun.alerts.alert

Contents

mlrun.alerts.alert#