# Source code for mlrun.common.schemas.alert
# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from datetime import datetime
from typing import Annotated, Optional, Union
import pydantic
from mlrun.common.schemas.notification import Notification
from mlrun.common.types import StrEnum
[docs]class EventEntityKind(StrEnum):
MODEL_ENDPOINT_RESULT = "model-endpoint-result"
MODEL_MONITORING_APPLICATION = "model-monitoring-application"
JOB = "job"
class EventEntities(pydantic.BaseModel):
    """Identifies the entity an event or alert refers to (kind, project, IDs)."""

    kind: EventEntityKind
    project: str
    # The list is constrained to exactly one ID (min_items == max_items == 1).
    ids: pydantic.conlist(str, min_items=1, max_items=1)
[docs]class EventKind(StrEnum):
DATA_DRIFT_DETECTED = "data-drift-detected"
DATA_DRIFT_SUSPECTED = "data-drift-suspected"
CONCEPT_DRIFT_DETECTED = "concept-drift-detected"
CONCEPT_DRIFT_SUSPECTED = "concept-drift-suspected"
MODEL_PERFORMANCE_DETECTED = "model-performance-detected"
MODEL_PERFORMANCE_SUSPECTED = "model-performance-suspected"
SYSTEM_PERFORMANCE_DETECTED = "system-performance-detected"
SYSTEM_PERFORMANCE_SUSPECTED = "system-performance-suspected"
MM_APP_ANOMALY_DETECTED = "mm-app-anomaly-detected"
MM_APP_ANOMALY_SUSPECTED = "mm-app-anomaly-suspected"
MM_APP_FAILED = "mm-app-failed"
FAILED = "failed"
# Maps every event kind to the list of entity kinds it may be reported for.
_event_kind_entity_map = {
    # All of the kinds below map to a model-endpoint result. The comprehension
    # creates a separate list per key, exactly as a hand-written literal would,
    # and keeps the original key order.
    **{
        event_kind: [EventEntityKind.MODEL_ENDPOINT_RESULT]
        for event_kind in (
            EventKind.DATA_DRIFT_SUSPECTED,
            EventKind.DATA_DRIFT_DETECTED,
            EventKind.CONCEPT_DRIFT_DETECTED,
            EventKind.CONCEPT_DRIFT_SUSPECTED,
            EventKind.MODEL_PERFORMANCE_DETECTED,
            EventKind.MODEL_PERFORMANCE_SUSPECTED,
            EventKind.SYSTEM_PERFORMANCE_DETECTED,
            EventKind.SYSTEM_PERFORMANCE_SUSPECTED,
            EventKind.MM_APP_ANOMALY_DETECTED,
            EventKind.MM_APP_ANOMALY_SUSPECTED,
        )
    },
    EventKind.MM_APP_FAILED: [EventEntityKind.MODEL_MONITORING_APPLICATION],
    EventKind.FAILED: [EventEntityKind.JOB],
}
class Event(pydantic.BaseModel):
    """An event of a given kind, reported for a specific entity."""

    kind: EventKind
    # The default is None, so the annotation says so explicitly.
    timestamp: Optional[Union[str, datetime]] = None  # occurrence time
    entity: EventEntities
    value_dict: Optional[dict] = pydantic.Field(default_factory=dict)

    def is_valid(self) -> bool:
        """Check that the entity kind is allowed for this event kind.

        :return: True if ``self.entity.kind`` is listed for ``self.kind`` in
                 ``_event_kind_entity_map``, False otherwise.
        """
        return self.entity.kind in _event_kind_entity_map[self.kind]
[docs]class AlertActiveState(StrEnum):
ACTIVE = "active"
INACTIVE = "inactive"
[docs]class AlertSeverity(StrEnum):
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
# What should trigger the alert: must be either events (at least 1) or a Prometheus query.
class AlertTrigger(pydantic.BaseModel):
    """The trigger of an alert: a list of event kinds and/or a Prometheus alert."""

    events: list[EventKind] = []
    # The default is None, so the annotation says so explicitly.
    prometheus_alert: Optional[str] = None

    def __eq__(self, other):
        """Compare two triggers by ``prometheus_alert`` and ``events``.

        Returns ``NotImplemented`` when ``other`` is not an ``AlertTrigger``
        (e.g. ``None``), so that such a comparison evaluates as "not equal"
        instead of raising ``AttributeError`` on the missing attributes.
        """
        if not isinstance(other, AlertTrigger):
            return NotImplemented
        return (
            self.prometheus_alert == other.prometheus_alert
            and self.events == other.events
        )
class AlertCriteria(pydantic.BaseModel):
    """Criteria for an alert: an event count and the period it is counted over."""

    count: Annotated[
        int,
        pydantic.Field(
            description="Number of events to wait until notification is sent"
        ),
    ] = 1
    # The default is None, so the annotated type is Optional.
    period: Annotated[
        Optional[str],
        pydantic.Field(
            description="Time period during which event occurred. e.g. 1d, 3h, 5m, 15s"
        ),
    ] = None

    def __eq__(self, other):
        """Compare two criteria by ``count`` and ``period``.

        Returns ``NotImplemented`` when ``other`` is not an ``AlertCriteria``.
        Criteria fields are declared ``Optional`` elsewhere in this module, so
        a comparison against ``None`` can happen; it must evaluate as
        "not equal" rather than raise ``AttributeError``.
        """
        if not isinstance(other, AlertCriteria):
            return NotImplemented
        return self.count == other.count and self.period == other.period
[docs]class ResetPolicy(StrEnum):
MANUAL = "manual"
AUTO = "auto"
class AlertNotification(pydantic.BaseModel):
    """A notification attached to an alert, with an optional cooldown period."""

    notification: Notification
    # The default is None, so the annotated type is Optional.
    cooldown_period: Annotated[
        Optional[str],
        pydantic.Field(
            description="Period during which notifications "
            "will not be sent after initial send. The format of this would be in time."
            " e.g. 1d, 3h, 5m, 15s"
        ),
    ] = None
class AlertConfig(pydantic.BaseModel):
    """Configuration of an alert: what it watches, when it fires, whom it notifies."""

    project: str
    # The default is None, so the annotation says so explicitly.
    id: Optional[int] = None
    name: str
    description: Optional[str] = ""
    summary: Annotated[
        str,
        pydantic.Field(
            # The adjacent literals are concatenated, so each one ends with a
            # space to keep the sentences apart in the resulting description.
            description=(
                "String to be sent in the notifications generated. "
                "e.g. 'Model {{project}}/{{entity}} is drifting.' "
                "Supported variables: project, entity, name"
            )
        ),
    ]
    created: Optional[Union[str, datetime]] = None
    severity: AlertSeverity
    entities: EventEntities
    trigger: AlertTrigger
    criteria: Optional[AlertCriteria]
    reset_policy: ResetPolicy = ResetPolicy.AUTO
    # At least one notification is required (min_items=1).
    notifications: pydantic.conlist(AlertNotification, min_items=1)
    state: AlertActiveState = AlertActiveState.INACTIVE
    count: Optional[int] = 0

    def get_raw_notifications(self) -> list[Notification]:
        """Return the ``Notification`` object wrapped by each alert notification.

        :return: The ``notification`` attribute of every item in
                 ``self.notifications``, in the same order.
        """
        return [
            alert_notification.notification for alert_notification in self.notifications
        ]
[docs]class AlertsModes(StrEnum):
enabled = "enabled"
disabled = "disabled"
class AlertTemplate(pydantic.BaseModel):
    """A template holding pre-defined alert configuration fields."""

    # Template fields that are not shared with created configs
    # The default is None, so the annotation says so explicitly.
    template_id: Optional[int] = None
    template_name: str
    template_description: Optional[str] = (
        "String explaining the purpose of this template"
    )

    # A property that identifies templates that were created by the system and cannot be modified/deleted by the user
    system_generated: bool = False

    # AlertConfig fields that are pre-defined
    # NOTE(review): the two literals below are concatenated without a separating
    # space ("...drifting.'See AlertConfig..."). Left unchanged because this is
    # the field's default value, not just documentation - confirm before fixing.
    summary: Optional[str] = (
        "String to be sent in the generated notifications e.g. 'Model {{project}}/{{entity}} is drifting.'"
        "See AlertConfig.summary description"
    )
    severity: AlertSeverity
    trigger: AlertTrigger
    criteria: Optional[AlertCriteria]
    reset_policy: ResetPolicy = ResetPolicy.AUTO

    # This is slightly different than __eq__ as it doesn't compare everything
    def templates_differ(self, other) -> bool:
        """Tell whether this template differs from ``other`` in the compared fields.

        Only ``template_description``, ``summary``, ``severity``, ``trigger``,
        ``reset_policy`` and ``criteria`` are compared; ``template_id``,
        ``template_name`` and ``system_generated`` are ignored.

        :param other: The template to compare against.
        :return: True if at least one compared field differs, False otherwise.
        """
        return (
            self.template_description != other.template_description
            or self.summary != other.summary
            or self.severity != other.severity
            or self.trigger != other.trigger
            or self.reset_policy != other.reset_policy
            or self.criteria != other.criteria
        )