/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-alertmanager.rules.yaml > alertmanager.rules
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-etcd.yaml > etcd

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="etcdInsufficientMembers" job="kube-etcd" severity="critical" | firing | 2025-10-04 07:24:42.582998871 +0000 UTC | 0 |

Annotations:
- message: etcd cluster "kube-etcd": insufficient members (0).
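
The etcdInsufficientMembers value of 0 means Prometheus sees no healthy members on the kube-etcd job at all, which matches the TargetDown alert for kube-etcd further down: the etcd metrics endpoint simply is not reachable from Prometheus. If etcd should be scraped, one option with the prometheus-operator Helm chart is to point the kubeEtcd scrape at the machines that actually run etcd. A minimal values sketch, assuming the chart's usual kubeEtcd block; the node IPs, secret name and certificate paths are placeholders:

```yaml
# values.yaml sketch for the prometheus-operator / kube-prometheus-stack chart.
kubeEtcd:
  enabled: true
  endpoints:              # hypothetical addresses of the nodes running etcd
    - 192.168.5.101
    - 192.168.5.102
    - 192.168.5.103
  serviceMonitor:
    scheme: https
    caFile: /etc/prometheus/secrets/etcd-certs/ca.crt
    certFile: /etc/prometheus/secrets/etcd-certs/client.crt
    keyFile: /etc/prometheus/secrets/etcd-certs/client.key
```

The certificate paths assume the etcd client certificates are mounted into the Prometheus pod as a secret (for example via prometheus.prometheusSpec.secrets). If etcd metrics are deliberately not exposed, setting kubeEtcd.enabled to false removes the scrape job instead, and the chart typically only renders the etcd rule group when kubeEtcd is enabled.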
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-general.rules.yaml > general.rules

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="TargetDown" job="ceph-metrics" severity="warning" | firing | 2025-10-04 07:24:26.904694511 +0000 UTC | 100 |
| alertname="TargetDown" job="kube-proxy" severity="warning" | firing | 2025-10-04 07:24:56.904694511 +0000 UTC | 100 |
| alertname="TargetDown" job="kube-controller-manager" severity="warning" | firing | 2025-10-04 07:24:56.904694511 +0000 UTC | 100 |
| alertname="TargetDown" job="kube-etcd" severity="warning" | firing | 2025-10-04 07:24:26.904694511 +0000 UTC | 100 |
| alertname="TargetDown" job="kube-scheduler" severity="warning" | firing | 2025-10-04 07:24:26.904694511 +0000 UTC | 100 |

Annotations:
- message: 100% of the ceph-metrics targets are down.
- message: 100% of the kube-proxy targets are down.
- message: 100% of the kube-controller-manager targets are down.
- message: 100% of the kube-etcd targets are down.
- message: 100% of the kube-scheduler targets are down.
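
Every TargetDown above points at a control-plane component (plus the custom ceph-metrics job) that the prometheus-operator chart scrapes by default but that is not reachable from Prometheus, which is common when kube-proxy, kube-controller-manager, kube-scheduler and etcd bind their metrics ports to loopback or are not exposed at all. If those endpoints will not be opened up, a values sketch that simply turns the default scrapes off (these toggles exist in the stable/prometheus-operator and kube-prometheus-stack charts; the ceph-metrics job comes from a separate ServiceMonitor or scrape config and has to be fixed or removed on its own):

```yaml
# values.yaml sketch: stop scraping control-plane components that are unreachable.
# Only do this if you genuinely cannot expose their metrics endpoints.
kubeControllerManager:
  enabled: false
kubeScheduler:
  enabled: false
kubeProxy:
  enabled: false
kubeEtcd:
  enabled: false
```

The alternative is to expose the metrics instead: kube-controller-manager and kube-scheduler accept --bind-address, and kube-proxy has metricsBindAddress in its configuration; on kubeadm-style clusters these usually default to loopback.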

alert: Watchdog
expr: vector(1)
labels:
  severity: none
annotations:
  message: |
    This is an alert meant to ensure that the entire alerting pipeline is functional.
    This alert is always firing, therefore it should always be firing in Alertmanager
    and always fire against a receiver. There are integrations with various notification
    mechanisms that send a notification when this alert is not firing. For example the
    "DeadMansSnitch" integration in PagerDuty.

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="Watchdog" severity="none" | firing | 2025-10-04 07:24:26.904694511 +0000 UTC | 1 |

Annotations:
- message: This is an alert meant to ensure that the entire alerting pipeline is functional. This alert is always firing, therefore it should always be firing in Alertmanager and always fire against a receiver. There are integrations with various notification mechanisms that send a notification when this alert is not firing. For example the "DeadMansSnitch" integration in PagerDuty.
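
Since Watchdog is always firing, it is only useful if Alertmanager forwards it to something that alerts on silence. A minimal Alertmanager route sketch for that, assuming a hypothetical heartbeat webhook URL (PagerDuty's DeadMansSnitch integration or any similar dead man's switch service):

```yaml
# alertmanager.yaml fragment (sketch): route Watchdog to a heartbeat receiver.
route:
  receiver: default
  routes:
    - match:
        alertname: Watchdog
      receiver: watchdog-heartbeat
      repeat_interval: 5m      # keep pinging the heartbeat while the pipeline is healthy
receivers:
  - name: default
  - name: watchdog-heartbeat
    webhook_configs:
      - url: https://example.com/heartbeat/your-snitch-id   # placeholder URL
```

The heartbeat service then pages when notifications stop arriving, i.e. when Prometheus, Alertmanager or the path between them is broken.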
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-kube-prometheus-node-alerting.rules.yaml > kube-prometheus-node-alerting.rules
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-kubernetes-absent.yaml > kubernetes-absent

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="KubeControllerManagerDown" severity="critical" | firing | 2025-10-04 07:24:38.65408944 +0000 UTC | 1 |

Annotations:
- message: KubeControllerManager has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown

alert: KubeSchedulerDown
expr: absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: KubeScheduler has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="KubeSchedulerDown" severity="critical" | firing | 2025-10-04 07:24:38.65408944 +0000 UTC | 1 |

Annotations:
- message: KubeScheduler has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown

alert: KubeAPIDown
expr: absent(up{job="apiserver"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: KubeAPI has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown

alert: KubeStateMetricsDown
expr: absent(up{job="kube-state-metrics"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: KubeStateMetrics has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown

alert: KubeletDown
expr: absent(up{job="kubelet"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: Kubelet has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown

alert: NodeExporterDown
expr: absent(up{job="node-exporter"} == 1)
for: 15m
labels:
  severity: critical
annotations:
  message: NodeExporter has disappeared from Prometheus target discovery.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown
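
KubeControllerManagerDown and KubeSchedulerDown are the kubernetes-absent counterparts of the TargetDown warnings in general.rules: absent(up{job="..."} == 1) returns a value as soon as no target of that job reports up. If the decision is to stop scraping those components rather than to expose their metrics, the rule groups can usually be switched off through the chart values as well. A sketch, assuming the defaultRules.rules toggles of the stable/prometheus-operator chart (names vary between chart versions):

```yaml
# values.yaml sketch: disable default rule groups for components this cluster does not scrape.
# kubernetesAbsent is a single group that also covers KubeAPIDown, KubeletDown,
# KubeStateMetricsDown and NodeExporterDown, so disabling it removes real coverage.
defaultRules:
  rules:
    etcd: false
    kubernetesAbsent: false
```

On chart versions without per-group toggles, an Alertmanager silence or a route that drops these specific alertnames is the less invasive alternative.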
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-kubernetes-apps.yaml > kubernetes-apps

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="KubeJobCompletion" endpoint="http" instance="10.244.1.48:8080" job="kube-state-metrics" job_name="gitlab-task-runner-backup-1765065600" namespace="gitlab" pod="prometheus-kube-state-metrics-58fbd9f8ff-rvcg7" service="prometheus-kube-state-metrics" severity="warning" | firing | 2025-12-07 00:00:44.021059511 +0000 UTC | 1 |

Annotations:
- message: Job gitlab/gitlab-task-runner-backup-1765065600 is taking more than one hour to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion

alert: KubeJobFailed
expr: kube_job_status_failed{job="kube-state-metrics"} > 0
for: 1h
labels:
  severity: warning
annotations:
  message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
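
KubeJobCompletion is firing for gitlab/gitlab-task-runner-backup-1765065600, a timestamped Job that a CronJob presumably spawned at 2025-12-07 00:00 UTC and that has been sitting unfinished for more than an hour; KubeJobFailed complements it by flagging Jobs that ended in failure. Beyond fixing the backup itself, stale Jobs can be kept from piling up (and from keeping these alerts pinned) by bounding the CronJob's history and runtime. A sketch with hypothetical names, schedule and image:

```yaml
# CronJob sketch: keep little Job history and fail stuck runs instead of letting them hang.
apiVersion: batch/v1              # batch/v1beta1 on clusters older than 1.21
kind: CronJob
metadata:
  name: gitlab-task-runner-backup # hypothetical; match the real CronJob
  namespace: gitlab
spec:
  schedule: "0 0 * * *"           # placeholder schedule
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      activeDeadlineSeconds: 3600 # mark the Job failed rather than letting it run forever
      backoffLimit: 2
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: backup
              image: registry.example.com/task-runner:latest   # placeholder image
              args: ["backup-utility"]                          # placeholder command
```

Deleting the stuck Job (or letting the history limits prune it) clears the kube_job_* series that keep the alert active.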
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-kubernetes-resources.yaml > kubernetes-resources

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="CPUThrottlingHigh" container="kube-flannel" namespace="kube-system" pod="kube-flannel-ds-9vx64" severity="warning" | firing | 2025-10-04 07:24:58.714203701 +0000 UTC | 38.73873873873874 |
| alertname="CPUThrottlingHigh" container="kube-flannel" namespace="kube-system" pod="kube-flannel-ds-kjjh4" severity="warning" | firing | 2025-10-11 19:41:58.714203701 +0000 UTC | 44.91869918699187 |
| alertname="CPUThrottlingHigh" container="kube-flannel" namespace="kube-system" pod="kube-flannel-ds-rj8mc" severity="warning" | firing | 2025-10-04 07:25:28.714203701 +0000 UTC | 41.34615384615385 |

Annotations:
- message: 39% throttling of CPU in namespace kube-system for container kube-flannel in pod kube-flannel-ds-9vx64.
- message: 45% throttling of CPU in namespace kube-system for container kube-flannel in pod kube-flannel-ds-kjjh4.
- message: 41% throttling of CPU in namespace kube-system for container kube-flannel in pod kube-flannel-ds-rj8mc.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
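
All three CPUThrottlingHigh alerts point at the kube-flannel container, which the upstream flannel manifest caps at a small CPU limit (historically cpu: 100m), so the CFS quota is being hit roughly 39-45% of the time on these nodes. If that level of throttling is not acceptable, the usual fix is to raise (or drop) the CPU limit on the DaemonSet. A sketch of the relevant fragment; the numbers are assumptions to tune, not recommendations:

```yaml
# kube-flannel DaemonSet fragment (sketch): give the container more CPU headroom.
spec:
  template:
    spec:
      containers:
        - name: kube-flannel
          resources:
            requests:
              cpu: 100m
              memory: 50Mi
            limits:
              cpu: 300m        # raised from the stock 100m as an example
              memory: 50Mi
```

Alternatively, if moderate throttling of flannel is considered benign, the CPUThrottlingHigh threshold can be relaxed in the rule itself.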

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="KubeMemOvercommit" severity="warning" | firing | 2025-10-11 19:51:28.714203701 +0000 UTC | 0.6725526718056776 |

Annotations:
- message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-kubernetes-storage.yaml > kubernetes-storage

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="KubePersistentVolumeFullInFourDays" endpoint="https-metrics" instance="192.168.5.103:10250" job="kubelet" namespace="socialboost" node="socialboost3" persistentvolumeclaim="jira-data" service="prometheus-prometheus-oper-kubelet" severity="critical" | firing | 2025-12-10 06:19:38.880689489 +0000 UTC | 0.05364516213272585 |

Annotations:
- message: Based on recent sampling, the PersistentVolume claimed by jira-data in Namespace socialboost is expected to fill up within four days. Currently 0.05% is available.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays

| Labels | State | Active Since | Value |
| --- | --- | --- | --- |
| alertname="KubePersistentVolumeUsageCritical" endpoint="https-metrics" instance="192.168.5.103:10250" job="kubelet" namespace="socialboost" node="socialboost3" persistentvolumeclaim="jira-data" service="prometheus-prometheus-oper-kubelet" severity="critical" | firing | 2025-10-04 09:56:38.880689489 +0000 UTC | 0.05364516213272585 |
| alertname="KubePersistentVolumeUsageCritical" endpoint="https-metrics" instance="192.168.5.103:10250" job="kubelet" namespace="default" node="socialboost3" persistentvolumeclaim="registry-docker-registry" service="prometheus-prometheus-oper-kubelet" severity="critical" | firing | 2025-10-04 10:01:08.880689489 +0000 UTC | 2.594532216805217 |

Annotations:
- message: The PersistentVolume claimed by jira-data in Namespace socialboost is only 0.05% free.
- message: The PersistentVolume claimed by registry-docker-registry in Namespace default is only 2.59% free.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
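
Both storage alerts concern the jira-data claim in socialboost (0.05% free and projected to fill within four days) and the registry-docker-registry claim in default (2.59% free). After cleaning up what can be cleaned, the durable fix is usually to grow the claim. A sketch of an expanded PVC, assuming the backing StorageClass has allowVolumeExpansion: true; the size, access mode and class name are assumptions to replace with the claim's real values:

```yaml
# PersistentVolumeClaim sketch: request a larger size for the nearly full volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jira-data
  namespace: socialboost
spec:
  accessModes:
    - ReadWriteOnce             # keep whatever the claim already uses
  storageClassName: standard    # keep the claim's existing class
  resources:
    requests:
      storage: 20Gi             # new, larger size (placeholder)
```

Depending on the volume plugin, the filesystem resize may only complete once the pod using the claim is restarted.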
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-kubernetes-system.yaml > kubernetes-system

alert: KubeletTooManyPods
expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
for: 15m
labels:
  severity: warning
annotations:
  message: Kubelet {{ $labels.instance }} is running {{ $value }} Pods, close to the limit of 110.
  runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
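
KubeletTooManyPods warns at 90% of the kubelet's default maxPods of 110, which is where the hard-coded 110 * 0.9 in the expression comes from. If a node is intentionally sized to run more pods, the kubelet limit can be raised and the rule's threshold adjusted to match. A KubeletConfiguration sketch, with the new limit as an assumption:

```yaml
# KubeletConfiguration fragment (sketch): raise the per-node pod limit.
# The node needs the CPU, memory and pod CIDR space to actually host this many pods.
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
maxPods: 150
```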
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-node-network.yaml > node-network
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-node-time.yaml > node-time
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-prometheus-operator.yaml > prometheus-operator
/etc/prometheus/rules/prometheus-prometheus-prometheus-oper-prometheus-rulefiles-0/monitoring-prometheus-prometheus-oper-prometheus.yaml > prometheus