| 知乎专栏 |
alertmanager:
image: prom/alertmanager:latest
container_name: alertmanager
hostname: alertmanager
restart: always
volumes:
- ${PWD}/alertmanager/config.yml:/etc/alertmanager/config.yml
- alertmanager:/alertmanager
ports:
- "9093:9093"
depends_on:
- prometheus
command:
--config.file=/etc/alertmanager/config.yml
--cluster.advertise-address=0.0.0.0:9093
配置 prometheus.yml
alerting:
alertmanagers:
- static_configs:
- targets: ["alertmanager:9093"]
scrape_configs:
- job_name: 'alertmanager'
metrics_path: "/metrics"
检查 Alertmanager 是否正常工作
root@production:~# curl -s http://localhost:9093/metrics | head
# HELP alertmanager_alerts How many alerts by state.
# TYPE alertmanager_alerts gauge
alertmanager_alerts{state="active"} 0
alertmanager_alerts{state="suppressed"} 0
# HELP alertmanager_alerts_invalid_total The total number of received alerts that were invalid.
# TYPE alertmanager_alerts_invalid_total counter
alertmanager_alerts_invalid_total{version="v1"} 0
alertmanager_alerts_invalid_total{version="v2"} 0
# HELP alertmanager_alerts_received_total The total number of received alerts.
# TYPE alertmanager_alerts_received_total counter
解决时区问题,默认 docker 镜像使用 UTC,我们需要改为GMT+8
neo@MacBook-Pro-Neo ~/workspace/docker/prometheus % docker exec -it alertmanager sh /alertmanager $ cat /etc/localtime TZif2UTCTZif2?UTC UTC0 neo@MacBook-Pro-Neo ~/workspace/docker/prometheus % docker-compose cp alertmanager:/usr/share/zoneinfo/PRC Shanghai
查看反馈信息
neo@MacBook-Pro-Neo ~/workspace/docker/prometheus % curl -X OPTIONS 127.0.0.1:9093/api/v1/alerts -v * Trying 127.0.0.1... * TCP_NODELAY set * Connected to 127.0.0.1 (127.0.0.1) port 9093 (#0) > OPTIONS /api/v1/alerts HTTP/1.1 > Host: 127.0.0.1:9093 > User-Agent: curl/7.64.1 > Accept: */* > < HTTP/1.1 200 OK < Access-Control-Allow-Headers: Accept, Authorization, Content-Type, Origin < Access-Control-Allow-Methods: GET, POST, DELETE, OPTIONS < Access-Control-Allow-Origin: * < Access-Control-Expose-Headers: Date < Cache-Control: no-cache, no-store, must-revalidate < Date: Mon, 23 Aug 2021 12:18:20 GMT < Content-Length: 0 < * Connection #0 to host 127.0.0.1 left intact * Closing connection 0
SMTP 配置
global: resolve_timeout: 5m #处理超时时间,默认为5min smtp_smarthost: 'smtp.nejtkiller.cn:25' # 邮箱smtp服务器代理 smtp_from: 'monitor@netkiller.cn' # 发送邮箱名称 smtp_auth_username: 'monitor@netkiller.cn' # 邮箱名称 smtp_auth_password: '******' #邮箱密码
route: group_by: ['alertname'] # 报警分组名称 group_wait: 10s # 最初即第一次等待多久时间发送一组警报的通知 group_interval: 10s # 在发送新警报前的等待时间 repeat_interval: 1m # 发送重复警报的周期 receiver: 'email' # 发送警报的接收者的名称,以下receivers name的名称
receivers:
- name: 'email' # 警报
email_configs: # 邮箱配置
- to: 'monitor@netkiller.cn' # 接收警报的email配置
通过 webhook 触发手机短信发送程序
global:
route:
group_by: ["alertname"]
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: webhook
receivers:
- name: 'webhook'
webhook_configs:
- url: 'http://alertmanager-webhook:8080/webhook'
docker-compose.yaml 容器编排文件
version: '3.9'
services:
alertmanager-webhook:
image: netkiller/alertmanager
container_name: alertmanager-webhook
restart: always
hostname: alertmanager-webhook
extra_hosts:
- dysmsapi.aliyuncs.com:106.11.45.35
environment:
TZ: Asia/Shanghai
JAVA_OPTS: -Xms256m -Xmx1024m -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m
ports:
- 8080:8080
volumes:
- ${PWD}/alertmanager/application.properties:/app/application.properties
- /tmp/alertmanager:/tmp
working_dir: /app
command:
--spring.config.location=/app/application.properties
application.properties 配置文件
alerts_message='[
{
"labels": {
"alertname": "磁盘满",
"dev": "sda1",
"instance": "example",
"msgtype": "testing"
},
"annotations": {
"info": "/dev/vdb1 磁盘空间满",
"summary": "/dev/vdb1 磁盘空间满"
}
}
]'
curl -XPOST -d"$alerts_message" http://127.0.0.1:9093/api/v1/alerts
#!/usr/bin/env bash
alerts_message='[
{
"labels": {
"alertname": "DiskRunningFull",
"dev": "sda1",
"instance": "example1",
"msgtype": "testing"
},
"annotations": {
"info": "The disk sda1 is running full",
"summary": "please check the instance example1"
}
},
{
"labels": {
"alertname": "DiskRunningFull",
"dev": "sda2",
"instance": "example1",
"msgtype": "testing"
},
"annotations": {
"info": "The disk sda2 is running full",
"summary": "please check the instance example1",
"runbook": "the following link http://test-url should be clickable"
}
}
]'
curl -XPOST -d"$alerts_message" http://127.0.0.1:9093/api/v1/alerts