知乎专栏 |
# Alertmanager service definition for Docker Compose.
# NOTE(review): presumably nested under the compose file's `services:` key,
# next to a `prometheus` service and a declared `alertmanager` named volume;
# neither is visible in this snippet -- confirm.
alertmanager:
  image: prom/alertmanager:latest
  container_name: alertmanager
  hostname: alertmanager
  restart: always
  depends_on:
    - prometheus
  ports:
    - "9093:9093"
  volumes:
    # Configuration file supplied from the host working directory.
    - ${PWD}/alertmanager/config.yml:/etc/alertmanager/config.yml
    # Named volume mounted at /alertmanager.
    - alertmanager:/alertmanager
  # Folded scalar: both flags are joined into one space-separated string.
  command: >-
    --config.file=/etc/alertmanager/config.yml
    --cluster.advertise-address=0.0.0.0:9093
配置 prometheus.yml
# prometheus.yml fragment: route alerts to Alertmanager and scrape its metrics.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["alertmanager:9093"]

scrape_configs:
  - job_name: 'alertmanager'
    metrics_path: "/metrics"
    # A scrape job needs targets; without this the job collects nothing.
    static_configs:
      - targets: ["alertmanager:9093"]
检查 Alertmanager 是否正常工作
root@production:~# curl -s http://localhost:9093/metrics | head
# HELP alertmanager_alerts How many alerts by state.
# TYPE alertmanager_alerts gauge
alertmanager_alerts{state="active"} 0
alertmanager_alerts{state="suppressed"} 0
# HELP alertmanager_alerts_invalid_total The total number of received alerts that were invalid.
# TYPE alertmanager_alerts_invalid_total counter
alertmanager_alerts_invalid_total{version="v1"} 0
alertmanager_alerts_invalid_total{version="v2"} 0
# HELP alertmanager_alerts_received_total The total number of received alerts.
# TYPE alertmanager_alerts_received_total counter
解决时区问题:Docker 镜像默认使用 UTC,我们需要改为 GMT+8。
neo@MacBook-Pro-Neo ~/workspace/docker/prometheus % docker exec -it alertmanager sh
/alertmanager $ cat /etc/localtime
TZif2UTCTZif2?UTC UTC0
neo@MacBook-Pro-Neo ~/workspace/docker/prometheus % docker-compose cp alertmanager:/usr/share/zoneinfo/PRC Shanghai
查看反馈信息
neo@MacBook-Pro-Neo ~/workspace/docker/prometheus % curl -X OPTIONS 127.0.0.1:9093/api/v1/alerts -v
* Trying 127.0.0.1...
* TCP_NODELAY set
* Connected to 127.0.0.1 (127.0.0.1) port 9093 (#0)
> OPTIONS /api/v1/alerts HTTP/1.1
> Host: 127.0.0.1:9093
> User-Agent: curl/7.64.1
> Accept: */*
>
< HTTP/1.1 200 OK
< Access-Control-Allow-Headers: Accept, Authorization, Content-Type, Origin
< Access-Control-Allow-Methods: GET, POST, DELETE, OPTIONS
< Access-Control-Allow-Origin: *
< Access-Control-Expose-Headers: Date
< Cache-Control: no-cache, no-store, must-revalidate
< Date: Mon, 23 Aug 2021 12:18:20 GMT
< Content-Length: 0
<
* Connection #0 to host 127.0.0.1 left intact
* Closing connection 0
SMTP 配置
# Alertmanager configuration that delivers alerts by e-mail over SMTP.
global:
  # Resolve timeout; the default is 5 minutes.
  resolve_timeout: 5m
  # SMTP server used to relay the mail (domain matches the addresses below).
  smtp_smarthost: 'smtp.netkiller.cn:25'
  # Sender address.
  smtp_from: 'monitor@netkiller.cn'
  # SMTP account name.
  smtp_auth_username: 'monitor@netkiller.cn'
  # SMTP account password (placeholder -- replace with the real secret).
  smtp_auth_password: '******'

route:
  # Label(s) used to group alerts.
  group_by: ['alertname']
  # How long to wait before sending the first notification for a group.
  group_wait: 10s
  # Wait time before sending new alerts.
  group_interval: 10s
  # Interval at which repeated alerts are re-sent.
  repeat_interval: 1m
  # Receiver that gets the alerts; must match a `name` under `receivers`.
  receiver: 'email'

receivers:
  - name: 'email'
    # E-mail settings for this receiver.
    email_configs:
      # Address that receives the alert mail.
      - to: 'monitor@netkiller.cn'
通过 webhook 触发手机短信发送程序
# Alertmanager configuration whose only route sends alerts to a webhook.
# No global overrides are set (explicit null, same as the bare `global:` key).
global: null

route:
  receiver: webhook
  group_by:
    - "alertname"
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h

receivers:
  - name: 'webhook'
    webhook_configs:
      # HTTP endpoint that is called with the alert payload.
      - url: 'http://alertmanager-webhook:8080/webhook'
docker-compose.yaml 容器编排文件
# Compose file that runs the alertmanager-webhook container.
version: '3.9'

services:
  alertmanager-webhook:
    image: netkiller/alertmanager
    container_name: alertmanager-webhook
    restart: always
    hostname: alertmanager-webhook
    # Static /etc/hosts entry mapping the hostname to a fixed IP.
    extra_hosts:
      - dysmsapi.aliyuncs.com:106.11.45.35
    environment:
      TZ: Asia/Shanghai
      # Quoted so the option string is always passed through as a literal.
      JAVA_OPTS: '-Xms256m -Xmx1024m -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m'
    ports:
      # Quoted, as Compose port mappings should be, so YAML never retypes it.
      - "8080:8080"
    volumes:
      - ${PWD}/alertmanager/application.properties:/app/application.properties
      - /tmp/alertmanager:/tmp
    working_dir: /app
    command: --spring.config.location=/app/application.properties
application.properties 配置文件
# Post a single hand-written test alert to the local Alertmanager.
# The assignment must be its own statement: written as a prefix on the curl
# line, "$alerts_message" would be expanded before the variable is set.
alerts_message='[
  {
    "labels": {
      "alertname": "磁盘满",
      "dev": "sda1",
      "instance": "example",
      "msgtype": "testing"
    },
    "annotations": {
      "info": "/dev/vdb1 磁盘空间满",
      "summary": "/dev/vdb1 磁盘空间满"
    }
  }
]'

# Uses API v2: v1 is no longer served by current prom/alertmanager images,
# and v2 requires an explicit JSON content type.
curl -XPOST -H 'Content-Type: application/json' -d "$alerts_message" http://127.0.0.1:9093/api/v2/alerts
#!/usr/bin/env bash
# Post two test alerts with the same alertname to the local Alertmanager.

# Kept as a separate statement so the variable is set before curl's
# arguments are expanded.
alerts_message='[
  {
    "labels": {
      "alertname": "DiskRunningFull",
      "dev": "sda1",
      "instance": "example1",
      "msgtype": "testing"
    },
    "annotations": {
      "info": "The disk sda1 is running full",
      "summary": "please check the instance example1"
    }
  },
  {
    "labels": {
      "alertname": "DiskRunningFull",
      "dev": "sda2",
      "instance": "example1",
      "msgtype": "testing"
    },
    "annotations": {
      "info": "The disk sda2 is running full",
      "summary": "please check the instance example1",
      "runbook": "the following link http://test-url should be clickable"
    }
  }
]'

# Uses API v2: v1 is no longer served by current prom/alertmanager images,
# and v2 requires an explicit JSON content type.
curl -XPOST -H 'Content-Type: application/json' -d "$alerts_message" http://127.0.0.1:9093/api/v2/alerts