此文章只是記錄一下搭建一套prometheus以及微信告警的步驟,沒有詳細講解原理。
下面文字包括的組件比較多,可以選擇性安裝。
- 安裝node_exporter監控服務器基礎信息
1.1 下載node_exporter并安裝
# 下載地址https://github.com/prometheus/node_exporter/releases/download/v1.0.1/node_exporter-1.0.1.linux-amd64.tar.gz
# 創建主目錄,并上傳安裝包
mkdir /opt/monitor
cd /opt/monitor
tar -xzvf node_exporter-1.0.1.linux-amd64.tar.gz
mv node_exporter-1.0.1.linux-amd64 node_exporter
# 默認端口是9100,需要考慮是否端口沖突
cd /opt/monitor/node_exporter
nohup ./node_exporter --web.listen-address=:9100 &
1.2 測試node_exporter是否可以采集到數據
# 瀏覽器訪問,或者以下curl命令
curl http://xx.xx.xx.xx:9100/metrics
1.3 將node_exporter服務化,方便重啟,以及設置自啟動
# 服務化,端口可更改
echo '[Unit]
Description=node_exporter
Documentation=node_exporter
[Service]
User=finance
Group=finance
ExecStart=/opt/monitor/node_exporter/node_exporter --web.listen-address=:9100
[Install]
WantedBy=multi-user.target' >/usr/lib/systemd/system/node_exporter.service
# 設置自啟動
systemctl enable --now node_exporter
- 安裝mysqld_exporter監控mysql
2.1 下載mysqld_exporter并安裝
# 下載地址https://github.com/prometheus/mysqld_exporter/releases/download/v0.12.1/mysqld_exporter-0.12.1.linux-amd64.tar.gz
# 安裝上傳安裝包并解壓
cd /opt/monitor
tar -xzvf mysqld_exporter-0.12.1.linux-amd64.tar.gz
mv mysqld_exporter-0.12.1.linux-amd64 mysqld_exporter
# 需要在監控的目標數據庫創建賬號用于收集數據庫監控指標
CREATE USER 'exporter'@'localhost' IDENTIFIED BY 'pwd4test';
GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'localhost';
flush privileges;
# 創建mysqld_exporter配置文件
echo '[client]
user=exporter
password=pwd4test' > /opt/monitor/mysqld_exporter/.my.cnf
# 啟動mysqld_exporter
cd mysqld_exporter
nohup ./mysqld_exporter --config.my-cnf=.my.cnf --web.listen-address=:9104 &
2.2 測試驗證
# 可以瀏覽器訪問,也可以curl
curl http://xx.xx.xx.xx:9104/metrics
2.3 服務化
# 服務化,端口可更改
echo '[Unit]
Description=mysqld_exporter
Documentation=mysqld_exporter
[Service]
User=finance
Group=finance
ExecStart=/opt/monitor/mysqld_exporter/mysqld_exporter --config.my-cnf=/opt/monitor/mysqld_exporter/.my.cnf --web.listen-address=:9104
[Install]
WantedBy=multi-user.target' > /usr/lib/systemd/system/mysqld_exporter.service
# 配置自啟動
systemctl daemon-reload
systemctl enable --now mysqld_exporter
- 安裝process_exporter監控指定進程
3.1 下載process_exporter并安裝
# 下載地址https://github.com/ncabatoff/process-exporter/releases/download/v0.7.5/process-exporter-0.7.5.linux-amd64.tar.gz
# 解壓
cd /opt/monitor
tar -xzvf process-exporter-0.7.5.linux-amd64.tar.gz
mv process-exporter-0.7.5.linux-amd64 process-exporter
# 創建配置文件,以下實例為監控redis_exporter進程
cd process-exporter
vi process-name.yml
process_names:
- name: "{{.Matches}}"
cmdline:
- 'redis_exporter'
# 啟動
nohup ./process-exporter -config.path process-name.yml --web.listen-address=:9256 &
3.2 測試驗證
# 可以瀏覽器訪問,或者curl
curl http://xx.xx.xx.xx:9256/metrics
3.3 服務化
# 服務化,端口適當修改
echo '[Unit]
Description=process_exporter
Documentation=process_exporter
[Service]
User=finance
Group=finance
ExecStart=/opt/monitor/process-exporter/process-exporter -config.path /opt/monitor/process-exporter/process-name.yml --web.listen-address=:9256
[Install]
WantedBy=multi-user.target' > /usr/lib/systemd/system/process_exporter.service
# 設置服務自啟動
systemctl enable --now process_exporter
- 安裝redis_exporter監控redis集群
4.1 下載安裝redis_exporter
# 下載地址https://github.com/oliver006/redis_exporter/releases/download/v1.3.5/redis_exporter-v1.3.5.linux-amd64.tar.gz
# 解壓安裝
cd /opt/monitor
tar -xzvf redis_exporter-v1.3.5.linux-amd64.tar.gz
mv redis_exporter-v1.3.5.linux-amd64 redis_exporter
# redis如果是單節點,以下命令就可以監控單節點
# 如果是集群,那么還需要配合prometheus.yml來實現,不過此處只需要監控一個節點就行
cd redis_exporter
nohup ./redis_exporter --web.listen-address=:9121 -redis.addr localhost:7000 -redis.password pwd4test &
4.2 測試驗證
# 瀏覽器訪問,或者curl
curl http://xx.xx.xx.xx:9121/metrics
4.3 服務化
#服務化,注意適當修改端口
echo '[Unit]
Description=redis_exporter
Documentation=redis_exporter
[Service]
User=finance
Group=finance
ExecStart=/opt/monitor/redis_exporter/redis_exporter --web.listen-address=:9121 -redis.addr localhost:7000 -redis.password pwd4test
[Install]
WantedBy=multi-user.target' > /usr/lib/systemd/system/redis_exporter.service
#配置自啟動
systemctl enable --now redis_exporter
- 安裝配置alertmanager用于實現告警(此處只配置了微信告警)
# 此步驟之前,首先需要申請企業微信并拿到以下信息,[可以參考這篇文章](https://blog.csdn.net/qq_25934401/article/details/83088344)
#1. 訪問[企業微信網站](https://work.weixin.qq.com/),注冊企業微信
#2. 訪問應用管理,創建“第三方應用”,并填寫信息
#3. 點擊應用,查詢到以下信息:
#AgentId
#Secret
#DepartmentId
#Company ID
#4. allow users里面,添加用戶,這樣才能接收到消息
#5. 要在微信中接收企業微信告警消息,分兩步:
#5.1 掃描企業二維碼加入企業,并且在通訊錄把賬號加入對應部門
#5.2 掃描“微工作臺”二維碼并關(guān)注
5.1 安裝配置alertmanager
# 下載安裝包 https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz
# 解壓安裝
cd /opt/monitor
tar -xzvf alertmanager-0.21.0.linux-amd64.tar.gz
mv alertmanager-0.21.0.linux-amd64 alertmanager
cd /opt/monitor/alertmanager
cp alertmanager.yml alertmanager.yml.bk
# 修改配置文件
vim alertmanager.yml
# 以下僅為實例
/opt/monitor/alertmanager/alertmanager.yml文件內容
global:
resolve_timeout: 5m
wechat_api_corp_id: 'wwf9ad187705xxxx'
wechat_api_url: 'https://qyapi.weixin.qq.com/cgi-bin/'
wechat_api_secret: 'yytUedAXmHOOdZODZFeI_QDuzYOX8PGRouxxxx'
templates:
- '/opt/monitor/alertmanager/template/wechat.tmpl'
route:
group_by: ['alertname']
group_wait: 10s
group_interval: 10s
repeat_interval: 20m
receiver: 'wechat'
receivers:
- name: 'wechat'
wechat_configs:
- send_resolved: true
to_party: '4'
agent_id: 1000005
corp_id: 'wwf9ad18770xxxx'
api_url: 'https://qyapi.weixin.qq.com/cgi-bin/'
api_secret: 'yytUedAXmHOOdZODZFeI_QDuzYOX8PGRou6Gkg8xxxx'
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
/opt/monitor/alertmanager/template/wechat.tmpl文件內容
{{ define "wechat.default.message" }}
{{ range .Alerts }}
告警主題: {{ .Annotations.summary }}
========start=========
告警程序: prometheus_alert
告警級別: {{ .Labels.severity }}
告警類型: {{ .Labels.alertname }}
故障主機: {{ .Labels.instance }}
告警詳情: {{ .Annotations.description }}
觸發時間: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
=========end===========
{{ end }}
{{ end }}
~
5.2 啟動并驗證
# 啟動
nohup ./alertmanager --web.listen-address=:9093 &
# 瀏覽器訪問,或者curl
curl http://xx.xx.xx.xx:9093/metrics
5.3 服務化
# 服務化,注意適當修改端口
echo '[Unit]
Description=alertmanager
Documentation=alertmanager
[Service]
User=finance
Group=finance
ExecStart=/opt/monitor/alertmanager/alertmanager --config.file=/opt/monitor/alertmanager/alertmanager.yml --web.listen-address=:9093
[Install]
WantedBy=multi-user.target' > /usr/lib/systemd/system/alertmanager.service
# 設置自啟動
systemctl enable --now alertmanager
- 安裝配置Prometheus服務端
6.1 下載安裝prometheus
# 下載地址https://github.com/prometheus/prometheus/releases/download/v2.23.0/prometheus-2.23.0.linux-amd64.tar.gz
# 解壓安裝配置
cd /opt/monitor
tar -xzvf prometheus-2.23.0.linux-amd64.tar.gz
mv prometheus-2.23.0.linux-amd64 prometheus
cd prometheus
# 修改prometheus.yml主配置文件,參考下面文件內容
# 創建存放rule的目錄
mkdir rules
# 創建rule文件來規定告警項目,參考下面文件內容
/opt/monitor/prometheus/prometheus.yml內容如下,僅供參考
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets: ["localhost:9093"]
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
- "rules/nodes.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ['localhost:9090']
- job_name: "node"
static_configs:
- targets: ["localhost:9100"]
- job_name: "mysql"
static_configs:
- targets: ["localhost:9104"]
- job_name: 'redis_exporter_targets'
static_configs:
- targets:
- redis://localhost:7000
- redis://localhost:7001
- redis://localhost:7002
- redis://localhost:7003
- redis://localhost:7004
- redis://localhost:7005
metrics_path: /scrape
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: localhost:9121
## config for scraping the exporter itself
- job_name: 'redis'
static_configs:
- targets:
- localhost:9121
- job_name: 'process'
static_configs:
- targets: ['localhost:9256']
/opt/monitor/prometheus/rules/nodes.yml內容如下,僅供參考
groups:
- name: node
rules:
- alert: "服務器掛了--測試信息請忽略"
expr: up{job="prometheus"} == 0 or up{job="node"} == 0
for: 1m
labels:
severity: warning
annotations:
summary: "服務器{{ $labels.instance }} 掛了"
description: "當前服務器無法連接,服務器掛掉了,或者node_exporter掛了,請立即查看!"
- name: process
rules:
- alert: "指定監控進程掛了"
expr: namedprocess_namegroup_num_threads == 0
for: 1m
labels:
severity: warning
annotations:
summary: "指定監控進程{{ $labels.groupname }} 掛了"
description: "指定監控進程{{ $labels.groupname }} 掛了,請立即查看相關應用是否正常啟動!"
- name: redis
rules:
- alert: "Redis掛了 -- 測試信息請忽略"
expr: redis_up == 0
for: 1m
labels:
severity: error
annotations:
summary: "Redis down (instance {{ $labels.instance }})"
description: "Redis 掛了,請立即查看。 VALUE = {{ $value }}\n LABELS: {{ $labels }}"
6.2 啟動并驗證
# 啟動,開啟web.enable-lifecycle,方便動態加載配置
nohup ./prometheus --web.enable-lifecycle &
# 瀏覽器驗證,或者curl
curl http://xx.xx.xx.xx:9090/
6.3 服務化
# 服務化,適當修改路徑,以及端口
echo '[Unit]
Description=Prometheus Monitoring System
Documentation=Prometheus Monitoring System
[Service]
User=finance
Group=finance
ExecStart=/opt/monitor/prometheus/prometheus \
--config.file=/opt/monitor/prometheus/prometheus.yml --web.enable-admin-api \
--storage.tsdb.retention=90d --web.enable-lifecycle --web.listen-address=:9090 --storage.tsdb.path=/opt/monitor/prometheus/data
[Install]
WantedBy=multi-user.target' > /usr/lib/systemd/system/prometheus.service
#配置自啟動
systemctl enable --now prometheus
6.4 動態修改配置命令
curl -X POST http://localhost:9090/-/reload
- 安裝配置grafana用于監控信息展示
7.1 下載安裝配置grafana
# 下載
cd /opt/monitor
wget https://dl.grafana.com/oss/release/grafana-7.3.5-1.x86_64.rpm
# 安裝之后默認配置信息如下:
Installs binary to /usr/sbin/grafana-server
Copies init.d script to /etc/init.d/grafana-server
Installs default file (environment vars) to /etc/sysconfig/grafana-server
Copies configuration file to /etc/grafana/grafana.ini
Installs systemd service (if systemd is available) name grafana-server.service
The default configuration uses a log file at /var/log/grafana/grafana.log
The default configuration specifies an sqlite3 database at /var/lib/grafana/grafana.db
#安裝grafana
cd /opt/monitor
yum install initscripts urw-fonts wget
rpm -Uvh grafana-7.3.5-1.x86_64.rpm
7.2 啟動以及驗證
# 啟動
systemctl daemon-reload
systemctl start grafana-server
systemctl status grafana-server
# 瀏覽器驗證,設置密碼
http://xx.xx.xx.xx:3000/login
7.3 配置添加prometheus數據源
Configuration -> Add data source -> Prometheus
URL: http://10.247.121.6:9090
Save & Test
7.4 導入展示監控信息模板
# node_exporter
https://grafana.com/dashboards/9276
# mysqld_exporter
https://grafana.com/dashboards/7362
# process_exporter
# redis_exporter
https://grafana.com/dashboards/763