[root@tuiguang master-slave]# tree -L 2
├── conf
│ ├── alertmanager.yml
│ ├── master.conf
│ ├── mysql.yml
│ ├── prometheus-webhook-dingtalk.yml
│ ├── prometheus.yml
│ └── slave.conf
├── data
│ ├── mysql-master
│ └── mysql-slave
├── prometheus.yml
└── template
└── default.tmpl
mkdir -p /data/master-slave/conf #配置文件
mkdir -p /data/master-slave/data #持久化數(shù)據(jù)
# 創(chuàng)建配置文件
cd /data/master-slave/conf
cat > master.cnf <<EOF
[client]
default-character-set=utf8mb4
[mysql]
default-character-set=utf8mb4
[mysqld]
default_authentication_plugin=mysql_native_password
default-time_zone = '+8:00'
log-bin=mysql-bin
skip-name-resolve
server-id=1
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
datadir = /var/lib/mysql
secure-file-priv= NULL
max_connections = 10000
EOF
cat > slave.cnf <<EOF
[client]
default-character-set=utf8mb4
[mysql]
default-character-set=utf8mb4
[mysqld]
default_authentication_plugin=mysql_native_password
default-time_zone = '+8:00'
log-bin=mysql-bin
skip-name-resolve
server-id=2
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION
pid-file = /var/run/mysqld/mysqld.pid
socket = /var/run/mysqld/mysqld.sock
datadir = /var/lib/mysql
read_only=ON
secure-file-priv= NULL
max_connections = 10000
EOF
- 方法一,直接run
docker run -d --name mysql-master -v /data/master-slave/data/mysql-master:/var/lib/mysql -v /data/master-slave/conf/master.conf:/etc/my.cnf -e MYSQL_ROOT_PASSWORD=123456 -p 3001:3306 mysql:8.0
docker run -d --name mysql-slave -v /data/master-slave/data/mysql-slave:/var/lib/mysql -v /data/master-slave/conf/slave.conf:/etc/my.cnf -e MYSQL_ROOT_PASSWORD=123456 -p 3002:3306 mysql:8.0
- 方法二、docker-compose
version: '3'
services:
mysql:
network_mode: "bridge"
environment:
MYSQL_ROOT_PASSWORD: "123456"
image: "mysql:8.0"
container_name: mysql-master
restart: always
volumes:
- "/data/master-slave/data/mysql-master:/var/lib/mysql"
- "/data/master-slave/conf/master.conf:/etc/my.cnf"
ports:
- "3001:3306"
mysql:
network_mode: "bridge"
environment:
MYSQL_ROOT_PASSWORD: "123456"
image: "mysql:8.0"
container_name: mysql-slave
restart: always
volumes:
- "/data/master-slave/data/mysql-slave:/var/lib/mysql"
- "/data/master-slave/conf/slave.conf:/etc/my.cnf"
ports:
- "3002:3306"
- mysql.yml
groups:
- name: MySQLStatsAlert
rules:
- alert: MySQL是關(guān)閉的
expr: mysql_up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "實例 {{ $labels.instance }} MySQL是關(guān)閉的"
description: "MySQL數(shù)據(jù)庫關(guān)閉。這需要立即采取行動!"
- alert: 打開文件數(shù)
expr: mysql_global_status_innodb_num_open_files > (mysql_global_variables_open_files_limit) * 0.75
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} 打開文件數(shù)過高"
description: "打開文件數(shù)過高。請考慮增加open_files_limit."
- alert: 讀取緩沖區(qū)的大小大于最大值,允許數(shù)據(jù)包大小
expr: mysql_global_variables_read_buffer_size > mysql_global_variables_slave_max_allowed_packet
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} 讀緩沖區(qū)大小(read_buffer_size)大于max。允許的數(shù)據(jù)包大小(max_allowed_packet)"
description: "讀緩沖區(qū)大小(read_buffer_size)大于max。允許的數(shù)據(jù)包大小(max_allowed_packet)。這可能會破壞復(fù)制."
- alert: 排序緩沖區(qū)可能錯過配置
expr: mysql_global_variables_innodb_sort_buffer_size <256*1024 or mysql_global_variables_read_buffer_size > 4*1024*1024
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} 排序緩沖區(qū)大小不是太大就是太小"
description: "排序緩沖區(qū)大小不是太大就是太小。sort_buffer_size的一個較好的值是256k到4M之間."
- alert: 線程堆棧大小太小
expr: mysql_global_variables_thread_stack <196608
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} 線程堆棧大小太小"
description: "線程堆棧大小太小。例如,當(dāng)您使用存儲語言構(gòu)造時,這可能會導(dǎo)致問題。thread_stack_size的典型值是256k."
- alert: 使用了超過80%的最大連接限制
expr: mysql_global_status_max_used_connections > mysql_global_variables_max_connections * 0.8
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} 使用了超過80%的最大連接限制"
description: "使用了超過80%的最大連接限制"
- alert: InnoDB 已啟用強(qiáng)制恢復(fù)
expr: mysql_global_variables_innodb_force_recovery != 0
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} 已啟用InnoDB 強(qiáng)制恢復(fù)"
description: "已啟用InnoDB 強(qiáng)制恢復(fù),這種模式只能用于數(shù)據(jù)恢復(fù)目的,它禁止寫入數(shù)據(jù)."
- alert: InnoDB日志文件太小
expr: mysql_global_variables_innodb_log_file_size < 16777216
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} nnoDB日志文件大小可能太小"
description: "InnoDB日志文件大小可能太小,選擇較小的InnoDB日志文件大小會對性能產(chǎn)生重大影響."
- alert: InnoDB事務(wù)提交時的Flush日志
expr: mysql_global_variables_innodb_flush_log_at_trx_commit != 1
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} InnoDB事務(wù)提交時的Flush日志設(shè)置為!= 1"
description: "InnoDB事務(wù)提交時的Flush日志設(shè)置為!= 1。這可能導(dǎo)致在電源故障時丟失已提交的事務(wù)."
- alert: 表定義緩存太小
expr: mysql_global_status_open_table_definitions > mysql_global_variables_table_definition_cache
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} 您的表定義緩存可能太小"
description: "您的表定義緩存可能太小。如果它太小,則會對性能產(chǎn)生重大影響!"
- alert: 表打開緩存太小
expr: mysql_global_status_open_tables >mysql_global_variables_table_open_cache * 99/100
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} 您的表打開緩存可能太小(舊名稱表緩存)"
description: "您的表打開緩存可能太小(舊名稱表緩存)。如果它太小,則會對性能產(chǎn)生重大影響!"
- alert: 線程堆棧大小可能太小
expr: mysql_global_variables_thread_stack < 262144
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} 線程堆棧大小可能太小"
description: "線程堆棧大小可能太小,例如,當(dāng)您使用存儲語言構(gòu)造時,這可能會導(dǎo)致問題,thread_stack_size的典型值是256k."
- alert: InnoDB緩沖池實例太小
expr: mysql_global_variables_innodb_buffer_pool_instances == 1
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} InnoDB緩沖池實例太小"
description: "如果你使用的是MySQL 5.5或更高版本,你應(yīng)該使用幾個InnoDB緩沖池實例來提高性能。InnoDB緩沖池實例的大小至少為1G,InnoDB緩沖池實例,你可以設(shè)置等于你的機(jī)器的核數(shù)."
- alert: InnoDB 插件已啟用
expr: mysql_global_variables_ignore_builtin_innodb == 1
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} InnoDB 插件已啟用"
description: "InnoDB 插件已啟用"
- alert: 禁用二進(jìn)制日志
expr: mysql_global_variables_log_bin != 1
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} 禁用二進(jìn)制日志"
description: "二進(jìn)制日志被禁用。這就禁止您進(jìn)行時間點(diǎn)恢復(fù)(PiTR)。"
- alert: Binlog緩存大小太小
expr: mysql_global_variables_binlog_cache_size < 1048576
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} Binlog緩存大小太小"
description: "Binlog緩存大小可能太小了。1M或更高的值是可以的."
- alert: Binlog語句緩存大小太小
expr: mysql_global_variables_binlog_stmt_cache_size <1048576 and mysql_global_variables_binlog_stmt_cache_size > 0
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} Binlog語句緩存大小太小"
description: "Binlog語句緩存大小可能太小了。1M或更高的值通常是可以的."
- alert: Binlog同步已啟用
expr: mysql_global_variables_sync_binlog == 1
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} Binlog同步已啟用"
description: "Binlog同步已啟用。這帶來了更高的數(shù)據(jù)安全性,但以寫入性能為代價."
- alert: IO線程停止
expr: mysql_slave_status_slave_io_running != 1
for: 1m
labels:
severity: critical
annotations:
summary: "實例 {{ $labels.instance }} IO線程停止"
description: "IO線程已停止。這通常是因為它無法連接到Master."
- alert: SQL線程停止
expr: mysql_slave_status_slave_sql_running == 0
for: 1m
labels:
severity: critical
annotations:
summary: "實例 {{ $labels.instance }} SQL線程停止"
description: "SQL線程已停止。這通常是因為它不能應(yīng)用從主程序接收到的SQL語句."
- alert: Slave 落后于 Master
expr: rate(mysql_slave_status_seconds_behind_master[1m]) >30
for: 1m
labels:
severity: warning
annotations:
summary: "實例 {{ $labels.instance }} Slave 落后于 Master"
description: "Slave 落后于 Master. 請檢查從線程是否正在運(yùn)行,以及是否有一些性能問題!"
- alert: Slave不是只讀的
expr: mysql_global_variables_read_only != 0
for: 1m
labels:
severity: page
annotations:
summary: "實例 {{ $labels.instance }} Slave不是只讀的"
description: "Slave沒有設(shè)置為只讀。你可能會不小心操縱從服務(wù)器上的數(shù)據(jù),得到不一致的結(jié)果……"
- prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
rule_files:
- /etc/prometheus/mysql.yml
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'mysql_monitor'
static_configs:
#- targets: ['mysqld-exporter-master:9104','mysqld-exporter-slave:9104']
- targets: ['192.168.10.88:3003','192.168.10.88:3004']
- alertmanager.yml
route:
group_by: ['alertname']
group_wait: 1s
group_interval: 1m
repeat_interval: 4h
receiver: 'webhook1'
receivers:
- name: 'webhook1'
webhook_configs:
- &dingtalk_config
send_resolved: true
url: http://192.168.10.88:3008/dingtalk/webhook1/send
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
- prometheus-webhook-dingtalk.yml
## Request timeout
# timeout: 5s
## Customizable templates path
templates:
- /etc/prometheus-webhook-dingtalk/template/default.tmpl
targets:
webhook1:
url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
# secret for signature
secret: SECc4148b709fb3db931d31c7a55457d9797c302a3d8257586e935cf4003440d6c0
webhook2:
url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
webhook_legacy:
url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
# Customize template content
message:
# Use legacy template
title: '{{ template "legacy.title" . }}'
text: '{{ template "legacy.content" . }}'
webhook_mention_all:
url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
mention:
all: true
webhook_mention_users:
url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
mention:
mobiles: ['156xxxx8827', '189xxxx8325']
- default.tmpl
{{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
{{ define "__alertmanagerURL" }}http://192.168.10.88:3005/alerts {{ end }}
{{ define "__text_alert_list" }}{{ range . }}
**Labels**
{{ range .Labels.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Annotations**
{{ range .Annotations.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Source:** [{{ .GeneratorURL }}]({{ .GeneratorURL }})
{{ end }}{{ end }}
{{ define "default.__text_alert_list" }}{{ range . }}
---
【告警狀態(tài)】:{{ .Status }}
【告警級別】:{{ .Labels.severity }}
【告警類型】:{{ .Labels.alertname }}
【告警應(yīng)用】:{{ .Annotations.summary }}
【告警主機(jī)】:{{ .Labels.instance }}
【告警詳情】:{{ .Annotations.description }}
【告警時間】:{{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}
{{ define "default.__text_alertresolve_list" }}{{ range . }}
---
【告警狀態(tài)】: {{ .Status }}
【告警級別】: {{ .Labels.severity }}
【告警類型】: {{ .Labels.alertname }}
【告警應(yīng)用】: {{ .Annotations.summary }}
【告警主機(jī)】: {{ .Labels.instance }}
【告警詳情】: {{ .Annotations.description }}
【觸發(fā)時間】: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
【結(jié)束時間】: {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}
{{/* Default */}}
{{ define "default.title" }}{{ template "__subject" . }}{{ end }}
{{ define "default.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ if gt (len .Alerts.Firing) 0 -}}
**=====好家伙,來活了~~~=====**
{{ template "default.__text_alert_list" .Alerts.Firing }}
---------------------------------------------------------
{{- end }}
{{ if gt (len .Alerts.Resolved) 0 -}}
**=====舒服了~~~ =====**
{{ template "default.__text_alertresolve_list" .Alerts.Resolved }}
{{- end }}
{{- end }}
{{/* Legacy */}}
{{ define "legacy.title" }}{{ template "__subject" . }}{{ end }}
{{ define "legacy.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ template "__text_alert_list" .Alerts.Firing }}
{{- end }}
{{/* Following names for compatibility */}}
{{ define "ding.link.title" }}{{ template "default.title" . }}{{ end }}
{{ define "ding.link.content" }}{{ template "default.content" . }}{{ end }}
mysqld_exporter
CREATE USER 'mysql_monitor'@'%' IDENTIFIED BY 'mysql_monitor' WITH MAX_USER_CONNECTIONS 3;
GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'mysql_monitor'@'%' ;
commit;
FLUSH PRIVILEGES;
docker run -d --name mysqld_exporter -p 3003:9104 -e DATA_SOURCE_NAME="mysql_monitor:mysql_monitor@(192.168.10.88:3002)/" prom/mysqld-exporter
version: '3'
services:
mysqld-exporter-master:
network_mode: "bridge"
environment:
DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3001)/"
image: "prom/mysqld-exporter"
container_name: mysqld-exporter-master
restart: always
ports:
- "3003:9104"
mysqld-exporter-slave:
depends_on:
- mysqld-exporter-master
network_mode: "bridge"
environment:
DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3002)/"
image: "prom/mysqld-exporter"
container_name: mysqld-exporter-slave
restart: always
ports:
- "3004:9104"
prometheus:
depends_on:
- mysqld-exporter-master
- mysqld-exporter-slave
network_mode: "bridge"
environment:
- TZ=Asia/Shanghai
image: "prom/prometheus:latest"
container_name: prometheus
restart: always
volumes:
- "/data/master-slave/conf/mysql.yml:/etc/prometheus/mysql.yml"
- "/data/master-slave/conf/prometheus.yml:/etc/prometheus/prometheus.yml"
- "/etc/localtime:/etc/localtime:ro"
ports:
- "3005:9090"
alertmanager:
depends_on:
- prometheus
network_mode: "bridge"
image: "prom/alertmanager:latest"
container_name: alertmanager
restart: always
volumes:
- "/data/master-slave/conf/alertmanager.yml:/etc/alertmanager/alertmanager.yml"
ports:
- "3007:9093"
prometheus-webhook-dingtalk:
depends_on:
- prometheus
network_mode: "bridge"
image: "timonwong/prometheus-webhook-dingtalk:latest"
container_name: prometheus-webhook-dingtalk
restart: always
volumes:
- "/data/master-slave/conf/prometheus-webhook-dingtalk.yml:/etc/prometheus-webhook-dingtalk/config.yml"
- "/data/master-slave/template:/etc/prometheus-webhook-dingtalk/template"
ports:
- "3008:9093"
grafana:
depends_on:
- prometheus
network_mode: "bridge"
image: grafana/grafana:latest
container_name: grafana
restart: always
ports:
- "3006:3000"