mysql主從+prometheus+grafana

[root@tuiguang master-slave]# tree -L 2
├── conf
│   ├── alertmanager.yml
│   ├── master.conf
│   ├── mysql.yml
│   ├── prometheus-webhook-dingtalk.yml
│   ├── prometheus.yml
│   └── slave.conf
├── data
│   ├── mysql-master
│   └── mysql-slave
├── prometheus.yml
└── template
    └── default.tmpl

mkdir -p /data/master-slave/conf  #配置文件
mkdir -p /data/master-slave/data  #持久化數(shù)據(jù)
# 創(chuàng)建配置文件
cd /data/master-slave/conf
cat > master.cnf <<EOF
[client]
default-character-set=utf8mb4
[mysql]
default-character-set=utf8mb4
[mysqld]
default_authentication_plugin=mysql_native_password
default-time_zone = '+8:00'
log-bin=mysql-bin
skip-name-resolve
server-id=1
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION
pid-file        = /var/run/mysqld/mysqld.pid
socket          = /var/run/mysqld/mysqld.sock
datadir         = /var/lib/mysql
secure-file-priv= NULL
max_connections = 10000
EOF
cat > slave.cnf <<EOF
[client]
default-character-set=utf8mb4
[mysql]
default-character-set=utf8mb4
[mysqld]
default_authentication_plugin=mysql_native_password
default-time_zone = '+8:00'
log-bin=mysql-bin
skip-name-resolve
server-id=2
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION
pid-file        = /var/run/mysqld/mysqld.pid
socket          = /var/run/mysqld/mysqld.sock
datadir         = /var/lib/mysql
read_only=ON
secure-file-priv= NULL
max_connections = 10000
EOF
  • 方法一,直接run
docker run -d --name mysql-master -v /data/master-slave/data/mysql-master:/var/lib/mysql -v /data/master-slave/conf/master.conf:/etc/my.cnf -e MYSQL_ROOT_PASSWORD=123456 -p 3001:3306 mysql:8.0
docker run -d --name mysql-slave -v /data/master-slave/data/mysql-slave:/var/lib/mysql -v /data/master-slave/conf/slave.conf:/etc/my.cnf -e MYSQL_ROOT_PASSWORD=123456 -p 3002:3306 mysql:8.0
  • 方法二、docker-compose
version: '3'
services:
  mysql:
    network_mode: "bridge"
    environment:
      MYSQL_ROOT_PASSWORD: "123456"
    image: "mysql:8.0"
    container_name: mysql-master
    restart: always
    volumes:
      - "/data/master-slave/data/mysql-master:/var/lib/mysql"
      - "/data/master-slave/conf/master.conf:/etc/my.cnf"
    ports:
      - "3001:3306"
  mysql:
    network_mode: "bridge"
    environment:
      MYSQL_ROOT_PASSWORD: "123456"
    image: "mysql:8.0"
    container_name: mysql-slave
    restart: always
    volumes:
      - "/data/master-slave/data/mysql-slave:/var/lib/mysql"
      - "/data/master-slave/conf/slave.conf:/etc/my.cnf"
    ports:
      - "3002:3306"
  • mysql.yml
groups:
- name: MySQLStatsAlert
  rules:
  - alert: MySQL是關(guān)閉的
    expr: mysql_up == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "實例 {{ $labels.instance }} MySQL是關(guān)閉的"
      description: "MySQL數(shù)據(jù)庫關(guān)閉。這需要立即采取行動!"
  - alert: 打開文件數(shù)
    expr: mysql_global_status_innodb_num_open_files > (mysql_global_variables_open_files_limit) * 0.75
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} 打開文件數(shù)過高"
      description: "打開文件數(shù)過高。請考慮增加open_files_limit."
  - alert: 讀取緩沖區(qū)的大小大于最大值,允許數(shù)據(jù)包大小
    expr: mysql_global_variables_read_buffer_size > mysql_global_variables_slave_max_allowed_packet 
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} 讀緩沖區(qū)大小(read_buffer_size)大于max。允許的數(shù)據(jù)包大小(max_allowed_packet)"
      description: "讀緩沖區(qū)大小(read_buffer_size)大于max。允許的數(shù)據(jù)包大小(max_allowed_packet)。這可能會破壞復(fù)制."
  - alert: 排序緩沖區(qū)可能錯過配置
    expr: mysql_global_variables_innodb_sort_buffer_size <256*1024 or mysql_global_variables_read_buffer_size > 4*1024*1024 
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} 排序緩沖區(qū)大小不是太大就是太小"
      description: "排序緩沖區(qū)大小不是太大就是太小。sort_buffer_size的一個較好的值是256k到4M之間."
  - alert: 線程堆棧大小太小
    expr: mysql_global_variables_thread_stack <196608
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} 線程堆棧大小太小"
      description: "線程堆棧大小太小。例如,當(dāng)您使用存儲語言構(gòu)造時,這可能會導(dǎo)致問題。thread_stack_size的典型值是256k."
  - alert: 使用了超過80%的最大連接限制 
    expr: mysql_global_status_max_used_connections > mysql_global_variables_max_connections * 0.8
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} 使用了超過80%的最大連接限制"
      description: "使用了超過80%的最大連接限制"
  - alert: InnoDB 已啟用強(qiáng)制恢復(fù)
    expr: mysql_global_variables_innodb_force_recovery != 0 
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} 已啟用InnoDB 強(qiáng)制恢復(fù)"
      description: "已啟用InnoDB 強(qiáng)制恢復(fù),這種模式只能用于數(shù)據(jù)恢復(fù)目的,它禁止寫入數(shù)據(jù)."
  - alert: InnoDB日志文件太小
    expr: mysql_global_variables_innodb_log_file_size < 16777216 
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} nnoDB日志文件大小可能太小"
      description: "InnoDB日志文件大小可能太小,選擇較小的InnoDB日志文件大小會對性能產(chǎn)生重大影響."
  - alert: InnoDB事務(wù)提交時的Flush日志
    expr: mysql_global_variables_innodb_flush_log_at_trx_commit != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} InnoDB事務(wù)提交時的Flush日志設(shè)置為!= 1"
      description: "InnoDB事務(wù)提交時的Flush日志設(shè)置為!= 1。這可能導(dǎo)致在電源故障時丟失已提交的事務(wù)."
  - alert: 表定義緩存太小
    expr: mysql_global_status_open_table_definitions > mysql_global_variables_table_definition_cache
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} 您的表定義緩存可能太小"
      description: "您的表定義緩存可能太小。如果它太小,則會對性能產(chǎn)生重大影響!"
  - alert: 表打開緩存太小
    expr: mysql_global_status_open_tables >mysql_global_variables_table_open_cache * 99/100
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} 您的表打開緩存可能太小(舊名稱表緩存)"
      description: "您的表打開緩存可能太小(舊名稱表緩存)。如果它太小,則會對性能產(chǎn)生重大影響!"
  - alert: 線程堆棧大小可能太小
    expr: mysql_global_variables_thread_stack < 262144
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} 線程堆棧大小可能太小"
      description: "線程堆棧大小可能太小,例如,當(dāng)您使用存儲語言構(gòu)造時,這可能會導(dǎo)致問題,thread_stack_size的典型值是256k."
  - alert: InnoDB緩沖池實例太小
    expr: mysql_global_variables_innodb_buffer_pool_instances == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} InnoDB緩沖池實例太小"
      description: "如果你使用的是MySQL 5.5或更高版本,你應(yīng)該使用幾個InnoDB緩沖池實例來提高性能。InnoDB緩沖池實例的大小至少為1G,InnoDB緩沖池實例,你可以設(shè)置等于你的機(jī)器的核數(shù)."
  - alert: InnoDB 插件已啟用
    expr: mysql_global_variables_ignore_builtin_innodb == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} InnoDB 插件已啟用"
      description: "InnoDB 插件已啟用"
  - alert: 禁用二進(jìn)制日志
    expr: mysql_global_variables_log_bin != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "實例 {{ $labels.instance }} 禁用二進(jìn)制日志"
      description: "二進(jìn)制日志被禁用。這就禁止您進(jìn)行時間點(diǎn)恢復(fù)(PiTR)。"
  - alert: Binlog緩存大小太小
    expr: mysql_global_variables_binlog_cache_size < 1048576
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} Binlog緩存大小太小"
      description: "Binlog緩存大小可能太小了。1M或更高的值是可以的."
  - alert: Binlog語句緩存大小太小
    expr: mysql_global_variables_binlog_stmt_cache_size <1048576 and mysql_global_variables_binlog_stmt_cache_size > 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} Binlog語句緩存大小太小"
      description: "Binlog語句緩存大小可能太小了。1M或更高的值通常是可以的."
  - alert: Binlog同步已啟用
    expr: mysql_global_variables_sync_binlog == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} Binlog同步已啟用"
      description: "Binlog同步已啟用。這帶來了更高的數(shù)據(jù)安全性,但以寫入性能為代價."
  - alert: IO線程停止
    expr: mysql_slave_status_slave_io_running != 1
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "實例 {{ $labels.instance }} IO線程停止"
      description: "IO線程已停止。這通常是因為它無法連接到Master."
  - alert: SQL線程停止 
    expr: mysql_slave_status_slave_sql_running == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "實例 {{ $labels.instance }} SQL線程停止"
      description: "SQL線程已停止。這通常是因為它不能應(yīng)用從主程序接收到的SQL語句."
  - alert: Slave 落后于 Master
    expr: rate(mysql_slave_status_seconds_behind_master[1m]) >30 
    for: 1m
    labels:
      severity: warning 
    annotations:
      summary: "實例 {{ $labels.instance }} Slave 落后于 Master"
      description: "Slave 落后于 Master. 請檢查從線程是否正在運(yùn)行,以及是否有一些性能問題!"
  - alert: Slave不是只讀的
    expr: mysql_global_variables_read_only != 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "實例 {{ $labels.instance }} Slave不是只讀的"
      description: "Slave沒有設(shè)置為只讀。你可能會不小心操縱從服務(wù)器上的數(shù)據(jù),得到不一致的結(jié)果……"

  • prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
rule_files:
   - /etc/prometheus/mysql.yml
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'mysql_monitor'
    static_configs:
    #- targets: ['mysqld-exporter-master:9104','mysqld-exporter-slave:9104']
    - targets: ['192.168.10.88:3003','192.168.10.88:3004']
  • alertmanager.yml
route:
  group_by: ['alertname']
  group_wait: 1s
  group_interval: 1m
  repeat_interval: 4h
  receiver: 'webhook1'
receivers:
- name: 'webhook1'
  webhook_configs:
    - &dingtalk_config
       send_resolved: true
       url: http://192.168.10.88:3008/dingtalk/webhook1/send
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
  • prometheus-webhook-dingtalk.yml
## Request timeout
# timeout: 5s

## Customizable templates path
templates:
   - /etc/prometheus-webhook-dingtalk/template/default.tmpl
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
    # secret for signature
    secret: SECc4148b709fb3db931d31c7a55457d9797c302a3d8257586e935cf4003440d6c0
  webhook2:
    url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
  webhook_legacy:
    url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
    # Customize template content
    message:
      # Use legacy template
      title: '{{ template "legacy.title" . }}'
      text: '{{ template "legacy.content" . }}'
  webhook_mention_all:
    url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
    mention:
      all: true
  webhook_mention_users:
    url: https://oapi.dingtalk.com/robot/send?access_token=2d95a449e55205b7ca8b71ec00deae98556768dd5f9fc78f19485e1c850cc34b
    mention:
      mobiles: ['156xxxx8827', '189xxxx8325']
  • default.tmpl
{{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
{{ define "__alertmanagerURL" }}http://192.168.10.88:3005/alerts {{ end }}

{{ define "__text_alert_list" }}{{ range . }}
**Labels**
{{ range .Labels.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Annotations**
{{ range .Annotations.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Source:** [{{ .GeneratorURL }}]({{ .GeneratorURL }})
{{ end }}{{ end }}

{{ define "default.__text_alert_list" }}{{ range . }}
---
【告警狀態(tài)】:{{ .Status }}
【告警級別】:{{ .Labels.severity }}
【告警類型】:{{ .Labels.alertname }}
【告警應(yīng)用】:{{ .Annotations.summary }}
【告警主機(jī)】:{{ .Labels.instance }}
【告警詳情】:{{ .Annotations.description }}
【告警時間】:{{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}

{{ define "default.__text_alertresolve_list" }}{{ range . }}
---
【告警狀態(tài)】: {{ .Status }}
【告警級別】: {{ .Labels.severity }}
【告警類型】: {{ .Labels.alertname }}
【告警應(yīng)用】: {{ .Annotations.summary }}
【告警主機(jī)】: {{ .Labels.instance }}
【告警詳情】: {{ .Annotations.description }}
【觸發(fā)時間】: {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
【結(jié)束時間】: {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}
{{ end }}
{{ end }}

{{/* Default */}}
{{ define "default.title" }}{{ template "__subject" . }}{{ end }}
{{ define "default.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**

{{ if gt (len .Alerts.Firing) 0 -}} 

**=====好家伙,來活了~~~=====**
{{ template "default.__text_alert_list" .Alerts.Firing }} 
---------------------------------------------------------

{{- end }}

{{ if gt (len .Alerts.Resolved) 0 -}}

**=====舒服了~~~   =====**
{{ template "default.__text_alertresolve_list" .Alerts.Resolved }}

{{- end }}
{{- end }}

{{/* Legacy */}}
{{ define "legacy.title" }}{{ template "__subject" . }}{{ end }}
{{ define "legacy.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ template "__text_alert_list" .Alerts.Firing }}
{{- end }}

{{/* Following names for compatibility */}}
{{ define "ding.link.title" }}{{ template "default.title" . }}{{ end }}
{{ define "ding.link.content" }}{{ template "default.content" . }}{{ end }}

mysqld_exporter

CREATE USER 'mysql_monitor'@'%' IDENTIFIED BY 'mysql_monitor' WITH MAX_USER_CONNECTIONS 3;
GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'mysql_monitor'@'%' ;
commit;
FLUSH PRIVILEGES;
docker run -d --name mysqld_exporter -p 3003:9104 -e DATA_SOURCE_NAME="mysql_monitor:mysql_monitor@(192.168.10.88:3002)/" prom/mysqld-exporter
version: '3'
services:
  mysqld-exporter-master:
    network_mode: "bridge"
    environment:
      DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3001)/"
    image: "prom/mysqld-exporter"
    container_name: mysqld-exporter-master
    restart: always
    ports:
      - "3003:9104"
  mysqld-exporter-slave:
    depends_on:
      - mysqld-exporter-master
    network_mode: "bridge"
    environment:
      DATA_SOURCE_NAME: "mysql_monitor:mysql_monitor@(192.168.10.88:3002)/"
    image: "prom/mysqld-exporter"
    container_name: mysqld-exporter-slave
    restart: always
    ports:
      - "3004:9104"
  prometheus:
    depends_on:
      - mysqld-exporter-master
      - mysqld-exporter-slave
    network_mode: "bridge"
    environment:
      - TZ=Asia/Shanghai
    image: "prom/prometheus:latest"
    container_name: prometheus
    restart: always
    volumes:
      - "/data/master-slave/conf/mysql.yml:/etc/prometheus/mysql.yml"
      - "/data/master-slave/conf/prometheus.yml:/etc/prometheus/prometheus.yml"
      - "/etc/localtime:/etc/localtime:ro"
    ports:
      - "3005:9090"
  alertmanager:
    depends_on:
      - prometheus
    network_mode: "bridge"
    image: "prom/alertmanager:latest"
    container_name: alertmanager
    restart: always
    volumes:
      - "/data/master-slave/conf/alertmanager.yml:/etc/alertmanager/alertmanager.yml"
    ports:
      - "3007:9093"
  prometheus-webhook-dingtalk:
    depends_on:
      - prometheus
    network_mode: "bridge"
    image: "timonwong/prometheus-webhook-dingtalk:latest"
    container_name: prometheus-webhook-dingtalk
    restart: always
    volumes:
      - "/data/master-slave/conf/prometheus-webhook-dingtalk.yml:/etc/prometheus-webhook-dingtalk/config.yml"
      - "/data/master-slave/template:/etc/prometheus-webhook-dingtalk/template"
    ports:      
      - "3008:9093"
  grafana:
    depends_on:
      - prometheus
    network_mode: "bridge"
    image: grafana/grafana:latest
    container_name: grafana
    restart: always
    ports:
      - "3006:3000"
最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容