(文章目录)

目录结构

  • alertmanager

    • alert_templates
      • *.tmpl
    • alertmanager.yml
  • loki

    • alert_rules
    • loki.yaml
  • promtail

    • promtail.yaml
  • prometheus

    • alert_rules
    • prometheus.yml
  • docker-compose.yml

docker 镜像地址切换(/etc/docker/daemon.json):

{
  "registry-mirrors": [
    "https://cr.console.aliyun.com/"
  ]
}

prometheus

prometheus.yml

# Prometheus server configuration: global defaults, scrape jobs, the
# Alertmanager endpoint, and the location of the alerting rule files.
global:
  scrape_interval: 15s # By default, scrape targets every 15 seconds.

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'

scrape_configs:
  # The job name is added as a label `job=<job_name>` to any time series
  # scraped from this config.

  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    # Override the global default and scrape targets from this job every 5 seconds.
    scrape_interval: 5s
    static_configs:
      - targets: ['localhost:9090']

  # For the jobs below, install and run the matching exporter on each
  # monitored server so that it listens on the port listed in `targets`.
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['192.168.152.150:9100', '120.76.47.32:9100']

  - job_name: 'mysql_exporter'
    static_configs:
      # NOTE(review): when Prometheus runs in a container, `localhost` is the
      # Prometheus container itself, not the Docker host; confirm that
      # 'localhost:9104' is really reachable from there.
      - targets: ['120.76.47.32:9104', 'localhost:9104']

  - job_name: 'nginx-vts-exporter'
    scrape_interval: 10s
    static_configs:
      - targets: ['120.76.47.32:9913', '192.168.152.150:9913']

# Alertmanager
alerting:
  alertmanagers:
    - static_configs:
        # Alertmanager runs as a separate Compose service. Inside the
        # Prometheus container `localhost:9093` points at Prometheus itself,
        # so address Alertmanager by its service name on the Compose network.
        - targets: ['alertmanager:9093']

# Alerting rule files (glob, evaluated inside the Prometheus container).
rule_files:
  - "/etc/prometheus/alert_rules/*.yml"

alert_rules

告警规则配置:

prometheus/alert_rules/*.yml

loki

loki.yaml

# Single-instance Loki configuration using the local filesystem for chunks
# and a boltdb-shipper index.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

limits_config:
  reject_old_samples: true # Reject samples that are too old.
  reject_old_samples_max_age: 168h # Samples older than 168 hours are rejected.
  retention_period: 360h # Data retention period (enforced by the compactor below).
  max_query_series: 1000
  max_query_parallelism: 2 # Maximum number of queries that will be scheduled in parallel by the frontend.
  max_query_lookback: 24h
  #max_global_streams_per_user: 0
# frontend:
#   max_outstanding_per_tenant: 1024

query_scheduler:
  max_outstanding_requests_per_tenant: 50

common:
  instance_addr: 127.0.0.1
  path_prefix: /tmp/loki
  storage:
    filesystem:
      chunks_directory: /tmp/loki/chunks
      rules_directory: /tmp/loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

query_range:
  results_cache:
    cache:
      embedded_cache:
        enabled: true
        max_size_mb: 10000

schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

# Retention is handled by the compactor (`retention_enabled: true` below),
# which applies `limits_config.retention_period` (360h). Table-manager
# deletion was previously enabled as well with a 24h period; the two
# mechanisms acted on the same filesystem store with conflicting periods,
# so table-manager deletion is turned off here.
table_manager:
  retention_deletes_enabled: false
  retention_period: 0s

compactor:
  working_directory: /tmp/loki/retention
  shared_store: filesystem
  compaction_interval: 10m
  retention_enabled: true
  retention_delete_delay: 10s
  retention_delete_worker_count: 150

ruler:
  storage:
    type: local
    local:
      directory: /loki/rules
  rule_path: /loki/rules-temp
  alertmanager_url: http://192.168.152.150:9093
  ring:
    kvstore:
      store: inmemory
  enable_api: true
  enable_alertmanager_v2: true
# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
#
# Statistics help us better understand how Loki is used, and they show us performance
# levels for most users. This helps us prioritize features and documentation.
# For more information on what's sent, look at
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
# Refer to the buildReport method to see what goes into a report.
#
# If you would like to disable reporting, uncomment the following lines:
#analytics:
#  reporting_enabled: false

alert_rules

告警规则配置:

loki/alert_rules/*.yml

promtail

promtail.yaml

# Promtail configuration: reads the log files matched by each `__path__`
# glob and pushes them to the Loki endpoint listed under `clients`.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

# File in which Promtail records how far it has read in each log file.
positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

limits_config:
  # Line-rate limiting.
  readline_rate_enabled: true
  readline_rate_drop: false
  readline_burst: 2000
  # Stream and line-size caps.
  max_streams: 2000
  max_line_size: 2048

scrape_configs:
  # Device-side services: one log directory per service, sub-directories
  # matching `002*`.
  - job_name: deviceLog
    static_configs:
      - labels:
          job: localserver
          __path__: /var/log/localserver/002*/*.log
        targets:
          - localhost
      - labels:
          job: bob
          __path__: /var/log/bob/002*/*.log
        targets:
          - localhost
      - labels:
          job: media
          __path__: /var/log/media/002*/*.log
        targets:
          - localhost

  # Web-side services: log files sit directly in each service directory.
  - job_name: webLog
    static_configs:
      - labels:
          job: image
          __path__: /var/log/image/*.log
        targets:
          - localhost
      - labels:
          job: bob-soultion
          __path__: /var/log/bob-soultion/*.log
        targets:
          - localhost
      - labels:
          job: customer
          __path__: /var/log/customer/*.log
        targets:
          - localhost

alertmanager

alertmanager.yml

# Alertmanager configuration: SMTP defaults, notification templates, routing,
# receivers (WeCom webhook adapter and Telegram) and inhibition rules.
#
# SECURITY NOTE(review): the SMTP password and the Telegram bot token below
# are stored in plain text. Rotate them if this file has been shared, and
# prefer injecting them from a secret store or file where the deployed
# Alertmanager version supports it.
global:
  # resolve_timeout: 1m
  # Ports noted by the author for the SMTP smarthost: 587, 465.
  smtp_smarthost: smtp.qq.com:587
  smtp_from: 2833692207@qq.com
  smtp_auth_username: 2833692207@qq.com
  smtp_auth_password: osymjgsurmosdcia

# Notification templates loaded at start-up (path inside the container).
templates:
  - "/etc/alertmanager/alert_templates/*"

route:
  group_by: ["alertname"]
  group_wait: 1m
  group_interval: 5m
  repeat_interval: 20m
  # Every alert goes to the Telegram receiver; "wbhook" is defined below but
  # no route references it.
  receiver: "telepush"

receivers:
  - name: "wbhook"
    webhook_configs:
      - url: "http://192.168.152.160:8089/adapter/wx"
        send_resolved: true

  # Sends the same templated message to two Telegram chats.
  - name: "telepush"
    telegram_configs:
      - send_resolved: true
        api_url: "https://api.telegram.org"
        bot_token: "6100072571:AAFPoUqywbdxv-wFGxPoa6dF4f9VKuvtjhk"
        chat_id: -857040540
        message: '{{ template "default.to.message" .}}'

      - send_resolved: true
        api_url: "https://api.telegram.org"
        bot_token: "6100072571:AAFPoUqywbdxv-wFGxPoa6dF4f9VKuvtjhk"
        chat_id: -1001929772831
        message: '{{ template "default.to.message" .}}'

# Mute "warning" alerts while a "critical" alert with the same alertname, dev
# and instance labels is firing. Written with `source_matchers` /
# `target_matchers`, which replace the deprecated `source_match` /
# `target_match` fields.
inhibit_rules:
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ["alertname", "dev", "instance"]

alert_templates

告警模板:

alertmanager/alert_templates/*.tmpl
{{/*
  Defines the named template "default.to.message".
  For every alert in .Alerts it prints one block delimited by start/end
  marker lines containing: status, severity label, alertname label, instance
  label, the summary / value / description annotations, and the start time.
  The start time is shifted by 28800e9 ns (8 hours) before formatting;
  presumably this converts UTC to UTC+8 for display (confirm).
*/}}
{{ define "default.to.message" }}
{{ range .Alerts }}
=========start==========
告警状态:{{ .Status }}
告警级别: {{ .Labels.severity }} 级
告警类型: {{ .Labels.alertname }}
故障主机: {{ .Labels.instance }}
告警主题: {{ .Annotations.summary }}
触发阀值:{{ .Annotations.value }}
告警详情: {{ .Annotations.description }}
触发时间: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
=========end==========
{{ end }}
{{ end }}

docker-compose 配置

# Compose stack: Loki + Promtail (logs), Prometheus + Alertmanager (metrics
# and alerting), Grafana (dashboards) and a webhook adapter for WeCom.
# Host paths follow the directory layout listed at the top of the article.
version: "3.7"
services:
  loki:
    image: grafana/loki:2.8.0
    container_name: loki
    restart: always
    ports:
      - "3100:3100"
    volumes:
      # The config file is named loki/loki.yaml in the directory layout; a
      # bind mount whose source file does not exist does not provide the
      # intended configuration.
      - ./loki/loki.yaml:/etc/loki/local-config.yaml
      # Rule files are mounted under the `fake` sub-directory of the ruler's
      # rule directory.
      - ./loki/alert_rules/:/loki/rules/fake
  promtail:
    image: grafana/promtail:2.8.0
    container_name: promtail
    restart: always
    ports:
      - "9080:9080"
    volumes:
      - ./promtail/promtail.yaml:/etc/promtail/config.yml
      # Host log directory exposed to Promtail as /var/log.
      - /home/tjc/log:/var/log
    links:
      - loki
  grafana:
    image: grafana/grafana-enterprise:9.4.7
    container_name: grafana
    restart: always
    ports:
      - "3000:3000"
  prometheus:
    image: prom/prometheus:v2.43.0
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      # prometheus.yml loads rules from /etc/prometheus/alert_rules/*.yml, so
      # the rule directory has to be mounted as well.
      - ./prometheus/alert_rules:/etc/prometheus/alert_rules
  alertmanager:
    image: quay.io/prometheus/alertmanager:v0.25.0
    container_name: alertmanager
    restart: always
    ports:
      - "9093:9093"
    volumes:
      # Without these mounts the container never sees the custom
      # alertmanager.yml or the templates it references under
      # /etc/alertmanager/alert_templates/.
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - ./alertmanager/alert_templates:/etc/alertmanager/alert_templates
  webhook-adapter:
    image: guyongquan/webhook-adapter:latest
    container_name: webhook-adapter
    hostname: webhook-adapter
    ports:
      - "8089:80"
    command:
      # SECURITY NOTE(review): the WeCom webhook key is embedded in plain
      # text; rotate it if this file has been shared.
      - "--adapter=/app/prometheusalert/wx.js=/wx=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=c5bb4610-eba4-4476-8eae-29f230bbf143"

exporter 安装

在你需要监控的对应服务器上安装运行

nginx-exporter

nginx-exporter下载地址

mysqld-exporter

mysqld-exporter下载地址

开始运行

# Create and start every service defined in docker-compose.yml in the background (-d).
docker-compose up -d

版本查看

grafana:

# Run the Grafana CLI and server binaries with -v inside the `grafana` container to check their versions.
docker exec grafana grafana-cli -v
docker exec grafana grafana-server -v

prometheus:

# The containers are addressed by the names fixed via `container_name` in
# docker-compose.yml (prometheus, alertmanager) rather than by container IDs,
# which differ on every machine. `-it` is omitted because printing a version
# needs no interactive TTY.
docker exec prometheus /bin/prometheus --version
docker exec alertmanager /bin/alertmanager --version

image.png