企业🤖AI智能体构建引擎,智能编排和调试,一键部署,支持私有化部署方案 广告
文件结构 ![](https://img.kancloud.cn/f9/1e/f91e63e16054aaf44a00e34abc94fa9c_453x172.png) <hr> 1.创建基本目录结构 #切换到root用户 mkdir /data/docker-prometheus mkdir /data/docker-prometheus/{grafana,prometheus,alertmanager} -p cd /data/docker-prometheus/ <hr> 2.创建alertmanager的配置文件 vi alertmanager/config.yml global: #163 smtp_smarthost: 'smtp.163.com:465' #发邮件的邮箱 smtp_from: 'sanyii31@163.com' smtp_auth_username: 'sanyii31@163.com' smtp_auth_password: '123456' smtp_require_tls: false route: group_by: ['alertname'] group_wait: 10s group_interval: 10s repeat_interval: 10m receiver: email receivers: - name: 'email' email_configs: - to: 'sanyii31@163.com' inhibit_rules: - source_match: severity: 'critical' target_match: severity: 'warning' equal: ['alertname','dev','instance'] <hr> 3.创建grafana的配置文件 vi grafana/config.monitoring # admin登录密码为password GF_SECURITY_ADMIN_PASSWORD=password GF_USERS_ALLOW_SIGN_UP=false <hr/> 4.创建prometheus的配置文件 vi prometheus/prometheus.yml global: scrape_interval: "15s" # 将搜刮时间改为15秒,默认一分钟 evaluation_interval: "15s" #每15秒评估一次规则,默认一分钟 alerting: alertmanagers: - static_configs: - targets: ["alertmanager:9093"] rule_files: #报警配置(触发器) - "alert.yml" scrape_configs: # 搜刮配置,一共创建4个Prometheus监控项 - job_name: "prometheus" scrape_interval: "15s" # static_configs: - targets: ["localhost:9090"] # 修改为Prometheus自身的地址和端口 - job_name: "alertmanager" scrape_interval: "15s" static_configs: - targets: ["alertmanager:9093"] - job_name: "cadvisor" scrape_interval: "15s" static_configs: - targets: ["cadvisor:8080"] labels: instance: "cadvisor-server" # 修改为更具体的实例名称 - job_name: "node-exporter" scrape_interval: "15s" static_configs: - targets: ["node-exporter:9100"] labels: instance: "Prometheus服务器" # 修改为更具体的实例名称 <hr> 5 . 
创建prometheus的告警文件 vi prometheus/alert.yml groups: - name: Prometheus alert rules: #对任何实例超过30秒无法联系的情况发出警报 - alert: 服务告警 expr: up == 0 for: 30s labels: severity: critical annotations: summary: "服务异常,实例:{{ $labels.instance }}" description: "{{ $labels.job }} 服务已关闭" <hr> 6 .创建docker-compose.yaml文件 vi docker-compose.yaml # 存储卷 volumes: prometheus_data: {} grafana_data: {} networks: monitoring: driver: bridge services: prometheus: image: registry.cn-hangzhou.aliyuncs.com/ldw520/prometheus:latest #镜像源 container_name: prometheus restart: always volumes: - /etc/localtime:/etc/localtime:ro #本地时区挂载在镜像中 - ./prometheus/:/etc/prometheus/ - prometheus_data:/prometheus #数据存储位置 command: - '--config.file=/etc/prometheus/prometheus.yml' - '--storage.tsdb.path=/prometheus' - '--web.console.libraries=/usr/share/prometheus/console_libraries' - '--web.console.templates=/usr/share/prometheus/consoles' #控制台机 #热加载配置 - '--web.enable-lifecycle' #历史数据最大保留时间,默认15天 - '--storage.tsdb.retention.time=30d' networks: - monitoring links: - alertmanager - cadvisor - node_exporter expose: - '9090' ports: - 9090:9090 depends_on: - cadvisor #等待cadvisor启动完成后prometheus再启动 alertmanager: image: registry.cn-hangzhou.aliyuncs.com/ldw520/alertmanager:latest #镜像源 container_name: alertmanager restart: always volumes: - /etc/localtime:/etc/localtime:ro - ./alertmanager/:/etc/alertmanager/ command: - '--config.file=/etc/alertmanager/config.yml' - '--storage.path=/alertmanager' networks: - monitoring expose: - '9093' ports: - 9093:9093 # 监控容器 cadvisor: image: registry.cn-hangzhou.aliyuncs.com/ldw520/cadvisor:latest #镜像源 container_name: cadvisor restart: always volumes: - /etc/localtime:/etc/localtime:ro - /:/rootfs:ro - /var/run:/var/run:rw - /sys:/sys:ro - /var/lib/docker/:/var/lib/docker:ro networks: - monitoring expose: - '8080' node_exporter: image: registry.cn-hangzhou.aliyuncs.com/ldw520/node-exporter:v1.8.0 #镜像源 container_name: node-exporter restart: always volumes: - /etc/localtime:/etc/localtime:ro - /proc:/host/proc:ro 
- /sys:/host/sys:ro - /:/rootfs:ro command: - '--path.procfs=/host/proc' - '--path.sysfs=/host/sys' - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)' networks: - monitoring ports: - '9100:9100' grafana: image: registry.cn-hangzhou.aliyuncs.com/ldw520/grafana:9.3.16 #镜像源 container_name: grafana restart: always volumes: - /etc/localtime:/etc/localtime:ro - grafana_data:/var/lib/grafana - ./grafana/provisioning/:/etc/grafana/provisioning/ env_file: - ./grafana/config.monitoring networks: - monitoring links: - prometheus ports: - 3000:3000 depends_on: - prometheus <hr> 7.查看文件夹树 yum install -y tree ![](https://img.kancloud.cn/2c/8e/2c8e93c0187bc541e0d7e536c6eb788f_509x223.png) <hr> 8.运行docker-compose cd /data/docker-prometheus/ docker-compose up -d (-d后台运行) <hr> 9.检查运行状态 docker images #查看镜像是否已下载 docker ps -a #发现5个容器都处于 up运行状态 ![](https://img.kancloud.cn/b8/f6/b8f651829d139e39b3ff3b6a013b4d63_964x225.png) ss -lntp|egrep "3000|9090|9100|9093" #检查端口 ![](https://img.kancloud.cn/21/fc/21fc4559ace818747c7eb8bdd3091ade_918x158.png) <hr> 10.web访问地址 应用 | 访问地址 | 账号/密码 prometheus | http://centosip:9090/ | 无 grafana | http://centosip:3000/ | admin/password alertmanager | http://centosip:9093/ | 无 node_exporter | http://centosip:9100 | 无