Здравейте всички. През май стартира OTUS
Околната среда
Ще ни трябва следното:
- Kubernetes
- Оператор Прометей
конфигурация на износител на черна кутия
Конфигуриране на Blackbox чрез ConfigMap
за настройки http
модул за наблюдение на уеб услуги.
---
# ConfigMap that carries the Blackbox Exporter configuration file.
# The blackbox.yaml key is a literal block scalar, so its content is stored
# verbatim. It defines a single module, http_2xx, which uses the HTTP prober
# with a 5s timeout. valid_status_codes is left empty, which the exporter
# documents as "defaults to 2xx".
# HTTP/2 responses are reported by the exporter as "HTTP/2.0" (see the
# exporter's example configuration), so that spelling is used here.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-blackbox-exporter
  labels:
    app: prometheus-blackbox-exporter
data:
  blackbox.yaml: |
    modules:
      http_2xx:
        http:
          no_follow_redirects: false
          preferred_ip_protocol: ip4
          valid_http_versions:
            - HTTP/1.1
            - HTTP/2.0
          valid_status_codes: []
        prober: http
        timeout: 5s
Модул http_2xx
се използва за проверка дали уеб услугата връща 2xx HTTP статус код. Конфигурацията на експортера на черна кутия е описана по-подробно в
Внедрете инструмента за експортиране на blackbox в клъстера на Kubernetes
Описвам Deployment
и Service
за внедряване в Kubernetes.
---
# ClusterIP Service for the Blackbox Exporter.
# Selects pods labelled app=prometheus-blackbox-exporter and publishes a
# single TCP port, 9115, named "http".
apiVersion: v1
kind: Service
metadata:
  name: prometheus-blackbox-exporter
  labels:
    app: prometheus-blackbox-exporter
spec:
  type: ClusterIP
  ports:
    - name: http
      port: 9115
      protocol: TCP
  selector:
    app: prometheus-blackbox-exporter
---
# Deployment running one Blackbox Exporter pod with two containers:
#   * blackbox-exporter - the exporter itself, reading /config/blackbox.yaml;
#   * configmap-reload  - watches /etc/config and calls the exporter's
#     /-/reload URL on localhost:9115 when the mounted files change.
# Both containers mount the same "config" volume, which is backed by the
# prometheus-blackbox-exporter ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-blackbox-exporter
  labels:
    app: prometheus-blackbox-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-blackbox-exporter
  template:
    metadata:
      labels:
        app: prometheus-blackbox-exporter
    spec:
      restartPolicy: Always
      containers:
        - name: blackbox-exporter
          image: "prom/blackbox-exporter:v0.15.1"
          imagePullPolicy: IfNotPresent
          securityContext:
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1000
          args:
            - "--config.file=/config/blackbox.yaml"
          # No requests/limits are set; written explicitly as an empty map.
          resources: {}
          ports:
            - containerPort: 9115
              name: http
          # Both probes refer to the container port by its name ("http").
          livenessProbe:
            httpGet:
              path: /health
              port: http
          readinessProbe:
            httpGet:
              path: /health
              port: http
          volumeMounts:
            - mountPath: /config
              name: config
        - name: configmap-reload
          image: "jimmidyson/configmap-reload:v0.2.2"
          imagePullPolicy: IfNotPresent
          securityContext:
            runAsNonRoot: true
            runAsUser: 65534
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://localhost:9115/-/reload
          resources: {}
          volumeMounts:
            - mountPath: /etc/config
              name: config
              readOnly: true
      volumes:
        - name: config
          configMap:
            name: prometheus-blackbox-exporter
Износителят на Blackbox може да бъде внедрен със следната команда. Пространство от имена monitoring
се отнася до оператора Prometheus.
# Apply the manifests from blackbox-exporter.yaml to the "monitoring" namespace.
kubectl apply --namespace monitoring --filename blackbox-exporter.yaml
Проверете дали всички услуги работят, като използвате следната команда:
# List every resource in "monitoring" labelled app=prometheus-blackbox-exporter.
kubectl get all --namespace monitoring -l app=prometheus-blackbox-exporter
Проверка на черната кутия
Можете да получите достъп до уеб интерфейса на Blackbox Exporter с port-forward
:
# Forward local port 9115 to port 9115 of the prometheus-blackbox-exporter Service.
kubectl port-forward --namespace monitoring svc/prometheus-blackbox-exporter 9115:9115
Свържете се с уеб интерфейса на Blackbox Exporter чрез уеб браузър на
Ако отидете на
Метрична стойност probe_success
равно на 1 означава успешна проверка. Стойност 0 показва грешка.
Настройка на Prometheus
След като инсталирате програмата за експортиране на BlackBox, настройте Prometheus prometheus-additional.yaml
.
# Scrape job that probes public HTTP(S) endpoints through the Blackbox
# Exporter using the http_2xx module.
# The job name has to be unique within the Prometheus configuration, and
# 'kube-api-blackbox' is the name of the Kubernetes API server job that is
# added to the same prometheus-additional.yaml, so this job gets its own name.
- job_name: 'blackbox-http'
  # No per-job scrape_interval: the alert rules for these probes use
  # "for: 5m" and 1m range windows, which a one-week interval cannot feed.
  # The global scrape interval is inherited instead.
  metrics_path: /probe
  params:
    module: [http_2xx]
  static_configs:
    - targets:
        - https://www.google.com
        - http://www.example.com
        - https://prometheus.io
  relabel_configs:
    # Hand the listed URL to the exporter as the "target" query parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the probed URL as the instance label of the resulting series.
    - source_labels: [__param_target]
      target_label: instance
    # Send the actual scrape request to the exporter Service.
    - target_label: __address__
      replacement: prometheus-blackbox-exporter:9115  # The blackbox exporter.
Ние генерираме Secret
използвайки следната команда.
# Base64-encode prometheus-additional.yaml onto a single line. GNU base64
# wraps its output at 76 columns by default, and embedded newlines would
# break the YAML value below, so they are stripped (works with GNU and BSD).
PROMETHEUS_ADD_CONFIG=$(base64 < prometheus-additional.yaml | tr -d '\n')
# Create or update the additional-scrape-configs Secret in "monitoring".
# The heredoc delimiter is unquoted on purpose so the variable is expanded.
cat << EOF | kubectl --namespace=monitoring apply -f -
apiVersion: v1
kind: Secret
metadata:
  name: additional-scrape-configs
type: Opaque
data:
  prometheus-additional.yaml: $PROMETHEUS_ADD_CONFIG
EOF
Посочваме additional-scrape-configs
за използване на Prometheus Operator additionalScrapeConfigs
.
# Open the "k8s" Prometheus custom resource in "monitoring" for editing.
kubectl edit --namespace monitoring prometheuses k8s
...
# Fragment of the Prometheus resource: reference the Secret (name) and the
# key inside it (key) that hold the additional scrape configuration.
spec:
  additionalScrapeConfigs:
    key: prometheus-additional.yaml
    name: additional-scrape-configs
Отиваме в уеб интерфейса на Prometheus, проверяваме показателите и целите.
# Forward local port 9090 to port 9090 of the prometheus-k8s Service.
kubectl port-forward --namespace monitoring svc/prometheus-k8s 9090:9090
Виждаме показателите и целите на Blackbox.
Добавяне на правила за известия (сигнал)
За да получавате известия от Blackbox експортера, нека добавим правила към Prometheus Operator.
# Open the prometheus-k8s-rules PrometheusRule in "monitoring" for editing.
kubectl edit --namespace monitoring prometheusrules prometheus-k8s-rules
...
# Alerting rule group for Blackbox Exporter probe metrics.
# Every rule must hold for 5 minutes before it fires. Descriptions are
# double-quoted so that "\n" is rendered as a line break.
- name: blackbox-exporter
  rules:
    # A probe reported failure (probe_success is 0).
    - alert: ProbeFailed
      expr: probe_success == 0
      for: 5m
      labels:
        severity: error
      annotations:
        summary: "Probe failed (instance {{ $labels.instance }})"
        description: "Probe failed\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
    # Average probe duration over the last minute exceeded one second.
    - alert: SlowProbe
      expr: avg_over_time(probe_duration_seconds[1m]) > 1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Slow probe (instance {{ $labels.instance }})"
        description: "Blackbox probe took more than 1s to complete\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
    # The returned HTTP status code is outside the 200-399 range.
    - alert: HttpStatusCode
      expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
      for: 5m
      labels:
        severity: error
      annotations:
        summary: "HTTP Status Code (instance {{ $labels.instance }})"
        description: "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
    # The earliest certificate expiry is less than 30 days (86400 s * 30) away.
    - alert: SslCertificateWillExpireSoon
      expr: probe_ssl_earliest_cert_expiry - time() < 86400 * 30
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "SSL certificate will expire soon (instance {{ $labels.instance }})"
        description: "SSL certificate expires in 30 days\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
    # The earliest certificate expiry is already in the past.
    - alert: SslCertificateHasExpired
      expr: probe_ssl_earliest_cert_expiry - time() <= 0
      for: 5m
      labels:
        severity: error
      annotations:
        summary: "SSL certificate has expired (instance {{ $labels.instance }})"
        description: "SSL certificate has expired already\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
    # Average HTTP request duration over the last minute exceeded one second.
    - alert: HttpSlowRequests
      expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "HTTP slow requests (instance {{ $labels.instance }})"
        description: "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
    # Average ICMP probe duration over the last minute exceeded one second.
    - alert: SlowPing
      expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Slow ping (instance {{ $labels.instance }})"
        description: "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
В уеб интерфейса на Prometheus отидете на Status => Rules и намерете правилата за предупреждение за blackbox-exporter.
Конфигуриране на известия за изтичане на SSL сертификат на Kubernetes API сървър
Нека да конфигурираме мониторинг на изтичането на SSL сертификата на Kubernetes API Server. Той ще изпраща известия веднъж седмично.
Добавяне на модула за експортиране на Blackbox за Kubernetes API Server Authentication.
# Open the prometheus-blackbox-exporter ConfigMap in "monitoring" for editing.
kubectl edit --namespace monitoring configmap prometheus-blackbox-exporter
...
# Blackbox module "kube-api": HTTP GET probe with a 5s timeout that verifies
# the server certificate against the service-account CA file and sends the
# service-account token as a bearer token.
kube-api:
  http:
    method: GET
    no_follow_redirects: false
    preferred_ip_protocol: ip4
    tls_config:
      insecure_skip_verify: false
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # bearer_token_file is an option of the http prober itself, so it sits
    # next to tls_config rather than inside it.
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    valid_http_versions:
      - HTTP/1.1
      # HTTP/2 responses are reported as "HTTP/2.0".
      - HTTP/2.0
    valid_status_codes: []
  prober: http
  timeout: 5s
Добавяне на конфигурацията за изтриване на Prometheus
# Scrape job that probes the in-cluster Kubernetes API endpoint through the
# Blackbox Exporter using the kube-api module.
- job_name: 'kube-api-blackbox'
  metrics_path: /probe
  params:
    module: [kube-api]
  static_configs:
    - targets:
        - https://kubernetes.default.svc/api
  relabel_configs:
    # Hand the listed URL to the exporter as the "target" query parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the probed URL as the instance label of the resulting series.
    - source_labels: [__param_target]
      target_label: instance
    # Send the actual scrape request to the exporter Service.
    - target_label: __address__
      replacement: prometheus-blackbox-exporter:9115  # The blackbox exporter.
Приложете Prometheus Secret
# Re-encode the updated prometheus-additional.yaml as single-line base64.
# Newlines are stripped because GNU base64 wraps at 76 columns by default,
# and a wrapped value would produce invalid YAML in the manifest below.
PROMETHEUS_ADD_CONFIG=$(base64 < prometheus-additional.yaml | tr -d '\n')
# Re-apply the additional-scrape-configs Secret in "monitoring".
# The heredoc delimiter is unquoted on purpose so the variable is expanded.
cat << EOF | kubectl --namespace=monitoring apply -f -
apiVersion: v1
kind: Secret
metadata:
  name: additional-scrape-configs
type: Opaque
data:
  prometheus-additional.yaml: $PROMETHEUS_ADD_CONFIG
EOF
Добавяне на правила за предупреждение
# Open the prometheus-k8s-rules PrometheusRule in "monitoring" for editing.
kubectl edit --namespace monitoring prometheusrules prometheus-k8s-rules
...
# Alerting rule group for the Kubernetes API server certificate.
# Only series from the kube-api-blackbox job are considered. The rule matches
# when the certificate expires in less than 90 days (86400 s * 90) and must
# hold for one week before it fires. The description is double-quoted so that
# "\n" is rendered as a line break.
- name: k8s-api-server-cert-expiry
  rules:
    - alert: K8sAPIServerSSLCertExpiringAfterThreeMonths
      expr: probe_ssl_earliest_cert_expiry{job="kube-api-blackbox"} - time() < 86400 * 90
      for: 1w
      labels:
        severity: warning
      annotations:
        summary: "Kubernetes API Server SSL certificate will expire after three months (instance {{ $labels.instance }})"
        description: "Kubernetes API Server SSL certificate expires in 90 days\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
Полезни връзки
Източник: www.habr.com