В этой статье будет рассматриваться проект nginx-log-collector.
Для просмотра аналитики по логам создадим дашборд для Grafana.
Кому интересно, добро пожаловать под кат.
Устанавливаем nginx, grafana стандартным способом.
Устанавливаем кластер clickhouse с помощью ansible-playbook от
Создание бд и таблиц в Clickhouse
В этом
Каждый запрос делаем поочередно на каждом сервере кластера Clickhouse.
Важное замечание. В этой строке logs_cluster нужно заменить на ваше название кластера из файла clickhouse_remote_servers.xml между "remote_servers" и "shard".
ENGINE = Distributed('logs_cluster', 'nginx', 'access_log_shard', rand())
Установка и настройка nginx-log-collector-rpm
Nginx-log-collector не имеет rpm. Здесь
Устанавливаем rpm пакет nginx-log-collector-rpm
# Install the yum plugin that provides the `yum copr` subcommand used on the next line.
yum -y install yum-plugin-copr
# Enable the Copr repository antonpatsev/nginx-log-collector-rpm.
yum copr enable antonpatsev/nginx-log-collector-rpm
# Install the nginx-log-collector package.
yum -y install nginx-log-collector
# Start the nginx-log-collector service.
systemctl start nginx-log-collector
Правим конфиг /etc/nginx-log-collector/config.yaml:
.......
upload:
table: nginx.access_log
dsn: http://ip-адрес-кластера-clickhouse:8123/
- tag: "nginx_error:"
format: error # access | error
buffer_size: 1048576
upload:
table: nginx.error_log
dsn: http://ip-адрес-кластера-clickhouse:8123/
Настройка nginx
Общий конфиг nginx:
# Main nginx configuration.
# Access and error logs are sent over syslog to the unix socket
# /var/run/nginx_log.sock instead of being written to files under /var/log/nginx.
user nginx;
worker_processes auto;

#error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Plain-text format; only referenced by the commented-out file access_log below.
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';

    # One JSON object per request. body_bytes_sent is emitted unquoted (as a
    # JSON number); every other field is emitted as a JSON string.
    log_format avito_json escape=json
        '{'
            '"event_datetime": "$time_iso8601", '
            '"server_name": "$server_name", '
            '"remote_addr": "$remote_addr", '
            '"remote_user": "$remote_user", '
            '"http_x_real_ip": "$http_x_real_ip", '
            '"status": "$status", '
            '"scheme": "$scheme", '
            '"request_method": "$request_method", '
            '"request_uri": "$request_uri", '
            '"server_protocol": "$server_protocol", '
            '"body_bytes_sent": $body_bytes_sent, '
            '"http_referer": "$http_referer", '
            '"http_user_agent": "$http_user_agent", '
            '"request_bytes": "$request_length", '
            '"request_time": "$request_time", '
            '"upstream_addr": "$upstream_addr", '
            '"upstream_response_time": "$upstream_response_time", '
            '"hostname": "$hostname", '
            '"host": "$host"'
        '}';

    # nginx only logs to syslog when the target starts with the "syslog:" prefix;
    # without it the whole string is taken as a log file path.
    # The access log is tagged "nginx" and the error log "nginx_error".
    access_log syslog:server=unix:/var/run/nginx_log.sock,nohostname,tag=nginx avito_json; #ClickHouse
    error_log syslog:server=unix:/var/run/nginx_log.sock,nohostname,tag=nginx_error; #ClickHouse
    #access_log /var/log/nginx/access.log main;

    proxy_ignore_client_abort on;
    sendfile on;
    keepalive_timeout 65;

    include /etc/nginx/conf.d/*.conf;
}
Виртуальный хост один:
vhost1.conf:
# vhost1: proxies every request to the "backend" upstream pool.
upstream backend {
    # Placeholders: replace each entry with the address of a host running
    # stub_http_server (port 8080).
    server ip-адрес-сервера-с-stub_http_server:8080;
    server ip-адрес-сервера-с-stub_http_server:8080;
    server ip-адрес-сервера-с-stub_http_server:8080;
    server ip-адрес-сервера-с-stub_http_server:8080;
    server ip-адрес-сервера-с-stub_http_server:8080;
}

server {
    listen 80;
    server_name vhost1;

    location / {
        proxy_pass http://backend;
    }
}
Добавляем в файл /etc/hosts виртуальные хосты:
ip-адрес-сервера-с-nginx vhost1
Эмулятор HTTP сервера
В качестве эмулятора HTTP сервера будем использовать nodejs-stub-server.
Nodejs-stub-server не имеет rpm. Здесь
Устанавливаем на upstream nginx rpm пакет nodejs-stub-server
# Install the yum plugin that provides the `yum copr` subcommand used on the next line.
yum -y install yum-plugin-copr
# Enable the Copr repository antonpatsev/nodejs-stub-server.
yum copr enable antonpatsev/nodejs-stub-server
# Install the stub_http_server package.
yum -y install stub_http_server
# Start the stub_http_server service.
systemctl start stub_http_server
Нагрузочное тестирование
Тестирование проводим с помощью Apache benchmark.
Устанавливаем его:
# Install the httpd-tools package (the `ab` command is run in the loops below).
yum install -y httpd-tools
Запускаем тестирование с помощью Apache benchmark с 5 разных серверов:
# Endless load loops, one per load-generating server: each line differs only in
# the User-Agent header (1server..5server) sent to http://vhost1/.
# Every iteration runs ab with concurrency 10 (-c 10) and then sleeps 1 second.
# NOTE(review): the ab manual says -t implies -n 50000, so the preceding -n 10
# may be overridden by -t 10 — confirm the intended request count.
while true; do ab -H "User-Agent: 1server" -c 10 -n 10 -t 10 http://vhost1/; sleep 1; done
while true; do ab -H "User-Agent: 2server" -c 10 -n 10 -t 10 http://vhost1/; sleep 1; done
while true; do ab -H "User-Agent: 3server" -c 10 -n 10 -t 10 http://vhost1/; sleep 1; done
while true; do ab -H "User-Agent: 4server" -c 10 -n 10 -t 10 http://vhost1/; sleep 1; done
while true; do ab -H "User-Agent: 5server" -c 10 -n 10 -t 10 http://vhost1/; sleep 1; done
Настройка Grafana
На официальном сайте Grafana вы не найдете дашборд.
Поэтому будем делать его вручную.
Мой сохраненный дашборд вы можете найти
Также вам нужно создать переменную table с содержимым nginx.access_log.
Singlestat Total Requests:
SELECT
    1 AS t, /* constant in the first column; the query groups by it */
    count(*) AS c /* number of rows matching the time filter */
FROM $table
WHERE $timeFilter
GROUP BY t
Singlestat Failed Requests:
SELECT
    1 AS t, /* constant in the first column; the query groups by it */
    count(*) AS c /* rows whose status is none of 200, 201, 401 */
FROM $table
WHERE $timeFilter
    AND status NOT IN (200, 201, 401)
GROUP BY t
Singlestat Failing Percent:
SELECT
    1 AS t, /* constant in the first column; the query groups by it */
    /* Share of 499/500 responses among ALL requests, in percent.
       Dividing by the total (not by the number of successful requests)
       keeps the value within 0..100 and avoids dividing by zero when
       there are no successful requests in the range. */
    (sum(status = 500 OR status = 499) / count(*)) * 100
FROM $table
WHERE $timeFilter
GROUP BY t
Singlestat Avg Response Time:
SELECT
    1, /* constant in the first column; the query groups by it */
    avg(request_time) /* mean request_time over the time filter */
FROM $table
WHERE $timeFilter
GROUP BY 1
Singlestat Max Response Time:
SELECT
    1 AS t, /* constant in the first column; the query groups by it */
    max(request_time) AS c /* largest request_time over the time filter */
FROM $table
WHERE $timeFilter
GROUP BY t
Count Status:
$columns(status, count(*) as c)
from $table
Для вывода данных как пирог, нужно установить плагин и перезагрузить grafana.
# Install the grafana-piechart-panel plugin with the Grafana CLI.
grafana-cli plugins install grafana-piechart-panel
# Restart the grafana-server service after installing the plugin.
service grafana-server restart
Pie TOP 5 Status:
SELECT
    1, /* fake timestamp value */
    status,
    /* Requests per status code. Counting rows (instead of summing the
       status values themselves) makes the top-5 ranking reflect request
       volume rather than the numeric size of the code. */
    count(*) AS Reqs
FROM $table
WHERE $timeFilter
GROUP BY status
ORDER BY Reqs DESC
LIMIT 5
Дальше буду приводить запросы без скриншотов:
Count http_user_agent:
$columns(http_user_agent, count(*) c)
FROM $table
GoodRate/BadRate:
$rate(countIf(status = 200) AS good, countIf(status != 200) AS bad)
FROM $table
Response Timing:
$rate(avg(request_time) as request_time)
FROM $table
Upstream response time (время ответа 1-го upstream):
$rate(avg(arrayElement(upstream_response_time, 1)) as upstream_response_time)
FROM $table
Table Count Status for all vhost:
$columns(status, count(*) as c)
from $table
Общий вид дашборда
Сравнение avg() и quantile()
avg()
quantile()
Вывод:
Надеюсь, сообщество подключится к разработке/тестированию и использованию nginx-log-collector.
И кто-нибудь, когда внедрит nginx-log-collector, расскажет, сколько сэкономил диска, ОЗУ, ЦПУ.
Telegram каналы:
Clickhouse, Nginx, Церковь метрик, Сбор и аналитика системных сообщений, Grafana
Миллисекунды:
Кому важны миллисекунды, напишите или проголосуйте, пожалуйста, в этом
Источник: habr.com