Logging expansion

Adds Promtail and Loki to the metrics stack, and provisions Loki as a datasource in Grafana on deployment
Adds a dashboard to monitor the system (logs for producer and consumer)
Restructures the base directory so deployment manifests are moved into a deployments folder
Adds selective labels to producer, consumer and addon so that their logs are ingested by promtail
This commit is contained in:
iPromKnight
2024-02-04 19:50:28 +00:00
parent eef3265850
commit b773ffcb20
13 changed files with 749 additions and 41 deletions

View File

@@ -0,0 +1,39 @@
# Shared environment file loaded by the compose services via `env_file: .env`.
# NOTE(review): the credentials below are development defaults; the compose
# file itself asks for them to be changed before any port is exposed.

# General environment variables
# Time zone as an IANA tz database name, written Area/Location.
TZ=Europe/London

# PostgreSQL
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=knightcrawler

# MongoDB
MONGODB_HOST=mongodb
MONGODB_PORT=27017
MONGODB_DB=knightcrawler
MONGO_INITDB_ROOT_USERNAME=mongo
MONGO_INITDB_ROOT_PASSWORD=mongo

# Addon
DEBUG_MODE=false

# Consumer
RABBIT_URI=amqp://guest:guest@rabbitmq:5672/?heartbeat=30
QUEUE_NAME=ingested
JOB_CONCURRENCY=5
JOBS_ENABLED=true
MAX_SINGLE_TORRENT_CONNECTIONS=10
TORRENT_TIMEOUT=30000
UDP_TRACKERS_ENABLED=true

# Producer
# NOTE(review): QueueName matches QUEUE_NAME above; presumably producer and
# consumer must agree on the queue - confirm before changing either one.
RabbitMqConfiguration__Host=rabbitmq
RabbitMqConfiguration__QueueName=ingested
RabbitMqConfiguration__Username=guest
RabbitMqConfiguration__Password=guest
RabbitMqConfiguration__Durable=true
RabbitMqConfiguration__MaxQueueSize=0
RabbitMqConfiguration__MaxPublishBatchSize=500
RabbitMqConfiguration__PublishIntervalInSeconds=10
GithubSettings__PAT=

View File

@@ -0,0 +1,8 @@
# Grafana dashboard provisioning: one file-based provider that loads every
# dashboard found under the path below into the "Dashboards" folder.
apiVersion: 1

providers:
  - name: 'Knightcrawler'
    type: file
    folder: 'Dashboards'
    options:
      # The metrics compose file mounts logs.json into this directory.
      path: /var/lib/grafana/dashboards

View File

@@ -0,0 +1,581 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Logs for Producer and Consumer",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"panels": [],
"title": "Consumer Data",
"type": "row"
},
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"fieldMinMax": false,
"mappings": [],
"min": 0,
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "none",
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 1
},
"id": 1,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto",
"text": {}
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"editorMode": "builder",
"expr": "sum(count_over_time({container=~\"knightcrawler-consumer-[0-9]+\"} | json | __error__=`` | level > 30 [15m]))",
"queryType": "range",
"refId": "ConsumerErrorsLast15m"
}
],
"title": "Errors and Warnings - Last 15m",
"transparent": true,
"type": "gauge"
},
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 1
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"editorMode": "code",
"expr": "sum(count_over_time({container=~\"knightcrawler-consumer-[0-9]+\"} | json | __error__=`` | level = `30` [15m]))",
"queryType": "range",
"refId": "InfoLogsLast15mSum"
}
],
"title": "Info Logs - Last 15m",
"transparent": true,
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "left",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"inspect": false,
"minWidth": 50
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unitScale": true
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "level"
},
"properties": [
{
"id": "custom.width",
"value": 100
},
{
"id": "custom.align",
"value": "center"
}
]
},
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.width",
"value": 200
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 6
},
"id": 5,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"editorMode": "code",
"expr": "{container=~\"knightcrawler-consumer-[0-9]+\"} | json | __error__=``",
"queryType": "range",
"refId": "A"
}
],
"title": "Log entries",
"transformations": [
{
"id": "extractFields",
"options": {
"format": "json",
"jsonPaths": [
{
"alias": "level",
"path": "level"
},
{
"alias": "Message",
"path": "msg"
}
],
"keepTime": true,
"replace": true,
"source": "Line"
}
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 18
},
"id": 6,
"panels": [],
"title": "Producer Data",
"type": "row"
},
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"fieldMinMax": false,
"mappings": [],
"min": 0,
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "none",
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 19
},
"id": 7,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto",
"text": {}
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"editorMode": "code",
"expr": "sum(\n count_over_time({container=~\"knightcrawler-producer-[0-9]+\"} |= \"[Warning]\" [15m])\n) \n+\nsum(\n count_over_time({container=~\"knightcrawler-producer-[0-9]+\"} |= \"[Error]\" [15m])\n)",
"queryType": "range",
"refId": "ProducerErrorsLast15m"
}
],
"title": "Errors and Warnings - Last 15m",
"transparent": true,
"type": "gauge"
},
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 19
},
"id": 8,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"editorMode": "code",
"expr": "sum(\n count_over_time({container=~\"knightcrawler-producer-[0-9]+\"} |= \"[Information]\" [15m])\n) ",
"queryType": "range",
"refId": "InfoLogsLast15mSum"
}
],
"title": "Info Logs - Last 15m",
"transparent": true,
"type": "stat"
},
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "left",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"inspect": false,
"minWidth": 50
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unitScale": true
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "labels"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "tsNs"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "id"
},
"properties": [
{
"id": "custom.hidden",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.width",
"value": 200
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 24
},
"id": 9,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true
},
"pluginVersion": "10.3.1",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "loki"
},
"editorMode": "code",
"expr": "{container=~\"knightcrawler-producer-[0-9]+\"}",
"queryType": "range",
"refId": "A"
}
],
"title": "Log entries",
"transformations": [],
"type": "table"
}
],
"refresh": "10s",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Knightcrawler Logs",
"uid": "knightcrawler-logs",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,12 @@
# Grafana datasource provisioning: registers Loki as a non-default datasource.
apiVersion: 1

datasources:
  - name: Loki
    type: loki
    # Fixed uid: the log dashboard's panels select their datasource by
    # uid "loki", so this value must not change independently of them.
    uid: loki
    orgId: 1
    access: proxy
    url: 'http://loki:3100'
    basicAuth: false
    isDefault: false
    editable: true
    version: 1

View File

@@ -0,0 +1,11 @@
# Grafana datasource provisioning: registers Prometheus as the default
# datasource.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    uid: prometheus
    orgId: 1
    access: proxy
    url: 'http://prometheus:9090'
    basicAuth: false
    isDefault: true
    editable: true

View File

@@ -0,0 +1,35 @@
# Loki configuration: one instance, authentication off, chunks and rules
# stored on the local filesystem under /loki, in-memory ring.
auth_enabled: false

server:
  http_listen_port: 3100

limits_config:
  split_queries_by_interval: 24h
  max_query_parallelism: 100

query_scheduler:
  max_outstanding_requests_per_tenant: 4096

frontend:
  max_outstanding_per_tenant: 4096

common:
  path_prefix: /loki
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules

schema_config:
  configs:
    # Quoted so generic YAML tooling keeps the start date as a string
    # instead of converting it to a timestamp.
    - from: '2020-10-24'
      schema: v11
      store: boltdb-shipper
      object_store: filesystem
      index:
        prefix: index_
        period: 24h

View File

@@ -0,0 +1,22 @@
# Prometheus configuration: scrapes the RabbitMQ and postgres-exporter
# endpoints every 15 seconds. No Alertmanager and no rule files are set up.
global:
  scrape_interval: 15s  # Scrape every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. Default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

alerting:
  alertmanagers:
    - static_configs:
        # Explicit empty list rather than a bare key (which parses as null).
        # To enable alerting, replace [] with a list such as:
        #   - alertmanager:9093
        - targets: []

# Explicit empty list rather than a bare key (which parses as null).
# To load rules, replace [] with a list such as:
#   - "first_rules.yml"
#   - "second_rules.yml"
rule_files: []

scrape_configs:
  - job_name: "rabbitmq"
    static_configs:
      - targets: ["rabbitmq:15692"]

  - job_name: "postgres-exporter"
    static_configs:
      - targets: ["postgres-exporter:9187"]

View File

@@ -0,0 +1,27 @@
# Promtail configuration: discovers containers through the Docker socket and
# pushes their logs to Loki.
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  # NOTE(review): /tmp is not backed by a volume in the metrics compose file,
  # so read positions presumably do not survive container re-creation -
  # confirm whether that is acceptable.
  filename: /tmp/positions.yaml

clients:
  - url: 'http://loki:3100/loki/api/v1/push'
    tenant_id: knightcrawler

scrape_configs:
  - job_name: scraper
    docker_sd_configs:
      - host: 'unix:///var/run/docker.sock'
        refresh_interval: 15s
        filters:
          # Container label filters: services opt in with the label
          # `logging: "promtail"`, and the second value restricts discovery
          # to the "knightcrawler" compose project.
          - name: label
            values:
              - logging=promtail
              - com.docker.compose.project=knightcrawler
    relabel_configs:
      # Copy the container name into the `container` label, without the
      # leading "/" that the regex skips.
      - source_labels:
          - __meta_docker_container_name
        regex: '/(.*)'
        target_label: container
      - source_labels:
          - __meta_docker_container_log_stream
        target_label: logstream
      # NOTE(review): reads a `logging_jobname` container label; none of the
      # compose services visible alongside this config set it, so `job` is
      # presumably empty for them - confirm.
      - source_labels:
          - __meta_docker_container_label_logging_jobname
        target_label: job

View File

@@ -0,0 +1,74 @@
# Metrics and logging stack: Prometheus, Grafana, postgres-exporter, Promtail
# and Loki. Every service joins the pre-existing "knightcrawler-network"
# (declared external at the bottom), so that network must exist before this
# stack is started.
version: '3.8'
name: knightcrawler-metrics

services:
  prometheus:
    image: prom/prometheus:v2.20.1
    volumes:
      - ./config/prometheus/config.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    ports:
      - "9090:9090"
    networks:
      - knightcrawler-network

  grafana:
    # NOTE(review): floating tag; the provisioned dashboard records
    # pluginVersion 10.3.1 - consider pinning a matching release.
    image: grafana/grafana:latest
    volumes:
      - ./config/grafana/datasources:/etc/grafana/provisioning/datasources
      - ./config/grafana/dashboards/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml
      - ./config/grafana/dashboards/logs.json:/var/lib/grafana/dashboards/logs.json
      - grafana-data:/var/lib/grafana
    ports:
      - "3000:3000"
    environment:
      - GF_PATHS_PROVISIONING=/etc/grafana/provisioning
      # Admin credentials can be overridden from the shell or the project
      # .env file through GRAFANA_ADMIN_USER / GRAFANA_ADMIN_PASSWORD. The
      # defaults keep the previous hard-coded values, so existing deployments
      # behave the same until the variables are set.
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin_password}
    depends_on:
      - prometheus
    networks:
      - knightcrawler-network

  postgres-exporter:
    image: prometheuscommunity/postgres-exporter
    env_file:
      - .env
    environment:
      # Connection string assembled from the POSTGRES_* variables.
      DATA_SOURCE_NAME: "postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}?sslmode=disable"
    networks:
      - knightcrawler-network

  promtail:
    image: grafana/promtail:2.9.4
    volumes:
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      # Docker socket: the Promtail config uses it for container discovery.
      - /var/run/docker.sock:/var/run/docker.sock
      - ./config/promtail/config.yml:/etc/promtail/config.yml
    command: -config.file=/etc/promtail/config.yml
    depends_on:
      - prometheus
      - loki
    networks:
      - knightcrawler-network

  loki:
    image: grafana/loki:2.9.4
    command: -config.file=/etc/loki/local-config.yml
    depends_on:
      - prometheus
      - grafana
    volumes:
      - loki-data:/loki
      - ./config/loki/config.yml:/etc/loki/local-config.yml
    networks:
      - knightcrawler-network

volumes:
  loki-data: {}
  grafana-data: {}

networks:
  knightcrawler-network:
    external: true

View File

@@ -0,0 +1,131 @@
# Main application stack: PostgreSQL, MongoDB and RabbitMQ plus the producer,
# consumer and addon applications. The x-* extension fields below hold shared
# fragments that the services reuse through anchors and merge keys.
version: "3.8"
name: knightcrawler

x-restart: &restart-policy "unless-stopped"

# Timing settings shared by every healthcheck.
x-basehealth: &base-health
  interval: 10s
  timeout: 10s
  retries: 3
  start_period: 10s

x-rabbithealth: &rabbitmq-health
  <<: *base-health
  test: rabbitmq-diagnostics -q ping

x-mongohealth: &mongodb-health
  <<: *base-health
  test:
    - "CMD"
    - "mongosh"
    - "--eval"
    - "db.adminCommand('ping')"

x-postgreshealth: &postgresdb-health
  <<: *base-health
  test: pg_isready

# Common settings for the application services: wait for all three backing
# services to report healthy, and apply the shared restart policy.
x-apps: &knightcrawler-app
  restart: *restart-policy
  depends_on:
    mongodb:
      condition: service_healthy
    postgres:
      condition: service_healthy
    rabbitmq:
      condition: service_healthy

services:
  postgres:
    image: postgres:latest
    restart: *restart-policy
    env_file:
      - .env
    environment:
      PGUSER: postgres  # needed for healthcheck.
    # # If you need the database to be accessible from outside, please open the below port.
    # # Furthermore, please, please, please, change the username and password in the .env file.
    # # If you want to enhance your security even more, create a new user for the database with a strong password.
    # ports:
    #   - "5432:5432"
    volumes:
      - postgres:/var/lib/postgresql/data
    healthcheck: *postgresdb-health
    networks:
      - knightcrawler-network

  mongodb:
    image: mongo:latest
    restart: *restart-policy
    env_file:
      - .env
    # # If you need the database to be accessible from outside, please open the below port.
    # # Furthermore, please, please, please, change the username and password in the .env file.
    # ports:
    #   - "27017:27017"
    volumes:
      - mongo:/data/db
    healthcheck: *mongodb-health
    networks:
      - knightcrawler-network

  rabbitmq:
    image: rabbitmq:3-management
    restart: *restart-policy
    # # If you need the broker to be accessible from outside, please open the below ports.
    # # Furthermore, please, please, please, look at the documentation for rabbit on how to secure the service.
    # ports:
    #   - "5672:5672"
    #   - "15672:15672"
    #   - "15692:15692"
    volumes:
      - rabbitmq:/var/lib/rabbitmq
    healthcheck: *rabbitmq-health
    networks:
      - knightcrawler-network

  producer:
    <<: *knightcrawler-app
    build:
      context: src/producer
      dockerfile: Dockerfile
    env_file:
      - .env
    # Opt-in label that the Promtail container filter selects on.
    labels:
      logging: "promtail"
    networks:
      - knightcrawler-network

  consumer:
    <<: *knightcrawler-app
    build:
      context: src/node/consumer
      dockerfile: Dockerfile
    env_file:
      - .env
    labels:
      logging: "promtail"
    # Three replicas; the log dashboard's queries match container names of
    # the form knightcrawler-consumer-<number>.
    deploy:
      replicas: 3
    networks:
      - knightcrawler-network

  addon:
    <<: *knightcrawler-app
    build:
      context: src/node/addon
      dockerfile: Dockerfile
    env_file:
      - .env
    labels:
      logging: "promtail"
    ports:
      - "7000:7000"
    networks:
      - knightcrawler-network

networks:
  # Given a fixed name so the metrics stack can attach to it as an external
  # network.
  knightcrawler-network:
    name: knightcrawler-network
    driver: bridge

volumes:
  postgres: {}
  mongo: {}
  rabbitmq: {}