# ==============================================================================
# AfterWork API - Configuration Monitoring pour Lions Infrastructure
# ==============================================================================
# Cette configuration intègre l'application avec:
# - Prometheus (https://prometheus.lions.dev) - scraping auto via annotations
# - Grafana (https://grafana.lions.dev) - dashboard dédié
# ==============================================================================

---
# ==============================================================================
# ServiceMonitor pour Prometheus Operator (si installé)
# ==============================================================================
# Note: L'infrastructure Lions utilise le scraping via annotations pod, mais
# ce ServiceMonitor peut être utilisé si Prometheus Operator est déployé.
# ServiceMonitor (Prometheus Operator CRD) describing how the AfterWork API
# metrics endpoint is scraped.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: afterwork-api-monitor
  namespace: monitoring
  labels:
    app: mic-after-work-server-impl-quarkus-main
    project: lions-infrastructure-2025
    # NOTE(review): presumably matched by the Prometheus instance's
    # serviceMonitorSelector - confirm against the Prometheus CR.
    release: prometheus
spec:
  # Only Services living in the "applications" namespace are considered ...
  namespaceSelector:
    matchNames: [applications]
  # ... and among them, only those carrying the application's `app` label.
  selector:
    matchLabels:
      app: mic-after-work-server-impl-quarkus-main
  endpoints:
    # `port` refers to a named port of the selected Service.
    - port: http-direct
      scheme: http
      path: /afterwork/q/metrics
      interval: 30s
      scrapeTimeout: 10s
---
# ==============================================================================
# PrometheusRule - Alertes pour AfterWork API
# ==============================================================================
# PrometheusRule (Prometheus Operator CRD) holding the AfterWork API alerts.
# Every rule carries `application: afterwork-api` so the alerts can be routed
# or grouped together.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: afterwork-api-alerts
  namespace: monitoring
  labels:
    app: mic-after-work-server-impl-quarkus-main
    release: prometheus
    project: lions-infrastructure-2025
spec:
  groups:
    - name: afterwork-api.rules
      rules:
        # Application down: a matching scrape target has reported up == 0
        # for 2 minutes.
        # NOTE(review): the job regex ".*afterwork.*" does not match the
        # hyphenated name "mic-after-work-..." used everywhere else in this
        # file - confirm the actual `job` label of the target. Also note that
        # `up == 0` returns nothing when the target disappears entirely.
        - alert: AfterWorkAPIDown
          expr: 'up{job=~".*afterwork.*"} == 0'
          for: 2m
          labels:
            severity: critical
            application: afterwork-api
          annotations:
            summary: "AfterWork API is down"
            description: "L'API AfterWork n'est pas accessible depuis plus de 2 minutes"

        # High HTTP 5xx ratio: share of 5xx responses over all responses,
        # computed on 5-minute rates, above 5% for 5 minutes.
        - alert: AfterWorkHighErrorRate
          expr: |
            sum(rate(http_server_requests_seconds_count{
              kubernetes_namespace="applications",
              app="mic-after-work-server-impl-quarkus-main",
              status=~"5.."
            }[5m])) /
            sum(rate(http_server_requests_seconds_count{
              kubernetes_namespace="applications",
              app="mic-after-work-server-impl-quarkus-main"
            }[5m])) > 0.05
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "High error rate on AfterWork API"
            description: "Le taux d'erreur 5xx est supérieur à 5% depuis 5 minutes"

        # High p95 latency: 95th percentile derived from the request-duration
        # histogram buckets, above 2 seconds for 5 minutes.
        # NOTE(review): requires the `_bucket` series to be exported by the
        # application - confirm histogram publication is enabled.
        - alert: AfterWorkHighLatency
          expr: |
            histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{
              kubernetes_namespace="applications",
              app="mic-after-work-server-impl-quarkus-main"
            }[5m])) by (le)) > 2
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "High latency on AfterWork API"
            description: "La latence p95 dépasse 2 secondes depuis 5 minutes"

        # Memory close to the limit: working set over the configured limit,
        # above 85% for 5 minutes.
        # - `container!=""` / `container!="POD"` drop the pod-level cgroup
        #   aggregate and the pause container that cAdvisor exposes next to
        #   the real containers, so usage is not counted twice.
        # - `by (pod)` evaluates each replica on its own; a global sum would
        #   let healthy replicas mask a single pod about to be OOM-killed.
        - alert: AfterWorkHighMemoryUsage
          expr: |
            sum by (pod) (container_memory_working_set_bytes{
              namespace="applications",
              pod=~"mic-after-work-server-impl-quarkus-main.*",
              container!="",
              container!="POD"
            }) /
            sum by (pod) (container_spec_memory_limit_bytes{
              namespace="applications",
              pod=~"mic-after-work-server-impl-quarkus-main.*",
              container!="",
              container!="POD"
            }) > 0.85
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "High memory usage on AfterWork API"
            description: "L'utilisation mémoire dépasse 85% de la limite"

        # Frequent restarts: container restart counter grew by more than 3
        # over the last hour, sustained for 5 minutes.
        - alert: AfterWorkPodRestarts
          expr: |
            increase(kube_pod_container_status_restarts_total{
              namespace="applications",
              pod=~"mic-after-work-server-impl-quarkus-main.*"
            }[1h]) > 3
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "AfterWork API pod restarting frequently"
            description: "Le pod a redémarré plus de 3 fois dans la dernière heure"
---
# ==============================================================================
# Grafana Dashboard ConfigMap (pour import automatique)
# ==============================================================================
# ConfigMap embedding the Grafana dashboard JSON for the AfterWork API.
# NOTE(review): the `grafana_dashboard: "1"` label is presumably what a
# Grafana dashboard sidecar watches for - confirm against the Grafana setup.
#
# The JSON below is a literal block scalar: it cannot contain comments.
# Panels: 1 request rate, 2 p95/p50 response time (ms), 3 5xx error rate (%),
# 4 memory used vs limit, 5 CPU (millicores), 6 API status, 7 restarts (1h).
# The cAdvisor queries of panels 4 and 5 filter `container!=""` and
# `container!="POD"` so the pod-level aggregate and pause-container series are
# not added on top of the real containers (which would inflate the values).
apiVersion: v1
kind: ConfigMap
metadata:
  name: afterwork-grafana-dashboard
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
    app: mic-after-work-server-impl-quarkus-main
    project: lions-infrastructure-2025
data:
  afterwork-api-dashboard.json: |
    {
      "annotations": {
        "list": []
      },
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 0,
      "id": null,
      "links": [],
      "liveNow": false,
      "panels": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "green", "value": null},
                  {"color": "yellow", "value": 100},
                  {"color": "red", "value": 500}
                ]
              },
              "unit": "reqps"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
          "id": 1,
          "options": {},
          "targets": [
            {
              "expr": "sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m]))",
              "legendFormat": "Requests/s",
              "refId": "A"
            }
          ],
          "title": "Request Rate",
          "type": "timeseries"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "unit": "ms"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
          "id": 2,
          "options": {},
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
              "legendFormat": "p95 Latency",
              "refId": "A"
            },
            {
              "expr": "histogram_quantile(0.50, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
              "legendFormat": "p50 Latency",
              "refId": "B"
            }
          ],
          "title": "Response Time",
          "type": "timeseries"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "unit": "percent"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
          "id": 3,
          "options": {},
          "targets": [
            {
              "expr": "sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\",status=~\"5..\"}[5m])) / sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) * 100",
              "legendFormat": "Error Rate %",
              "refId": "A"
            }
          ],
          "title": "Error Rate",
          "type": "timeseries"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "unit": "bytes"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
          "id": 4,
          "options": {},
          "targets": [
            {
              "expr": "sum(container_memory_working_set_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\",container!=\"\",container!=\"POD\"})",
              "legendFormat": "Memory Used",
              "refId": "A"
            },
            {
              "expr": "sum(container_spec_memory_limit_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\",container!=\"\",container!=\"POD\"})",
              "legendFormat": "Memory Limit",
              "refId": "B"
            }
          ],
          "title": "Memory Usage",
          "type": "timeseries"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "unit": "short"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
          "id": 5,
          "options": {},
          "targets": [
            {
              "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\",container!=\"\",container!=\"POD\"}[5m])) * 1000",
              "legendFormat": "CPU Usage (millicores)",
              "refId": "A"
            }
          ],
          "title": "CPU Usage",
          "type": "timeseries"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "red", "value": null},
                  {"color": "green", "value": 1}
                ]
              }
            }
          },
          "gridPos": {"h": 4, "w": 6, "x": 12, "y": 16},
          "id": 6,
          "options": {
            "orientation": "auto",
            "reduceOptions": {
              "calcs": ["lastNotNull"],
              "fields": "",
              "values": false
            },
            "showThresholdLabels": false,
            "showThresholdMarkers": true
          },
          "targets": [
            {
              "expr": "up{job=~\".*afterwork.*\"}",
              "legendFormat": "Status",
              "refId": "A"
            }
          ],
          "title": "API Status",
          "type": "gauge"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "green", "value": null},
                  {"color": "yellow", "value": 1},
                  {"color": "red", "value": 3}
                ]
              }
            }
          },
          "gridPos": {"h": 4, "w": 6, "x": 18, "y": 16},
          "id": 7,
          "options": {
            "orientation": "auto",
            "reduceOptions": {
              "calcs": ["lastNotNull"],
              "fields": "",
              "values": false
            }
          },
          "targets": [
            {
              "expr": "increase(kube_pod_container_status_restarts_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"}[1h])",
              "legendFormat": "Restarts (1h)",
              "refId": "A"
            }
          ],
          "title": "Pod Restarts (1h)",
          "type": "stat"
        }
      ],
      "refresh": "30s",
      "schemaVersion": 38,
      "style": "dark",
      "tags": ["lions", "afterwork", "quarkus", "api"],
      "templating": {
        "list": []
      },
      "time": {
        "from": "now-1h",
        "to": "now"
      },
      "timepicker": {},
      "timezone": "browser",
      "title": "AfterWork API Dashboard",
      "uid": "afterwork-api",
      "version": 1,
      "weekStart": ""
    }