# ==============================================================================
# AfterWork API - Configuration Monitoring pour Lions Infrastructure
# ==============================================================================
# Cette configuration intègre l'application avec:
# - Prometheus (https://prometheus.lions.dev) - scraping auto via annotations
# - Grafana (https://grafana.lions.dev) - dashboard dédié
# ==============================================================================
---
# ==============================================================================
# ServiceMonitor pour Prometheus Operator (si installé)
# ==============================================================================
# Note: L'infrastructure Lions utilise le scraping via annotations pod, mais
# ce ServiceMonitor peut être utilisé si Prometheus Operator est déployé.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: afterwork-api-monitor
  namespace: monitoring
  labels:
    app: mic-after-work-server-impl-quarkus-main
    release: prometheus
    project: lions-infrastructure-2025
spec:
  selector:
    matchLabels:
      app: mic-after-work-server-impl-quarkus-main
  namespaceSelector:
    matchNames:
      - applications
  endpoints:
    - port: http-direct
      path: /afterwork/q/metrics
      interval: 30s
      scrapeTimeout: 10s
      scheme: http
---
# ==============================================================================
# PrometheusRule - Alertes pour AfterWork API
# ==============================================================================
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: afterwork-api-alerts
  namespace: monitoring
  labels:
    app: mic-after-work-server-impl-quarkus-main
    release: prometheus
    project: lions-infrastructure-2025
spec:
  groups:
    - name: afterwork-api.rules
      rules:
        # Alerte si l'application est down
        - alert: AfterWorkAPIDown
          expr: up{job=~".*afterwork.*"} == 0
          for: 2m
          labels:
            severity: critical
            application: afterwork-api
          annotations:
            summary: "AfterWork API is down"
            description: "L'API AfterWork n'est pas accessible depuis plus de 2 minutes"

        # Alerte si le taux d'erreur HTTP 5xx est élevé
        - alert: AfterWorkHighErrorRate
          expr: |
            sum(rate(http_server_requests_seconds_count{
              kubernetes_namespace="applications",
              app="mic-after-work-server-impl-quarkus-main",
              status=~"5.."
            }[5m]))
            /
            sum(rate(http_server_requests_seconds_count{
              kubernetes_namespace="applications",
              app="mic-after-work-server-impl-quarkus-main"
            }[5m])) > 0.05
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "High error rate on AfterWork API"
            description: "Le taux d'erreur 5xx est supérieur à 5% depuis 5 minutes"

        # Alerte si la latence p95 est élevée
        - alert: AfterWorkHighLatency
          expr: |
            histogram_quantile(0.95,
              sum(rate(http_server_requests_seconds_bucket{
                kubernetes_namespace="applications",
                app="mic-after-work-server-impl-quarkus-main"
              }[5m])) by (le)) > 2
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "High latency on AfterWork API"
            description: "La latence p95 dépasse 2 secondes depuis 5 minutes"

        # Alerte si la mémoire est proche de la limite
        - alert: AfterWorkHighMemoryUsage
          expr: |
            sum(container_memory_working_set_bytes{
              namespace="applications",
              pod=~"mic-after-work-server-impl-quarkus-main.*"
            })
            /
            sum(container_spec_memory_limit_bytes{
              namespace="applications",
              pod=~"mic-after-work-server-impl-quarkus-main.*"
            }) > 0.85
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "High memory usage on AfterWork API"
            description: "L'utilisation mémoire dépasse 85% de la limite"

        # Alerte si le pod redémarre fréquemment
        - alert: AfterWorkPodRestarts
          expr: |
            increase(kube_pod_container_status_restarts_total{
              namespace="applications",
              pod=~"mic-after-work-server-impl-quarkus-main.*"
            }[1h]) > 3
          for: 5m
          labels:
            severity: warning
            application: afterwork-api
          annotations:
            summary: "AfterWork API pod restarting frequently"
            description: "Le pod a redémarré plus de 3 fois dans la dernière heure"
---
# ==============================================================================
# Grafana Dashboard ConfigMap (pour import automatique)
# ==============================================================================
apiVersion: v1
kind: ConfigMap
metadata:
  name: afterwork-grafana-dashboard
  namespace: monitoring
  labels:
    # Label détecté par le sidecar Grafana pour l'import automatique
    grafana_dashboard: "1"
    app: mic-after-work-server-impl-quarkus-main
    project: lions-infrastructure-2025
data:
  afterwork-api-dashboard.json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 0,
      "id": null,
      "links": [],
      "liveNow": false,
      "panels": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "palette-classic"},
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "green", "value": null},
                  {"color": "yellow", "value": 100},
                  {"color": "red", "value": 500}
                ]
              },
              "unit": "reqps"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
          "id": 1,
          "options": {},
          "targets": [
            {
              "expr": "sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m]))",
              "legendFormat": "Requests/s",
              "refId": "A"
            }
          ],
          "title": "Request Rate",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "palette-classic"},
              "unit": "ms"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
          "id": 2,
          "options": {},
          "targets": [
            {
              "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
              "legendFormat": "p95 Latency",
              "refId": "A"
            },
            {
              "expr": "histogram_quantile(0.50, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
              "legendFormat": "p50 Latency",
              "refId": "B"
            }
          ],
          "title": "Response Time",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "palette-classic"},
              "unit": "percent"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
          "id": 3,
          "options": {},
          "targets": [
            {
              "expr": "sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\",status=~\"5..\"}[5m])) / sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) * 100",
              "legendFormat": "Error Rate %",
              "refId": "A"
            }
          ],
          "title": "Error Rate",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "palette-classic"},
              "unit": "bytes"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
          "id": 4,
          "options": {},
          "targets": [
            {
              "expr": "sum(container_memory_working_set_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"})",
              "legendFormat": "Memory Used",
              "refId": "A"
            },
            {
              "expr": "sum(container_spec_memory_limit_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"})",
              "legendFormat": "Memory Limit",
              "refId": "B"
            }
          ],
          "title": "Memory Usage",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "palette-classic"},
              "unit": "short"
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
          "id": 5,
          "options": {},
          "targets": [
            {
              "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"}[5m])) * 1000",
              "legendFormat": "CPU Usage (millicores)",
              "refId": "A"
            }
          ],
          "title": "CPU Usage",
          "type": "timeseries"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "thresholds"},
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "red", "value": null},
                  {"color": "green", "value": 1}
                ]
              }
            }
          },
          "gridPos": {"h": 4, "w": 6, "x": 12, "y": 16},
          "id": 6,
          "options": {
            "orientation": "auto",
            "reduceOptions": {
              "calcs": ["lastNotNull"],
              "fields": "",
              "values": false
            },
            "showThresholdLabels": false,
            "showThresholdMarkers": true
          },
          "targets": [
            {
              "expr": "up{job=~\".*afterwork.*\"}",
              "legendFormat": "Status",
              "refId": "A"
            }
          ],
          "title": "API Status",
          "type": "gauge"
        },
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "thresholds"},
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "green", "value": null},
                  {"color": "yellow", "value": 1},
                  {"color": "red", "value": 3}
                ]
              }
            }
          },
          "gridPos": {"h": 4, "w": 6, "x": 18, "y": 16},
          "id": 7,
          "options": {
            "orientation": "auto",
            "reduceOptions": {
              "calcs": ["lastNotNull"],
              "fields": "",
              "values": false
            }
          },
          "targets": [
            {
              "expr": "increase(kube_pod_container_status_restarts_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"}[1h])",
              "legendFormat": "Restarts (1h)",
              "refId": "A"
            }
          ],
          "title": "Pod Restarts (1h)",
          "type": "stat"
        }
      ],
      "refresh": "30s",
      "schemaVersion": 38,
      "style": "dark",
      "tags": ["lions", "afterwork", "quarkus", "api"],
      "templating": {"list": []},
      "time": {"from": "now-1h", "to": "now"},
      "timepicker": {},
      "timezone": "browser",
      "title": "AfterWork API Dashboard",
      "uid": "afterwork-api",
      "version": 1,
      "weekStart": ""
    }