Refactoring

2026-02-05 18:09:30 +00:00
parent 2a794523b6
commit 806efeb074
24 changed files with 2261 additions and 123 deletions
--- a/kubernetes/afterwork-monitoring.yaml
+++ b/kubernetes/afterwork-monitoring.yaml
@@ -0,0 +1,408 @@
+# ==============================================================================
+# AfterWork API - Configuration Monitoring pour Lions Infrastructure
+# ==============================================================================
+# Cette configuration intègre l'application avec:
+# - Prometheus (https://prometheus.lions.dev) - scraping auto via annotations
+# - Grafana (https://grafana.lions.dev) - dashboard dédié
+# ==============================================================================
+
+---
+# ==============================================================================
+# ServiceMonitor for the Prometheus Operator (only relevant when it is installed)
+# ==============================================================================
+# Note: the Lions infrastructure scrapes pods through annotations; this
+# ServiceMonitor is an alternative for clusters running the Prometheus Operator.
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: afterwork-api-monitor
+  namespace: monitoring
+  labels:
+    project: lions-infrastructure-2025
+    release: prometheus
+    app: mic-after-work-server-impl-quarkus-main
+spec:
+  # Only look for Services in the "applications" namespace.
+  namespaceSelector:
+    matchNames:
+      - applications
+  # Pick the Services carrying the application's "app" label.
+  selector:
+    matchLabels:
+      app: mic-after-work-server-impl-quarkus-main
+  endpoints:
+    # Metrics path, scraped on the Service port named "http-direct".
+    - path: /afterwork/q/metrics
+      port: http-direct
+      scheme: http
+      interval: 30s
+      scrapeTimeout: 10s
+
+---
+# ==============================================================================
+# PrometheusRule - Alerts for the AfterWork API
+# ==============================================================================
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: afterwork-api-alerts
+  namespace: monitoring
+  labels:
+    app: mic-after-work-server-impl-quarkus-main
+    release: prometheus
+    project: lions-infrastructure-2025
+spec:
+  groups:
+    - name: afterwork-api.rules
+      rules:
+        # Alert when a scrape target of the application reports down.
+        # The original job-name regex is kept, and the namespace/app label
+        # selector used by every other rule in this group is unioned in so the
+        # alert does not depend on the job being named "*afterwork*".
+        # NOTE(review): `up == 0` cannot fire once the target has disappeared
+        # from service discovery; consider a complementary absent() rule.
+        - alert: AfterWorkAPIDown
+          expr: |
+            (
+              up{job=~".*afterwork.*"}
+              or
+              up{
+                kubernetes_namespace="applications",
+                app="mic-after-work-server-impl-quarkus-main"
+              }
+            ) == 0
+          for: 2m
+          labels:
+            severity: critical
+            application: afterwork-api
+          annotations:
+            summary: "AfterWork API is down"
+            description: "L'API AfterWork n'est pas accessible depuis plus de 2 minutes"
+
+        # Alert when the share of HTTP 5xx responses exceeds 5% over 5 minutes.
+        - alert: AfterWorkHighErrorRate
+          expr: |
+            sum(rate(http_server_requests_seconds_count{
+              kubernetes_namespace="applications",
+              app="mic-after-work-server-impl-quarkus-main",
+              status=~"5.."
+            }[5m])) /
+            sum(rate(http_server_requests_seconds_count{
+              kubernetes_namespace="applications",
+              app="mic-after-work-server-impl-quarkus-main"
+            }[5m])) > 0.05
+          for: 5m
+          labels:
+            severity: warning
+            application: afterwork-api
+          annotations:
+            summary: "High error rate on AfterWork API"
+            description: "Le taux d'erreur 5xx est supérieur à 5% depuis 5 minutes"
+
+        # Alert when the p95 request latency stays above 2 seconds.
+        - alert: AfterWorkHighLatency
+          expr: |
+            histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{
+              kubernetes_namespace="applications",
+              app="mic-after-work-server-impl-quarkus-main"
+            }[5m])) by (le)) > 2
+          for: 5m
+          labels:
+            severity: warning
+            application: afterwork-api
+          annotations:
+            summary: "High latency on AfterWork API"
+            description: "La latence p95 dépasse 2 secondes depuis 5 minutes"
+
+        # Alert when memory usage gets close to the configured limit.
+        # The container!="" / container!="POD" matchers drop the pod-level
+        # cgroup and pause-container series that cAdvisor also exports, so
+        # usage and limit are summed over real containers only and are not
+        # double counted.
+        - alert: AfterWorkHighMemoryUsage
+          expr: |
+            sum(container_memory_working_set_bytes{
+              namespace="applications",
+              pod=~"mic-after-work-server-impl-quarkus-main.*",
+              container!="",
+              container!="POD"
+            }) /
+            sum(container_spec_memory_limit_bytes{
+              namespace="applications",
+              pod=~"mic-after-work-server-impl-quarkus-main.*",
+              container!="",
+              container!="POD"
+            }) > 0.85
+          for: 5m
+          labels:
+            severity: warning
+            application: afterwork-api
+          annotations:
+            summary: "High memory usage on AfterWork API"
+            description: "L'utilisation mémoire dépasse 85% de la limite"
+
+        # Alert when a container restarted more than 3 times within one hour.
+        - alert: AfterWorkPodRestarts
+          expr: |
+            increase(kube_pod_container_status_restarts_total{
+              namespace="applications",
+              pod=~"mic-after-work-server-impl-quarkus-main.*"
+            }[1h]) > 3
+          for: 5m
+          labels:
+            severity: warning
+            application: afterwork-api
+          annotations:
+            summary: "AfterWork API pod restarting frequently"
+            description: "Le pod a redémarré plus de 3 fois dans la dernière heure"
+
+---
+# ==============================================================================
+# Grafana Dashboard ConfigMap (for automatic import)
+# ==============================================================================
+# NOTE(review): presumably discovered by a Grafana dashboard sidecar through the
+# "grafana_dashboard" label - confirm against the Grafana deployment.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: afterwork-grafana-dashboard
+  namespace: monitoring
+  labels:
+    grafana_dashboard: "1"
+    app: mic-after-work-server-impl-quarkus-main
+    project: lions-infrastructure-2025
+data:
+  # Dashboard definition as JSON. Everything below is part of the literal
+  # scalar, so it cannot carry comments; the panel queries differ from a naive
+  # version in three ways:
+  #   - Error Rate falls back to vector(0) so it shows 0% instead of "No data"
+  #     when no 5xx series exists.
+  #   - Memory/CPU exclude the pod-level cgroup (container="") and the pause
+  #     container (container="POD") to avoid double counting cAdvisor series.
+  #   - API Status also selects by namespace/app labels, like the other panels.
+  afterwork-api-dashboard.json: |
+    {
+      "annotations": {
+        "list": []
+      },
+      "editable": true,
+      "fiscalYearStartMonth": 0,
+      "graphTooltip": 0,
+      "id": null,
+      "links": [],
+      "liveNow": false,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {"color": "green", "value": null},
+                  {"color": "yellow", "value": 100},
+                  {"color": "red", "value": 500}
+                ]
+              },
+              "unit": "reqps"
+            }
+          },
+          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
+          "id": 1,
+          "options": {},
+          "targets": [
+            {
+              "expr": "sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m]))",
+              "legendFormat": "Requests/s",
+              "refId": "A"
+            }
+          ],
+          "title": "Request Rate",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "unit": "ms"
+            }
+          },
+          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
+          "id": 2,
+          "options": {},
+          "targets": [
+            {
+              "expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
+              "legendFormat": "p95 Latency",
+              "refId": "A"
+            },
+            {
+              "expr": "histogram_quantile(0.50, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
+              "legendFormat": "p50 Latency",
+              "refId": "B"
+            }
+          ],
+          "title": "Response Time",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "unit": "percent"
+            }
+          },
+          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
+          "id": 3,
+          "options": {},
+          "targets": [
+            {
+              "expr": "(sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\",status=~\"5..\"}[5m])) or vector(0)) / sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) * 100",
+              "legendFormat": "Error Rate %",
+              "refId": "A"
+            }
+          ],
+          "title": "Error Rate",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "unit": "bytes"
+            }
+          },
+          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
+          "id": 4,
+          "options": {},
+          "targets": [
+            {
+              "expr": "sum(container_memory_working_set_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\",container!=\"\",container!=\"POD\"})",
+              "legendFormat": "Memory Used",
+              "refId": "A"
+            },
+            {
+              "expr": "sum(container_spec_memory_limit_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\",container!=\"\",container!=\"POD\"})",
+              "legendFormat": "Memory Limit",
+              "refId": "B"
+            }
+          ],
+          "title": "Memory Usage",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "unit": "short"
+            }
+          },
+          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
+          "id": 5,
+          "options": {},
+          "targets": [
+            {
+              "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\",container!=\"\",container!=\"POD\"}[5m])) * 1000",
+              "legendFormat": "CPU Usage (millicores)",
+              "refId": "A"
+            }
+          ],
+          "title": "CPU Usage",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "thresholds"
+              },
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {"color": "red", "value": null},
+                  {"color": "green", "value": 1}
+                ]
+              }
+            }
+          },
+          "gridPos": {"h": 4, "w": 6, "x": 12, "y": 16},
+          "id": 6,
+          "options": {
+            "orientation": "auto",
+            "reduceOptions": {
+              "calcs": ["lastNotNull"],
+              "fields": "",
+              "values": false
+            },
+            "showThresholdLabels": false,
+            "showThresholdMarkers": true
+          },
+          "targets": [
+            {
+              "expr": "up{job=~\".*afterwork.*\"} or up{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}",
+              "legendFormat": "Status",
+              "refId": "A"
+            }
+          ],
+          "title": "API Status",
+          "type": "gauge"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "thresholds"
+              },
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {"color": "green", "value": null},
+                  {"color": "yellow", "value": 1},
+                  {"color": "red", "value": 3}
+                ]
+              }
+            }
+          },
+          "gridPos": {"h": 4, "w": 6, "x": 18, "y": 16},
+          "id": 7,
+          "options": {
+            "orientation": "auto",
+            "reduceOptions": {
+              "calcs": ["lastNotNull"],
+              "fields": "",
+              "values": false
+            }
+          },
+          "targets": [
+            {
+              "expr": "increase(kube_pod_container_status_restarts_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"}[1h])",
+              "legendFormat": "Restarts (1h)",
+              "refId": "A"
+            }
+          ],
+          "title": "Pod Restarts (1h)",
+          "type": "stat"
+        }
+      ],
+      "refresh": "30s",
+      "schemaVersion": 38,
+      "style": "dark",
+      "tags": ["lions", "afterwork", "quarkus", "api"],
+      "templating": {
+        "list": []
+      },
+      "time": {
+        "from": "now-1h",
+        "to": "now"
+      },
+      "timepicker": {},
+      "timezone": "browser",
+      "title": "AfterWork API Dashboard",
+      "uid": "afterwork-api",
+      "version": 1,
+      "weekStart": ""
+    }