Refactoring

This commit is contained in:
dahoud
2026-02-05 18:09:30 +00:00
parent 2a794523b6
commit 806efeb074
24 changed files with 2261 additions and 123 deletions

View File

@@ -0,0 +1,408 @@
# ==============================================================================
# AfterWork API - Configuration Monitoring pour Lions Infrastructure
# ==============================================================================
# Cette configuration intègre l'application avec:
# - Prometheus (https://prometheus.lions.dev) - scraping auto via annotations
# - Grafana (https://grafana.lions.dev) - dashboard dédié
# ==============================================================================
---
# ==============================================================================
# ServiceMonitor pour Prometheus Operator (si installé)
# ==============================================================================
# Note: L'infrastructure Lions utilise le scraping via annotations pod, mais
# ce ServiceMonitor peut être utilisé si Prometheus Operator est déployé.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: afterwork-api-monitor
namespace: monitoring
labels:
app: mic-after-work-server-impl-quarkus-main
release: prometheus
project: lions-infrastructure-2025
spec:
selector:
matchLabels:
app: mic-after-work-server-impl-quarkus-main
namespaceSelector:
matchNames:
- applications
endpoints:
- port: http-direct
path: /afterwork/q/metrics
interval: 30s
scrapeTimeout: 10s
scheme: http
---
# ==============================================================================
# PrometheusRule - Alertes pour AfterWork API
# ==============================================================================
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: afterwork-api-alerts
namespace: monitoring
labels:
app: mic-after-work-server-impl-quarkus-main
release: prometheus
project: lions-infrastructure-2025
spec:
groups:
- name: afterwork-api.rules
rules:
# Alerte si l'application est down
- alert: AfterWorkAPIDown
expr: up{job=~".*afterwork.*"} == 0
for: 2m
labels:
severity: critical
application: afterwork-api
annotations:
summary: "AfterWork API is down"
description: "L'API AfterWork n'est pas accessible depuis plus de 2 minutes"
# Alerte si le taux d'erreur HTTP 5xx est élevé
- alert: AfterWorkHighErrorRate
expr: |
sum(rate(http_server_requests_seconds_count{
kubernetes_namespace="applications",
app="mic-after-work-server-impl-quarkus-main",
status=~"5.."
}[5m])) /
sum(rate(http_server_requests_seconds_count{
kubernetes_namespace="applications",
app="mic-after-work-server-impl-quarkus-main"
}[5m])) > 0.05
for: 5m
labels:
severity: warning
application: afterwork-api
annotations:
summary: "High error rate on AfterWork API"
description: "Le taux d'erreur 5xx est supérieur à 5% depuis 5 minutes"
# Alerte si la latence p95 est élevée
- alert: AfterWorkHighLatency
expr: |
histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{
kubernetes_namespace="applications",
app="mic-after-work-server-impl-quarkus-main"
}[5m])) by (le)) > 2
for: 5m
labels:
severity: warning
application: afterwork-api
annotations:
summary: "High latency on AfterWork API"
description: "La latence p95 dépasse 2 secondes depuis 5 minutes"
# Alerte si la mémoire est proche de la limite
- alert: AfterWorkHighMemoryUsage
expr: |
sum(container_memory_working_set_bytes{
namespace="applications",
pod=~"mic-after-work-server-impl-quarkus-main.*"
}) /
sum(container_spec_memory_limit_bytes{
namespace="applications",
pod=~"mic-after-work-server-impl-quarkus-main.*"
}) > 0.85
for: 5m
labels:
severity: warning
application: afterwork-api
annotations:
summary: "High memory usage on AfterWork API"
description: "L'utilisation mémoire dépasse 85% de la limite"
# Alerte si le pod redémarre fréquemment
- alert: AfterWorkPodRestarts
expr: |
increase(kube_pod_container_status_restarts_total{
namespace="applications",
pod=~"mic-after-work-server-impl-quarkus-main.*"
}[1h]) > 3
for: 5m
labels:
severity: warning
application: afterwork-api
annotations:
summary: "AfterWork API pod restarting frequently"
description: "Le pod a redémarré plus de 3 fois dans la dernière heure"
---
# ==============================================================================
# Grafana Dashboard ConfigMap (pour import automatique)
# ==============================================================================
apiVersion: v1
kind: ConfigMap
metadata:
name: afterwork-grafana-dashboard
namespace: monitoring
labels:
grafana_dashboard: "1"
app: mic-after-work-server-impl-quarkus-main
project: lions-infrastructure-2025
data:
afterwork-api-dashboard.json: |
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 100},
{"color": "red", "value": 500}
]
},
"unit": "reqps"
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
"id": 1,
"options": {},
"targets": [
{
"expr": "sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m]))",
"legendFormat": "Requests/s",
"refId": "A"
}
],
"title": "Request Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"unit": "ms"
}
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
"id": 2,
"options": {},
"targets": [
{
"expr": "histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
"legendFormat": "p95 Latency",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(http_server_requests_seconds_bucket{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) by (le)) * 1000",
"legendFormat": "p50 Latency",
"refId": "B"
}
],
"title": "Response Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"unit": "percent"
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
"id": 3,
"options": {},
"targets": [
{
"expr": "sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\",status=~\"5..\"}[5m])) / sum(rate(http_server_requests_seconds_count{kubernetes_namespace=\"applications\",app=\"mic-after-work-server-impl-quarkus-main\"}[5m])) * 100",
"legendFormat": "Error Rate %",
"refId": "A"
}
],
"title": "Error Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"unit": "bytes"
}
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
"id": 4,
"options": {},
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"})",
"legendFormat": "Memory Used",
"refId": "A"
},
{
"expr": "sum(container_spec_memory_limit_bytes{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"})",
"legendFormat": "Memory Limit",
"refId": "B"
}
],
"title": "Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"unit": "short"
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
"id": 5,
"options": {},
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"}[5m])) * 1000",
"legendFormat": "CPU Usage (millicores)",
"refId": "A"
}
],
"title": "CPU Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "red", "value": null},
{"color": "green", "value": 1}
]
}
}
},
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 16},
"id": 6,
"options": {
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"expr": "up{job=~\".*afterwork.*\"}",
"legendFormat": "Status",
"refId": "A"
}
],
"title": "API Status",
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"mode": "absolute",
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 1},
{"color": "red", "value": 3}
]
}
}
},
"gridPos": {"h": 4, "w": 6, "x": 18, "y": 16},
"id": 7,
"options": {
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
}
},
"targets": [
{
"expr": "increase(kube_pod_container_status_restarts_total{namespace=\"applications\",pod=~\"mic-after-work-server-impl-quarkus-main.*\"}[1h])",
"legendFormat": "Restarts (1h)",
"refId": "A"
}
],
"title": "Pod Restarts (1h)",
"type": "stat"
}
],
"refresh": "30s",
"schemaVersion": 38,
"style": "dark",
"tags": ["lions", "afterwork", "quarkus", "api"],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "AfterWork API Dashboard",
"uid": "afterwork-api",
"version": 1,
"weekStart": ""
}