feat: métriques serveur dans footer dashboard + notification offline WS
This commit is contained in:
+26
-4
@@ -234,13 +234,35 @@ func (d *DB) PruneOldMetrics(retentionDays int) error {
|
||||
}
|
||||
|
||||
func (d *DB) MarkOffline(timeoutSec int64) error {
|
||||
cutoff := time.Now().Unix() - timeoutSec
|
||||
_, err := d.conn.Exec(
|
||||
`UPDATE agents SET status='offline' WHERE last_seen < ? AND status != 'offline'`,
|
||||
cutoff)
|
||||
_, err := d.MarkOfflineAndGetIDs(timeoutSec)
|
||||
return err
|
||||
}
|
||||
|
||||
// MarkOfflineAndGetIDs marque les agents inactifs et retourne leurs IDs.
|
||||
func (d *DB) MarkOfflineAndGetIDs(timeoutSec int64) ([]string, error) {
|
||||
cutoff := time.Now().Unix() - timeoutSec
|
||||
rows, err := d.conn.Query(
|
||||
`SELECT id FROM agents WHERE last_seen < ? AND status != 'offline'`, cutoff)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var ids []string
|
||||
for rows.Next() {
|
||||
var id string
|
||||
_ = rows.Scan(&id)
|
||||
ids = append(ids, id)
|
||||
}
|
||||
if err = rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rows.Close()
|
||||
if len(ids) > 0 {
|
||||
_, err = d.conn.Exec(
|
||||
`UPDATE agents SET status='offline' WHERE last_seen < ? AND status != 'offline'`, cutoff)
|
||||
}
|
||||
return ids, err
|
||||
}
|
||||
|
||||
// init configures the standard logger so every entry carries the date, the
// time, and the short file name with line number of the call site.
func init() {
	const logFlags = log.LstdFlags | log.Lshortfile
	log.SetFlags(logFlags)
}
|
||||
|
||||
+117
-1
@@ -1,8 +1,13 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
@@ -59,13 +64,35 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
// Maintenance : nettoyage + détection offline avec notification WS
|
||||
go func() {
|
||||
ticker := time.NewTicker(time.Minute)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
srvCfg, _ := database.GetServerConfig()
|
||||
_ = database.PruneOldMetrics(srvCfg.RetentionDays)
|
||||
_ = database.MarkOffline(30)
|
||||
ids, _ := database.MarkOfflineAndGetIDs(30)
|
||||
for _, id := range ids {
|
||||
hub.Broadcast(models.WSMessage{
|
||||
Type: "status_update",
|
||||
AgentID: id,
|
||||
Data: map[string]string{"status": "offline"},
|
||||
})
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Métriques du serveur lui-même → footer du dashboard
|
||||
go func() {
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
var prevIdle, prevTotal uint64
|
||||
for range ticker.C {
|
||||
stats, err := collectServerStats(&prevIdle, &prevTotal)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
hub.Broadcast(models.WSMessage{Type: "server_stats", Data: stats})
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -96,3 +123,92 @@ func main() {
|
||||
// endsWith reports whether path ends with suffix.
//
// It is a thin wrapper around strings.HasSuffix; an empty suffix always
// matches.
func endsWith(path, suffix string) bool {
	return strings.HasSuffix(path, suffix)
}
|
||||
|
||||
func collectServerStats(prevIdle, prevTotal *uint64) (*models.ServerStats, error) {
|
||||
idle, total, err := readCPUStat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var cpuPct float64
|
||||
if *prevTotal > 0 && total > *prevTotal {
|
||||
deltaIdle := float64(idle - *prevIdle)
|
||||
deltaTotal := float64(total - *prevTotal)
|
||||
cpuPct = 100.0 * (1.0 - deltaIdle/deltaTotal)
|
||||
if cpuPct < 0 {
|
||||
cpuPct = 0
|
||||
}
|
||||
}
|
||||
*prevIdle = idle
|
||||
*prevTotal = total
|
||||
|
||||
memTotal, memAvail, err := readMemInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &models.ServerStats{
|
||||
CPUPercent: cpuPct,
|
||||
MemUsed: memTotal - memAvail,
|
||||
MemTotal: memTotal,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// readCPUStat reads the aggregate "cpu" line of /proc/stat and returns the
// cumulative idle counter (idle + iowait) and the cumulative total counter.
//
// It returns an error when the file cannot be opened or read, when no
// aggregate cpu line is present, or when that line is malformed.
func readCPUStat() (idle, total uint64, err error) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		// The trailing space selects the aggregate line and skips the
		// per-core "cpu0", "cpu1", ... lines.
		if strings.HasPrefix(line, "cpu ") {
			return parseCPUStatLine(line)
		}
	}
	if err := scanner.Err(); err != nil {
		return 0, 0, fmt.Errorf("reading /proc/stat: %w", err)
	}
	return 0, 0, fmt.Errorf("cpu line not found in /proc/stat")
}

// parseCPUStatLine extracts the idle and total counters from one "cpu ..."
// line of /proc/stat.
//
// Only the first eight counters (user, nice, system, idle, iowait, irq,
// softirq, steal) contribute to total: guest and guest_nice are already
// accounted for in user and nice, so adding them would count that time twice.
// Lines with fewer than four counters or with a non-numeric counter are
// rejected.
func parseCPUStatLine(line string) (idle, total uint64, err error) {
	const (
		idleIdx    = 3 // position of the idle counter after the "cpu" label
		iowaitIdx  = 4 // position of the iowait counter
		maxCounted = 8 // user .. steal
	)

	fields := strings.Fields(line)
	if len(fields) > 0 {
		fields = fields[1:] // drop the "cpu" label
	}
	if len(fields) <= idleIdx {
		return 0, 0, fmt.Errorf("cpu line in /proc/stat has only %d counters", len(fields))
	}
	if len(fields) > maxCounted {
		fields = fields[:maxCounted]
	}

	for i, field := range fields {
		n, err := strconv.ParseUint(field, 10, 64)
		if err != nil {
			return 0, 0, fmt.Errorf("parsing cpu counter %d in /proc/stat: %w", i, err)
		}
		total += n
		if i == idleIdx || i == iowaitIdx {
			idle += n
		}
	}
	return idle, total, nil
}
|
||||
|
||||
// readMemInfo reads /proc/meminfo and returns the MemTotal and MemAvailable
// values converted from kB to bytes.
//
// Scanning stops as soon as both values are known. availBytes is 0 when the
// file has no MemAvailable entry. It returns an error when the file cannot be
// opened or read, when one of the two values is not a valid integer, or when
// MemTotal is missing or zero.
func readMemInfo() (totalBytes, availBytes int64, err error) {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	var total, avail int64
	scanner := bufio.NewScanner(f)
	for !(total > 0 && avail > 0) && scanner.Scan() {
		key, size, err := parseMemInfoLine(scanner.Text())
		if err != nil {
			return 0, 0, err
		}
		switch key {
		case "MemTotal:":
			total = size
		case "MemAvailable:":
			avail = size
		}
	}
	if err := scanner.Err(); err != nil {
		return 0, 0, fmt.Errorf("reading /proc/meminfo: %w", err)
	}
	if total == 0 {
		return 0, 0, fmt.Errorf("MemTotal not found in /proc/meminfo")
	}
	return total, avail, nil
}

// parseMemInfoLine decodes one /proc/meminfo line.
//
// For a "MemTotal:" or "MemAvailable:" line it returns the key (including the
// trailing colon) and the value converted from kB to bytes. Every other line
// yields an empty key and no error, so unrelated entries are never parsed.
func parseMemInfoLine(line string) (key string, size int64, err error) {
	fields := strings.Fields(line)
	if len(fields) < 2 {
		return "", 0, nil
	}
	switch fields[0] {
	case "MemTotal:", "MemAvailable:":
	default:
		return "", 0, nil
	}

	kb, err := strconv.ParseInt(fields[1], 10, 64)
	if err != nil {
		return "", 0, fmt.Errorf("parsing %s in /proc/meminfo: %w", fields[0], err)
	}
	return fields[0], kb * 1024, nil
}
|
||||
|
||||
@@ -103,3 +103,9 @@ type WSMessage struct {
|
||||
AgentID string `json:"agent_id"`
|
||||
Data interface{} `json:"data"`
|
||||
}
|
||||
|
||||
// ServerStats describes the resource usage of the server process's host.
// Each field is serialized to JSON under the snake_case key given in its tag.
type ServerStats struct {
	// CPUPercent is the CPU usage as a percentage.
	CPUPercent float64 `json:"cpu_percent"`

	// MemUsed is the amount of memory in use.
	// NOTE(review): presumably bytes, matching MemTotal — confirm with the producer.
	MemUsed int64 `json:"mem_used"`

	// MemTotal is the total amount of memory.
	// NOTE(review): presumably bytes — confirm with the producer.
	MemTotal int64 `json:"mem_total"`
}
|
||||
|
||||
Reference in New Issue
Block a user