fix(admin-performance): timezone-shifted chart axis, storage management panel, and path resolution
Some checks are pending
full-modernization-gates / frontend-route-governance (push) Waiting to run
full-modernization-gates / backend-modernization-gates (push) Waiting to run
released-pages-hardening-gates / released-pages-hardening (push) Waiting to run

- Fix SQLite aggregation timezone bug: add 'utc' modifier to strftime
  so naive local timestamps are not double-converted through UTC
- Add MetricsHistoryStore.purge() to clear stale snapshot data
- Add storage management panel: file size monitoring for SQLite, logs,
  and archive with cleanup/purge actions
- Fix path resolution: use Path(__file__) instead of current_app.root_path
  which can resolve incorrectly under Gunicorn
- Fix runtime contract validation: use is_relative_to() instead of
  strict parent equality to tolerate symlinks and nested dirs
- Add tests for timezone handling and purge functionality

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
egg
2026-03-05 09:55:12 +08:00
parent 7f409dc17f
commit 8397c68f89
6 changed files with 359 additions and 4 deletions

View File

@@ -237,6 +237,73 @@
</div>
</section>
<!-- Storage Management -->
<!-- Storage usage panel; rendered only after loadStorageInfo() has
     populated storageInfo from /admin/api/storage-info. -->
<section class="panel" v-if="storageInfo">
<h2 class="panel-title">儲存空間管理</h2>
<p class="storage-total">總使用量{{ formatBytes(storageInfo.total_bytes) }}</p>
<!-- SQLite databases under logs/; only the metrics_history file gets an
     inline purge action (wired to purgeMetricsHistory below). -->
<div class="storage-section">
<h4>SQLite 資料庫</h4>
<table class="mini-table">
<thead><tr><th>檔案</th><th>大小</th><th>操作</th></tr></thead>
<tbody>
<tr v-for="f in storageInfo.sqlite_files" :key="f.path">
<td>{{ f.path }}</td>
<td>{{ formatBytes(f.size_bytes) }}</td>
<td>
<!-- Purge button shown only for the metrics_history database -->
<button
v-if="f.path.includes('metrics_history')"
class="btn btn-sm btn-danger"
:disabled="storagePurging"
@click="purgeMetricsHistory"
>清除快照</button>
</td>
</tr>
</tbody>
</table>
</div>
<!-- Active .log files (read-only listing; cleanup via buttons below) -->
<div class="storage-section">
<h4>Log 檔案</h4>
<table class="mini-table">
<thead><tr><th>檔案</th><th>大小</th></tr></thead>
<tbody>
<tr v-for="f in storageInfo.log_files" :key="f.path">
<td>{{ f.path }}</td>
<td>{{ formatBytes(f.size_bytes) }}</td>
</tr>
</tbody>
</table>
</div>
<!-- Archive section hidden entirely when the archive dir is empty -->
<div class="storage-section" v-if="storageInfo.archive_files?.length">
<h4>Archive ({{ storageInfo.archive_files.length }} , {{ formatBytes(storageInfo.archive_total_bytes) }})</h4>
<table class="mini-table">
<thead><tr><th>檔案</th><th>大小</th></tr></thead>
<tbody>
<tr v-for="f in storageInfo.archive_files" :key="f.path">
<td>{{ f.path }}</td>
<td>{{ formatBytes(f.size_bytes) }}</td>
</tr>
</tbody>
</table>
</div>
<!-- Cleanup actions; all share the storagePurging busy flag -->
<div class="storage-actions">
<button class="btn btn-sm" :disabled="storagePurging" @click="cleanupLogFiles(['logs'])">
{{ storagePurging ? '清理中...' : '清空 Log 檔案' }}
</button>
<button
class="btn btn-sm"
:disabled="storagePurging || !storageInfo.archive_files?.length"
@click="cleanupLogFiles(['archive'])"
>清空 Archive</button>
<button class="btn btn-sm btn-danger" :disabled="storagePurging" @click="cleanupLogFiles(['logs', 'archive'])">
全部清理
</button>
</div>
</section>
<!-- Worker Control -->
<section class="panel">
<h2 class="panel-title">Worker 控制</h2>
@@ -352,6 +419,8 @@ const logLimit = 50;
const showRestartModal = ref(false);
const restartLoading = ref(false);
const cleanupLoading = ref(false);
const storageInfo = ref(null);
const storagePurging = ref(false);
const latencyChartRef = ref(null);
let chartInstance = null;
@@ -562,6 +631,51 @@ async function loadPerformanceHistory() {
}
}
// Refresh the storage usage panel from the admin API.
// On failure the previous snapshot (if any) stays in place.
async function loadStorageInfo() {
  let payload = null;
  try {
    const res = await apiGet('/admin/api/storage-info');
    payload = res?.data || null;
  } catch (e) {
    console.error('Failed to load storage info:', e);
    return;
  }
  storageInfo.value = payload;
}
// Delete every stored performance snapshot, then refresh both the
// storage panel and the (now empty) trend data. Destructive, so the
// user must confirm first; storagePurging gates the action buttons.
async function purgeMetricsHistory() {
  const ok = confirm('確定要清除所有效能快照資料?清除後趨勢圖將重新累積。');
  if (!ok) return;
  storagePurging.value = true;
  try {
    await apiPost('/admin/api/performance-history/purge', {});
    await Promise.all([loadStorageInfo(), loadPerformanceHistory()]);
  } catch (e) {
    console.error('Failed to purge metrics history:', e);
  } finally {
    storagePurging.value = false;
  }
}
// Truncate active log files and/or empty the archive directory.
// `targets` is an array containing 'logs' and/or 'archive'.
async function cleanupLogFiles(targets) {
  // Human-readable label for the confirmation dialog.
  const label = targets.includes('logs') && targets.includes('archive') ? '所有 Log 和 Archive'
    : targets.includes('archive') ? 'Archive 目錄' : 'Log 檔案';
  // FIX: the prompt was missing its trailing fullwidth question mark,
  // inconsistent with purgeMetricsHistory's confirmation dialog.
  if (!confirm(`確定要清理${label}?`)) return;
  storagePurging.value = true;
  try {
    await apiPost('/admin/api/log-files/cleanup', { targets });
    await loadStorageInfo();
  } catch (e) {
    console.error('Failed to cleanup log files:', e);
  } finally {
    storagePurging.value = false;
  }
}
/**
 * Format a byte count for display: "512 B", "1.5 KB", "3.2 MB", "1.25 GB".
 * null/undefined and 0 both render as "0 B"; bytes under 1 KB keep the
 * raw integer, KB/MB use one decimal, GB uses two.
 */
function formatBytes(bytes) {
  if (bytes == null || bytes === 0) return '0 B';
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;
  if (bytes < KB) return `${bytes} B`;
  if (bytes < MB) return `${(bytes / KB).toFixed(1)} KB`;
  if (bytes < GB) return `${(bytes / MB).toFixed(1)} MB`;
  return `${(bytes / GB).toFixed(2)} GB`;
}
// --- Trend Chart Series Configs ---
const poolTrendSeries = [
{ name: '飽和度', key: 'pool_saturation', color: '#6366f1' },
@@ -599,6 +713,7 @@ async function refreshAll() {
loadPerformanceHistory(),
loadLogs(),
loadWorkerStatus(),
loadStorageInfo(),
]);
} finally {
loading.value = false;

View File

@@ -583,6 +583,46 @@ body {
font-weight: 500;
}
/* Storage Management Panel */
/* Headline total-usage figure above the per-category tables */
.storage-total {
font-size: 0.9rem;
font-weight: 600;
color: #475569;
margin-bottom: 12px;
}
/* One section per category: SQLite / logs / archive */
.storage-section {
margin-bottom: 16px;
}
.storage-section h4 {
font-size: 0.85rem;
color: #64748b;
margin-bottom: 6px;
}
/* Compact file listing inside each section */
.storage-section .mini-table {
width: 100%;
font-size: 0.82rem;
}
.storage-section .mini-table th,
.storage-section .mini-table td {
padding: 4px 10px;
}
.storage-section .mini-table th {
color: #64748b;
font-weight: 500;
}
/* Cleanup buttons row; wraps on narrow viewports */
.storage-actions {
display: flex;
gap: 8px;
margin-top: 12px;
flex-wrap: wrap;
}
@media (max-width: 768px) {
.memory-guard-stats {
grid-template-columns: repeat(3, 1fr);

View File

@@ -254,10 +254,14 @@ class MetricsHistoryStore:
if not self._initialized:
self.initialize()
cutoff = (datetime.now() - timedelta(minutes=minutes)).isoformat()
# NOTE: stored ts is naive local time (datetime.now().isoformat()).
# SQLite strftime('%s', ts) treats input as UTC by default, so we
# add 'utc' modifier to tell SQLite "this is local time → convert
# to UTC first", then convert back with 'localtime' for display.
sql = f"""
SELECT
datetime(
(CAST(strftime('%s', ts) AS INTEGER) / {bucket_seconds}) * {bucket_seconds},
(CAST(strftime('%s', ts, 'utc') AS INTEGER) / {bucket_seconds}) * {bucket_seconds},
'unixepoch', 'localtime'
) AS ts,
MAX(pool_saturation) AS pool_saturation,
@@ -281,7 +285,7 @@ class MetricsHistoryStore:
ROUND(MAX(redis_used_memory) / 1048576.0, 2) AS redis_used_memory_mb
FROM metrics_snapshots
WHERE ts >= ?
GROUP BY (CAST(strftime('%s', ts) AS INTEGER) / {bucket_seconds})
GROUP BY (CAST(strftime('%s', ts, 'utc') AS INTEGER) / {bucket_seconds})
ORDER BY ts ASC
"""
try:
@@ -328,6 +332,26 @@ class MetricsHistoryStore:
logger.error("Failed to cleanup metrics history: %s", exc)
return deleted
def purge(self) -> int:
    """Delete every row from ``metrics_snapshots`` and return the count.

    Useful after schema/measurement fixes to discard stale data
    (e.g. peak-RSS or timezone-shifted timestamps).

    Returns:
        Number of rows removed, or 0 when the delete failed.
    """
    if not self._initialized:
        self.initialize()
    try:
        # Serialize with other writers, then wipe the table in one statement.
        with self._write_lock, self._get_connection() as conn:
            removed = conn.execute("DELETE FROM metrics_snapshots").rowcount
            conn.commit()
            logger.info("Purged all %d metrics history rows", removed)
            return removed
    except Exception as exc:
        # Best-effort: report failure via return value rather than raising.
        logger.error("Failed to purge metrics history: %s", exc)
        return 0
# ============================================================
# Background Collector

View File

@@ -88,12 +88,12 @@ def validate_runtime_contract(
pid_file = Path(str(cfg["watchdog_pid_file"])).resolve()
state_file = Path(str(cfg["watchdog_state_file"])).resolve()
if restart_flag.parent != runtime_dir:
if not restart_flag.is_relative_to(runtime_dir):
errors.append(
"WATCHDOG_RESTART_FLAG must be under WATCHDOG_RUNTIME_DIR "
f"({restart_flag} not under {runtime_dir})."
)
if pid_file.parent != runtime_dir:
if not pid_file.is_relative_to(runtime_dir):
errors.append(
"WATCHDOG_PID_FILE must be under WATCHDOG_RUNTIME_DIR "
f"({pid_file} not under {runtime_dir})."

View File

@@ -415,6 +415,125 @@ def api_performance_history():
})
@admin_bp.route("/api/performance-history/purge", methods=["POST"])
@admin_required
def api_performance_history_purge():
    """API: Purge all historical metrics snapshots (stale data cleanup)."""
    # Local import mirrors the lazy-import style used elsewhere in this module.
    from mes_dashboard.core.metrics_history import get_metrics_history_store

    removed = get_metrics_history_store().purge()
    return jsonify({"success": True, "data": {"deleted": removed}})
@admin_bp.route("/api/storage-info", methods=["GET"])
@admin_required
def api_storage_info():
    """API: Return sizes of SQLite databases, log files, and archive dir."""
    # Resolve the project root from this file's location rather than
    # current_app.root_path, which can resolve incorrectly under Gunicorn.
    base = Path(__file__).resolve().parents[3]
    logs_dir = base / "logs"
    archive_dir = logs_dir / "archive"

    def _file_info(entry: Path) -> dict:
        # A vanished/unreadable file is reported with size 0 rather than
        # failing the whole endpoint.
        try:
            size = entry.stat().st_size
        except OSError:
            size = 0
        return {"path": str(entry.relative_to(base)), "size_bytes": size}

    # SQLite databases and active log files live directly under logs/.
    sqlite_files = [_file_info(p) for p in sorted(logs_dir.glob("*.sqlite"))]
    log_files = [_file_info(p) for p in sorted(logs_dir.glob("*.log"))]

    # Archive directory (rotated logs); may not exist yet.
    archive_files = []
    if archive_dir.is_dir():
        archive_files = [
            _file_info(p) for p in sorted(archive_dir.iterdir()) if p.is_file()
        ]
    archive_total = sum(f["size_bytes"] for f in archive_files)

    total = (
        sum(f["size_bytes"] for f in sqlite_files)
        + sum(f["size_bytes"] for f in log_files)
        + archive_total
    )
    return jsonify({
        "success": True,
        "data": {
            "sqlite_files": sqlite_files,
            "log_files": log_files,
            "archive_files": archive_files,
            "archive_total_bytes": archive_total,
            "total_bytes": total,
        },
    })
@admin_bp.route("/api/log-files/cleanup", methods=["POST"])
@admin_required
def api_log_files_cleanup():
    """API: Truncate active log files and/or purge archive directory."""
    base = Path(__file__).resolve().parents[3]
    logs_dir = base / "logs"
    archive_dir = logs_dir / "archive"

    payload = request.get_json(silent=True) or {}
    targets = payload.get("targets", ["archive", "logs"])  # default: both
    logger.debug("Log file cleanup: base=%s, logs_dir=%s, archive_dir=%s", base, logs_dir, archive_dir)

    cleaned = {"log_files": [], "archive_files": []}
    freed = 0

    # Truncate active .log files (not SQLite — those have their own cleanup)
    if "logs" in targets:
        for log_path in logs_dir.glob("*.log"):
            try:
                size = log_path.stat().st_size
                if size > 0:
                    log_path.write_text("")
                    cleaned["log_files"].append(str(log_path.name))
                    freed += size
            except OSError as exc:
                logger.warning("Failed to truncate %s: %s", log_path, exc)

    # Delete archived files outright (they are rotated copies).
    if "archive" in targets and archive_dir.is_dir():
        for archived in archive_dir.iterdir():
            if not archived.is_file():
                continue
            try:
                size = archived.stat().st_size
                archived.unlink()
                cleaned["archive_files"].append(str(archived.name))
                freed += size
            except OSError as exc:
                logger.warning("Failed to remove archive file %s: %s", archived, exc)

    # Audit trail: who cleaned what, and how much was reclaimed.
    user = getattr(g, "username", "unknown")
    logger.info(
        "Log file cleanup by %s: freed %d bytes, targets=%s",
        user, freed, targets,
    )
    return jsonify({
        "success": True,
        "data": {
            "freed_bytes": freed,
            "cleaned": cleaned,
        },
    })
@admin_bp.route("/api/logs/cleanup", methods=["POST"])
@admin_required
def api_logs_cleanup():

View File

@@ -199,6 +199,63 @@ class TestRedisUsedMemoryMb:
assert rows[0]["redis_used_memory_mb"] == 256.0
# ============================================================
# Test Timezone Handling in Aggregation
# ============================================================
class TestTimezoneHandling:
    """Aggregated timestamps must match the original local-time ts,
    not be shifted by the server timezone offset."""

    def test_aggregated_ts_matches_local_time(self):
        """The bucketed ts should stay close to the original local time,
        not be shifted by +/- the timezone offset."""
        with tempfile.TemporaryDirectory() as tmpdir:
            store = MetricsHistoryStore(
                db_path=os.path.join(tmpdir, "test_tz.sqlite")
            )
            store.initialize()

            now = datetime.now()
            ts_str = now.isoformat()
            _insert_snapshot(store, ts_str, pid=1001,
                             rss_bytes=100 * 1024 * 1024)

            rows = store.query_snapshots_aggregated(minutes=5)
            assert len(rows) == 1

            # Round-trip the aggregated ts ("YYYY-MM-DD HH:MM:SS"); a
            # timezone double-conversion shifts it by whole hours, far
            # beyond the aggregation bucket width.
            agg_ts = rows[0]["ts"]
            agg_dt = datetime.strptime(agg_ts, "%Y-%m-%d %H:%M:%S")
            delta = abs((now - agg_dt).total_seconds())
            assert delta < 30, (
                f"Aggregated ts '{agg_ts}' is {delta:.0f}s away from original "
                f"'{ts_str}' — likely a timezone double-conversion bug"
            )
# ============================================================
# Test Purge
# ============================================================
class TestPurge:
    """purge() must delete all rows from metrics_snapshots."""

    def test_purge_deletes_all(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            store = MetricsHistoryStore(
                db_path=os.path.join(tmpdir, "test_purge.sqlite")
            )
            store.initialize()

            # Five snapshots spaced 30s apart, all inside the query window.
            base_ts = datetime.now()
            for offset in range(5):
                snap_ts = (base_ts - timedelta(seconds=offset * 30)).isoformat()
                _insert_snapshot(store, snap_ts, pid=1001,
                                 rss_bytes=100 * 1024 * 1024)
            assert len(store.query_snapshots(minutes=5)) == 5

            # purge() reports the row count and leaves the table empty.
            assert store.purge() == 5
            assert len(store.query_snapshots(minutes=5)) == 0
# ============================================================
# Test Original query_snapshots Still Works
# ============================================================