优化传感器布置算法，修复数据库更新逻辑

2026-04-17 17:21:50 +08:00
parent bf2aaa5ff7
commit 3b712ea467
7 changed files with 795 additions and 291 deletions
@@ -0,0 +1,108 @@
+import importlib.util
+import sys
+import types
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+
+def _load_pressure_cleaning_module():
+    """Load ``app/algorithms/cleaning/pressure.py`` directly from its file path.
+
+    Placeholder ``app`` and ``app.algorithms`` modules are registered in
+    ``sys.modules`` (unless entries already exist), and ``_utils.py`` is
+    executed and registered as ``app.algorithms._utils`` before the pressure
+    module runs -- presumably so that the pressure module's own import of
+    that helper resolves without importing the real ``app`` package.
+
+    Returns:
+        The executed pressure-cleaning module object.
+    """
+    algorithms_dir = Path(__file__).resolve().parents[2] / "app" / "algorithms"
+
+    # setdefault keeps whatever is already registered under these names.
+    app_pkg = sys.modules.setdefault("app", types.ModuleType("app"))
+    algorithms_pkg = sys.modules.setdefault(
+        "app.algorithms",
+        types.ModuleType("app.algorithms"),
+    )
+    app_pkg.algorithms = algorithms_pkg
+
+    helper_name = "app.algorithms._utils"
+    helper_spec = importlib.util.spec_from_file_location(
+        helper_name,
+        algorithms_dir / "_utils.py",
+    )
+    assert helper_spec and helper_spec.loader
+    helper_module = importlib.util.module_from_spec(helper_spec)
+    # Register before executing so the name is importable from here on.
+    sys.modules[helper_name] = helper_module
+    helper_spec.loader.exec_module(helper_module)
+
+    target_spec = importlib.util.spec_from_file_location(
+        "tests_pressure_under_test",
+        algorithms_dir / "cleaning" / "pressure.py",
+    )
+    assert target_spec and target_spec.loader
+    target_module = importlib.util.module_from_spec(target_spec)
+    target_spec.loader.exec_module(target_module)
+    return target_module
+
+
+def test_clean_pressure_data_df_km_repairs_long_form_pressure_series():
+    """Clean the noisy long-form fixture and compare it with the raw fixture.
+
+    Checks the output shape and columns, that no pressure value is missing,
+    that the error against the raw series is lower than the noisy input's
+    error, and two individual (sensor id, timestamp) readings.
+    """
+    module = _load_pressure_cleaning_module()
+    data_dir = Path(__file__).resolve().parents[3] / "data"
+
+    raw_df = pd.read_csv(data_dir / "node_simulation.csv")
+    noisy_df = pd.read_csv(data_dir / "node_simulation_noisy.csv")
+    # The cleaner receives the frame exactly as read from CSV; timestamps are
+    # only parsed afterwards, for the joins and lookups below.
+    cleaned_df = module.clean_pressure_data_df_km(noisy_df)
+
+    raw_df["time"] = pd.to_datetime(raw_df["time"])
+    noisy_df["time"] = pd.to_datetime(noisy_df["time"])
+    cleaned_df["time"] = pd.to_datetime(cleaned_df["time"])
+
+    assert len(cleaned_df) == len(raw_df)
+    assert set(cleaned_df.columns) == {"time", "id", "pressure"}
+    assert cleaned_df["pressure"].isna().sum() == 0
+
+    join_keys = ["time", "id"]
+    noisy_joined = raw_df.merge(
+        noisy_df,
+        on=join_keys,
+        how="inner",
+        suffixes=("_raw", "_noisy"),
+    )
+    cleaned_joined = raw_df.merge(
+        cleaned_df,
+        on=join_keys,
+        how="inner",
+        suffixes=("_raw", "_clean"),
+    )
+
+    # Per-row residuals against the raw reference series.
+    noisy_err = noisy_joined["pressure_raw"] - noisy_joined["pressure_noisy"]
+    cleaned_err = cleaned_joined["pressure_raw"] - cleaned_joined["pressure_clean"]
+
+    noisy_rmse = float(np.sqrt(np.mean(noisy_err ** 2)))
+    cleaned_rmse = float(np.sqrt(np.mean(cleaned_err ** 2)))
+    noisy_mae = float(np.mean(np.abs(noisy_err)))
+    cleaned_mae = float(np.mean(np.abs(cleaned_err)))
+
+    assert cleaned_rmse < 0.35
+    assert cleaned_rmse < noisy_rmse * 0.5
+    assert cleaned_mae < noisy_mae
+
+    def _cleaned_pressure_at(sensor_id, stamp):
+        # First cleaned reading for the given sensor id and timestamp.
+        selector = (cleaned_df["id"] == sensor_id) & (
+            cleaned_df["time"] == pd.Timestamp(stamp)
+        )
+        return cleaned_df[selector]["pressure"].iloc[0]
+
+    repaired_gap = _cleaned_pressure_at(170490, "2026-01-01T05:00:00+08:00")
+    assert abs(repaired_gap - 30.62433433532715) < 1.0
+
+    spike_row = _cleaned_pressure_at(42563, "2026-01-01T03:45:00+08:00")
+    assert abs(spike_row - 28.018701553344727) < 2.0
+
+
+def test_clean_pressure_data_df_km_accepts_single_sensor_wide_frame_with_utc_strings():
+    """Feed the cleaner a one-sensor wide frame whose times are UTC ``...Z`` strings.
+
+    The frame has a ``time`` column plus one column named after the sensor id;
+    the cleaned result must have 192 rows and no missing value in that column.
+    """
+    module = _load_pressure_cleaning_module()
+    noisy_path = Path(__file__).resolve().parents[3] / "data" / "node_simulation_noisy.csv"
+    noisy_df = pd.read_csv(noisy_path)
+
+    # Reshape the long-form rows of one sensor into a wide, single-column frame.
+    sensor_rows = noisy_df[noisy_df["id"] == 170490]
+    single_sensor = sensor_rows[["time", "pressure"]].rename(columns={"pressure": "170490"}).copy()
+
+    utc_times = pd.to_datetime(single_sensor["time"], utc=True)
+    single_sensor["time"] = utc_times.dt.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    cleaned_df = module.clean_pressure_data_df_km(single_sensor)
+
+    assert len(cleaned_df) == 192
+    assert cleaned_df["170490"].isna().sum() == 0
@@ -0,0 +1,87 @@
+from datetime import datetime, timezone
+import importlib.util
+from pathlib import Path
+
+import pytest
+
+
+def _load_scada_repository():
+    """Load ``ScadaRepository`` directly from its source file.
+
+    The module at ``app/infra/db/timescaledb/repositories/scada.py`` (relative
+    to the directory two levels above this file's directory) is executed under
+    the private name ``tests_scada_repo_under_test``.
+
+    Returns:
+        The ``ScadaRepository`` attribute of the loaded module.
+    """
+    module_path = (
+        Path(__file__).resolve().parents[2]
+        / "app"
+        / "infra"
+        / "db"
+        / "timescaledb"
+        / "repositories"
+        / "scada.py"
+    )
+    spec = importlib.util.spec_from_file_location("tests_scada_repo_under_test", module_path)
+    # Validate the spec before its first use, so a missing spec or loader
+    # fails on this assertion rather than inside module_from_spec().
+    assert spec and spec.loader
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.ScadaRepository
+
+
+class _FakeCursor:
+    """Async cursor stand-in that records statements and fakes ``rowcount``.
+
+    After the first ``execute`` call ``rowcount`` equals ``initial_rowcount``;
+    after every later call it is ``1``.
+    """
+
+    def __init__(self, initial_rowcount: int):
+        # (statement text, params) for each execute() call, in call order.
+        self.calls: list[tuple[str, tuple]] = []
+        self.rowcount = 0
+        self.initial_rowcount = initial_rowcount
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc, tb):
+        # A falsy return lets exceptions raised inside the block propagate.
+        return False
+
+    async def execute(self, query, params):
+        """Record the call and set ``rowcount`` for it."""
+        self.calls.append((str(query), params))
+        is_first_call = len(self.calls) == 1
+        self.rowcount = self.initial_rowcount if is_first_call else 1
+
+
+class _FakeConnection:
+    """Connection stand-in that always hands out the same ``_FakeCursor``."""
+
+    def __init__(self, initial_rowcount: int):
+        # Kept on the instance so tests can inspect the recorded calls.
+        self.cursor_instance = _FakeCursor(initial_rowcount)
+
+    def cursor(self):
+        """Return the single shared fake cursor."""
+        return self.cursor_instance
+
+
+@pytest.mark.asyncio
+async def test_update_scada_field_inserts_when_update_hits_no_rows():
+    """An UPDATE reporting zero affected rows must be followed by an INSERT."""
+    repository = _load_scada_repository()
+    # The fake cursor reports rowcount 0 after the first statement.
+    fake_conn = _FakeConnection(initial_rowcount=0)
+
+    await repository.update_scada_field(
+        fake_conn,
+        datetime(2026, 1, 1, 0, 0, tzinfo=timezone.utc),
+        "170490",
+        "cleaned_value",
+        26.5,
+    )
+
+    recorded = fake_conn.cursor_instance.calls
+    assert len(recorded) == 2
+    first_sql, second_sql = recorded[0][0], recorded[1][0]
+    assert "UPDATE scada.scada_data SET" in first_sql
+    assert "INSERT INTO scada.scada_data" in second_sql
+
+
+@pytest.mark.asyncio
+async def test_update_scada_field_skips_insert_when_update_succeeds():
+    """An UPDATE reporting one affected row must be the only statement issued."""
+    repository = _load_scada_repository()
+    # The fake cursor reports rowcount 1 after the first statement.
+    fake_conn = _FakeConnection(initial_rowcount=1)
+
+    await repository.update_scada_field(
+        fake_conn,
+        datetime(2026, 1, 1, 0, 0, tzinfo=timezone.utc),
+        "170490",
+        "cleaned_value",
+        26.5,
+    )
+
+    recorded = fake_conn.cursor_instance.calls
+    assert len(recorded) == 1
+    only_sql = recorded[0][0]
+    assert "UPDATE scada.scada_data SET" in only_sql