优化传感器布置算法,修复数据库更新逻辑

This commit is contained in:
2026-04-17 17:21:50 +08:00
parent bf2aaa5ff7
commit 3b712ea467
7 changed files with 795 additions and 291 deletions
+108
View File
@@ -0,0 +1,108 @@
import importlib.util
import sys
import types
from pathlib import Path
import numpy as np
import pandas as pd
def _load_pressure_cleaning_module():
    """Load ``app/algorithms/cleaning/pressure.py`` directly from its file path.

    The module is executed under the private name
    ``tests_pressure_under_test`` and is not registered in ``sys.modules``.
    Before that, ``app/algorithms/_utils.py`` is executed and registered as
    ``app.algorithms._utils``, with placeholder ``app`` / ``app.algorithms``
    modules created when they are not already imported.

    Returns:
        The freshly executed pressure-cleaning module object.
    """
    algorithms_dir = Path(__file__).resolve().parents[2] / "app" / "algorithms"

    # Placeholder parents; ``setdefault`` keeps any module already imported
    # under these names instead of replacing it.
    app_pkg = sys.modules.setdefault("app", types.ModuleType("app"))
    algorithms_pkg = sys.modules.setdefault(
        "app.algorithms",
        types.ModuleType("app.algorithms"),
    )
    app_pkg.algorithms = algorithms_pkg

    # NOTE(review): presumably pressure.py imports ``app.algorithms._utils``,
    # which is why it is registered before the target runs - confirm.
    helper_name = "app.algorithms._utils"
    helper_spec = importlib.util.spec_from_file_location(
        helper_name,
        algorithms_dir / "_utils.py",
    )
    assert helper_spec and helper_spec.loader
    helper_module = importlib.util.module_from_spec(helper_spec)
    sys.modules[helper_name] = helper_module
    helper_spec.loader.exec_module(helper_module)

    target_spec = importlib.util.spec_from_file_location(
        "tests_pressure_under_test",
        algorithms_dir / "cleaning" / "pressure.py",
    )
    assert target_spec and target_spec.loader
    target_module = importlib.util.module_from_spec(target_spec)
    target_spec.loader.exec_module(target_module)
    return target_module
def test_clean_pressure_data_df_km_repairs_long_form_pressure_series():
    """Check ``clean_pressure_data_df_km`` on the long-form noisy fixture.

    The cleaned frame must keep the raw row count, expose exactly the
    ``time`` / ``id`` / ``pressure`` columns, contain no NaN pressures, and
    sit closer to the raw simulation than the noisy input does (RMSE and
    MAE). Two individual readings are also compared with fixed values.
    """
    cleaner = _load_pressure_cleaning_module()
    data_dir = Path(__file__).resolve().parents[3] / "data"
    raw_df = pd.read_csv(data_dir / "node_simulation.csv")
    noisy_df = pd.read_csv(data_dir / "node_simulation_noisy.csv")
    cleaned_df = cleaner.clean_pressure_data_df_km(noisy_df)

    # Parse timestamps in every frame so the merges and the Timestamp
    # comparisons below operate on datetimes rather than raw strings.
    for frame in (raw_df, noisy_df, cleaned_df):
        frame["time"] = pd.to_datetime(frame["time"])

    assert len(cleaned_df) == len(raw_df)
    assert set(cleaned_df.columns) == {"time", "id", "pressure"}
    assert cleaned_df["pressure"].isna().sum() == 0

    join_keys = ["time", "id"]
    noisy_joined = raw_df.merge(
        noisy_df,
        on=join_keys,
        how="inner",
        suffixes=("_raw", "_noisy"),
    )
    cleaned_joined = raw_df.merge(
        cleaned_df,
        on=join_keys,
        how="inner",
        suffixes=("_raw", "_clean"),
    )

    # Per-row deviation from the raw simulation, before and after cleaning.
    noisy_error = noisy_joined["pressure_raw"] - noisy_joined["pressure_noisy"]
    cleaned_error = cleaned_joined["pressure_raw"] - cleaned_joined["pressure_clean"]

    noisy_rmse = float(np.sqrt(np.mean(noisy_error ** 2)))
    cleaned_rmse = float(np.sqrt(np.mean(cleaned_error ** 2)))
    noisy_mae = float(np.mean(np.abs(noisy_error)))
    cleaned_mae = float(np.mean(np.abs(cleaned_error)))

    assert cleaned_rmse < 0.35
    assert cleaned_rmse < noisy_rmse * 0.5
    assert cleaned_mae < noisy_mae

    # NOTE(review): presumably the noisy fixture has a missing reading at
    # this point and the expected number is the raw value - confirm.
    gap_mask = (cleaned_df["id"] == 170490) & (
        cleaned_df["time"] == pd.Timestamp("2026-01-01T05:00:00+08:00")
    )
    repaired_gap = cleaned_df.loc[gap_mask, "pressure"].iloc[0]
    assert abs(repaired_gap - 30.62433433532715) < 1.0

    # NOTE(review): presumably the noisy fixture has an outlier spike at
    # this point - confirm against the fixture data.
    spike_mask = (cleaned_df["id"] == 42563) & (
        cleaned_df["time"] == pd.Timestamp("2026-01-01T03:45:00+08:00")
    )
    spike_row = cleaned_df.loc[spike_mask, "pressure"].iloc[0]
    assert abs(spike_row - 28.018701553344727) < 2.0
def test_clean_pressure_data_df_km_accepts_single_sensor_wide_frame_with_utc_strings():
    """Check ``clean_pressure_data_df_km`` on a one-sensor wide frame.

    The input has a ``time`` column of ``...Z`` formatted UTC strings and a
    single value column named ``"170490"``. The result must have 192 rows
    and no NaN in that column.
    """
    cleaner = _load_pressure_cleaning_module()
    data_dir = Path(__file__).resolve().parents[3] / "data"
    noisy_df = pd.read_csv(data_dir / "node_simulation_noisy.csv")

    # Reshape one sensor's rows into wide form: the value column is named
    # after the sensor id.
    sensor_rows = noisy_df.loc[noisy_df["id"] == 170490, ["time", "pressure"]]
    single_sensor = sensor_rows.rename(columns={"pressure": "170490"}).copy()

    # Re-encode timestamps as UTC strings with a trailing "Z".
    utc_times = pd.to_datetime(single_sensor["time"], utc=True)
    single_sensor["time"] = utc_times.dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    cleaned_df = cleaner.clean_pressure_data_df_km(single_sensor)

    assert len(cleaned_df) == 192
    assert cleaned_df["170490"].isna().sum() == 0
+87
View File
@@ -0,0 +1,87 @@
from datetime import datetime, timezone
import importlib.util
from pathlib import Path
import pytest
def _load_scada_repository():
    """Load ``ScadaRepository`` directly from the repository source file.

    ``app/infra/db/timescaledb/repositories/scada.py`` is executed under the
    private module name ``tests_scada_repo_under_test``; the module is not
    registered in ``sys.modules``.

    Returns:
        The ``ScadaRepository`` attribute of the loaded module.
    """
    module_path = (
        Path(__file__).resolve().parents[2]
        / "app"
        / "infra"
        / "db"
        / "timescaledb"
        / "repositories"
        / "scada.py"
    )
    spec = importlib.util.spec_from_file_location("tests_scada_repo_under_test", module_path)
    # Check the spec before its first use: module_from_spec(None) would fail
    # with an AttributeError inside importlib, hiding this assertion.
    assert spec and spec.loader
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.ScadaRepository
class _FakeCursor:
    """Async cursor test double that records every ``execute`` call.

    After the first ``execute`` the ``rowcount`` attribute equals the
    ``initial_rowcount`` given at construction; after every later call it
    is ``1``.
    """

    def __init__(self, initial_rowcount: int):
        self.initial_rowcount = initial_rowcount
        self.rowcount = 0
        # One ``(str(query), params)`` tuple per execute() call, in order.
        self.calls: list[tuple[str, tuple]] = []

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        # False: exceptions raised inside the ``async with`` body propagate.
        return False

    async def execute(self, query, params):
        """Record the statement and set ``rowcount`` for this call."""
        self.calls.append((str(query), params))
        is_first_call = len(self.calls) == 1
        self.rowcount = self.initial_rowcount if is_first_call else 1
class _FakeConnection:
    """Connection test double that always hands out one shared ``_FakeCursor``."""

    def __init__(self, initial_rowcount: int) -> None:
        # Stored on the instance so tests can inspect the recorded calls.
        self.cursor_instance = _FakeCursor(initial_rowcount)

    def cursor(self) -> "_FakeCursor":
        """Return the single cursor created at construction time."""
        return self.cursor_instance
@pytest.mark.asyncio
async def test_update_scada_field_inserts_when_update_hits_no_rows():
    """An UPDATE reporting zero rows must be followed by an INSERT.

    The fake cursor reports ``rowcount == 0`` for the first statement, so
    ``update_scada_field`` is expected to run exactly two statements: the
    UPDATE and then an INSERT into ``scada.scada_data``.
    """
    repository = _load_scada_repository()
    fake_conn = _FakeConnection(initial_rowcount=0)

    await repository.update_scada_field(
        fake_conn,
        datetime(2026, 1, 1, 0, 0, tzinfo=timezone.utc),
        "170490",
        "cleaned_value",
        26.5,
    )

    executed_sql = [sql for sql, _params in fake_conn.cursor_instance.calls]
    assert len(executed_sql) == 2
    update_sql, insert_sql = executed_sql
    assert "UPDATE scada.scada_data SET" in update_sql
    assert "INSERT INTO scada.scada_data" in insert_sql
@pytest.mark.asyncio
async def test_update_scada_field_skips_insert_when_update_succeeds():
    """An UPDATE that affects a row must not be followed by another statement.

    The fake cursor reports ``rowcount == 1`` for the first statement, so
    ``update_scada_field`` is expected to run the UPDATE only.
    """
    repository = _load_scada_repository()
    fake_conn = _FakeConnection(initial_rowcount=1)

    await repository.update_scada_field(
        fake_conn,
        datetime(2026, 1, 1, 0, 0, tzinfo=timezone.utc),
        "170490",
        "cleaned_value",
        26.5,
    )

    executed_sql = [sql for sql, _params in fake_conn.cursor_instance.calls]
    assert len(executed_sql) == 1
    assert "UPDATE scada.scada_data SET" in executed_sql[0]