Merge pull request #2 from OrgTJWater/refactor/app-structure
Refactor/app structure
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
# TJWater Server 环境变量配置模板
|
||||
# 复制此文件为 .env 并填写实际值
|
||||
ENVIRONMENT="local"
|
||||
NETWORK_NAME="tjwater"
|
||||
# ============================================
|
||||
# 安全配置 (必填)
|
||||
# ============================================
|
||||
|
||||
# JWT 密钥 - 用于生成和验证 Token
|
||||
# 生成方式: openssl rand -hex 32
|
||||
SECRET_KEY=your-secret-key-here-change-in-production-use-openssl-rand-hex-32
|
||||
|
||||
# 数据加密密钥 - 用于敏感数据加密
|
||||
# 生成方式: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
ENCRYPTION_KEY=
|
||||
DATABASE_ENCRYPTION_KEY="rJC2VqLg4KrlSq+DGJcYm869q4v5KB2dFAeuQTe0I50="
|
||||
|
||||
# ============================================
|
||||
# 数据库配置 (PostgreSQL)
|
||||
# ============================================
|
||||
DB_NAME="tjwater"
|
||||
DB_HOST="localhost"
|
||||
DB_PORT="5432"
|
||||
DB_USER="tjwater"
|
||||
DB_PASSWORD="password"
|
||||
|
||||
# ============================================
|
||||
# 数据库配置 (TimescaleDB)
|
||||
# ============================================
|
||||
TIMESCALEDB_DB_NAME="tjwater"
|
||||
TIMESCALEDB_DB_HOST="localhost"
|
||||
TIMESCALEDB_DB_PORT="5433"
|
||||
TIMESCALEDB_DB_USER="tjwater"
|
||||
TIMESCALEDB_DB_PASSWORD="password"
|
||||
|
||||
# ============================================
|
||||
# 元数据数据库配置 (Metadata DB)
|
||||
# ============================================
|
||||
METADATA_DB_NAME="system_hub"
|
||||
METADATA_DB_HOST="localhost"
|
||||
METADATA_DB_PORT="5432"
|
||||
METADATA_DB_USER="tjwater"
|
||||
METADATA_DB_PASSWORD="password"
|
||||
|
||||
# ============================================
|
||||
# Keycloak JWT (可选)
|
||||
# ============================================
|
||||
KEYCLOAK_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----"
|
||||
KEYCLOAK_ALGORITHM=RS256
|
||||
KEYCLOAK_AUDIENCE="account"
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
# Copilot Instructions for TJWater Server
|
||||
|
||||
This repository contains the backend code for the TJWater Server, a water distribution network management system built with FastAPI.
|
||||
|
||||
## High-Level Architecture
|
||||
|
||||
The application follows a layered architecture:
|
||||
|
||||
- **Entry Point**: `app/main.py` initializes the FastAPI application, database connections (PostgreSQL & TimescaleDB), and middleware.
|
||||
- **API Layer**: `app/api/v1` contains the route handlers.
|
||||
- **Service Layer**: `app/services` contains business logic and orchestration.
|
||||
- **Infrastructure Layer**: `app/infra` handles database connections (`db`), audit logging (`audit`), and external integrations.
|
||||
- **Domain Layer**: `app/domain` likely contains core domain models.
|
||||
- **Native/Algorithms**: `app/native` and `app/algorithms` handle specialized water network calculations (possibly using EPANET/WNTR).
|
||||
|
||||
## Build, Test, and Run Commands
|
||||
|
||||
### Environment Setup
|
||||
|
||||
- Dependencies are listed in `requirements.txt`.
|
||||
- Configuration is managed via environment variables (see `.env.example` if available, or `app/core/config.py`).
|
||||
- **Important**: Ensure `.env` is configured with correct database credentials for both PostgreSQL and TimescaleDB.
|
||||
|
||||
If first time setting up, you may want to create a Conda environment:
|
||||
|
||||
```bash
|
||||
conda create -n server python=3.12
|
||||
conda activate server
|
||||
pip install uv
|
||||
uv pip install -r requirements.txt
|
||||
conda install -c conda-forge pymetis
|
||||
```
|
||||
|
||||
### Running the Server
|
||||
|
||||
The preferred way to run the server locally is using the helper script which sets up the Python path correctly:
|
||||
|
||||
```bash
|
||||
conda activate server
|
||||
python scripts/run_server.py
|
||||
```
|
||||
|
||||
Alternatively, you can run directly with uvicorn (ensure PYTHONPATH includes the root):
|
||||
|
||||
```bash
|
||||
conda activate server
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||
```
|
||||
|
||||
### Running Tests
|
||||
|
||||
Use `pytest` to run tests. The `tests/conftest.py` handles path setup.
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest
|
||||
|
||||
# Run a specific test file
|
||||
pytest tests/unit/test_specific_file.py
|
||||
|
||||
# Run a specific test case
|
||||
pytest tests/unit/test_specific_file.py::test_function_name
|
||||
```
|
||||
|
||||
### Building (Optional)
|
||||
|
||||
The project includes scripts to compile Python modules to `.pyd` files using Cython (see `scripts/build_pyd.py`). This is likely for distribution/performance but not required for standard development.
|
||||
|
||||
## Key Conventions
|
||||
|
||||
- **Async/Await**: The codebase heavily uses `async` and `await` for I/O operations, especially database interactions.
|
||||
- **Database Management**:
|
||||
- Connections are managed globally in `app.infra.db` and initialized in `lifespan` (app/main.py).
|
||||
- Use `app.infra.db.dynamic_manager` for project-specific database connections (multi-tenancy/dynamic projects).
|
||||
- **Pydantic**: extensively used for data validation and settings management.
|
||||
- **Scripts**: The `scripts/` directory contains many utility scripts for maintenance, data processing, and server management. Check there before writing new operational scripts.
|
||||
- **Water Network Modeling**: Interactions with water network models often involve `epanet` or `wntr` libraries. Be aware of domain-specific terminology (nodes, links, junctions, tanks).
|
||||
|
||||
## Code Style
|
||||
|
||||
- Follow standard PEP 8 guidelines.
|
||||
- No specific linter configuration was found, so default to standard Python formatting.
|
||||
+5
-2
@@ -1,6 +1,9 @@
|
||||
*.pyc
|
||||
.env
|
||||
db_inp/
|
||||
temp/
|
||||
data/
|
||||
build/
|
||||
*.pyc
|
||||
.env
|
||||
*.dump
|
||||
.vscode/
|
||||
app/algorithms/health/model/my_survival_forest_model_quxi.joblib
|
||||
|
||||
Vendored
-23
@@ -1,23 +0,0 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"name": "Debug Uvicorn",
|
||||
"module": "uvicorn",
|
||||
"args": [
|
||||
"main:app",
|
||||
"--host",
|
||||
"0.0.0.0",
|
||||
"--port",
|
||||
"8000",
|
||||
"--workers",
|
||||
"1"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Vendored
-5
@@ -1,5 +0,0 @@
|
||||
{
|
||||
"cSpell.words": [
|
||||
"Fastapi"
|
||||
]
|
||||
}
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
FROM continuumio/miniconda3:latest
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 安装 Python 3.12 和 pymetis (通过 conda-forge 避免编译问题)
|
||||
RUN conda install -y -c conda-forge python=3.12 pymetis && \
|
||||
conda clean -afy
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install uv
|
||||
RUN uv pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 将代码放入子目录 'app',将数据放入子目录 'db_inp'
|
||||
# 这样临时文件默认会生成在 /app 下,而代码在 /app/app 下,实现了分离
|
||||
COPY app ./app
|
||||
COPY db_inp ./db_inp
|
||||
COPY temp ./temp
|
||||
COPY .env .
|
||||
|
||||
# 设置 PYTHONPATH 以便 uvicorn 找到 app 模块
|
||||
ENV PYTHONPATH=/app
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "4"]
|
||||
@@ -1,4 +0,0 @@
|
||||
当前 适配 szh 项目的分支 是 dingsu/szh
|
||||
|
||||
Binary 适配的是 代码 中dingsu/szh 的部分
|
||||
当前只是把 API目录(也就是TJNetwork的部分)加密了
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,32 +0,0 @@
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
# infile = Path(r"c:\copilot codes\dataclean\Flow_Timedata2025_new_format.csv")
|
||||
# outfile = Path(r"c:\copilot codes\dataclean\szh_flow_scada.csv")
|
||||
|
||||
infile = Path(r"c:\copilot codes\dataclean\Pressure_Timedata2025_new_format.csv")
|
||||
outfile = Path(r"c:\copilot codes\dataclean\szh_pressure_scada.csv")
|
||||
|
||||
with infile.open("r", newline="", encoding="utf-8") as f_in:
|
||||
reader = csv.reader(f_in)
|
||||
rows = list(reader)
|
||||
|
||||
if not rows:
|
||||
print("input file is empty")
|
||||
raise SystemExit(1)
|
||||
|
||||
headers = rows[0]
|
||||
# keep columns whose header does NOT contain 'SB_'
|
||||
keep_indices = [i for i,h in enumerate(headers) if 'SB_' not in h]
|
||||
removed = [h for i,h in enumerate(headers) if 'SB_' in h]
|
||||
|
||||
with outfile.open("w", newline="", encoding="utf-8") as f_out:
|
||||
writer = csv.writer(f_out)
|
||||
for row in rows:
|
||||
# ensure row has same length as headers
|
||||
if len(row) < len(headers):
|
||||
row = row + [''] * (len(headers) - len(row))
|
||||
newrow = [row[i] for i in keep_indices]
|
||||
writer.writerow(newrow)
|
||||
|
||||
print(f"Wrote {outfile} — removed {len(removed)} columns containing 'SB_'.")
|
||||
@@ -0,0 +1,36 @@
|
||||
from app.algorithms.cleaning import flow_data_clean, pressure_data_clean
|
||||
from app.algorithms.sensor import (
|
||||
pressure_sensor_placement_sensitivity,
|
||||
pressure_sensor_placement_kmeans,
|
||||
)
|
||||
from app.algorithms.isolation.valve import valve_isolation_analysis
|
||||
from app.algorithms.leakage import LeakageIdentifier
|
||||
from app.algorithms.health import PipelineHealthAnalyzer
|
||||
from app.algorithms.burst_location import run_burst_location
|
||||
from app.algorithms.simulation.scenarios import (
|
||||
convert_to_local_unit,
|
||||
burst_analysis,
|
||||
valve_close_analysis,
|
||||
flushing_analysis,
|
||||
contaminant_simulation,
|
||||
age_analysis,
|
||||
pressure_regulation,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"flow_data_clean",
|
||||
"pressure_data_clean",
|
||||
"pressure_sensor_placement_sensitivity",
|
||||
"pressure_sensor_placement_kmeans",
|
||||
"convert_to_local_unit",
|
||||
"burst_analysis",
|
||||
"valve_close_analysis",
|
||||
"flushing_analysis",
|
||||
"contaminant_simulation",
|
||||
"age_analysis",
|
||||
"pressure_regulation",
|
||||
"valve_isolation_analysis",
|
||||
"LeakageIdentifier",
|
||||
"PipelineHealthAnalyzer",
|
||||
"run_burst_location",
|
||||
]
|
||||
@@ -0,0 +1,88 @@
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def fill_time_gaps(
|
||||
data: pd.DataFrame,
|
||||
time_col: str = "time",
|
||||
freq: str = "1min",
|
||||
short_gap_threshold: int = 10,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
补齐缺失时间戳并填补数据缺口。
|
||||
|
||||
Args:
|
||||
data: 包含时间列的 DataFrame
|
||||
time_col: 时间列名(默认 'time')
|
||||
freq: 重采样频率(默认 '1min')
|
||||
short_gap_threshold: 短缺口阈值(分钟),<=此值用线性插值,>此值用前向填充
|
||||
|
||||
Returns:
|
||||
补齐时间后的 DataFrame(保留原时间列格式)
|
||||
"""
|
||||
if time_col not in data.columns:
|
||||
raise ValueError(f"时间列 '{time_col}' 不存在于数据中")
|
||||
|
||||
# 解析时间列并设为索引
|
||||
data = data.copy()
|
||||
data[time_col] = pd.to_datetime(data[time_col], utc=True)
|
||||
data_indexed = data.set_index(time_col)
|
||||
|
||||
# 生成完整时间范围
|
||||
full_range = pd.date_range(
|
||||
start=data_indexed.index.min(), end=data_indexed.index.max(), freq=freq
|
||||
)
|
||||
|
||||
# 重索引以补齐缺失时间点,同时保留原始时间戳
|
||||
combined_index = data_indexed.index.union(full_range).sort_values().unique()
|
||||
data_reindexed = data_indexed.reindex(combined_index)
|
||||
|
||||
# 按列处理缺口
|
||||
for col in data_reindexed.columns:
|
||||
# 识别缺失值位置
|
||||
is_missing = data_reindexed[col].isna()
|
||||
|
||||
# 计算连续缺失的长度
|
||||
missing_groups = (is_missing != is_missing.shift()).cumsum()
|
||||
gap_lengths = is_missing.groupby(missing_groups).transform("sum")
|
||||
|
||||
# 短缺口:时间插值
|
||||
short_gap_mask = is_missing & (gap_lengths <= short_gap_threshold)
|
||||
if short_gap_mask.any():
|
||||
data_reindexed.loc[short_gap_mask, col] = (
|
||||
data_reindexed[col]
|
||||
.interpolate(method="time", limit_area="inside")
|
||||
.loc[short_gap_mask]
|
||||
)
|
||||
|
||||
# 长缺口:前向填充
|
||||
long_gap_mask = is_missing & (gap_lengths > short_gap_threshold)
|
||||
if long_gap_mask.any():
|
||||
data_reindexed.loc[long_gap_mask, col] = (
|
||||
data_reindexed[col].ffill().loc[long_gap_mask]
|
||||
)
|
||||
|
||||
# 重置索引并恢复时间列(保留原格式)
|
||||
data_result = data_reindexed.reset_index()
|
||||
data_result.rename(columns={"index": time_col}, inplace=True)
|
||||
|
||||
# 保留时区信息
|
||||
data_result[time_col] = data_result[time_col].dt.strftime("%Y-%m-%dT%H:%M:%S%z")
|
||||
# 修正时区格式(Python的%z输出为+0000,需转为+00:00)
|
||||
data_result[time_col] = data_result[time_col].str.replace(
|
||||
r"(\+\d{2})(\d{2})$", r"\1:\2", regex=True
|
||||
)
|
||||
|
||||
return data_result
|
||||
|
||||
|
||||
def _cleanup_temp_files(prefix: str) -> None:
|
||||
"""清理 EPANET 仿真产生的临时文件。"""
|
||||
for ext in [".inp", ".rpt", ".bin", ".out"]:
|
||||
temp_file = prefix + ext
|
||||
if os.path.exists(temp_file):
|
||||
try:
|
||||
os.remove(temp_file)
|
||||
except OSError:
|
||||
pass
|
||||
@@ -0,0 +1,3 @@
|
||||
from app.algorithms.burst_detection.burst_detector import BurstDetector
|
||||
|
||||
__all__ = ["BurstDetector"]
|
||||
@@ -0,0 +1,259 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.fft import fft, ifft
|
||||
from sklearn.ensemble import IsolationForest
|
||||
|
||||
PressureDataInput = (
|
||||
pd.DataFrame
|
||||
| dict[str, list[Any]]
|
||||
| list[dict[str, Any]]
|
||||
| list[list[Any]]
|
||||
| np.ndarray
|
||||
)
|
||||
IGNORED_OBSERVATION_COLUMNS = {"time", "timestamp", "datetime", "date"}
|
||||
|
||||
|
||||
class BurstDetector:
|
||||
"""FFT + IsolationForest based burst detection for daily aligned pressure data."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
mu: int = 100,
|
||||
points_per_day: int = 1440,
|
||||
iforest_params: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
if points_per_day <= 0:
|
||||
raise ValueError("points_per_day 必须大于 0。")
|
||||
if mu <= 0:
|
||||
raise ValueError("mu 必须大于 0。")
|
||||
|
||||
self.mu = int(mu)
|
||||
self.points_per_day = int(points_per_day)
|
||||
self.iforest_params = {
|
||||
"n_estimators": 50,
|
||||
"random_state": 42,
|
||||
"contamination": "auto",
|
||||
}
|
||||
if iforest_params:
|
||||
self.iforest_params.update(iforest_params)
|
||||
|
||||
self.data: np.ndarray | None = None
|
||||
self.sensor_names: list[str] = []
|
||||
self.high_freq_features: np.ndarray | None = None
|
||||
|
||||
def load_data(
|
||||
self,
|
||||
data_source: PressureDataInput,
|
||||
*,
|
||||
sensor_nodes: list[str] | None = None,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
标准化输入观测数据为 DataFrame。
|
||||
|
||||
支持的 `data_source` 格式:
|
||||
- `pd.DataFrame`
|
||||
每一列代表一个传感器,每一行代表一个时间点。
|
||||
- `dict[str, list[Any]]`
|
||||
键为传感器 ID,值为该传感器按时间顺序排列的压力序列。
|
||||
例如:`{"J1": [101.2, 101.0], "J2": [99.8, 99.7]}`。
|
||||
- `list[dict[str, Any]]`
|
||||
每个字典代表一个时间点,键为传感器 ID,值为该时刻压力。
|
||||
例如:`[{"J1": 101.2, "J2": 99.8}, {"J1": 101.0, "J2": 99.7}]`。
|
||||
- `list[list[Any]]`
|
||||
二维列表,格式为 `(时间点数, 传感器数)`。
|
||||
例如:`[[101.2, 99.8], [101.0, 99.7]]`。
|
||||
- `np.ndarray`
|
||||
二维数组,形状必须为 `(时间点数, 传感器数)`。
|
||||
|
||||
参数:
|
||||
- `sensor_nodes`:
|
||||
可选的传感器列筛选列表。传入后,数据中必须包含这些列名。
|
||||
|
||||
返回:
|
||||
- 标准化后的 `pd.DataFrame`,列为传感器,行为时间点。
|
||||
"""
|
||||
if isinstance(data_source, np.ndarray):
|
||||
observation_df = pd.DataFrame(data_source)
|
||||
elif isinstance(data_source, pd.DataFrame):
|
||||
observation_df = data_source.copy()
|
||||
else:
|
||||
observation_df = pd.DataFrame(data_source)
|
||||
|
||||
return self._normalize_observation_frame(
|
||||
observation_df=observation_df, sensor_nodes=sensor_nodes
|
||||
)
|
||||
|
||||
def process(
|
||||
self,
|
||||
observed_pressure_data: PressureDataInput,
|
||||
*,
|
||||
sensor_nodes: list[str] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
对输入压力序列按天切片,并提取每天末时刻的高频特征。
|
||||
|
||||
`observed_pressure_data` 的格式与 `load_data()` 一致,统一要求:
|
||||
- 数据必须表示为“行=时间点、列=传感器”。
|
||||
- 总行数必须是 `points_per_day` 的整数倍。
|
||||
- 至少需要 2 天数据,即总行数 `>= 2 * points_per_day`。
|
||||
|
||||
例如:
|
||||
- 当 `points_per_day=1440` 时,15 天数据的形状通常为 `(21600, 传感器数)`。
|
||||
- 若传入 `sensor_nodes=["J1", "J2"]`,则输入中必须存在 `J1/J2` 两列。
|
||||
|
||||
返回:
|
||||
- `np.ndarray`,形状为 `(天数, 传感器数)`,
|
||||
每个值表示对应传感器在当天末时刻提取出的高频分量。
|
||||
"""
|
||||
observation_df = self.load_data(
|
||||
observed_pressure_data,
|
||||
sensor_nodes=sensor_nodes,
|
||||
)
|
||||
matrix = observation_df.to_numpy(dtype=float)
|
||||
total_points, sensor_count = matrix.shape
|
||||
if sensor_count == 0:
|
||||
raise ValueError("压力观测数据中未找到可用传感器列。")
|
||||
if total_points < self.points_per_day * 2:
|
||||
raise ValueError("至少需要 2 天的观测数据才能执行爆管侦测。")
|
||||
if total_points % self.points_per_day != 0:
|
||||
raise ValueError("观测数据长度必须能被每日采样点数整除,以便按天切分。")
|
||||
|
||||
day_count = total_points // self.points_per_day
|
||||
high_freq_features = np.zeros((day_count, sensor_count), dtype=float)
|
||||
|
||||
for sensor_idx in range(sensor_count):
|
||||
sensor_series = matrix[:, sensor_idx]
|
||||
for day_idx in range(day_count):
|
||||
start = day_idx * self.points_per_day
|
||||
end = (day_idx + 1) * self.points_per_day
|
||||
day_data = sensor_series[start:end]
|
||||
mirrored_data = np.concatenate([day_data, day_data[::-1]])
|
||||
transformed = fft(mirrored_data)
|
||||
transformed[self.mu : len(mirrored_data) - self.mu + 1] = 0
|
||||
low_freq = ifft(transformed).real
|
||||
high_freq = day_data - low_freq[: self.points_per_day]
|
||||
high_freq_features[day_idx, sensor_idx] = float(high_freq[-1])
|
||||
|
||||
self.data = matrix
|
||||
self.sensor_names = [str(column) for column in observation_df.columns]
|
||||
self.high_freq_features = high_freq_features
|
||||
return high_freq_features
|
||||
|
||||
def detect(self) -> pd.DataFrame:
|
||||
if self.high_freq_features is None:
|
||||
raise ValueError("特征未提取。请先调用 process()。")
|
||||
|
||||
day_count = self.high_freq_features.shape[0]
|
||||
if day_count < 2:
|
||||
raise ValueError("孤立森林至少需要 2 天特征数据。")
|
||||
|
||||
clf = IsolationForest(
|
||||
n_estimators=self.iforest_params.get("n_estimators", 50),
|
||||
max_samples=day_count,
|
||||
random_state=self.iforest_params.get("random_state", 42),
|
||||
contamination=self.iforest_params.get("contamination", "auto"),
|
||||
**{
|
||||
key: value
|
||||
for key, value in self.iforest_params.items()
|
||||
if key not in {"n_estimators", "random_state", "contamination"}
|
||||
},
|
||||
)
|
||||
clf.fit(self.high_freq_features)
|
||||
|
||||
scores = clf.decision_function(self.high_freq_features)
|
||||
predictions = clf.predict(self.high_freq_features)
|
||||
result_df = pd.DataFrame(
|
||||
{
|
||||
"Day": range(1, day_count + 1),
|
||||
"Score": scores.astype(float),
|
||||
"Prediction": predictions.astype(int),
|
||||
}
|
||||
)
|
||||
result_df["IsBurst"] = result_df["Prediction"].eq(-1)
|
||||
result_df.attrs["sensor_nodes"] = self.sensor_names.copy()
|
||||
result_df.attrs["high_freq_features"] = self.high_freq_features.copy()
|
||||
result_df.attrs["day_count"] = day_count
|
||||
result_df.attrs["points_per_day"] = self.points_per_day
|
||||
result_df.attrs["sample_count"] = (
|
||||
int(self.data.shape[0]) if self.data is not None else 0
|
||||
)
|
||||
return result_df
|
||||
|
||||
def run_detection(
|
||||
self,
|
||||
observed_pressure_data: PressureDataInput,
|
||||
*,
|
||||
sensor_nodes: list[str] | None = None,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
执行完整爆管侦测流程。
|
||||
|
||||
输入格式与 `process()` 相同:
|
||||
- `DataFrame` / `dict[str, list[Any]]` / `list[dict[str, Any]]` / `list[list[Any]]` / `np.ndarray`
|
||||
- 行表示时间点,列表示传感器
|
||||
- 总行数必须能被 `points_per_day` 整除
|
||||
|
||||
返回结果包含列:
|
||||
- `Day`: 第几天(从 1 开始)
|
||||
- `Score`: IsolationForest 异常分数,越小越异常
|
||||
- `Prediction`: `-1` 表示异常,`1` 表示正常
|
||||
- `IsBurst`: 是否判定为异常日
|
||||
"""
|
||||
self.process(observed_pressure_data, sensor_nodes=sensor_nodes)
|
||||
return self.detect()
|
||||
|
||||
@staticmethod
|
||||
def _normalize_observation_frame(
|
||||
*,
|
||||
observation_df: pd.DataFrame,
|
||||
sensor_nodes: list[str] | None,
|
||||
) -> pd.DataFrame:
|
||||
if observation_df.empty:
|
||||
raise ValueError("压力观测数据为空。")
|
||||
|
||||
normalized_df = observation_df.copy()
|
||||
normalized_df.columns = [str(column) for column in normalized_df.columns]
|
||||
normalized_df = normalized_df.drop(
|
||||
columns=[
|
||||
column
|
||||
for column in normalized_df.columns
|
||||
if column.lower() in IGNORED_OBSERVATION_COLUMNS
|
||||
or column.lower().startswith("unnamed:")
|
||||
],
|
||||
errors="ignore",
|
||||
)
|
||||
|
||||
if sensor_nodes:
|
||||
selected_columns = [str(node) for node in sensor_nodes]
|
||||
missing_columns = [
|
||||
column
|
||||
for column in selected_columns
|
||||
if column not in normalized_df.columns
|
||||
]
|
||||
if missing_columns:
|
||||
preview = ", ".join(missing_columns[:10])
|
||||
raise ValueError(f"观测数据缺少传感器列: {preview}")
|
||||
normalized_df = normalized_df.loc[:, selected_columns]
|
||||
else:
|
||||
candidate_df = normalized_df.apply(pd.to_numeric, errors="coerce")
|
||||
normalized_df = candidate_df.loc[:, candidate_df.notna().any(axis=0)]
|
||||
|
||||
if normalized_df.empty:
|
||||
raise ValueError("未识别到可用的数值型压力观测列。")
|
||||
|
||||
normalized_df = normalized_df.apply(pd.to_numeric, errors="coerce")
|
||||
invalid_columns = [
|
||||
column
|
||||
for column in normalized_df.columns
|
||||
if normalized_df[column].isna().any()
|
||||
]
|
||||
if invalid_columns:
|
||||
preview = ", ".join(invalid_columns[:10])
|
||||
raise ValueError(f"压力观测数据包含非数值或缺失值: {preview}")
|
||||
|
||||
return normalized_df.reset_index(drop=True)
|
||||
@@ -0,0 +1,3 @@
|
||||
from .burst_location import run_burst_location
|
||||
|
||||
__all__ = ["run_burst_location"]
|
||||
@@ -0,0 +1,342 @@
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
from multiprocessing import cpu_count
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from app.algorithms.burst_location import leak_simulator
|
||||
|
||||
from .burst_locator import (
|
||||
DN_search_multi_simple_add_flow_count_new,
|
||||
)
|
||||
from .network_model import (
|
||||
_build_node_pipe_maps,
|
||||
cal_node_coordinate,
|
||||
construct_graph,
|
||||
load_inp,
|
||||
read_inf_inp,
|
||||
read_inf_inp_other,
|
||||
)
|
||||
|
||||
DEFAULT_N_WORKERS = max(1, min(cpu_count() - 1, 4))
|
||||
# DEFAULT_N_WORKERS = max(1, cpu_count() - 1)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _read_id_list_json(path):
|
||||
if path is None:
|
||||
return None
|
||||
data = json.loads(Path(path).read_text(encoding="utf-8"))
|
||||
if isinstance(data, list):
|
||||
return [str(item) for item in data]
|
||||
if isinstance(data, dict):
|
||||
if "ids" in data and isinstance(data["ids"], list):
|
||||
return [str(item) for item in data["ids"]]
|
||||
raise ValueError(f"ID JSON must be list or dict with key 'ids': {path}")
|
||||
raise ValueError(f"Unsupported ID JSON format: {path}")
|
||||
|
||||
|
||||
def _read_series_csv(path):
|
||||
if path is None:
|
||||
return None
|
||||
df = pd.read_csv(path)
|
||||
if df.shape[1] < 2:
|
||||
raise ValueError(f"CSV must contain at least two columns (id,value): {path}")
|
||||
if {"id", "value"}.issubset(df.columns):
|
||||
id_col, value_col = "id", "value"
|
||||
else:
|
||||
id_col, value_col = df.columns[0], df.columns[1]
|
||||
series = pd.Series(
|
||||
df[value_col].values, index=df[id_col].astype(str).values, dtype=float
|
||||
)
|
||||
return series
|
||||
|
||||
|
||||
def _align_scada_series(
|
||||
series: pd.Series, ids: Iterable[str], series_name: str
|
||||
) -> pd.Series:
|
||||
ids = [str(item) for item in ids]
|
||||
aligned = series.copy()
|
||||
aligned.index = aligned.index.map(str)
|
||||
missing_ids = [item for item in ids if item not in aligned.index]
|
||||
if missing_ids:
|
||||
preview = ", ".join(missing_ids[:10])
|
||||
raise ValueError(f"{series_name} missing IDs: {preview}")
|
||||
aligned = pd.to_numeric(aligned.loc[ids], errors="coerce")
|
||||
invalid_ids = aligned[aligned.isna()].index.tolist()
|
||||
if invalid_ids:
|
||||
preview = ", ".join(invalid_ids[:10])
|
||||
raise ValueError(
|
||||
f"{series_name} contains non-numeric values for IDs: {preview}"
|
||||
)
|
||||
return aligned
|
||||
|
||||
|
||||
def _validate_flow_inputs(
|
||||
flow_scada_ids: list[str] | None,
|
||||
burst_flow: pd.Series | None,
|
||||
normal_flow: pd.Series | None,
|
||||
) -> tuple[bool, list[str]]:
|
||||
has_any_flow = any(
|
||||
value is not None for value in [flow_scada_ids, burst_flow, normal_flow]
|
||||
)
|
||||
has_all_flow = all(
|
||||
value is not None for value in [flow_scada_ids, burst_flow, normal_flow]
|
||||
)
|
||||
if has_any_flow and not has_all_flow:
|
||||
raise ValueError(
|
||||
"flow_scada_ids, burst_flow, and normal_flow must be provided together."
|
||||
)
|
||||
|
||||
if not has_all_flow:
|
||||
return False, []
|
||||
|
||||
flow_ids = [str(item) for item in (flow_scada_ids or [])]
|
||||
if len(flow_ids) == 0:
|
||||
raise ValueError("flow_scada_ids cannot be empty when flow data is provided.")
|
||||
return True, flow_ids
|
||||
|
||||
|
||||
def _build_top_candidates(similarity_series: pd.Series) -> list[dict[str, Any]]:
|
||||
top_series = similarity_series.iloc[:10]
|
||||
return [
|
||||
{"pipe_id": str(pipe_id), "similarity": float(score)}
|
||||
for pipe_id, score in top_series.items()
|
||||
]
|
||||
|
||||
|
||||
def run_burst_location(
|
||||
wn_inp_path: str,
|
||||
pressure_scada_ids: list[str],
|
||||
burst_pressure: pd.Series,
|
||||
normal_pressure: pd.Series,
|
||||
burst_leakage: float,
|
||||
flow_scada_ids: list[str] | None = None,
|
||||
burst_flow: pd.Series | None = None,
|
||||
normal_flow: pd.Series | None = None,
|
||||
min_dpressure: float = 2.0,
|
||||
basic_pressure: float = 10.0,
|
||||
n_workers: int = DEFAULT_N_WORKERS,
|
||||
partition_on_full_graph: bool = True,
|
||||
visualize_partition: bool = True,
|
||||
visualize_pause_seconds: float = 0.3,
|
||||
final_candidates_csv_path: (
|
||||
str | None
|
||||
) = "temp/burst_location/final_round_candidates.csv",
|
||||
) -> dict[str, Any]:
|
||||
if pressure_scada_ids is None or len(pressure_scada_ids) == 0:
|
||||
raise ValueError("pressure_scada_ids cannot be empty.")
|
||||
if burst_pressure is None or normal_pressure is None:
|
||||
raise ValueError("burst_pressure and normal_pressure are required.")
|
||||
|
||||
has_all_flow, flow_ids = _validate_flow_inputs(
|
||||
flow_scada_ids=flow_scada_ids,
|
||||
burst_flow=burst_flow,
|
||||
normal_flow=normal_flow,
|
||||
)
|
||||
|
||||
inp_path = Path(wn_inp_path)
|
||||
wn = load_inp(
|
||||
inp_name=inp_path.name,
|
||||
inp_location=str(inp_path.parent) + "/",
|
||||
inp_time=0,
|
||||
driven_mode="PDD",
|
||||
require_p=float(basic_pressure),
|
||||
minimum_p=0.0,
|
||||
)
|
||||
|
||||
(
|
||||
all_node,
|
||||
_,
|
||||
node_coordinates,
|
||||
all_pipe,
|
||||
_,
|
||||
_,
|
||||
pipe_length,
|
||||
pipe_diameter,
|
||||
) = read_inf_inp(wn)
|
||||
|
||||
candidate_pipe, _ = leak_simulator.cal_possible_pipe(
|
||||
burst_leakage, all_pipe, pipe_diameter
|
||||
)
|
||||
|
||||
_, pipe_start_node_all, pipe_end_node_all = read_inf_inp_other(wn)
|
||||
node_x, node_y = cal_node_coordinate(all_node, node_coordinates)
|
||||
G0 = construct_graph(wn)
|
||||
node_pipe_dic, couple_node_length = _build_node_pipe_maps(
|
||||
all_node,
|
||||
all_pipe,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
pipe_length,
|
||||
)
|
||||
all_node_series = pd.Series(range(len(all_node)), index=all_node)
|
||||
|
||||
pressure_ids = [str(item) for item in pressure_scada_ids]
|
||||
normal_pressure_aligned = _align_scada_series(
|
||||
normal_pressure, pressure_ids, "normal_pressure"
|
||||
)
|
||||
burst_pressure_aligned = _align_scada_series(
|
||||
burst_pressure, pressure_ids, "burst_pressure"
|
||||
)
|
||||
pressure_normal = normal_pressure_aligned.to_frame().T
|
||||
pressure_monitor = burst_pressure_aligned.to_frame().T
|
||||
pressure_predict = pressure_normal.copy()
|
||||
timestep_list = list(pressure_normal.index)
|
||||
|
||||
if has_all_flow:
|
||||
normal_flow_aligned = _align_scada_series(normal_flow, flow_ids, "normal_flow")
|
||||
burst_flow_aligned = _align_scada_series(burst_flow, flow_ids, "burst_flow")
|
||||
flow_normal = normal_flow_aligned.to_frame().T
|
||||
flow_monitor = burst_flow_aligned.to_frame().T
|
||||
flow_predict = flow_normal.copy()
|
||||
similarity_mode = "CDF"
|
||||
max_flow = flow_normal.iloc[0, :].abs()
|
||||
else:
|
||||
flow_normal = pd.DataFrame(index=timestep_list)
|
||||
flow_monitor = pd.DataFrame(index=timestep_list)
|
||||
flow_predict = pd.DataFrame(index=timestep_list)
|
||||
similarity_mode = "CAD_new_gy"
|
||||
max_flow = pd.Series(dtype=float)
|
||||
|
||||
stage_timing: dict[str, Any] = {}
|
||||
try:
|
||||
(
|
||||
located_pipe,
|
||||
elapsed_seconds,
|
||||
simulation_times,
|
||||
_,
|
||||
similarity_series,
|
||||
exit_condition,
|
||||
final_candidates_csv,
|
||||
) = DN_search_multi_simple_add_flow_count_new(
|
||||
wn=wn,
|
||||
wn_inp_path=str(inp_path),
|
||||
G0=G0,
|
||||
all_node=all_node,
|
||||
node_x=node_x,
|
||||
node_y=node_y,
|
||||
pipe_start_node_all=pipe_start_node_all,
|
||||
pipe_end_node_all=pipe_end_node_all,
|
||||
pipe_diameter=pipe_diameter,
|
||||
couple_node_length=couple_node_length,
|
||||
node_pipe_dic=node_pipe_dic,
|
||||
all_node_series=all_node_series,
|
||||
top_group_ratio=0.3,
|
||||
top_pipe_num_max=80,
|
||||
top_pipe_num_min=10,
|
||||
candidate_pipe_input_initial=candidate_pipe,
|
||||
similarity_mode=similarity_mode,
|
||||
pressure_monitor=pressure_monitor,
|
||||
pressure_predict=pressure_predict,
|
||||
pressure_normal=pressure_normal,
|
||||
pressure_leak_all=None,
|
||||
flow_monitor=flow_monitor,
|
||||
flow_predict=flow_predict,
|
||||
flow_normal=flow_normal,
|
||||
flow_leak_all=None,
|
||||
timestep_list=timestep_list,
|
||||
max_flow=max_flow,
|
||||
group_basic_num=30,
|
||||
Top_sensor_num=min(5, len(pressure_ids)),
|
||||
if_gy=0,
|
||||
pressure_threshold=float(min_dpressure),
|
||||
leak_mag=float(burst_leakage),
|
||||
n_workers=max(1, int(n_workers)),
|
||||
stage_timing=stage_timing,
|
||||
partition_on_full_graph=partition_on_full_graph,
|
||||
visualize_partition=visualize_partition,
|
||||
visualize_pause_seconds=visualize_pause_seconds,
|
||||
final_candidates_csv_path=final_candidates_csv_path,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.exception("Burst location algorithm execution failed.")
|
||||
raise RuntimeError(f"Failed to run burst location algorithm: {exc}") from exc
|
||||
|
||||
return {
|
||||
"located_pipe": located_pipe,
|
||||
"burst_leakage": float(burst_leakage),
|
||||
"elapsed_seconds": elapsed_seconds,
|
||||
"simulation_times": int(simulation_times),
|
||||
"top_candidates": _build_top_candidates(similarity_series),
|
||||
"similarity_mode": similarity_mode,
|
||||
"exit_condition": exit_condition,
|
||||
"final_candidates_csv": final_candidates_csv,
|
||||
"stage_timing_seconds": stage_timing,
|
||||
}
|
||||
|
||||
|
||||
def _parse_args():
|
||||
parser = argparse.ArgumentParser(description="爆管定位主函数入口")
|
||||
parser.add_argument("--wn-inp", required=True, help="EPANET inp 文件路径")
|
||||
parser.add_argument(
|
||||
"--pressure-ids-json", required=True, help="压力SCADA ID列表 JSON 文件"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--flow-ids-json", default=None, help="(可选)流量SCADA ID列表 JSON 文件"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--burst-pressure-csv", required=True, help="爆管时压力 CSV(id,value)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--normal-pressure-csv", required=True, help="正常时压力 CSV(id,value)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--burst-flow-csv", default=None, help="(可选)爆管时流量 CSV(id,value)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--normal-flow-csv", default=None, help="(可选)正常时流量 CSV(id,value)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--burst-leakage", type=float, required=True, help="爆管漏损流量"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--min-dpressure",
|
||||
type=float,
|
||||
default=2.0,
|
||||
help="(可选)最小压降阈值,默认 2.0",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--basic-pressure",
|
||||
type=float,
|
||||
default=10.0,
|
||||
help="(可选)基础服务压力,默认 10.0",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--n-workers",
|
||||
type=int,
|
||||
default=DEFAULT_N_WORKERS,
|
||||
help="(可选)特征中心模拟进程数,默认 max(1, min(cpu_count()-1, 4))",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--final-candidates-csv-path",
|
||||
default="temp/burst_location/final_round_candidates.csv",
|
||||
help="(可选)最后一轮候选管道明细 CSV 输出路径",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
args = _parse_args()
|
||||
result = run_burst_location(
|
||||
wn_inp_path=args.wn_inp,
|
||||
pressure_scada_ids=_read_id_list_json(args.pressure_ids_json),
|
||||
burst_pressure=_read_series_csv(args.burst_pressure_csv),
|
||||
normal_pressure=_read_series_csv(args.normal_pressure_csv),
|
||||
burst_leakage=args.burst_leakage,
|
||||
flow_scada_ids=_read_id_list_json(args.flow_ids_json),
|
||||
burst_flow=_read_series_csv(args.burst_flow_csv),
|
||||
normal_flow=_read_series_csv(args.normal_flow_csv),
|
||||
min_dpressure=args.min_dpressure,
|
||||
basic_pressure=args.basic_pressure,
|
||||
n_workers=args.n_workers,
|
||||
final_candidates_csv_path=args.final_candidates_csv_path,
|
||||
)
|
||||
print(json.dumps(result, ensure_ascii=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,772 @@
|
||||
"""爆管定位主模块。"""
|
||||
|
||||
import copy
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from time import perf_counter
|
||||
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from .leak_simulator import cal_signature_pipe_multi_pf
|
||||
from .network_partitioner import (
|
||||
cal_group_num,
|
||||
metis_grouping_pipe_weight,
|
||||
visualize_metis_partition,
|
||||
)
|
||||
from .similarity_calculator import (
|
||||
adjust_ratio,
|
||||
cal_similarity_all_multi_new_sq_improve_double_lzr,
|
||||
decode_mode,
|
||||
extra_judge,
|
||||
update_similarity,
|
||||
)
|
||||
|
||||
|
||||
def _ensure_signatures_for_centers(
|
||||
wn,
|
||||
wn_inp_path,
|
||||
center_list, # 本轮要用到的中心(list[str])
|
||||
pressure_leak_all,
|
||||
flow_leak_all, # 全量缓存(可为空 DF)
|
||||
timestep_list, # 你现有的时序列表
|
||||
pressure_monitor,
|
||||
flow_monitor, # 用来推断传感器列名
|
||||
leak_mag,
|
||||
n_workers=1,
|
||||
):
|
||||
"""
|
||||
只为缺失的中心补算 SLF(调用你现有的 cal_signature_pipe_multi_pf),
|
||||
并把补算结果并回缓存。返回:
|
||||
pressure_leak_subset, flow_leak_subset, pressure_leak_all_new, flow_leak_all_new
|
||||
其中 subset 只包含 center_list 的行(顺序与 center_list 保持一致)。
|
||||
"""
|
||||
|
||||
center_list = _dedupe_preserve_order(center_list)
|
||||
|
||||
# 1) 推断传感器列名(与现有数据保持一致)
|
||||
sensor_name_all = list(pressure_monitor.columns)
|
||||
sensor_f_name_all = (
|
||||
list(flow_monitor.columns)
|
||||
if (flow_monitor is not None and hasattr(flow_monitor, "columns"))
|
||||
else []
|
||||
)
|
||||
|
||||
# 2) 取出缓存里已经有的中心(考虑 MultiIndex 的第 0 层为 pipe)
|
||||
def _existing_pipes(df):
|
||||
if df is None or len(df) == 0:
|
||||
return set()
|
||||
idx = df.index
|
||||
if isinstance(idx, pd.MultiIndex):
|
||||
return set(idx.get_level_values(0))
|
||||
else:
|
||||
return set(idx)
|
||||
|
||||
exist_p = _existing_pipes(pressure_leak_all)
|
||||
need = [p for p in center_list if p not in exist_p]
|
||||
|
||||
# 3) 若有缺失中心,仅为这些中心补算一次
|
||||
if len(need) > 0:
|
||||
p_new, _ = cal_signature_pipe_multi_pf(
|
||||
wn,
|
||||
leak_mag,
|
||||
need,
|
||||
timestep_list,
|
||||
sensor_name_all,
|
||||
n_workers=n_workers,
|
||||
wn_inp_path=wn_inp_path,
|
||||
)
|
||||
# 初始化空缓存时,做一次“同构化”
|
||||
if pressure_leak_all is None or len(pressure_leak_all) == 0:
|
||||
pressure_leak_all = p_new
|
||||
else:
|
||||
pressure_leak_all = pd.concat([pressure_leak_all, p_new], axis=0)
|
||||
# if (flow_leak_all is None or len(flow_leak_all) == 0) and f_new is not None:
|
||||
# flow_leak_all = f_new
|
||||
# elif f_new is not None:
|
||||
# flow_leak_all = pd.concat([flow_leak_all, f_new], axis=0)
|
||||
|
||||
# 去重(如果既有缓存里不小心有重复中心)
|
||||
if isinstance(pressure_leak_all.index, pd.MultiIndex):
|
||||
pressure_leak_all = pressure_leak_all[
|
||||
~pressure_leak_all.index.duplicated(keep="last")
|
||||
]
|
||||
if flow_leak_all is not None and len(flow_leak_all) > 0:
|
||||
flow_leak_all = flow_leak_all[
|
||||
~flow_leak_all.index.duplicated(keep="last")
|
||||
]
|
||||
else:
|
||||
pressure_leak_all = pressure_leak_all[
|
||||
~pressure_leak_all.index.duplicated(keep="last")
|
||||
]
|
||||
if flow_leak_all is not None and len(flow_leak_all) > 0:
|
||||
flow_leak_all = flow_leak_all[
|
||||
~flow_leak_all.index.duplicated(keep="last")
|
||||
]
|
||||
|
||||
# 4) 从更新后的缓存里,取出这轮需要的中心子集(顺序与 center_list 一致)
|
||||
if isinstance(pressure_leak_all.index, pd.MultiIndex):
|
||||
pressure_subset = pressure_leak_all.loc[center_list]
|
||||
flow_subset = (
|
||||
flow_leak_all.loc[center_list]
|
||||
if (flow_leak_all is not None and len(flow_leak_all) > 0)
|
||||
else None
|
||||
)
|
||||
else:
|
||||
pressure_subset = pressure_leak_all.loc[center_list, :]
|
||||
flow_subset = (
|
||||
flow_leak_all.loc[center_list, :]
|
||||
if (flow_leak_all is not None and len(flow_leak_all) > 0)
|
||||
else None
|
||||
)
|
||||
|
||||
return pressure_subset, flow_subset, pressure_leak_all, flow_leak_all
|
||||
|
||||
|
||||
def area_output_num_ki_improve(
|
||||
candidate_center,
|
||||
candidate_group,
|
||||
similarity,
|
||||
new_all_node,
|
||||
top_group_ratio,
|
||||
top_pipe_num_max,
|
||||
top_pipe_num_min,
|
||||
cut_ratio,
|
||||
):
|
||||
final_area = []
|
||||
final_center = []
|
||||
all_node_iter = []
|
||||
if similarity.index.is_unique == False:
|
||||
total_center_num = len(set(similarity.index))
|
||||
else:
|
||||
total_center_num = len(similarity.index)
|
||||
|
||||
next_group_num = min(
|
||||
total_center_num, math.ceil(total_center_num / cut_ratio * top_group_ratio)
|
||||
)
|
||||
|
||||
for i in range(next_group_num):
|
||||
top_center = similarity.index[i]
|
||||
top_center_index = find_list_repeat(candidate_center, top_center)
|
||||
for j in range(len(top_center_index)):
|
||||
final_area = final_area + candidate_group[top_center_index[j]]
|
||||
all_node_iter = all_node_iter + list(new_all_node[top_center_index[j]])
|
||||
final_center.append(top_center)
|
||||
final_area = sorted(set(final_area))
|
||||
|
||||
if len(final_area) > top_pipe_num_max:
|
||||
if_end = 0
|
||||
elif len(final_area) > top_pipe_num_min:
|
||||
if_end = 1
|
||||
elif total_center_num == next_group_num:
|
||||
if_end = 1
|
||||
else:
|
||||
if_end = 1
|
||||
for i in np.arange(next_group_num, total_center_num, 1):
|
||||
|
||||
before_list = copy.deepcopy(final_area)
|
||||
top_center = similarity.index[i]
|
||||
top_center_index = candidate_center.index(top_center)
|
||||
temp_group = final_area + candidate_group[top_center_index]
|
||||
temp_area = sorted(set(temp_group))
|
||||
|
||||
if len(temp_area) < top_pipe_num_min:
|
||||
|
||||
final_center.append(top_center)
|
||||
all_node_iter = all_node_iter + list(new_all_node[top_center_index])
|
||||
final_area = temp_area
|
||||
|
||||
elif len(temp_area) < top_pipe_num_max:
|
||||
final_center.append(top_center)
|
||||
all_node_iter = all_node_iter + list(new_all_node[top_center_index])
|
||||
final_area = temp_area
|
||||
break
|
||||
else:
|
||||
a = len(temp_area) - top_pipe_num_max
|
||||
b = top_pipe_num_min - len(before_list)
|
||||
|
||||
if a >= b:
|
||||
final_area = before_list
|
||||
else:
|
||||
final_center.append(top_center)
|
||||
all_node_iter = all_node_iter + list(new_all_node[top_center_index])
|
||||
final_area = temp_area
|
||||
break
|
||||
|
||||
final_center = sorted(set(final_center))
|
||||
all_node_iter = sorted(set(all_node_iter))
|
||||
return final_area, final_center, all_node_iter, if_end
|
||||
|
||||
|
||||
def find_list_repeat(candidate_center, target):
|
||||
repeated_list = []
|
||||
for index, nums in enumerate(candidate_center):
|
||||
if nums == target:
|
||||
repeated_list.append(index)
|
||||
return repeated_list
|
||||
|
||||
|
||||
def _dedupe_preserve_order(items):
|
||||
seen = set()
|
||||
output = []
|
||||
for item in items:
|
||||
if item in seen:
|
||||
continue
|
||||
seen.add(item)
|
||||
output.append(item)
|
||||
return output
|
||||
|
||||
|
||||
def _accumulate_stage(stage_timing, stage_name, started_at):
|
||||
stage_timing[stage_name] = stage_timing.get(stage_name, 0.0) + (
|
||||
perf_counter() - started_at
|
||||
)
|
||||
|
||||
|
||||
def _write_last_round_candidates_csv(
|
||||
csv_path,
|
||||
exit_condition,
|
||||
iteration_count,
|
||||
similarity_mode,
|
||||
candidate_details,
|
||||
fallback_similarity,
|
||||
):
|
||||
if not csv_path:
|
||||
return None
|
||||
timestamp_suffix = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
base_path, ext = os.path.splitext(csv_path)
|
||||
ext = ext or ".csv"
|
||||
output_path = f"{base_path}_{timestamp_suffix}{ext}"
|
||||
if candidate_details is not None and len(candidate_details) > 0:
|
||||
export_df = candidate_details.copy()
|
||||
if export_df.index.name == "pipe_id":
|
||||
export_df = export_df.reset_index()
|
||||
else:
|
||||
export_df = pd.DataFrame(
|
||||
{
|
||||
"pipe_id": [str(pipe_id) for pipe_id in fallback_similarity.index],
|
||||
"final_similarity": [float(value) for value in fallback_similarity.values],
|
||||
}
|
||||
)
|
||||
export_df["exit_condition"] = exit_condition
|
||||
export_df["iterations"] = int(iteration_count)
|
||||
export_df["similarity_mode"] = similarity_mode
|
||||
parent_dir = os.path.dirname(output_path)
|
||||
if parent_dir:
|
||||
os.makedirs(parent_dir, exist_ok=True)
|
||||
export_df.to_csv(output_path, index=False, encoding="utf-8-sig")
|
||||
return output_path
|
||||
|
||||
|
||||
def cal_DtoTop1(
|
||||
G0, pipe_leak, located_pipe, pipe_start_node_all, pipe_end_node_all, pipe_length
|
||||
):
|
||||
if pipe_leak == located_pipe:
|
||||
result_DtoTop1 = 0
|
||||
result_DtoTop1_num = 0
|
||||
else:
|
||||
pipe_leak_start_node = pipe_start_node_all[pipe_leak]
|
||||
pipe_leak_end_node = pipe_end_node_all[pipe_leak]
|
||||
located_pipe_start_node = pipe_start_node_all[located_pipe]
|
||||
located_pipe_end_node = pipe_end_node_all[located_pipe]
|
||||
DtoTop1_series = pd.Series(dtype=object)
|
||||
DtoTop1_num_series = pd.Series(dtype=object)
|
||||
DtoTop1_series["ss"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_start_node, located_pipe_start_node, weight="weight"
|
||||
)
|
||||
DtoTop1_series["se"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_start_node, located_pipe_end_node, weight="weight"
|
||||
)
|
||||
DtoTop1_series["es"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_end_node, located_pipe_start_node, weight="weight"
|
||||
)
|
||||
DtoTop1_series["ee"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_end_node, located_pipe_end_node, weight="weight"
|
||||
)
|
||||
DtoTop1_num_series["ss"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_start_node, located_pipe_start_node
|
||||
)
|
||||
DtoTop1_num_series["se"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_start_node, located_pipe_end_node
|
||||
)
|
||||
DtoTop1_num_series["es"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_end_node, located_pipe_start_node
|
||||
)
|
||||
DtoTop1_num_series["ee"] = nx.shortest_path_length(
|
||||
G0, pipe_leak_end_node, located_pipe_end_node
|
||||
)
|
||||
|
||||
if DtoTop1_num_series.min() == 0:
|
||||
result_DtoTop1_num = 1
|
||||
result_DtoTop1 = DtoTop1_series.max() / 2
|
||||
else:
|
||||
result_DtoTop1_num = DtoTop1_num_series.min() + 1
|
||||
DtoTop1_type = DtoTop1_series.argmin()
|
||||
result_DtoTop1 = (
|
||||
DtoTop1_series[DtoTop1_type]
|
||||
+ (pipe_length[pipe_leak] + pipe_length[located_pipe]) / 2
|
||||
)
|
||||
return result_DtoTop1, result_DtoTop1_num
|
||||
|
||||
|
||||
def cal_RR(located_pipe, similarity_sp):
|
||||
if located_pipe in similarity_sp.index:
|
||||
rank = similarity_sp.index.get_loc(located_pipe)
|
||||
RR = rank / len(similarity_sp.index)
|
||||
else:
|
||||
RR = 1.1
|
||||
return RR
|
||||
|
||||
|
||||
def cal_cover(similarity, leak_pipe):
|
||||
if leak_pipe in list(similarity.index):
|
||||
cover = 1
|
||||
else:
|
||||
cover = 0
|
||||
return cover
|
||||
|
||||
|
||||
def cal_SD(located_pipe, real_pipe, pipe_x, pipe_y):
|
||||
dx = pipe_x[located_pipe] - pipe_x[real_pipe]
|
||||
dy = pipe_y[located_pipe] - pipe_y[real_pipe]
|
||||
SD = math.sqrt(dx * dx + dy * dy)
|
||||
return SD
|
||||
|
||||
|
||||
def DN_search_multi_simple_add_flow_count_new(
|
||||
wn,
|
||||
wn_inp_path,
|
||||
G0,
|
||||
all_node,
|
||||
node_x,
|
||||
node_y,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
pipe_diameter,
|
||||
couple_node_length,
|
||||
node_pipe_dic,
|
||||
all_node_series,
|
||||
top_group_ratio,
|
||||
top_pipe_num_max,
|
||||
top_pipe_num_min,
|
||||
candidate_pipe_input_initial,
|
||||
similarity_mode,
|
||||
pressure_monitor,
|
||||
pressure_predict,
|
||||
pressure_normal,
|
||||
pressure_leak_all,
|
||||
flow_monitor,
|
||||
flow_predict,
|
||||
flow_normal,
|
||||
flow_leak_all,
|
||||
timestep_list,
|
||||
max_flow,
|
||||
group_basic_num,
|
||||
Top_sensor_num,
|
||||
if_gy,
|
||||
pressure_threshold,
|
||||
leak_mag,
|
||||
n_workers=1,
|
||||
stage_timing=None,
|
||||
partition_on_full_graph=True,
|
||||
visualize_partition=False,
|
||||
visualize_pause_seconds=0.3,
|
||||
final_candidates_csv_path=None,
|
||||
):
|
||||
if stage_timing is None:
|
||||
stage_timing = {}
|
||||
exit_condition = "unknown"
|
||||
final_candidates_csv = None
|
||||
iter_count = 0
|
||||
all_node_iter = copy.deepcopy(all_node)
|
||||
candidate_pipe_input = copy.deepcopy(candidate_pipe_input_initial) # 可能漏损管段
|
||||
t1 = datetime.now()
|
||||
if_flow, if_only_cos, if_only_flow = decode_mode(similarity_mode) # 定位方法
|
||||
# threshold
|
||||
if if_only_flow == 1:
|
||||
dpressure = (flow_predict - flow_monitor).mean()
|
||||
dpressure = dpressure.abs()
|
||||
effective_sensor = list(dpressure.index)
|
||||
|
||||
else:
|
||||
dpressure = (pressure_predict - pressure_monitor).mean()
|
||||
dpressure = dpressure.abs()
|
||||
dpressure = dpressure[dpressure > pressure_threshold]
|
||||
effective_sensor = list(dpressure.index)
|
||||
simulation_times = 0 # 模拟次数
|
||||
if len(dpressure) > 0:
|
||||
break_flag = 0
|
||||
last_round_candidate_details = None
|
||||
|
||||
cos_h = 0
|
||||
dis_h = 0
|
||||
dis_f_h = 0
|
||||
if_compalsive = 0
|
||||
record_center_dataset = []
|
||||
record_center_set = set()
|
||||
# iter
|
||||
while 1:
|
||||
final_area = []
|
||||
final_center = []
|
||||
group_num = cal_group_num(candidate_pipe_input, group_basic_num)
|
||||
partition_nodes = all_node if partition_on_full_graph else all_node_iter
|
||||
|
||||
# group 分组,得出候选漏损中心
|
||||
stage_start = perf_counter()
|
||||
(
|
||||
candidate_center_list,
|
||||
candidate_group_list,
|
||||
new_all_node,
|
||||
candidate_center_candidates,
|
||||
) = (
|
||||
metis_grouping_pipe_weight(
|
||||
G0,
|
||||
wn,
|
||||
partition_nodes,
|
||||
candidate_pipe_input,
|
||||
group_num,
|
||||
node_x,
|
||||
node_y,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
node_pipe_dic,
|
||||
all_node_series,
|
||||
couple_node_length,
|
||||
pipe_diameter,
|
||||
)
|
||||
)
|
||||
_accumulate_stage(stage_timing, "group_partitioning", stage_start)
|
||||
if visualize_partition:
|
||||
visualize_metis_partition(
|
||||
G0,
|
||||
candidate_center_list,
|
||||
candidate_group_list,
|
||||
node_x,
|
||||
node_y,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
title=(
|
||||
f"METIS Partition Iteration {iter_count + 1} | "
|
||||
f"candidate pipes={len(candidate_pipe_input)} "
|
||||
f"groups={len(candidate_group_list)}"
|
||||
),
|
||||
block=False,
|
||||
pause_seconds=visualize_pause_seconds,
|
||||
)
|
||||
simulation_times = simulation_times + len(candidate_center_list)
|
||||
# pick_pressure_leak
|
||||
# pressure_leak = pressure_leak_all.loc[candidate_center_list].loc[:, :]
|
||||
# flow_leak = flow_leak_all.loc[candidate_center_list].loc[:, :]
|
||||
# —— 新增泄漏量(保持你现在的一致,或从外部传入)——
|
||||
# —— 只为缺失中心补算,然后取本轮需要的中心子集 ——
|
||||
stage_start = perf_counter()
|
||||
pressure_leak, flow_leak, pressure_leak_all, flow_leak_all = (
|
||||
_ensure_signatures_for_centers(
|
||||
wn=wn,
|
||||
wn_inp_path=wn_inp_path,
|
||||
center_list=candidate_center_list,
|
||||
pressure_leak_all=pressure_leak_all,
|
||||
flow_leak_all=flow_leak_all,
|
||||
timestep_list=timestep_list,
|
||||
pressure_monitor=pressure_monitor,
|
||||
flow_monitor=flow_monitor,
|
||||
leak_mag=leak_mag,
|
||||
n_workers=n_workers,
|
||||
)
|
||||
)
|
||||
_accumulate_stage(stage_timing, "signature_for_candidates", stage_start)
|
||||
|
||||
# pressure_leak_f= pressure_leak.swaplevel()
|
||||
|
||||
# --------------------------------------------------------
|
||||
|
||||
add_center = []
|
||||
leak_center_dict = dict()
|
||||
for i in range(len(candidate_center_list)):
|
||||
primary_center = candidate_center_list[i]
|
||||
houxuan_center = [
|
||||
center
|
||||
for center in candidate_center_candidates[i]
|
||||
if center != primary_center
|
||||
]
|
||||
candidate_group_set = set(candidate_group_list[i])
|
||||
for each_center in record_center_dataset:
|
||||
if (
|
||||
each_center in candidate_group_set
|
||||
and each_center != primary_center
|
||||
):
|
||||
houxuan_center.append(each_center)
|
||||
add_center = add_center + houxuan_center
|
||||
leak_center_dict[primary_center] = _dedupe_preserve_order(
|
||||
houxuan_center + [primary_center]
|
||||
)
|
||||
add_center = _dedupe_preserve_order(add_center)
|
||||
for each_group_centers in candidate_center_candidates:
|
||||
for each_center in each_group_centers:
|
||||
if each_center not in record_center_set:
|
||||
record_center_dataset.append(each_center)
|
||||
record_center_set.add(each_center)
|
||||
for each_center in add_center:
|
||||
if each_center not in record_center_set:
|
||||
record_center_dataset.append(each_center)
|
||||
record_center_set.add(each_center)
|
||||
|
||||
# --------------------------------------------------------
|
||||
# --------------------------------------------------------
|
||||
# if len(add_center) > 0:
|
||||
# s3 = pressure_leak_all.loc[add_center]
|
||||
# pressure_leak = pd.concat([pressure_leak, s3])
|
||||
# s4 = flow_leak_all.loc[add_center]
|
||||
# flow_leak = pd.concat([flow_leak, s4])
|
||||
# --------------------------------------------------------
|
||||
# 只为 add_center 里还没算过的中心补算,并与本轮中心合并
|
||||
if len(add_center) > 0:
|
||||
stage_start = perf_counter()
|
||||
pressure_add, flow_add, pressure_leak_all, flow_leak_all = (
|
||||
_ensure_signatures_for_centers(
|
||||
wn=wn,
|
||||
wn_inp_path=wn_inp_path,
|
||||
center_list=add_center,
|
||||
pressure_leak_all=pressure_leak_all,
|
||||
flow_leak_all=flow_leak_all,
|
||||
timestep_list=timestep_list,
|
||||
pressure_monitor=pressure_monitor,
|
||||
flow_monitor=flow_monitor,
|
||||
leak_mag=leak_mag, # 与上面一致
|
||||
n_workers=n_workers,
|
||||
)
|
||||
)
|
||||
_accumulate_stage(
|
||||
stage_timing, "signature_for_extra_centers", stage_start
|
||||
)
|
||||
pressure_leak = pd.concat([pressure_leak, pressure_add], axis=0)
|
||||
if (flow_leak is not None) and (flow_add is not None):
|
||||
flow_leak = pd.concat([flow_leak, flow_add], axis=0)
|
||||
# --------------------------------------------------------
|
||||
#
|
||||
if len(candidate_pipe_input) < 1.2 * top_pipe_num_max / top_group_ratio:
|
||||
if_compalsive = 1
|
||||
cos_h, dis_h, dis_f_h = adjust_ratio(similarity_mode, cos_h, dis_h, dis_f_h)
|
||||
candidate_center_list_sup = _dedupe_preserve_order(
|
||||
candidate_center_list + add_center
|
||||
)
|
||||
stage_start = perf_counter()
|
||||
similarity, cos_h, dis_h, dis_f_h, break_flag, similarity_details = (
|
||||
cal_similarity_all_multi_new_sq_improve_double_lzr(
|
||||
candidate_center_list_sup,
|
||||
similarity_mode,
|
||||
pressure_leak,
|
||||
pressure_monitor,
|
||||
pressure_predict,
|
||||
pressure_normal,
|
||||
if_flow,
|
||||
if_only_cos,
|
||||
if_only_flow,
|
||||
flow_leak,
|
||||
flow_monitor,
|
||||
flow_predict,
|
||||
flow_normal,
|
||||
timestep_list,
|
||||
Top_sensor_num,
|
||||
if_gy,
|
||||
effective_sensor,
|
||||
cos_h,
|
||||
dis_h,
|
||||
dis_f_h,
|
||||
if_compalsive,
|
||||
max_flow,
|
||||
)
|
||||
)
|
||||
last_round_candidate_details = similarity_details
|
||||
_accumulate_stage(stage_timing, "similarity_ranking", stage_start)
|
||||
if break_flag == 1:
|
||||
exit_condition = "similarity_break_flag"
|
||||
break
|
||||
|
||||
new_similarity = update_similarity(
|
||||
candidate_center_list, similarity, leak_center_dict
|
||||
)
|
||||
if len(candidate_pipe_input) > top_pipe_num_max / top_group_ratio:
|
||||
cut_ratio, new_similarity = extra_judge(new_similarity)
|
||||
else:
|
||||
cut_ratio = 1
|
||||
stage_start = perf_counter()
|
||||
final_area_t, final_center_t, all_node_new_1, if_end = (
|
||||
area_output_num_ki_improve(
|
||||
candidate_center_list,
|
||||
candidate_group_list,
|
||||
new_similarity,
|
||||
new_all_node,
|
||||
top_group_ratio,
|
||||
top_pipe_num_max,
|
||||
top_pipe_num_min,
|
||||
cut_ratio,
|
||||
)
|
||||
)
|
||||
_accumulate_stage(stage_timing, "candidate_area_selection", stage_start)
|
||||
final_area = final_area + final_area_t
|
||||
final_center = final_center + final_center_t
|
||||
|
||||
final_area = sorted(set(final_area))
|
||||
final_center = sorted(set(final_center))
|
||||
if if_end == 1:
|
||||
exit_condition = "candidate_area_if_end"
|
||||
break
|
||||
elif len(candidate_pipe_input) == len(final_area):
|
||||
exit_condition = "candidate_size_no_change"
|
||||
break
|
||||
else:
|
||||
candidate_pipe_input = final_area
|
||||
if not partition_on_full_graph:
|
||||
all_node_iter = all_node_new_1
|
||||
iter_count += 1
|
||||
sys.stdout.write(
|
||||
"\r"
|
||||
+ "已经完成"
|
||||
+ str(iter_count)
|
||||
+ "次迭代计算"
|
||||
+ "候选节点"
|
||||
+ str(len(final_area))
|
||||
+ "个"
|
||||
)
|
||||
# if break_flag == 0:
|
||||
# final_area_pipe = copy.deepcopy(final_area)
|
||||
# simulation_times = simulation_times + len(final_area)
|
||||
# pressure_leak_sp = pressure_leak_all.loc[final_area_pipe].loc[:, :]
|
||||
# flow_leak_sp = flow_leak_all.loc[final_area_pipe].loc[:, :]
|
||||
# similarity_sp, cos_h, dis_h, dis_f_h, break_flag = cal_similarity_all_multi_new_sq_improve_double_lzr(
|
||||
# final_area_pipe, similarity_mode, pressure_leak_sp,
|
||||
# pressure_monitor, pressure_predict, pressure_normal, if_flow,
|
||||
# if_only_cos, if_only_flow,
|
||||
# flow_leak_sp, flow_monitor, flow_predict, flow_normal,
|
||||
# timestep_list, Top_sensor_num, if_gy, effective_sensor, cos_h, dis_h, dis_f_h, if_compalsive, max_flow)
|
||||
if break_flag == 0:
|
||||
final_area_pipe = list(final_area) # 确保是 list
|
||||
|
||||
# 只为还没算过的管段补齐 SLF(按需计算)
|
||||
stage_start = perf_counter()
|
||||
pressure_leak_sp, flow_leak_sp, pressure_leak_all, flow_leak_all = (
|
||||
_ensure_signatures_for_centers(
|
||||
wn=wn,
|
||||
wn_inp_path=wn_inp_path,
|
||||
center_list=final_area_pipe, # 这次要用的“最终区域里的所有管段”
|
||||
pressure_leak_all=pressure_leak_all, # 累积缓存(会被更新)
|
||||
flow_leak_all=flow_leak_all,
|
||||
timestep_list=timestep_list,
|
||||
pressure_monitor=pressure_monitor,
|
||||
flow_monitor=flow_monitor,
|
||||
leak_mag=leak_mag,
|
||||
n_workers=n_workers,
|
||||
)
|
||||
)
|
||||
_accumulate_stage(stage_timing, "signature_for_final_area", stage_start)
|
||||
|
||||
# 如果你要精确统计模拟次数,这里可以加上“本次新补的数量”,
|
||||
# 做法:让 _ensure_signatures_for_centers 额外返回 need_cnt,再 simulation_times += need_cnt
|
||||
|
||||
stage_start = perf_counter()
|
||||
(
|
||||
similarity_sp,
|
||||
cos_h,
|
||||
dis_h,
|
||||
dis_f_h,
|
||||
break_flag,
|
||||
similarity_details,
|
||||
) = (
|
||||
cal_similarity_all_multi_new_sq_improve_double_lzr(
|
||||
final_area_pipe,
|
||||
similarity_mode,
|
||||
pressure_leak_sp,
|
||||
pressure_monitor,
|
||||
pressure_predict,
|
||||
pressure_normal,
|
||||
if_flow,
|
||||
if_only_cos,
|
||||
if_only_flow,
|
||||
flow_leak_sp,
|
||||
flow_monitor,
|
||||
flow_predict,
|
||||
flow_normal,
|
||||
timestep_list,
|
||||
Top_sensor_num,
|
||||
if_gy,
|
||||
effective_sensor,
|
||||
cos_h,
|
||||
dis_h,
|
||||
dis_f_h,
|
||||
if_compalsive,
|
||||
max_flow,
|
||||
)
|
||||
)
|
||||
last_round_candidate_details = similarity_details
|
||||
_accumulate_stage(stage_timing, "similarity_final", stage_start)
|
||||
|
||||
else:
|
||||
dpressure = (pressure_predict - pressure_monitor).mean()
|
||||
dpressure = dpressure.abs()
|
||||
simulation_times = simulation_times + len(dpressure.index)
|
||||
similarity_sp = pd.Series(dtype=float)
|
||||
for each_node in dpressure.index:
|
||||
pipe = node_pipe_dic[each_node][0]
|
||||
similarity_sp.loc[pipe] = dpressure.loc[each_node]
|
||||
similarity_sp = similarity_sp.sort_values(ascending=False, kind="mergesort")
|
||||
t2 = datetime.now()
|
||||
final_area_pipe = []
|
||||
|
||||
sys.stdout.write(
|
||||
"\r"
|
||||
+ "已经完成"
|
||||
+ str(iter_count + 1)
|
||||
+ "次迭代计算"
|
||||
+ "候选节点"
|
||||
+ str(len(final_area_pipe))
|
||||
+ "个"
|
||||
)
|
||||
t2 = datetime.now()
|
||||
dt = (t2 - t1).seconds
|
||||
final_candidates_csv = _write_last_round_candidates_csv(
|
||||
csv_path=final_candidates_csv_path,
|
||||
exit_condition=exit_condition,
|
||||
iteration_count=iter_count + 1,
|
||||
similarity_mode=similarity_mode,
|
||||
candidate_details=last_round_candidate_details,
|
||||
fallback_similarity=similarity_sp,
|
||||
)
|
||||
else:
|
||||
exit_condition = "no_effective_sensor_after_threshold"
|
||||
dpressure = (pressure_predict - pressure_monitor).mean()
|
||||
dpressure = dpressure.abs()
|
||||
|
||||
similarity_sp = pd.Series(dtype=float)
|
||||
for each_node in dpressure.index:
|
||||
pipe = node_pipe_dic[each_node][0]
|
||||
similarity_sp.loc[pipe] = dpressure.loc[each_node]
|
||||
similarity_sp = similarity_sp.sort_values(ascending=False, kind="mergesort")
|
||||
t2 = datetime.now()
|
||||
dt = (t2 - t1).seconds
|
||||
final_candidates_csv = _write_last_round_candidates_csv(
|
||||
csv_path=final_candidates_csv_path,
|
||||
exit_condition=exit_condition,
|
||||
iteration_count=0,
|
||||
similarity_mode=similarity_mode,
|
||||
candidate_details=None,
|
||||
fallback_similarity=similarity_sp,
|
||||
)
|
||||
stage_timing["iterations"] = iter_count + 1 if len(dpressure) > 0 else 0
|
||||
stage_timing["total_elapsed_seconds"] = float(dt)
|
||||
stage_timing["exit_condition"] = exit_condition
|
||||
stage_timing["final_candidates_csv"] = final_candidates_csv
|
||||
return (
|
||||
similarity_sp.index[0],
|
||||
dt,
|
||||
simulation_times,
|
||||
wn,
|
||||
similarity_sp,
|
||||
exit_condition,
|
||||
final_candidates_csv,
|
||||
)
|
||||
|
||||
|
||||
@@ -0,0 +1,563 @@
|
||||
"""漏损模拟模块。"""
|
||||
|
||||
import math
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
import wntr
|
||||
|
||||
from app.algorithms._utils import _cleanup_temp_files
|
||||
|
||||
_PIPE2LEAKNODE = None
|
||||
_SIGNATURE_WORKER_DATA = {}
|
||||
|
||||
|
||||
def _make_temp_prefix(tag):
|
||||
temp_dir = os.path.abspath(os.path.join("temp", "burst_location"))
|
||||
os.makedirs(temp_dir, exist_ok=True)
|
||||
safe_tag = str(tag).replace(os.sep, "_").replace(" ", "_")
|
||||
return os.path.join(temp_dir, f"{safe_tag}_{os.getpid()}")
|
||||
|
||||
|
||||
def _snapshot_hydraulic_options(wn):
|
||||
options = wn.options
|
||||
return {
|
||||
"demand_model": options.hydraulic.demand_model,
|
||||
"duration": float(options.time.duration),
|
||||
"hydraulic_timestep": float(options.time.hydraulic_timestep),
|
||||
"pattern_timestep": float(options.time.pattern_timestep),
|
||||
"report_timestep": float(options.time.report_timestep),
|
||||
"required_pressure": float(options.hydraulic.required_pressure),
|
||||
"minimum_pressure": float(options.hydraulic.minimum_pressure),
|
||||
}
|
||||
|
||||
|
||||
def _apply_hydraulic_options(wn, option_values):
|
||||
options = wn.options
|
||||
options.hydraulic.demand_model = option_values["demand_model"]
|
||||
options.time.duration = option_values["duration"]
|
||||
options.time.hydraulic_timestep = option_values["hydraulic_timestep"]
|
||||
options.time.pattern_timestep = option_values["pattern_timestep"]
|
||||
options.time.report_timestep = option_values["report_timestep"]
|
||||
options.hydraulic.required_pressure = option_values["required_pressure"]
|
||||
options.hydraulic.minimum_pressure = option_values["minimum_pressure"]
|
||||
|
||||
|
||||
def simple_add_leak(wn, leak_mag, leak_pipe):
|
||||
whole_inf = dict()
|
||||
leak_pipe_self = wn.get_link(leak_pipe)
|
||||
pipe_diameter = leak_pipe_self.diameter
|
||||
pipe_length = leak_pipe_self.length
|
||||
pipe_roughness = leak_pipe_self.roughness
|
||||
pipe_minor_loss = leak_pipe_self.minor_loss
|
||||
# pipe_status = leak_pipe_self.status
|
||||
# pipe_check_valve = leak_pipe_self.check_valve
|
||||
pipe_start_node = leak_pipe_self.start_node_name
|
||||
pipe_end_node = leak_pipe_self.end_node_name
|
||||
|
||||
# close the pipe
|
||||
# leak_pipe_self.status = 'Closed'
|
||||
wn.remove_link(leak_pipe)
|
||||
# add the pipe
|
||||
add_pipe1 = leak_pipe + "A"
|
||||
add_pipe2 = leak_pipe + "B"
|
||||
add_node = leak_pipe + "_"
|
||||
|
||||
start_n = wn.get_node(pipe_start_node)
|
||||
end_n = wn.get_node(pipe_end_node)
|
||||
if start_n.node_type == "Reservoir":
|
||||
end_n_elevation = end_n.elevation
|
||||
start_n_elevation = end_n_elevation
|
||||
elif end_n.node_type == "Reservoir":
|
||||
start_n_elevation = start_n.elevation
|
||||
end_n_elevation = start_n_elevation
|
||||
else:
|
||||
end_n_elevation = end_n.elevation
|
||||
start_n_elevation = start_n.elevation
|
||||
elevation_self = (start_n_elevation + end_n_elevation) / 2
|
||||
coordinates_self = (
|
||||
(start_n.coordinates[0] + end_n.coordinates[0]) / 2,
|
||||
(start_n.coordinates[1] + end_n.coordinates[1]),
|
||||
)
|
||||
|
||||
wn.add_junction(
|
||||
add_node, base_demand=0, elevation=elevation_self, coordinates=coordinates_self
|
||||
)
|
||||
leak_node = wn.get_node(add_node)
|
||||
wn.add_pipe(
|
||||
add_pipe1,
|
||||
start_node_name=pipe_start_node,
|
||||
end_node_name=add_node,
|
||||
length=pipe_length / 2,
|
||||
diameter=pipe_diameter,
|
||||
roughness=pipe_roughness,
|
||||
minor_loss=pipe_minor_loss,
|
||||
)
|
||||
wn.add_pipe(
|
||||
add_pipe2,
|
||||
start_node_name=pipe_end_node,
|
||||
end_node_name=add_node,
|
||||
length=pipe_length / 2,
|
||||
diameter=pipe_diameter,
|
||||
roughness=pipe_roughness,
|
||||
minor_loss=pipe_minor_loss,
|
||||
)
|
||||
|
||||
# simulation
|
||||
leak_node.add_demand(base=leak_mag, pattern_name="add_leak")
|
||||
|
||||
whole_inf["leak_node_name"] = add_node
|
||||
whole_inf["add_pipe1"] = add_pipe1
|
||||
whole_inf["add_pipe2"] = add_pipe2
|
||||
whole_inf["leak_pipe"] = leak_pipe
|
||||
whole_inf["pipe_start_node"] = pipe_start_node
|
||||
whole_inf["pipe_end_node"] = pipe_end_node
|
||||
whole_inf["pipe_length"] = pipe_length
|
||||
whole_inf["pipe_diameter"] = pipe_diameter
|
||||
whole_inf["pipe_roughness"] = pipe_roughness
|
||||
whole_inf["pipe_minor_loss"] = pipe_diameter
|
||||
return wn, whole_inf, add_pipe1
|
||||
|
||||
|
||||
def simple_recover_wn(wn, whole_inf):
|
||||
leak_node = wn.get_node(whole_inf["leak_node_name"])
|
||||
|
||||
del leak_node.demand_timeseries_list[-1]
|
||||
# update
|
||||
wn.remove_link(whole_inf["add_pipe1"])
|
||||
wn.remove_link(whole_inf["add_pipe2"])
|
||||
wn.remove_node(whole_inf["leak_node_name"])
|
||||
# open the pipe
|
||||
# leak_pipe_self.status = 'Open'
|
||||
wn.add_pipe(
|
||||
whole_inf["leak_pipe"],
|
||||
start_node_name=whole_inf["pipe_start_node"],
|
||||
end_node_name=whole_inf["pipe_end_node"],
|
||||
length=whole_inf["pipe_length"],
|
||||
diameter=whole_inf["pipe_diameter"],
|
||||
roughness=whole_inf["pipe_roughness"],
|
||||
minor_loss=whole_inf["pipe_minor_loss"],
|
||||
)
|
||||
|
||||
return wn
|
||||
|
||||
|
||||
def disable_all_controls_temporarily(wn):
|
||||
"""返回(控制名, 控制对象)的列表,之后可用 restore_controls 还原。"""
|
||||
removed = []
|
||||
# WNTR 的控制都在 wn.control_name_list / wn.get_control / wn.remove_control
|
||||
for cname in list(wn.control_name_list):
|
||||
ctrl = wn.get_control(cname)
|
||||
removed.append((cname, ctrl))
|
||||
wn.remove_control(cname)
|
||||
return removed
|
||||
|
||||
|
||||
def restore_controls(wn, removed):
|
||||
"""把先前禁用的控制全部加回去。"""
|
||||
for cname, ctrl in removed:
|
||||
wn.add_control(cname, ctrl)
|
||||
|
||||
|
||||
def set_pipe2leaknode_mapping(mapping):
|
||||
global _PIPE2LEAKNODE
|
||||
_PIPE2LEAKNODE = mapping
|
||||
|
||||
|
||||
def _get_or_create_leak_demand_ts(leak_node):
|
||||
"""
|
||||
返回:泄漏专用 demand 的下标 idx。
|
||||
若不存在,以 category='leak' 新建一条 base=0.0 的 demand。
|
||||
"""
|
||||
# 先尝试找到已有的 'leak' 分类
|
||||
for i, ts in enumerate(leak_node.demand_timeseries_list):
|
||||
# WNTR 的 Demand object 存在 category 属性
|
||||
if getattr(ts, "category", None) == "leak":
|
||||
return i
|
||||
# 没有则新建(base=0.0,后续临时改 base_value)
|
||||
leak_node.add_demand(base=0.0, pattern_name=None, category="leak")
|
||||
return len(leak_node.demand_timeseries_list) - 1
|
||||
|
||||
|
||||
def ensure_mid_node(wn, leak_pipe):
|
||||
add_pipe1 = f"{leak_pipe}A"
|
||||
add_pipe2 = f"{leak_pipe}B"
|
||||
add_node = f"{leak_pipe}__mid"
|
||||
|
||||
if add_node in wn.node_name_list:
|
||||
return add_node
|
||||
|
||||
if leak_pipe in wn.link_name_list:
|
||||
leak_pipe_self = wn.get_link(leak_pipe)
|
||||
pipe_diameter = leak_pipe_self.diameter
|
||||
pipe_length = leak_pipe_self.length
|
||||
pipe_roughness = leak_pipe_self.roughness
|
||||
pipe_minor_loss = leak_pipe_self.minor_loss
|
||||
pipe_start_node = leak_pipe_self.start_node_name
|
||||
pipe_end_node = leak_pipe_self.end_node_name
|
||||
|
||||
start_n = wn.get_node(pipe_start_node)
|
||||
end_n = wn.get_node(pipe_end_node)
|
||||
if start_n.node_type == "Reservoir":
|
||||
end_elev = end_n.elevation
|
||||
start_elev = end_elev
|
||||
elif end_n.node_type == "Reservoir":
|
||||
start_elev = start_n.elevation
|
||||
end_elev = start_elev
|
||||
else:
|
||||
end_elev = end_n.elevation
|
||||
start_elev = start_n.elevation
|
||||
elev_mid = (start_elev + end_elev) / 2.0
|
||||
x_mid = (start_n.coordinates[0] + end_n.coordinates[0]) / 2.0
|
||||
y_mid = (start_n.coordinates[1] + end_n.coordinates[1]) / 2.0
|
||||
|
||||
wn.remove_link(leak_pipe)
|
||||
wn.add_junction(
|
||||
add_node, base_demand=0.0, elevation=elev_mid, coordinates=(x_mid, y_mid)
|
||||
)
|
||||
wn.add_pipe(
|
||||
add_pipe1,
|
||||
start_node_name=pipe_start_node,
|
||||
end_node_name=add_node,
|
||||
length=pipe_length / 2.0,
|
||||
diameter=pipe_diameter,
|
||||
roughness=pipe_roughness,
|
||||
minor_loss=pipe_minor_loss,
|
||||
)
|
||||
wn.add_pipe(
|
||||
add_pipe2,
|
||||
start_node_name=add_node,
|
||||
end_node_name=pipe_end_node,
|
||||
length=pipe_length / 2.0,
|
||||
diameter=pipe_diameter,
|
||||
roughness=pipe_roughness,
|
||||
minor_loss=pipe_minor_loss,
|
||||
)
|
||||
return add_node
|
||||
|
||||
# 若 A/B 已存在但中点不在,建议确认网络一致性
|
||||
raise KeyError(f"Cannot ensure mid node for pipe '{leak_pipe}'.")
|
||||
|
||||
|
||||
def leak_simulation_pipe_dd_multi_pf(
|
||||
wn, leak_mag, leak_pipe, sensor_name, file_prefix=None
|
||||
):
|
||||
"""
|
||||
优化版:
|
||||
- 不再 remove/add link/node
|
||||
- 直接在预插入的中点泄漏节点上设置 base_demand = leak_mag;仿真后设回 0
|
||||
"""
|
||||
wn.options.hydraulic.demand_model = "DD"
|
||||
|
||||
# 确保中点节点存在
|
||||
leak_node_name = ensure_mid_node(wn, leak_pipe)
|
||||
leak_node = wn.get_node(leak_node_name)
|
||||
|
||||
# 拿到泄漏专用的 demand time-series 下标
|
||||
leak_idx = _get_or_create_leak_demand_ts(leak_node)
|
||||
ts_obj = leak_node.demand_timeseries_list[leak_idx]
|
||||
|
||||
# 记录原值(通常是 0.0)
|
||||
orig_base = ts_obj.base_value
|
||||
try:
|
||||
# 打开泄漏:只改 base_value,不碰 base_demand(只读)
|
||||
ts_obj.base_value = float(leak_mag)
|
||||
|
||||
# 仿真
|
||||
sim = wntr.sim.EpanetSimulator(wn)
|
||||
if file_prefix is None:
|
||||
results = sim.run_sim()
|
||||
else:
|
||||
results = sim.run_sim(file_prefix=file_prefix)
|
||||
|
||||
# 输出(保持列顺序)
|
||||
pressure_output = results.node["pressure"].loc[:, sensor_name]
|
||||
# flow_output = results.link['flowrate'].loc[:, sensor_f_name]
|
||||
return wn, pressure_output
|
||||
finally:
|
||||
# 关闭泄漏:还原 base_value
|
||||
ts_obj.base_value = orig_base
|
||||
if file_prefix is not None:
|
||||
_cleanup_temp_files(file_prefix)
|
||||
|
||||
|
||||
def prepare_leak_infrastructure(wn, candidate_pipes):
|
||||
"""
|
||||
把 candidate_pipes 每条管段切成两段,并在中点插入一个泄漏节点(base_demand=0)。
|
||||
返回一个映射:pipe_id -> leak_node_name
|
||||
注意:只做一次;后续仿真通过在该节点设置 base_demand 实现“打开泄漏”,结束后恢复为 0。
|
||||
"""
|
||||
pipe2leaknode = {}
|
||||
for leak_pipe in candidate_pipes:
|
||||
if leak_pipe in pipe2leaknode:
|
||||
continue
|
||||
|
||||
leak_pipe_self = wn.get_link(leak_pipe)
|
||||
pipe_diameter = leak_pipe_self.diameter
|
||||
pipe_length = leak_pipe_self.length
|
||||
pipe_roughness = leak_pipe_self.roughness
|
||||
pipe_minor_loss = leak_pipe_self.minor_loss
|
||||
pipe_start_node = leak_pipe_self.start_node_name
|
||||
pipe_end_node = leak_pipe_self.end_node_name
|
||||
|
||||
# 计算中点高程/坐标(与原逻辑一致)
|
||||
start_n = wn.get_node(pipe_start_node)
|
||||
end_n = wn.get_node(pipe_end_node)
|
||||
if start_n.node_type == "Reservoir":
|
||||
end_elev = end_n.elevation
|
||||
start_elev = end_elev
|
||||
elif end_n.node_type == "Reservoir":
|
||||
start_elev = start_n.elevation
|
||||
end_elev = start_elev
|
||||
else:
|
||||
end_elev = end_n.elevation
|
||||
start_elev = start_n.elevation
|
||||
elev_mid = (start_elev + end_elev) / 2.0
|
||||
x_mid = (start_n.coordinates[0] + end_n.coordinates[0]) / 2.0
|
||||
y_mid = (start_n.coordinates[1] + end_n.coordinates[1]) / 2.0
|
||||
|
||||
# 先删原管,再加中点与两段半长管(只做一次)
|
||||
wn.remove_link(leak_pipe)
|
||||
|
||||
add_pipe1 = f"{leak_pipe}A"
|
||||
add_pipe2 = f"{leak_pipe}B"
|
||||
add_node = f"{leak_pipe}__mid" # 唯一命名,后面直接用它当泄漏节点
|
||||
|
||||
wn.add_junction(
|
||||
add_node, base_demand=0.0, elevation=elev_mid, coordinates=(x_mid, y_mid)
|
||||
)
|
||||
wn.add_pipe(
|
||||
add_pipe1,
|
||||
start_node_name=pipe_start_node,
|
||||
end_node_name=add_node,
|
||||
length=pipe_length / 2.0,
|
||||
diameter=pipe_diameter,
|
||||
roughness=pipe_roughness,
|
||||
minor_loss=pipe_minor_loss,
|
||||
)
|
||||
wn.add_pipe(
|
||||
add_pipe2,
|
||||
start_node_name=add_node,
|
||||
end_node_name=pipe_end_node,
|
||||
length=pipe_length / 2.0,
|
||||
diameter=pipe_diameter,
|
||||
roughness=pipe_roughness,
|
||||
minor_loss=pipe_minor_loss,
|
||||
)
|
||||
|
||||
pipe2leaknode[leak_pipe] = add_node
|
||||
|
||||
return pipe2leaknode
|
||||
|
||||
|
||||
def normal_simulation_pf(
|
||||
wn, drive_mode, sensor_name, sensor_f_name, inp_time, require_p, minimum_p
|
||||
):
|
||||
# inp_time = 0
|
||||
if drive_mode == "PDD": # 需水量根据节点压力动态调整
|
||||
wn.options.hydraulic.demand_model = "PDD"
|
||||
wn.options.hydraulic.required_pressure = require_p
|
||||
wn.options.hydraulic.minimum_pressure = minimum_p
|
||||
elif drive_mode == "DD": # 需水量固定,与压力无关
|
||||
wn.options.hydraulic.demand_model = "DD"
|
||||
sim = wntr.sim.EpanetSimulator(wn)
|
||||
results = sim.run_sim()
|
||||
pressure_all = results.node["pressure"][sensor_name]
|
||||
pressure = pressure_all.iloc[inp_time]
|
||||
demand_all = results.node["demand"]
|
||||
demand = demand_all.iloc[inp_time]
|
||||
sum_demand = cal_sum_demand(demand)
|
||||
flow_all = results.link["flowrate"][sensor_f_name]
|
||||
flow = flow_all.iloc[inp_time]
|
||||
|
||||
top_sensor = pressure.idxmin()
|
||||
basic_p = results.node["pressure"]
|
||||
basic_p = basic_p.iloc[inp_time]
|
||||
return pressure, flow, basic_p, top_sensor, sum_demand
|
||||
|
||||
|
||||
def normal_simulation_multi_pf(
|
||||
wn, drive_mode, sensor_name, sensor_f_name, require_p, minimum_p
|
||||
):
|
||||
# inp_time = 0
|
||||
if drive_mode == "PDD":
|
||||
wn.options.hydraulic.demand_model = "PDD"
|
||||
wn.options.hydraulic.required_pressure = require_p
|
||||
wn.options.hydraulic.minimum_pressure = minimum_p
|
||||
elif drive_mode == "DD":
|
||||
wn.options.hydraulic.demand_model = "DD"
|
||||
sim = wntr.sim.EpanetSimulator(wn)
|
||||
results = sim.run_sim()
|
||||
pressure_all = results.node["pressure"][sensor_name]
|
||||
pressure = pressure_all
|
||||
demand_all = results.node["demand"]
|
||||
demand = demand_all
|
||||
flow = results.link["flowrate"][sensor_f_name]
|
||||
sum_demand = pd.Series(dtype=object)
|
||||
for i in range(len(demand.index)):
|
||||
sum_demand[str(demand.index[i])] = cal_sum_demand(demand.iloc[i])
|
||||
if type(pressure) == pd.core.series.Series:
|
||||
top_sensor = pressure.idxmin()
|
||||
else:
|
||||
mean_pressure = pressure.mean()
|
||||
top_sensor = mean_pressure.idxmin()
|
||||
basic_p = results.node["pressure"]
|
||||
return pressure, flow, basic_p, top_sensor, sum_demand
|
||||
|
||||
|
||||
def simple_simulation_pf(wn, sensor_name, sensor_f_name, leak_pipe, add_pipe1):
|
||||
sim = wntr.sim.EpanetSimulator(wn)
|
||||
results = sim.run_sim()
|
||||
pressure_all = results.node["pressure"][sensor_name]
|
||||
if len(leak_pipe) > 0 and leak_pipe in sensor_f_name:
|
||||
|
||||
f_sensor_name = [add_pipe1 if i == leak_pipe else i for i in sensor_f_name]
|
||||
flow_all = results.link["flowrate"][f_sensor_name]
|
||||
flow_all.columns = sensor_f_name
|
||||
else:
|
||||
flow_all = results.link["flowrate"][sensor_f_name]
|
||||
return pressure_all, flow_all
|
||||
|
||||
|
||||
def cal_sum_demand(demand):
|
||||
sum_demand = 0
|
||||
for i in range(len(demand)):
|
||||
if demand.iloc[i] > 0:
|
||||
sum_demand += demand.iloc[i]
|
||||
return sum_demand
|
||||
|
||||
|
||||
def cal_signature_pipe_multi_pf(
|
||||
wn,
|
||||
leak_mag,
|
||||
candidate_center,
|
||||
timestep_list,
|
||||
sensor_name,
|
||||
n_workers=1,
|
||||
wn_inp_path=None,
|
||||
):
|
||||
candidate_center_num = len(candidate_center)
|
||||
pressure_leak = pd.DataFrame(
|
||||
index=pd.MultiIndex.from_product([candidate_center, timestep_list]),
|
||||
columns=sensor_name,
|
||||
)
|
||||
# flow_leak = pd.DataFrame(index=pd.MultiIndex.from_product([candidate_center, timestep_list]),
|
||||
# columns=sensor_f_name)
|
||||
pressure_leak = pressure_leak.sort_index()
|
||||
# flow_leak = flow_leak.sort_index()
|
||||
can_parallel = (
|
||||
n_workers > 1
|
||||
and candidate_center_num > 1
|
||||
and wn_inp_path is not None
|
||||
and len(str(wn_inp_path)) > 0
|
||||
)
|
||||
if can_parallel:
|
||||
option_values = _snapshot_hydraulic_options(wn)
|
||||
worker_count = min(n_workers, candidate_center_num)
|
||||
start_methods = mp.get_all_start_methods()
|
||||
context_name = "spawn" if "spawn" in start_methods else start_methods[0]
|
||||
with mp.get_context(context_name).Pool(
|
||||
processes=worker_count,
|
||||
initializer=_signature_worker_init,
|
||||
initargs=(
|
||||
str(wn_inp_path),
|
||||
float(leak_mag),
|
||||
list(sensor_name),
|
||||
option_values,
|
||||
list(candidate_center),
|
||||
),
|
||||
) as pool:
|
||||
for i, (center_name, pressure_array) in enumerate(
|
||||
pool.imap(_signature_worker_run_center, candidate_center)
|
||||
):
|
||||
pressure_leak.loc[(center_name, slice(None)), :] = pressure_array
|
||||
sys.stdout.write("\r" + "已经完成计算" + str(i + 1) + "个特征中心")
|
||||
else:
|
||||
# Pre-insert all mid-nodes so every simulation sees the same topology
|
||||
for center in candidate_center:
|
||||
ensure_mid_node(wn, center)
|
||||
for i in range(candidate_center_num):
|
||||
temp_prefix = _make_temp_prefix(f"sig_{i}")
|
||||
wn, pressure_output = leak_simulation_pipe_dd_multi_pf(
|
||||
wn,
|
||||
leak_mag,
|
||||
candidate_center[i],
|
||||
sensor_name,
|
||||
file_prefix=temp_prefix,
|
||||
)
|
||||
# leak_or_not_list.append(leak_or_not)
|
||||
pressure_leak.loc[(candidate_center[i], slice(None)), :] = (
|
||||
pressure_output.to_numpy()
|
||||
)
|
||||
# flow_leak.loc[candidate_center[i]].loc[:, :] = flow_output
|
||||
sys.stdout.write("\r" + "已经完成计算" + str(i + 1) + "个特征中心")
|
||||
return pressure_leak, candidate_center
|
||||
|
||||
|
||||
def _signature_worker_init(
|
||||
inp_path, leak_mag, sensor_name, option_values, candidate_centers=None
|
||||
):
|
||||
global _SIGNATURE_WORKER_DATA
|
||||
wn = wntr.network.WaterNetworkModel(inp_path)
|
||||
_apply_hydraulic_options(wn, option_values)
|
||||
# Pre-insert ALL mid-nodes so every simulation runs on the same topology,
|
||||
# regardless of which worker handles which task.
|
||||
if candidate_centers is not None:
|
||||
for center in candidate_centers:
|
||||
ensure_mid_node(wn, center)
|
||||
_SIGNATURE_WORKER_DATA = {
|
||||
"wn": wn,
|
||||
"leak_mag": leak_mag,
|
||||
"sensor_name": sensor_name,
|
||||
}
|
||||
|
||||
|
||||
def _signature_worker_run_center(center_name):
|
||||
data = _SIGNATURE_WORKER_DATA
|
||||
temp_prefix = _make_temp_prefix(f"sig_worker_{center_name}")
|
||||
_, pressure_output = leak_simulation_pipe_dd_multi_pf(
|
||||
data["wn"],
|
||||
data["leak_mag"],
|
||||
center_name,
|
||||
data["sensor_name"],
|
||||
file_prefix=temp_prefix,
|
||||
)
|
||||
return center_name, pressure_output.to_numpy()
|
||||
|
||||
|
||||
def pick_pipe(all_pipes, pipe_diameter, limited_diameter):
|
||||
candidate_pipe = []
|
||||
for each_pipe in all_pipes:
|
||||
if pipe_diameter[each_pipe] >= limited_diameter:
|
||||
candidate_pipe.append(each_pipe)
|
||||
return candidate_pipe
|
||||
|
||||
|
||||
def cal_possible_pipe(leak_flow, all_pipe, pipe_diameter):
|
||||
basic_pressure = 10 # 基础压力
|
||||
discharge_coeff = 0.6 # 经验系数
|
||||
break_area_ratio = 1 # 爆管面积比 0.5 1.25
|
||||
break_area = leak_flow / (
|
||||
discharge_coeff * math.sqrt(2 * basic_pressure * 9.81)
|
||||
) # 爆管面积 m3/h
|
||||
"""break_area_diameter = math.sqrt(4 * break_area / math.pi)
|
||||
min_diameter = (math.ceil(1000 * break_area_diameter / break_area_ratio)) / 1000"""
|
||||
break_area_diameter = math.sqrt(
|
||||
4 * break_area / math.pi / break_area_ratio
|
||||
) # 爆管直径
|
||||
min_diameter = (math.ceil(1000 * break_area_diameter)) / 1000 # 向上取整
|
||||
new_all_pipe = pick_pipe(all_pipe, pipe_diameter, min_diameter)
|
||||
return new_all_pipe, min_diameter
|
||||
|
||||
|
||||
def extract_links(data, link_types, direction):
|
||||
return [
|
||||
link
|
||||
for res_data in data.values()
|
||||
for link_type in link_types
|
||||
for link in res_data[link_type][direction]
|
||||
]
|
||||
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
"""管网模型读取与图构建模块。"""
|
||||
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
import networkx as nx
|
||||
import pandas as pd
|
||||
import wntr
|
||||
|
||||
|
||||
def load_inp(inp_name, inp_location, inp_time, driven_mode, require_p, minimum_p):
|
||||
inp_file = inp_location + inp_name
|
||||
wn = wntr.network.WaterNetworkModel(inp_file)
|
||||
if driven_mode == "PDD":
|
||||
wn.options.hydraulic.demand_model = "PDD"
|
||||
wn.options.hydraulic.required_pressure = require_p
|
||||
wn.options.hydraulic.minimum_pressure = minimum_p
|
||||
else:
|
||||
wn.options.hydraulic.demand_model = "DD"
|
||||
return wn
|
||||
|
||||
|
||||
def read_inf_inp(wn):
|
||||
all_node = wn.node_name_list
|
||||
node_elevation = wn.query_node_attribute("elevation")
|
||||
node_coordinates = wn.query_node_attribute("coordinates")
|
||||
|
||||
all_pipe = wn.pipe_name_list
|
||||
# 改_wz__________________________________
|
||||
n_pipe = []
|
||||
for p in all_pipe:
|
||||
pipe = wn.get_link(p)
|
||||
if pipe.initial_status == 0: # 状态为'Closed'
|
||||
n_pipe.append(p)
|
||||
candidate_pipe_init = sorted(set(all_pipe) - set(n_pipe))
|
||||
pipe_start_node = wn.query_link_attribute(
|
||||
"start_node_name", link_type=wntr.network.model.Pipe
|
||||
)
|
||||
pipe_end_node = wn.query_link_attribute(
|
||||
"end_node_name", link_type=wntr.network.model.Pipe
|
||||
)
|
||||
pipe_length = wn.query_link_attribute("length")
|
||||
pipe_diameter = wn.query_link_attribute("diameter")
|
||||
|
||||
return (
|
||||
all_node,
|
||||
node_elevation,
|
||||
node_coordinates,
|
||||
candidate_pipe_init,
|
||||
pipe_start_node,
|
||||
pipe_end_node,
|
||||
pipe_length,
|
||||
pipe_diameter,
|
||||
)
|
||||
|
||||
|
||||
def read_inf_inp_other(wn):
|
||||
all_link = wn.link_name_list
|
||||
pipe_start_node_all = wn.query_link_attribute("start_node_name")
|
||||
pipe_end_node_all = wn.query_link_attribute("end_node_name")
|
||||
|
||||
return all_link, pipe_start_node_all, pipe_end_node_all
|
||||
|
||||
|
||||
def construct_graph(wn):
|
||||
length = wn.query_link_attribute("length")
|
||||
G = wn.get_graph(wn, link_weight=length)
|
||||
# 转为无向图
|
||||
G0 = G.to_undirected()
|
||||
# A0 = np.array(nx.adjacency_graph(G0).todense())
|
||||
return G0 # , A0
|
||||
|
||||
|
||||
def cal_pipe_coordinate(all_pipe, pipe_start_node, pipe_end_node, node_coordinates):
|
||||
pipe_num = len(all_pipe)
|
||||
pipe_coordinates = np.zeros([pipe_num, 2])
|
||||
pipe_x = copy.deepcopy(pipe_start_node)
|
||||
pipe_y = copy.deepcopy(pipe_start_node)
|
||||
for i in range(pipe_num):
|
||||
temp_pipe = all_pipe[i]
|
||||
pipe_x[temp_pipe] = (
|
||||
node_coordinates[pipe_start_node[temp_pipe]][0]
|
||||
+ node_coordinates[pipe_end_node[temp_pipe]][0]
|
||||
) / 2
|
||||
pipe_y[temp_pipe] = (
|
||||
node_coordinates[pipe_start_node[temp_pipe]][1]
|
||||
+ node_coordinates[pipe_end_node[temp_pipe]][1]
|
||||
) / 2
|
||||
return pipe_x, pipe_y
|
||||
|
||||
|
||||
def cal_node_coordinate(all_node, node_coordinates):
|
||||
node_x = copy.deepcopy(node_coordinates)
|
||||
node_y = copy.deepcopy(node_coordinates)
|
||||
for i in range(len(node_x)):
|
||||
temp_node = all_node[i]
|
||||
node_x[temp_node] = node_coordinates[temp_node][0]
|
||||
node_y[temp_node] = node_coordinates[temp_node][1]
|
||||
return node_x, node_y
|
||||
|
||||
|
||||
def produce_pattern_value(wn, all_node):
|
||||
wn_o = copy.deepcopy(wn)
|
||||
# 改_wz_____________________________
|
||||
# sample_node = wn_o.get_node(all_node[0])
|
||||
# num_categories = len(sample_node.demand_timeseries_list)
|
||||
num_categories = 1
|
||||
columns = [f"D{i}" for i in range(num_categories)]
|
||||
basic_demand_pd = pd.DataFrame(index=all_node, columns=columns)
|
||||
for each in all_node:
|
||||
node = wn_o.get_node(each)
|
||||
for i in range(num_categories):
|
||||
basic_demand_pd.loc[each, columns[i]] = node.demand_timeseries_list[
|
||||
i
|
||||
].base_value
|
||||
|
||||
return basic_demand_pd
|
||||
|
||||
|
||||
def _build_node_pipe_maps(
|
||||
all_nodes, candidate_pipes, pipe_start_node, pipe_end_node, pipe_length
|
||||
):
|
||||
node_pipe_dic = {node: [] for node in all_nodes}
|
||||
couple_node_length = {}
|
||||
for pipe in candidate_pipes:
|
||||
start_node = pipe_start_node[pipe]
|
||||
end_node = pipe_end_node[pipe]
|
||||
if start_node in node_pipe_dic:
|
||||
node_pipe_dic[start_node].append(pipe)
|
||||
if end_node in node_pipe_dic:
|
||||
node_pipe_dic[end_node].append(pipe)
|
||||
length = float(pipe_length[pipe])
|
||||
couple_node_length[f"{start_node},{end_node}"] = length
|
||||
couple_node_length[f"{end_node},{start_node}"] = length
|
||||
return node_pipe_dic, couple_node_length
|
||||
|
||||
|
||||
@@ -0,0 +1,456 @@
|
||||
"""管网分区模块。"""
|
||||
|
||||
import math
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import networkx as nx
|
||||
import networkx as networkx
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pymetis
|
||||
from scipy.sparse import coo_matrix, csr_matrix
|
||||
from scipy.sparse.csgraph import connected_components
|
||||
|
||||
|
||||
def _to_metis_edge_weight(edge_weight):
|
||||
weight = float(edge_weight)
|
||||
if not math.isfinite(weight):
|
||||
raise ValueError(f"Invalid non-finite METIS edge weight: {edge_weight}")
|
||||
# pymetis expects integer edge weights.
|
||||
return max(1, int(round(weight)))
|
||||
|
||||
|
||||
def _dedupe_preserve_order(items):
|
||||
seen = set()
|
||||
output = []
|
||||
for item in items:
|
||||
if item in seen:
|
||||
continue
|
||||
seen.add(item)
|
||||
output.append(item)
|
||||
return output
|
||||
|
||||
|
||||
def pick_center_pipe(node_x, node_y, candidate_pipe, pipe_start_node, pipe_end_node):
|
||||
candidate_pipe_list = list(candidate_pipe)
|
||||
start_nodes = pipe_start_node[candidate_pipe_list]
|
||||
end_nodes = pipe_end_node[candidate_pipe_list]
|
||||
|
||||
x_vals = (node_x[start_nodes].to_numpy() + node_x[end_nodes].to_numpy()) / 2.0
|
||||
y_vals = (node_y[start_nodes].to_numpy() + node_y[end_nodes].to_numpy()) / 2.0
|
||||
mean_x = float(np.mean(x_vals))
|
||||
mean_y = float(np.mean(y_vals))
|
||||
distance = np.abs(x_vals - mean_x) + np.abs(y_vals - mean_y)
|
||||
center_idx = int(np.argmin(distance))
|
||||
return candidate_pipe_list[center_idx]
|
||||
|
||||
|
||||
def pick_max_diameter_pipe(candidate_pipe, pipe_diameter):
|
||||
candidate_pipe_list = list(candidate_pipe)
|
||||
diameters = pd.to_numeric(
|
||||
pipe_diameter.reindex(candidate_pipe_list), errors="coerce"
|
||||
).dropna()
|
||||
if len(diameters) != len(candidate_pipe_list):
|
||||
missing = sorted(set(candidate_pipe_list) - set(diameters.index))
|
||||
preview = ", ".join(map(str, missing[:10]))
|
||||
raise ValueError(f"Missing or invalid diameter for pipes: {preview}")
|
||||
|
||||
max_diameter = float(diameters.max())
|
||||
max_diameter_pipes = sorted(
|
||||
[pipe for pipe, diameter in diameters.items() if float(diameter) == max_diameter],
|
||||
key=str,
|
||||
)
|
||||
return max_diameter_pipes[0]
|
||||
|
||||
|
||||
def pick_dual_center_pipes(
|
||||
node_x, node_y, candidate_pipe, pipe_start_node, pipe_end_node, pipe_diameter
|
||||
):
|
||||
geometric_center = pick_center_pipe(
|
||||
node_x, node_y, candidate_pipe, pipe_start_node, pipe_end_node
|
||||
)
|
||||
diameter_center = pick_max_diameter_pipe(candidate_pipe, pipe_diameter)
|
||||
return _dedupe_preserve_order([geometric_center, diameter_center])
|
||||
|
||||
|
||||
def find_new_center_pipe(
|
||||
node_x,
|
||||
node_y,
|
||||
candidate_pipe,
|
||||
pipe_start_node,
|
||||
pipe_end_node,
|
||||
pipe_diameter,
|
||||
record_center,
|
||||
):
|
||||
new_candidate_pipe = sorted(set(candidate_pipe) - set(record_center))
|
||||
if new_candidate_pipe == []:
|
||||
new_candidate_pipe = candidate_pipe
|
||||
center_t = pick_center_pipe(
|
||||
node_x,
|
||||
node_y,
|
||||
new_candidate_pipe,
|
||||
pipe_start_node,
|
||||
pipe_end_node,
|
||||
)
|
||||
return center_t
|
||||
|
||||
|
||||
def cal_area_node_linked_pipe(nodeset, node_pipe_dic):
|
||||
pipeset = []
|
||||
for temp_node in nodeset:
|
||||
pipeset.extend(node_pipe_dic[temp_node])
|
||||
return pipeset
|
||||
|
||||
|
||||
def metis_grouping_pipe_weight(
|
||||
G0,
|
||||
wn,
|
||||
all_node_iter,
|
||||
candidate_pipe_input,
|
||||
group_num,
|
||||
node_x,
|
||||
node_y,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
node_pipe_dic,
|
||||
all_node_series,
|
||||
couple_node_length,
|
||||
pipe_diameter,
|
||||
):
|
||||
all_node_iter_series_new = all_node_series[all_node_iter]
|
||||
all_node_iter_series_new = all_node_iter_series_new.sort_values(ascending=True)
|
||||
all_node_iter_new = list(all_node_iter_series_new.index)
|
||||
|
||||
G1 = G0.subgraph(all_node_iter_new)
|
||||
delimiter = " "
|
||||
adjacency_list = []
|
||||
node_dict = {}
|
||||
c_new = 0
|
||||
for each_node in all_node_iter_new:
|
||||
node_dict[each_node] = c_new
|
||||
c_new = c_new + 1
|
||||
correspond_dic = {}
|
||||
count_node = 0
|
||||
w = []
|
||||
for node_name in all_node_iter_new:
|
||||
neighbors = G1[node_name]
|
||||
w_temp = []
|
||||
n_t = [node_dict[node_name]]
|
||||
for neighbor_name in sorted(neighbors.keys()):
|
||||
edge_data = neighbors[neighbor_name]
|
||||
edge_key = f"{node_name},{neighbor_name}"
|
||||
reverse_edge_key = f"{neighbor_name},{node_name}"
|
||||
if edge_key in couple_node_length:
|
||||
edge_weight = couple_node_length[edge_key]
|
||||
elif reverse_edge_key in couple_node_length:
|
||||
edge_weight = couple_node_length[reverse_edge_key]
|
||||
elif edge_data.get("weight") is not None:
|
||||
edge_weight = float(edge_data["weight"])
|
||||
else:
|
||||
# Ignore graph edges that are outside candidate pipes and have no usable
|
||||
# partition weight (e.g. some non-pipe links in mixed network graphs).
|
||||
continue
|
||||
w_temp.append(_to_metis_edge_weight(edge_weight))
|
||||
n_t.append(node_dict[neighbor_name])
|
||||
w.append(w_temp)
|
||||
correspond_dic[n_t[0]] = count_node
|
||||
count_node = count_node + 1
|
||||
# del n_t[0]
|
||||
adjacency_list.append(n_t)
|
||||
adjacency_list_new = [[] * 1 for i in range(len(adjacency_list))]
|
||||
w_new = [[] * 1 for i in range(len(adjacency_list))]
|
||||
for i in range(len(adjacency_list)):
|
||||
adjacency_list_new[int(adjacency_list[i][0])] = adjacency_list[i]
|
||||
w_new[int(adjacency_list[i][0])] = w[i]
|
||||
|
||||
for i in range(len(adjacency_list)):
|
||||
del adjacency_list_new[i][0]
|
||||
xadj = [0]
|
||||
w_f = []
|
||||
final_adjacency_list = []
|
||||
for i in range(len(adjacency_list_new)):
|
||||
final_adjacency_list = final_adjacency_list + adjacency_list_new[i]
|
||||
xadj.append(len(final_adjacency_list))
|
||||
w_f = w_f + w_new[i]
|
||||
|
||||
# (edgecuts, parts) = pymetis.part_graph(nparts=group_num, adjacency=adjacency_list_new)
|
||||
metis_options = pymetis.Options()
|
||||
metis_options.seed = 42
|
||||
(edgecuts, parts) = pymetis.part_graph(
|
||||
nparts=group_num,
|
||||
adjncy=final_adjacency_list,
|
||||
xadj=xadj,
|
||||
eweights=w_f,
|
||||
options=metis_options,
|
||||
)
|
||||
# (edgecuts, parts) = pymetis.part_graph(nparts=group_num, adjacency=adjacency_list_new)
|
||||
candidate_group_list = [[] * 1 for i in range(group_num)]
|
||||
for i in range(len(all_node_iter_new)):
|
||||
candidate_group_list[parts[i]].append(all_node_iter_new[i])
|
||||
|
||||
"""parts_new = np.zeros(len(candidate_node_input), dtype=int)
|
||||
for i in range(len(candidate_group_list)):
|
||||
temp_group = candidate_group_list[i]
|
||||
for each_node in temp_group:
|
||||
parts_new[node_dict[each_node]] = i
|
||||
parts_new = list(parts_new)"""
|
||||
|
||||
new_center = []
|
||||
new_group = []
|
||||
new_center_candidates = []
|
||||
new_all_node = []
|
||||
candidate_pipe_set = set(candidate_pipe_input)
|
||||
all_grouped_pipe = []
|
||||
for i in range(group_num):
|
||||
# 构建子图
|
||||
G_sub = G0.subgraph(candidate_group_list[i])
|
||||
|
||||
# 计算联通子图
|
||||
sub_graphs = networkx.connected_components(G_sub)
|
||||
if networkx.number_connected_components(G_sub) == 1:
|
||||
# 求交集
|
||||
nodeset = G_sub.nodes()
|
||||
pipeset_set = set(cal_area_node_linked_pipe(nodeset, node_pipe_dic))
|
||||
candidate_pipe = sorted(pipeset_set.intersection(candidate_pipe_set))
|
||||
|
||||
# 判断集合是否保留
|
||||
if len(candidate_pipe) > 0:
|
||||
# 保留 计算中心
|
||||
center_t = pick_center_pipe(
|
||||
node_x,
|
||||
node_y,
|
||||
candidate_pipe,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
)
|
||||
center_candidates_t = pick_dual_center_pipes(
|
||||
node_x,
|
||||
node_y,
|
||||
candidate_pipe,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
pipe_diameter,
|
||||
)
|
||||
|
||||
# 更新
|
||||
new_center.append(center_t)
|
||||
new_center_candidates.append(center_candidates_t)
|
||||
new_group.append(candidate_pipe)
|
||||
new_all_node.append(nodeset)
|
||||
all_grouped_pipe = all_grouped_pipe + candidate_pipe
|
||||
else:
|
||||
for c in sorted(sub_graphs, key=lambda c: min(c)):
|
||||
G_temp = G0.subgraph(c)
|
||||
nodeset = G_temp.nodes()
|
||||
pipeset = cal_area_node_linked_pipe(nodeset, node_pipe_dic)
|
||||
pipeset_set = set(pipeset)
|
||||
|
||||
# 求交集
|
||||
candidate_pipe = sorted(pipeset_set.intersection(candidate_pipe_set))
|
||||
# print(len(candidate_node))
|
||||
# 判断集合是否保留
|
||||
if len(candidate_pipe) > 0:
|
||||
# 保留 计算中心
|
||||
center_t = pick_center_pipe(
|
||||
node_x,
|
||||
node_y,
|
||||
candidate_pipe,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
)
|
||||
center_candidates_t = pick_dual_center_pipes(
|
||||
node_x,
|
||||
node_y,
|
||||
candidate_pipe,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
pipe_diameter,
|
||||
)
|
||||
# 更新
|
||||
new_center.append(center_t)
|
||||
new_center_candidates.append(center_candidates_t)
|
||||
new_group.append(candidate_pipe)
|
||||
new_all_node.append(nodeset)
|
||||
all_grouped_pipe = all_grouped_pipe + candidate_pipe
|
||||
record_center = []
|
||||
c_g = 0
|
||||
for each_group in new_group:
|
||||
if len(each_group) < 3:
|
||||
record_center.append(new_center[c_g])
|
||||
c_g += 1
|
||||
c_g = 0
|
||||
for each_group in new_group:
|
||||
if len(each_group) >= 3:
|
||||
if new_center[c_g] in record_center:
|
||||
new_center[c_g] = find_new_center_pipe(
|
||||
node_x,
|
||||
node_y,
|
||||
each_group,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
pipe_diameter,
|
||||
record_center,
|
||||
)
|
||||
new_center_candidates[c_g] = _dedupe_preserve_order(
|
||||
[new_center[c_g]] + list(new_center_candidates[c_g])
|
||||
)
|
||||
record_center.append(new_center[c_g])
|
||||
c_g += 1
|
||||
|
||||
# visualize_metis_partition(
|
||||
# G0, new_center, new_group,
|
||||
# node_x, node_y,
|
||||
# pipe_start_node_all, pipe_end_node_all
|
||||
# )
|
||||
return new_center, new_group, new_all_node, new_center_candidates
|
||||
|
||||
|
||||
def visualize_metis_partition(
|
||||
G,
|
||||
center_pipes,
|
||||
pipe_groups,
|
||||
node_x,
|
||||
node_y,
|
||||
pipe_start_node_all,
|
||||
pipe_end_node_all,
|
||||
title: str | None = None,
|
||||
block: bool = True,
|
||||
pause_seconds: float | None = None,
|
||||
):
|
||||
"""
|
||||
可视化METIS分区结果(单图模式)
|
||||
参数:
|
||||
G: 原始管网图(nx.Graph)
|
||||
center_pipes: 中心管道列表(list)
|
||||
pipe_groups: 分组管道列表(list of lists)
|
||||
node_x: 节点X坐标字典(dict)
|
||||
node_y: 节点Y坐标字典(dict)
|
||||
pipe_start_node_all: 管道起点字典(dict)
|
||||
pipe_end_node_all: 管道终点字典(dict)
|
||||
"""
|
||||
fig = plt.figure("metis_partition_convergence", figsize=(22.51, 12.48))
|
||||
fig.clf()
|
||||
ax = fig.add_subplot(111)
|
||||
if not block:
|
||||
plt.ion()
|
||||
|
||||
# 生成颜色映射(自动扩展颜色数量)
|
||||
colors = plt.cm.tab20(np.linspace(0, 1, len(pipe_groups)))
|
||||
|
||||
# --- 绘制背景管网(灰色半透明) ---
|
||||
for edge in G.edges():
|
||||
start_node, end_node = edge
|
||||
ax.plot(
|
||||
[node_x[start_node], node_x[end_node]],
|
||||
[node_y[start_node], node_y[end_node]],
|
||||
color="lightgray",
|
||||
linewidth=0.5,
|
||||
alpha=0.3,
|
||||
zorder=1, # 确保背景在底层
|
||||
)
|
||||
|
||||
# --- 绘制各分区管道(彩色)---
|
||||
legend_handles = [] # 用于图例的句柄
|
||||
for i, (group, center) in enumerate(zip(pipe_groups, center_pipes)):
|
||||
color = colors[i % len(colors)] # 循环使用颜色
|
||||
|
||||
# 绘制分组管道
|
||||
for pipe in group:
|
||||
start = pipe_start_node_all[pipe]
|
||||
end = pipe_end_node_all[pipe]
|
||||
line = ax.plot(
|
||||
[node_x[start], node_x[end]],
|
||||
[node_y[start], node_y[end]],
|
||||
color=color,
|
||||
linewidth=2.5,
|
||||
alpha=0.8,
|
||||
zorder=2,
|
||||
)
|
||||
# 只为每个分组的第一个管道添加图例句柄
|
||||
if pipe == group[0]:
|
||||
legend_handles.append(line[0])
|
||||
|
||||
# 高亮中心管道(红色虚线)
|
||||
if center in pipe_start_node_all and center in pipe_end_node_all:
|
||||
start = pipe_start_node_all[center]
|
||||
end = pipe_end_node_all[center]
|
||||
ax.plot(
|
||||
[node_x[start], node_x[end]],
|
||||
[node_y[start], node_y[end]],
|
||||
color="red",
|
||||
linewidth=4,
|
||||
linestyle="--",
|
||||
dash_capstyle="round",
|
||||
zorder=3, # 确保中心管道在最顶层
|
||||
)
|
||||
|
||||
# --- 添加图例和标注 ---
|
||||
# 分组图例
|
||||
if legend_handles:
|
||||
group_labels = [f"Group {i + 1}" for i in range(len(pipe_groups))]
|
||||
ax.legend(
|
||||
legend_handles,
|
||||
group_labels,
|
||||
loc="upper right",
|
||||
title="Partitions",
|
||||
fontsize=8,
|
||||
title_fontsize=10,
|
||||
)
|
||||
|
||||
# 中心管道标注(可选)
|
||||
for i, center in enumerate(center_pipes):
|
||||
if center in pipe_start_node_all:
|
||||
x = (
|
||||
node_x[pipe_start_node_all[center]] + node_x[pipe_end_node_all[center]]
|
||||
) / 2
|
||||
y = (
|
||||
node_y[pipe_start_node_all[center]] + node_y[pipe_end_node_all[center]]
|
||||
) / 2
|
||||
ax.text(
|
||||
x,
|
||||
y,
|
||||
f"C{i + 1}",
|
||||
color="red",
|
||||
fontsize=10,
|
||||
ha="center",
|
||||
va="center",
|
||||
bbox=dict(facecolor="white", alpha=0.8, edgecolor="none"),
|
||||
)
|
||||
|
||||
# --- 图形美化 ---
|
||||
ax.set_title(title or "Water Network Partitioning Overview", fontsize=14, pad=20)
|
||||
ax.set_xlabel("X Coordinate", fontsize=10)
|
||||
ax.set_ylabel("Y Coordinate", fontsize=10)
|
||||
ax.grid(True, alpha=0.2, linestyle=":")
|
||||
fig.tight_layout()
|
||||
|
||||
# 显示图形并强制刷新,避免迭代显示滞后一轮。
|
||||
plt.show(block=block)
|
||||
if not block:
|
||||
fig.canvas.draw_idle()
|
||||
fig.canvas.flush_events()
|
||||
pause_value = 0.001 if pause_seconds is None else max(0.0, float(pause_seconds))
|
||||
plt.pause(max(0.001, pause_value))
|
||||
elif pause_seconds is not None:
|
||||
plt.pause(max(0.0, float(pause_seconds)))
|
||||
return fig
|
||||
|
||||
|
||||
def generate_adjlist_with_all_edges(G, delimiter):
|
||||
for s, nbrs in G.adjacency():
|
||||
line = str(s) + delimiter
|
||||
for t, data in nbrs.items():
|
||||
line += str(t) + delimiter
|
||||
yield line[: -len(delimiter)]
|
||||
|
||||
|
||||
def cal_group_num(candidate_node_input, cal_group_num):
|
||||
candidate_node_num = len(candidate_node_input)
|
||||
if candidate_node_num > 100:
|
||||
group_num_input = cal_group_num # 30
|
||||
else:
|
||||
group_num_input = 10
|
||||
|
||||
return group_num_input
|
||||
|
||||
|
||||
@@ -0,0 +1,198 @@
|
||||
"""噪声生成模块。"""
|
||||
|
||||
import copy
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from .leak_simulator import simple_add_leak, simple_recover_wn, simple_simulation_pf
|
||||
|
||||
|
||||
def add_noise_pd(data, noise_type, noise_para):
|
||||
output_data = copy.deepcopy(data)
|
||||
if type(output_data) == pd.core.frame.Series:
|
||||
if noise_type == "uni":
|
||||
for x in output_data.index:
|
||||
noise = (np.random.random() - 0.5) * 2
|
||||
output_data[x] = output_data[x] + noise * noise_para
|
||||
|
||||
elif noise_type == "gauss":
|
||||
noise = np.random.normal(loc=0, scale=noise_para, size=output_data.shape)
|
||||
output_data = output_data + noise
|
||||
elif type(output_data) == pd.core.frame.DataFrame:
|
||||
if noise_type == "uni":
|
||||
noise = (np.random.random(size=output_data.shape) - 0.5) * 2
|
||||
output_data = output_data + noise * noise_para
|
||||
|
||||
elif noise_type == "gauss":
|
||||
noise = np.random.normal(loc=0, scale=noise_para, size=output_data.shape)
|
||||
output_data = output_data + noise
|
||||
return output_data
|
||||
|
||||
|
||||
def add_noise_number(data, noise_type, noise_para):
|
||||
output_data = copy.deepcopy(data)
|
||||
if noise_type == "uni":
|
||||
noise = (np.random.random() - 0.5) * 2
|
||||
output_data = output_data + noise * noise_para
|
||||
|
||||
elif noise_type == "gauss":
|
||||
noise = random.gauss(0, noise_para)
|
||||
output_data = output_data + noise
|
||||
return output_data
|
||||
|
||||
|
||||
def add_noise_number_flow(data, noise_para_mean, noise_para_std1, noise_para_std2):
|
||||
output_data = copy.deepcopy(data)
|
||||
noise_flag1 = np.random.random() - 0.5
|
||||
if noise_flag1 < 0:
|
||||
noise = noise_para_mean - abs(np.random.normal(loc=0, scale=noise_para_std1))
|
||||
else:
|
||||
noise = noise_para_mean + abs(np.random.normal(loc=0, scale=noise_para_std2))
|
||||
noise_flag2 = np.random.random() - 0.5
|
||||
if noise_flag2 < 0:
|
||||
noise_f = noise * (-1)
|
||||
else:
|
||||
noise_f = noise
|
||||
output_data = output_data + noise_f
|
||||
return output_data
|
||||
|
||||
|
||||
def produce_noise_number(noise_type, noise_para):
|
||||
if noise_type == "uni":
|
||||
noise = (np.random.random() - 0.5) * 2
|
||||
noise = noise * noise_para
|
||||
elif noise_type == "gauss":
|
||||
noise = random.gauss(0, noise_para)
|
||||
else:
|
||||
noise = 0
|
||||
return noise
|
||||
|
||||
|
||||
def add_noise_percentage_pd(data, noise_type, noise_para):
|
||||
output_data = copy.deepcopy(data)
|
||||
|
||||
if type(output_data) == pd.core.frame.Series:
|
||||
if noise_type == "uni":
|
||||
for x in output_data.index:
|
||||
noise = (np.random.random() - 0.5) * 2
|
||||
output_data[x] = output_data[x] * (1 + noise * noise_para / 100)
|
||||
|
||||
elif noise_type == "gauss":
|
||||
for x in output_data.index:
|
||||
noise = np.random.gauss(0, noise_para)
|
||||
output_data[x] = output_data[x] * (1 + noise / 100)
|
||||
# std_noise = noise.std()
|
||||
elif type(output_data) == pd.core.frame.DataFrame:
|
||||
if noise_type == "uni":
|
||||
noise = (np.random.random(size=output_data.shape) - 0.5) * 2
|
||||
output_data = output_data * (1 + noise * noise_para / 100)
|
||||
|
||||
elif noise_type == "gauss":
|
||||
noise = np.random.normal(loc=0, scale=noise_para, size=output_data.shape)
|
||||
output_data = output_data * (1 + noise / 100)
|
||||
# std_noise = noise.std().mean()
|
||||
return output_data
|
||||
|
||||
|
||||
def add_noise_in_wn_pf(
|
||||
wn,
|
||||
pipe_c_noise,
|
||||
timestep_list,
|
||||
pipe_coefficient,
|
||||
sensor_name,
|
||||
sensor_f_name,
|
||||
all_node,
|
||||
basic_demand_pd,
|
||||
noise_type,
|
||||
noise_para,
|
||||
leak_pipe,
|
||||
leak_flow,
|
||||
):
|
||||
|
||||
wn.options.time.duration = 0
|
||||
pipe_roughness_change = add_noise_pd(pipe_coefficient, noise_type, pipe_c_noise)
|
||||
wn = change_para_of_wn(wn, pipe_roughness_change)
|
||||
record_pressure = pd.DataFrame(index=timestep_list, columns=sensor_name)
|
||||
record_flow = pd.DataFrame(index=timestep_list, columns=sensor_f_name)
|
||||
record_noise_all = pd.DataFrame(
|
||||
index=pd.MultiIndex.from_product([timestep_list, all_node]),
|
||||
columns=basic_demand_pd.columns,
|
||||
)
|
||||
record_noise_all = record_noise_all.sort_index()
|
||||
|
||||
# normal 获取添加噪声后的监测点数据
|
||||
for i in range(len(timestep_list)):
|
||||
wn, record_noise = change_node_demand(
|
||||
wn, basic_demand_pd, all_node, noise_type, noise_para
|
||||
)
|
||||
record_noise_all.loc[timestep_list[i]].loc[:, :] = record_noise
|
||||
pressure_temp, flow_temp = simple_simulation_pf(
|
||||
wn, sensor_name, sensor_f_name, [], []
|
||||
)
|
||||
record_pressure.iloc[i, :] = pressure_temp
|
||||
record_flow.iloc[i, :] = flow_temp
|
||||
|
||||
# leak_simulation 获取添加漏损后的监测点数据
|
||||
record_pressure_leak = pd.DataFrame(index=timestep_list, columns=sensor_name)
|
||||
record_flow_leak = pd.DataFrame(index=timestep_list, columns=sensor_f_name)
|
||||
# 改_wz_________________________________________
|
||||
# add leak
|
||||
wn, whole_inf, add_pipe1 = simple_add_leak(wn, leak_flow, leak_pipe)
|
||||
# simulation
|
||||
for i in range(len(timestep_list)):
|
||||
record_noise = record_noise_all.loc[timestep_list[i]]
|
||||
wn = change_node_demand_leak(wn, record_noise, all_node)
|
||||
pressure_temp, flow_temp = simple_simulation_pf(
|
||||
wn, sensor_name, sensor_f_name, leak_pipe, add_pipe1
|
||||
)
|
||||
record_pressure_leak.iloc[i, :] = pressure_temp
|
||||
record_flow_leak.iloc[i, :] = flow_temp
|
||||
# delete leak
|
||||
wn = simple_recover_wn(wn, whole_inf)
|
||||
return wn, record_pressure, record_flow, record_pressure_leak, record_flow_leak
|
||||
|
||||
|
||||
def change_node_demand(wn, basic_demand_pd, all_node, noise_type, noise_para):
|
||||
# 改_wz_____________________________________
|
||||
record_noise = pd.DataFrame(index=all_node, columns=basic_demand_pd.columns)
|
||||
for each_node in all_node:
|
||||
node = wn.get_node(each_node)
|
||||
num_columns = len(basic_demand_pd.columns)
|
||||
# 处理前N-1列(如果有)
|
||||
for i in range(num_columns - 1):
|
||||
# 获取原始值并添加噪声
|
||||
record_noise.loc[each_node].iloc[i] = (
|
||||
1 + produce_noise_number(noise_type, noise_para)
|
||||
) * basic_demand_pd.loc[each_node].iloc[i]
|
||||
node.demand_timeseries_list[i].base_value = record_noise.loc[
|
||||
each_node
|
||||
].iloc[i]
|
||||
# 处理最后一列(当列数>=1时)
|
||||
if num_columns >= 1:
|
||||
last_col = basic_demand_pd.columns[-1]
|
||||
original_last = basic_demand_pd.loc[each_node, last_col]
|
||||
record_noise.loc[each_node, last_col] = original_last
|
||||
node.demand_timeseries_list[-1].base_value = original_last
|
||||
|
||||
return wn, record_noise
|
||||
|
||||
|
||||
def change_node_demand_leak(wn, record_noise, all_node):
|
||||
sample_node = wn.get_node(all_node[0])
|
||||
# num_categories = len(sample_node.demand_timeseries_list)
|
||||
num_categories = 1
|
||||
for each in all_node:
|
||||
node = wn.get_node(each)
|
||||
for i in range(num_categories):
|
||||
node.demand_timeseries_list[i].base_value = record_noise.loc[each].iloc[i]
|
||||
return wn
|
||||
|
||||
|
||||
def change_para_of_wn(wn, pipe_roughness_change):
|
||||
for pipe_name, pipe in wn.pipes():
|
||||
pipe.roughness = pipe_roughness_change[pipe_name]
|
||||
return wn
|
||||
|
||||
|
||||
@@ -0,0 +1,858 @@
|
||||
"""相似性计算模块。"""
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def cal_similarity_simple_return_dd(
|
||||
similarity_mode,
|
||||
monitor_p,
|
||||
predict_p,
|
||||
normal_p,
|
||||
leak_p,
|
||||
monitor_p_all,
|
||||
predict_p_all,
|
||||
normal_p_all,
|
||||
leak_p_all,
|
||||
important_sensor,
|
||||
mean_dpressure,
|
||||
dpressure_std,
|
||||
dpressure_std_all,
|
||||
if_gy=0,
|
||||
cos_or_flow=1,
|
||||
):
|
||||
# cos_or_flow 用于 CAF
|
||||
dpressure_s = normal_p - leak_p
|
||||
dpressure = predict_p - monitor_p
|
||||
act_dpressure = pd.Series(dtype=object)
|
||||
for i in range(len(leak_p.index)):
|
||||
if dpressure_std.iloc[i] > -200: # 0.001:
|
||||
if if_gy == 1:
|
||||
act_dpressure[leak_p.index[i]] = (
|
||||
leak_p.iloc[i] - monitor_p.iloc[i]
|
||||
) / dpressure_std.iloc[i]
|
||||
else:
|
||||
act_dpressure[leak_p.index[i]] = leak_p.iloc[i] - monitor_p.iloc[i]
|
||||
|
||||
if similarity_mode == "COS" or (similarity_mode == "CAF" and cos_or_flow == 1):
|
||||
|
||||
"""if leak_p.min()<0:
|
||||
none_flag = 1
|
||||
similarity_cos = 0
|
||||
similarity_dis = 0
|
||||
else:"""
|
||||
none_flag = 0
|
||||
sensor_for_cos = sorted(
|
||||
set(dpressure_s.index).intersection(set(act_dpressure.index))
|
||||
)
|
||||
"""if len(dpressure_s) ==0 or len(dpressure) ==0:
|
||||
jj=9
|
||||
else:"""
|
||||
try:
|
||||
s1 = np.dot(
|
||||
np.transpose(dpressure_s.loc[sensor_for_cos]),
|
||||
dpressure.loc[sensor_for_cos],
|
||||
)
|
||||
s2 = np.linalg.norm(dpressure_s.loc[sensor_for_cos]) * np.linalg.norm(
|
||||
dpressure.loc[sensor_for_cos]
|
||||
)
|
||||
if s2 == 0:
|
||||
s2 = s2 + 0.0001
|
||||
similarity_cos = s1 / s2
|
||||
similarity_dis = 0
|
||||
except Exception as e:
|
||||
print(dpressure_s)
|
||||
print(sensor_for_cos)
|
||||
print(act_dpressure)
|
||||
print(dpressure_std)
|
||||
print(dpressure)
|
||||
|
||||
elif similarity_mode == "DIS" or (similarity_mode == "CAF" and cos_or_flow == 2):
|
||||
"""if leak_p.min()<0:
|
||||
none_flag = 1
|
||||
else:"""
|
||||
none_flag = 0
|
||||
important_sensor = sorted(
|
||||
set(important_sensor).intersection(set(act_dpressure.index))
|
||||
)
|
||||
part_dpressure = dpressure_s[important_sensor] - dpressure[important_sensor]
|
||||
similarity_pre_DIS = np.linalg.norm(part_dpressure)
|
||||
# similarity_pre_DIS_later = 1 / (1 + similarity_pre_DIS)
|
||||
similarity_dis = similarity_pre_DIS
|
||||
similarity_cos = 0
|
||||
|
||||
elif similarity_mode == "CAD_new":
|
||||
act_dpressure = leak_p - monitor_p
|
||||
"""if leak_p.min() < 0:
|
||||
none_flag = 1
|
||||
similarity_cos = 0
|
||||
similarity_dis =0
|
||||
else:"""
|
||||
none_flag = 0
|
||||
# cos
|
||||
s1 = np.dot(np.transpose(dpressure_s), dpressure)
|
||||
s2 = np.linalg.norm(dpressure_s) * np.linalg.norm(dpressure)
|
||||
|
||||
if s2 == 0:
|
||||
s2 = s2 + 0.0001
|
||||
similarity_cos = s1 / s2
|
||||
|
||||
# DIS
|
||||
part_dpressure = act_dpressure.loc[important_sensor]
|
||||
similarity_pre_DIS = np.linalg.norm(part_dpressure)
|
||||
similarity_pre_DIS_later = 1 / (1 + similarity_pre_DIS)
|
||||
similarity_dis = similarity_pre_DIS
|
||||
|
||||
elif similarity_mode == "CAD_new_gy" or similarity_mode == "CDF":
|
||||
# cos
|
||||
sensor_for_cos = sorted(
|
||||
set(dpressure_s.index).intersection(set(act_dpressure.index))
|
||||
)
|
||||
if len(sensor_for_cos) == 0 and len(dpressure_s) == 0:
|
||||
similarity_cos = 0
|
||||
elif len(sensor_for_cos) == 0 and len(dpressure_s) > 0:
|
||||
sensor_for_cos = list(dpressure_s.index)
|
||||
none_flag = 0
|
||||
s1 = np.dot(
|
||||
np.transpose(dpressure_s.loc[sensor_for_cos]),
|
||||
dpressure.loc[sensor_for_cos],
|
||||
)
|
||||
s2 = np.linalg.norm(dpressure_s.loc[sensor_for_cos]) * np.linalg.norm(
|
||||
dpressure.loc[sensor_for_cos]
|
||||
)
|
||||
|
||||
if s2 == 0:
|
||||
s2 = s2 + 0.0001
|
||||
similarity_cos = s1 / s2
|
||||
else:
|
||||
none_flag = 0
|
||||
s1 = np.dot(
|
||||
np.transpose(dpressure_s.loc[sensor_for_cos]),
|
||||
dpressure.loc[sensor_for_cos],
|
||||
)
|
||||
s2 = np.linalg.norm(dpressure_s.loc[sensor_for_cos]) * np.linalg.norm(
|
||||
dpressure.loc[sensor_for_cos]
|
||||
)
|
||||
|
||||
if s2 == 0:
|
||||
s2 = s2 + 0.0001
|
||||
similarity_cos = s1 / s2
|
||||
|
||||
# DIS
|
||||
important_sensor_new = sorted(
|
||||
set(important_sensor).intersection(set(act_dpressure.index))
|
||||
)
|
||||
if len(important_sensor_new) == 0:
|
||||
important_sensor_new = important_sensor
|
||||
act_dpressure = pd.Series(dtype=object)
|
||||
for i in range(len(leak_p_all.index)):
|
||||
# if dpressure_std.iloc [i] > -200: # 0.001:
|
||||
if if_gy == 1:
|
||||
act_dpressure[leak_p_all.index[i]] = (
|
||||
leak_p_all.iloc[i] - monitor_p_all.iloc[i]
|
||||
) / dpressure_std_all.iloc[i]
|
||||
else:
|
||||
act_dpressure[leak_p_all.index[i]] = (
|
||||
leak_p_all.iloc[i] - monitor_p_all.iloc[i]
|
||||
)
|
||||
# part_dpressure = act_dpressure.loc[important_sensor_new]
|
||||
|
||||
part_dpressure = (
|
||||
dpressure.loc[important_sensor_new] - dpressure_s.loc[important_sensor_new]
|
||||
)
|
||||
similarity_pre_DIS = np.linalg.norm(part_dpressure) ## chang test
|
||||
# part_dpressure = dpressure_s.loc[important_sensor]-dpressure.loc[important_sensor]
|
||||
# similarity_pre_DIS = np.linalg.norm(part_dpressure)
|
||||
# similarity_pre_DIS_later = 1 / (1 + similarity_pre_DIS)
|
||||
similarity_dis = similarity_pre_DIS
|
||||
elif similarity_mode == "OF":
|
||||
# cos
|
||||
similarity_cos = 0
|
||||
none_flag = 0
|
||||
# DIS
|
||||
important_sensor_new = sorted(
|
||||
set(important_sensor).intersection(set(act_dpressure.index))
|
||||
)
|
||||
if len(important_sensor_new) == 0:
|
||||
important_sensor_new = important_sensor
|
||||
act_dpressure = pd.Series(dtype=object)
|
||||
for i in range(len(leak_p_all.index)):
|
||||
# if dpressure_std.iloc [i] > -200: # 0.001:
|
||||
if if_gy == 1:
|
||||
act_dpressure[leak_p_all.index[i]] = (
|
||||
leak_p_all.iloc[i] - monitor_p_all.iloc[i]
|
||||
) / dpressure_std_all.iloc[i]
|
||||
else:
|
||||
act_dpressure[leak_p_all.index[i]] = (
|
||||
leak_p_all.iloc[i] - monitor_p_all.iloc[i]
|
||||
)
|
||||
# part_dpressure = act_dpressure.loc[important_sensor_new]
|
||||
|
||||
part_dpressure = (
|
||||
dpressure.loc[important_sensor_new] - dpressure_s.loc[important_sensor_new]
|
||||
)
|
||||
similarity_pre_DIS = np.linalg.norm(part_dpressure) ## chang test
|
||||
# part_dpressure = dpressure_s.loc[important_sensor]-dpressure.loc[important_sensor]
|
||||
# similarity_pre_DIS = np.linalg.norm(part_dpressure)
|
||||
# similarity_pre_DIS_later = 1 / (1 + similarity_pre_DIS)
|
||||
similarity_dis = similarity_pre_DIS
|
||||
|
||||
return similarity_cos, similarity_dis, none_flag
|
||||
|
||||
|
||||
def adjust(
|
||||
similarity_cos,
|
||||
similarity_dis,
|
||||
record_success_candidate,
|
||||
record_success_no_candidate,
|
||||
):
|
||||
if len(record_success_no_candidate) > 0:
|
||||
for each in record_success_no_candidate:
|
||||
similarity_cos[each] = similarity_cos[record_success_candidate].min() * 0.9
|
||||
similarity_dis[each] = similarity_dis[record_success_candidate].max() * 1.1
|
||||
return similarity_cos, similarity_dis
|
||||
|
||||
|
||||
def cal_sq_all_multi(
|
||||
similarity_cos,
|
||||
similarity_dis,
|
||||
similarity_f,
|
||||
candidate_pipe,
|
||||
timestep_list_spc,
|
||||
if_flow,
|
||||
if_only_cos,
|
||||
if_only_flow,
|
||||
cos_h_input,
|
||||
dis_h_input,
|
||||
dis_f_h_input,
|
||||
if_compalsive,
|
||||
cos_sensor_num,
|
||||
flow_sensor_num,
|
||||
):
|
||||
"""融合多种相似性并输出按时刻与候选管段组织的综合相似度。
|
||||
|
||||
该函数会根据模式开关(是否仅流量、是否仅 COS、是否包含流量)对
|
||||
`similarity_cos`、`similarity_dis`、`similarity_f` 做标准化,并计算
|
||||
权重 `sq_cos/sq_dis/sq_f` 后进行加权融合。
|
||||
|
||||
Args:
|
||||
similarity_cos: 压力余弦相似性(DataFrame/Series,通常为时刻 x 候选管段)。
|
||||
similarity_dis: 压力距离相似性(DataFrame/Series,通常为时刻 x 候选管段)。
|
||||
similarity_f: 流量距离相似性(DataFrame/Series,通常为时刻 x 候选管段)。
|
||||
candidate_pipe: 候选管段列表,用于输出列索引。
|
||||
timestep_list_spc: 时刻列表,用于输出行索引。
|
||||
if_flow: 是否启用流量相似性(1 启用,0 禁用)。
|
||||
if_only_cos: 相似性模式标识(0: COS+DIS;1: COS;其他值按分支定义处理)。
|
||||
if_only_flow: 是否仅使用流量相似性(1 是,0 否)。
|
||||
cos_h_input: 外部给定的 COS 权重(强制权重模式下使用)。
|
||||
dis_h_input: 外部给定的 DIS 权重(强制权重模式下使用)。
|
||||
dis_f_h_input: 外部给定的流量权重(强制权重模式下使用)。
|
||||
if_compalsive: 是否使用外部强制权重(1 使用输入权重,0 自动计算权重)。
|
||||
cos_sensor_num: 压力传感器数量,用于权重调整。
|
||||
flow_sensor_num: 流量传感器数量,用于权重调整。
|
||||
|
||||
Returns:
|
||||
tuple[pd.DataFrame | pd.Series, float, float, float]:
|
||||
- output_similarity_pd: 综合相似性结果。
|
||||
- sq_cos: 最终 COS 权重。
|
||||
- sq_dis: 最终 DIS 权重。
|
||||
- sq_f: 最终流量权重。
|
||||
"""
|
||||
if if_only_flow == 1:
|
||||
similarity_f, h_f = cal_sq_single_array(
|
||||
similarity_f.values.reshape((-1, 1)), if_direct=2
|
||||
)
|
||||
sq_cos = 0
|
||||
sq_dis = 0
|
||||
sq_f = 1
|
||||
similarity_all = similarity_f * sq_f
|
||||
output_similarity = similarity_all.reshape((-1, len(candidate_pipe)))
|
||||
output_similarity_pd = pd.DataFrame(
|
||||
output_similarity, index=timestep_list_spc, columns=candidate_pipe
|
||||
)
|
||||
else:
|
||||
if if_only_cos == 0:
|
||||
if if_flow == 1:
|
||||
# standerdize
|
||||
similarity_cos, h_cos = cal_sq_single_array(
|
||||
similarity_cos.values.reshape((-1, 1)), if_direct=1
|
||||
)
|
||||
similarity_dis, h_dis = cal_sq_single_array(
|
||||
similarity_dis.values.reshape((-1, 1)), if_direct=2
|
||||
)
|
||||
similarity_f, h_f = cal_sq_single_array(
|
||||
similarity_f.values.reshape((-1, 1)), if_direct=2
|
||||
)
|
||||
if if_compalsive == 1:
|
||||
sq_cos = cos_h_input
|
||||
sq_dis = dis_h_input
|
||||
sq_f = dis_f_h_input
|
||||
else:
|
||||
"""sq_cos = h_cos/(h_cos +h_dis +h_f )
|
||||
sq_dis = h_dis/(h_cos +h_dis +h_f )
|
||||
sq_f = h_f/(h_cos +h_dis +h_f )"""
|
||||
sq_cos, sq_dis, sq_f = add_weight_for_SQ(
|
||||
h_cos, h_dis, h_f, cos_sensor_num, flow_sensor_num
|
||||
)
|
||||
|
||||
"""if cos_sensor_num == 2 and sq_cos>0.2:
|
||||
sq_cos = 0.2
|
||||
sq_dis = 0.8*h_dis / (h_dis + h_f)
|
||||
sq_f = 0.8*h_f / (h_dis + h_f)
|
||||
if cos_sensor_num == 1 and sq_dis > 0.3:
|
||||
sq_cos = 0.1
|
||||
sq_dis = 0.3
|
||||
sq_f = 0.6"""
|
||||
sq_cos, sq_dis, sq_f = adjust_ratio("CDF", sq_cos, sq_dis, sq_f)
|
||||
if cos_sensor_num <= 1:
|
||||
sq_cos = 0
|
||||
# similarity
|
||||
|
||||
similarity_all = (
|
||||
similarity_cos * sq_cos
|
||||
+ similarity_dis * sq_dis
|
||||
+ similarity_f * sq_f
|
||||
)
|
||||
output_similarity = similarity_all.reshape((-1, len(candidate_pipe)))
|
||||
output_similarity_pd = pd.DataFrame(
|
||||
output_similarity, index=timestep_list_spc, columns=candidate_pipe
|
||||
)
|
||||
else:
|
||||
# standerdize
|
||||
similarity_cos, h_cos = cal_sq_single_array(
|
||||
similarity_cos.values.reshape((-1, 1)), if_direct=1
|
||||
)
|
||||
similarity_dis, h_dis = cal_sq_single_array(
|
||||
similarity_dis.values.reshape((-1, 1)), if_direct=2
|
||||
)
|
||||
if if_compalsive == 1:
|
||||
sq_cos = cos_h_input
|
||||
sq_dis = dis_h_input
|
||||
else:
|
||||
sq_cos = h_cos / (h_cos + h_dis)
|
||||
sq_dis = h_dis / (h_cos + h_dis)
|
||||
if cos_sensor_num == 2 and sq_cos > 0.5:
|
||||
sq_cos = 0.5
|
||||
sq_dis = 0.5
|
||||
sq_cos, sq_dis, sq_f = adjust_ratio("CAD_new_gy", sq_cos, sq_dis, 0)
|
||||
sq_f = 0
|
||||
# similarity
|
||||
similarity_all = similarity_cos * sq_cos + similarity_dis * sq_dis
|
||||
output_similarity = similarity_all.reshape((-1, len(candidate_pipe)))
|
||||
output_similarity_pd = pd.DataFrame(
|
||||
output_similarity, index=timestep_list_spc, columns=candidate_pipe
|
||||
)
|
||||
elif if_only_cos == 1:
|
||||
if if_flow == 1:
|
||||
# standerdize
|
||||
similarity_cos, h_cos = cal_sq_single_array(
|
||||
similarity_cos.values.reshape((-1, 1)), if_direct=1
|
||||
)
|
||||
similarity_f, h_f = cal_sq_single_array(
|
||||
similarity_f.values.reshape((-1, 1)), if_direct=2
|
||||
)
|
||||
if if_compalsive == 1:
|
||||
sq_cos = cos_h_input
|
||||
sq_f = dis_f_h_input
|
||||
else:
|
||||
sq_cos = h_cos / (h_cos + h_f)
|
||||
sq_f = h_f / (h_cos + h_f)
|
||||
sq_cos, sq_dis, sq_f = adjust_ratio("CAF", sq_cos, 0, sq_f)
|
||||
sq_dis = 0
|
||||
# similarity
|
||||
similarity_all = similarity_cos * sq_cos + similarity_f * sq_f
|
||||
output_similarity = similarity_all.reshape((-1, len(candidate_pipe)))
|
||||
output_similarity_pd = pd.DataFrame(
|
||||
output_similarity, index=timestep_list_spc, columns=candidate_pipe
|
||||
)
|
||||
|
||||
else:
|
||||
sq_cos = cos_h_input
|
||||
sq_dis = dis_h_input
|
||||
sq_f = dis_f_h_input
|
||||
output_similarity_pd = similarity_cos
|
||||
else:
|
||||
sq_cos = cos_h_input
|
||||
sq_dis = dis_h_input
|
||||
sq_f = dis_f_h_input
|
||||
output_similarity_pd = 1 / (similarity_dis + 1)
|
||||
return output_similarity_pd, sq_cos, sq_dis, sq_f
|
||||
|
||||
|
||||
def add_weight_for_SQ(h_cos, h_dis, h_f, sensor_cos_num, sensor_f_num):
|
||||
h_f_new = h_f * sensor_f_num
|
||||
if sensor_cos_num <= 1:
|
||||
h_cos_new = 0
|
||||
h_dis_new = h_dis * sensor_cos_num
|
||||
else:
|
||||
h_cos_new = h_cos * sensor_cos_num # / 2
|
||||
h_dis_new = h_dis * sensor_cos_num # / 2
|
||||
cos_sq = h_cos_new / (h_cos_new + h_dis_new + h_f_new)
|
||||
dis_sq = h_dis_new / (h_cos_new + h_dis_new + h_f_new)
|
||||
f_sq = h_f_new / (h_cos_new + h_dis_new + h_f_new)
|
||||
|
||||
if sensor_cos_num == 2 and cos_sq > 0.2:
|
||||
cos_sq = 0.2
|
||||
dis_sq = 0.8 * h_dis_new / (h_dis_new + h_f_new)
|
||||
f_sq = 0.8 * h_f_new / (h_dis_new + h_f_new)
|
||||
"""if sensor_cos_num == 1:
|
||||
if dis_sq / f_sq > sensor_cos_num/sensor_f_num:
|
||||
dis_sq = sensor_cos_num/sensor_f_num
|
||||
f_sq=1-dis_sq"""
|
||||
# if h_dis_new/h_f_new > sensor_cos_num/sensor_f_num
|
||||
return cos_sq, dis_sq, f_sq
|
||||
|
||||
|
||||
def cal_sq_single_array(similarity_pre, if_direct):
|
||||
if similarity_pre.max() - similarity_pre.min() == 0:
|
||||
similarity_pre = np.ones(similarity_pre.shape)
|
||||
else:
|
||||
if if_direct == 1:
|
||||
similarity_pre = (
|
||||
0.998
|
||||
* (similarity_pre - similarity_pre.min())
|
||||
/ (similarity_pre.max() - similarity_pre.min())
|
||||
+ 0.002
|
||||
)
|
||||
else:
|
||||
similarity_pre = (
|
||||
0.998
|
||||
* (similarity_pre.max() - similarity_pre)
|
||||
/ (similarity_pre.max() - similarity_pre.min())
|
||||
+ 0.002
|
||||
)
|
||||
# calculate pij
|
||||
similarity_p = similarity_pre / similarity_pre.sum()
|
||||
# cal xinxishang
|
||||
similarity_lnp = np.zeros((len(similarity_pre), 1))
|
||||
for j in range(len(similarity_p)):
|
||||
similarity_lnp[j] = -similarity_p[j] * math.log(similarity_p[j], math.e)
|
||||
h = 1 - 1 / math.log(len(similarity_pre), math.e) * similarity_lnp.sum()
|
||||
return similarity_pre, h
|
||||
|
||||
|
||||
def cal_similarity_all_multi_new_sq_improve_double_lzr(
|
||||
candidate_pipe,
|
||||
similarity_mode,
|
||||
pressure_leak,
|
||||
monitor_p,
|
||||
predict_p,
|
||||
normal_p,
|
||||
if_flow,
|
||||
if_only_cos,
|
||||
if_only_flow,
|
||||
flow_leak,
|
||||
monitor_f,
|
||||
predict_f,
|
||||
normal_f,
|
||||
timestep_list,
|
||||
Top_sensor_num,
|
||||
if_gy,
|
||||
effective_sensor,
|
||||
cos_h,
|
||||
dis_h,
|
||||
dis_f_h,
|
||||
if_compalsive,
|
||||
max_flow,
|
||||
):
|
||||
similarity = pd.Series(dtype=float, index=candidate_pipe)
|
||||
similarity_detail: pd.DataFrame | None = None
|
||||
important_p_sensor = cal_top_sensors(monitor_p, predict_p, Top_sensor_num)
|
||||
# important_f_sensor, basic_f = cal_top_f_sensor(normal_f)
|
||||
important_f_sensor = monitor_f.columns
|
||||
if (
|
||||
len(important_p_sensor) > 0 or len(important_f_sensor) > 0
|
||||
): # if len(important_p_sensor) > 0
|
||||
break_flag = 0
|
||||
pressure_leak_new = pressure_leak.swaplevel()
|
||||
# flow_leak_new = flow_leak.swaplevel()
|
||||
if isinstance(flow_leak, pd.DataFrame) and len(flow_leak) > 0:
|
||||
flow_leak_new = flow_leak.swaplevel()
|
||||
else:
|
||||
flow_leak_new = None
|
||||
total_similarity_cos = pd.DataFrame(index=timestep_list, columns=candidate_pipe)
|
||||
total_similarity_dis = pd.DataFrame(index=timestep_list, columns=candidate_pipe)
|
||||
total_similarity_dis_f = pd.DataFrame(
|
||||
index=timestep_list, columns=candidate_pipe
|
||||
)
|
||||
|
||||
for timestep in timestep_list:
|
||||
# cal p_cos, p_dis, f_dis
|
||||
if if_only_flow != 1:
|
||||
pressure_leak_temp = pressure_leak_new.loc[timestep].loc[
|
||||
:, effective_sensor
|
||||
]
|
||||
monitor_p_temp = monitor_p.loc[timestep, effective_sensor]
|
||||
predict_p_temp = predict_p.loc[timestep, effective_sensor]
|
||||
normal_p_temp = normal_p.loc[timestep, effective_sensor]
|
||||
|
||||
(
|
||||
total_similarity_cos.loc[timestep, :],
|
||||
total_similarity_dis.loc[timestep, :],
|
||||
) = cal_similarity_all_cos_dis(
|
||||
candidate_pipe,
|
||||
pressure_leak_temp,
|
||||
similarity_mode,
|
||||
monitor_p_temp,
|
||||
predict_p_temp,
|
||||
normal_p_temp,
|
||||
pressure_leak_new.loc[timestep].loc[:, monitor_p.columns],
|
||||
monitor_p.loc[timestep, :],
|
||||
predict_p.loc[timestep, :],
|
||||
normal_p.loc[timestep, :],
|
||||
important_p_sensor,
|
||||
if_gy,
|
||||
cos_or_flow=1,
|
||||
)
|
||||
|
||||
if if_flow == 1:
|
||||
if len(timestep_list) == 1:
|
||||
leak_f_temp = flow_leak_new.loc[timestep].loc[:, important_f_sensor]
|
||||
monitor_f_temp = monitor_f.loc[timestep, important_f_sensor]
|
||||
predict_f_temp = predict_f.loc[timestep, important_f_sensor]
|
||||
normal_f_temp = normal_f.loc[timestep, important_f_sensor]
|
||||
basic_normal_f_temp = abs(max_flow.loc[important_f_sensor])
|
||||
|
||||
leak_f_temp = leak_f_temp / basic_normal_f_temp
|
||||
monitor_f_temp = monitor_f_temp / basic_normal_f_temp
|
||||
predict_f_temp = predict_f_temp / basic_normal_f_temp
|
||||
normal_f_temp = normal_f_temp / basic_normal_f_temp
|
||||
|
||||
else:
|
||||
basic_f = abs(max_flow.loc[important_f_sensor])
|
||||
leak_f_temp = (
|
||||
flow_leak_new.loc[timestep].loc[:, important_f_sensor] / basic_f
|
||||
)
|
||||
monitor_f_temp = (
|
||||
monitor_f.loc[timestep, important_f_sensor] / basic_f
|
||||
)
|
||||
predict_f_temp = (
|
||||
predict_f.loc[timestep, important_f_sensor] / basic_f
|
||||
)
|
||||
normal_f_temp = normal_f.loc[timestep, important_f_sensor] / basic_f
|
||||
_, total_similarity_dis_f.loc[timestep, :] = cal_similarity_all_cos_dis(
|
||||
candidate_pipe,
|
||||
leak_f_temp,
|
||||
similarity_mode,
|
||||
monitor_f_temp,
|
||||
predict_f_temp,
|
||||
normal_f_temp,
|
||||
flow_leak_new.loc[timestep].loc[:, monitor_f.columns],
|
||||
monitor_f.loc[timestep, :],
|
||||
predict_f.loc[timestep, :],
|
||||
normal_f.loc[timestep, :],
|
||||
important_f_sensor,
|
||||
if_gy,
|
||||
cos_or_flow=2,
|
||||
)
|
||||
else:
|
||||
total_similarity_dis_f = []
|
||||
similarity_all, cos_h, dis_h, dis_f_h = cal_sq_all_multi(
|
||||
total_similarity_cos,
|
||||
total_similarity_dis,
|
||||
total_similarity_dis_f,
|
||||
candidate_pipe,
|
||||
timestep_list,
|
||||
if_flow,
|
||||
if_only_cos,
|
||||
if_only_flow,
|
||||
cos_h,
|
||||
dis_h,
|
||||
dis_f_h,
|
||||
if_compalsive,
|
||||
len(important_p_sensor),
|
||||
len(important_f_sensor),
|
||||
)
|
||||
if len(timestep_list) == 1:
|
||||
similarity = similarity_all.iloc[0]
|
||||
elif len(timestep_list) > 3:
|
||||
for each_candidate in candidate_pipe:
|
||||
similarity[each_candidate] = remove_3_sigma(
|
||||
similarity_all.loc[:, each_candidate]
|
||||
)
|
||||
else:
|
||||
for each_candidate in candidate_pipe:
|
||||
similarity[each_candidate] = similarity_all.loc[
|
||||
:, each_candidate
|
||||
].mean()
|
||||
similarity = similarity.sort_values(ascending=False, kind="mergesort")
|
||||
detail_index = [str(pipe) for pipe in candidate_pipe]
|
||||
similarity_detail = pd.DataFrame(index=detail_index)
|
||||
similarity_detail.index.name = "pipe_id"
|
||||
if isinstance(total_similarity_cos, pd.DataFrame) and len(total_similarity_cos) > 0:
|
||||
pressure_cos_mean = (
|
||||
total_similarity_cos.mean(axis=0)
|
||||
.reindex(candidate_pipe)
|
||||
.to_numpy(dtype=float)
|
||||
)
|
||||
else:
|
||||
pressure_cos_mean = np.full(len(candidate_pipe), np.nan)
|
||||
if isinstance(total_similarity_dis, pd.DataFrame) and len(total_similarity_dis) > 0:
|
||||
pressure_dis_mean = (
|
||||
total_similarity_dis.mean(axis=0)
|
||||
.reindex(candidate_pipe)
|
||||
.to_numpy(dtype=float)
|
||||
)
|
||||
else:
|
||||
pressure_dis_mean = np.full(len(candidate_pipe), np.nan)
|
||||
if isinstance(total_similarity_dis_f, pd.DataFrame) and len(total_similarity_dis_f) > 0:
|
||||
flow_dis_mean = (
|
||||
total_similarity_dis_f.mean(axis=0)
|
||||
.reindex(candidate_pipe)
|
||||
.to_numpy(dtype=float)
|
||||
)
|
||||
else:
|
||||
flow_dis_mean = np.full(len(candidate_pipe), np.nan)
|
||||
similarity_detail["pressure_cos_mean"] = pressure_cos_mean
|
||||
similarity_detail["pressure_dis_mean"] = pressure_dis_mean
|
||||
similarity_detail["flow_dis_mean"] = flow_dis_mean
|
||||
similarity_detail["weight_cos"] = float(cos_h)
|
||||
similarity_detail["weight_dis"] = float(dis_h)
|
||||
similarity_detail["weight_flow"] = float(dis_f_h)
|
||||
similarity_detail["final_similarity"] = (
|
||||
similarity.reindex(candidate_pipe).to_numpy(dtype=float)
|
||||
)
|
||||
similarity_detail["similarity_rank"] = (
|
||||
similarity_detail["final_similarity"].rank(method="dense", ascending=False)
|
||||
).astype(int)
|
||||
similarity_detail["pressure_sensor_count"] = int(len(important_p_sensor))
|
||||
similarity_detail["flow_sensor_count"] = int(len(important_f_sensor))
|
||||
similarity_detail = similarity_detail.sort_values(
|
||||
by="final_similarity", ascending=False, kind="mergesort"
|
||||
)
|
||||
else:
|
||||
break_flag = 1
|
||||
similarity = 0
|
||||
cos_h = 0
|
||||
dis_h = 0
|
||||
dis_f_h = 0
|
||||
return similarity, cos_h, dis_h, dis_f_h, break_flag, similarity_detail
|
||||
|
||||
|
||||
def cal_similarity_all_cos_dis(
|
||||
candidate_pipe,
|
||||
pressure_leak,
|
||||
similarity_mode,
|
||||
monitor_p,
|
||||
predict_p,
|
||||
normal_p,
|
||||
pressure_leak_all,
|
||||
monitor_p_all,
|
||||
predict_p_all,
|
||||
normal_p_all,
|
||||
important_sensor,
|
||||
if_gy,
|
||||
cos_or_flow,
|
||||
):
|
||||
similarity_cos = pd.Series(dtype=float, index=candidate_pipe)
|
||||
similarity_dis = pd.Series(dtype=float, index=candidate_pipe)
|
||||
dpressure = normal_p - pressure_leak
|
||||
|
||||
# 无用 ----------------------------------------------
|
||||
mean_dpressure = dpressure.mean()
|
||||
|
||||
monitor_new = pd.DataFrame(index=["monitor"], columns=monitor_p.index)
|
||||
monitor_new.iloc[0] = monitor_p
|
||||
add_m_leak_pressure = [pressure_leak, monitor_p]
|
||||
add_m_leak_pressure = pd.concat(add_m_leak_pressure)
|
||||
pressure_leak_std = add_m_leak_pressure.std(axis=0, ddof=1)
|
||||
pressure_leak_std = pd.Series(pressure_leak_std, index=pressure_leak.columns)
|
||||
|
||||
add_m_leak_pressure_all = [pressure_leak_all, monitor_p_all]
|
||||
add_m_leak_pressure_all = pd.concat(add_m_leak_pressure_all)
|
||||
pressure_leak_std_all = add_m_leak_pressure_all.std(axis=0, ddof=1)
|
||||
pressure_leak_std_all = pd.Series(
|
||||
pressure_leak_std_all, index=pressure_leak.columns
|
||||
)
|
||||
# 无用 ----------------------------------------------
|
||||
|
||||
monitor_p_temp = monitor_p
|
||||
predict_p_temp = predict_p
|
||||
normal_p_temp = normal_p
|
||||
monitor_p_temp_all = monitor_p_all
|
||||
predict_p_temp_all = predict_p_all
|
||||
normal_p_temp_all = normal_p_all
|
||||
record_success_candidate = []
|
||||
record_success_no_candidate = []
|
||||
for i in range(len(candidate_pipe)):
|
||||
leak_p = pressure_leak.iloc[i, :]
|
||||
leak_p_all = pressure_leak_all.iloc[i, :]
|
||||
similarity_cos.iloc[i], similarity_dis.iloc[i], none_flag = (
|
||||
cal_similarity_simple_return_dd(
|
||||
similarity_mode,
|
||||
monitor_p_temp,
|
||||
predict_p_temp,
|
||||
normal_p_temp,
|
||||
leak_p,
|
||||
monitor_p_temp_all,
|
||||
predict_p_temp_all,
|
||||
normal_p_temp_all,
|
||||
leak_p_all,
|
||||
important_sensor,
|
||||
mean_dpressure,
|
||||
pressure_leak_std,
|
||||
pressure_leak_std_all,
|
||||
if_gy,
|
||||
cos_or_flow,
|
||||
)
|
||||
)
|
||||
if none_flag == 0:
|
||||
record_success_candidate.append(candidate_pipe[i])
|
||||
else:
|
||||
record_success_no_candidate.append(candidate_pipe[i])
|
||||
similarity_cos, similarity_dis = adjust(
|
||||
similarity_cos,
|
||||
similarity_dis,
|
||||
record_success_candidate,
|
||||
record_success_no_candidate,
|
||||
)
|
||||
return similarity_cos, similarity_dis
|
||||
|
||||
|
||||
def cal_top_f_sensor(normal_f):
|
||||
if type(normal_f) == pd.core.frame.DataFrame:
|
||||
mean_f = normal_f.mean()
|
||||
else:
|
||||
mean_f = normal_f
|
||||
output_sensor = []
|
||||
output_normal_f = pd.Series(dtype=object)
|
||||
for i in range(len(mean_f.index)):
|
||||
if abs(mean_f.iloc[i]) > 0.01 / 3600:
|
||||
output_sensor.append(mean_f.index[i])
|
||||
output_normal_f[mean_f.index[i]] = mean_f.iloc[i]
|
||||
return output_sensor, output_normal_f
|
||||
|
||||
|
||||
def cal_top_sensors(monitor_p, predict_p, Top_sensor_num):
|
||||
dpressure = abs(predict_p - monitor_p)
|
||||
if type(dpressure) == pd.core.frame.DataFrame:
|
||||
dpressure = dpressure.mean()
|
||||
dpressure_rank = dpressure.sort_values(ascending=False, kind="mergesort")
|
||||
return list(dpressure_rank.index[:Top_sensor_num])
|
||||
|
||||
|
||||
def remove_3_sigma(similarity_t):
|
||||
all_sample = len(similarity_t.index)
|
||||
apart_sample = math.ceil(all_sample * 0.6)
|
||||
similarity = similarity_t.astype("float")
|
||||
mean_t = similarity.mean()
|
||||
std_t = similarity.std()
|
||||
new_similarity = similarity[
|
||||
(similarity <= mean_t + 3 * std_t) & (similarity >= mean_t - 3 * std_t)
|
||||
]
|
||||
mean_t_new = new_similarity.mean()
|
||||
return mean_t_new
|
||||
|
||||
|
||||
def update_similarity(leak_candidate_center, similarity, leak_center_dict):
|
||||
similarity_new = pd.Series(dtype=float)
|
||||
for each_center in leak_candidate_center:
|
||||
houxuan_center = leak_center_dict[each_center]
|
||||
if len(houxuan_center) > 1:
|
||||
temp_similarity = similarity[houxuan_center]
|
||||
similarity_new[each_center] = temp_similarity.max()
|
||||
else:
|
||||
if type(similarity[each_center]) == pd.core.series.Series:
|
||||
similarity_new[each_center] = similarity[each_center].mean()
|
||||
else:
|
||||
similarity_new[each_center] = similarity[each_center]
|
||||
similarity_new = similarity_new.sort_values(ascending=False, kind="mergesort")
|
||||
return similarity_new
|
||||
|
||||
|
||||
def extra_judge(
|
||||
similarity, min_candidates_to_prune: int = 200, std_relax_factor: float = 0.5
|
||||
):
|
||||
if len(similarity.index) == 0:
|
||||
return 1.0, similarity
|
||||
if len(similarity.index) < int(min_candidates_to_prune):
|
||||
return 1.0, similarity
|
||||
|
||||
mean_similarity = float(similarity.mean())
|
||||
std_similarity = float(similarity.std())
|
||||
if not math.isfinite(std_similarity):
|
||||
std_similarity = 0.0
|
||||
|
||||
threshold = mean_similarity - float(std_relax_factor) * std_similarity
|
||||
out_put_similarity = similarity[similarity >= threshold - 1e-10]
|
||||
if len(out_put_similarity.index) == 0:
|
||||
out_put_similarity = similarity.iloc[:1]
|
||||
|
||||
cut_ratio = len(out_put_similarity.index) / len(similarity.index)
|
||||
return cut_ratio, out_put_similarity
|
||||
|
||||
|
||||
def adjust_ratio(similarity_mode, cos_h, dis_h, dis_f_h, low_limit=0.1):
|
||||
if similarity_mode == "CAF":
|
||||
if cos_h < low_limit:
|
||||
cos_h = low_limit
|
||||
dis_f_h = 1 - cos_h
|
||||
elif dis_f_h < low_limit:
|
||||
dis_f_h = low_limit
|
||||
cos_h = 1 - dis_f_h
|
||||
elif similarity_mode == "CAD_new_gy":
|
||||
if dis_h < low_limit:
|
||||
dis_h = low_limit
|
||||
cos_h = 1 - dis_h
|
||||
elif cos_h < low_limit:
|
||||
cos_h = low_limit
|
||||
dis_h = 1 - cos_h
|
||||
elif similarity_mode == "CDF":
|
||||
normal_index = [0, 1, 2]
|
||||
h_list = [cos_h, dis_h, dis_f_h]
|
||||
if cos_h < low_limit:
|
||||
h_list[0] = low_limit
|
||||
normal_index.remove(0)
|
||||
if dis_h < low_limit:
|
||||
h_list[1] = low_limit
|
||||
normal_index.remove(1)
|
||||
if dis_f_h < low_limit:
|
||||
h_list[2] = low_limit
|
||||
normal_index.remove(2)
|
||||
|
||||
if len(normal_index) == 1:
|
||||
h_list[normal_index[0]] = h_list[normal_index[0]] - (sum(h_list) - 1)
|
||||
elif len(normal_index) == 2:
|
||||
sum_list = sum(h_list)
|
||||
multiper = 1 - (sum_list - 1) / (
|
||||
h_list[normal_index[0]] + h_list[normal_index[1]]
|
||||
)
|
||||
h_list[normal_index[0]] = h_list[normal_index[0]] * multiper
|
||||
h_list[normal_index[1]] = h_list[normal_index[1]] * multiper
|
||||
|
||||
cos_h, dis_h, dis_f_h = h_list[0], h_list[1], h_list[2]
|
||||
|
||||
return cos_h, dis_h, dis_f_h
|
||||
|
||||
|
||||
# 返回相似性计算的模式(不同权重),是否计算流量相似性,是否只计算cos相似性,是否只计算流量相似性。
|
||||
def decode_mode(similarity_mode):
|
||||
if similarity_mode == "COS":
|
||||
if_flow = 0
|
||||
if_only_cos = 1
|
||||
if_only_flow = 0
|
||||
elif similarity_mode == "CAD_new_gy":
|
||||
if_flow = 0
|
||||
if_only_cos = 0
|
||||
if_only_flow = 0
|
||||
elif similarity_mode == "CDF":
|
||||
if_flow = 1
|
||||
if_only_cos = 0
|
||||
if_only_flow = 0
|
||||
elif similarity_mode == "CAF":
|
||||
if_flow = 1
|
||||
if_only_cos = 1
|
||||
if_only_flow = 0
|
||||
elif similarity_mode == "DIS":
|
||||
if_flow = 1
|
||||
if_only_cos = 2
|
||||
if_only_flow = 0
|
||||
elif similarity_mode == "OF":
|
||||
if_flow = 1
|
||||
if_only_cos = 0
|
||||
if_only_flow = 1
|
||||
return if_flow, if_only_cos, if_only_flow
|
||||
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
|
||||
from app.algorithms.cleaning import flow as _flow_module
|
||||
from app.algorithms.cleaning import pressure as _pressure_module
|
||||
|
||||
|
||||
############################################################
|
||||
# 流量监测数据清洗 ***卡尔曼滤波法***
|
||||
############################################################
|
||||
# 2025/08/21 hxyan
|
||||
|
||||
|
||||
def flow_data_clean(input_csv_file: str) -> str:
|
||||
"""
|
||||
读取 input_csv_path 中的每列时间序列,使用一维 Kalman 滤波平滑并用预测值替换基于 3σ 检测出的异常点。
|
||||
保存输出为:<input_filename>_cleaned.xlsx(与输入同目录),并返回输出文件的绝对路径。如有同名文件存在,则覆盖。
|
||||
:param: input_csv_file: 输入的 CSV 文件明或路径
|
||||
:return: 输出文件的绝对路径
|
||||
"""
|
||||
|
||||
# 提供的 input_csv_path 绝对路径,以下为 默认脚本目录下同名 CSV 文件,构建绝对路径,可根据情况修改
|
||||
# 使用 algorithms 根目录保持与原 data_cleaning.py 一致的行为
|
||||
script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
input_csv_path = os.path.join(script_dir, input_csv_file)
|
||||
|
||||
# 检查文件是否存在
|
||||
if not os.path.exists(input_csv_path):
|
||||
raise FileNotFoundError(f"指定的文件不存在: {input_csv_path}")
|
||||
# 调用 clean_flow_data_kf 函数进行数据清洗
|
||||
out_xlsx_path = _flow_module.clean_flow_data_kf(input_csv_path)
|
||||
print("清洗后的数据已保存到:", out_xlsx_path)
|
||||
|
||||
|
||||
############################################################
|
||||
# 压力监测数据清洗 ***kmean++法***
|
||||
############################################################
|
||||
# 2025/08/21 hxyan
|
||||
|
||||
|
||||
def pressure_data_clean(input_csv_file: str) -> str:
|
||||
"""
|
||||
读取 input_csv_path 中的每列时间序列,使用Kmean++清洗数据。
|
||||
保存输出为:<input_filename>_cleaned.xlsx(与输入同目录),并返回输出文件的绝对路径。如有同名文件存在,则覆盖。
|
||||
原始数据在 sheet 'raw_pressure_data',处理后数据在 sheet 'cleaned_pressusre_data'。
|
||||
:param input_csv_path: 输入的 CSV 文件路径
|
||||
:return: 输出文件的绝对路径
|
||||
"""
|
||||
|
||||
# 提供的 input_csv_path 绝对路径,以下为 默认脚本目录下同名 CSV 文件,构建绝对路径,可根据情况修改
|
||||
# 使用 algorithms 根目录保持与原 data_cleaning.py 一致的行为
|
||||
script_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
input_csv_path = os.path.join(script_dir, input_csv_file)
|
||||
|
||||
# 检查文件是否存在
|
||||
if not os.path.exists(input_csv_path):
|
||||
raise FileNotFoundError(f"指定的文件不存在: {input_csv_path}")
|
||||
# 调用 clean_pressure_data_km 函数进行数据清洗
|
||||
out_xlsx_path = _pressure_module.clean_pressure_data_km(input_csv_path)
|
||||
print("清洗后的数据已保存到:", out_xlsx_path)
|
||||
@@ -1,19 +1,40 @@
|
||||
# ...existing code...
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from pykalman import KalmanFilter
|
||||
import os
|
||||
|
||||
from app.algorithms._utils import fill_time_gaps
|
||||
|
||||
def clean_flow_data_kf(input_csv_path: str, show_plot: bool = False) -> str:
|
||||
|
||||
def clean_flow_data_kf(
|
||||
input_csv_path: str, show_plot: bool = False, fill_gaps: bool = True
|
||||
) -> str:
|
||||
"""
|
||||
读取 input_csv_path 中的每列时间序列,使用一维 Kalman 滤波平滑并用预测值替换基于 3σ 检测出的异常点。
|
||||
保存输出为:<input_filename>_cleaned.xlsx(与输入同目录),并返回输出文件的绝对路径。
|
||||
仅保留输入文件路径作为参数(按要求)。
|
||||
|
||||
Args:
|
||||
input_csv_path: CSV 文件路径
|
||||
show_plot: 是否显示可视化
|
||||
fill_gaps: 是否先补齐时间缺口(默认 True)
|
||||
"""
|
||||
# 读取 CSV
|
||||
data = pd.read_csv(input_csv_path, header=0, index_col=None, encoding="utf-8")
|
||||
|
||||
# 补齐时间缺口(如果数据包含 time 列)
|
||||
if fill_gaps and "time" in data.columns:
|
||||
data = fill_time_gaps(
|
||||
data, time_col="time", freq="1min", short_gap_threshold=10
|
||||
)
|
||||
|
||||
# 分离时间列和数值列
|
||||
time_col_data = None
|
||||
if "time" in data.columns:
|
||||
time_col_data = data["time"]
|
||||
data = data.drop(columns=["time"])
|
||||
|
||||
# 存储 Kalman 平滑结果
|
||||
data_kf = pd.DataFrame(index=data.index, columns=data.columns)
|
||||
# 平滑每一列
|
||||
@@ -63,6 +84,10 @@ def clean_flow_data_kf(input_csv_path: str, show_plot: bool = False) -> str:
|
||||
)
|
||||
cleaned_data.loc[anomaly_idx, f"{col}_cleaned"] = data_kf.loc[anomaly_idx, col]
|
||||
|
||||
# 如果原始数据包含时间列,将其添加回结果
|
||||
if time_col_data is not None:
|
||||
cleaned_data.insert(0, "time", time_col_data)
|
||||
|
||||
# 构造输出文件名:在输入文件名基础上加后缀 _cleaned.xlsx
|
||||
input_dir = os.path.dirname(os.path.abspath(input_csv_path))
|
||||
input_base = os.path.splitext(os.path.basename(input_csv_path))[0]
|
||||
@@ -117,22 +142,31 @@ def clean_flow_data_kf(input_csv_path: str, show_plot: bool = False) -> str:
|
||||
return os.path.abspath(output_path)
|
||||
|
||||
|
||||
def clean_flow_data_dict(data_dict: dict, show_plot: bool = False) -> dict:
|
||||
def clean_flow_data_df_kf(data: pd.DataFrame, show_plot: bool = False) -> dict:
|
||||
"""
|
||||
接收一个字典数据结构,其中键为列名,值为时间序列列表,使用一维 Kalman 滤波平滑并用预测值替换基于 IQR 检测出的异常点。
|
||||
接收一个 DataFrame 数据结构,使用一维 Kalman 滤波平滑并用预测值替换基于 IQR 检测出的异常点。
|
||||
区分合理的0值(流量转换)和异常的0值(连续多个0或孤立0)。
|
||||
返回完整的清洗后的字典数据结构。
|
||||
|
||||
Args:
|
||||
data: 输入 DataFrame(可包含 time 列)
|
||||
show_plot: 是否显示可视化
|
||||
"""
|
||||
# 将字典转换为 DataFrame
|
||||
data = pd.DataFrame(data_dict)
|
||||
# 替换0值,填充NaN值
|
||||
data_filled = data.replace(0, np.nan)
|
||||
# 使用传入的 DataFrame
|
||||
data = data.copy()
|
||||
|
||||
# 对异常0值进行插值:先用前后均值填充,再用ffill/bfill处理剩余NaN
|
||||
data_filled = data_filled.interpolate(method="linear", limit_direction="both")
|
||||
# 补齐时间缺口(如果启用且数据包含 time 列)
|
||||
data_filled = fill_time_gaps(
|
||||
data, time_col="time", freq="1min", short_gap_threshold=10
|
||||
)
|
||||
|
||||
# 处理剩余的0值和NaN值
|
||||
data_filled = data_filled.ffill().bfill()
|
||||
# 保存 time 列用于最后合并
|
||||
time_col_series = None
|
||||
if "time" in data_filled.columns:
|
||||
time_col_series = data_filled["time"]
|
||||
|
||||
# 移除 time 列用于后续清洗
|
||||
data_filled = data_filled.drop(columns=["time"])
|
||||
|
||||
# 存储 Kalman 平滑结果
|
||||
data_kf = pd.DataFrame(index=data_filled.index, columns=data_filled.columns)
|
||||
@@ -192,28 +226,47 @@ def clean_flow_data_dict(data_dict: dict, show_plot: bool = False) -> dict:
|
||||
plt.rcParams["axes.unicode_minus"] = False
|
||||
if show_plot and len(data.columns) > 0:
|
||||
sensor_to_plot = data.columns[0]
|
||||
|
||||
# 定义x轴
|
||||
n = len(data)
|
||||
time = np.arange(n)
|
||||
n_filled = len(data_filled)
|
||||
time_filled = np.arange(n_filled)
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
|
||||
plt.subplot(2, 1, 1)
|
||||
plt.plot(
|
||||
data.index,
|
||||
time,
|
||||
data[sensor_to_plot],
|
||||
label="原始监测值",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
alpha=0.7,
|
||||
)
|
||||
abnormal_zero_idx = data.index[data_filled[sensor_to_plot].isna()]
|
||||
|
||||
# 修正:检查 data_filled 的异常值,绘制在 time_filled 上
|
||||
abnormal_zero_mask = data_filled[sensor_to_plot].isna()
|
||||
# 如果目的是检查0值,应该用 == 0。这里保留 isna() 但修正索引引用,防止crash。
|
||||
# 如果原意是 isna() 则在 fillna 后通常没有 na。假设用户可能想检查 0 值?
|
||||
# 基于 "异常0值" 的标签,改为检查 0 值更合理,但为了保险起见,
|
||||
# 如果 isna() 返回空,就不画。防止索引越界是主要的。
|
||||
abnormal_zero_idx = data_filled.index[abnormal_zero_mask]
|
||||
|
||||
if len(abnormal_zero_idx) > 0:
|
||||
# 注意:如果 abnormal_zero_idx 是基于 data_filled 的索引(0..M-1),
|
||||
# 直接作为 x 坐标即可,因为 time_filled 也是 0..M-1
|
||||
# 而 y 值应该取自 data_filled 或 data_kf,取 data 会越界
|
||||
plt.plot(
|
||||
abnormal_zero_idx,
|
||||
data[sensor_to_plot].loc[abnormal_zero_idx],
|
||||
data_filled[sensor_to_plot].loc[abnormal_zero_idx],
|
||||
"mo",
|
||||
markersize=8,
|
||||
label="异常0值",
|
||||
label="异常值(NaN)",
|
||||
)
|
||||
|
||||
plt.plot(
|
||||
data.index, data_kf[sensor_to_plot], label="Kalman滤波预测值", linewidth=2
|
||||
time_filled, data_kf[sensor_to_plot], label="Kalman滤波预测值", linewidth=2
|
||||
)
|
||||
anomaly_idx = anomalies_info[sensor_to_plot].index
|
||||
if len(anomaly_idx) > 0:
|
||||
@@ -231,7 +284,7 @@ def clean_flow_data_dict(data_dict: dict, show_plot: bool = False) -> dict:
|
||||
|
||||
plt.subplot(2, 1, 2)
|
||||
plt.plot(
|
||||
data.index,
|
||||
time_filled,
|
||||
cleaned_data[sensor_to_plot],
|
||||
label="修复后监测值",
|
||||
marker="o",
|
||||
@@ -246,8 +299,12 @@ def clean_flow_data_dict(data_dict: dict, show_plot: bool = False) -> dict:
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
# 将 time 列添加回结果
|
||||
if time_col_series is not None:
|
||||
cleaned_data.insert(0, "time", time_col_series)
|
||||
|
||||
# 返回完整的修复后字典
|
||||
return cleaned_data.to_dict(orient="list")
|
||||
return cleaned_data
|
||||
|
||||
|
||||
# # 测试
|
||||
@@ -279,7 +336,7 @@ if __name__ == "__main__":
|
||||
print("原始数据长度:", len(data_dict[selected_columns[0]]))
|
||||
|
||||
# 调用函数进行清洗
|
||||
cleaned_dict = clean_flow_data_dict(data_dict, show_plot=True)
|
||||
cleaned_dict = clean_flow_data_df_kf(data_dict, show_plot=True)
|
||||
# 将清洗后的字典写回 CSV
|
||||
out_csv = os.path.join(script_dir, f"{selected_columns[0]}_clean.csv")
|
||||
pd.DataFrame(cleaned_dict).to_csv(out_csv, index=False, encoding="utf-8-sig")
|
||||
@@ -1,22 +1,41 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.decomposition import PCA
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.metrics import silhouette_score
|
||||
from sklearn.impute import SimpleImputer
|
||||
import os
|
||||
|
||||
from app.algorithms._utils import fill_time_gaps
|
||||
|
||||
|
||||
def clean_pressure_data_km(input_csv_path: str, show_plot: bool = False) -> str:
|
||||
def clean_pressure_data_km(
|
||||
input_csv_path: str, show_plot: bool = False, fill_gaps: bool = True
|
||||
) -> str:
|
||||
"""
|
||||
读取输入 CSV,基于 KMeans 检测异常并用滚动平均修复。输出为 <input_basename>_cleaned.xlsx(同目录)。
|
||||
原始数据在 sheet 'raw_pressure_data',处理后数据在 sheet 'cleaned_pressusre_data'。
|
||||
返回输出文件的绝对路径。
|
||||
|
||||
Args:
|
||||
input_csv_path: CSV 文件路径
|
||||
show_plot: 是否显示可视化
|
||||
fill_gaps: 是否先补齐时间缺口(默认 True)
|
||||
"""
|
||||
# 读取 CSV
|
||||
input_csv_path = os.path.abspath(input_csv_path)
|
||||
data = pd.read_csv(input_csv_path, header=0, index_col=None, encoding="utf-8")
|
||||
|
||||
# 补齐时间缺口(如果数据包含 time 列)
|
||||
if fill_gaps and "time" in data.columns:
|
||||
data = fill_time_gaps(
|
||||
data, time_col="time", freq="1min", short_gap_threshold=10
|
||||
)
|
||||
|
||||
# 分离时间列和数值列
|
||||
time_col_data = None
|
||||
if "time" in data.columns:
|
||||
time_col_data = data["time"]
|
||||
data = data.drop(columns=["time"])
|
||||
# 标准化
|
||||
data_norm = (data - data.mean()) / data.std()
|
||||
|
||||
@@ -50,18 +69,18 @@ def clean_pressure_data_km(input_csv_path: str, show_plot: bool = False) -> str:
|
||||
data_repaired.loc[label, sensor] = data_rolled.loc[label, sensor]
|
||||
|
||||
# 可选可视化(使用位置作为 x 轴)
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
plt.rcParams["font.sans-serif"] = ["SimHei"]
|
||||
plt.rcParams["axes.unicode_minus"] = False
|
||||
|
||||
if show_plot and len(data.columns) > 0:
|
||||
n = len(data)
|
||||
time = np.arange(n)
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data.columns:
|
||||
plt.plot(time, data[col].values, marker='o', markersize=3, label=col)
|
||||
plt.plot(time, data[col].values, marker="o", markersize=3, label=col)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
plt.plot(pos, data.iloc[pos][sensor], 'ro', markersize=8)
|
||||
plt.plot(pos, data.iloc[pos][sensor], "ro", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("压力监测值")
|
||||
plt.title("各传感器折线图(红色标记主要异常点)")
|
||||
@@ -70,10 +89,12 @@ def clean_pressure_data_km(input_csv_path: str, show_plot: bool = False) -> str:
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data_repaired.columns:
|
||||
plt.plot(time, data_repaired[col].values, marker='o', markersize=3, label=col)
|
||||
plt.plot(
|
||||
time, data_repaired[col].values, marker="o", markersize=3, label=col
|
||||
)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
plt.plot(pos, data_repaired.iloc[pos][sensor], 'go', markersize=8)
|
||||
plt.plot(pos, data_repaired.iloc[pos][sensor], "go", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("修复后压力监测值")
|
||||
plt.title("修复后各传感器折线图(绿色标记修复值)")
|
||||
@@ -86,35 +107,63 @@ def clean_pressure_data_km(input_csv_path: str, show_plot: bool = False) -> str:
|
||||
output_filename = f"{input_base}_cleaned.xlsx"
|
||||
output_path = os.path.join(input_dir, output_filename)
|
||||
|
||||
# 如果原始数据包含时间列,将其添加回结果
|
||||
data_for_save = data.copy()
|
||||
data_repaired_for_save = data_repaired.copy()
|
||||
if time_col_data is not None:
|
||||
data_for_save.insert(0, "time", time_col_data)
|
||||
data_repaired_for_save.insert(0, "time", time_col_data)
|
||||
|
||||
if os.path.exists(output_path):
|
||||
os.remove(output_path) # 覆盖同名文件
|
||||
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
|
||||
data.to_excel(writer, sheet_name='raw_pressure_data', index=False)
|
||||
data_repaired.to_excel(writer, sheet_name='cleaned_pressusre_data', index=False)
|
||||
|
||||
os.remove(output_path) # 覆盖同名文件
|
||||
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
|
||||
data_for_save.to_excel(writer, sheet_name="raw_pressure_data", index=False)
|
||||
data_repaired_for_save.to_excel(
|
||||
writer, sheet_name="cleaned_pressusre_data", index=False
|
||||
)
|
||||
|
||||
# 返回输出文件的绝对路径
|
||||
return os.path.abspath(output_path)
|
||||
|
||||
|
||||
def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dict:
|
||||
def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> dict:
|
||||
"""
|
||||
接收一个字典数据结构,其中键为列名,值为时间序列列表,使用KMeans聚类检测异常并用滚动平均修复。
|
||||
接收一个 DataFrame 数据结构,使用KMeans聚类检测异常并用滚动平均修复。
|
||||
返回清洗后的字典数据结构。
|
||||
|
||||
Args:
|
||||
data: 输入 DataFrame(可包含 time 列)
|
||||
show_plot: 是否显示可视化
|
||||
"""
|
||||
# 将字典转换为 DataFrame
|
||||
data = pd.DataFrame(data_dict)
|
||||
# 填充NaN值
|
||||
data = data.ffill().bfill()
|
||||
# 异常值预处理
|
||||
# 将0值替换为NaN,然后用线性插值填充
|
||||
data_filled = data.replace(0, np.nan)
|
||||
data_filled = data_filled.interpolate(method="linear", limit_direction="both")
|
||||
# 如果仍有NaN(全为0的列),用前后值填充
|
||||
data_filled = data_filled.ffill().bfill()
|
||||
# 使用传入的 DataFrame
|
||||
data = data.copy()
|
||||
|
||||
# 补齐时间缺口(如果启用且数据包含 time 列)
|
||||
data_filled = fill_time_gaps(
|
||||
data, time_col="time", freq="1min", short_gap_threshold=10
|
||||
)
|
||||
|
||||
# 保存 time 列用于最后合并
|
||||
time_col_series = None
|
||||
if "time" in data_filled.columns:
|
||||
time_col_series = data_filled["time"]
|
||||
|
||||
# 移除 time 列用于后续清洗
|
||||
data_filled = data_filled.drop(columns=["time"])
|
||||
|
||||
# 标准化(使用填充后的数据)
|
||||
data_norm = (data_filled - data_filled.mean()) / data_filled.std()
|
||||
|
||||
# 添加:处理标准化后的 NaN(例如,标准差为0的列),防止异常数据,时间段内所有数据都相同导致计算结果为 NaN
|
||||
imputer = SimpleImputer(
|
||||
strategy="constant", fill_value=0, keep_empty_features=True
|
||||
) # 用 0 填充 NaN,包括全 NaN,并保留空特征
|
||||
data_norm = pd.DataFrame(
|
||||
imputer.fit_transform(data_norm),
|
||||
columns=data_norm.columns,
|
||||
index=data_norm.index,
|
||||
)
|
||||
|
||||
# 聚类与异常检测
|
||||
k = 3
|
||||
kmeans = KMeans(n_clusters=k, init="k-means++", n_init=50, random_state=42)
|
||||
@@ -125,7 +174,7 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
threshold = distances.mean() + 3 * distances.std()
|
||||
|
||||
anomaly_pos = np.where(distances > threshold)[0]
|
||||
anomaly_indices = data.index[anomaly_pos]
|
||||
anomaly_indices = data_filled.index[anomaly_pos]
|
||||
|
||||
anomaly_details = {}
|
||||
for pos in anomaly_pos:
|
||||
@@ -134,13 +183,13 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
center = centers[cluster_idx]
|
||||
diff = abs(row_norm - center)
|
||||
main_sensor = diff.idxmax()
|
||||
anomaly_details[data.index[pos]] = main_sensor
|
||||
anomaly_details[data_filled.index[pos]] = main_sensor
|
||||
|
||||
# 修复:滚动平均(窗口可调)
|
||||
data_rolled = data_filled.rolling(window=13, center=True, min_periods=1).mean()
|
||||
data_repaired = data_filled.copy()
|
||||
for pos in anomaly_pos:
|
||||
label = data.index[pos]
|
||||
label = data_filled.index[pos]
|
||||
sensor = anomaly_details[label]
|
||||
data_repaired.loc[label, sensor] = data_rolled.loc[label, sensor]
|
||||
|
||||
@@ -151,6 +200,8 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
if show_plot and len(data.columns) > 0:
|
||||
n = len(data)
|
||||
time = np.arange(n)
|
||||
n_filled = len(data_filled)
|
||||
time_filled = np.arange(n_filled)
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data.columns:
|
||||
plt.plot(
|
||||
@@ -158,7 +209,7 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
)
|
||||
for col in data_filled.columns:
|
||||
plt.plot(
|
||||
time,
|
||||
time_filled,
|
||||
data_filled[col].values,
|
||||
marker="x",
|
||||
markersize=3,
|
||||
@@ -166,7 +217,7 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
linestyle="--",
|
||||
)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
sensor = anomaly_details[data_filled.index[pos]]
|
||||
plt.plot(pos, data_filled.iloc[pos][sensor], "ro", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("压力监测值")
|
||||
@@ -177,19 +228,23 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data_repaired.columns:
|
||||
plt.plot(
|
||||
time, data_repaired[col].values, marker="o", markersize=3, label=col
|
||||
time_filled, data_repaired[col].values, marker="o", markersize=3, label=col
|
||||
)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
sensor = anomaly_details[data_filled.index[pos]]
|
||||
plt.plot(pos, data_repaired.iloc[pos][sensor], "go", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("修复后压力监测值")
|
||||
plt.title("修复后各传感器折线图(绿色标记修复值)")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("修复后压力监测值")
|
||||
plt.title("修复后各传感器折线图(绿色标记修复值)")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
# 将 time 列添加回结果
|
||||
if time_col_series is not None:
|
||||
data_repaired.insert(0, "time", time_col_series)
|
||||
|
||||
# 返回清洗后的字典
|
||||
return data_repaired.to_dict(orient="list")
|
||||
return data_repaired
|
||||
|
||||
|
||||
# 测试
|
||||
@@ -203,25 +258,26 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
# 测试 clean_pressure_data_dict_km 函数
|
||||
if __name__ == "__main__":
|
||||
import random
|
||||
|
||||
# 读取 szh_pressure_scada.csv 文件
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
csv_path = os.path.join(script_dir, "szh_pressure_scada.csv")
|
||||
data = pd.read_csv(csv_path, header=0, index_col=None, encoding="utf-8")
|
||||
|
||||
|
||||
# 排除 Time 列,随机选择 5 列
|
||||
columns_to_exclude = ['Time']
|
||||
columns_to_exclude = ["Time"]
|
||||
available_columns = [col for col in data.columns if col not in columns_to_exclude]
|
||||
selected_columns = random.sample(available_columns, 5)
|
||||
|
||||
|
||||
# 将选中的列转换为字典
|
||||
data_dict = {col: data[col].tolist() for col in selected_columns}
|
||||
|
||||
|
||||
print("选中的列:", selected_columns)
|
||||
print("原始数据长度:", len(data_dict[selected_columns[0]]))
|
||||
|
||||
|
||||
# 调用函数进行清洗
|
||||
cleaned_dict = clean_pressure_data_dict_km(data_dict, show_plot=True)
|
||||
|
||||
cleaned_dict = clean_pressure_data_df_km(data_dict, show_plot=True)
|
||||
|
||||
print("清洗后的字典键:", list(cleaned_dict.keys()))
|
||||
print("清洗后的数据长度:", len(cleaned_dict[selected_columns[0]]))
|
||||
print("测试完成:函数运行正常")
|
||||
@@ -0,0 +1,3 @@
|
||||
from app.algorithms.health.analyzer import PipelineHealthAnalyzer
|
||||
|
||||
__all__ = ["PipelineHealthAnalyzer"]
|
||||
@@ -0,0 +1,148 @@
|
||||
import os
|
||||
import joblib
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
class PipelineHealthAnalyzer:
|
||||
"""
|
||||
管道健康分析器类,使用随机生存森林模型预测管道的生存概率。
|
||||
|
||||
该类封装了模型加载和预测功能,便于在其他项目中复用。
|
||||
模型基于4个特征进行生存分析预测:材料、直径、流速、压力。
|
||||
|
||||
使用前需确保安装依赖:joblib, pandas, numpy, scikit-survival, matplotlib。
|
||||
"""
|
||||
|
||||
def __init__(self, model_path: str = None):
|
||||
"""
|
||||
初始化分析器,加载预训练的随机生存森林模型。
|
||||
|
||||
:param model_path: 模型文件的路径(默认为相对路径 './model/my_survival_forest_model_quxi.joblib')。
|
||||
:raises FileNotFoundError: 如果模型文件不存在。
|
||||
:raises Exception: 如果模型加载失败。
|
||||
"""
|
||||
if model_path is None:
|
||||
model_path = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"model",
|
||||
"my_survival_forest_model_quxi.joblib",
|
||||
)
|
||||
# 确保 model 目录存在
|
||||
model_dir = os.path.dirname(model_path)
|
||||
if model_dir and not os.path.exists(model_dir):
|
||||
os.makedirs(model_dir, exist_ok=True)
|
||||
|
||||
if not os.path.exists(model_path):
|
||||
raise FileNotFoundError(f"模型文件未找到: {model_path}")
|
||||
|
||||
try:
|
||||
self.rsf = joblib.load(model_path)
|
||||
self.features = [
|
||||
"Material",
|
||||
"Diameter",
|
||||
"Flow Velocity",
|
||||
"Pressure", # 'Temperature', 'Precipitation',
|
||||
# 'Location', 'Structural Defects', 'Functional Defects'
|
||||
]
|
||||
except Exception as e:
|
||||
raise Exception(f"加载模型时出错: {str(e)}")
|
||||
|
||||
def predict_survival(self, data: pd.DataFrame) -> list:
|
||||
"""
|
||||
基于输入数据预测生存函数。
|
||||
|
||||
:param data: pandas DataFrame,包含4个必需特征列。数据应为数值型或可转换为数值型。
|
||||
:return: 生存函数列表,每个元素为一个生存函数对象(包含时间点x和生存概率y)。
|
||||
:raises ValueError: 如果数据缺少必需特征或格式不正确。
|
||||
"""
|
||||
# 检查必需特征是否存在
|
||||
missing_features = [feat for feat in self.features if feat not in data.columns]
|
||||
if missing_features:
|
||||
raise ValueError(f"数据缺少必需特征: {missing_features}")
|
||||
|
||||
# 提取特征数据
|
||||
try:
|
||||
x_test = data[self.features].astype(float) # 确保数值型
|
||||
except ValueError as e:
|
||||
raise ValueError(f"特征数据转换失败,请检查数据类型: {str(e)}")
|
||||
|
||||
# 进行预测
|
||||
survival_functions = self.rsf.predict_survival_function(x_test)
|
||||
return list(survival_functions)
|
||||
|
||||
def plot_survival(
|
||||
self, survival_functions: list, save_path: str = None, show_plot: bool = True
|
||||
):
|
||||
"""
|
||||
可视化生存函数,生成生存概率图表。
|
||||
|
||||
:param survival_functions: predict_survival返回的生存函数列表。
|
||||
:param save_path: 可选,保存图表的路径(.png格式)。如果为None,则不保存。
|
||||
:param show_plot: 是否显示图表(在交互环境中)。
|
||||
"""
|
||||
plt.figure(figsize=(10, 6))
|
||||
for i, sf in enumerate(survival_functions):
|
||||
plt.step(sf.x, sf.y, where="post", label=f"样本 {i + 1}")
|
||||
plt.xlabel("时间(年)")
|
||||
plt.ylabel("生存概率")
|
||||
plt.title("管道生存概率预测")
|
||||
plt.legend()
|
||||
plt.grid(True, alpha=0.3)
|
||||
|
||||
if save_path:
|
||||
plt.savefig(save_path, dpi=300, bbox_inches="tight")
|
||||
print(f"图表已保存到: {save_path}")
|
||||
|
||||
if show_plot:
|
||||
plt.show()
|
||||
else:
|
||||
plt.close()
|
||||
|
||||
|
||||
# 调用说明示例
|
||||
"""
|
||||
在其他项目中使用PipelineHealthAnalyzer类的步骤:
|
||||
|
||||
1. 安装依赖(在requirements.txt中添加):
|
||||
joblib==1.5.0
|
||||
pandas==2.2.3
|
||||
numpy==2.0.2
|
||||
scikit-survival==0.23.1
|
||||
matplotlib==3.9.4
|
||||
|
||||
2. 导入类:
|
||||
from pipeline_health_analyzer import PipelineHealthAnalyzer
|
||||
|
||||
3. 初始化分析器(替换为实际模型路径):
|
||||
analyzer = PipelineHealthAnalyzer(model_path='path/to/my_survival_forest_model3-10.joblib')
|
||||
|
||||
4. 准备数据(pandas DataFrame,包含9个特征列):
|
||||
import pandas as pd
|
||||
data = pd.DataFrame({
|
||||
'Material': [1, 2], # 示例数据
|
||||
'Diameter': [100, 150],
|
||||
'Flow Velocity': [1.5, 2.0],
|
||||
'Pressure': [50, 60],
|
||||
'Temperature': [20, 25],
|
||||
'Precipitation': [0.1, 0.2],
|
||||
'Location': [1, 2],
|
||||
'Structural Defects': [0, 1],
|
||||
'Functional Defects': [0, 0]
|
||||
})
|
||||
|
||||
5. 进行预测:
|
||||
survival_funcs = analyzer.predict_survival(data)
|
||||
|
||||
6. 查看结果(每个样本的生存概率随时间变化):
|
||||
for i, sf in enumerate(survival_funcs):
|
||||
print(f"样本 {i+1}: 时间点: {sf.x[:5]}..., 生存概率: {sf.y[:5]}...")
|
||||
|
||||
7. 可视化(可选):
|
||||
analyzer.plot_survival(survival_funcs, save_path='survival_plot.png')
|
||||
|
||||
注意:
|
||||
- 数据格式必须匹配特征列表,特征值为数值型。
|
||||
- 模型文件需从原项目复制或重新训练。
|
||||
- 如果需要自定义特征或模型参数,可修改类中的features列表或继承此类。
|
||||
"""
|
||||
Binary file not shown.
@@ -0,0 +1,3 @@
|
||||
from app.algorithms.isolation.valve import valve_isolation_analysis
|
||||
|
||||
__all__ = ["valve_isolation_analysis"]
|
||||
@@ -0,0 +1,165 @@
|
||||
from collections import defaultdict, deque
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
|
||||
from app.services.tjnetwork import (
|
||||
get_network_link_nodes,
|
||||
is_node,
|
||||
get_link_properties,
|
||||
)
|
||||
|
||||
|
||||
VALVE_LINK_TYPE = "valve"
|
||||
|
||||
|
||||
def _parse_link_entry(link_entry: str) -> tuple[str, str, str, str]:
|
||||
parts = link_entry.split(":", 3)
|
||||
if len(parts) != 4:
|
||||
raise ValueError(f"Invalid link entry format: {link_entry}")
|
||||
return parts[0], parts[1], parts[2], parts[3]
|
||||
|
||||
|
||||
@lru_cache(maxsize=16)
|
||||
def _get_network_topology(network: str):
|
||||
"""
|
||||
解析并缓存网络拓扑,大幅减少重复的 API 调用和字符串解析开销。
|
||||
返回:
|
||||
- pipe_adj: 永久连通的管道/泵邻接表 (dict[str, set])
|
||||
- all_valves: 所有阀门字典 {id: (n1, n2)}
|
||||
- link_lookup: 链路快速查表 {id: (n1, n2, type)} 用于快速定位事故点
|
||||
- node_set: 所有已知节点集合
|
||||
"""
|
||||
pipe_adj = defaultdict(set)
|
||||
all_valves = {}
|
||||
link_lookup = {}
|
||||
node_set = set()
|
||||
|
||||
# 此处假设 get_network_link_nodes 获取全网数据
|
||||
for link_entry in get_network_link_nodes(network):
|
||||
link_id, link_type, node1, node2 = _parse_link_entry(link_entry)
|
||||
link_type_name = str(link_type).lower()
|
||||
|
||||
link_lookup[link_id] = (node1, node2, link_type_name)
|
||||
node_set.add(node1)
|
||||
node_set.add(node2)
|
||||
|
||||
if link_type_name == VALVE_LINK_TYPE:
|
||||
all_valves[link_id] = (node1, node2)
|
||||
else:
|
||||
# 只有非阀门(管道/泵)才进入永久连通图
|
||||
pipe_adj[node1].add(node2)
|
||||
pipe_adj[node2].add(node1)
|
||||
|
||||
return pipe_adj, all_valves, link_lookup, node_set
|
||||
|
||||
|
||||
def valve_isolation_analysis(
|
||||
network: str, accident_elements: str | list[str], disabled_valves: list[str] = None
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
关阀搜索/分析:基于拓扑结构确定事故隔离所需关阀。
|
||||
:param network: 模型名称
|
||||
:param accident_elements: 事故点(节点或管道/泵/阀门ID),可以是单个ID字符串或ID列表
|
||||
:param disabled_valves: 故障/无法关闭的阀门ID列表
|
||||
:return: dict,包含受影响节点、必须关闭阀门、可选阀门等信息
|
||||
"""
|
||||
if disabled_valves is None:
|
||||
disabled_valves_set = set()
|
||||
else:
|
||||
disabled_valves_set = set(disabled_valves)
|
||||
|
||||
if isinstance(accident_elements, str):
|
||||
target_elements = [accident_elements]
|
||||
else:
|
||||
target_elements = accident_elements
|
||||
|
||||
# 1. 获取缓存拓扑 (极快,无 IO)
|
||||
pipe_adj, all_valves, link_lookup, node_set = _get_network_topology(network)
|
||||
|
||||
# 2. 确定起点,优先查表避免 API 调用
|
||||
start_nodes = set()
|
||||
for element in target_elements:
|
||||
if element in node_set:
|
||||
start_nodes.add(element)
|
||||
elif element in link_lookup:
|
||||
n1, n2, _ = link_lookup[element]
|
||||
start_nodes.add(n1)
|
||||
start_nodes.add(n2)
|
||||
else:
|
||||
# 仅当缓存中没找到时(极少见),才回退到慢速 API
|
||||
if is_node(network, element):
|
||||
start_nodes.add(element)
|
||||
else:
|
||||
props = get_link_properties(network, element)
|
||||
n1, n2 = props.get("node1"), props.get("node2")
|
||||
if n1 and n2:
|
||||
start_nodes.add(n1)
|
||||
start_nodes.add(n2)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Accident element {element} invalid or missing endpoints"
|
||||
)
|
||||
|
||||
# 3. 处理故障阀门 (构建临时增量图)
|
||||
# 我们不修改 cached pipe_adj,而是建立一个 extra_adj
|
||||
extra_adj = defaultdict(list)
|
||||
boundary_valves = {} # 当前有效的边界阀门
|
||||
|
||||
for vid, (n1, n2) in all_valves.items():
|
||||
if vid in disabled_valves_set:
|
||||
# 故障阀门:视为连通管道
|
||||
extra_adj[n1].append(n2)
|
||||
extra_adj[n2].append(n1)
|
||||
else:
|
||||
# 正常阀门:视为潜在边界
|
||||
boundary_valves[vid] = (n1, n2)
|
||||
|
||||
# 4. BFS 搜索 (叠加 pipe_adj 和 extra_adj)
|
||||
affected_nodes: set[str] = set()
|
||||
queue = deque(start_nodes)
|
||||
while queue:
|
||||
node = queue.popleft()
|
||||
if node in affected_nodes:
|
||||
continue
|
||||
affected_nodes.add(node)
|
||||
|
||||
# 遍历永久管道邻居
|
||||
if node in pipe_adj:
|
||||
for neighbor in pipe_adj[node]:
|
||||
if neighbor not in affected_nodes:
|
||||
queue.append(neighbor)
|
||||
|
||||
# 遍历故障阀门带来的额外邻居
|
||||
if node in extra_adj:
|
||||
for neighbor in extra_adj[node]:
|
||||
if neighbor not in affected_nodes:
|
||||
queue.append(neighbor)
|
||||
|
||||
# 5. 结果聚合
|
||||
must_close_valves: list[str] = []
|
||||
optional_valves: list[str] = []
|
||||
|
||||
for valve_id, (n1, n2) in boundary_valves.items():
|
||||
in_n1 = n1 in affected_nodes
|
||||
in_n2 = n2 in affected_nodes
|
||||
if in_n1 and in_n2:
|
||||
optional_valves.append(valve_id)
|
||||
elif in_n1 or in_n2:
|
||||
must_close_valves.append(valve_id)
|
||||
|
||||
must_close_valves.sort()
|
||||
optional_valves.sort()
|
||||
|
||||
result = {
|
||||
"accident_elements": target_elements,
|
||||
"disabled_valves": disabled_valves,
|
||||
"affected_nodes": sorted(affected_nodes),
|
||||
"must_close_valves": must_close_valves,
|
||||
"optional_valves": optional_valves,
|
||||
"isolatable": len(must_close_valves) > 0,
|
||||
}
|
||||
|
||||
if len(target_elements) == 1:
|
||||
result["accident_element"] = target_elements[0]
|
||||
|
||||
return result
|
||||
@@ -0,0 +1,3 @@
|
||||
from app.algorithms.leakage.identifier import LeakageIdentifier
|
||||
|
||||
__all__ = ["LeakageIdentifier"]
|
||||
@@ -0,0 +1,653 @@
|
||||
import wntr
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
import time
|
||||
import argparse
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from typing import Any, List, Dict, Union
|
||||
|
||||
from pymoo.core.problem import Problem
|
||||
from pymoo.core.callback import Callback
|
||||
from pymoo.algorithms.soo.nonconvex.ga import GA
|
||||
from pymoo.operators.crossover.sbx import SBX
|
||||
from pymoo.operators.mutation.pm import PM
|
||||
from pymoo.optimize import minimize as pymoo_minimize
|
||||
from pymoo.termination.default import DefaultSingleObjectiveTermination
|
||||
|
||||
|
||||
from app.algorithms._utils import _cleanup_temp_files
|
||||
|
||||
_worker_data: dict[str, Any] = {}
|
||||
DEFAULT_N_WORKERS = max(1, min(cpu_count() - 1, 4))
|
||||
|
||||
|
||||
def _worker_init(
|
||||
inp_path: str,
|
||||
sensor_nodes: list[str],
|
||||
area_ids: list[str],
|
||||
nodes_by_area: dict[str, list[str]],
|
||||
obs_matrix: np.ndarray,
|
||||
q_sum: float,
|
||||
duration_sec: float,
|
||||
timestep_sec: float,
|
||||
) -> None:
|
||||
global _worker_data
|
||||
wn = wntr.network.WaterNetworkModel(inp_path)
|
||||
wn.options.hydraulic.demand_model = "DD"
|
||||
wn.options.time.duration = duration_sec
|
||||
wn.options.time.hydraulic_timestep = timestep_sec
|
||||
wn.options.time.pattern_timestep = timestep_sec
|
||||
wn.options.time.report_timestep = timestep_sec
|
||||
|
||||
demand_objs_by_area = {}
|
||||
allocatable_counts = {}
|
||||
for area_id in area_ids:
|
||||
demand_objs = []
|
||||
for node_name in nodes_by_area.get(area_id, []):
|
||||
if node_name not in wn.node_name_list:
|
||||
continue
|
||||
node = wn.get_node(node_name)
|
||||
if (
|
||||
hasattr(node, "demand_timeseries_list")
|
||||
and len(node.demand_timeseries_list) > 0
|
||||
):
|
||||
demand_objs.append(node.demand_timeseries_list[0])
|
||||
demand_objs_by_area[area_id] = demand_objs
|
||||
allocatable_counts[area_id] = len(demand_objs)
|
||||
|
||||
_worker_data = {
|
||||
"wn": wn,
|
||||
"sensor_nodes": sensor_nodes,
|
||||
"area_ids": area_ids,
|
||||
"nodes_by_area": nodes_by_area,
|
||||
"demand_objs_by_area": demand_objs_by_area,
|
||||
"allocatable_counts": allocatable_counts,
|
||||
"obs_matrix": obs_matrix,
|
||||
"q_sum": q_sum,
|
||||
}
|
||||
|
||||
|
||||
def _worker_evaluate(raw_ratios: np.ndarray) -> float:
|
||||
d = _worker_data
|
||||
effective_ratio_map = LeakageIdentifier._effective_area_ratios(
|
||||
raw_ratios,
|
||||
d["area_ids"],
|
||||
d["nodes_by_area"],
|
||||
allocatable_counts=d["allocatable_counts"],
|
||||
)
|
||||
|
||||
modifications = []
|
||||
for area_id in d["area_ids"]:
|
||||
ratio = effective_ratio_map.get(area_id, 0.0)
|
||||
if ratio <= 0:
|
||||
continue
|
||||
|
||||
demand_objs = d["demand_objs_by_area"].get(area_id, [])
|
||||
if not demand_objs:
|
||||
continue
|
||||
|
||||
per_node_leak = d["q_sum"] * ratio / len(demand_objs)
|
||||
for demand_obj in demand_objs:
|
||||
original_val = demand_obj.base_value
|
||||
demand_obj.base_value = original_val + per_node_leak
|
||||
modifications.append((demand_obj, original_val))
|
||||
|
||||
temp_dir = os.path.abspath(os.path.join("temp", "leakage"))
|
||||
os.makedirs(temp_dir, exist_ok=True)
|
||||
prefix = os.path.join(temp_dir, f"temp_{os.getpid()}")
|
||||
|
||||
try:
|
||||
sim = wntr.sim.EpanetSimulator(d["wn"])
|
||||
results = sim.run_sim(file_prefix=prefix)
|
||||
sim_pressure = results.node["pressure"].loc[:, d["sensor_nodes"]]
|
||||
|
||||
n_steps = min(sim_pressure.shape[0], d["obs_matrix"].shape[0])
|
||||
sim_vals = sim_pressure.values[:n_steps, :]
|
||||
obs_vals = d["obs_matrix"][:n_steps, :]
|
||||
diff = sim_vals - obs_vals
|
||||
|
||||
row_max = np.max(np.abs(diff), axis=1, keepdims=True)
|
||||
row_max[row_max == 0] = 1.0
|
||||
normalized_diff = diff / row_max
|
||||
return float(np.linalg.norm(normalized_diff))
|
||||
|
||||
except Exception:
|
||||
return 1e9
|
||||
|
||||
finally:
|
||||
for demand_obj, original_val in modifications:
|
||||
demand_obj.base_value = original_val
|
||||
_cleanup_temp_files(prefix)
|
||||
|
||||
|
||||
class LeakageIdentifier:
|
||||
FLOW_UNIT_TO_M3S = {
|
||||
"m3/s": 1.0,
|
||||
"m3/h": 1.0 / 3600.0,
|
||||
"L/s": 1.0 / 1000.0,
|
||||
"L/min": 1.0 / 60000.0,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _flow_to_m3s(cls, value: float, unit: str) -> float:
|
||||
if unit not in cls.FLOW_UNIT_TO_M3S:
|
||||
raise ValueError(f"不支持的流量单位: {unit}")
|
||||
return float(value) * cls.FLOW_UNIT_TO_M3S[unit]
|
||||
|
||||
@classmethod
|
||||
def _flow_from_m3s(cls, value_m3s: float, unit: str) -> float:
|
||||
if unit not in cls.FLOW_UNIT_TO_M3S:
|
||||
raise ValueError(f"不支持的流量单位: {unit}")
|
||||
return float(value_m3s) / cls.FLOW_UNIT_TO_M3S[unit]
|
||||
|
||||
@staticmethod
|
||||
def _effective_area_ratios(
|
||||
raw_ratios: Union["np.ndarray", Dict[str, float]],
|
||||
area_ids: List[str],
|
||||
nodes_by_area: Dict[str, List[str]],
|
||||
allocatable_counts: Union[Dict[str, int], None] = None,
|
||||
) -> Dict[str, float]:
|
||||
"""将输入比例转换为有效区域比例,确保有效区域比例和为 1。"""
|
||||
area_count = len(area_ids)
|
||||
if area_count == 0:
|
||||
return {}
|
||||
|
||||
if isinstance(raw_ratios, dict):
|
||||
ratios = np.array(
|
||||
[float(raw_ratios.get(area_id, 0.0)) for area_id in area_ids],
|
||||
dtype=float,
|
||||
)
|
||||
else:
|
||||
arr = np.asarray(raw_ratios, dtype=float).reshape(-1)
|
||||
ratios = np.zeros(area_count, dtype=float)
|
||||
fill_len = min(area_count, arr.shape[0])
|
||||
if fill_len > 0:
|
||||
ratios[:fill_len] = arr[:fill_len]
|
||||
|
||||
# 仅保留非负比例,负值按 0 处理
|
||||
ratios = np.clip(ratios, a_min=0.0, a_max=None)
|
||||
|
||||
# 仅在有效区域(存在可分配节点)内归一化
|
||||
if allocatable_counts is not None:
|
||||
valid_mask = np.array(
|
||||
[int(allocatable_counts.get(area_id, 0)) > 0 for area_id in area_ids],
|
||||
dtype=bool,
|
||||
)
|
||||
else:
|
||||
valid_mask = np.array(
|
||||
[len(nodes_by_area.get(area_id, [])) > 0 for area_id in area_ids],
|
||||
dtype=bool,
|
||||
)
|
||||
if not np.any(valid_mask):
|
||||
raise ValueError("没有可分配漏损的有效分区,无法满足漏损总量约束。")
|
||||
|
||||
effective = np.zeros(area_count, dtype=float)
|
||||
valid_sum = float(np.sum(ratios[valid_mask]))
|
||||
|
||||
if valid_sum > 0:
|
||||
effective[valid_mask] = ratios[valid_mask] / valid_sum
|
||||
else:
|
||||
# 若输入全为 0,则在有效区域内均分,保证总和仍为 1
|
||||
valid_count = int(np.sum(valid_mask))
|
||||
effective[valid_mask] = 1.0 / valid_count
|
||||
|
||||
return {area_id: float(effective[idx]) for idx, area_id in enumerate(area_ids)}
|
||||
|
||||
@staticmethod
|
||||
def _normalize_area_map_df(df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""标准化区域映射列名为 ID 和 Area。"""
|
||||
if "ID" in df.columns and "Area" in df.columns:
|
||||
return df
|
||||
|
||||
if "ID" in df.columns and "now" in df.columns:
|
||||
df = df.rename(columns={"now": "Area"})
|
||||
return df
|
||||
|
||||
df = df.copy()
|
||||
df.columns = ["ID", "Area"] + list(df.columns[2:])
|
||||
return df
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
inp_path: str,
|
||||
sensor_nodes: List[str],
|
||||
area_map: Union[str, Dict[str, str]],
|
||||
start_time: float = 0,
|
||||
duration: float = 24,
|
||||
timestep: float = 5,
|
||||
q_sum: float = 0.2,
|
||||
):
|
||||
"""
|
||||
初始化漏损识别器。
|
||||
|
||||
参数:
|
||||
inp_path: EPANET .inp 文件路径。
|
||||
sensor_nodes: 用作压力传感器的节点 ID 列表。
|
||||
area_map: 节点到区域的映射。可以是 CSV 文件路径(列:ID, Area),也可以是字典 {NodeID: AreaID}。
|
||||
start_time: 模拟开始时间(小时)。
|
||||
duration: 模拟持续时间(小时)。
|
||||
timestep: 模拟时间步长(分钟)。
|
||||
q_sum: 假设的总漏损流量 (m3/s)。
|
||||
"""
|
||||
self.inp_path = inp_path
|
||||
self.sensor_nodes = sensor_nodes
|
||||
self.start_time = start_time
|
||||
self.duration = duration
|
||||
self.timestep = timestep
|
||||
self.q_sum = q_sum
|
||||
|
||||
# 加载管网模型(仅一次)
|
||||
self.wn = wntr.network.WaterNetworkModel(self.inp_path)
|
||||
|
||||
# 优化 WNTR 设置以提高速度
|
||||
self.wn.options.hydraulic.demand_model = "DD"
|
||||
self.wn.options.time.duration = float(self.duration) * 3600
|
||||
self.wn.options.time.hydraulic_timestep = float(self.timestep) * 60
|
||||
self.wn.options.time.pattern_timestep = float(self.timestep) * 60
|
||||
self.wn.options.time.report_timestep = float(self.timestep) * 60
|
||||
|
||||
# 加载区域映射
|
||||
if isinstance(area_map, str):
|
||||
self.area_map_df = self._load_area_map(area_map)
|
||||
elif isinstance(area_map, dict):
|
||||
self.area_map_df = self._normalize_area_map_df(
|
||||
pd.DataFrame(list(area_map.items()), columns=["ID", "Area"])
|
||||
)
|
||||
else:
|
||||
raise ValueError("area_map 必须是 CSV 文件路径或字典。")
|
||||
|
||||
self.area_ids = sorted(self.area_map_df["Area"].unique())
|
||||
self.num_areas = len(self.area_ids)
|
||||
|
||||
# 按区域对节点进行预分类,以便更快查找
|
||||
self.nodes_by_area = {
|
||||
area: self.area_map_df[self.area_map_df["Area"] == area]["ID"].tolist()
|
||||
for area in self.area_ids
|
||||
}
|
||||
|
||||
def _load_area_map(self, path: str) -> pd.DataFrame:
|
||||
"""加载并验证节点-区域映射文件。"""
|
||||
df = pd.read_csv(path, dtype={"ID": str, "Area": str})
|
||||
return self._normalize_area_map_df(df)
|
||||
|
||||
def run_identification(
|
||||
self,
|
||||
observed_pressure_data: Union[
|
||||
str, pd.DataFrame, Dict[str, List[Any]], List[Dict[str, Any]]
|
||||
],
|
||||
output_dir: str = "Results",
|
||||
pop_size: int = 50,
|
||||
max_gen: int = 100,
|
||||
output_flow_unit: str = "m3/s",
|
||||
save_result: bool = True,
|
||||
ftol: float = 1e-3,
|
||||
ftol_period: int = 15,
|
||||
n_workers: int = DEFAULT_N_WORKERS,
|
||||
):
|
||||
"""
|
||||
运行遗传算法以识别漏损分布。
|
||||
|
||||
参数:
|
||||
observed_pressure_data: 包含 SCADA 压力数据的 CSV 文件路径或 DataFrame/字典列表数据。
|
||||
output_dir: 结果保存目录。
|
||||
pop_size: GA 的种群大小。
|
||||
max_gen: GA 的最大代数。
|
||||
output_flow_unit: 输出漏损流量的单位。
|
||||
save_result: 是否保存识别结果到本地 CSV。
|
||||
ftol: 目标值收敛容差(连续 ftol_period 代改善 < ftol 则停止)。
|
||||
ftol_period: 收敛检测的窗口代数。
|
||||
n_workers: 并行工作进程数(1=串行,>1=并行评估)。
|
||||
"""
|
||||
if save_result:
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# 加载观测数据
|
||||
if isinstance(observed_pressure_data, str):
|
||||
obs_df = pd.read_csv(observed_pressure_data)
|
||||
observed_name = os.path.basename(observed_pressure_data)
|
||||
elif isinstance(observed_pressure_data, pd.DataFrame):
|
||||
obs_df = observed_pressure_data.copy()
|
||||
observed_name = "observed_pressure.csv"
|
||||
else:
|
||||
obs_df = pd.DataFrame(observed_pressure_data)
|
||||
observed_name = "observed_pressure.csv"
|
||||
|
||||
# 准备 pymoo 问题实例
|
||||
problem = LeakageProblem(
|
||||
self.wn,
|
||||
self.nodes_by_area,
|
||||
self.area_ids,
|
||||
self.sensor_nodes,
|
||||
obs_df,
|
||||
q_sum=self.q_sum,
|
||||
n_workers=n_workers,
|
||||
inp_path=os.path.abspath(self.inp_path),
|
||||
)
|
||||
|
||||
# 配置 pymoo GA 算法
|
||||
n_var = self.num_areas
|
||||
algorithm = GA(
|
||||
pop_size=pop_size,
|
||||
crossover=SBX(prob=0.9, eta=15),
|
||||
mutation=PM(prob=1.0 / max(1, n_var), eta=20),
|
||||
eliminate_duplicates=False,
|
||||
)
|
||||
|
||||
# 终止条件:收敛检测 + 最大代数
|
||||
termination = DefaultSingleObjectiveTermination(
|
||||
ftol=ftol,
|
||||
period=ftol_period,
|
||||
n_max_gen=max_gen,
|
||||
)
|
||||
|
||||
# 回调:记录每代信息
|
||||
callback = _ProgressCallback()
|
||||
|
||||
t0 = time.time()
|
||||
try:
|
||||
res = pymoo_minimize(
|
||||
problem,
|
||||
algorithm,
|
||||
termination,
|
||||
seed=42,
|
||||
verbose=True,
|
||||
callback=callback,
|
||||
)
|
||||
finally:
|
||||
problem.close()
|
||||
elapsed = time.time() - t0
|
||||
|
||||
# 提取最优解
|
||||
best_ind = res.X # 最优个体(漏损比例原始值)
|
||||
best_obj = float(res.F[0])
|
||||
|
||||
# 输出终止信息
|
||||
print(f"\n优化完成。耗时: {elapsed:.1f}s")
|
||||
print(f"总代数: {res.algorithm.n_gen}, 总评估次数: {problem._eval_count}")
|
||||
print(f"最佳目标值: {best_obj:.6f}")
|
||||
|
||||
# 保存到文件
|
||||
effective_ratio_map = self._effective_area_ratios(
|
||||
best_ind,
|
||||
self.area_ids,
|
||||
self.nodes_by_area,
|
||||
allocatable_counts=problem.allocatable_counts,
|
||||
)
|
||||
normalized_ratios = [
|
||||
effective_ratio_map.get(area_id, 0.0) for area_id in self.area_ids
|
||||
]
|
||||
leakage_flow_m3s = [ratio * self.q_sum for ratio in normalized_ratios]
|
||||
leakage_flow_output = [
|
||||
self._flow_from_m3s(value_m3s, output_flow_unit)
|
||||
for value_m3s in leakage_flow_m3s
|
||||
]
|
||||
|
||||
result_df = pd.DataFrame(
|
||||
{
|
||||
"Area": self.area_ids,
|
||||
"LeakageRatioRaw": best_ind,
|
||||
"LeakageRatio": normalized_ratios,
|
||||
"LeakageFlow_m3_per_s": leakage_flow_m3s,
|
||||
f"LeakageFlow_{output_flow_unit.replace('/', '_per_')}": leakage_flow_output,
|
||||
}
|
||||
)
|
||||
result_path = None
|
||||
if save_result:
|
||||
result_path = os.path.join(
|
||||
output_dir, f"identified_leakage_{observed_name}"
|
||||
)
|
||||
result_df.to_csv(result_path, index=False)
|
||||
print(f"结果已保存至 {result_path}")
|
||||
result_df.attrs["result_path"] = result_path
|
||||
return result_df
|
||||
|
||||
|
||||
class _ProgressCallback(Callback):
|
||||
"""每代回调:记录进度。"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.gen_times = []
|
||||
self._t_last = None
|
||||
|
||||
def notify(self, algorithm):
|
||||
now = time.time()
|
||||
if self._t_last is not None:
|
||||
self.gen_times.append(now - self._t_last)
|
||||
self._t_last = now
|
||||
|
||||
|
||||
class LeakageProblem(Problem):
|
||||
"""pymoo 批量评估问题定义。
|
||||
|
||||
搜索空间:n 维 [0, 1] 实数 -> 通过 _effective_area_ratios 归一化到单纯形。
|
||||
目标:模拟压力与观测压力之间的归一化误差范数。
|
||||
无显式约束(sum=1 由归一化自动保证)。
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
wn,
|
||||
nodes_by_area,
|
||||
area_ids,
|
||||
sensor_nodes,
|
||||
observed_data,
|
||||
q_sum: float = 0.2,
|
||||
n_workers: int = DEFAULT_N_WORKERS,
|
||||
inp_path: str | None = None,
|
||||
):
|
||||
n_var = len(area_ids)
|
||||
|
||||
super().__init__(
|
||||
n_var=n_var,
|
||||
n_obj=1,
|
||||
n_ieq_constr=0,
|
||||
xl=np.zeros(n_var),
|
||||
xu=np.ones(n_var),
|
||||
)
|
||||
|
||||
self.wn = wn
|
||||
self.nodes_by_area = nodes_by_area
|
||||
self.area_ids = area_ids
|
||||
self.sensor_nodes = sensor_nodes
|
||||
self.q_sum = q_sum
|
||||
self.n_workers = max(1, int(n_workers))
|
||||
self.inp_path = inp_path
|
||||
|
||||
# 预处理观测数据以匹配模拟格式
|
||||
try:
|
||||
missing_sensors = [
|
||||
s for s in self.sensor_nodes if s not in observed_data.columns
|
||||
]
|
||||
if not missing_sensors:
|
||||
self.obs_matrix = observed_data[self.sensor_nodes].values
|
||||
else:
|
||||
self.obs_matrix = observed_data.values[:, : len(self.sensor_nodes)]
|
||||
except Exception:
|
||||
self.obs_matrix = observed_data.values[:, : len(self.sensor_nodes)]
|
||||
|
||||
duration_sec = float(self.wn.options.time.duration)
|
||||
step_sec = float(self.wn.options.time.hydraulic_timestep)
|
||||
if step_sec > 0:
|
||||
max_steps = int(duration_sec / step_sec) + 1
|
||||
self.obs_matrix = self.obs_matrix[:max_steps, :]
|
||||
|
||||
# 预先缓存每个区域的需水对象,减少每次适应度计算的节点查找
|
||||
self.demand_objs_by_area = {}
|
||||
for area_id in self.area_ids:
|
||||
demand_objs = []
|
||||
for node_name in self.nodes_by_area.get(area_id, []):
|
||||
if node_name not in self.wn.node_name_list:
|
||||
continue
|
||||
node = self.wn.get_node(node_name)
|
||||
if (
|
||||
hasattr(node, "demand_timeseries_list")
|
||||
and len(node.demand_timeseries_list) > 0
|
||||
):
|
||||
demand_objs.append(node.demand_timeseries_list[0])
|
||||
self.demand_objs_by_area[area_id] = demand_objs
|
||||
|
||||
self.allocatable_counts = {
|
||||
area_id: len(self.demand_objs_by_area.get(area_id, []))
|
||||
for area_id in self.area_ids
|
||||
}
|
||||
if not any(count > 0 for count in self.allocatable_counts.values()):
|
||||
raise ValueError("没有可分配漏损的有效分区,无法满足漏损总量约束。")
|
||||
|
||||
# 评估计数器(诊断用)
|
||||
self._eval_count = 0
|
||||
self._pool = None
|
||||
if self.n_workers > 1:
|
||||
if not self.inp_path:
|
||||
raise ValueError("并行评估需要提供 inp_path。")
|
||||
duration_sec = float(self.wn.options.time.duration)
|
||||
timestep_sec = float(self.wn.options.time.hydraulic_timestep)
|
||||
self._pool = Pool(
|
||||
processes=self.n_workers,
|
||||
initializer=_worker_init,
|
||||
initargs=(
|
||||
self.inp_path,
|
||||
list(self.sensor_nodes),
|
||||
list(self.area_ids),
|
||||
{k: list(v) for k, v in self.nodes_by_area.items()},
|
||||
self.obs_matrix.copy(),
|
||||
self.q_sum,
|
||||
duration_sec,
|
||||
timestep_sec,
|
||||
),
|
||||
)
|
||||
|
||||
def _evaluate(self, X, out, *args, **kwargs):
|
||||
"""批量评估种群。
|
||||
|
||||
X: 形状 (pop_size, n_var) 的决策变量矩阵。
|
||||
"""
|
||||
n_pop = X.shape[0]
|
||||
self._eval_count += n_pop
|
||||
|
||||
if self._pool is not None:
|
||||
results = self._pool.map(_worker_evaluate, [X[i] for i in range(n_pop)])
|
||||
out["F"] = np.array(results, dtype=float).reshape(-1, 1)
|
||||
return
|
||||
|
||||
F = np.zeros((n_pop, 1))
|
||||
for i in range(n_pop):
|
||||
F[i, 0] = self._evaluate_single(X[i])
|
||||
out["F"] = F
|
||||
|
||||
def _evaluate_single(self, x):
|
||||
"""评估单个个体,返回归一化误差范数。"""
|
||||
leak_ratios = x
|
||||
|
||||
# 将漏损分布归一化
|
||||
effective_ratio_map = LeakageIdentifier._effective_area_ratios(
|
||||
leak_ratios,
|
||||
self.area_ids,
|
||||
self.nodes_by_area,
|
||||
allocatable_counts=self.allocatable_counts,
|
||||
)
|
||||
|
||||
# 跟踪修改以便稍后恢复
|
||||
modifications = []
|
||||
|
||||
for j, area_id in enumerate(self.area_ids):
|
||||
ratio = effective_ratio_map.get(area_id, 0.0)
|
||||
if ratio <= 0:
|
||||
continue
|
||||
|
||||
demand_objs = self.demand_objs_by_area.get(area_id, [])
|
||||
if not demand_objs:
|
||||
continue
|
||||
|
||||
per_node_leak = self.q_sum * ratio / len(demand_objs)
|
||||
|
||||
for demand_obj in demand_objs:
|
||||
original_val = demand_obj.base_value
|
||||
demand_obj.base_value = original_val + per_node_leak
|
||||
modifications.append((demand_obj, original_val))
|
||||
|
||||
# 结果保存在根目录的temp/leakage文件夹中
|
||||
temp_dir = os.path.abspath(os.path.join("temp", "leakage"))
|
||||
os.makedirs(temp_dir, exist_ok=True)
|
||||
prefix = os.path.join(temp_dir, f"temp_{os.getpid()}")
|
||||
|
||||
try:
|
||||
sim = wntr.sim.EpanetSimulator(self.wn)
|
||||
results = sim.run_sim(file_prefix=prefix)
|
||||
sim_pressure = results.node["pressure"].loc[:, self.sensor_nodes]
|
||||
|
||||
n_steps = min(sim_pressure.shape[0], self.obs_matrix.shape[0])
|
||||
sim_vals = sim_pressure.values[:n_steps, :]
|
||||
obs_vals = self.obs_matrix[:n_steps, :]
|
||||
|
||||
diff = sim_vals - obs_vals
|
||||
|
||||
# 按行最大值归一化
|
||||
row_max = np.max(np.abs(diff), axis=1, keepdims=True)
|
||||
row_max[row_max == 0] = 1.0 # 防止除以零
|
||||
|
||||
normalized_diff = diff / row_max
|
||||
|
||||
# 目标:归一化差值矩阵的 2-范数
|
||||
return float(np.linalg.norm(normalized_diff))
|
||||
|
||||
except Exception:
|
||||
return 1e9
|
||||
|
||||
finally:
|
||||
for demand_obj, original_val in modifications:
|
||||
demand_obj.base_value = original_val
|
||||
|
||||
_cleanup_temp_files(prefix)
|
||||
|
||||
def close(self) -> None:
|
||||
if self._pool is not None:
|
||||
self._pool.close()
|
||||
self._pool.join()
|
||||
self._pool = None
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description="漏损区域识别")
|
||||
parser.add_argument("--inp", required=True, help=".inp 文件路径")
|
||||
parser.add_argument("--map", help="节点-区域映射 CSV 路径")
|
||||
parser.add_argument("--scada", help="SCADA 压力 CSV 路径 (观测数据)")
|
||||
parser.add_argument("--sensors", help="传感器节点 ID 列表 (逗号分隔)")
|
||||
parser.add_argument("--output", default="Results", help="输出目录")
|
||||
|
||||
parser.add_argument("--pop_size", type=int, default=50, help="种群大小")
|
||||
parser.add_argument("--max_gen", type=int, default=100, help="最大代数")
|
||||
parser.add_argument("--duration", type=float, default=24, help="模拟时长(小时)")
|
||||
parser.add_argument("--q_sum", type=float, default=0.241, help="总漏损流量")
|
||||
parser.add_argument(
|
||||
"--q_sum_unit",
|
||||
default="m3/s",
|
||||
choices=list(LeakageIdentifier.FLOW_UNIT_TO_M3S.keys()),
|
||||
help="q_sum 输入单位(建议与现场习惯一致,内部统一换算为 m3/s)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.map or not args.scada or not args.sensors:
|
||||
parser.error("--map、--scada、--sensors 为必填")
|
||||
|
||||
q_sum_m3s = LeakageIdentifier._flow_to_m3s(args.q_sum, args.q_sum_unit)
|
||||
|
||||
sensors = [sensor.strip() for sensor in args.sensors.split(",") if sensor.strip()]
|
||||
identifier = LeakageIdentifier(
|
||||
args.inp, sensors, args.map, duration=args.duration, q_sum=q_sum_m3s
|
||||
)
|
||||
|
||||
identifier.run_identification(
|
||||
args.scada,
|
||||
args.output,
|
||||
pop_size=args.pop_size,
|
||||
max_gen=args.max_gen,
|
||||
output_flow_unit=args.q_sum_unit,
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -0,0 +1,91 @@
|
||||
import psycopg
|
||||
|
||||
from app.algorithms.sensor import kmeans as kmeans_sensor
|
||||
from app.algorithms.sensor import sensitivity
|
||||
from app.core.config import get_pgconn_string
|
||||
from app.services.tjnetwork import dump_inp
|
||||
|
||||
|
||||
def pressure_sensor_placement_sensitivity(
|
||||
name: str, scheme_name: str, sensor_number: int, min_diameter: int, username: str
|
||||
) -> None:
|
||||
"""
|
||||
基于改进灵敏度法进行压力监测点优化布置
|
||||
:param name: 数据库名称
|
||||
:param scheme_name: 监测优化布置方案名称
|
||||
:param sensor_number: 传感器数目
|
||||
:param min_diameter: 最小管径
|
||||
:param username: 用户名
|
||||
:return:
|
||||
"""
|
||||
sensor_location = sensitivity.get_ID(
|
||||
name=name, sensor_num=sensor_number, min_diameter=min_diameter
|
||||
)
|
||||
try:
|
||||
conn_string = get_pgconn_string(db_name=name)
|
||||
with psycopg.connect(conn_string) as conn:
|
||||
with conn.cursor() as cur:
|
||||
sql = """
|
||||
INSERT INTO sensor_placement (scheme_name, sensor_number, min_diameter, username, sensor_location)
|
||||
VALUES (%s, %s, %s, %s, %s)
|
||||
"""
|
||||
|
||||
cur.execute(
|
||||
sql,
|
||||
(
|
||||
scheme_name,
|
||||
sensor_number,
|
||||
min_diameter,
|
||||
username,
|
||||
sensor_location,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
print("方案信息存储成功!")
|
||||
except Exception as e:
|
||||
print(f"存储方案信息时出错:{e}")
|
||||
|
||||
|
||||
# 2025/08/21
|
||||
# 基于kmeans聚类法进行压力监测点优化布置
|
||||
def pressure_sensor_placement_kmeans(
|
||||
name: str, scheme_name: str, sensor_number: int, min_diameter: int, username: str
|
||||
) -> None:
|
||||
"""
|
||||
基于聚类法进行压力监测点优化布置
|
||||
:param name: 数据库名称(注意,此处数据库名称也是inp文件名称,inp文件与pg库名要一样)
|
||||
:param scheme_name: 监测优化布置方案名称
|
||||
:param sensor_number: 传感器数目
|
||||
:param min_diameter: 最小管径
|
||||
:param username: 用户名
|
||||
:return:
|
||||
"""
|
||||
# dump_inp
|
||||
inp_name = f"./db_inp/{name}.db.inp"
|
||||
dump_inp(name, inp_name, "2")
|
||||
sensor_location = kmeans_sensor.kmeans_sensor_placement(
|
||||
name=name, sensor_num=sensor_number, min_diameter=min_diameter
|
||||
)
|
||||
try:
|
||||
conn_string = get_pgconn_string(db_name=name)
|
||||
with psycopg.connect(conn_string) as conn:
|
||||
with conn.cursor() as cur:
|
||||
sql = """
|
||||
INSERT INTO sensor_placement (scheme_name, sensor_number, min_diameter, username, sensor_location)
|
||||
VALUES (%s, %s, %s, %s, %s)
|
||||
"""
|
||||
|
||||
cur.execute(
|
||||
sql,
|
||||
(
|
||||
scheme_name,
|
||||
sensor_number,
|
||||
min_diameter,
|
||||
username,
|
||||
sensor_location,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
print("方案信息存储成功!")
|
||||
except Exception as e:
|
||||
print(f"存储方案信息时出错:{e}")
|
||||
@@ -0,0 +1,109 @@
|
||||
import wntr
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import sklearn.cluster
|
||||
import os
|
||||
|
||||
|
||||
|
||||
class QD_KMeans(object):
|
||||
def __init__(self, wn, num_monitors):
|
||||
# self.inp = inp
|
||||
self.cluster_num = num_monitors # 聚类中心个数,也即测压点个数
|
||||
self.wn=wn
|
||||
self.monitor_nodes = []
|
||||
self.coords = []
|
||||
self.junction_nodes = {} # Added missing initialization
|
||||
|
||||
|
||||
def get_junctions_coordinates(self):
|
||||
|
||||
for junction_name in self.wn.junction_name_list:
|
||||
junction = self.wn.get_node(junction_name)
|
||||
self.junction_nodes[junction_name] = junction.coordinates
|
||||
self.coords.append(junction.coordinates )
|
||||
|
||||
# print(f"Total junctions: {self.junction_coordinates}")
|
||||
|
||||
def select_monitoring_points(self):
|
||||
if not self.coords: # Add check if coordinates are collected
|
||||
self.get_junctions_coordinates()
|
||||
coords = np.array(self.coords)
|
||||
coords_normalized = (coords - coords.min(axis=0)) / (coords.max(axis=0) - coords.min(axis=0))
|
||||
kmeans = sklearn.cluster.KMeans(n_clusters= self.cluster_num, random_state=42)
|
||||
kmeans.fit(coords_normalized)
|
||||
|
||||
for center in kmeans.cluster_centers_:
|
||||
distances = np.sum((coords_normalized - center) ** 2, axis=1)
|
||||
nearest_node = self.wn.junction_name_list[np.argmin(distances)]
|
||||
self.monitor_nodes.append(nearest_node)
|
||||
|
||||
return self.monitor_nodes
|
||||
|
||||
|
||||
def visualize_network(self):
|
||||
"""Visualize network with monitoring points"""
|
||||
ax=wntr.graphics.plot_network(self.wn,
|
||||
node_attribute=self.monitor_nodes,
|
||||
node_size=30,
|
||||
title='Optimal sensor')
|
||||
plt.show()
|
||||
|
||||
|
||||
|
||||
|
||||
def kmeans_sensor_placement(name: str, sensor_num: int, min_diameter: int) -> list:
|
||||
inp_name = f'./db_inp/{name}.db.inp'
|
||||
wn= wntr.network.WaterNetworkModel(inp_name)
|
||||
wn_cluster=QD_KMeans(wn, sensor_num)
|
||||
|
||||
# Select monitoring pointse
|
||||
sensor_ids= wn_cluster.select_monitoring_points()
|
||||
|
||||
# wn_cluster.visualize_network()
|
||||
|
||||
return sensor_ids
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#sensorindex = get_ID(name='suzhouhe_2024_cloud_0817', sensor_num=30, min_diameter=500)
|
||||
sensorindex = kmeans_sensor_placement(name='szh', sensor_num=50, min_diameter=300)
|
||||
print(sensorindex)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user