import math import os from collections import deque from datetime import datetime from typing import Any import numpy as np import pandas as pd import wntr from app.algorithms.leakage.identifier import LeakageIdentifier from app.infra.db.timescaledb.internal_queries import InternalQueries from app.services.scheme_management import ( query_leakage_identify_scheme_detail, query_leakage_identify_schemes, scheme_name_exists, store_leakage_identify_result, store_scheme_info, ) from app.services.tjnetwork import ( dump_inp, get_all_scada_info, get_network_link_nodes, get_network_node_coords, ) from app.services.time_api import extract_date, parse_utc_time, utc_now DEFAULT_N_WORKERS = max(1, min((os.cpu_count() or 1) - 1, 4)) def run_leakage_identification( network: str, username: str, observed_pressure_data: ( str | pd.DataFrame | dict[str, list[Any]] | list[dict[str, Any]] | None ) = None, start_time: float = 0, duration: float = 24, timestep: float = 5, q_sum: float = 0.2, q_sum_unit: str = "m3/s", output_dir: str = "db_inp", pop_size: int = 50, max_gen: int = 100, n_workers: int = DEFAULT_N_WORKERS, output_flow_unit: str = "m3/s", dma_count: int | None = None, scada_start: datetime | str | None = None, scada_end: datetime | str | None = None, sensor_nodes: list[str] | None = None, scheme_name: str | None = None, ) -> dict[str, Any]: os.makedirs(output_dir, exist_ok=True) inp_path = _prepare_leakage_inp(network) selected_sensor_nodes = ( list(dict.fromkeys([node for node in (sensor_nodes or []) if node])) if sensor_nodes else _get_pressure_sensor_nodes(network) ) if not selected_sensor_nodes: raise ValueError("未提供有效传感器节点,且系统未识别到可用压力传感器。") area_map, areas, node_coords = _build_area_map_by_topology( network, selected_sensor_nodes, dma_count ) observed_source = "request_payload" if scada_start is not None or scada_end is not None: observed_df = _build_observed_pressure_from_scada( network=network, sensor_nodes=selected_sensor_nodes, scada_start=scada_start, scada_end=scada_end, ) observed_source = "backend_timerange" else: if observed_pressure_data is None: raise ValueError( "未提供 observed_pressure_data,且未提供 scada_start/scada_end。" ) observed_df = observed_pressure_data q_sum_m3s = LeakageIdentifier._flow_to_m3s(q_sum, q_sum_unit) identifier = LeakageIdentifier( inp_path=inp_path, sensor_nodes=selected_sensor_nodes, area_map=area_map, start_time=start_time, duration=duration, timestep=timestep, q_sum=q_sum_m3s, ) result_df = identifier.run_identification( observed_pressure_data=observed_df, output_dir=output_dir, pop_size=pop_size, max_gen=max_gen, n_workers=n_workers, output_flow_unit=output_flow_unit, save_result=False, ) rows = result_df.to_dict(orient="records") # node_visual_payload = _build_node_visual_payload(area_map, node_coords, rows) # drawing_payload = _build_drawing_payload(node_visual_payload) payload = { "result_path": result_df.attrs.get("result_path"), "sensor_nodes": selected_sensor_nodes, "observed_source": observed_source, "area_count": len(set(area_map.values())), "node_area_map": area_map, "areas": areas, # "node_visual_payload": node_visual_payload, # "drawing_payload": drawing_payload, "rows": rows, } if scheme_name: if scheme_name_exists(network, scheme_name): raise ValueError(f"方案名称已存在: {scheme_name}") scheme_start_time = ( _to_datetime(scada_start).isoformat() if scada_start is not None else utc_now().isoformat() ) scheme_detail = { "network": network, "dma_count": dma_count, "sensor_nodes": selected_sensor_nodes, "scada_start": ( _to_datetime(scada_start).isoformat() if scada_start is not None else None ), "scada_end": ( _to_datetime(scada_end).isoformat() if scada_end is not None else None ), "algorithm_params": { "start_time": start_time, "duration": duration, "timestep": timestep, "q_sum": q_sum, "q_sum_unit": q_sum_unit, "output_flow_unit": output_flow_unit, "pop_size": pop_size, "max_gen": max_gen, "n_workers": n_workers, }, "result_summary": { "area_count": len(set(area_map.values())), "max_leakage": max( (float(row.get("LeakageFlow_m3_per_s", 0.0)) for row in rows), default=0.0, ), }, } store_scheme_info( name=network, scheme_name=scheme_name, scheme_type="dma_leak_identification", username=username, scheme_start_time=scheme_start_time, scheme_detail=scheme_detail, ) store_leakage_identify_result( name=network, scheme_name=scheme_name, network=network, sensor_nodes=selected_sensor_nodes, result_rows=rows, node_area_map=area_map, areas=areas, drawing_payload={}, ) payload["scheme_name"] = scheme_name return payload def list_leakage_identify_schemes( network: str, query_date: datetime | str | None = None ) -> list[dict[str, Any]]: parsed_date = extract_date(query_date, field_name="query_date") if query_date is not None else None return query_leakage_identify_schemes( name=network, network=network, query_date=parsed_date ) def get_leakage_identify_scheme_detail( network: str, scheme_name: str ) -> dict[str, Any]: result = query_leakage_identify_scheme_detail(network, scheme_name) if not result: raise ValueError(f"未找到漏损识别方案: {scheme_name}") return result def _get_pressure_sensor_nodes(network: str) -> list[str]: scada_info = get_all_scada_info(network) sensor_nodes: list[str] = [] for item in scada_info: scada_type = str(item.get("type", "")).lower() if scada_type != "pressure": continue node_id = item.get("associated_element_id") if isinstance(node_id, str) and node_id: sensor_nodes.append(node_id) sensor_nodes = list(dict.fromkeys(sensor_nodes)) if not sensor_nodes: raise ValueError("未找到压力传感器对应节点(scada_info.type=pressure)。") return sensor_nodes def _build_area_map_by_topology( network: str, sensor_nodes: list[str], dma_count: int | None ) -> tuple[dict[str, str], list[dict[str, Any]], dict[str, dict[str, float]]]: node_coords = get_network_node_coords(network) all_nodes = list(node_coords.keys()) if not all_nodes: raise ValueError("管网中未获取到可分区节点。") available_sensors = [node for node in sensor_nodes if node in node_coords] if not available_sensors: raise ValueError("无可用压力传感器,无法生成虚拟分区。") area_count = _resolve_dma_count(dma_count, available_sensors, all_nodes) sensor_area_map = _cluster_sensors_to_areas( available_sensors, node_coords, area_count ) adjacency = _build_adjacency(network, all_nodes) distance_by_sensor = { sensor: _bfs_distances(adjacency, sensor) for sensor in available_sensors } assignment_count = {sensor: 0 for sensor in available_sensors} area_map: dict[str, str] = {} for node_id in sorted(all_nodes): sensor = _choose_sensor_for_node( node_id=node_id, sensors=available_sensors, node_coords=node_coords, distance_by_sensor=distance_by_sensor, assignment_count=assignment_count, ) assignment_count[sensor] += 1 area_map[node_id] = sensor_area_map[sensor] if not area_map: raise ValueError("虚拟分区结果为空,无法生成节点区域映射。") areas = _build_area_meta(area_map, sensor_area_map) return area_map, areas, node_coords def _resolve_dma_count( dma_count: int | None, sensor_nodes: list[str], all_nodes: list[str] ) -> int: if dma_count is None: return min(len(sensor_nodes), len(all_nodes)) if dma_count <= 0: raise ValueError("dma_count 必须大于 0。") if dma_count > len(all_nodes): raise ValueError("dma_count 不能大于可分区节点数量。") if dma_count > len(sensor_nodes): raise ValueError("dma_count 不能大于可用传感器数量。") return dma_count def _cluster_sensors_to_areas( sensor_nodes: list[str], node_coords: dict[str, dict[str, float]], area_count: int ) -> dict[str, str]: if area_count >= len(sensor_nodes): return {sensor: str(i + 1) for i, sensor in enumerate(sensor_nodes)} points = np.array( [ [float(node_coords[s]["x"]), float(node_coords[s]["y"])] for s in sensor_nodes ], dtype=float, ) centers = points[:area_count].copy() labels = np.zeros(points.shape[0], dtype=int) for _ in range(20): d2 = ((points[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2) new_labels = d2.argmin(axis=1) if np.array_equal(labels, new_labels): break labels = new_labels for i in range(area_count): cluster_points = points[labels == i] if cluster_points.size > 0: centers[i] = cluster_points.mean(axis=0) return { sensor: str(int(labels[idx]) + 1) for idx, sensor in enumerate(sensor_nodes) } def _build_adjacency(network: str, all_nodes: list[str]) -> dict[str, set[str]]: adjacency: dict[str, set[str]] = {node: set() for node in all_nodes} for link in get_network_link_nodes(network): parts = str(link).split(":") if len(parts) < 4: continue node1, node2 = parts[-2], parts[-1] if node1 in adjacency and node2 in adjacency: adjacency[node1].add(node2) adjacency[node2].add(node1) return adjacency def _bfs_distances(adjacency: dict[str, set[str]], start: str) -> dict[str, int]: distances: dict[str, int] = {start: 0} queue: deque[str] = deque([start]) while queue: node = queue.popleft() for neighbor in adjacency.get(node, set()): if neighbor in distances: continue distances[neighbor] = distances[node] + 1 queue.append(neighbor) return distances def _choose_sensor_for_node( node_id: str, sensors: list[str], node_coords: dict[str, dict[str, float]], distance_by_sensor: dict[str, dict[str, int]], assignment_count: dict[str, int], ) -> str: min_distance = None candidates: list[str] = [] for sensor in sensors: d = distance_by_sensor.get(sensor, {}).get(node_id) if d is None: continue if min_distance is None or d < min_distance: min_distance = d candidates = [sensor] elif d == min_distance: candidates.append(sensor) if not candidates: node_coord = node_coords[node_id] return min( sensors, key=lambda sensor: _euclidean_distance( node_coord, node_coords.get(sensor, node_coord) ), ) return min(candidates, key=lambda sensor: (assignment_count[sensor], sensor)) def _euclidean_distance(a: dict[str, float], b: dict[str, float]) -> float: return math.hypot(float(a["x"]) - float(b["x"]), float(a["y"]) - float(b["y"])) def _build_area_meta( area_map: dict[str, str], sensor_area_map: dict[str, str] ) -> list[dict[str, Any]]: nodes_by_area: dict[str, list[str]] = {} for node_id, area_id in area_map.items(): nodes_by_area.setdefault(area_id, []).append(node_id) sensors_by_area: dict[str, list[str]] = {} for sensor, area_id in sensor_area_map.items(): sensors_by_area.setdefault(area_id, []).append(sensor) areas: list[dict[str, Any]] = [] for area_id in sorted(nodes_by_area.keys(), key=lambda x: int(x)): node_ids = sorted(nodes_by_area.get(area_id, [])) sensor_nodes = sorted(sensors_by_area.get(area_id, [])) areas.append( { "area_id": area_id, "sensor_nodes": sensor_nodes, "node_ids": node_ids, "node_count": len(node_ids), } ) return areas def _build_area_node_map(area_map: dict[str, str]) -> dict[str, list[str]]: area_node_map: dict[str, list[str]] = {} for node_id, area_id in area_map.items(): area_node_map.setdefault(area_id, []).append(node_id) for area_id in list(area_node_map.keys()): area_node_map[area_id] = sorted(area_node_map[area_id]) return area_node_map def _build_node_visual_payload( area_map: dict[str, str], node_coords: dict[str, dict[str, float]], rows: list[dict[str, Any]], ) -> dict[str, Any]: area_leakage_map = _build_area_leakage_map(rows) max_leakage = max(area_leakage_map.values(), default=0.0) features: list[dict[str, Any]] = [] for node_id, area_id in area_map.items(): coord = node_coords.get(node_id) if not coord: continue leakage_flow = float(area_leakage_map.get(area_id, 0.0)) leakage_level = _classify_leakage_level(leakage_flow, max_leakage) features.append( { "type": "Feature", "properties": { "node_id": node_id, "area_id": area_id, "leakage_flow_m3_per_s": leakage_flow, "leakage_level": leakage_level, }, "geometry": { "type": "Point", "coordinates": [float(coord["x"]), float(coord["y"])], }, } ) return {"type": "FeatureCollection", "features": features} def _build_area_leakage_map(rows: list[dict[str, Any]]) -> dict[str, float]: area_leakage_map: dict[str, float] = {} for row in rows: area_id = str(row.get("Area", "")).strip() if not area_id: continue area_leakage_map[area_id] = float(row.get("LeakageFlow_m3_per_s", 0.0)) return area_leakage_map def _classify_leakage_level(leakage_flow: float, max_leakage: float) -> str: if max_leakage <= 0: return "normal" ratio = leakage_flow / max_leakage if ratio >= 0.75: return "high" if ratio >= 0.4: return "medium" if ratio > 0: return "low" return "normal" def _build_drawing_payload(node_visual_payload: dict[str, Any]) -> dict[str, Any]: return node_visual_payload def _build_observed_pressure_from_scada( network: str, sensor_nodes: list[str], scada_start: datetime | str | None, scada_end: datetime | str | None, ) -> pd.DataFrame: if scada_start is None or scada_end is None: raise ValueError("使用后端 SCADA 查询时必须同时提供 scada_start 与 scada_end。") start_dt = _to_datetime(scada_start) end_dt = _to_datetime(scada_end) if start_dt >= end_dt: raise ValueError("SCADA 时间窗非法:scada_start 必须早于 scada_end。") node_query_id: dict[str, str] = {} for item in get_all_scada_info(network): if str(item.get("type", "")).lower() != "pressure": continue node_id = item.get("associated_element_id") query_id = item.get("api_query_id") if ( isinstance(node_id, str) and node_id and isinstance(query_id, str) and query_id ): node_query_id[node_id] = query_id query_ids = [node_query_id[node] for node in sensor_nodes if node in node_query_id] if not query_ids: raise ValueError("未找到可用于压力观测的 SCADA api_query_id。") scada_data = InternalQueries.query_scada_by_ids_timerange( db_name=network, device_ids=query_ids, start_time=start_dt.isoformat(), end_time=end_dt.isoformat(), ) available_lengths = [ len(scada_data.get(query_id, [])) for query_id in query_ids if len(scada_data.get(query_id, [])) > 0 ] if not available_lengths: raise ValueError("指定时间窗内未查询到压力 SCADA 数据。") min_len = min(available_lengths) obs_df = pd.DataFrame() for node_id in sensor_nodes: query_id = node_query_id.get(node_id) if not query_id: continue records = scada_data.get(query_id, [])[:min_len] if len(records) < min_len: continue obs_df[node_id] = [float(item["value"]) for item in records] if obs_df.empty: raise ValueError("SCADA 压力数据无法构建观测矩阵。") return obs_df def _to_datetime(value: datetime | str) -> datetime: return parse_utc_time(value) def _prepare_leakage_inp(network: str) -> str: project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) db_inp_dir = os.path.join(project_root, "db_inp") os.makedirs(db_inp_dir, exist_ok=True) inp_path = os.path.join(db_inp_dir, f"{network}.leakage.inp") if os.path.isfile(inp_path) and os.path.getsize(inp_path) > 0: return inp_path dump_inp(network, inp_path, "2") if not os.path.isfile(inp_path) or os.path.getsize(inp_path) <= 0: raise ValueError(f"漏损识别 INP 文件无效: {inp_path}") return inp_path