使用pymoo实现遗传算法

2026-03-03 16:29:59 +08:00
parent e7a3aec02f
commit d0abad3c65
3 changed files with 197 additions and 184 deletions
@@ -2,17 +2,17 @@ import wntr
 import numpy as np
 import pandas as pd
 import os
 import time
 import argparse
 from typing import Any, List, Dict, Union
-try:
+from pymoo.core.problem import Problem
-    import geatpy as ea
+from pymoo.core.callback import Callback
-
+from pymoo.algorithms.soo.nonconvex.ga import GA
-    _GEATPY_IMPORT_ERROR = None
+from pymoo.operators.crossover.sbx import SBX
-
+from pymoo.operators.mutation.pm import PM
-except Exception as import_error:
+from pymoo.optimize import minimize as pymoo_minimize
-    ea = None
+from pymoo.termination.default import DefaultSingleObjectiveTermination
    _GEATPY_IMPORT_ERROR = import_error
 class LeakageIdentifier:
@@ -175,6 +175,8 @@ class LeakageIdentifier:
        max_gen: int = 100,
        output_flow_unit: str = "m3/s",
        save_result: bool = True,
        ftol: float = 1e-3,
        ftol_period: int = 15,
    ):
        """
        运行遗传算法以识别漏损分布。
@@ -184,19 +186,15 @@ class LeakageIdentifier:
            output_dir: 结果保存目录。
            pop_size: GA 的种群大小。
            max_gen: GA 的最大代数。
            output_flow_unit: 输出漏损流量的单位。
            save_result: 是否保存识别结果到本地 CSV。
            ftol: 目标值收敛容差（连续 ftol_period 代改善 < ftol 则停止）。
            ftol_period: 收敛检测的窗口代数。
        """
        if ea is None:
            raise ImportError(
                "geatpy 无法导入，无法运行识别。请安装兼容版本或修复依赖。"
            ) from _GEATPY_IMPORT_ERROR
        if save_result:
            os.makedirs(output_dir, exist_ok=True)
        # 加载观测数据
        # 假设观测数据格式：行=时间，列=传感器（或原始格式）
        # 原始代码: scadapd = pd.read_csv(scada); Rs = scadapd.values[1 : Rlen + 1, :]
        # 我们需要将其标准化。假设带有标题的标准 CSV 或匹配原始格式。
        if isinstance(observed_pressure_data, str):
            obs_df = pd.read_csv(observed_pressure_data)
            observed_name = os.path.basename(observed_pressure_data)
@@ -206,11 +204,8 @@ class LeakageIdentifier:
        else:
            obs_df = pd.DataFrame(observed_pressure_data)
            observed_name = "observed_pressure.csv"
        # 提取特定传感器和时间步长的观测压力
        # 这部分需要与数据存储方式对齐。
        # 对于此重构，我们将遵循读取对应模拟步骤值的原始逻辑。
-        # 准备问题实例
+        # 准备 pymoo 问题实例
        problem = LeakageProblem(
            self.wn,
            self.nodes_by_area,
@@ -220,30 +215,44 @@ class LeakageIdentifier:
            q_sum=self.q_sum,
        )
-        # 配置算法
+        # 配置 pymoo GA 算法
-        algorithm = ea.soea_SEGA_templet(
+        n_var = self.num_areas
-            problem, ea.Population(Encoding="RI", NIND=pop_size)
+        algorithm = GA(
            pop_size=pop_size,
            crossover=SBX(prob=0.9, eta=15),
            mutation=PM(prob=1.0 / max(1, n_var), eta=20),
            eliminate_duplicates=False,
        )
        algorithm.MAXGEN = max_gen
        algorithm.mutOper.Pm = 0.5  # 变异概率 (for Real Encoding, this is usually probability per individual or variable)
        algorithm.recOper.XOVR = 0.9  # 交叉概率
        algorithm.logTras = 1  # 每代记录一次
-        # 运行
+        # 终止条件：收敛检测 + 最大代数
-        res = ea.optimize(
+        termination = DefaultSingleObjectiveTermination(
            ftol=ftol,
            period=ftol_period,
            n_max_gen=max_gen,
        )
        # 回调：记录每代信息
        callback = _ProgressCallback()
        t0 = time.time()
        res = pymoo_minimize(
            problem,
            algorithm,
            termination,
            seed=42,
            verbose=True,
-            drawing=0,
+            callback=callback,
            outputMsg=True,
            drawLog=False,
            saveFlag=False,
        )
        elapsed = time.time() - t0
-        # 保存结果
+        # 提取最优解
-        best_ind = res["Vars"][0]  # 最优个体（漏损比例）
+        best_ind = res.X  # 最优个体（漏损比例原始值）
-        best_obj = res["ObjV"][0][0]  # 最优目标函数值
+        best_obj = float(res.F[0])
-        print(f"优化完成。最佳目标值: {best_obj}")
+        # 输出终止信息
        print(f"\n优化完成。耗时: {elapsed:.1f}s")
        print(f"总代数: {res.algorithm.n_gen}, 总评估次数: {problem._eval_count}")
        print(f"最佳目标值: {best_obj:.6f}")
        # 保存到文件
        effective_ratio_map = self._effective_area_ratios(
@@ -281,168 +290,168 @@ class LeakageIdentifier:
        return result_df
-if ea is not None:
class _ProgressCallback(Callback):
    """Per-generation callback that records how long each generation took.

    Attributes are plain Python values so they can be inspected after the
    optimisation run finishes.
    """

    def __init__(self):
        super().__init__()
        # Seconds elapsed between consecutive ``notify`` calls.
        self.gen_times: List[float] = []
        # Clock reading at the previous ``notify`` call; None before the first.
        self._t_last = None

    def notify(self, algorithm):
        """Record the time elapsed since the previous notification.

        Args:
            algorithm: The running algorithm object handed over by the
                optimiser; it is not inspected here.
        """
        # perf_counter is monotonic, so an interval can never come out
        # negative or inflated by a system clock adjustment mid-run.
        now = time.perf_counter()
        if self._t_last is not None:
            self.gen_times.append(now - self._t_last)
        self._t_last = now
            self.wn = wn
            self.nodes_by_area = nodes_by_area
            self.area_ids = area_ids
            self.sensor_nodes = sensor_nodes
            self.q_sum = q_sum
-            # 预处理观测数据以匹配模拟格式
class LeakageProblem(Problem):
    """pymoo problem that scores candidate leakage distributions in batch.

    Search space: one real variable in [0, 1] per area
    (``n_var == len(area_ids)``).
    Each candidate is turned into per-area ratios by
    ``LeakageIdentifier._effective_area_ratios``. According to the original
    author's note that helper normalises onto the simplex, which is why no
    explicit sum-to-one constraint is declared (the helper is not visible
    here; verify against its definition).
    Objective (minimised): norm of the row-normalised difference between
    simulated and observed pressures at the sensor nodes.
    """

    # Objective assigned to candidates whose simulation fails or is not finite.
    _FAILURE_PENALTY = 1e9

    def __init__(
        self,
        wn,
        nodes_by_area,
        area_ids,
        sensor_nodes,
        observed_data,
        q_sum: float = 0.2,
    ):
        """Cache everything that does not change between evaluations.

        Args:
            wn: Water network model; it is mutated temporarily during each
                evaluation and passed to ``wntr.sim.EpanetSimulator``.
            nodes_by_area: Mapping from area id to the node names in it.
            area_ids: Ordered area ids; one decision variable per entry.
            sensor_nodes: Node names whose pressures are compared.
            observed_data: Observed pressures; expected to expose
                ``.columns`` and ``.values`` (presumably a DataFrame).
            q_sum: Total leakage flow distributed over the areas.

        Raises:
            ValueError: If no area has a node with a demand entry to which
                leakage could be added.
        """
        n_var = len(area_ids)
        super().__init__(
            n_var=n_var,
            n_obj=1,
            n_ieq_constr=0,
            xl=np.zeros(n_var),
            xu=np.ones(n_var),
        )

        self.wn = wn
        self.nodes_by_area = nodes_by_area
        self.area_ids = area_ids
        self.sensor_nodes = sensor_nodes
        self.q_sum = q_sum

        # Align the observations with the simulated layout: select the sensor
        # columns by name when every sensor has one, otherwise fall back to
        # the first len(sensor_nodes) columns in positional order.
        n_sensors = len(self.sensor_nodes)
        try:
            missing_sensors = [
                s for s in self.sensor_nodes if s not in observed_data.columns
            ]
            if not missing_sensors:
                self.obs_matrix = observed_data[self.sensor_nodes].values
            else:
                self.obs_matrix = observed_data.values[:, :n_sensors]
        except Exception:
            # Deliberate best-effort: any lookup problem uses the same
            # positional fallback.
            self.obs_matrix = observed_data.values[:, :n_sensors]

        # Keep no more observation rows than the simulation has time steps.
        duration_sec = float(self.wn.options.time.duration)
        step_sec = float(self.wn.options.time.hydraulic_timestep)
        if step_sec > 0:
            max_steps = int(duration_sec / step_sec) + 1
            self.obs_matrix = self.obs_matrix[:max_steps, :]

        # Cache, per area, the first demand entry of every known node so each
        # evaluation avoids repeated node lookups. The node names are
        # snapshotted into a set once instead of re-reading the list per node.
        known_nodes = set(self.wn.node_name_list)
        self.demand_objs_by_area = {}
        for area_id in self.area_ids:
            demand_objs = []
            for node_name in self.nodes_by_area.get(area_id, []):
                if node_name not in known_nodes:
                    continue
                node = self.wn.get_node(node_name)
                if (
                    hasattr(node, "demand_timeseries_list")
                    and len(node.demand_timeseries_list) > 0
                ):
                    demand_objs.append(node.demand_timeseries_list[0])
            self.demand_objs_by_area[area_id] = demand_objs

        self.allocatable_counts = {
            area_id: len(self.demand_objs_by_area.get(area_id, []))
            for area_id in self.area_ids
        }
        if not any(count > 0 for count in self.allocatable_counts.values()):
            raise ValueError("没有可分配漏损的有效分区，无法满足漏损总量约束。")

        # Number of individuals evaluated so far (diagnostics only).
        self._eval_count = 0

    def _evaluate(self, X, out, *args, **kwargs):
        """Evaluate a whole population.

        Args:
            X: Decision-variable matrix of shape ``(pop_size, n_var)``.
            out: Result dict; the ``(pop_size, 1)`` objective column is
                stored under ``"F"``.
        """
        n_pop = X.shape[0]
        self._eval_count += n_pop

        F = np.zeros((n_pop, 1))
        for i in range(n_pop):
            F[i, 0] = self._evaluate_single(X[i])
        out["F"] = F

    def _evaluate_single(self, x):
        """Score one individual.

        The leakage is added to the cached demand entries, the network is
        simulated, and the original demands are restored afterwards whatever
        the outcome.

        Args:
            x: Raw decision vector, one value per area.

        Returns:
            The norm of the row-normalised pressure difference, or
            ``_FAILURE_PENALTY`` when the simulation raises or the result is
            not finite.
        """
        effective_ratio_map = LeakageIdentifier._effective_area_ratios(
            x,
            self.area_ids,
            self.nodes_by_area,
            allocatable_counts=self.allocatable_counts,
        )

        # (demand_obj, value before this change) in application order.
        modifications = []
        for area_id in self.area_ids:
            ratio = effective_ratio_map.get(area_id, 0.0)
            if ratio <= 0:
                continue

            demand_objs = self.demand_objs_by_area.get(area_id, [])
            if not demand_objs:
                continue

            # Spread the area's share of the leakage evenly over its nodes.
            per_node_leak = self.q_sum * ratio / len(demand_objs)
            for demand_obj in demand_objs:
                original_val = demand_obj.base_value
                demand_obj.base_value = original_val + per_node_leak
                modifications.append((demand_obj, original_val))

        try:
            sim = wntr.sim.EpanetSimulator(self.wn)
            results = sim.run_sim()

            sim_pressure = results.node["pressure"].loc[:, self.sensor_nodes]

            # Compare only the time steps present on both sides.
            n_steps = min(sim_pressure.shape[0], self.obs_matrix.shape[0])
            diff = sim_pressure.values[:n_steps, :] - self.obs_matrix[:n_steps, :]

            # Normalise each time step by its largest absolute difference.
            row_max = np.max(np.abs(diff), axis=1, keepdims=True)
            row_max[row_max == 0] = 1.0  # avoid division by zero
            error = float(np.linalg.norm(diff / row_max))

            # A NaN/inf objective cannot be ranked against other individuals,
            # so treat it like a failed simulation.
            if not np.isfinite(error):
                return self._FAILURE_PENALTY
            return error
        except Exception:
            # Deliberate best-effort: a failed simulation gets a high penalty
            # instead of aborting the whole optimisation run.
            return self._FAILURE_PENALTY
        finally:
            # Undo in reverse order so that a demand entry modified more than
            # once (e.g. a node listed in two areas) ends at its true
            # original value rather than at an intermediate one.
            for demand_obj, original_val in reversed(modifications):
                demand_obj.base_value = original_val
    class LeakageProblem:
        def __init__(self, *args, **kwargs):
            raise ImportError(
                "geatpy 无法导入，LeakageProblem 不可用。"
            ) from _GEATPY_IMPORT_ERROR
 def main() -> int:
@@ -1,14 +1,17 @@
 from .database import *
 from .s0_base import *
 def get_scheme_schema(name: str) -> dict[str, dict[Any, Any]]:
    """Return the field schema of a scheme record.

    Every field is a required string; only ``name`` and ``type`` are
    writable, the rest are read-only.

    Args:
        name: Accepted for interface compatibility; not used here.

    Returns:
        A new dict mapping each field name to its
        ``{"type", "optional", "readonly"}`` spec, in declaration order.
    """
    fields = ("id", "name", "type", "create_time", "start_time", "detail")
    writable = {"name", "type"}
    # Built from one table so the per-field specs cannot drift apart.
    return {
        field: {"type": "str", "optional": False, "readonly": field not in writable}
        for field in fields
    }
 def get_scheme(name: str, schema_name: str) -> dict[Any, Any]:
    t = try_read(name, f"select * from scheme_list where scheme_name = '{schema_name}'")
@@ -16,15 +19,15 @@ def get_scheme(name: str, schema_name: str) -> dict[Any, Any]:
        return {}
    d = {}
-    d['id'] = str(t['scheme_id'])
+    d["id"] = str(t["scheme_id"])
-    d['name'] = str(t['scheme_name'])
+    d["name"] = str(t["scheme_name"])
-    d['type'] = str(t['scheme_type'])
+    d["type"] = str(t["scheme_type"])
-    d['create_time'] = str(t['create_time'])
+    d["create_time"] = str(t["create_time"])
-    d['start_time'] = str(t['start_time'])
+    d["start_time"] = str(t["start_time"])
-    d['detail'] = str(t['detail'])
+    d["detail"] = str(t["detail"])
    return d
 def get_all_schemes(name: str) -> list[dict[Any, Any]]:
    """Return whatever ``read_all`` yields for a full ``scheme_list`` select.

    ``name`` is passed through to ``read_all`` unchanged.
    """
    query = "select * from scheme_list"
    return read_all(name, query)
@@ -165,3 +165,4 @@ wntr==1.3.2
 wrapt==1.17.3
 zipp==3.23.0
 zmq==0.0.0
 pymoo==0.6.1.6