使用pymoo实现遗传算法

2026-03-03 16:29:59 +08:00
parent e7a3aec02f
commit d0abad3c65
3 changed files with 197 additions and 184 deletions
@@ -2,17 +2,17 @@ import wntr
 import numpy as np
 import pandas as pd
 import os
 import time
 import argparse
 from typing import Any, List, Dict, Union
-try:
+from pymoo.core.problem import Problem
-    import geatpy as ea
+from pymoo.core.callback import Callback
-
+from pymoo.algorithms.soo.nonconvex.ga import GA
-    _GEATPY_IMPORT_ERROR = None
+from pymoo.operators.crossover.sbx import SBX
-
+from pymoo.operators.mutation.pm import PM
-except Exception as import_error:
+from pymoo.optimize import minimize as pymoo_minimize
-    ea = None
+from pymoo.termination.default import DefaultSingleObjectiveTermination
    _GEATPY_IMPORT_ERROR = import_error
 class LeakageIdentifier:
@@ -175,6 +175,8 @@ class LeakageIdentifier:
        max_gen: int = 100,
        output_flow_unit: str = "m3/s",
        save_result: bool = True,
        ftol: float = 1e-3,
        ftol_period: int = 15,
    ):
        """
        运行遗传算法以识别漏损分布。
@@ -184,19 +186,15 @@ class LeakageIdentifier:
            output_dir: 结果保存目录。
            pop_size: GA 的种群大小。
            max_gen: GA 的最大代数。
            output_flow_unit: 输出漏损流量的单位。
            save_result: 是否保存识别结果到本地 CSV。
            ftol: 目标值收敛容差（连续 ftol_period 代改善 < ftol 则停止）。
            ftol_period: 收敛检测的窗口代数。
        """
        if ea is None:
            raise ImportError(
                "geatpy 无法导入，无法运行识别。请安装兼容版本或修复依赖。"
            ) from _GEATPY_IMPORT_ERROR
        if save_result:
            os.makedirs(output_dir, exist_ok=True)
        # 加载观测数据
        # 假设观测数据格式：行=时间，列=传感器（或原始格式）
        # 原始代码: scadapd = pd.read_csv(scada); Rs = scadapd.values[1 : Rlen + 1, :]
        # 我们需要将其标准化。假设带有标题的标准 CSV 或匹配原始格式。
        if isinstance(observed_pressure_data, str):
            obs_df = pd.read_csv(observed_pressure_data)
            observed_name = os.path.basename(observed_pressure_data)
@@ -206,11 +204,8 @@ class LeakageIdentifier:
        else:
            obs_df = pd.DataFrame(observed_pressure_data)
            observed_name = "observed_pressure.csv"
        # 提取特定传感器和时间步长的观测压力
        # 这部分需要与数据存储方式对齐。
        # 对于此重构，我们将遵循读取对应模拟步骤值的原始逻辑。
-        # 准备问题实例
+        # 准备 pymoo 问题实例
        problem = LeakageProblem(
            self.wn,
            self.nodes_by_area,
@@ -220,30 +215,44 @@ class LeakageIdentifier:
            q_sum=self.q_sum,
        )
-        # 配置算法
+        # 配置 pymoo GA 算法
-        algorithm = ea.soea_SEGA_templet(
+        n_var = self.num_areas
-            problem, ea.Population(Encoding="RI", NIND=pop_size)
+        algorithm = GA(
            pop_size=pop_size,
            crossover=SBX(prob=0.9, eta=15),
            mutation=PM(prob=1.0 / max(1, n_var), eta=20),
            eliminate_duplicates=False,
        )
        algorithm.MAXGEN = max_gen
        algorithm.mutOper.Pm = 0.5  # 变异概率 (for Real Encoding, this is usually probability per individual or variable)
        algorithm.recOper.XOVR = 0.9  # 交叉概率
        algorithm.logTras = 1  # 每代记录一次
-        # 运行
+        # 终止条件：收敛检测 + 最大代数
-        res = ea.optimize(
+        termination = DefaultSingleObjectiveTermination(
            ftol=ftol,
            period=ftol_period,
            n_max_gen=max_gen,
        )
        # 回调：记录每代信息
        callback = _ProgressCallback()
        t0 = time.time()
        res = pymoo_minimize(
            problem,
            algorithm,
            termination,
            seed=42,
            verbose=True,
-            drawing=0,
+            callback=callback,
            outputMsg=True,
            drawLog=False,
            saveFlag=False,
        )
        elapsed = time.time() - t0
-        # 保存结果
+        # 提取最优解
-        best_ind = res["Vars"][0]  # 最优个体（漏损比例）
+        best_ind = res.X  # 最优个体（漏损比例原始值）
-        best_obj = res["ObjV"][0][0]  # 最优目标函数值
+        best_obj = float(res.F[0])
-        print(f"优化完成。最佳目标值: {best_obj}")
+        # 输出终止信息
        print(f"\n优化完成。耗时: {elapsed:.1f}s")
        print(f"总代数: {res.algorithm.n_gen}, 总评估次数: {problem._eval_count}")
        print(f"最佳目标值: {best_obj:.6f}")
        # 保存到文件
        effective_ratio_map = self._effective_area_ratios(
@@ -281,9 +290,29 @@ class LeakageIdentifier:
        return result_df
-if ea is not None:
+class _ProgressCallback(Callback):
    """Per-generation callback: records progress timing.

    An instance is handed to the optimizer through its ``callback`` argument.
    Each ``notify`` call stores the wall-clock time elapsed since the previous
    ``notify`` call in ``gen_times``.
    NOTE(review): presumably the optimizer calls ``notify`` once per
    generation -- confirm against the pymoo ``Callback`` documentation.
    """
    def __init__(self):
        super().__init__()
        # Seconds elapsed between consecutive notify() calls, in call order.
        self.gen_times = []
        # Timestamp of the most recent notify() call; None until the first one.
        self._t_last = None
    def notify(self, algorithm):
        # `algorithm` is not inspected here; only the time of the call is used.
        now = time.time()
        # The first call has no predecessor, so it only sets the baseline.
        if self._t_last is not None:
            self.gen_times.append(now - self._t_last)
        self._t_last = now
 class LeakageProblem(Problem):
    """pymoo 批量评估问题定义。
    搜索空间：n 维 [0, 1] 实数 -> 通过 _effective_area_ratios 归一化到单纯形。
    目标：模拟压力与观测压力之间的归一化误差范数。
    无显式约束（sum=1 由归一化自动保证）。
    """
    class LeakageProblem(ea.Problem):
    def __init__(
        self,
        wn,
@@ -291,19 +320,17 @@ if ea is not None:
        area_ids,
        sensor_nodes,
        observed_data,
-            q_sum,
+        q_sum: float = 0.2,
    ):
-            name = "LeakageIdentification"
+        n_var = len(area_ids)
            M = 1  # 单目标
            maxormins = [1]  # 最小化
            Dim = len(area_ids)  # 维度 = 区域数量
            varTypes = [0] * Dim  # 连续变量
            lb = [0] * Dim  # 下界 0
            ub = [1] * Dim  # 上界 1
            lbin = [1] * Dim  # 包含下界
            ubin = [1] * Dim  # 包含上界
-            super().__init__(name, M, maxormins, Dim, varTypes, lb, ub, lbin, ubin)
+        super().__init__(
            n_var=n_var,
            n_obj=1,
            n_ieq_constr=0,
            xl=np.zeros(n_var),
            xu=np.ones(n_var),
        )
        self.wn = wn
        self.nodes_by_area = nodes_by_area
@@ -312,16 +339,13 @@ if ea is not None:
        self.q_sum = q_sum
        # 预处理观测数据以匹配模拟格式
            # 提取对应于传感器节点的列
        try:
                # 检查列是否存在
            missing_sensors = [
                s for s in self.sensor_nodes if s not in observed_data.columns
            ]
            if not missing_sensors:
                self.obs_matrix = observed_data[self.sensor_nodes].values
            else:
                    # 回退：如果标题不匹配，假设列顺序与传感器节点一致
                self.obs_matrix = observed_data.values[:, : len(self.sensor_nodes)]
        except Exception:
            self.obs_matrix = observed_data.values[:, : len(self.sensor_nodes)]
@@ -354,15 +378,27 @@ if ea is not None:
        if not any(count > 0 for count in self.allocatable_counts.values()):
            raise ValueError("没有可分配漏损的有效分区，无法满足漏损总量约束。")
-        def aimFunc(self, pop):
+        # 评估计数器（诊断用）
-            Vars = pop.Phen  # 种群表现型（实数值）
+        self._eval_count = 0
            NIND = Vars.shape[0]
            ObjV = np.zeros((NIND, 1))
-            for i in range(NIND):
+    def _evaluate(self, X, out, *args, **kwargs):
-                leak_ratios = Vars[i, :]
+        """批量评估种群。
-                # 1. 将漏损分布应用到模型
+        X: 形状 (pop_size, n_var) 的决策变量矩阵。
        """
        n_pop = X.shape[0]
        self._eval_count += n_pop
        F = np.zeros((n_pop, 1))
        for i in range(n_pop):
            F[i, 0] = self._evaluate_single(X[i])
        out["F"] = F
    def _evaluate_single(self, x):
        """评估单个个体，返回归一化误差范数。"""
        leak_ratios = x
        # 将漏损分布归一化
        effective_ratio_map = LeakageIdentifier._effective_area_ratios(
            leak_ratios,
            self.area_ids,
@@ -370,8 +406,8 @@ if ea is not None:
            allocatable_counts=self.allocatable_counts,
        )
-                # 此时跟踪修改以便稍后恢复
+        # 跟踪修改以便稍后恢复
-                modifications = []  # (node_obj, original_base_demand) 列表
+        modifications = []
        for j, area_id in enumerate(self.area_ids):
            ratio = effective_ratio_map.get(area_id, 0.0)
@@ -382,7 +418,6 @@ if ea is not None:
            if not demand_objs:
                continue
                    # 将漏损分配给区域内的节点
            per_node_leak = self.q_sum * ratio / len(demand_objs)
            for demand_obj in demand_objs:
@@ -390,60 +425,34 @@ if ea is not None:
                demand_obj.base_value = original_val + per_node_leak
                modifications.append((demand_obj, original_val))
                # 2. 运行模拟
        try:
            sim = wntr.sim.EpanetSimulator(self.wn)
            results = sim.run_sim()
                    # 3. 计算目标函数（误差）
            sim_pressure = results.node["pressure"].loc[:, self.sensor_nodes]
                    # 对齐维度
                    # 仅比较重叠的时间步长
            n_steps = min(sim_pressure.shape[0], self.obs_matrix.shape[0])
            sim_vals = sim_pressure.values[:n_steps, :]
            obs_vals = self.obs_matrix[:n_steps, :]
                    # 差值
            diff = sim_vals - obs_vals
-                    # 按行最大值归一化（根据原始代码逻辑）
+            # 按行最大值归一化
                    # R1 = R0 - Rs
                    # Rmax = R1.max(axis=1) -> 该时间步长的最大绝对差值？
                    # 原始代码: Rmax = R1.max(axis=1) (该时间步长所有传感器的最大值)
                    # 注意：如果最大差值为 0 或负数，原始代码逻辑可能有缺陷，使用绝对最大值更安全
                    # Rmax = Rmax.reshape(-1, 1)
                    # R = R1 / Rmax
                    # 计算每个时间步长的最大差值
            row_max = np.max(np.abs(diff), axis=1, keepdims=True)
            row_max[row_max == 0] = 1.0  # 防止除以零
            normalized_diff = diff / row_max
            # 目标：归一化差值矩阵的 2-范数
-                    error = np.linalg.norm(normalized_diff)
+            return float(np.linalg.norm(normalized_diff))
                    ObjV[i] = error
        except Exception:
-                    ObjV[i] = 1e9  # 失败时给予高惩罚
+            return 1e9
-                # 4. 恢复模型更改
+        finally:
            for demand_obj, original_val in modifications:
                demand_obj.base_value = original_val
            pop.ObjV = ObjV
            pass
 else:
    class LeakageProblem:
        def __init__(self, *args, **kwargs):
            raise ImportError(
                "geatpy 无法导入，LeakageProblem 不可用。"
            ) from _GEATPY_IMPORT_ERROR
 def main() -> int:
    parser = argparse.ArgumentParser(description="漏损区域识别")
@@ -1,13 +1,16 @@
 from .database import *
 from .s0_base import *
 def get_scheme_schema(name: str) -> dict[str, dict[Any, Any]]:
-    return { 'id'         : {'type': 'str'      , 'optional': False , 'readonly': True },
+    return {
-             'name'       : {'type': 'str'      , 'optional': False , 'readonly': False},
+        "id": {"type": "str", "optional": False, "readonly": True},
-             'type'       : {'type': 'str'      , 'optional': False , 'readonly': False},
+        "name": {"type": "str", "optional": False, "readonly": False},
-             "create_time": {'type': 'str'      , 'optional': False , 'readonly': True },
+        "type": {"type": "str", "optional": False, "readonly": False},
-             "start_time" : {'type': 'str'      , 'optional': False , 'readonly': True },
+        "create_time": {"type": "str", "optional": False, "readonly": True},
-             "detail"     : {'type': 'str'      , 'optional': False , 'readonly': True } }
+        "start_time": {"type": "str", "optional": False, "readonly": True},
        "detail": {"type": "str", "optional": False, "readonly": True},
    }
 def get_scheme(name: str, schema_name: str) -> dict[Any, Any]:
@@ -16,15 +19,15 @@ def get_scheme(name: str, schema_name: str) -> dict[Any, Any]:
        return {}
    d = {}
-    d['id'] = str(t['scheme_id'])
+    d["id"] = str(t["scheme_id"])
-    d['name'] = str(t['scheme_name'])
+    d["name"] = str(t["scheme_name"])
-    d['type'] = str(t['scheme_type'])
+    d["type"] = str(t["scheme_type"])
-    d['create_time'] = str(t['create_time'])
+    d["create_time"] = str(t["create_time"])
-    d['start_time'] = str(t['start_time'])
+    d["start_time"] = str(t["start_time"])
-    d['detail'] = str(t['detail'])
+    d["detail"] = str(t["detail"])
    return d
 def get_all_schemes(name: str) -> list[dict[Any, Any]]:
     """Return the result of ``read_all`` for ``select * from scheme_list``.

     ``name`` is forwarded to ``read_all`` unchanged.
     NOTE(review): ``read_all`` is not defined in this view (it presumably
     comes from one of the star imports); confirm what ``name`` identifies
     and the exact shape of the returned rows there.
     """
     return read_all(name, "select * from scheme_list")
@@ -165,3 +165,4 @@ wntr==1.3.2
 wrapt==1.17.3
 zipp==3.23.0
 zmq==0.0.0
 pymoo==0.6.1.6