使用pymoo实现遗传算法

This commit is contained in:
2026-03-03 16:29:59 +08:00
parent e7a3aec02f
commit d0abad3c65
3 changed files with 197 additions and 184 deletions
+179 -170
View File
@@ -2,17 +2,17 @@ import wntr
import numpy as np
import pandas as pd
import os
import time
import argparse
from typing import Any, List, Dict, Union
try:
import geatpy as ea
_GEATPY_IMPORT_ERROR = None
except Exception as import_error:
ea = None
_GEATPY_IMPORT_ERROR = import_error
from pymoo.core.problem import Problem
from pymoo.core.callback import Callback
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.mutation.pm import PM
from pymoo.optimize import minimize as pymoo_minimize
from pymoo.termination.default import DefaultSingleObjectiveTermination
class LeakageIdentifier:
@@ -175,6 +175,8 @@ class LeakageIdentifier:
max_gen: int = 100,
output_flow_unit: str = "m3/s",
save_result: bool = True,
ftol: float = 1e-3,
ftol_period: int = 15,
):
"""
运行遗传算法以识别漏损分布。
@@ -184,19 +186,15 @@ class LeakageIdentifier:
output_dir: 结果保存目录。
pop_size: GA 的种群大小。
max_gen: GA 的最大代数。
output_flow_unit: 输出漏损流量的单位。
save_result: 是否保存识别结果到本地 CSV。
ftol: 目标值收敛容差(连续 ftol_period 代改善 < ftol 则停止)。
ftol_period: 收敛检测的窗口代数。
"""
if ea is None:
raise ImportError(
"geatpy 无法导入,无法运行识别。请安装兼容版本或修复依赖。"
) from _GEATPY_IMPORT_ERROR
if save_result:
os.makedirs(output_dir, exist_ok=True)
# 加载观测数据
# 假设观测数据格式:行=时间,列=传感器(或原始格式)
# 原始代码: scadapd = pd.read_csv(scada); Rs = scadapd.values[1 : Rlen + 1, :]
# 我们需要将其标准化。假设带有标题的标准 CSV 或匹配原始格式。
if isinstance(observed_pressure_data, str):
obs_df = pd.read_csv(observed_pressure_data)
observed_name = os.path.basename(observed_pressure_data)
@@ -206,11 +204,8 @@ class LeakageIdentifier:
else:
obs_df = pd.DataFrame(observed_pressure_data)
observed_name = "observed_pressure.csv"
# 提取特定传感器和时间步长的观测压力
# 这部分需要与数据存储方式对齐。
# 对于此重构,我们将遵循读取对应模拟步骤值的原始逻辑。
# 准备问题实例
# 准备 pymoo 问题实例
problem = LeakageProblem(
self.wn,
self.nodes_by_area,
@@ -220,30 +215,44 @@ class LeakageIdentifier:
q_sum=self.q_sum,
)
# 配置算法
algorithm = ea.soea_SEGA_templet(
problem, ea.Population(Encoding="RI", NIND=pop_size)
# 配置 pymoo GA 算法
n_var = self.num_areas
algorithm = GA(
pop_size=pop_size,
crossover=SBX(prob=0.9, eta=15),
mutation=PM(prob=1.0 / max(1, n_var), eta=20),
eliminate_duplicates=False,
)
algorithm.MAXGEN = max_gen
algorithm.mutOper.Pm = 0.5 # 变异概率 (for Real Encoding, this is usually probability per individual or variable)
algorithm.recOper.XOVR = 0.9 # 交叉概率
algorithm.logTras = 1 # 每代记录一次
# 运行
res = ea.optimize(
# 终止条件:收敛检测 + 最大代数
termination = DefaultSingleObjectiveTermination(
ftol=ftol,
period=ftol_period,
n_max_gen=max_gen,
)
# 回调:记录每代信息
callback = _ProgressCallback()
t0 = time.time()
res = pymoo_minimize(
problem,
algorithm,
termination,
seed=42,
verbose=True,
drawing=0,
outputMsg=True,
drawLog=False,
saveFlag=False,
callback=callback,
)
elapsed = time.time() - t0
# 保存结果
best_ind = res["Vars"][0] # 最优个体(漏损比例)
best_obj = res["ObjV"][0][0] # 最优目标函数值
# 提取最优解
best_ind = res.X # 最优个体(漏损比例原始值
best_obj = float(res.F[0])
print(f"优化完成。最佳目标值: {best_obj}")
# 输出终止信息
print(f"\n优化完成。耗时: {elapsed:.1f}s")
print(f"总代数: {res.algorithm.n_gen}, 总评估次数: {problem._eval_count}")
print(f"最佳目标值: {best_obj:.6f}")
# 保存到文件
effective_ratio_map = self._effective_area_ratios(
@@ -281,168 +290,168 @@ class LeakageIdentifier:
return result_df
if ea is not None:
class _ProgressCallback(Callback):
"""每代回调:记录进度。"""
class LeakageProblem(ea.Problem):
def __init__(
self,
wn,
nodes_by_area,
area_ids,
sensor_nodes,
observed_data,
q_sum,
):
name = "LeakageIdentification"
M = 1 # 单目标
maxormins = [1] # 最小化
Dim = len(area_ids) # 维度 = 区域数量
varTypes = [0] * Dim # 连续变量
lb = [0] * Dim # 下界 0
ub = [1] * Dim # 上界 1
lbin = [1] * Dim # 包含下界
ubin = [1] * Dim # 包含上界
def __init__(self):
super().__init__()
self.gen_times = []
self._t_last = None
super().__init__(name, M, maxormins, Dim, varTypes, lb, ub, lbin, ubin)
def notify(self, algorithm):
now = time.time()
if self._t_last is not None:
self.gen_times.append(now - self._t_last)
self._t_last = now
self.wn = wn
self.nodes_by_area = nodes_by_area
self.area_ids = area_ids
self.sensor_nodes = sensor_nodes
self.q_sum = q_sum
# 预处理观测数据以匹配模拟格式
# 提取对应于传感器节点的列
try:
# 检查列是否存在
missing_sensors = [
s for s in self.sensor_nodes if s not in observed_data.columns
]
if not missing_sensors:
self.obs_matrix = observed_data[self.sensor_nodes].values
else:
# 回退:如果标题不匹配,假设列顺序与传感器节点一致
self.obs_matrix = observed_data.values[:, : len(self.sensor_nodes)]
except Exception:
class LeakageProblem(Problem):
"""pymoo 批量评估问题定义。
搜索空间:n 维 [0, 1] 实数 -> 通过 _effective_area_ratios 归一化到单纯形。
目标:模拟压力与观测压力之间的归一化误差范数。
无显式约束(sum=1 由归一化自动保证)。
"""
def __init__(
self,
wn,
nodes_by_area,
area_ids,
sensor_nodes,
observed_data,
q_sum: float = 0.2,
):
n_var = len(area_ids)
super().__init__(
n_var=n_var,
n_obj=1,
n_ieq_constr=0,
xl=np.zeros(n_var),
xu=np.ones(n_var),
)
self.wn = wn
self.nodes_by_area = nodes_by_area
self.area_ids = area_ids
self.sensor_nodes = sensor_nodes
self.q_sum = q_sum
# 预处理观测数据以匹配模拟格式
try:
missing_sensors = [
s for s in self.sensor_nodes if s not in observed_data.columns
]
if not missing_sensors:
self.obs_matrix = observed_data[self.sensor_nodes].values
else:
self.obs_matrix = observed_data.values[:, : len(self.sensor_nodes)]
except Exception:
self.obs_matrix = observed_data.values[:, : len(self.sensor_nodes)]
duration_sec = float(self.wn.options.time.duration)
step_sec = float(self.wn.options.time.hydraulic_timestep)
if step_sec > 0:
max_steps = int(duration_sec / step_sec) + 1
self.obs_matrix = self.obs_matrix[:max_steps, :]
duration_sec = float(self.wn.options.time.duration)
step_sec = float(self.wn.options.time.hydraulic_timestep)
if step_sec > 0:
max_steps = int(duration_sec / step_sec) + 1
self.obs_matrix = self.obs_matrix[:max_steps, :]
# 预先缓存每个区域的需水对象,减少每次适应度计算的节点查找
self.demand_objs_by_area = {}
for area_id in self.area_ids:
demand_objs = []
for node_name in self.nodes_by_area.get(area_id, []):
if node_name not in self.wn.node_name_list:
continue
node = self.wn.get_node(node_name)
if (
hasattr(node, "demand_timeseries_list")
and len(node.demand_timeseries_list) > 0
):
demand_objs.append(node.demand_timeseries_list[0])
self.demand_objs_by_area[area_id] = demand_objs
# 预先缓存每个区域的需水对象,减少每次适应度计算的节点查找
self.demand_objs_by_area = {}
for area_id in self.area_ids:
demand_objs = []
for node_name in self.nodes_by_area.get(area_id, []):
if node_name not in self.wn.node_name_list:
continue
node = self.wn.get_node(node_name)
if (
hasattr(node, "demand_timeseries_list")
and len(node.demand_timeseries_list) > 0
):
demand_objs.append(node.demand_timeseries_list[0])
self.demand_objs_by_area[area_id] = demand_objs
self.allocatable_counts = {
area_id: len(self.demand_objs_by_area.get(area_id, []))
for area_id in self.area_ids
}
if not any(count > 0 for count in self.allocatable_counts.values()):
raise ValueError("没有可分配漏损的有效分区,无法满足漏损总量约束。")
self.allocatable_counts = {
area_id: len(self.demand_objs_by_area.get(area_id, []))
for area_id in self.area_ids
}
if not any(count > 0 for count in self.allocatable_counts.values()):
raise ValueError("没有可分配漏损的有效分区,无法满足漏损总量约束。")
def aimFunc(self, pop):
Vars = pop.Phen # 种群表现型(实数值)
NIND = Vars.shape[0]
ObjV = np.zeros((NIND, 1))
# 评估计数器(诊断用)
self._eval_count = 0
for i in range(NIND):
leak_ratios = Vars[i, :]
def _evaluate(self, X, out, *args, **kwargs):
"""批量评估种群。
# 1. 将漏损分布应用到模型
effective_ratio_map = LeakageIdentifier._effective_area_ratios(
leak_ratios,
self.area_ids,
self.nodes_by_area,
allocatable_counts=self.allocatable_counts,
)
X: 形状 (pop_size, n_var) 的决策变量矩阵。
"""
n_pop = X.shape[0]
self._eval_count += n_pop
# 此时跟踪修改以便稍后恢复
modifications = [] # (node_obj, original_base_demand) 列表
F = np.zeros((n_pop, 1))
for i in range(n_pop):
F[i, 0] = self._evaluate_single(X[i])
out["F"] = F
for j, area_id in enumerate(self.area_ids):
ratio = effective_ratio_map.get(area_id, 0.0)
if ratio <= 0:
continue
def _evaluate_single(self, x):
"""评估单个个体,返回归一化误差范数。"""
leak_ratios = x
demand_objs = self.demand_objs_by_area.get(area_id, [])
if not demand_objs:
continue
# 将漏损分布归一化
effective_ratio_map = LeakageIdentifier._effective_area_ratios(
leak_ratios,
self.area_ids,
self.nodes_by_area,
allocatable_counts=self.allocatable_counts,
)
# 将漏损分配给区域内的节点
per_node_leak = self.q_sum * ratio / len(demand_objs)
# 跟踪修改以便稍后恢复
modifications = []
for demand_obj in demand_objs:
original_val = demand_obj.base_value
demand_obj.base_value = original_val + per_node_leak
modifications.append((demand_obj, original_val))
for j, area_id in enumerate(self.area_ids):
ratio = effective_ratio_map.get(area_id, 0.0)
if ratio <= 0:
continue
# 2. 运行模拟
try:
sim = wntr.sim.EpanetSimulator(self.wn)
results = sim.run_sim()
demand_objs = self.demand_objs_by_area.get(area_id, [])
if not demand_objs:
continue
# 3. 计算目标函数(误差)
sim_pressure = results.node["pressure"].loc[:, self.sensor_nodes]
per_node_leak = self.q_sum * ratio / len(demand_objs)
# 对齐维度
# 仅比较重叠的时间步长
n_steps = min(sim_pressure.shape[0], self.obs_matrix.shape[0])
sim_vals = sim_pressure.values[:n_steps, :]
obs_vals = self.obs_matrix[:n_steps, :]
for demand_obj in demand_objs:
original_val = demand_obj.base_value
demand_obj.base_value = original_val + per_node_leak
modifications.append((demand_obj, original_val))
# 差值
diff = sim_vals - obs_vals
try:
sim = wntr.sim.EpanetSimulator(self.wn)
results = sim.run_sim()
# 按行最大值归一化(根据原始代码逻辑)
# R1 = R0 - Rs
# Rmax = R1.max(axis=1) -> 该时间步长的最大绝对差值?
# 原始代码: Rmax = R1.max(axis=1) (该时间步长所有传感器的最大值)
# 注意:如果最大差值为 0 或负数,原始代码逻辑可能有缺陷,使用绝对最大值更安全
# Rmax = Rmax.reshape(-1, 1)
# R = R1 / Rmax
sim_pressure = results.node["pressure"].loc[:, self.sensor_nodes]
# 计算每个时间步长的最大差值
row_max = np.max(np.abs(diff), axis=1, keepdims=True)
row_max[row_max == 0] = 1.0 # 防止除以零
n_steps = min(sim_pressure.shape[0], self.obs_matrix.shape[0])
sim_vals = sim_pressure.values[:n_steps, :]
obs_vals = self.obs_matrix[:n_steps, :]
normalized_diff = diff / row_max
diff = sim_vals - obs_vals
# 目标:归一化差值矩阵的 2-范数
error = np.linalg.norm(normalized_diff)
ObjV[i] = error
# 按行最大值归一化
row_max = np.max(np.abs(diff), axis=1, keepdims=True)
row_max[row_max == 0] = 1.0 # 防止除以零
except Exception:
ObjV[i] = 1e9 # 失败时给予高惩罚
normalized_diff = diff / row_max
# 4. 恢复模型更改
for demand_obj, original_val in modifications:
demand_obj.base_value = original_val
# 目标:归一化差值矩阵的 2-范数
return float(np.linalg.norm(normalized_diff))
pop.ObjV = ObjV
except Exception:
return 1e9
pass
else:
class LeakageProblem:
def __init__(self, *args, **kwargs):
raise ImportError(
"geatpy 无法导入,LeakageProblem 不可用。"
) from _GEATPY_IMPORT_ERROR
finally:
for demand_obj, original_val in modifications:
demand_obj.base_value = original_val
def main() -> int:
+17 -14
View File
@@ -1,14 +1,17 @@
from .database import *
from .s0_base import *
def get_scheme_schema(name: str) -> dict[str, dict[Any, Any]]:
return { 'id' : {'type': 'str' , 'optional': False , 'readonly': True },
'name' : {'type': 'str' , 'optional': False , 'readonly': False},
'type' : {'type': 'str' , 'optional': False , 'readonly': False},
"create_time": {'type': 'str' , 'optional': False , 'readonly': True },
"start_time" : {'type': 'str' , 'optional': False , 'readonly': True },
"detail" : {'type': 'str' , 'optional': False , 'readonly': True } }
return {
"id": {"type": "str", "optional": False, "readonly": True},
"name": {"type": "str", "optional": False, "readonly": False},
"type": {"type": "str", "optional": False, "readonly": False},
"create_time": {"type": "str", "optional": False, "readonly": True},
"start_time": {"type": "str", "optional": False, "readonly": True},
"detail": {"type": "str", "optional": False, "readonly": True},
}
def get_scheme(name: str, schema_name: str) -> dict[Any, Any]:
t = try_read(name, f"select * from scheme_list where scheme_name = '{schema_name}'")
@@ -16,15 +19,15 @@ def get_scheme(name: str, schema_name: str) -> dict[Any, Any]:
return {}
d = {}
d['id'] = str(t['scheme_id'])
d['name'] = str(t['scheme_name'])
d['type'] = str(t['scheme_type'])
d['create_time'] = str(t['create_time'])
d['start_time'] = str(t['start_time'])
d['detail'] = str(t['detail'])
d["id"] = str(t["scheme_id"])
d["name"] = str(t["scheme_name"])
d["type"] = str(t["scheme_type"])
d["create_time"] = str(t["create_time"])
d["start_time"] = str(t["start_time"])
d["detail"] = str(t["detail"])
return d
def get_all_schemes(name: str) -> list[dict[Any, Any]]:
return read_all(name, "select * from scheme_list")
+1
View File
@@ -165,3 +165,4 @@ wntr==1.3.2
wrapt==1.17.3
zipp==3.23.0
zmq==0.0.0
pymoo==0.6.1.6