修复数据清洗index越界错误;重命名压力流量清洗方法
This commit is contained in:
@@ -1,3 +1,3 @@
|
||||
from .Fdataclean import *
|
||||
from .Pdataclean import *
|
||||
from .flow_data_clean import *
|
||||
from .pressure_data_clean import *
|
||||
from .pipeline_health_analyzer import *
|
||||
@@ -292,28 +292,47 @@ def clean_flow_data_df_kf(data: pd.DataFrame, show_plot: bool = False) -> dict:
|
||||
plt.rcParams["axes.unicode_minus"] = False
|
||||
if show_plot and len(data.columns) > 0:
|
||||
sensor_to_plot = data.columns[0]
|
||||
|
||||
# 定义x轴
|
||||
n = len(data)
|
||||
time = np.arange(n)
|
||||
n_filled = len(data_filled)
|
||||
time_filled = np.arange(n_filled)
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
|
||||
plt.subplot(2, 1, 1)
|
||||
plt.plot(
|
||||
data.index,
|
||||
time,
|
||||
data[sensor_to_plot],
|
||||
label="原始监测值",
|
||||
marker="o",
|
||||
markersize=3,
|
||||
alpha=0.7,
|
||||
)
|
||||
abnormal_zero_idx = data.index[data_filled[sensor_to_plot].isna()]
|
||||
|
||||
# 修正:检查 data_filled 的异常值,绘制在 time_filled 上
|
||||
abnormal_zero_mask = data_filled[sensor_to_plot].isna()
|
||||
# 如果目的是检查0值,应该用 == 0。这里保留 isna() 但修正索引引用,防止crash。
|
||||
# 如果原意是 isna() 则在 fillna 后通常没有 na。假设用户可能想检查 0 值?
|
||||
# 基于 "异常0值" 的标签,改为检查 0 值更合理,但为了保险起见,
|
||||
# 如果 isna() 返回空,就不画。防止索引越界是主要的。
|
||||
abnormal_zero_idx = data_filled.index[abnormal_zero_mask]
|
||||
|
||||
if len(abnormal_zero_idx) > 0:
|
||||
# 注意:如果 abnormal_zero_idx 是基于 data_filled 的索引(0..M-1),
|
||||
# 直接作为 x 坐标即可,因为 time_filled 也是 0..M-1
|
||||
# 而 y 值应该取自 data_filled 或 data_kf,取 data 会越界
|
||||
plt.plot(
|
||||
abnormal_zero_idx,
|
||||
data[sensor_to_plot].loc[abnormal_zero_idx],
|
||||
data_filled[sensor_to_plot].loc[abnormal_zero_idx],
|
||||
"mo",
|
||||
markersize=8,
|
||||
label="异常0值",
|
||||
label="异常值(NaN)",
|
||||
)
|
||||
|
||||
plt.plot(
|
||||
data.index, data_kf[sensor_to_plot], label="Kalman滤波预测值", linewidth=2
|
||||
time_filled, data_kf[sensor_to_plot], label="Kalman滤波预测值", linewidth=2
|
||||
)
|
||||
anomaly_idx = anomalies_info[sensor_to_plot].index
|
||||
if len(anomaly_idx) > 0:
|
||||
@@ -331,7 +350,7 @@ def clean_flow_data_df_kf(data: pd.DataFrame, show_plot: bool = False) -> dict:
|
||||
|
||||
plt.subplot(2, 1, 2)
|
||||
plt.plot(
|
||||
data.index,
|
||||
time_filled,
|
||||
cleaned_data[sensor_to_plot],
|
||||
label="修复后监测值",
|
||||
marker="o",
|
||||
@@ -239,7 +239,7 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
threshold = distances.mean() + 3 * distances.std()
|
||||
|
||||
anomaly_pos = np.where(distances > threshold)[0]
|
||||
anomaly_indices = data.index[anomaly_pos]
|
||||
anomaly_indices = data_filled.index[anomaly_pos]
|
||||
|
||||
anomaly_details = {}
|
||||
for pos in anomaly_pos:
|
||||
@@ -248,13 +248,13 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
center = centers[cluster_idx]
|
||||
diff = abs(row_norm - center)
|
||||
main_sensor = diff.idxmax()
|
||||
anomaly_details[data.index[pos]] = main_sensor
|
||||
anomaly_details[data_filled.index[pos]] = main_sensor
|
||||
|
||||
# 修复:滚动平均(窗口可调)
|
||||
data_rolled = data_filled.rolling(window=13, center=True, min_periods=1).mean()
|
||||
data_repaired = data_filled.copy()
|
||||
for pos in anomaly_pos:
|
||||
label = data.index[pos]
|
||||
label = data_filled.index[pos]
|
||||
sensor = anomaly_details[label]
|
||||
data_repaired.loc[label, sensor] = data_rolled.loc[label, sensor]
|
||||
|
||||
@@ -265,6 +265,8 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
if show_plot and len(data.columns) > 0:
|
||||
n = len(data)
|
||||
time = np.arange(n)
|
||||
n_filled = len(data_filled)
|
||||
time_filled = np.arange(n_filled)
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data.columns:
|
||||
plt.plot(
|
||||
@@ -272,7 +274,7 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
)
|
||||
for col in data_filled.columns:
|
||||
plt.plot(
|
||||
time,
|
||||
time_filled,
|
||||
data_filled[col].values,
|
||||
marker="x",
|
||||
markersize=3,
|
||||
@@ -280,7 +282,7 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
linestyle="--",
|
||||
)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
sensor = anomaly_details[data_filled.index[pos]]
|
||||
plt.plot(pos, data_filled.iloc[pos][sensor], "ro", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("压力监测值")
|
||||
@@ -291,16 +293,16 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data_repaired.columns:
|
||||
plt.plot(
|
||||
time, data_repaired[col].values, marker="o", markersize=3, label=col
|
||||
time_filled, data_repaired[col].values, marker="o", markersize=3, label=col
|
||||
)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
sensor = anomaly_details[data_filled.index[pos]]
|
||||
plt.plot(pos, data_repaired.iloc[pos][sensor], "go", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("修复后压力监测值")
|
||||
plt.title("修复后各传感器折线图(绿色标记修复值)")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("修复后压力监测值")
|
||||
plt.title("修复后各传感器折线图(绿色标记修复值)")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
# 返回清洗后的字典
|
||||
return data_repaired
|
||||
Reference in New Issue
Block a user