新增压力、流量数据清洗前对异常值 0 的预处理方法
This commit is contained in:
@@ -105,8 +105,15 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
data = pd.DataFrame(data_dict)
|
||||
# 填充NaN值
|
||||
data = data.ffill().bfill()
|
||||
# 标准化
|
||||
data_norm = (data - data.mean()) / data.std()
|
||||
# 异常值预处理
|
||||
# 将0值替换为NaN,然后用线性插值填充
|
||||
data_filled = data.replace(0, np.nan)
|
||||
data_filled = data_filled.interpolate(method="linear", limit_direction="both")
|
||||
# 兜底:再做一次前后向填充;注意整列全为0的列此时全为NaN,ffill/bfill 无法填充,仍会保留NaN
|
||||
data_filled = data_filled.ffill().bfill()
|
||||
|
||||
# 标准化(使用填充后的数据)
|
||||
data_norm = (data_filled - data_filled.mean()) / data_filled.std()
|
||||
|
||||
# 聚类与异常检测
|
||||
k = 3
|
||||
@@ -130,46 +137,59 @@ def clean_pressure_data_dict_km(data_dict: dict, show_plot: bool = False) -> dic
|
||||
anomaly_details[data.index[pos]] = main_sensor
|
||||
|
||||
# 修复:滚动平均(窗口可调)
|
||||
data_rolled = data.rolling(window=13, center=True, min_periods=1).mean()
|
||||
data_repaired = data.copy()
|
||||
data_rolled = data_filled.rolling(window=13, center=True, min_periods=1).mean()
|
||||
data_repaired = data_filled.copy()
|
||||
for pos in anomaly_pos:
|
||||
label = data.index[pos]
|
||||
sensor = anomaly_details[label]
|
||||
data_repaired.loc[label, sensor] = data_rolled.loc[label, sensor]
|
||||
|
||||
# 可选可视化(使用位置作为 x 轴)
|
||||
plt.rcParams['font.sans-serif'] = ['SimHei']
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
plt.rcParams["font.sans-serif"] = ["SimHei"]
|
||||
plt.rcParams["axes.unicode_minus"] = False
|
||||
|
||||
if show_plot and len(data.columns) > 0:
|
||||
n = len(data)
|
||||
time = np.arange(n)
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data.columns:
|
||||
plt.plot(time, data[col].values, marker='o', markersize=3, label=col)
|
||||
plt.plot(
|
||||
time, data[col].values, marker="o", markersize=3, label=col, alpha=0.5
|
||||
)
|
||||
for col in data_filled.columns:
|
||||
plt.plot(
|
||||
time,
|
||||
data_filled[col].values,
|
||||
marker="x",
|
||||
markersize=3,
|
||||
label=f"{col}_filled",
|
||||
linestyle="--",
|
||||
)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
plt.plot(pos, data.iloc[pos][sensor], 'ro', markersize=8)
|
||||
plt.plot(pos, data_filled.iloc[pos][sensor], "ro", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("压力监测值")
|
||||
plt.title("各传感器折线图(红色标记主要异常点)")
|
||||
plt.title("各传感器折线图(红色标记主要异常点,虚线为0值填充后)")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
plt.figure(figsize=(12, 8))
|
||||
for col in data_repaired.columns:
|
||||
plt.plot(time, data_repaired[col].values, marker='o', markersize=3, label=col)
|
||||
plt.plot(
|
||||
time, data_repaired[col].values, marker="o", markersize=3, label=col
|
||||
)
|
||||
for pos in anomaly_pos:
|
||||
sensor = anomaly_details[data.index[pos]]
|
||||
plt.plot(pos, data_repaired.iloc[pos][sensor], 'go', markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("修复后压力监测值")
|
||||
plt.title("修复后各传感器折线图(绿色标记修复值)")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
plt.plot(pos, data_repaired.iloc[pos][sensor], "go", markersize=8)
|
||||
plt.xlabel("时间点(序号)")
|
||||
plt.ylabel("修复后压力监测值")
|
||||
plt.title("修复后各传感器折线图(绿色标记修复值)")
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
# 返回清洗后的字典
|
||||
return data_repaired.to_dict(orient='list')
|
||||
return data_repaired.to_dict(orient="list")
|
||||
|
||||
|
||||
# 测试
|
||||
|
||||
Reference in New Issue
Block a user