修复数据清洗时间轴填补后的对齐问题
This commit is contained in:
@@ -37,8 +37,9 @@ def fill_time_gaps(
|
||||
start=data_indexed.index.min(), end=data_indexed.index.max(), freq=freq
|
||||
)
|
||||
|
||||
# 重索引以补齐缺失时间点
|
||||
data_reindexed = data_indexed.reindex(full_range)
|
||||
# 重索引以补齐缺失时间点,同时保留原始时间戳
|
||||
combined_index = data_indexed.index.union(full_range).sort_values().unique()
|
||||
data_reindexed = data_indexed.reindex(combined_index)
|
||||
|
||||
# 按列处理缺口
|
||||
for col in data_reindexed.columns:
|
||||
@@ -49,12 +50,12 @@ def fill_time_gaps(
|
||||
missing_groups = (is_missing != is_missing.shift()).cumsum()
|
||||
gap_lengths = is_missing.groupby(missing_groups).transform("sum")
|
||||
|
||||
# 短缺口:线性插值
|
||||
# 短缺口:时间插值
|
||||
short_gap_mask = is_missing & (gap_lengths <= short_gap_threshold)
|
||||
if short_gap_mask.any():
|
||||
data_reindexed.loc[short_gap_mask, col] = (
|
||||
data_reindexed[col]
|
||||
.interpolate(method="linear", limit_area="inside")
|
||||
.interpolate(method="time", limit_area="inside")
|
||||
.loc[short_gap_mask]
|
||||
)
|
||||
|
||||
@@ -213,6 +214,12 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
data_filled = fill_time_gaps(
|
||||
data, time_col="time", freq="1min", short_gap_threshold=10
|
||||
)
|
||||
|
||||
# 保存 time 列用于最后合并
|
||||
time_col_series = None
|
||||
if "time" in data_filled.columns:
|
||||
time_col_series = data_filled["time"]
|
||||
|
||||
# 移除 time 列用于后续清洗
|
||||
data_filled = data_filled.drop(columns=["time"])
|
||||
|
||||
@@ -304,6 +311,10 @@ def clean_pressure_data_df_km(data: pd.DataFrame, show_plot: bool = False) -> di
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
# 将 time 列添加回结果
|
||||
if time_col_series is not None:
|
||||
data_repaired.insert(0, "time", time_col_series)
|
||||
|
||||
# 返回清洗后的字典
|
||||
return data_repaired
|
||||
|
||||
|
||||
Reference in New Issue
Block a user