拆分online_Analysis.py文件

2026-01-26 17:22:06 +08:00
parent f3665798b7
commit 7c9667822f
10 changed files with 1597 additions and 1573 deletions
@@ -0,0 +1,57 @@
+import os
+
+import app.algorithms.api_ex.Fdataclean as Fdataclean
+import app.algorithms.api_ex.Pdataclean as Pdataclean
+
+
+############################################################
+# 流量监测数据清洗   ***卡尔曼滤波法***
+############################################################
+# 2025/08/21 hxyan
+
+
+def flow_data_clean(input_csv_file: str) -> str:
+    """
+    读取 input_csv_path 中的每列时间序列，使用一维 Kalman 滤波平滑并用预测值替换基于 3σ 检测出的异常点。
+    保存输出为：<input_filename>_cleaned.xlsx（与输入同目录），并返回输出文件的绝对路径。如有同名文件存在，则覆盖。
+    :param: input_csv_file: 输入的 CSV 文件明或路径
+    :return: 输出文件的绝对路径
+    """
+
+    # 提供的 input_csv_path 绝对路径，以下为 默认脚本目录下同名 CSV 文件，构建绝对路径，可根据情况修改
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    input_csv_path = os.path.join(script_dir, input_csv_file)
+
+    # 检查文件是否存在
+    if not os.path.exists(input_csv_path):
+        raise FileNotFoundError(f"指定的文件不存在: {input_csv_path}")
+    # 调用 Fdataclean.clean_flow_data_kf 函数进行数据清洗
+    out_xlsx_path = Fdataclean.clean_flow_data_kf(input_csv_path)
+    print("清洗后的数据已保存到:", out_xlsx_path)
+
+
+############################################################
+# 压力监测数据清洗   ***K-means++法***
+############################################################
+# 2025/08/21 hxyan
+
+
+def pressure_data_clean(input_csv_file: str) -> str:
+    """
+    读取 input_csv_path 中的每列时间序列，使用Kmean++清洗数据。
+    保存输出为：<input_filename>_cleaned.xlsx（与输入同目录），并返回输出文件的绝对路径。如有同名文件存在，则覆盖。
+    原始数据在 sheet 'raw_pressure_data'，处理后数据在 sheet 'cleaned_pressusre_data'。
+    :param input_csv_path: 输入的 CSV 文件路径
+    :return: 输出文件的绝对路径
+    """
+
+    # 提供的 input_csv_path 绝对路径，以下为 默认脚本目录下同名 CSV 文件，构建绝对路径，可根据情况修改
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    input_csv_path = os.path.join(script_dir, input_csv_file)
+
+    # 检查文件是否存在
+    if not os.path.exists(input_csv_path):
+        raise FileNotFoundError(f"指定的文件不存在: {input_csv_path}")
+    # 调用 Fdataclean.clean_flow_data_kf 函数进行数据清洗
+    out_xlsx_path = Pdataclean.clean_pressure_data_km(input_csv_path)
+    print("清洗后的数据已保存到:", out_xlsx_path)