Add API for data cleaning and sensor placement

This commit is contained in:
xinzish
2025-10-30 00:39:29 +08:00
parent ae6510ac37
commit 1907e1d3cb
6 changed files with 671 additions and 0 deletions

View File

@@ -3513,6 +3513,129 @@ def delete_data(delete_date: str, bucket: str) -> None:
delete_api: DeleteApi = client.delete_api()
delete_api.delete(start=start_time, stop=stop_time, predicate=predicate, bucket=bucket)
# 2025/08/18 Import SCADA data from a file (xkl)
def import_data_from_file(file_path: str, bucket: str = "SCADA_data") -> None:
    """
    Import rows from a CSV file into the given InfluxDB bucket.

    The CSV is expected to contain the columns ``ScadaId``, ``Value`` and
    ``Time``; each row becomes one "SCADA" measurement point whose
    ``monitored_value`` field holds the float-coerced Value (non-numeric
    values are stored as 0.0).

    :param file_path: Path of the CSV file to import.
    :param bucket: Target bucket name, defaults to "SCADA_data".
    :return: None
    """
    client = get_new_client()
    if not client.ping():
        # Bail out instead of attempting reads/writes against a dead server.
        print("{} -- Failed to connect to InfluxDB.".format(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        client.close()
        return
    write_api = client.write_api(write_options=SYNCHRONOUS)
    try:
        df = pd.read_csv(file_path)
        points_to_write = []
        for _, row in df.iterrows():
            scada_id = row['ScadaId']
            time_str = row['Time']
            date_str = str(time_str)[:10]  # first 10 chars used as the date tag
            try:
                raw_value = float(row['Value'])
            except (ValueError, TypeError):
                raw_value = 0.0  # non-numeric/missing values default to 0.0
            point = Point("SCADA") \
                .tag("date", date_str) \
                .tag("description", None) \
                .tag("device_ID", scada_id) \
                .field("monitored_value", raw_value) \
                .field("datacleaning_value", 0.0) \
                .field("simulation_value", 0.0) \
                .time(time_str, write_precision='s')
            points_to_write.append(point)
        # Write in batches to keep individual requests small.
        batch_size = 500
        for i in range(0, len(points_to_write), batch_size):
            write_api.write(bucket=bucket, record=points_to_write[i:i + batch_size])
        print(f"Data imported from {file_path} to bucket {bucket} successfully.")
        print(f"Total points written: {len(points_to_write)}")
    finally:
        # Release client resources even when reading or writing raises.
        write_api.close()
        client.close()
# 2025/08/28 Import SCADA data from a multi-column format file (xkl)
def import_multicolumn_data_from_file(file_path: str, raw: bool = True, bucket: str = "SCADA_data") -> None:
    """
    Import data from a multi-column CSV file into the given InfluxDB bucket.

    The first CSV column holds timestamps in '%Y-%m-%d %H:%M:%S' format;
    every remaining column header is a device_ID and its cell values become
    measurement points (non-numeric values are stored as 0.0).

    :param file_path: Path of the CSV file to import.
    :param raw: True  -> cell values fill the ``monitored_value`` field;
                False -> cell values fill the ``datacleaning_value`` field.
    :param bucket: Target bucket name, defaults to "SCADA_data".
    :return: None
    """
    client = get_new_client()
    if not client.ping():
        # Bail out instead of attempting reads/writes against a dead server.
        print("{} -- Failed to connect to InfluxDB.".format(
            datetime.now().strftime('%Y/%m/%d %H:%M')))
        client.close()
        return
    write_api = client.write_api(write_options=SYNCHRONOUS)

    def convert_to_iso(timestr):
        # Input format is '%Y-%m-%d %H:%M:%S', e.g. '2025-08-03 00:00:00'.
        dt = datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')
        return dt.isoformat()

    try:
        points_to_write = []
        with open(file_path, encoding='utf-8') as f:
            reader = csv.reader(f)
            header = next(reader)
            device_ids = header[1:]  # first column is time; the rest are device_IDs
            for row in reader:
                iso_time = convert_to_iso(row[0])
                date_tag = iso_time.split('T')[0]
                for idx, value in enumerate(row[1:]):
                    try:
                        cell_value = float(value)
                    except (ValueError, TypeError):
                        cell_value = 0.0  # non-numeric/missing values default to 0.0
                    # Raw readings go into monitored_value, cleaned readings into
                    # datacleaning_value; the unused field is written as 0.0.
                    # NOTE(review): the description tag is normalized to None for
                    # both branches (the cleaned branch previously used the string
                    # "None"), matching import_data_from_file.
                    point = Point("SCADA") \
                        .tag("date", date_tag) \
                        .tag("description", None) \
                        .tag("device_ID", device_ids[idx]) \
                        .field("monitored_value", cell_value if raw else 0.0) \
                        .field("datacleaning_value", 0.0 if raw else cell_value) \
                        .field("simulation_value", 0.0) \
                        .time(iso_time, WritePrecision.S)
                    points_to_write.append(point)
        # Write in batches to keep individual requests small.
        batch_size = 1000
        for i in range(0, len(points_to_write), batch_size):
            write_api.write(bucket=bucket, record=points_to_write[i:i + batch_size])
        print(f"Data imported from {file_path} to bucket {bucket} successfully.")
        print(f"Total points written: {len(points_to_write)}")
    finally:
        # Release client resources even when reading or writing raises.
        write_api.close()
        client.close()
# Example calls
@@ -3652,5 +3775,25 @@ if __name__ == "__main__":
# end_time='2024-03-26T23:59:00+08:00')
# print(result)
#Example: import_data_from_file
#import_data_from_file(file_path='data/Flow_Timedata.csv', bucket='SCADA_data')
# # Example: query_all_records_by_type_date
#result = query_all__records_by_type__date(type='node', query_date='2025-08-04')
#Example: query_all_records_by_date_hour
#result = query_all_records_by_date_hour(query_date='2025-08-04', query_hour=1)
#Example: import_multicolumn_data_from_file
#import_multicolumn_data_from_file(file_path='data/selected_Flow_Timedata2025_new_format_cleaned.csv', raw=False, bucket='SCADA_data')
# client = InfluxDBClient(url="http://localhost:8086", token=token, org=org_name)
# delete_api = client.delete_api()
# start = "2025-08-02T00:00:00Z"  # start of the range to delete
# stop = "2025-08-11T00:00:00Z"   # end of the range (may be set in the future)
# predicate = '_measurement="SCADA"'  # restrict deletion to this measurement
# delete_api.delete(start, stop, predicate, bucket="SCADA_data", org=org_name)
# client.close()