Add APIs for data cleaning and sensor placement
This commit is contained in:
143
influxdb_api.py
143
influxdb_api.py
@@ -3513,6 +3513,129 @@ def delete_data(delete_date: str, bucket: str) -> None:
|
||||
|
||||
delete_api: DeleteApi = client.delete_api()
|
||||
delete_api.delete(start=start_time, stop=stop_time, predicate=predicate, bucket=bucket)
|
||||
|
||||
# 2025/08/18 import SCADA data from a file, xkl
def import_data_from_file(file_path: str, bucket: str = "SCADA_data") -> None:
    """
    Import rows from a CSV file into the given InfluxDB bucket.

    The CSV is expected to have the columns ``ScadaId``, ``Value`` and
    ``Time``; each row becomes one point of the "SCADA" measurement with
    the raw reading stored in the ``monitored_value`` field.

    :param file_path: path of the CSV file to import
    :param bucket: target bucket name, defaults to "SCADA_data"
    :return: None
    """
    client = get_new_client()
    if not client.ping():
        # Bail out early instead of attempting writes on a dead connection.
        print("{} -- Failed to connect to InfluxDB.".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        client.close()
        return

    df = pd.read_csv(file_path)
    write_api = client.write_api(write_options=SYNCHRONOUS)
    try:
        points_to_write = []
        for _, row in df.iterrows():
            scada_id = row['ScadaId']
            value = row['Value']
            time_str = row['Time']
            # First 10 characters of the timestamp are the date (YYYY-MM-DD).
            date_str = str(time_str)[:10]
            try:
                raw_value = float(value)
            except (ValueError, TypeError):
                # Non-numeric readings are stored as 0.0 rather than skipped.
                raw_value = 0.0
            # NOTE(review): description tag is None here while the cleaned-data
            # importer uses the string "None" — confirm which is intended.
            point = Point("SCADA") \
                .tag("date", date_str) \
                .tag("description", None) \
                .tag("device_ID", scada_id) \
                .field("monitored_value", raw_value) \
                .field("datacleaning_value", 0.0) \
                .field("simulation_value", 0.0) \
                .time(time_str, write_precision='s')
            points_to_write.append(point)

        # Write in batches to keep individual HTTP requests a reasonable size.
        batch_size = 500
        for i in range(0, len(points_to_write), batch_size):
            write_api.write(bucket=bucket, record=points_to_write[i:i + batch_size])
        print(f"Data imported from {file_path} to bucket {bucket} successfully.")
        print(f"Total points written: {len(points_to_write)}")
    finally:
        # Release client resources even when a write fails part-way through.
        write_api.close()
        client.close()
|
||||
|
||||
# 2025/08/28 import SCADA data from a multi-column file, xkl
def import_multicolumn_data_from_file(file_path: str, raw: bool = True, bucket: str = "SCADA_data") -> None:
    """
    Import a wide-format CSV file into the given InfluxDB bucket.

    The first CSV column is a timestamp ('%Y-%m-%d %H:%M:%S'); every
    remaining column is one device_ID. Each cell becomes one point of the
    "SCADA" measurement.

    :param file_path: path of the CSV file to import
    :param raw: True  -> values go to the ``monitored_value`` field;
                False -> values go to the ``datacleaning_value`` field
    :param bucket: target bucket name, defaults to "SCADA_data"
    :return: None
    """
    client = get_new_client()
    if not client.ping():
        # Bail out early instead of attempting writes on a dead connection.
        print("{} -- Failed to connect to InfluxDB.".format(datetime.now().strftime('%Y/%m/%d %H:%M')))
        client.close()
        return

    def convert_to_iso(timestr):
        # Timestamps in the file look like '2025-08-03 00:00:00'.
        return datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S').isoformat()

    write_api = client.write_api(write_options=SYNCHRONOUS)
    try:
        points_to_write = []
        # NOTE(review): the original code tags description=None for raw data
        # but the string "None" for cleaned data — preserved; confirm intent.
        description = None if raw else "None"
        with open(file_path, encoding='utf-8') as f:
            reader = csv.reader(f)
            header = next(reader)
            device_ids = header[1:]  # first column is the time, the rest are device IDs
            for row in reader:
                iso_time = convert_to_iso(row[0])
                date_tag = iso_time.split('T')[0]
                for device_id, cell in zip(device_ids, row[1:]):
                    try:
                        numeric = float(cell)
                    except (ValueError, TypeError):
                        # Non-numeric cells are stored as 0.0 rather than skipped.
                        numeric = 0.0
                    point = Point("SCADA") \
                        .tag("date", date_tag) \
                        .tag("description", description) \
                        .tag("device_ID", device_id) \
                        .field("monitored_value", numeric if raw else 0.0) \
                        .field("datacleaning_value", 0.0 if raw else numeric) \
                        .field("simulation_value", 0.0) \
                        .time(iso_time, WritePrecision.S)
                    points_to_write.append(point)

        # Write in batches to keep individual HTTP requests a reasonable size.
        batch_size = 1000
        for i in range(0, len(points_to_write), batch_size):
            write_api.write(bucket=bucket, record=points_to_write[i:i + batch_size])
        print(f"Data imported from {file_path} to bucket {bucket} successfully.")
        print(f"Total points written: {len(points_to_write)}")
    finally:
        # Release client resources even when a write fails part-way through.
        write_api.close()
        client.close()
|
||||
|
||||
|
||||
|
||||
# 示例调用
|
||||
@@ -3652,5 +3775,25 @@ if __name__ == "__main__":
|
||||
# end_time='2024-03-26T23:59:00+08:00')
|
||||
# print(result)
|
||||
|
||||
#示例:import_data_from_file
|
||||
#import_data_from_file(file_path='data/Flow_Timedata.csv', bucket='SCADA_data')
|
||||
|
||||
# # 示例:query_all_records_by_type_date
|
||||
#result = query_all__records_by_type__date(type='node', query_date='2025-08-04')
|
||||
|
||||
#示例:query_all_records_by_date_hour
|
||||
#result = query_all_records_by_date_hour(query_date='2025-08-04', query_hour=1)
|
||||
|
||||
#示例:import_multicolumn_data_from_file
|
||||
#import_multicolumn_data_from_file(file_path='data/selected_Flow_Timedata2025_new_format_cleaned.csv', raw=False, bucket='SCADA_data')
|
||||
|
||||
# client = InfluxDBClient(url="http://localhost:8086", token=token, org=org_name)
|
||||
# delete_api = client.delete_api()
|
||||
|
||||
# start = "2025-08-02T00:00:00Z" # 要删除的起始时间
|
||||
# stop = "2025-08-11T00:00:00Z" # 结束时间(可设为未来)
|
||||
# predicate = '_measurement="SCADA"' # 指定 measurement
|
||||
|
||||
# delete_api.delete(start, stop, predicate, bucket="SCADA_data", org=org_name)
|
||||
# client.close()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user