Files
TJWaterServer/epanet/fix_valve_ids.py
2025-12-31 16:11:28 +08:00

145 lines
5.2 KiB
Python

#!/usr/bin/env python3
import re
from pathlib import Path
from collections import defaultdict

# Hard-coded locations of the input model, its optional ID mapping file, and
# the outputs this script produces.
inp = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp")
mapf = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp.mapping.txt")
out = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii-fixed.inp")
outmap = out.with_suffix(out.suffix + '.mapping.txt')

# Group 1 is everything between the "[VALVES]" heading line and the next line
# that starts with "[" (or end of text).  The heading line is consumed with
# [^\n]* rather than ".", because under DOTALL a "." based comment matcher
# would greedily run to the last newline of the file and leave group 1 empty.
_VALVES_RE = re.compile(
    r"^\[VALVES\][^\n]*(?:\n|\Z)(.*?)(?=^\[|\Z)",
    re.IGNORECASE | re.MULTILINE | re.DOTALL,
)


def _is_data_line(line):
    """Return True for a line that is neither blank nor a ';' comment."""
    stripped = line.strip()
    return bool(stripped) and not stripped.startswith(';')


def parse_mapping(mapping_text):
    """Parse ``original -> mapped`` lines into a dict.

    Lines without ``->`` are ignored; both sides are stripped of whitespace.
    Only the first ``->`` on a line is treated as the separator.
    """
    mapping = {}
    for line in mapping_text.splitlines():
        original, arrow, mapped = line.partition('->')
        if arrow:
            mapping[original.strip()] = mapped.strip()
    return mapping


def find_valves_block(text):
    """Locate the [VALVES] section in *text*.

    Returns a match object whose group 1 is the section body (including any
    leading comment/header lines), or ``None`` if there is no such section.
    """
    return _VALVES_RE.search(text)


def extract_ids(block):
    """Return the first whitespace-separated token of every data line."""
    return [line.split()[0] for line in block.splitlines() if _is_data_line(line)]


def find_duplicates(ids):
    """Map each ID that occurs more than once to the list of its indexes."""
    positions = defaultdict(list)
    for index, token in enumerate(ids):
        positions[token].append(index)
    return {token: where for token, where in positions.items() if len(where) > 1}


def find_collisions(mapping):
    """Map each mapped name produced by several originals to those originals."""
    reverse = defaultdict(list)
    for original, mapped in mapping.items():
        reverse[mapped].append(original)
    return {mapped: originals for mapped, originals in reverse.items()
            if len(originals) > 1}


def plan_renames(dups, used):
    """Choose a new unique name for every duplicate occurrence after the first.

    *dups* is the result of :func:`find_duplicates`; *used* is an iterable of
    names that are already taken (it is not modified).  IDs starting with a
    digit get the base name ``VAL_<id>``, all others ``<id>_dup``; a numeric
    suffix ``_1``, ``_2``, ... is appended only when the base name is taken.

    Returns a dict ``(token, occurrence_index) -> new_name`` where
    ``occurrence_index`` counts occurrences of that token from 0.
    """
    taken = set(used)
    renames = {}
    for token, positions in dups.items():
        base = f'VAL_{token}' if re.match(r"\d", token) else f'{token}_dup'
        suffix = 0
        # Occurrence 0 keeps its original ID; only later ones are renamed.
        for occurrence in range(1, len(positions)):
            candidate = base
            while candidate in taken:
                suffix += 1
                candidate = f'{base}_{suffix}'
            taken.add(candidate)
            renames[(token, occurrence)] = candidate
    return renames


def rewrite_block(block, renames):
    """Apply *renames* to the section body *block* and return the new text.

    Only the leading ID token of a renamed line changes; indentation, the
    remaining columns, comments, blank lines and line endings are preserved.
    """
    seen = defaultdict(int)
    rewritten = []
    for line in block.splitlines(keepends=True):
        if _is_data_line(line):
            token = line.split()[0]
            occurrence = seen[token]
            seen[token] += 1
            new_token = renames.get((token, occurrence))
            if new_token is not None:
                indent = len(line) - len(line.lstrip())
                line = line[:indent] + new_token + line[indent + len(token):]
        rewritten.append(line)
    return ''.join(rewritten)


def main():
    """Rename duplicate valve IDs in ``inp`` and write ``out`` and ``outmap``.

    Exits with status 1 if the input has no [VALVES] section.
    """
    text = inp.read_text(encoding='utf-8')

    # parse mapping file (original -> mapped); it is optional
    map_original_to_mapped = {}
    if mapf.exists():
        map_original_to_mapped = parse_mapping(mapf.read_text(encoding='utf-8'))

    section = find_valves_block(text)
    if not section:
        print('No [VALVES] section found')
        raise SystemExit(1)
    block = section.group(1)

    ids = extract_ids(block)
    dups = find_duplicates(ids)
    print(f'Found {len(ids)} valve IDs; {len(dups)} duplicates')
    for token, positions in list(dups.items())[:40]:
        print(token, 'occurs', len(positions), 'times')

    # Report (but do not change) originals that were mapped to the same name.
    collisions = find_collisions(map_original_to_mapped)
    print('\nMapped collisions (same mapped token from multiple originals):', len(collisions))
    for mapped, originals in list(collisions.items())[:40]:
        print(mapped, ' <- ', originals[:5])

    # Only duplicate IDs inside [VALVES] are renamed; other sections are
    # copied through unchanged.
    occ_to_new = plan_renames(dups, ids)
    new_text = text[:section.start(1)] + rewrite_block(block, occ_to_new) + text[section.end(1):]
    out.write_text(new_text, encoding='utf-8')

    # Record which occurrences were renamed.
    with outmap.open('w', encoding='utf-8') as f:
        f.write('Changes applied to fix duplicate valve IDs:\n')
        for (token, occ), new_name in occ_to_new.items():
            f.write(f'{token} occurrence {occ} -> {new_name}\n')
        f.write('\nNote: These replacements are only for valve ID occurrences beyond the first.\n')

    print('Wrote', out, 'and mapping', outmap)
    print('Replacements:', len(occ_to_new))
    print('If you want different naming (e.g. prefix with V_), rerun with that preference.')


if __name__ == '__main__':
    main()