Fix bug and refine, end of 2025
This commit is contained in:
85
epanet/apply_valve_renames.py
Normal file
85
epanet/apply_valve_renames.py
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
"""Rename duplicate valve IDs in the [VALVES] section of an EPANET .inp file.

Keeps the first occurrence of each duplicated ID and renames every later
occurrence to a globally unique name (numeric IDs get a 'V' prefix so the
new name starts with a letter), then writes the fixed file plus a mapping
report of the renames that were applied.
"""
import re
from collections import defaultdict
from pathlib import Path

INP = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp")
OUT = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii-fixed2.inp")
MAPOUT = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii-fixed2.mapping.txt")


def find_valves_section(lines):
    """Return (start, end) line indices of the [VALVES] section.

    ``start`` is the index of the '[VALVES]' header line; ``end`` is the
    index of the next section header, or ``len(lines)`` when the section
    runs to EOF.  Returns ``None`` when there is no [VALVES] section.
    """
    start = None
    for i, line in enumerate(lines):
        if line.strip().upper() == '[VALVES]':
            start = i
            break
    if start is None:
        return None
    end = len(lines)
    for j in range(start + 1, len(lines)):
        if re.match(r"^\s*\[.+\]", lines[j]):
            end = j
            break
    return start, end


def collect_valve_entries(lines, start, end):
    """Collect (absolute_line_index, id_token, line) for each valve row,
    skipping blank lines and ';' comment lines."""
    entries = []
    for idx in range(start + 1, end):
        line = lines[idx]
        stripped = line.strip()
        if not stripped or stripped.startswith(';'):
            continue
        entries.append((idx, line.split()[0], line))
    return entries


def plan_renames(valve_entries):
    """Return [(line_index, old_token, new_token)] for every duplicate
    occurrence beyond the first.

    IDs that start with a digit are prefixed with 'V'; a shared counter
    appends extra suffixes until the candidate collides with neither an
    existing valve ID nor a name already handed out by this pass.
    """
    positions = defaultdict(list)
    for line_index, token, _ in valve_entries:
        positions[token].append(line_index)

    dups = {tok: lns for tok, lns in positions.items() if len(lns) > 1}
    print('Found', len(valve_entries), 'valve entries; duplicates:', len(dups))

    replacements = []  # (line_index, old, new)
    counter = 1
    for tok, lns in dups.items():
        for occ_index, line_index in enumerate(lns):
            if occ_index == 0:
                continue  # keep the first occurrence unchanged
            base = 'V' + tok if re.match(r"^\d", tok) else tok
            new = f'{base}_{occ_index}'
            # Ensure global uniqueness against existing IDs and prior renames.
            while any(rn == new for _, _, rn in replacements) or new in positions:
                counter += 1
                new = f'{base}_{occ_index}_{counter}'
            replacements.append((line_index, tok, new))
    return replacements


def apply_renames(lines, replacements):
    """Rewrite the leading ID token on each affected line in place.

    The match is anchored at the start of the line so only the ID token is
    touched; original leading whitespace is preserved.
    """
    for line_index, _old, new in replacements:
        line = lines[line_index]
        m = re.match(r"(\s*)\S+", line)
        if m:
            lines[line_index] = m.group(1) + new + line[m.end():]


def main():
    lines = INP.read_text(encoding='utf-8').splitlines()
    section = find_valves_section(lines)
    if section is None:
        print('No [VALVES] section found')
        raise SystemExit(1)
    start, end = section

    entries = collect_valve_entries(lines, start, end)
    replacements = plan_renames(entries)
    apply_renames(lines, replacements)

    OUT.write_text('\n'.join(lines) + '\n', encoding='utf-8')
    with MAPOUT.open('w', encoding='utf-8') as f:
        for line_index, old, new in replacements:
            f.write(f'line {line_index + 1}: {old} -> {new}\n')

    print('Wrote', OUT, 'with', len(replacements), 'replacements; mapping at', MAPOUT)


if __name__ == '__main__':
    main()
|
||||
@@ -9,7 +9,7 @@ import subprocess
|
||||
import logging
|
||||
from typing import Any
|
||||
sys.path.append("..")
|
||||
from api import project
|
||||
from api import project_backup
|
||||
from api import inp_out
|
||||
|
||||
|
||||
@@ -243,7 +243,7 @@ def dump_output_binary(path: str) -> str:
|
||||
|
||||
#DingZQ, 2025-02-04, 返回dict[str, Any]
|
||||
def run_project_return_dict(name: str, readable_output: bool = False) -> dict[str, Any]:
|
||||
if not project.have_project(name):
|
||||
if not project_backup.have_project(name):
|
||||
raise Exception(f'Not found project [{name}]')
|
||||
|
||||
dir = os.path.abspath(os.getcwd())
|
||||
@@ -276,7 +276,7 @@ def run_project_return_dict(name: str, readable_output: bool = False) -> dict[st
|
||||
|
||||
# original code
|
||||
def run_project(name: str, readable_output: bool = False) -> str:
|
||||
if not project.have_project(name):
|
||||
if not project_backup.have_project(name):
|
||||
raise Exception(f'Not found project [{name}]')
|
||||
|
||||
dir = os.path.abspath(os.getcwd())
|
||||
|
||||
Binary file not shown.
64
epanet/fix_inp_nonascii.py
Normal file
64
epanet/fix_inp_nonascii.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
"""
Fix non-ASCII ID tokens in an EPANET .inp file by mapping each unique
non-ASCII-containing token to an ASCII-safe name.  Outputs a new INP and a
mapping file for review.

Usage: python fix_inp_nonascii.py input.inp [output.inp]
"""
import re
import sys
from pathlib import Path


def build_ascii_mapping(tokens):
    """Map each non-ASCII token to a unique ASCII-safe replacement.

    The replacement keeps the token's ASCII alphanumeric/'_'/'-' characters
    as a prefix ('ID' when none survive) and appends '_x<counter>' suffixes
    until the candidate is unique.  Tokens are processed in sorted order so
    the output is deterministic.
    """
    used = set()
    mapping = {}
    counter = 1
    for token in sorted(tokens):
        prefix = ''.join(
            ch for ch in token
            if ord(ch) < 128 and (ch.isalnum() or ch in '_-')
        )
        if not prefix:
            prefix = 'ID'
        candidate = prefix
        while candidate in used:
            candidate = f"{prefix}_x{counter}"
            counter += 1
        # If the candidate accidentally equals the original token (rare),
        # force a suffix so the replacement actually changes something.
        if candidate == token:
            candidate = f"{prefix}_x{counter}"
            counter += 1
        mapping[token] = candidate
        used.add(candidate)
    return mapping


def replace_tokens(text, mapping):
    """Replace whole-token occurrences of each mapped token in *text*.

    Lookarounds ensure a token is only replaced when it is not part of a
    larger non-whitespace run, so a short ID can never corrupt a longer ID
    that contains it as a substring.
    """
    for src_token, dst_token in mapping.items():
        pattern = r"(?<!\S)" + re.escape(src_token) + r"(?!\S)"
        text = re.sub(pattern, dst_token, text)
    return text


def main():
    if len(sys.argv) < 2:
        print("Usage: python fix_inp_nonascii.py input.inp [output.inp]")
        sys.exit(2)

    src = Path(sys.argv[1])
    if len(sys.argv) > 2:
        dst = Path(sys.argv[2])
    else:
        dst = src.with_name(src.stem + '-ascii' + src.suffix)

    text = src.read_text(encoding='utf-8')
    # A token is any contiguous non-whitespace run containing a non-ASCII char.
    nonascii_tokens = set(re.findall(r"\S*[^\x00-\x7F]\S*", text))
    if not nonascii_tokens:
        print("No non-ASCII tokens found. Copying source to destination unchanged.")
        dst.write_text(text, encoding='utf-8')
        sys.exit(0)

    mapping = build_ascii_mapping(nonascii_tokens)
    new_text = replace_tokens(text, mapping)

    dst.write_text(new_text, encoding='utf-8')
    mapfile = dst.with_suffix(dst.suffix + '.mapping.txt')
    with mapfile.open('w', encoding='utf-8') as f:
        for k, v in mapping.items():
            f.write(f"{k} -> {v}\n")

    print(f"Wrote: {dst}\nMapping: {mapfile}\nReplaced {len(mapping)} non-ASCII tokens.")


if __name__ == '__main__':
    main()
|
||||
144
epanet/fix_valve_ids.py
Normal file
144
epanet/fix_valve_ids.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
"""Fix duplicate valve IDs inside the [VALVES] section of an EPANET .inp file.

Every occurrence of a duplicated ID beyond the first is renamed: IDs that
start with a digit get a 'VAL_' prefix, others a '_dup' suffix, with extra
counters appended until the name is unique.  Writes the fixed INP together
with a mapping file describing each rename, and also reports collisions
found in an optional 'original -> mapped' mapping file from a previous
ASCII-conversion step.
"""
import re
from collections import defaultdict
from pathlib import Path

INP = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp")
MAPF = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp.mapping.txt")
OUT = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii-fixed.inp")
OUTMAP = OUT.with_suffix(OUT.suffix + '.mapping.txt')

# Matches the [VALVES] header and captures the section body up to the next
# section header or end of file.
VALVES_RE = re.compile(r"(?mi)^\[VALVES\]\s*(?:;.*\n)?(.*?)(?=^\[|\Z)", re.S | re.M)


def parse_mapping_file(path):
    """Parse an 'original -> mapped' mapping file into a dict.

    Returns an empty dict when the file does not exist.
    """
    mapping = {}
    if path.exists():
        for line in path.read_text(encoding='utf-8').splitlines():
            if '->' in line:
                orig, mapped = line.split('->', 1)
                mapping[orig.strip()] = mapped.strip()
    return mapping


def extract_ids(lines):
    """Return the leading ID token of every non-blank, non-comment line."""
    ids = []
    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith(';'):
            continue
        ids.append(stripped.split()[0])
    return ids


def find_duplicates(ids):
    """Map each duplicated token to the list of its occurrence indices."""
    count = defaultdict(list)
    for idx, token in enumerate(ids):
        count[token].append(idx)
    return {tok: occs for tok, occs in count.items() if len(occs) > 1}


def plan_occurrence_renames(ids):
    """Map (token, occurrence_index) -> new unique name for every duplicate
    occurrence beyond the first (the first occurrence keeps its name)."""
    occ_to_new = {}
    used = set(ids)  # existing valve IDs must not be reused
    suffix_counter = 1
    for token, occurrences in find_duplicates(ids).items():
        for occ_index in range(1, len(occurrences)):
            # Digit-leading IDs get an alphabetic prefix; others a suffix.
            if re.match(r"^\d", token):
                candidate = f'VAL_{token}'
            else:
                candidate = f'{token}_dup'
            while candidate in used:
                candidate = f'{candidate}_{suffix_counter}'
                suffix_counter += 1
            used.add(candidate)
            occ_to_new[(token, occ_index)] = candidate
    return occ_to_new


def rewrite_block(lines, occ_to_new):
    """Return the block lines with the Nth occurrence of each duplicate ID
    replaced per *occ_to_new*, preserving leading whitespace."""
    new_lines = []
    occ_seen = defaultdict(int)
    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith(';'):
            new_lines.append(line)
            continue
        token = stripped.split()[0]
        occ_idx = occ_seen[token]
        occ_seen[token] += 1
        new_token = occ_to_new.get((token, occ_idx))
        if new_token is not None:
            m = re.match(r"(\s*)" + re.escape(token), line)
            new_lines.append(m.group(1) + new_token + line[m.end():])
        else:
            new_lines.append(line)
    return new_lines


def main():
    text = INP.read_text(encoding='utf-8')
    map_original_to_mapped = parse_mapping_file(MAPF)

    m = VALVES_RE.search(text)
    if not m:
        print('No [VALVES] section found')
        raise SystemExit(1)
    lines = m.group(1).splitlines()

    ids = extract_ids(lines)
    dups = find_duplicates(ids)
    print(f'Found {len(ids)} valve IDs; {len(dups)} duplicates')
    for token, occs in list(dups.items())[:40]:
        print(token, 'occurs', len(occs), 'times')

    # Report collisions: multiple originals mapped to the same mapped token.
    mapped_rev = defaultdict(list)
    for orig, mapped in map_original_to_mapped.items():
        mapped_rev[mapped].append(orig)
    collisions = {t: origs for t, origs in mapped_rev.items() if len(origs) > 1}
    print('\nMapped collisions (same mapped token from multiple originals):', len(collisions))
    for t, origs in list(collisions.items())[:40]:
        print(t, ' <- ', origs[:5])

    occ_to_new = plan_occurrence_renames(ids)

    new_block = '\n'.join(rewrite_block(lines, occ_to_new)) + '\n'
    new_text = text[:m.start(1)] + new_block + text[m.end(1):]
    OUT.write_text(new_text, encoding='utf-8')

    with OUTMAP.open('w', encoding='utf-8') as f:
        f.write('Changes applied to fix duplicate valve IDs:\n')
        for (token, occ), new in occ_to_new.items():
            f.write(f'{token} occurrence {occ} -> {new}\n')
        f.write('\nNote: These replacements are only for valve ID occurrences beyond the first.\n')

    print('Wrote', OUT, 'and mapping', OUTMAP)
    print('Replacements:', len(occ_to_new))
    print('If you want different naming (e.g. prefix with V_), rerun with that preference.')


if __name__ == '__main__':
    main()
|
||||
65
epanet/fix_valve_ids2.py
Normal file
65
epanet/fix_valve_ids2.py
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
"""Report duplicate and numeric-only valve IDs in an EPANET .inp file.

Scans the [VALVES] section, prints summary statistics to stdout, and writes
a '<stem>-valves-report.txt' next to the input listing every duplicated ID
(with the line numbers it occurs on) and all numeric-only IDs.
"""
import re
from collections import defaultdict
from pathlib import Path

INP = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp")


def find_valves_section(lines):
    """Return (start, end) line indices of the [VALVES] section.

    ``start`` is the index of the '[VALVES]' header line; ``end`` is the
    index of the next section header, or ``len(lines)`` when the section
    runs to EOF.  Returns ``None`` when there is no [VALVES] section.
    """
    start = None
    for i, line in enumerate(lines):
        if line.strip().upper() == '[VALVES]':
            start = i
            break
    if start is None:
        return None
    end = len(lines)
    for j in range(start + 1, len(lines)):
        if re.match(r"^\s*\[.+\]", lines[j]):
            end = j
            break
    return start, end


def collect_valve_ids(lines, start, end):
    """Collect (absolute_line_index, id_token, line) for each valve row,
    skipping blank lines and ';' comment lines."""
    entries = []
    for idx in range(start + 1, end):
        line = lines[idx]
        stripped = line.strip()
        if not stripped or stripped.startswith(';'):
            continue
        entries.append((idx, line.split()[0], line))
    return entries


def find_duplicates(entries):
    """Map each duplicated token to the list of line indices it occurs on."""
    count = defaultdict(list)
    for line_index, token, _ in entries:
        count[token].append(line_index)
    return {tok: lns for tok, lns in count.items() if len(lns) > 1}


def main():
    lines = INP.read_text(encoding='utf-8').splitlines()
    section = find_valves_section(lines)
    if section is None:
        print('No [VALVES] section found')
        raise SystemExit(1)
    start, end = section

    entries = collect_valve_ids(lines, start, end)
    dups = find_duplicates(entries)
    print('Total valve entries found:', len(entries))
    print('Duplicate token count:', len(dups))
    if dups:
        print('\nSample duplicates:')
        for tok, lns in list(dups.items())[:20]:
            print(tok, 'lines:', lns)

    # Purely numeric IDs are flagged: other tooling prefixes them with a letter.
    num_only = [tok for _, tok, _ in entries if re.fullmatch(r'\d+', tok)]
    print('\nNumeric-only valve IDs count:', len(num_only))
    if num_only:
        print('Examples:', num_only[:20])

    report = INP.with_name(INP.stem + '-valves-report.txt')
    with report.open('w', encoding='utf-8') as f:
        f.write(f'Total valve entries: {len(entries)}\n')
        f.write(f'Duplicate tokens: {len(dups)}\n')
        for tok, lns in dups.items():
            f.write(f'{tok}: lines {lns}\n')
        f.write('\nNumeric-only tokens:\n')
        for tok in sorted(set(num_only)):
            f.write(tok + '\n')

    print('Wrote report to', report)


if __name__ == '__main__':
    main()
|
||||
Binary file not shown.
Reference in New Issue
Block a user