Fix bug and refine, end of 2025

This commit is contained in:
xinzish
2025-12-31 16:11:28 +08:00
parent 38fb35a333
commit 32bbe3ddcd
23 changed files with 3180 additions and 68 deletions

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""Rename duplicate IDs in the [VALVES] section of an EPANET .inp file.

The first occurrence of every valve ID is left untouched; each later
occurrence receives a new name that is unique among the valve IDs.  The
patched file and a per-line mapping report are written to the hard-coded
output paths below.
"""
from pathlib import Path
import re
from collections import defaultdict

inp = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp")
out = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii-fixed2.inp")
mapout = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii-fixed2.mapping.txt")

text = inp.read_text(encoding='utf-8')
lines = text.splitlines()

# Locate the [VALVES] header (case-insensitive, surrounding blanks ignored).
start = next(
    (pos for pos, row in enumerate(lines) if row.strip().upper() == '[VALVES]'),
    None,
)
if start is None:
    print('No [VALVES] section found')
    raise SystemExit(1)

# The section runs until the next "[...]" header, or to the end of the file.
end = next(
    (pos for pos in range(start + 1, len(lines))
     if re.match(r"^\s*\[.+\]", lines[pos])),
    len(lines),
)

# Every data row of the section as (absolute_line_index, id_token, raw_line);
# blank rows and ';' comment rows are ignored.
valve_entries = []
for idx in range(start + 1, end):
    row = lines[idx]
    body = row.strip()
    if body and not body.startswith(';'):
        valve_entries.append((idx, row.split()[0], row))

# Group the line indexes by ID so repeated IDs can be spotted.
positions = defaultdict(list)
for idx, tok, _row in valve_entries:
    positions[tok].append(idx)

dups = {tok: where for tok, where in positions.items() if len(where) > 1}
print('Found', len(valve_entries), 'valve entries; duplicates:', len(dups))

replacements = []  # (line_index, old, new)
assigned = set()   # new names handed out so far
counter = 1
for tok, where in dups.items():
    # IDs with a leading digit (which includes all-digit IDs) get a 'V' prefix.
    base = 'V' + tok if re.match(r"^\d", tok) else tok
    # Occurrence 0 keeps its name; only the later ones are renamed.
    for occ_index, idx in enumerate(where[1:], start=1):
        new = f'{base}_{occ_index}'
        # The name must clash neither with an earlier rename nor with any
        # ID already present in the section.
        while new in assigned or new in positions:
            counter += 1
            new = f'{base}_{occ_index}_{counter}'
        assigned.add(new)
        replacements.append((idx, tok, new))

# Swap only the leading ID token of each affected row; the rest of the row,
# spacing included, is left exactly as it was.
for idx, _old, new in replacements:
    fields = lines[idx].split()
    if fields:
        lines[idx] = lines[idx].replace(fields[0], new, 1)

# Patched INP file.
out.write_text('\n'.join(lines) + '\n', encoding='utf-8')

# Mapping report, using 1-based line numbers.
with mapout.open('w', encoding='utf-8') as f:
    f.writelines(f'line {idx+1}: {old} -> {new}\n' for idx, old, new in replacements)

print('Wrote', out, 'with', len(replacements), 'replacements; mapping at', mapout)

View File

@@ -9,7 +9,7 @@ import subprocess
import logging
from typing import Any
sys.path.append("..")
from api import project
from api import project_backup
from api import inp_out
@@ -243,7 +243,7 @@ def dump_output_binary(path: str) -> str:
#DingZQ, 2025-02-04, returns dict[str, Any]
def run_project_return_dict(name: str, readable_output: bool = False) -> dict[str, Any]:
if not project.have_project(name):
if not project_backup.have_project(name):
raise Exception(f'Not found project [{name}]')
dir = os.path.abspath(os.getcwd())
@@ -276,7 +276,7 @@ def run_project_return_dict(name: str, readable_output: bool = False) -> dict[st
# original code
def run_project(name: str, readable_output: bool = False) -> str:
if not project.have_project(name):
if not project_backup.have_project(name):
raise Exception(f'Not found project [{name}]')
dir = os.path.abspath(os.getcwd())

Binary file not shown.

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env python3
"""
Fix non-ASCII ID tokens in an EPANET .inp file by mapping each unique non-ASCII-containing token
to an ASCII-safe name. Outputs a new INP and a mapping file for review.
Usage: python fix_inp_nonascii.py input.inp [output.inp]
"""
import re
import sys
from pathlib import Path

# A token is a maximal run of non-whitespace characters.
_TOKEN = re.compile(r"\S+")
# A token that contains at least one character outside the ASCII range.
_NONASCII_TOKEN = re.compile(r"\S*[^\x00-\x7F]\S*")


def build_ascii_mapping(text):
    """Return a dict mapping every non-ASCII token in *text* to an ASCII name.

    The replacement name is built from the token's ASCII letters, digits,
    '_' and '-' ('ID' when nothing is left).  A ``_x<N>`` suffix is appended
    whenever that name is already taken, either by another replacement or by
    an ASCII token that already exists in *text*, so a replacement never
    duplicates an existing token.  Tokens are processed in sorted order to
    keep the result deterministic.
    """
    nonascii_tokens = set(_NONASCII_TOKEN.findall(text))
    # Start from the ASCII tokens already in the file so that a generated
    # name cannot collide with an existing ID.
    used = set(_TOKEN.findall(text)) - nonascii_tokens
    mapping = {}
    counter = 1
    for token in sorted(nonascii_tokens):
        prefix = ''.join(
            ch for ch in token if ord(ch) < 128 and (ch.isalnum() or ch in '_-')
        ) or 'ID'
        candidate = prefix
        while candidate in used:
            candidate = f"{prefix}_x{counter}"
            counter += 1
        mapping[token] = candidate
        used.add(candidate)
    return mapping


def replace_tokens(text, mapping):
    """Return *text* with every whole token found in *mapping* replaced.

    The substitution is done in one pass over whitespace-delimited tokens,
    so a short token is never rewritten inside a longer one and an already
    replaced name is never rewritten again.
    """
    return _TOKEN.sub(lambda m: mapping.get(m.group(0), m.group(0)), text)


def main(argv):
    """Run the command-line tool; return the process exit status.

    *argv* is the full argument vector (program name first).  Returns 2 when
    the input path is missing, otherwise 0.
    """
    if len(argv) < 2:
        print("Usage: python fix_inp_nonascii.py input.inp [output.inp]")
        return 2
    src = Path(argv[1])
    if len(argv) > 2:
        dst = Path(argv[2])
    else:
        dst = src.with_name(src.stem + '-ascii' + src.suffix)
    text = src.read_text(encoding='utf-8')
    mapping = build_ascii_mapping(text)
    if not mapping:
        print("No non-ASCII tokens found. Copying source to destination unchanged.")
        dst.write_text(text, encoding='utf-8')
        return 0
    # Write output files
    dst.write_text(replace_tokens(text, mapping), encoding='utf-8')
    mapfile = dst.with_suffix(dst.suffix + '.mapping.txt')
    with mapfile.open('w', encoding='utf-8') as f:
        f.writelines(f"{k} -> {v}\n" for k, v in mapping.items())
    print(f"Wrote: {dst}\nMapping: {mapfile}\nReplaced {len(mapping)} non-ASCII tokens.")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

144
epanet/fix_valve_ids.py Normal file
View File

@@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""Rename duplicate valve IDs in the [VALVES] section of an EPANET .inp file.

The first occurrence of each ID keeps its name; every later occurrence is
renamed to a name not yet used by any valve.  The patched file and a report
of the changes are written to the hard-coded paths below.  The mapping file
produced by the non-ASCII fixer is only inspected to report originals that
were mapped to the same ASCII name.
"""
import re
from collections import Counter, defaultdict
from pathlib import Path

inp = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp")
mapf = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp.mapping.txt")
out = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii-fixed.inp")
outmap = out.with_suffix(out.suffix + '.mapping.txt')

# Any "[NAME]" line starts a new section.
_SECTION_HEADER = re.compile(r"^\s*\[.+\]")


def parse_mapping(mapping_text):
    """Parse 'original -> mapped' lines into a dict; other lines are ignored."""
    result = {}
    for line in mapping_text.splitlines():
        if '->' in line:
            original, mapped = line.split('->', 1)
            result[original.strip()] = mapped.strip()
    return result


def find_valves_block(lines):
    """Return (first, end) line indexes of the [VALVES] body, or None.

    *first* is the line after the header and *end* is the index of the next
    section header (or ``len(lines)``).  The section is located line by line
    so that a ';' comment row directly below the header cannot swallow the
    rest of the file, which a DOTALL regex over the whole text would allow.
    """
    for i, line in enumerate(lines):
        if line.strip().upper() == '[VALVES]':
            for j in range(i + 1, len(lines)):
                if _SECTION_HEADER.match(lines[j]):
                    return i + 1, j
            return i + 1, len(lines)
    return None


def _is_data_row(line):
    """True for rows that carry a valve entry (not blank, not a ';' comment)."""
    stripped = line.strip()
    return bool(stripped) and not stripped.startswith(';')


def _unique_name(token, used):
    """Pick a new name for a repeated *token*, record it in *used*, return it.

    IDs that start with a digit become ``VAL_<id>``, all others ``<id>_dup``;
    a numeric suffix is added only when that name is already taken.
    """
    base = f'VAL_{token}' if re.match(r"\d", token) else f'{token}_dup'
    candidate = base
    n = 1
    while candidate in used:
        candidate = f'{base}_{n}'
        n += 1
    used.add(candidate)
    return candidate


def rename_duplicate_valve_ids(text):
    """Rename repeated valve IDs in *text*.

    Returns ``(new_text, ids, occ_to_new)`` where *ids* lists the valve IDs
    in file order as found before renaming and *occ_to_new* maps
    ``(id, occurrence_index)`` to the new name (occurrence 0 is never
    renamed).  Returns None when *text* has no [VALVES] section.  Text
    outside the renamed ID tokens is preserved exactly.
    """
    lines = text.split('\n')
    span = find_valves_block(lines)
    if span is None:
        return None
    first, end = span
    ids = [lines[i].split()[0] for i in range(first, end) if _is_data_row(lines[i])]
    # Only valve IDs are considered when checking that a new name is free.
    used = set(ids)
    seen = defaultdict(int)
    occ_to_new = {}
    for i in range(first, end):
        line = lines[i]
        if not _is_data_row(line):
            continue
        token = line.split()[0]
        occ_idx = seen[token]
        seen[token] += 1
        if occ_idx == 0:
            continue
        new_token = _unique_name(token, used)
        occ_to_new[(token, occ_idx)] = new_token
        # Replace only the leading ID token, keeping indentation and the
        # remainder of the row untouched.
        leading = line[:len(line) - len(line.lstrip())]
        lines[i] = leading + new_token + line[len(leading) + len(token):]
    return '\n'.join(lines), ids, occ_to_new


def main():
    """Read the input file, fix duplicate valve IDs and write the results."""
    text = inp.read_text(encoding='utf-8')
    # parse mapping file (original -> mapped)
    map_original_to_mapped = {}
    if mapf.exists():
        map_original_to_mapped = parse_mapping(mapf.read_text(encoding='utf-8'))

    result = rename_duplicate_valve_ids(text)
    if result is None:
        print('No [VALVES] section found')
        raise SystemExit(1)
    new_text, ids, occ_to_new = result

    dups = {k: n for k, n in Counter(ids).items() if n > 1}
    print(f'Found {len(ids)} valve IDs; {len(dups)} duplicates')
    for k, n in list(dups.items())[:40]:
        print(k, 'occurs', n, 'times')

    # Also find mapped collisions: multiple originals mapped to same mapped token
    mapped_rev = defaultdict(list)
    for orig, mapped in map_original_to_mapped.items():
        mapped_rev[mapped].append(orig)
    collisions = {k: v for k, v in mapped_rev.items() if len(v) > 1}
    print('\nMapped collisions (same mapped token from multiple originals):', len(collisions))
    for k, v in list(collisions.items())[:40]:
        print(k, ' <- ', v[:5])

    out.write_text(new_text, encoding='utf-8')
    # Create an updated mapping file: show which tokens were changed and why
    with outmap.open('w', encoding='utf-8') as f:
        f.write('Changes applied to fix duplicate valve IDs:\n')
        for (token, occ), new_name in occ_to_new.items():
            f.write(f'{token} occurrence {occ} -> {new_name}\n')
        f.write('\nNote: These replacements are only for valve ID occurrences beyond the first.\n')
    print('Wrote', out, 'and mapping', outmap)
    print('Replacements:', len(occ_to_new))
    print('If you want different naming (e.g. prefix with V_), rerun with that preference.')


if __name__ == "__main__":
    main()

65
epanet/fix_valve_ids2.py Normal file
View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
"""Report duplicate and numeric-only valve IDs of an EPANET .inp file.

Scans the [VALVES] section of the hard-coded input file, prints a summary
and writes a short text report next to the input.  Nothing is modified.
"""
from pathlib import Path
import re
from collections import defaultdict

inp = Path(r"d:\TJWaterServer\epanet\szhskeleton-patternfixed-ascii.inp")


def collect_valve_entries(lines):
    """Return the data rows of the [VALVES] section, or None if it is absent.

    Each entry is ``(line_number, id_token, raw_line)`` where *line_number*
    is the 1-based line number in the file, so it matches what a text
    editor shows.  Blank rows and ';' comment rows are skipped.  The section
    ends at the next "[...]" header or at the end of the file.
    """
    start = next(
        (i for i, line in enumerate(lines) if line.strip().upper() == '[VALVES]'),
        None,
    )
    if start is None:
        return None
    end = next(
        (j for j in range(start + 1, len(lines))
         if re.match(r"^\s*\[.+\]", lines[j])),
        len(lines),
    )
    entries = []
    # lines[start + 1] is file line start + 2 when counting from 1.
    for line_no, line in enumerate(lines[start + 1:end], start=start + 2):
        stripped = line.strip()
        if not stripped or stripped.startswith(';'):
            continue
        entries.append((line_no, line.split()[0], line))
    return entries


def find_duplicates(entries):
    """Map every ID that occurs more than once to its list of line numbers."""
    where = defaultdict(list)
    for line_no, tok, _line in entries:
        where[tok].append(line_no)
    return {tok: nums for tok, nums in where.items() if len(nums) > 1}


def main():
    """Scan the input file, print the findings and write the report file."""
    text = inp.read_text(encoding='utf-8')
    ids = collect_valve_entries(text.splitlines())
    if ids is None:
        print('No [VALVES] section found')
        raise SystemExit(1)
    dups = find_duplicates(ids)
    print('Total valve entries found:', len(ids))
    print('Duplicate token count:', len(dups))
    if dups:
        print('\nSample duplicates:')
        for k, v in list(dups.items())[:20]:
            print(k, 'lines:', v)
    # show whether tokens are purely digits
    num_only = [tok for _ln, tok, _l in ids if re.fullmatch(r'\d+', tok)]
    print('\nNumeric-only valve IDs count:', len(num_only))
    # show examples of numeric-only
    if num_only:
        print('Examples:', num_only[:20])
    # write a short report
    rep = inp.with_name(inp.stem + '-valves-report.txt')
    with rep.open('w', encoding='utf-8') as f:
        f.write(f'Total valve entries: {len(ids)}\n')
        f.write(f'Duplicate tokens: {len(dups)}\n')
        for k, v in dups.items():
            f.write(f'{k}: lines {v}\n')
        f.write('\nNumeric-only tokens:\n')
        for tok in sorted(set(num_only)):
            f.write(tok + '\n')
    print('Wrote report to', rep)


if __name__ == "__main__":
    main()

Binary file not shown.