Fix bugs and refine; end-of-2025 cleanup

This commit is contained in:
xinzish
2025-12-31 16:11:28 +08:00
parent 38fb35a333
commit 32bbe3ddcd
23 changed files with 3180 additions and 68 deletions

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env python3
"""
Fix non-ASCII ID tokens in an EPANET .inp file by mapping each unique non-ASCII-containing token
to an ASCII-safe name. Outputs a new INP and a mapping file for review.
Usage: python fix_inp_nonascii.py input.inp [output.inp]
"""
import re
import sys
from pathlib import Path


def build_ascii_mapping(tokens, reserved=()):
    """Map each non-ASCII token to a unique ASCII-safe replacement name.

    Args:
        tokens: iterable of tokens, each containing at least one non-ASCII char.
        reserved: names that must NOT be generated (e.g. ASCII IDs already
            present in the file) — prevents a generated name from colliding
            with, and silently merging into, an existing network element.

    Returns:
        dict mapping original token -> ASCII-safe name (deterministic:
        tokens are processed in sorted order).
    """
    used = set(reserved)
    mapping = {}
    counter = 1
    for token in sorted(tokens):  # sorted => deterministic output
        # Keep only ASCII chars that are safe in an EPANET ID (alnum, _ , -).
        prefix = ''.join(ch for ch in token
                         if ord(ch) < 128 and (ch.isalnum() or ch in '_-'))
        if not prefix:
            prefix = 'ID'
        candidate = prefix
        # Append a numeric suffix until the name is unique and unreserved.
        # (candidate is pure ASCII while token contains a non-ASCII char,
        # so candidate can never equal token — no extra check needed.)
        while candidate in used:
            candidate = f"{prefix}_x{counter}"
            counter += 1
        mapping[token] = candidate
        used.add(candidate)
    return mapping


def replace_tokens(text, mapping):
    """Replace whole whitespace-delimited tokens per *mapping*, in one pass.

    A single combined regex with (?<!\\S)/(?!\\S) lookarounds guarantees only
    complete tokens match, and the one-pass substitution means replacing a
    short token (e.g. '泵') can never corrupt a longer token that contains it
    (e.g. '泵1') — a bug the naive per-token re.sub loop has.
    """
    if not mapping:
        return text
    # Longest-first alternation so the regex engine prefers the longest token.
    alternation = '|'.join(re.escape(t)
                           for t in sorted(mapping, key=len, reverse=True))
    pattern = re.compile(rf"(?<!\S)(?:{alternation})(?!\S)")
    return pattern.sub(lambda m: mapping[m.group(0)], text)


def main(argv):
    """CLI entry point. Returns the process exit code."""
    if len(argv) < 2:
        print("Usage: python fix_inp_nonascii.py input.inp [output.inp]")
        return 2
    src = Path(argv[1])
    if len(argv) > 2:
        dst = Path(argv[2])
    else:
        dst = src.with_name(src.stem + '-ascii' + src.suffix)
    text = src.read_text(encoding='utf-8')
    # A token is a contiguous non-whitespace run containing >= 1 non-ASCII char.
    nonascii_tokens = set(re.findall(r"\S*[^\x00-\x7F]\S*", text))
    if not nonascii_tokens:
        print("No non-ASCII tokens found. Copying source to destination unchanged.")
        dst.write_text(text, encoding='utf-8')
        return 0
    # Reserve every token already in the file so generated names never
    # collide with an existing ASCII ID.
    mapping = build_ascii_mapping(nonascii_tokens, reserved=set(text.split()))
    new_text = replace_tokens(text, mapping)
    # Write output files: the rewritten INP plus a human-reviewable mapping.
    dst.write_text(new_text, encoding='utf-8')
    mapfile = dst.with_suffix(dst.suffix + '.mapping.txt')
    with mapfile.open('w', encoding='utf-8') as f:
        for original, replacement in mapping.items():
            f.write(f"{original} -> {replacement}\n")
    print(f"Wrote: {dst}\nMapping: {mapfile}\nReplaced {len(mapping)} non-ASCII tokens.")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))