fix bug and refine ,end of 2025
This commit is contained in:
64
epanet/fix_inp_nonascii.py
Normal file
64
epanet/fix_inp_nonascii.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
"""
Fix non-ASCII ID tokens in an EPANET .inp file by mapping each unique token
that contains a non-ASCII character to an ASCII-safe name. Writes a new INP
file plus a `<output>.mapping.txt` file listing every rename for review.

Usage: python fix_inp_nonascii.py input.inp [output.inp]

If output.inp is omitted, the output is written next to the input as
`<stem>-ascii<suffix>`.
"""
import re
import sys
from pathlib import Path

# A token is a maximal whitespace-delimited run; we want those containing at
# least one character outside the ASCII range.
_NONASCII_TOKEN = re.compile(r"\S*[^\x00-\x7F]\S*")


def _build_mapping(tokens, reserved):
    """Map each token to a unique ASCII-safe name.

    tokens   -- iterable of tokens containing non-ASCII characters
    reserved -- names that generated candidates must avoid; pre-seeding this
                with every token already in the file prevents a generated
                name from colliding with an existing EPANET ID.
    Returns a dict {original_token: ascii_name}.
    """
    used = set(reserved)
    mapping = {}
    counter = 1
    # Sort tokens so the output is deterministic across runs.
    for t in sorted(tokens):
        # Keep only ASCII characters that are safe in an EPANET ID
        # (alphanumerics, underscore, hyphen).
        prefix = ''.join(
            ch for ch in t if ord(ch) < 128 and (ch.isalnum() or ch in '_-')
        )
        if not prefix:
            prefix = 'ID'
        candidate = prefix
        # Ensure the candidate is globally unique.  (A candidate can never
        # equal the original token: candidates are pure ASCII while every
        # token here contains a non-ASCII character.)
        while candidate in used:
            candidate = f"{prefix}_x{counter}"
            counter += 1
        mapping[t] = candidate
        used.add(candidate)
    return mapping


def main(argv=None):
    """CLI entry point.  argv is the argument list after the program name."""
    argv = sys.argv[1:] if argv is None else list(argv)
    if not argv:
        print("Usage: python fix_inp_nonascii.py input.inp [output.inp]")
        sys.exit(2)

    src = Path(argv[0])
    if len(argv) > 1:
        dst = Path(argv[1])
    else:
        dst = src.with_name(src.stem + '-ascii' + src.suffix)

    text = src.read_text(encoding='utf-8')
    nonascii_tokens = set(_NONASCII_TOKEN.findall(text))
    if not nonascii_tokens:
        print("No non-ASCII tokens found. Copying source to destination unchanged.")
        dst.write_text(text, encoding='utf-8')
        sys.exit(0)

    # Reserve every token already present so generated names never collide
    # with existing IDs in the file.
    mapping = _build_mapping(nonascii_tokens, reserved=text.split())

    new_text = text
    for src_token, dst_token in mapping.items():
        # Replace whole tokens only: the lookarounds forbid the match from
        # being part of a larger non-whitespace run.  Without them, a short
        # token replaced first would corrupt any longer token containing it.
        pattern = r"(?<!\S)" + re.escape(src_token) + r"(?!\S)"
        new_text = re.sub(pattern, dst_token, new_text)

    # Write the converted INP and the review mapping side by side.
    dst.write_text(new_text, encoding='utf-8')
    mapfile = dst.with_suffix(dst.suffix + '.mapping.txt')
    with mapfile.open('w', encoding='utf-8') as f:
        for k, v in mapping.items():
            f.write(f"{k} -> {v}\n")

    print(f"Wrote: {dst}\nMapping: {mapfile}\nReplaced {len(mapping)} non-ASCII tokens.")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user