diff --git a/convert.py b/convert.py
index ba8816b..f5e9b2f 100755
--- a/convert.py
+++ b/convert.py
@@ -35,7 +35,8 @@ def find_sf_files(model_dir):
         return [single]
     index = os.path.join(model_dir, "model.safetensors.index.json")
     if os.path.exists(index):
-        idx = json.load(open(index))
+        with open(index, "r", encoding="utf-8") as f:
+            idx = json.load(f)
         shards = sorted(set(idx["weight_map"].values()))
         return [os.path.join(model_dir, s) for s in shards]
     diffusers = os.path.join(model_dir, "diffusion_pytorch_model.safetensors")
@@ -108,45 +109,44 @@ def add_metadata(w, cfg, model_type):
     w.add_string("acestep.config_json", json.dumps(cfg, separators=(",", ":")))
 
 # Tensor packing from safetensors
-def add_tensors_from_sf(w, sf_path, tag):
+def add_tensors_from_sf(w, sf_path, tag, model_type):
     meta, hdr_size = read_sf_header(sf_path)
     names = sorted(meta.keys())
-    f = open(sf_path, "rb")
-    count = 0
-    total = 0
-
-    for name in names:
-        info = meta[name]
-
-        # normalize: some upstream checkpoints omit the "model." prefix
-        if not name.startswith("model."):
-            name = "model." + name
-
-        dtype_str = info["dtype"]
-        shape = info["shape"]
-        off0, off1 = info["data_offsets"]
-        nbytes = off1 - off0
-
-        f.seek(hdr_size + off0)
-        raw = f.read(nbytes)
-
-        if dtype_str == "BF16":
-            arr = np.frombuffer(raw, dtype=np.uint16).reshape(shape)
-            w.add_tensor(name, arr, raw_dtype=BF16)
-        elif dtype_str == "F16":
-            arr = np.frombuffer(raw, dtype=np.float16).reshape(shape)
-            w.add_tensor(name, arr)
-        elif dtype_str == "F32":
-            arr = np.frombuffer(raw, dtype=np.float32).reshape(shape)
-            w.add_tensor(name, arr)
-        else:
-            log(tag, " skip %s: dtype %s" % (name, dtype_str))
-            continue
+    with open(sf_path, "rb") as f:
+        count = 0
+        total = 0
+
+        for name in names:
+            info = meta[name]
+
+            # normalize: some upstream checkpoints omit the "model." prefix
+            if model_type == "lm" and not name.startswith("model."):
+                name = "model." + name
+
+            dtype_str = info["dtype"]
+            shape = info["shape"]
+            off0, off1 = info["data_offsets"]
+            nbytes = off1 - off0
+
+            f.seek(hdr_size + off0)
+            raw = f.read(nbytes)
+
+            if dtype_str == "BF16":
+                arr = np.frombuffer(raw, dtype=np.uint16).reshape(shape)
+                w.add_tensor(name, arr, raw_dtype=BF16)
+            elif dtype_str == "F16":
+                arr = np.frombuffer(raw, dtype=np.float16).reshape(shape)
+                w.add_tensor(name, arr)
+            elif dtype_str == "F32":
+                arr = np.frombuffer(raw, dtype=np.float32).reshape(shape)
+                w.add_tensor(name, arr)
+            else:
+                log(tag, " skip %s: dtype %s" % (name, dtype_str))
+                continue
 
-        count += 1
-        total += nbytes
+            count += 1
+            total += nbytes
 
-    f.close()
     return count, total
 
 # silence_latent.pt reader (replaces pt2bin C++ tool)
@@ -171,13 +171,14 @@ def add_bpe_tokenizer(w, model_dir, tag):
     if not os.path.exists(vocab_path) or not os.path.exists(merges_path):
         return False
 
-    vocab = json.load(open(vocab_path))
+    with open(vocab_path, "r", encoding="utf-8") as f:
+        vocab = json.load(f)
     tokens = [""] * len(vocab)
     for tok_str, tok_id in vocab.items():
         if 0 <= tok_id < len(tokens):
             tokens[tok_id] = tok_str
 
-    with open(merges_path) as f:
+    with open(merges_path, "r", encoding="utf-8") as f:
         merges = []
         for line in f:
             line = line.rstrip("\n\r")
@@ -202,7 +203,8 @@ def convert_model(name, model_dir, output_path, model_type):
         log(tag, "skip %s: no config.json" % name)
         return False
 
-    cfg = json.load(open(cfg_path))
+    with open(cfg_path, "r", encoding="utf-8") as f:
+        cfg = json.load(f)
     sf_files = find_sf_files(model_dir)
     if not sf_files:
         log(tag, "skip %s: no safetensors" % name)
@@ -225,7 +227,7 @@ def convert_model(name, model_dir, output_path, model_type):
     n_tensors = 0
     n_bytes = 0
     for sf in sf_files:
-        c, b = add_tensors_from_sf(w, sf, tag)
+        c, b = add_tensors_from_sf(w, sf, tag, model_type)
         n_tensors += c
         n_bytes += b
     if len(sf_files) > 1: