Compare commits
3 Commits
5578b84fd8
..
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 7de23ac5be | |||
| c480902306 | |||
| bee1ed65a4 |
Binary file not shown.
@@ -1,260 +0,0 @@
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import statistics
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
|
||||
ort.set_default_logger_severity(3)
|
||||
|
||||
# One row per supported ONNX tensor element type:
# (ONNX Runtime type string, NumPy dtype used to build feeds, short display tag).
_DTYPE_TABLE = (
    ("tensor(float)", np.float32, "f32"),
    ("tensor(float16)", np.float16, "f16"),
    ("tensor(double)", np.float64, "f64"),
    ("tensor(int64)", np.int64, "i64"),
    ("tensor(int32)", np.int32, "i32"),
    ("tensor(int8)", np.int8, "i8"),
    ("tensor(uint8)", np.uint8, "u8"),
    ("tensor(bool)", np.bool_, "b"),
)

# ONNX Runtime type string -> NumPy dtype.
NP = {onnx_type: dtype for onnx_type, dtype, _ in _DTYPE_TABLE}

# ONNX Runtime type string -> short tag printed next to input shapes.
TAG = {onnx_type: tag for onnx_type, _, tag in _DTYPE_TABLE}

# Graph base names looked up as "<name>.onnx" (and "<name>_quant.onnx") in the model directory.
GRAPHS = ["ssl", "encode", "decode", "global"]
|
||||
|
||||
|
||||
def cpu_info():
    """Collect a best-effort description of the host CPU.

    Returns:
        dict with keys:
          - "cpu": model name from /proc/cpuinfo when available, otherwise
            the value of ``platform_cpu()``.
          - "logical": ``os.cpu_count()``.
          - "phys": the "cpu cores" value from /proc/cpuinfo, or "?".
          - "isa": mapping of selected instruction-set flags to 0/1; left
            empty when /proc/cpuinfo cannot be read.
    """
    info = {"cpu": platform_cpu(), "logical": os.cpu_count(), "phys": "?", "isa": {}}
    try:
        lines = Path("/proc/cpuinfo").read_text().splitlines()
    except (OSError, UnicodeError):
        # /proc/cpuinfo is missing or unreadable: keep the generic defaults.
        return info

    def first_value(prefix):
        """Return the text after ':' on the first line starting with *prefix*."""
        for line in lines:
            if line.startswith(prefix):
                return line.partition(":")[2].strip()
        return None

    model = first_value("model name")
    if model is not None:
        info["cpu"] = model

    cores = first_value("cpu cores")
    if cores:
        info["phys"] = cores

    # Compare whole flag tokens; a substring test on the raw line would also
    # match any longer flag that merely contains the name being probed.
    flags = set((first_value("flags") or "").split())
    info["isa"] = {k: int(k in flags) for k in
                   ["avx2", "avx512f", "avx_vnni", "avx512_vnni", "amx_int8"]}
    return info
|
||||
|
||||
|
||||
def platform_cpu():
    """Return ``platform.processor()``, or ``platform.machine()`` when that is empty."""
    from platform import machine, processor

    name = processor()
    if name:
        return name
    return machine()
|
||||
|
||||
|
||||
def make_session(path, provider, intra, inter, profile=False):
    """Create an ONNX Runtime inference session for one model file.

    Args:
        path: model file; converted with ``str()`` before loading.
        provider: "openvino" requests the OpenVINO provider (device type CPU)
            followed by the CPU provider; any other value uses the CPU
            provider only.
        intra: intra-op thread count; a falsy value leaves the option untouched.
        inter: inter-op thread count; a falsy value leaves the option untouched.
        profile: value assigned to ``enable_profiling``.

    Returns:
        The ``ort.InferenceSession``.
    """
    options = ort.SessionOptions()
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    options.enable_profiling = profile

    # Only override a thread setting when a non-zero count was supplied.
    for attr, threads in (("intra_op_num_threads", intra), ("inter_op_num_threads", inter)):
        if threads:
            setattr(options, attr, threads)

    ep_chain = ["CPUExecutionProvider"]
    if provider == "openvino":
        ep_chain.insert(0, ("OpenVINOExecutionProvider", {"device_type": "CPU"}))

    return ort.InferenceSession(str(path), sess_options=options, providers=ep_chain)
|
||||
|
||||
|
||||
def dim_value(name, axis, ndim, meta, seq):
    """Pick a concrete size for one dynamic axis of a model input.

    Args:
        name: input name; matched case-insensitively against keywords.
        axis: index of the axis being resolved.
        ndim: rank of the input.
        meta: export metadata dict (reads "ssl_in_16k", "enc_tokens",
            "downsample_factor", "dec_tokens").
        seq: fallback length when no rule or metadata applies.

    Returns:
        The size to use for the axis.
    """
    lowered = name.lower()

    # Leading axis of any input with rank >= 2 is fixed to 1.
    if ndim >= 2 and axis == 0:
        return 1

    if "audio" in lowered:
        return int(meta.get("ssl_in_16k", seq))

    is_local = "local" in lowered or ("ssl_features" in lowered and "global" not in lowered)
    if is_local:
        frames = int(meta.get("enc_tokens", 1) * meta.get("downsample_factor", 1))
        # A zero product falls back to the generic sequence length.
        return frames if frames else seq

    if "token" in lowered or "indices" in lowered:
        return int(meta.get("dec_tokens", seq))

    return seq
|
||||
|
||||
|
||||
def resolve_inputs(sess, meta, seq, rng):
    """Build synthetic feeds for every input of an inference session.

    Positive integer dimensions are kept; every other dimension is resolved
    with ``dim_value``. Integer inputs are filled with zeros, boolean inputs
    with ones, and all other inputs with standard-normal samples from *rng*,
    adjusted by name: "std" -> ``abs(x) + 1``, "mean" -> zeros,
    "audio" -> scaled by 0.1.

    Args:
        sess: session exposing ``get_inputs()`` (items with name/type/shape).
        meta: export metadata dict forwarded to ``dim_value``.
        seq: fallback sequence length forwarded to ``dim_value``.
        rng: NumPy random generator providing ``standard_normal``.

    Returns:
        ``(feeds, shapes)`` where ``feeds`` maps input name -> array and
        ``shapes`` maps input name -> ``(shape list, dtype tag)``.
    """
    feeds = {}
    shapes = {}
    for spec in sess.get_inputs():
        rank = len(spec.shape)
        dims = []
        for axis, declared in enumerate(spec.shape):
            if isinstance(declared, int) and declared > 0:
                dims.append(declared)
            else:
                dims.append(dim_value(spec.name, axis, rank, meta, seq))
        shapes[spec.name] = (dims, TAG.get(spec.type, "?"))

        # Unknown element types are fed as float32.
        dtype = NP.get(spec.type, np.float32)
        if np.issubdtype(dtype, np.integer):
            feeds[spec.name] = np.zeros(dims, dtype=dtype)
            continue
        if dtype == np.bool_:
            feeds[spec.name] = np.ones(dims, dtype=dtype)
            continue

        values = rng.standard_normal(dims).astype(dtype)
        lowered = spec.name.lower()
        if "std" in lowered:
            values = np.abs(values) + 1.0
        elif "mean" in lowered:
            values *= 0.0
        elif "audio" in lowered:
            values *= 0.1
        feeds[spec.name] = values
    return feeds, shapes
|
||||
|
||||
|
||||
def bench(sess, feeds, runs, warmup):
    """Time repeated ``sess.run`` calls.

    Args:
        sess: session exposing ``get_outputs()`` and ``run(names, feeds)``.
        feeds: input dict passed unchanged to every run.
        runs: number of timed runs.
        warmup: number of untimed runs executed first.

    Returns:
        ``(latencies, output_names)``: per-run wall-clock latencies in
        milliseconds, and the list of output names that were requested.
    """
    output_names = [node.name for node in sess.get_outputs()]

    for _ in range(warmup):
        sess.run(output_names, feeds)

    latencies_ms = []
    for _ in range(runs):
        started = time.perf_counter()
        sess.run(output_names, feeds)
        elapsed = time.perf_counter() - started
        latencies_ms.append(elapsed * 1e3)
    return latencies_ms, output_names
|
||||
|
||||
|
||||
def profile_ops(path, provider, intra, inter, feeds, out, runs):
    """Run a profiled session and aggregate kernel time per operator type.

    Args:
        path, provider, intra, inter: forwarded to ``make_session``.
        feeds: input dict for ``sess.run``.
        out: output names to request.
        runs: number of profiled runs.

    Returns:
        ``(rows, total, prov)``:
          - rows: ``(op_name, summed "dur")`` pairs sorted by duration, largest first.
          - total: sum of all durations, or 1.0 when that sum is zero (so
            callers can divide by it).
          - prov: op_name -> set of provider names reported for that op.
    """
    sess = make_session(path, provider, intra, inter, profile=True)
    for _ in range(runs):
        sess.run(out, feeds)

    prof = Path(sess.end_profiling())
    try:
        events = json.loads(prof.read_text())
    finally:
        # Always remove the trace file, even when reading or parsing it fails.
        prof.unlink(missing_ok=True)

    agg, prov = {}, {}
    for event in events:
        # Only per-node kernel timing events contribute to the totals.
        if event.get("cat") != "Node" or not event.get("name", "").endswith("kernel_time"):
            continue
        event_args = event.get("args", {})
        op = event_args.get("op_name", "?")
        agg[op] = agg.get(op, 0.0) + event.get("dur", 0)
        provider_name = event_args.get("provider", "")
        if provider_name:
            prov.setdefault(op, set()).add(provider_name)

    rows = sorted(agg.items(), key=lambda kv: kv[1], reverse=True)
    return rows, (sum(agg.values()) or 1.0), prov
|
||||
|
||||
|
||||
def static_ops(path):
    """Count graph nodes per operator type using the optional ``onnx`` package.

    Args:
        path: model file; loaded with ``load_external_data=False``.

    Returns:
        dict of op_type -> node count ordered by count (largest first), or
        ``None`` when ``onnx`` cannot be imported.
    """
    try:
        import onnx
    except Exception:
        # The static histogram is optional; skip it when onnx is unusable.
        return None

    model = onnx.load(str(path), load_external_data=False)

    tally = {}
    for node in model.graph.node:
        kind = node.op_type
        tally[kind] = tally.get(kind, 0) + 1

    ranked = list(tally.items())
    ranked.sort(key=lambda item: item[1], reverse=True)
    return dict(ranked)
|
||||
|
||||
|
||||
def main():
    """Benchmark the exported ONNX graphs and write CSV reports.

    Steps:
      1. Parse CLI options and load the export metadata JSON.
      2. Collect the models to benchmark: every ``<graph>.onnx`` found in
         ``--dir``, optional ``<graph>_quant.onnx`` files (``--quant``) and
         any ``NAME=PATH`` pairs given with ``--extra``.
      3. Print environment information.
      4. For each model: time it, print a static op histogram (when ``onnx``
         is importable) and a per-operator time breakdown from a profiled run.
         A failure in one model is reported and does not stop the others.
      5. Print a per-window roll-up and write ``outputs/bench.csv`` and
         ``outputs/ops.csv``.
    """
    import csv

    ap = argparse.ArgumentParser()
    ap.add_argument("--dir", default="outputs")
    ap.add_argument("--meta")
    ap.add_argument("--provider", choices=["cpu", "openvino"], default="cpu")
    ap.add_argument("--intra", type=int, default=0)
    ap.add_argument("--inter", type=int, default=0)
    ap.add_argument("--runs", type=int, default=50)
    ap.add_argument("--warmup", type=int, default=5)
    ap.add_argument("--seq", type=int, default=100)
    ap.add_argument("--extra", nargs="*", default=[])
    ap.add_argument("--quant", action="store_true")
    args = ap.parse_args()

    d = Path(args.dir)
    meta = json.loads(Path(args.meta or d / "meta.json").read_text())
    rng = np.random.default_rng(0)
    avail = ort.get_available_providers()
    prov = args.provider
    have_openvino = "OpenVINOExecutionProvider" in avail
    if prov == "openvino" and not have_openvino:
        prov = "cpu"
        ov_note = "requested but NOT installed -> fell back to cpu"
    else:
        ov_note = "available" if have_openvino else "not installed"

    models = {g: d / f"{g}.onnx" for g in GRAPHS if (d / f"{g}.onnx").exists()}
    if args.quant:
        for g in GRAPHS:
            q = d / f"{g}_quant.onnx"
            if q.exists():
                models[f"{g}_q"] = q
    for kv in args.extra:
        name, sep, path = kv.partition("=")
        # Reject malformed entries instead of silently benchmarking Path("").
        if not (name and sep and path):
            ap.error(f"--extra expects NAME=PATH, got {kv!r}")
        models[name] = Path(path)

    ci = cpu_info()
    isa = " ".join(f"{k}={v}" for k, v in ci["isa"].items())
    print("=== ENV ===")
    print(f"cpu: {ci['cpu']}")
    print(f"cores: {ci['phys']} phys / {ci['logical']} logical")
    print(f"isa: {isa}")
    print(f"onnxruntime: {ort.__version__}")
    print(f"providers avail: {avail}")
    print(f"openvino EP: {ov_note}")
    print(f"config: provider={prov} intra={args.intra or 'default'} "
          f"inter={args.inter or 'default'} runs={args.runs}")

    # The profiled pass reuses the warm-up count (5 when warm-up is disabled).
    prof_runs = args.warmup or 5

    med = {}
    csv_rows = [("graph", "med_ms", "mean_ms", "p90_ms", "min_ms", "runs")]
    op_rows = [("graph", "op", "ms_per_run", "pct", "provider")]
    for name, path in models.items():
        print(f"\n=== {name.upper()} ===")
        try:
            # stat() is inside the try so a missing file (e.g. a bad --extra
            # path) fails only this model instead of aborting the whole run.
            print(f"path: {path} size: {path.stat().st_size / 1e6:.3g} MB")
            sess = make_session(path, prov, args.intra, args.inter)
            feeds, shapes = resolve_inputs(sess, meta, args.seq, rng)
            print("inputs: " + " ".join(
                f"{k}[{','.join(map(str, s))}]{t}" for k, (s, t) in shapes.items()))
            ts, out = bench(sess, feeds, args.runs, args.warmup)
            m = statistics.median(ts)
            med[name] = m
            mean = statistics.fmean(ts)
            fastest = min(ts)
            p90 = sorted(ts)[int(0.9 * len(ts)) - 1]
            print(f"latency ms: med {m:.3g} mean {mean:.3g} "
                  f"p90 {p90:.3g} min {fastest:.3g}")
            csv_rows.append((name, f"{m:.3g}", f"{mean:.3g}",
                             f"{p90:.3g}", f"{fastest:.3g}", args.runs))

            so = static_ops(path)
            if so:
                print("ops static: " + " ".join(f"{k}:{v}" for k, v in list(so.items())[:10]))

            rows, total, pmap = profile_ops(path, prov, args.intra, args.inter, feeds, out, prof_runs)
            # Only tag operators with their provider when more than one was used.
            multi = len({p for ps in pmap.values() for p in ps}) > 1
            parts = []
            for op, dur in rows[:6]:
                pr = "/".join(sorted(x.replace("ExecutionProvider", "") for x in pmap.get(op, [])))
                tag = f"({pr})" if multi else ""
                ms_per_run = dur / prof_runs / 1e3
                pct = 100 * dur / total
                parts.append(f"{op}{tag} {ms_per_run:.3g}ms {pct:.0f}%")
                op_rows.append((name, op, f"{ms_per_run:.3g}", f"{pct:.0f}", pr or "CPU"))
            print("ops time: " + " | ".join(parts))
        except Exception as e:
            # Per-model boundary: report the failure and continue with the rest.
            print(f"FAILED: {e}")

    print("\n=== ROLLUP ===")
    ds = meta.get("downsample_factor", 1)
    tok16 = ds * meta.get("wavlm_hop", 1)
    sr16 = meta.get("ssl_sample_rate", 16000)
    chunk = meta.get("chunk", 1)
    audio_s = chunk * tok16 / sr16
    per_win = sum(med.get(g, 0.0) for g in ("ssl", "encode", "decode"))
    print(f"chunk={chunk} tok16={tok16} audio/window={audio_s * 1e3:.3g}ms")
    print(f"per-window compute (ssl+encode+decode): {per_win:.3g}ms")
    if audio_s > 0:
        print(f"est streaming RTF: {(per_win / 1e3) / audio_s:.3g} (global enc one-shot, excluded)")

    if args.quant:
        print("fp32 -> quant:")
        for g in ("ssl", "encode", "decode", "global"):
            if g in med and f"{g}_q" in med:
                f0, f1 = med[g], med[f"{g}_q"]
                print(f" {g}: {f0:.3g} -> {f1:.3g}ms ({100 * (1 - f1 / f0):+.0f}%)")
        # Graphs without a quantized variant contribute their fp32 latency.
        per_q = sum(med.get(f"{g}_q", med.get(g, 0.0)) for g in ("ssl", "encode", "decode"))
        if audio_s > 0:
            print(f"per-window quant: {per_q:.3g}ms RTF {(per_q / 1e3) / audio_s:.3g}")

    od = Path("outputs")
    od.mkdir(exist_ok=True)
    with open(od / "bench.csv", "w", newline="") as f:
        csv.writer(f).writerows(csv_rows)
    with open(od / "ops.csv", "w", newline="") as f:
        csv.writer(f).writerows(op_rows)
    print(f"\nwrote {od/'bench.csv'} {od/'ops.csv'}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
+3
-3
@@ -416,10 +416,10 @@ if __name__ == "__main__":
|
||||
p.add_argument("--weights")
|
||||
p.add_argument("--out-dir", default="outputs")
|
||||
p.add_argument("--chunk", type=int, default=6)
|
||||
p.add_argument("--enc-left", type=int, default=48)
|
||||
p.add_argument("--enc-right", type=int, default=2)
|
||||
p.add_argument("--enc-left", type=int, default=32)
|
||||
p.add_argument("--enc-right", type=int, default=4)
|
||||
p.add_argument("--dec-left", type=int, default=32)
|
||||
p.add_argument("--dec-right", type=int, default=3)
|
||||
p.add_argument("--dec-right", type=int, default=4)
|
||||
p.add_argument("--mode", choices=["all", "ssl", "encode", "decode", "global"], default="all")
|
||||
args = p.parse_args()
|
||||
|
||||
|
||||
@@ -0,0 +1,413 @@
|
||||
import argparse
|
||||
import math
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
import soundfile as sf
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
import torchaudio
|
||||
|
||||
from miocodec.model import MioCodecModel
|
||||
|
||||
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
import gc
|
||||
|
||||
class StreamingISTFT:
    """Inverse STFT that can be fed spectrogram frames chunk by chunk.

    Each call overlap-adds the Hann-windowed inverse FFT frames, adds the tail
    carried over from the previous call, emits the samples that can no longer
    change, and keeps the last ``win - hop`` samples for the next call.
    """

    def __init__(self, n_fft, hop, device):
        self.n_fft = n_fft
        self.win = n_fft
        self.hop = hop
        # Samples dropped from the front of the very first emitted chunk.
        self.pad = (self.win - hop) // 2
        # Samples held back after each call because later frames still overlap them.
        self.carry = self.win - self.hop
        self.window = torch.hann_window(self.win, device=device)
        self.win_sq = (self.window**2).view(1, -1, 1)
        self.tail_y = torch.zeros(1, 0, device=device)
        self.tail_e = torch.zeros(1, 0, device=device)
        self.started = False

    def reset(self):
        """Drop the carried tails and mark the stream as not started."""
        self.tail_y = self.tail_y[:, :0]
        self.tail_e = self.tail_e[:, :0]
        self.started = False

    def _overlap_add(self, frames, length):
        """Overlap-add ``frames`` (1, win, T) with stride ``hop`` into a (1, length) signal."""
        folded = F.fold(frames, (1, length), (1, self.win), stride=(1, self.hop))
        return folded[:, 0, 0, :]

    def process(self, spec):
        """Convert one chunk of complex spectrogram frames into audio samples.

        Args:
            spec: complex tensor whose dim 1 is frequency and last dim is the
                frame index (the code assumes a leading batch dim of 1).

        Returns:
            1-D tensor of finalized samples for this chunk.
        """
        n_frames = spec.shape[-1]
        span = (n_frames - 1) * self.hop + self.win

        frames = torch.fft.irfft(spec, self.n_fft, dim=1, norm="backward") * self.window.view(1, -1, 1)
        signal = self._overlap_add(frames, span)
        # Summed squared window, used to normalize the overlap-add result.
        envelope = self._overlap_add(self.win_sq.expand(1, self.win, n_frames), span)

        carried = self.tail_y.shape[-1]
        if carried:
            signal[:, :carried] += self.tail_y
            envelope[:, :carried] += self.tail_e

        ready = span - self.carry
        chunk = signal[:, :ready] / envelope[:, :ready].clamp(min=1e-8)
        self.tail_y = signal[:, ready:].clone()
        self.tail_e = envelope[:, ready:].clone()

        if not self.started:
            self.started = True
            chunk = chunk[:, self.pad:]
        return chunk.squeeze(0)
|
||||
|
||||
|
||||
class StreamingVC:
    """Chunked voice-conversion pipeline around a codec model.

    Audio windows are encoded into content token indices, committed to a
    growing token buffer, and decoded chunk by chunk (with left/right token
    context) into audio through a ``StreamingISTFT``.

    Args:
        model: codec model; moved to *device* and put in eval mode.
        device: torch device used for all tensors.
        chunk: tokens emitted per decode step.
        enc_left, enc_right: encoder context sizes in tokens (stored only;
            the caller builds the encoder window).
        dec_left, dec_right: decoder context sizes in tokens.
        ema_alpha: weight of the current features in the feature EMA.
    """

    def __init__(self, model, device, *, chunk=6, enc_left=48, enc_right=2,
                 dec_left=32, dec_right=3, ema_alpha=0.9):
        self.m = model.to(device).eval()
        self.dev = device

        c = model.config
        ssl_fps = self.m.ssl_feature_extractor.ssl_sample_rate // self.m.ssl_feature_extractor.hop_size
        self.token_hz = ssl_fps // c.downsample_factor
        self.sr = c.sample_rate
        self.tok_samples = self.sr // self.token_hz
        ups_total = self.m.wave_upsampler.total_upsample_factor
        self.frames_per_tok = c.wave_upsample_factor * ups_total
        # Sanity check: spectrogram frames per token times hop must equal samples per token.
        assert self.frames_per_tok * c.hop_length == self.tok_samples, "token/frame/sample ratios disagree"

        self.chunk = chunk
        self.enc_left, self.enc_right = enc_left, enc_right
        self.dec_left, self.dec_right = dec_left, dec_right
        self.local_layers = list(self.m.local_ssl_layers)

        self.istft = StreamingISTFT(c.n_fft, c.hop_length, device)
        self.global_emb = None
        self.src_mean = self.src_std = None
        self.tokens = None  # committed token indices; dim 1 is the token axis
        self.decoded = 0    # number of committed tokens already turned into audio

        self.ema_alpha = ema_alpha
        self.prev_local_feats = None

    def _raw_local(self, audio):
        """Extract SSL features and average the configured local layers."""
        feats = self.m.ssl_feature_extractor(audio.to(self.dev))
        # local_layers are used as 1-based indices into the extractor output.
        sel = [feats[i - 1] for i in self.local_layers]
        return torch.stack(sel, 0).mean(0) if len(sel) > 1 else sel[0]

    def apply_ema(self, local_feats):
        """Blend *local_feats* with the previous call's features and remember the result.

        The blend is skipped on the first call or when the shapes differ.
        NOTE(review): frame i is blended with frame i of the previous window;
        if consecutive windows are offset in time, those frames are presumably
        not time-aligned — confirm this is intended.
        """
        if self.prev_local_feats is not None and local_feats.shape == self.prev_local_feats.shape:
            local_feats = self.ema_alpha * local_feats + (1.0 - self.ema_alpha) * self.prev_local_feats
        self.prev_local_feats = local_feats.clone()
        return local_feats

    @torch.inference_mode()
    def set_target(self, ref_audio):
        """Compute and store the global embedding of the target reference audio."""
        feats = self.m.encode(ref_audio.to(self.dev), return_content=False, return_global=True)
        self.global_emb = feats.global_embedding.view(1, -1)

    def _encode_features(self, loc):
        """Normalize local features with the seed statistics and quantize them to token indices."""
        loc_norm = (loc - self.src_mean) / (self.src_std + 1e-8)
        enc = self.m.local_encoder(loc_norm)
        enc = self.m.conv_downsample(enc.transpose(1, 2)).transpose(1, 2)
        _, idx = self.m.local_quantizer.encode(enc)
        return idx

    @torch.inference_mode()
    def seed(self, seed_audio):
        """Reset the stream and calibrate it from *seed_audio*.

        Stores the per-feature mean/std of the seed's local features, and
        pre-fills the token buffer with the seed's tokens, all marked as
        already decoded so they serve only as decoder left context.
        """
        self.reset()
        if seed_audio.dim() == 1:
            seed_audio = seed_audio.unsqueeze(0)

        loc = self._raw_local(seed_audio)
        self.src_mean = loc.mean(dim=1, keepdim=True).clone()
        self.src_std = loc.std(dim=1, keepdim=True).clone()

        idx = self._encode_features(loc)
        self.tokens = idx.clone()
        self.decoded = idx.shape[1]

    def reset(self):
        """Clear the ISTFT state, the token buffer and the EMA history."""
        self.istft.reset()
        self.tokens = None
        self.decoded = 0
        self.prev_local_feats = None

    @torch.inference_mode()
    def _encode(self, window_audio):
        """Encode one audio window into token indices (features are EMA-smoothed first)."""
        loc = self._raw_local(window_audio)
        loc = self.apply_ema(loc)
        return self._encode_features(loc)

    @torch.inference_mode()
    def _wave_stages(self, tok_window):
        """Run the decoder stack from token indices up to the waveform upsampler output."""
        Tw = tok_window.shape[1]
        emb = self.m.local_quantizer.decode(tok_window)
        x = self.m.wave_prenet(emb)
        x = self.m.wave_conv_upsample(x.transpose(1, 2)).transpose(1, 2)
        # NOTE(review): the factor 2 is hard-coded; presumably it equals
        # config.wave_upsample_factor — confirm.
        x = F.interpolate(x.transpose(1, 2), size=2 * Tw, mode=self.m.config.wave_interpolation_mode).transpose(1, 2)
        x = self.m.wave_prior_net(x.transpose(1, 2)).transpose(1, 2)
        x = self.m.wave_decoder(x, condition=self.global_emb.unsqueeze(1))
        x = self.m.wave_post_net(x.transpose(1, 2)).transpose(1, 2)
        return self.m.wave_upsampler(x.transpose(1, 2))

    @torch.inference_mode()
    def _decode(self, tok_window, keep_left, keep_n):
        """Decode a token window and synthesize audio for its kept tokens only.

        Args:
            tok_window: token indices including left/right context.
            keep_left: number of leading context tokens to skip.
            keep_n: number of tokens whose frames are passed to the ISTFT.
        """
        x = self._wave_stages(tok_window)
        h = self.m.istft_head.out(x).transpose(1, 2)
        mag, phase = h.chunk(2, dim=1)
        mag = torch.exp(mag).clamp(max=1e2)
        spec = torch.complex(mag * torch.cos(phase), mag * torch.sin(phase))
        f0 = keep_left * self.frames_per_tok
        f1 = (keep_left + keep_n) * self.frames_per_tok
        return self.istft.process(spec[..., f0:f1])

    def _commit_tokens(self, new_idx):
        """Append *new_idx* to the token buffer along the token axis."""
        self.tokens = new_idx if self.tokens is None else torch.cat([self.tokens, new_idx], dim=1)

    def _drain(self, final=False):
        """Decode every chunk that currently has enough look-ahead.

        Args:
            final: when True, also decode the remaining tokens with whatever
                right context is left (the last chunk may be shorter).

        Returns:
            1-D tensor with the newly synthesized audio; empty when nothing
            could be decoded.
        """
        out = []
        # The buffer is None after reset() until tokens are committed; treat
        # that as an empty buffer instead of failing on ``None.shape``.
        committed = self.tokens.shape[1] if self.tokens is not None else 0
        while True:
            d0 = self.decoded
            avail = committed - d0
            if avail <= 0 or (not final and avail < self.chunk + self.dec_right):
                break
            keep_n = min(self.chunk, avail) if final else self.chunk
            left = min(self.dec_left, d0)
            right = min(self.dec_right, committed - (d0 + keep_n))
            win = self.tokens[:, d0 - left: d0 + keep_n + right]
            out.append(self._decode(win, left, keep_n))
            self.decoded += keep_n
        return torch.cat(out) if out else torch.zeros(0, device=self.dev)
|
||||
|
||||
|
||||
def list_devices():
    """Print a table of the audio devices reported by ``sd.query_devices()``.

    Columns: index, name, max input channels, max output channels and the
    default sample rate (truncated to an integer).
    """
    header = f"{'idx':>4} {'name':<50} {'in':>3} {'out':>3} {'sr':>7}"
    print(header)
    print("-" * 76)
    for index, dev in enumerate(sd.query_devices()):
        row = f"{index:>4} {dev['name']:<50} {dev['max_input_channels']:>3} {dev['max_output_channels']:>3} {int(dev['default_samplerate']):>7}"
        print(row)
|
||||
|
||||
|
||||
def sync_time(fn):
    """Call *fn* and measure its wall-clock duration.

    When the module-level ``DEVICE`` is CUDA, the device is synchronized
    before starting and before stopping the clock.

    Args:
        fn: zero-argument callable.

    Returns:
        ``(result, elapsed_ms)``: the value returned by *fn* and the elapsed
        time in milliseconds.
    """
    on_gpu = DEVICE.type == "cuda"

    if on_gpu:
        torch.cuda.synchronize()
    started = time.perf_counter()
    result = fn()
    if on_gpu:
        torch.cuda.synchronize()
    elapsed_ms = (time.perf_counter() - started) * 1000
    return result, elapsed_ms
|
||||
|
||||
|
||||
def load_audio(path, target_sr):
    """Load an audio file as a peak-normalized mono float32 tensor.

    Args:
        path: file to read; its ``.name`` is used in the resampling message.
        target_sr: desired sample rate; the audio is resampled when the
            file's rate differs.

    Returns:
        1-D tensor scaled so its peak magnitude is 1, or returned unscaled
        when the peak is not above 1e-8.
    """
    samples, sr = sf.read(path, dtype="float32", always_2d=True)
    # Down-mix all channels to mono by averaging.
    mono = torch.from_numpy(samples.mean(axis=1))

    if sr != target_sr:
        print(f"Resampling {path.name} from {sr} Hz to {target_sr} Hz...")
        mono = torchaudio.functional.resample(mono, orig_freq=sr, new_freq=target_sr)

    peak = torch.abs(mono).max()
    if peak > 1e-8:
        return mono / peak
    return mono
|
||||
|
||||
|
||||
def main():
    """Run real-time voice conversion between an input and an output audio device.

    Flow:
      1. Parse options; ``--list-devices`` prints the device table and exits.
      2. Load the codec model, build a ``StreamingVC`` and set the target
         voice from ``--target``.
      3. Calibrate from ``--seed-audio`` or from a 3-second recording.
      4. Loop: take one captured chunk, append it to the input buffer, encode
         the window (left context + chunk + right look-ahead), commit the
         chunk's tokens, decode what is ready and queue it for playback.
         Chunks below the RMS floor (after the hang-over) are faded out and
         zeroed before encoding.
      5. Stop on Ctrl-C.
    """
    # Collect once, then freeze and disable the garbage collector for the run.
    gc.collect()
    gc.freeze()
    gc.disable()

    parser = argparse.ArgumentParser()
    parser.add_argument("--list-devices", action="store_true")
    parser.add_argument("--input", type=int)
    parser.add_argument("--output", type=int)
    parser.add_argument("--target", type=Path, help="Target voice reference WAV")
    parser.add_argument("--seed-audio", type=Path, help="Seed speaker calibration WAV (optional)")
    parser.add_argument("--chunk", type=int, default=6)
    parser.add_argument("--enc-left", type=int, default=48)
    parser.add_argument("--enc-right", type=int, default=4)
    parser.add_argument("--dec-left", type=int, default=32)
    parser.add_argument("--dec-right", type=int, default=4)
    parser.add_argument("--ema-alpha", type=float, default=0.9,
                        help="EMA smoothing on local SSL features (0=full smoothing, 1=no smoothing)")
    parser.add_argument("--rms-floor", type=float, default=0.0035,
                        help="RMS threshold below which audio chunk is evaluated as silence")
    parser.add_argument("--hangover-chunks", type=int, default=5,
                        help="Number of chunks to hold the gate open after RMS drop")
    parser.add_argument("--silence-fade-ms", type=float, default=10.0,
                        help="Ramp-down duration in ms at silence boundary (0 to disable)")
    args = parser.parse_args()

    if args.list_devices:
        list_devices()
        return

    if args.input is None or args.output is None:
        parser.error("--input and --output required")
    if args.target is None:
        # Fail early with a usage message instead of crashing in the audio
        # loader after the model has already been loaded.
        parser.error("--target required")

    model = MioCodecModel.from_pretrained("Aratako/MioCodec-25Hz-44.1kHz-v2")

    vc = StreamingVC(
        model, DEVICE, chunk=args.chunk, enc_left=args.enc_left, enc_right=args.enc_right,
        dec_left=args.dec_left, dec_right=args.dec_right, ema_alpha=args.ema_alpha
    )

    sr = vc.sr
    ts = vc.tok_samples
    chunk_samples = vc.chunk * ts
    left_pad = vc.enc_left * ts
    right_pad = vc.enc_right * ts
    required_samples = left_pad + chunk_samples + right_pad
    budget_ms = (vc.chunk / vc.token_hz) * 1000
    # Keep the fade inside the active chunk: a longer fade would reach into the
    # look-ahead region or no longer match the ramp length.
    fade_samples = max(0, min(int(args.silence_fade_ms * 0.001 * sr), chunk_samples))

    print(f"Sample Rate: {sr} Hz | Chunk: {args.chunk} tokens ({budget_ms:.1f}ms budget)")
    print(f"EMA alpha: {args.ema_alpha} | Silence fade: {args.silence_fade_ms:.0f}ms")

    print(f"Loading target speaker profile: {args.target}...")
    target_audio = load_audio(args.target, sr)
    vc.set_target(target_audio)

    in_info = sd.query_devices(args.input)
    n_in_ch = min(in_info["max_input_channels"], 2)
    if n_in_ch < 1:
        parser.error(f"device {args.input} has no input channels")

    if args.seed_audio:
        print(f"Loading speaker calibration profile: {args.seed_audio}...")
        seed_audio = load_audio(args.seed_audio, sr)
    else:
        print("\n" + "=" * 60)
        print("No seed-audio provided. Recording 3 seconds for normalization calibration.")
        print("Please speak into your microphone...")
        print("=" * 60)
        # Record from the selected input device, not the system default.
        recorded = sd.rec(int(3.0 * sr), samplerate=sr, channels=n_in_ch,
                          dtype="float32", device=args.input)
        sd.wait()
        print("Recording complete. Calibrating feature scaling...")
        recorded_mono = recorded.mean(axis=1) if recorded.shape[1] > 1 else recorded[:, 0]
        seed_audio = torch.from_numpy(recorded_mono)

    print("Seeding streaming context from speaker profile...")
    vc.seed(seed_audio)

    # Prime the input buffer with the last `left_pad` seed samples (zero-padded
    # on the left when the seed is shorter). An explicit start index is used
    # because `x[-0:]` would return the whole clip when left_pad is 0.
    seed_np = seed_audio.numpy()
    if len(seed_np) >= left_pad:
        raw_input_accum = seed_np[len(seed_np) - left_pad:]
    else:
        raw_input_accum = np.pad(seed_np, (left_pad - len(seed_np), 0))

    in_q = queue.Queue(maxsize=8)
    out_q = queue.Queue(maxsize=2)
    stop_event = threading.Event()

    def input_cb(indata, frames, time_info, status):
        """Capture callback: queue one mono chunk, dropping the oldest when full."""
        if in_q.full():
            in_q.get_nowait()
        mono = indata.mean(axis=1) if indata.shape[1] > 1 else indata[:, 0]
        in_q.put_nowait(mono.copy())

    def write_thread(out_stream):
        """Playback worker: write queued PCM blocks until asked to stop."""
        while not stop_event.is_set():
            try:
                pcm = out_q.get(timeout=0.5)
            except queue.Empty:
                continue
            out_stream.write(pcm)

    print(f"\n{'chunk':>6} {'q_in':>4} {'q_out':>5} {'enc':>7} {'dec':>7} {'total':>7} {'budget':>7} {'gap':>7}")
    print("-" * 76)

    chunk_n = 0
    t_last = None
    hangover_counter = 0

    if fade_samples > 0:
        ramp_down = np.linspace(1.0, 0.0, fade_samples, dtype=np.float32)

    with sd.InputStream(device=args.input, channels=n_in_ch, samplerate=sr,
                        blocksize=chunk_samples, dtype="float32",
                        callback=input_cb, latency="low"):
        with sd.OutputStream(device=args.output, channels=2, samplerate=sr,
                             dtype="float32", latency="low") as out_stream:

            writer = threading.Thread(target=write_thread, args=(out_stream,), daemon=True)
            writer.start()

            try:
                while True:
                    raw = in_q.get()
                    t_now = time.perf_counter()
                    gap_ms = (t_now - t_last) * 1000 if t_last is not None else 0.0
                    t_last = t_now

                    rms = float(np.sqrt(np.mean(raw ** 2)))

                    # Silence gate with hang-over: stay open for a few chunks
                    # after the level drops below the floor.
                    if rms >= args.rms_floor:
                        hangover_counter = args.hangover_chunks
                        is_silence = False
                    elif hangover_counter > 0:
                        hangover_counter -= 1
                        is_silence = False
                    else:
                        is_silence = True

                    raw_input_accum = np.concatenate([raw_input_accum, raw])

                    if len(raw_input_accum) >= required_samples:
                        window_np = raw_input_accum[:required_samples]
                        raw_input_accum = raw_input_accum[chunk_samples:]

                        if is_silence:
                            # Copy first: the window is a view of the input buffer.
                            window_np = window_np.copy()
                            active_start = left_pad
                            active_end = left_pad + chunk_samples
                            if fade_samples > 0:
                                fade_end = active_start + fade_samples
                                window_np[active_start:fade_end] *= ramp_down
                                window_np[fade_end:active_end] = 0.0
                            else:
                                window_np[active_start:active_end] = 0.0

                        window_torch = torch.from_numpy(window_np).unsqueeze(0).to(DEVICE)

                        with torch.no_grad():
                            idx, t_enc = sync_time(lambda: vc._encode(window_torch))
                            # Keep only the tokens of the active chunk; the rest is context.
                            chunk_tokens = idx[:, vc.enc_left : vc.enc_left + vc.chunk]
                            vc._commit_tokens(chunk_tokens)
                            audio_out, t_dec = sync_time(lambda: vc._drain(final=False))

                        if audio_out.numel() == 0:
                            pcm_out = np.zeros((chunk_samples, 2), dtype=np.float32)
                        else:
                            pcm = audio_out.cpu().numpy()
                            pcm = np.clip(pcm, -1.0, 1.0)
                            # Duplicate mono into both output channels.
                            pcm_out = np.stack([pcm, pcm], axis=1)
                    else:
                        # Not enough look-ahead buffered yet: play silence.
                        pcm_out = np.zeros((chunk_samples, 2), dtype=np.float32)
                        t_enc, t_dec = 0.0, 0.0

                    out_q.put(pcm_out)

                    total = t_enc + t_dec
                    chunk_n += 1

                    if is_silence:
                        print(
                            f"{chunk_n:>6} {in_q.qsize():>4} {out_q.qsize():>5} "
                            f"{'--silence--':>31} rms={rms:.4f}",
                            flush=True,
                        )
                    else:
                        print(
                            f"{chunk_n:>6} {in_q.qsize():>4} {out_q.qsize():>5} "
                            f"{t_enc:>6.1f}ms {t_dec:>6.1f}ms "
                            f"{total:>6.1f}ms {budget_ms:>6.0f}ms {gap_ms:>6.1f}ms",
                            flush=True,
                        )

            except KeyboardInterrupt:
                pass
            finally:
                stop_event.set()
                writer.join()

    print("stopped")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
+104
-89
@@ -53,7 +53,20 @@ class StreamingISTFT:
|
||||
self.win_sq = self.window ** 2
|
||||
self.tail_y = np.zeros(0, dtype=np.float32)
|
||||
self.tail_e = np.zeros(0, dtype=np.float32)
|
||||
self.started = False
|
||||
self.started = False\
|
||||
|
||||
def block(self, real, imag):
    """Inverse-STFT one spectrogram block without touching the streaming state.

    Args:
        real, imag: real and imaginary parts of the spectrogram; axis 0 is
            frequency and axis 1 is the frame index.

    Returns:
        float32 array of ``(T - 1) * hop + win`` samples: the windowed
        overlap-add of all frames divided by the summed squared window
        (floored at 1e-8).
    """
    frames = np.fft.irfft(real + 1j * imag, self.n_fft, axis=0)
    frames = (frames * self.window[:, None]).astype(np.float32)

    n_frames = frames.shape[1]
    total = (n_frames - 1) * self.hop + self.win
    signal = np.zeros(total, dtype=np.float32)
    envelope = np.zeros(total, dtype=np.float32)

    # Overlap-add each windowed frame and accumulate the window energy.
    for index, frame in enumerate(frames.T):
        start = index * self.hop
        stop = start + self.win
        signal[start:stop] += frame
        envelope[start:stop] += self.win_sq

    return (signal / np.maximum(envelope, 1e-8)).astype(np.float32)
|
||||
|
||||
def process(self, real, imag):
|
||||
spec = real + 1j * imag
|
||||
@@ -100,7 +113,7 @@ class StreamingVCONNX:
|
||||
|
||||
opts = ort.SessionOptions()
|
||||
opts.inter_op_num_threads = 1
|
||||
opts.intra_op_num_threads = 0
|
||||
opts.intra_op_num_threads = 4
|
||||
opts.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
|
||||
opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
||||
|
||||
@@ -110,20 +123,23 @@ class StreamingVCONNX:
|
||||
prov = ["CUDAExecutionProvider", "CPUExecutionProvider"]
|
||||
else:
|
||||
prov = ["CPUExecutionProvider"]
|
||||
|
||||
|
||||
self.ssl = ort.InferenceSession(args.ssl, sess_options=opts, providers=prov)
|
||||
self.enc = ort.InferenceSession(args.encode, sess_options=opts, providers=prov)
|
||||
self.dec = ort.InferenceSession(args.decode, sess_options=opts, providers=prov)
|
||||
self.glb = ort.InferenceSession(args.global_path, sess_options=opts, providers=prov)
|
||||
|
||||
self.istft = StreamingISTFT(meta["n_fft"], meta["hop_length"])
|
||||
self.xfade_frames = 9
|
||||
self.istft_margin = int(np.ceil(meta["n_fft"] / meta["hop_length"]))
|
||||
self.xfade_tail = None
|
||||
self.global_emb = None
|
||||
self.src_mean = None
|
||||
self.src_std = None
|
||||
self.tokens = None
|
||||
self.decoded = 0
|
||||
self.prev_local_feats = None
|
||||
self.ema_alpha = 0.4
|
||||
self.ema_alpha = 0.9
|
||||
|
||||
def _ssl(self, win16):
|
||||
w = take(win16, 0, self.ssl_in).reshape(1, -1)
|
||||
@@ -159,33 +175,46 @@ class StreamingVCONNX:
|
||||
frames = np.concatenate([l[c : c + keep * self.ds] for keep, l in locals_], axis=0)
|
||||
self.src_mean = frames.mean(axis=0).astype(np.float32)
|
||||
self.src_std = frames.std(axis=0, ddof=1).astype(np.float32)
|
||||
|
||||
|
||||
seed_tokens = np.concatenate(
|
||||
[self._encode(l, self.src_mean, self.src_std)[self.enc_left : self.enc_left + keep] for keep, l in locals_]
|
||||
) if locals_ else np.zeros(0, dtype=np.int64)
|
||||
|
||||
|
||||
self.tokens = seed_tokens.astype(np.int64)
|
||||
self.decoded = len(self.tokens)
|
||||
|
||||
def reset(self):
|
||||
self.istft = StreamingISTFT(self.meta["n_fft"], self.meta["hop_length"])
|
||||
self.xfade_tail = None
|
||||
self.tokens = None
|
||||
self.decoded = 0
|
||||
|
||||
def apply_ema(self, local_feats):
|
||||
if self.prev_local_feats is not None and local_feats.shape == self.prev_local_feats.shape:
|
||||
local_feats = self.ema_alpha * local_feats + (1.0 - self.ema_alpha) * self.prev_local_feats
|
||||
shift = self.chunk * self.ds
|
||||
if self.prev_local_feats is not None:
|
||||
n = local_feats.shape[0] - shift
|
||||
if n > 0:
|
||||
local_feats[:n] = (self.ema_alpha * local_feats[:n]
|
||||
+ (1 - self.ema_alpha) * self.prev_local_feats[shift:shift + n])
|
||||
self.prev_local_feats = local_feats.copy()
|
||||
return local_feats
|
||||
|
||||
def _decode(self, win_tokens, keep_left, keep_n):
|
||||
def _decode(self, win_tokens, keep_left, keep_n, right_tokens):
|
||||
real, imag = self.dec.run(
|
||||
["spec_real", "spec_imag"],
|
||||
{"content_token_indices": win_tokens, "global_embedding": self.global_emb}
|
||||
{"content_token_indices": win_tokens, "global_embedding": self.global_emb},
|
||||
)
|
||||
f0 = keep_left * self.fpt
|
||||
f1 = (keep_left + keep_n) * self.fpt
|
||||
return self.istft.process(real[:, f0:f1], imag[:, f0:f1])
|
||||
fpt, hop = self.fpt, self.istft.hop
|
||||
a = keep_left * fpt
|
||||
b = (keep_left + keep_n) * fpt
|
||||
right_frames = right_tokens * fpt
|
||||
ov = min(self.xfade_frames, max(0, right_frames))
|
||||
m = min(self.istft_margin, a, max(0, right_frames - ov))
|
||||
F0, F1 = a - m, b + ov + m
|
||||
audio = self.istft.block(real[:, F0:F1], imag[:, F0:F1])
|
||||
start = (a - F0) * hop
|
||||
seg = audio[start : start + (keep_n * fpt + ov) * hop]
|
||||
return seg, ov * hop
|
||||
|
||||
def _commit_tokens(self, new_idx):
|
||||
if self.tokens is None:
|
||||
@@ -195,6 +224,7 @@ class StreamingVCONNX:
|
||||
|
||||
def _drain(self, final=False):
|
||||
out = []
|
||||
hop = self.istft.hop
|
||||
committed = len(self.tokens) if self.tokens is not None else 0
|
||||
while True:
|
||||
d0 = self.decoded
|
||||
@@ -204,13 +234,23 @@ class StreamingVCONNX:
|
||||
keep_n = min(self.chunk, avail) if final else self.chunk
|
||||
left = min(self.dec_left, d0)
|
||||
right = min(self.dec_right, committed - (d0 + keep_n))
|
||||
|
||||
lo = d0 - left
|
||||
hi = d0 + keep_n + right
|
||||
win_idx = np.clip(np.arange(lo, hi), 0, committed - 1)
|
||||
win = self.tokens[win_idx].astype(np.int64)
|
||||
|
||||
out.append(self._decode(win, left, keep_n))
|
||||
lo, hi = d0 - left, d0 + keep_n + right
|
||||
win = self.tokens[np.clip(np.arange(lo, hi), 0, committed - 1)].astype(np.int64)
|
||||
|
||||
seg, h = self._decode(win, left, keep_n, right)
|
||||
body_end = keep_n * self.fpt * hop
|
||||
head, body, tail = seg[:h], seg[h:body_end], seg[body_end:]
|
||||
|
||||
if self.xfade_tail is not None and len(self.xfade_tail) == h and h > 0:
|
||||
t = np.linspace(0.0, 1.0, h, dtype=np.float32)
|
||||
out.append((1.0 - t) * self.xfade_tail + t * head)
|
||||
else:
|
||||
out.append(head)
|
||||
out.append(body)
|
||||
|
||||
self.xfade_tail = None if final else tail
|
||||
if final and tail.size:
|
||||
out.append(tail)
|
||||
self.decoded += keep_n
|
||||
return np.concatenate(out) if out else np.zeros(0, dtype=np.float32)
|
||||
|
||||
@@ -264,7 +304,7 @@ def main():
|
||||
|
||||
sr = vc.sr
|
||||
sr16 = vc.sr16
|
||||
|
||||
|
||||
token_hz = meta["token_hz"]
|
||||
tok_samples = sr // token_hz
|
||||
chunk_samples = vc.chunk * tok_samples
|
||||
@@ -274,6 +314,10 @@ def main():
|
||||
chunk_samples_16k = vc.chunk * tok16
|
||||
left_pad_16k = vc.enc_left * tok16
|
||||
right_pad_16k = vc.enc_right * tok16
|
||||
required_samples_16k = left_pad_16k + chunk_samples_16k + right_pad_16k
|
||||
|
||||
fade_len = int(0.01 * sr16)
|
||||
ramp_down = np.linspace(1.0, 0.0, fade_len, dtype=np.float32)
|
||||
|
||||
print(f"Sample Rate: {sr} Hz (target) | 16000 Hz (SSL internal)")
|
||||
print(f"Chunk Size: {vc.chunk} tokens ({budget_ms:.1f}ms budget)")
|
||||
@@ -295,12 +339,11 @@ def main():
|
||||
vc.seed(seed_audio)
|
||||
|
||||
if len(seed_audio) >= left_pad_16k:
|
||||
raw_input_accum_16k = seed_audio[-left_pad_16k:]
|
||||
accum_16k = seed_audio[-left_pad_16k:]
|
||||
else:
|
||||
raw_input_accum_16k = np.pad(seed_audio, (left_pad_16k - len(seed_audio), 0))
|
||||
accum_16k = np.pad(seed_audio, (left_pad_16k - len(seed_audio), 0))
|
||||
|
||||
in_q = queue.Queue(maxsize=8)
|
||||
ssl_q = queue.Queue(maxsize=8)
|
||||
out_q = queue.Queue(maxsize=2)
|
||||
stop_event = threading.Event()
|
||||
|
||||
@@ -318,58 +361,12 @@ def main():
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
def ssl_thread_func(accum_16k):
|
||||
hangover_counter = 0
|
||||
t_last = None
|
||||
while not stop_event.is_set():
|
||||
try:
|
||||
raw = in_q.get(timeout=0.5)
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
t_now = time.perf_counter()
|
||||
gap_ms = (t_now - t_last) * 1000 if t_last else 0.0
|
||||
t_last = t_now
|
||||
|
||||
rms = float(np.sqrt(np.mean(raw ** 2)))
|
||||
|
||||
if rms >= args.rms_floor:
|
||||
hangover_counter = args.hangover_chunks
|
||||
is_silence = False
|
||||
else:
|
||||
if hangover_counter > 0:
|
||||
hangover_counter -= 1
|
||||
is_silence = False
|
||||
else:
|
||||
is_silence = True
|
||||
|
||||
raw_16k = resample(raw, sr, sr16)
|
||||
accum_16k = np.concatenate([accum_16k, raw_16k])
|
||||
required_samples_16k = left_pad_16k + chunk_samples_16k + right_pad_16k
|
||||
|
||||
if len(accum_16k) >= required_samples_16k:
|
||||
window_16k = accum_16k[:required_samples_16k]
|
||||
accum_16k = accum_16k[chunk_samples_16k:]
|
||||
|
||||
fade_len = int(0.01 * sr16)
|
||||
ramp_down = np.linspace(1.0, 0.0, fade_len, dtype=np.float32)
|
||||
|
||||
if is_silence:
|
||||
window_16k = window_16k.copy()
|
||||
active_start = left_pad_16k
|
||||
active_end = left_pad_16k + chunk_samples_16k
|
||||
window_16k[active_start : active_start + fade_len] *= ramp_down
|
||||
window_16k[active_start + fade_len : active_end] = 0.0
|
||||
|
||||
local_feats, t_ssl = sync_time(lambda: vc._ssl(window_16k)[0])
|
||||
ssl_q.put((local_feats, is_silence, t_ssl, gap_ms, rms))
|
||||
else:
|
||||
ssl_q.put((None, is_silence, 0.0, gap_ms, rms))
|
||||
|
||||
print(f"\n{'chunk':>6} {'q_in':>4} {'q_ss':>4} {'q_out':>5} {'ssl':>7} {'enc':>7} {'dec':>7} {'total':>7} {'budget':>7} {'gap':>7}")
|
||||
print("-" * 88)
|
||||
print(f"\n{'chunk':>6} {'q_in':>4} {'q_out':>5} {'ssl':>7} {'enc':>7} {'dec':>7} {'total':>7} {'budget':>7} {'gap':>7}")
|
||||
print("-" * 80)
|
||||
|
||||
chunk_n = 0
|
||||
t_last = None
|
||||
hangover_counter = 0
|
||||
|
||||
with sd.InputStream(device=args.input, channels=n_in_ch, samplerate=sr,
|
||||
blocksize=chunk_samples, dtype="float32",
|
||||
@@ -378,21 +375,40 @@ def main():
|
||||
dtype="float32", latency="low") as out_stream:
|
||||
|
||||
writer = threading.Thread(target=write_thread, args=(out_stream,), daemon=True)
|
||||
ssl_worker = threading.Thread(target=ssl_thread_func, args=(raw_input_accum_16k,), daemon=True)
|
||||
|
||||
writer.start()
|
||||
ssl_worker.start()
|
||||
|
||||
try:
|
||||
while True:
|
||||
try:
|
||||
item = ssl_q.get(timeout=0.5)
|
||||
except queue.Empty:
|
||||
continue
|
||||
raw = in_q.get()
|
||||
t_now = time.perf_counter()
|
||||
gap_ms = (t_now - t_last) * 1000 if t_last else 0.0
|
||||
t_last = t_now
|
||||
|
||||
local_feats, is_silence, t_ssl, gap_ms, rms = item
|
||||
rms = float(np.sqrt(np.mean(raw ** 2)))
|
||||
if rms >= args.rms_floor:
|
||||
hangover_counter = args.hangover_chunks
|
||||
is_silence = False
|
||||
elif hangover_counter > 0:
|
||||
hangover_counter -= 1
|
||||
is_silence = False
|
||||
else:
|
||||
is_silence = True
|
||||
|
||||
if local_feats is not None:
|
||||
raw_16k = resample(raw, sr, sr16)
|
||||
accum_16k = np.concatenate([accum_16k, raw_16k])
|
||||
|
||||
if len(accum_16k) >= required_samples_16k:
|
||||
window_16k = accum_16k[:required_samples_16k]
|
||||
accum_16k = accum_16k[chunk_samples_16k:]
|
||||
|
||||
if is_silence:
|
||||
window_16k = window_16k.copy()
|
||||
active_start = left_pad_16k
|
||||
active_end = left_pad_16k + chunk_samples_16k
|
||||
window_16k[active_start : active_start + fade_len] *= ramp_down
|
||||
window_16k[active_start + fade_len : active_end] = 0.0
|
||||
|
||||
local_feats, t_ssl = sync_time(lambda: vc._ssl(window_16k)[0])
|
||||
local_feats = vc.apply_ema(local_feats)
|
||||
idx, t_enc = sync_time(lambda: vc._encode(local_feats, vc.src_mean, vc.src_std))
|
||||
chunk_tokens = idx[vc.enc_left : vc.enc_left + vc.chunk]
|
||||
@@ -406,22 +422,22 @@ def main():
|
||||
pcm_out = np.stack([pcm, pcm], axis=1)
|
||||
else:
|
||||
pcm_out = np.zeros((chunk_samples, 2), dtype=np.float32)
|
||||
t_enc, t_dec = 0.0, 0.0
|
||||
t_ssl, t_enc, t_dec = 0.0, 0.0, 0.0
|
||||
|
||||
out_q.put(pcm_out)
|
||||
|
||||
total = t_ssl + t_enc + t_dec
|
||||
chunk_n += 1
|
||||
|
||||
|
||||
if is_silence:
|
||||
print(
|
||||
f"{chunk_n:>6} {in_q.qsize():>4} {ssl_q.qsize():>4} {out_q.qsize():>5} "
|
||||
f"{'--silence--':>54} rms={rms:.4f}",
|
||||
f"{chunk_n:>6} {in_q.qsize():>4} {out_q.qsize():>5} "
|
||||
f"{'--silence--':>41} rms={rms:.4f}",
|
||||
flush=True,
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"{chunk_n:>6} {in_q.qsize():>4} {ssl_q.qsize():>4} {out_q.qsize():>5} "
|
||||
f"{chunk_n:>6} {in_q.qsize():>4} {out_q.qsize():>5} "
|
||||
f"{t_ssl:>6.1f}ms {t_enc:>6.1f}ms {t_dec:>6.1f}ms "
|
||||
f"{total:>6.1f}ms {budget_ms:>6.0f}ms {gap_ms:>6.1f}ms",
|
||||
flush=True,
|
||||
@@ -432,7 +448,6 @@ def main():
|
||||
finally:
|
||||
stop_event.set()
|
||||
writer.join()
|
||||
ssl_worker.join()
|
||||
|
||||
print("stopped")
|
||||
|
||||
|
||||
+34
@@ -0,0 +1,34 @@
|
||||
# optimize_models.py
|
||||
from onnxruntime.transformers.optimizer import optimize_model
|
||||
from onnxruntime.transformers.fusion_options import FusionOptions
|
||||
|
||||
def optimize_custom(input_path, output_path):
    """Optimize one ONNX model with LayerNorm fusions switched off.

    Runs onnxruntime's transformer optimizer (``model_type="bert"``) on the
    model at ``input_path`` and writes the optimized graph to ``output_path``.
    Progress is reported on stdout before and after the run.

    Args:
        input_path: Path of the ONNX model to read.
        output_path: Path the optimized ONNX model is saved to.
    """
    print(f"Optimizing {input_path}...")

    # Begin with the default BERT fusion set, then turn off the two LayerNorm
    # fusions: they break on AdaLN / dynamic biases.
    fusion_opts = FusionOptions("bert")
    for flag_name in ("enable_skip_layer_norm", "enable_layer_norm"):
        setattr(fusion_opts, flag_name, False)

    fused_model = optimize_model(
        input=input_path,
        model_type="bert",
        optimization_options=fusion_opts,
    )
    fused_model.save_model_to_file(output_path)

    print(f"Saved optimized model to {output_path}\n")
|
||||
|
||||
if __name__ == "__main__":
    # encode/decode go through optimize_custom, i.e. with the LayerNorm
    # fusions disabled.
    custom_jobs = (
        ("outputs/encode.onnx", "outputs/encode_opt.onnx"),
        ("outputs/decode.onnx", "outputs/decode_opt.onnx"),
    )
    for src_path, dst_path in custom_jobs:
        optimize_custom(src_path, dst_path)

    # ssl.onnx (WavLM) is a standard BERT architecture, so every standard
    # fusion is left enabled for maximum speed (no custom FusionOptions).
    ssl_src = "outputs/ssl.onnx"
    ssl_dst = "outputs/ssl_opt.onnx"
    print("Optimizing outputs/ssl.onnx...")
    optimize_model(ssl_src, model_type="bert").save_model_to_file(ssl_dst)
    print("Saved optimized model to outputs/ssl_opt.onnx")
|
||||
+2
-6
@@ -1,5 +1,5 @@
|
||||
[project]
|
||||
name = "dovc"
|
||||
name = "mioonnx"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
@@ -7,14 +7,10 @@ requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"miocodec",
|
||||
"numpy>=2.4.6",
|
||||
"onnxruntime>=1.26.0",
|
||||
"onnxruntime-gpu>=1.26.0",
|
||||
"onnxruntime-openvino>=1.24.1",
|
||||
"onnxruntime-tools>=1.7.0",
|
||||
"onnxscript>=0.7.0",
|
||||
"sounddevice>=0.5.5",
|
||||
"sympy>=1.14.0",
|
||||
"torch>=2.11.0",
|
||||
"soundfile>=0.13.1",
|
||||
]
|
||||
|
||||
[tool.uv.sources]
|
||||
|
||||
@@ -121,18 +121,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coloredlogs"
|
||||
version = "15.0.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "humanfriendly" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cuda-bindings"
|
||||
version = "12.9.7"
|
||||
@@ -213,38 +201,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/5f/ed01f9a3cdffbd5a008556fc7b2a08ddb1cc6ace7effa7340604b1d16699/docstring_parser-0.18.0-py3-none-any.whl", hash = "sha256:b3fcbed555c47d8479be0796ef7e19c2670d428d72e96da63f3a40122860374b", size = 22484, upload-time = "2026-04-14T04:09:18.638Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dovc"
|
||||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "miocodec" },
|
||||
{ name = "numpy" },
|
||||
{ name = "onnxruntime" },
|
||||
{ name = "onnxruntime-gpu" },
|
||||
{ name = "onnxruntime-openvino" },
|
||||
{ name = "onnxruntime-tools" },
|
||||
{ name = "onnxscript" },
|
||||
{ name = "sounddevice" },
|
||||
{ name = "sympy" },
|
||||
{ name = "torch", version = "2.11.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
|
||||
{ name = "torch", version = "2.12.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "miocodec", git = "https://github.com/Aratako/MioCodec" },
|
||||
{ name = "numpy", specifier = ">=2.4.6" },
|
||||
{ name = "onnxruntime", specifier = ">=1.26.0" },
|
||||
{ name = "onnxruntime-gpu", specifier = ">=1.26.0" },
|
||||
{ name = "onnxruntime-openvino", specifier = ">=1.24.1" },
|
||||
{ name = "onnxruntime-tools", specifier = ">=1.7.0" },
|
||||
{ name = "onnxscript", specifier = ">=0.7.0" },
|
||||
{ name = "sounddevice", specifier = ">=0.5.5" },
|
||||
{ name = "sympy", specifier = ">=1.14.0" },
|
||||
{ name = "torch", specifier = ">=2.11.0" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "einops"
|
||||
version = "0.8.2"
|
||||
@@ -370,18 +326,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/02/28/d7cef5e477b855c25d415b8f57e5bc7347c7a90cad3acf1725d0c92ca294/huggingface_hub-1.17.0-py3-none-any.whl", hash = "sha256:3b8156d23118e87f6a587648bfbc04f04a12a757ccb4ed298b35c4ae638bf24c", size = 671546, upload-time = "2026-05-28T15:12:11.441Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "humanfriendly"
|
||||
version = "10.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyreadline3", marker = "sys_platform == 'win32'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702, upload-time = "2021-09-17T21:40:43.31Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.17"
|
||||
@@ -553,6 +497,29 @@ dependencies = [
|
||||
{ name = "torchaudio", version = "2.11.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mioonnx"
|
||||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "miocodec" },
|
||||
{ name = "numpy" },
|
||||
{ name = "onnxruntime-gpu" },
|
||||
{ name = "onnxscript" },
|
||||
{ name = "sounddevice" },
|
||||
{ name = "soundfile" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "miocodec", git = "https://github.com/Aratako/MioCodec" },
|
||||
{ name = "numpy", specifier = ">=2.4.6" },
|
||||
{ name = "onnxruntime-gpu", specifier = ">=1.26.0" },
|
||||
{ name = "onnxscript", specifier = ">=0.7.0" },
|
||||
{ name = "sounddevice", specifier = ">=0.5.5" },
|
||||
{ name = "soundfile", specifier = ">=0.13.1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ml-dtypes"
|
||||
version = "0.5.4"
|
||||
@@ -863,38 +830,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/aa/f7a53321c60b9ad9ee184b6018292ed6b5389947592a2c8c09c736bb7f9e/onnx_ir-0.2.1-py3-none-any.whl", hash = "sha256:c7285da889312f91882de2092e298a9eeeefbfc1d1951c49d983992967eb09a7", size = 166792, upload-time = "2026-04-20T20:21:46.357Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "onnxruntime"
|
||||
version = "1.26.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "flatbuffers" },
|
||||
{ name = "numpy" },
|
||||
{ name = "packaging" },
|
||||
{ name = "protobuf" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/81/b1/d111b1df656761f980d9e298a60039a9cb66036b1d039e777537743d0ac3/onnxruntime-1.26.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05b028781b322ad74b57ce5b50aa5280bb1fe96ceec334628ade681e0b24c1ac", size = 18016624, upload-time = "2026-05-12T00:41:01.735Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f6/a0/3f9d896a0385a36bd04345d6d0b802821a5782adde562e7e135f6bb71c73/onnxruntime-1.26.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91f2bb870a4b9224eba0a6728c1fa7a9e552b8e59e1083c51fbbc3d013f2b5c0", size = 16052692, upload-time = "2026-05-08T19:07:13.829Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/43/2a4e04f8dbeffad19bbcced4bcd4289bf478921518437404d6b92bdf213b/onnxruntime-1.26.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b6dd70599005bd1bf29779f04a91978b92b5e719c11a20068a8f8e535f725b6", size = 18185439, upload-time = "2026-05-08T19:07:36.299Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/fc/026d0a7162b9c2153dac292baea9e027c42304dc1d9dc6f8ff5b4cfbaedd/onnxruntime-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:a26374dc7fbcaae593601086b242120e13f2310558df0991da6dd8b8fac00414", size = 13026427, upload-time = "2026-05-08T19:08:03.503Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/27/1dcf88e45e4c69db5f7b106f2dacc3801ba98994e082ca03e1dfdf7bfe57/onnxruntime-1.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:54a8053410fd31fd66469bd754fcfe8a4df9f7eb44756b4b5479bf50c842d948", size = 12796647, upload-time = "2026-05-08T19:07:52.108Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cf/a2/c801242685e0ce48a4ca51dfafbb588765e0446397e123be53ba5598f3f5/onnxruntime-1.26.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ccce19c5f771b8268902f77d9fed9e88f9499465d6780808faa6611a789d33f0", size = 18016563, upload-time = "2026-05-08T19:07:28.081Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/64/0492c0b1db04e29b2630c87cfa36f9d6872b1ca8614b90c5cad58fac7d76/onnxruntime-1.26.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdbed8cf3b672b66acb032f33a253bc27f42bce6ece48ae3fab4fa483a5e96e0", size = 16052634, upload-time = "2026-05-08T19:07:16.885Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/26/4d09ddc755a84fc8d5e192991626b0e0680e8f6c5d58f4f1d05c42bc48cf/onnxruntime-1.26.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c07af6fc6d5557835f2b6ee7a96d8b3235d0c57a8e230efdedaee106a8a3cbc6", size = 18185632, upload-time = "2026-05-08T19:07:38.756Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/89/3e52249aa08fa301e217ecba07b5246a8338fa2b401e109326e3fc5be0f9/onnxruntime-1.26.0-cp313-cp313-win_amd64.whl", hash = "sha256:61bec80655efa460591c2bc655392d57d2650ce85533a6b9b3b7a790d7ea7916", size = 13026751, upload-time = "2026-05-08T19:08:06.2Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/06/b3/c1c8782b14af6797c303de132d6eef26a9fb80dfacd3750ce57911d11c6b/onnxruntime-1.26.0-cp313-cp313-win_arm64.whl", hash = "sha256:a6677545ff451e3539a02746d2f207d8c5baa4a0a818886bb9d6a6eb9511ee89", size = 12796807, upload-time = "2026-05-08T19:07:54.879Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/f5/47b0676408abec652c14b84d7173e389837832d850c24f87184277313e8d/onnxruntime-1.26.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e016edc15d3c19f36807e1c6b10be5b27807688c32720f91b5ae480a95215d0", size = 16057265, upload-time = "2026-05-08T19:07:19.603Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/45/33ab6deeef010ca844c877dd618cebc079590bbe52d2a3678e7223b1b908/onnxruntime-1.26.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f5fc48a91a046a6a5c9b147f83fb41d65d24d24923373b222cdd248f0f4f4aac", size = 18197590, upload-time = "2026-05-08T19:07:41.422Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/40/89/17546c1c20f6bfc3ae41c22152378a26edfea918af3129e2139dcd7c99f3/onnxruntime-1.26.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:33a791f31432a3af1a96db5e54818b37aba5e5eefc2e6af5794c10a9118a9993", size = 18019724, upload-time = "2026-05-08T19:07:30.723Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bb/24/89457a35f6af29538a76647f2c18c3a28277e6c19234c847e7b4b7c19860/onnxruntime-1.26.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e90c00732c4553618103149d93f688e8c3063017938f8983e21a71d9f3b6d22e", size = 16054821, upload-time = "2026-05-08T19:07:22.348Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/12/f9/15b2e1815cf570d238e0135529f80d2dce64e8e8818a1489cae83823c5c6/onnxruntime-1.26.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01498e80ba8988428d08c2d51b1338f89e3de2a93e6ffe555f79c68f26a5c06b", size = 18185815, upload-time = "2026-05-08T19:07:44.179Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d7/65/2e11055faf015e4b07f45b513fa49b391baf2e19d92d77d73ebee13c1004/onnxruntime-1.26.0-cp314-cp314-win_amd64.whl", hash = "sha256:7ead61450d8405167c87dd3a31d8da1d576b490a57dab1aa8b82a7da6825f5aa", size = 13349887, upload-time = "2026-05-08T19:08:08.671Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/e4/0f9d1a5718b1781c610c1e354765a3820597081754277a6a9a2b50705702/onnxruntime-1.26.0-cp314-cp314-win_arm64.whl", hash = "sha256:31d71a53490e46910877d0902b5ad99c69a5955e5c7ea6c82863519410e1ba7c", size = 13140121, upload-time = "2026-05-08T19:07:57.804Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/42/3b8e635f067d06d9f45bede470b8d539d101a4166c272213158dfd08b6ce/onnxruntime-1.26.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b6d258fb78fdfcf049795bcfaa74dcb90ae7baa277afd21e6fd28b83f2c496", size = 16057240, upload-time = "2026-05-08T19:07:25.163Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/93/99/f2be40a31b908d96b861ae0ce98582fa376c18a7f816b9d5eb4cd6aa0a4c/onnxruntime-1.26.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4eefd386a45202aefb7a5132b94f32df9d506c9edcc7faf2fc60d65183f4b183", size = 18197382, upload-time = "2026-05-08T19:07:46.965Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "onnxruntime-gpu"
|
||||
version = "1.26.0"
|
||||
@@ -916,42 +851,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/97/91/93ffe5431d154989f5e04864a25a97eea480997d771232bcbbc538188241/onnxruntime_gpu-1.26.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56dc7b73954ff4bdc71f5b8ab306b6f61be5d007881b6ef423a609e2b9cd088b", size = 276991545, upload-time = "2026-05-08T19:16:33.347Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "onnxruntime-openvino"
|
||||
version = "1.24.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "flatbuffers" },
|
||||
{ name = "numpy" },
|
||||
{ name = "packaging" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "sympy" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/50/cf/17ba72de2df0fcba349937d2788f154397bbc2d1a2d67772a97e26f6bc5f/onnxruntime_openvino-1.24.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d617fac2f59a6ab5ea59a788c3e1592240a129642519aaeaa774761dfe35150e", size = 84433207, upload-time = "2026-02-26T13:44:41.395Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/59/37/d301f2c68b19a9485ed5db3047e0fb52478f3e73eb08c7d2a7c61be7cc1c/onnxruntime_openvino-1.24.1-cp312-cp312-win_amd64.whl", hash = "sha256:f186335a9c9b255633275290da7521d3d4d14c7773fee3127bfa040234d3fa5a", size = 13658075, upload-time = "2026-02-26T13:44:44.905Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/08/07/f225999919f56506b603aaa3ff837ad563ab26f86906ed7fa7e5abcd849e/onnxruntime_openvino-1.24.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:2c3bb73e68ac27f4891af8a595c1faf574ec68b772e6583c90a0b997a1822782", size = 84433183, upload-time = "2026-02-26T13:44:50.254Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/92/46ae2cd565961a89189900f385bb2f13a9fa731ea4674001d23720fbb1e0/onnxruntime_openvino-1.24.1-cp313-cp313-win_amd64.whl", hash = "sha256:434bf49aa71393c577a456c9d76c98e6d6958a833fa0876793e3d5437b5a511a", size = 13658485, upload-time = "2026-02-26T13:44:53.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "onnxruntime-tools"
|
||||
version = "1.7.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "coloredlogs" },
|
||||
{ name = "numpy" },
|
||||
{ name = "onnx" },
|
||||
{ name = "packaging" },
|
||||
{ name = "psutil" },
|
||||
{ name = "py-cpuinfo" },
|
||||
{ name = "py3nvml" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fd/b5/c36283fef3b1d492a39d1b5f3f195965fbf002b168633daad302c51d8f4c/onnxruntime_tools-1.7.0.tar.gz", hash = "sha256:6dbdcee49424e066bcd10357c37d51bc422ae26494e3c2f0c1970d534f967f6d", size = 141435, upload-time = "2021-03-25T21:42:42.571Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/b0/db0e73356df0aaa8737e6f13c0dac499b5d904d3fa267c8ebf24515e8001/onnxruntime_tools-1.7.0-py3-none-any.whl", hash = "sha256:1dff888b5c482ac5bc627f12e108445fefcb3d600c43f63633975316fe617ad8", size = 212695, upload-time = "2021-03-25T21:42:40.551Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "onnxscript"
|
||||
version = "0.7.0"
|
||||
@@ -993,55 +892,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/ef/50433d346c56657a70d27f156c7b349ac59a068b01de4eb796e747eecc43/protobuf-7.35.0-py3-none-any.whl", hash = "sha256:c13f325cf242bad135c350629eeb5d54b24228eb472fb3e2e9ebbd4c5dc20ca0", size = 171659, upload-time = "2026-05-19T23:02:27.842Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
version = "7.2.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/aa/c6/d1ddf4abb55e93cebc4f2ed8b5d6dbad109ecb8d63748dd2b20ab5e57ebe/psutil-7.2.2.tar.gz", hash = "sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372", size = 493740, upload-time = "2026-01-28T18:14:54.428Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/51/08/510cbdb69c25a96f4ae523f733cdc963ae654904e8db864c07585ef99875/psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b", size = 130595, upload-time = "2026-01-28T18:14:57.293Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/f5/97baea3fe7a5a9af7436301f85490905379b1c6f2dd51fe3ecf24b4c5fbf/psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea", size = 131082, upload-time = "2026-01-28T18:14:59.732Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/37/d6/246513fbf9fa174af531f28412297dd05241d97a75911ac8febefa1a53c6/psutil-7.2.2-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63", size = 181476, upload-time = "2026-01-28T18:15:01.884Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/b5/9182c9af3836cca61696dabe4fd1304e17bc56cb62f17439e1154f225dd3/psutil-7.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312", size = 184062, upload-time = "2026-01-28T18:15:04.436Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/16/ba/0756dca669f5a9300d0cbcbfae9a4c30e446dfc7440ffe43ded5724bfd93/psutil-7.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b", size = 139893, upload-time = "2026-01-28T18:15:06.378Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/61/8fa0e26f33623b49949346de05ec1ddaad02ed8ba64af45f40a147dbfa97/psutil-7.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9", size = 135589, upload-time = "2026-01-28T18:15:08.03Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/81/69/ef179ab5ca24f32acc1dac0c247fd6a13b501fd5534dbae0e05a1c48b66d/psutil-7.2.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00", size = 130664, upload-time = "2026-01-28T18:15:09.469Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/64/665248b557a236d3fa9efc378d60d95ef56dd0a490c2cd37dafc7660d4a9/psutil-7.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9", size = 131087, upload-time = "2026-01-28T18:15:11.724Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/2e/e6782744700d6759ebce3043dcfa661fb61e2fb752b91cdeae9af12c2178/psutil-7.2.2-cp314-cp314t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a", size = 182383, upload-time = "2026-01-28T18:15:13.445Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/57/49/0a41cefd10cb7505cdc04dab3eacf24c0c2cb158a998b8c7b1d27ee2c1f5/psutil-7.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf", size = 185210, upload-time = "2026-01-28T18:15:16.002Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dd/2c/ff9bfb544f283ba5f83ba725a3c5fec6d6b10b8f27ac1dc641c473dc390d/psutil-7.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1", size = 141228, upload-time = "2026-01-28T18:15:18.385Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/fc/f8d9c31db14fcec13748d373e668bc3bed94d9077dbc17fb0eebc073233c/psutil-7.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841", size = 136284, upload-time = "2026-01-28T18:15:19.912Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e7/36/5ee6e05c9bd427237b11b3937ad82bb8ad2752d72c6969314590dd0c2f6e/psutil-7.2.2-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486", size = 129090, upload-time = "2026-01-28T18:15:22.168Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/80/c4/f5af4c1ca8c1eeb2e92ccca14ce8effdeec651d5ab6053c589b074eda6e1/psutil-7.2.2-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979", size = 129859, upload-time = "2026-01-28T18:15:23.795Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/70/5d8df3b09e25bce090399cf48e452d25c935ab72dad19406c77f4e828045/psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9", size = 155560, upload-time = "2026-01-28T18:15:25.976Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/63/65/37648c0c158dc222aba51c089eb3bdfa238e621674dc42d48706e639204f/psutil-7.2.2-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e", size = 156997, upload-time = "2026-01-28T18:15:27.794Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8e/13/125093eadae863ce03c6ffdbae9929430d116a246ef69866dad94da3bfbc/psutil-7.2.2-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8", size = 148972, upload-time = "2026-01-28T18:15:29.342Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/78/0acd37ca84ce3ddffaa92ef0f571e073faa6d8ff1f0559ab1272188ea2be/psutil-7.2.2-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc", size = 148266, upload-time = "2026-01-28T18:15:31.597Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b4/90/e2159492b5426be0c1fef7acba807a03511f97c5f86b3caeda6ad92351a7/psutil-7.2.2-cp37-abi3-win_amd64.whl", hash = "sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988", size = 137737, upload-time = "2026-01-28T18:15:33.849Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/c7/7bb2e321574b10df20cbde462a94e2b71d05f9bbda251ef27d104668306a/psutil-7.2.2-cp37-abi3-win_arm64.whl", hash = "sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee", size = 134617, upload-time = "2026-01-28T18:15:36.514Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "py-cpuinfo"
|
||||
version = "9.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "py3nvml"
|
||||
version = "0.2.7"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "xmltodict" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/10/7e/fa282e456b87570d663ce97946b4dcb16850d4495ce4bd625a1a10c8ed56/py3nvml-0.2.7.tar.gz", hash = "sha256:09ee1d04598a6e664e24465f804ce3bfe119a6fdb5362df1c168f8aa929fbd73", size = 58224, upload-time = "2021-11-22T14:30:27.541Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/03/3a/ea6f2419bd20f97f65ee55a9910c722313fe99cacc0bf77afb4b74b446ff/py3nvml-0.2.7-py3-none-any.whl", hash = "sha256:30101170d1f51419c8d21fd8ca6cdc333a552b4f8a945c2fc7d107d77e4220dd", size = 55503, upload-time = "2021-11-22T14:30:25.794Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pycparser"
|
||||
version = "3.0"
|
||||
@@ -1060,15 +910,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyreadline3"
|
||||
version = "3.5.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b6/6d/f94028646d7bbe6d9d873c47ee7c246f2d29129d253f0d96cb6fcab70733/pyreadline3-3.5.6.tar.gz", hash = "sha256:61e53218b99656091ddb077df9e71f25850e72e030b6183b39c9b7e6e4f4a9bf", size = 100368, upload-time = "2026-05-14T17:55:04.471Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f7/5e/35c856e186b74678c24927847ad9895a51f1bc02a0c6126477a6c6040064/pyreadline3-3.5.6-py3-none-any.whl", hash = "sha256:8449b734232e42a5dcd74048e39b60db2839a4c38cf3ae2bf7707d58b5389c0d", size = 85243, upload-time = "2026-05-14T17:55:03.262Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0.3"
|
||||
@@ -1406,12 +1247,3 @@ sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac8
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xmltodict"
|
||||
version = "1.0.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/19/70/80f3b7c10d2630aa66414bf23d210386700aa390547278c789afa994fd7e/xmltodict-1.0.4.tar.gz", hash = "sha256:6d94c9f834dd9e44514162799d344d815a3a4faec913717a9ecbfa5be1bb8e61", size = 26124, upload-time = "2026-02-22T02:21:22.074Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/38/34/98a2f52245f4d47be93b580dae5f9861ef58977d73a79eb47c58f1ad1f3a/xmltodict-1.0.4-py3-none-any.whl", hash = "sha256:a4a00d300b0e1c59fc2bfccb53d7b2e88c32f200df138a0dd2229f842497026a", size = 13580, upload-time = "2026-02-22T02:21:21.039Z" },
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user