-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwavmark.py
More file actions
114 lines (91 loc) · 4.61 KB
/
wavmark.py
File metadata and controls
114 lines (91 loc) · 4.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from utils import wm_add_util, file_reader, wm_decode_util, my_parser, metric_util, path_util
from models import my_model
import torch
import numpy as np
import soundfile
from huggingface_hub import hf_hub_download
def load_model():
    """Build the watermarking network and populate it with pretrained weights.

    The checkpoint file is obtained through ``hf_hub_download`` from the
    ``M4869/WavMark`` repository, deserialized with ``map_location`` set to
    the CPU, and then loaded with ``strict=True`` into a ``my_model.Model``
    that has been moved to the module-level ``device``.

    Returns:
        The ``my_model.Model`` instance after ``eval()`` has been called.
    """
    checkpoint_file = hf_hub_download(
        repo_id="M4869/WavMark",
        filename="step59000_snr39.99_pesq4.35_BERP_none0.30_mean1.81_std1.81.model.pkl",
    )

    # NOTE: relies on the module-level ``device`` being set before this is called.
    network = my_model.Model(
        16000, num_bit=32, n_fft=1000, hop_length=400, num_layers=8
    ).to(device)

    # The loaded object is used directly as the state dict.
    state_dict = torch.load(checkpoint_file, map_location=torch.device('cpu'))
    network.load_state_dict(state_dict, strict=True)

    network.eval()
    return network
def add_watermark(signal, audio_length_second, watermark_text):
    """Embed a watermark into ``signal`` and write the result to ``args.output``.

    The 32-bit code that gets embedded is the first
    ``args.pattern_bit_length`` entries of ``wm_add_util.fix_pattern``
    followed by the digits of ``watermark_text``.

    Args:
        signal: Audio samples handed to ``wm_add_util.add_watermark``.
        audio_length_second: Duration of ``signal`` in seconds; only used for
            the speed report.
        watermark_text: String of digit characters forming the payload.

    Reads the module-level ``args``, ``device`` and ``model``. Prints a timing
    summary and the number of encoded / skipped sections; returns ``None``.
    """
    payload_bits = np.array(list(map(int, watermark_text)))
    sync_bits = wm_add_util.fix_pattern[0:args.pattern_bit_length]
    full_code = np.concatenate([sync_bits, payload_bits])
    assert len(full_code) == 32

    # 16000 is the same rate used for soundfile.write below; the meaning of
    # 0.1 is defined by wm_add_util.add_watermark (TODO confirm).
    signal_wmd, info = wm_add_util.add_watermark(
        full_code, signal, 16000, 0.1, device, model, args.min_snr, args.max_snr
    )
    info["snr"] = metric_util.signal_noise_ratio(signal, signal_wmd)

    path_util.mk_parent_dir_if_necessary(args.output)
    soundfile.write(args.output, signal_wmd, 16000)

    elapsed = info["time_cost"]
    speed = audio_length_second / elapsed
    print("Audio Length:%ds,Time Cost:%ds, Speed:x%.1f" % (audio_length_second, elapsed, speed))

    encoded_count = info["encoded_sections"]
    print("Added %d watermark segments, skipped %d muted segments" % (
        encoded_count, info["skip_sections"]))
    if encoded_count == 0:
        print("Warning! No watermarked added!! You can setup a lower min_snr value")
def decode_watermark(signal, audio_length_second):
    """Search ``signal`` for a watermark and print the decoded payload.

    The first ``args.pattern_bit_length`` entries of
    ``wm_add_util.fix_pattern`` are passed to
    ``wm_decode_util.extract_watermark`` as the start pattern. Whatever
    follows that prefix in the returned result is printed as the payload.

    Args:
        signal: Audio samples handed to ``wm_decode_util.extract_watermark``.
        audio_length_second: Duration of ``signal`` in seconds; only used for
            the speed report.

    Reads the module-level ``args``, ``device`` and ``model``. Returns ``None``.
    """
    prefix_len = args.pattern_bit_length
    sync_bits = wm_add_util.fix_pattern[0:prefix_len]

    decoded, info = wm_decode_util.extract_watermark(
        signal, sync_bits, 0.1, 16000, model, device, args.decode_batch_size
    )

    elapsed = info["time_cost"]
    speed = audio_length_second / elapsed
    print("Audio length:%ds, Time Cost:%ds, Speed:x%.1f" % (audio_length_second, elapsed, speed))

    # A ``None`` result means the extractor found no watermark.
    if decoded is None:
        print("No Watermark Found")
        return

    # Drop the pattern prefix; the remainder is the payload.
    payload_str = "".join(map(str, decoded[prefix_len:]))
    print("Decoded Result:", payload_str)

    # Only the hit count is reported. The individual hits are in
    # info["results"]; presumably each entry carries a "start_time_position"
    # in seconds (TODO confirm against wm_decode_util).
    print("This result is found in %d positions:" % len(info["results"]))
def is_binary_string(text):
    """Return True when every character of ``text`` is ``'0'`` or ``'1'``.

    Strings made of a single repeated digit (all zeros or all ones) are
    accepted. An empty string is vacuously accepted; callers are expected to
    validate the length separately.
    """
    return set(text) <= {'0', '1'}


if __name__ == "__main__":
    # Command-line entry point: parse options, validate them, then either
    # embed a watermark into the input audio or decode one from it.
    parser = my_parser.MyParser()
    parser.custom({
        "mode": "encode",  # "encode" or "decode"
        "input": "",
        "output": "",
        "watermark": "0010101010100111",
        "pattern_bit_length": 16,
        # If the watermarked audio has SNR > max_snr, the watermark is
        # considered insufficient and "Repeated Encoding" is performed.
        "max_snr": 38,
        # If the watermarked audio has SNR < min_snr, it is considered to
        # affect perception, so that section is skipped.
        "min_snr": 20,
        "decode_batch_size": 10,
        "min_time_length": 4,
    })
    args = parser.parse()

    assert args.mode in ["encode", "decode"], "wrong mode"

    # Input check.
    assert len(args.input) > 0, "you should setup an input path"
    signal, sr, audio_length_second = file_reader.read_as_single_channel_16k(args.input, 16000)
    assert audio_length_second > args.min_time_length, "audio time length should larger than %d seconds" % args.min_time_length

    # Watermark check: always in encode mode, and in decode mode whenever a
    # non-empty watermark string is supplied.
    watermark_text = args.watermark
    if len(watermark_text) > 0 or args.mode == "encode":
        payload_length = 32 - args.pattern_bit_length
        assert len(watermark_text) == payload_length, "watermark length should be %d, current is %d" \
                                                      % (payload_length, len(watermark_text))
        # Subset test rather than equality with {'0', '1'}: a payload that is
        # all zeros or all ones is valid and must not be rejected.
        assert is_binary_string(watermark_text), "watermark should only has 0 and 1"

    if args.mode == "encode":
        # Output check.
        assert len(args.output) > 0, "you should setup an output path"
        assert args.output.lower().endswith(".wav"), "output should be a .wav filename"

    # Load the model only after all arguments have been validated, so invalid
    # invocations fail before the checkpoint is fetched. ``device`` must be
    # set first because load_model() reads it.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = load_model()

    if args.mode == "encode":
        add_watermark(signal, audio_length_second, args.watermark)
    elif args.mode == "decode":
        decode_watermark(signal, audio_length_second)