diff --git a/README-EN.md b/README-EN.md
index 0e22abf..96020b6 100644
--- a/README-EN.md
+++ b/README-EN.md
@@ -65,6 +65,26 @@ python scripts/convert-pt-to-gguf.py \
 ```
 
 ### Non-Streaming Speech Recognition (Silero-VAD + SenseVoice)
+
+#### Parameter Description
+Only the following parameters are currently supported:
+```bash
+usage: ./bin/sense-voice-main [options] file.wav
+
+options:
+  -t N,      --threads N        [4     ] Number of decoding threads
+  -l LANG,   --language LANG    [auto  ] Language code ('auto' for detection), supports [`zh`, `en`, `yue`, `ja`, `ko`]
+  -m FNAME,  --model FNAME      [models/sense-voice-small-q4_k.gguf] Path to GGUF model
+  -f FNAME,  --file FNAME       [      ] Path to WAV file (only supports 16kHz)
+  --min_speech_duration_ms      [250   ] VAD parameter: minimum speech length in ms
+  --max_speech_duration_ms      [15000 ] VAD parameter: maximum speech length in ms
+  --min_silence_duration_ms     [100   ] VAD parameter: minimum silence length in ms
+  -ng,       --no-gpu           [false ] Disable GPU
+  -fa,       --flash-attn       [false ] Enable flash attention decoding
+  -itn,      --use-itn          [false ] Use inverse text normalization (includes punctuation)
+  -prefix,   --use-prefix       [false ] Output extra info: language, emotion, event, itn
+```
+
 ```bash
 
 git clone https://github.com/lovemefan/SenseVoice.cpp
@@ -80,37 +100,59 @@ cmake -DCMAKE_BUILD_TYPE=Release .. && make -j 8
 
 ### Output
 
-Currently using the sense-voice-f16 model for output:
+Example output on MacBook M1 using the sense-voice-q4_k model:
 
 ```
-$./bin/sense-voice-main -m /data/code/SenseVoice.cpp/scripts/resources/gguf-fp16-sense-voice.bin /data/code/SenseVoice.cpp/scripts/resources/SenseVoiceSmall/example/asr_example_zh.wav  -t 4
-
-sense_voice_small_init_from_file_with_params_no_state: loading model from '/data/code/SenseVoice.cpp/scripts/resources/gguf-fp16-sense-voice-small.bin'     
-sense_voice_model_load: version:      3                                                                                                                     
-sense_voice_model_load: alignment:   32 
-sense_voice_model_load: data offset: 444480                                                                                                     
-sense_voice_model_load: loading model                                                                                                                       
-sense_voice_model_load: n_vocab = 25055                                                                                                                     
-sense_voice_model_load: n_encoder_hidden_state = 512                                                                                                        
-sense_voice_model_load: n_encoder_linear_units = 2048                                                                                                       
-sense_voice_model_load: n_encoder_attention_heads  = 4                                                                                                      
-sense_voice_model_load: n_encoder_layers = 50                                                                                                               
-sense_voice_model_load: n_mels  = 80                                                                                                                        
-sense_voice_model_load: ftype  = 1                                                                                                                          
-sense_voice_model_load: vocab[25055] loaded 
-sense_voice_model_load: CPU total size =   468.98 MB
-sense_voice_model_load: n_tensors: 1197
-sense_voice_model_load: load SenseVoiceSmall takes 0.213000 second 
-sense_voice_init_state: compute buffer (encoder)   =   50.40 MB
-sense_voice_init_state: compute buffer (decoder)   =   13.72 MB
-
-system_info: n_threads = 4 / 256 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | METAL = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0
-
-main: processing audio (88747 samples, 5.54669 sec) , 4 threads, 1 processors, lang = auto...
-
-sense_voice_pcm_to_feature_with_state: calculate fbank and cmvn takes 7.207 ms
-<|zh|><|NEUTRAL|><|Speech|><|withitn|>欢迎大家来体验达摩院推出的语音识别模型。
-sense_voice_full_with_state: decoder audio use 1.011289 s, rtf is 0.182323.
+$ ./bin/sense-voice-main -m /Users/Code/cpp-project/SenseVoice.cpp/scripts/resources/SenseVoiceGGUF/sense-voice-small-q4_k.gguf /Users/Downloads/en.wav  -t 1 -l auto -itn -prefix
+
+sense_voice_small_init_from_file_with_params_no_state: loading model from '/Users/Code/cpp-project/SenseVoice.cpp/scripts/resources/SenseVoiceGGUF/sense-voice-small-q4_k.gguf'
+sense_voice_init_with_params_no_state: use gpu    = 1
+sense_voice_init_with_params_no_state: flash attn = 0
+sense_voice_init_with_params_no_state: gpu_device = 0
+sense_voice_init_with_params_no_state: devices    = 3
+sense_voice_init_with_params_no_state: backends   = 3
+sense_voice_model_load: version:      3
+sense_voice_model_load: alignment:   32
+sense_voice_model_load: data offset: 423680
+sense_voice_model_load: loading model
+sense_voice_model_load: n_vocab = 25055
+sense_voice_model_load: n_encoder_hidden_state = 512
+sense_voice_model_load: n_encoder_linear_units = 2048
+sense_voice_model_load: n_encoder_attention_heads  = 4
+sense_voice_model_load: n_encoder_layers = 50
+sense_voice_model_load: n_mels  = 80
+sense_voice_model_load: ftype  = 12
+sense_voice_model_load: vocab[25055] loaded
+sense_voice_default_buffer_type: using device Metal (Apple M1 Pro)
+sense_voice_model_load: Metal total size =   181.86 MB
+sense_voice_model_load: n_tensors: 1212
+sense_voice_model_load: load SenseVoiceSmall takes 0.338000 second 
+sense_voice_backend_init_gpu: using Metal backend
+ggml_metal_init: allocating
+ggml_metal_init: found device: Apple M1 Pro
+ggml_metal_init: picking default device: Apple M1 Pro
+ggml_metal_init: using embedded metal library
+ggml_metal_init: GPU name:   Apple M1 Pro
+ggml_metal_init: GPU family: MTLGPUFamilyApple7  (1007)
+ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003)
+ggml_metal_init: GPU family: MTLGPUFamilyMetal3  (5001)
+...
+sense_voice_backend_init: using BLAS backend
+sense_voice_backend_init: using CPU backend
+sense_voice_init_state: kv pad  size  =    3.67 MB
+sense_voice_init_state: compute buffer (encoder)   =    3.09 MB
+sense_voice_init_state: compute buffer (encoder)   =   17.53 MB
+sense_voice_init_state: compute buffer (decoder)   =    7.99 MB
+
+system_info: n_threads = 1 / 8 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | COREML = 0 | OPENVINO = 0
+
+main: processing audio (114816 samples, 7.17600 sec) , 1 threads, 1 processors, lang = auto...
+
+[1.12-3.42] <|en|><|NEUTRAL|><|Speech|><|withitn|>The tribal chief then called for the boy.
+[3.87-6.53] <|en|><|NEUTRAL|><|Speech|><|withitn|>And presented him with 50 pieces of gold.
+
+main: decoder audio use 0.135743 s, rtf is 0.018916. 
+
 ```
 
 ### Streaming Speech Recognition
diff --git a/README.md b/README.md
index 482ae12..e67a3b9 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,27 @@ python scripts/convert-pt-to-gguf.py \
 ```
 
 ### 非流式语音识别 silero-vad + sense voice
+
+#### 参数说明
+
+目前仅支持以下列出的参数，未列出的参数暂不支持：
+```bash
+usage: ./bin/sense-voice-main [options] file.wav
+
+options:
+  -t N,      --threads N        [4     ] 解码使用的线程数
+  -l LANG,   --language LANG    [auto  ] 语言代码 ('auto' 为自动检测), 支持 [`zh`, `en`, `yue`, `ja`, `ko`]，分别对应中文、英文、粤语、日语、韩语
+  -m FNAME,  --model FNAME      [models/sense-voice-small-q4_k.gguf] gguf模型路径
+  -f FNAME,  --file FNAME       [      ] wav文件路径， 当前仅支持16k采样率的音频
+  --min_speech_duration_ms      [250   ] vad 参数， 切割音频最小长度，单位毫秒
+  --max_speech_duration_ms      [15000 ] vad 参数， 切割音频最大长度，单位毫秒
+  --min_silence_duration_ms     [100   ] vad 参数，静默最小长度，单位毫秒
+  -ng,       --no-gpu           [false ] 不使用GPU
+  -fa,       --flash-attn       [false ] 使用flash attention 解码
+  -itn,      --use-itn          [false ] 使用逆文本正则化，包括标点。
+  -prefix,   --use-prefix       [false ] 输出语种、情感、事件、是否itn
+ ```
+#### 使用
 ```bash
 
 git clone https://github.com/lovemefan/SenseVoice.cpp
@@ -74,40 +95,78 @@ cmake -DCMAKE_BUILD_TYPE=Release .. && make -j 8
 
 ### 输出
 
-当前使用sense-voice-f16模型输出
+以下是使用sense-voice-q4_k模型在Macbook M1上输出:
 
 ```
-$./bin/sense-voice-main -m /data/code/SenseVoice.cpp/scripts/resources/gguf-fp16-sense-voice.bin /data/code/SenseVoice.cpp/scripts/resources/SenseVoiceSmall/example/asr_example_zh.wav  -t 4
-
-sense_voice_small_init_from_file_with_params_no_state: loading model from '/data/code/SenseVoice.cpp/scripts/resources/gguf-fp16-sense-voice-small.bin'     
-sense_voice_model_load: version:      3                                                                                                                     
-sense_voice_model_load: alignment:   32 
-sense_voice_model_load: data offset: 444480                                                                                                     
-sense_voice_model_load: loading model                                                                                                                       
-sense_voice_model_load: n_vocab = 25055                                                                                                                     
-sense_voice_model_load: n_encoder_hidden_state = 512                                                                                                        
-sense_voice_model_load: n_encoder_linear_units = 2048                                                                                                       
-sense_voice_model_load: n_encoder_attention_heads  = 4                                                                                                      
-sense_voice_model_load: n_encoder_layers = 50                                                                                                               
-sense_voice_model_load: n_mels  = 80                                                                                                                        
-sense_voice_model_load: ftype  = 1                                                                                                                          
-sense_voice_model_load: vocab[25055] loaded 
-sense_voice_model_load: CPU total size =   468.98 MB
-sense_voice_model_load: n_tensors: 1197
-sense_voice_model_load: load SenseVoiceSmall takes 0.213000 second 
-sense_voice_init_state: compute buffer (encoder)   =   50.40 MB
-sense_voice_init_state: compute buffer (decoder)   =   13.72 MB
-
-system_info: n_threads = 4 / 256 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | METAL = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0
-
-main: processing audio (88747 samples, 5.54669 sec) , 4 threads, 1 processors, lang = auto...
-
-sense_voice_pcm_to_feature_with_state: calculate fbank and cmvn takes 7.207 ms
-<|zh|><|NEUTRAL|><|Speech|><|withitn|>欢迎大家来体验达摩院推出的语音识别模型。
-sense_voice_full_with_state: decoder audio use 1.011289 s, rtf is 0.182323.
+$ ./bin/sense-voice-main -m /Users/Code/cpp-project/SenseVoice.cpp/scripts/resources/SenseVoiceGGUF/sense-voice-small-q4_k.gguf /Users/Downloads/asr_example_zh.wav  -t 1 -l auto -itn -prefix
+
+sense_voice_small_init_from_file_with_params_no_state: loading model from '/Users/Code/cpp-project/SenseVoice.cpp/scripts/resources/SenseVoiceGGUF/sense-voice-small-q4_k.gguf'
+sense_voice_init_with_params_no_state: use gpu    = 1
+sense_voice_init_with_params_no_state: flash attn = 0
+sense_voice_init_with_params_no_state: gpu_device = 0
+sense_voice_init_with_params_no_state: devices    = 3
+sense_voice_init_with_params_no_state: backends   = 3
+sense_voice_model_load: version:      3
+sense_voice_model_load: alignment:   32
+sense_voice_model_load: data offset: 423680
+sense_voice_model_load: loading model
+sense_voice_model_load: n_vocab = 25055
+sense_voice_model_load: n_encoder_hidden_state = 512
+sense_voice_model_load: n_encoder_linear_units = 2048
+sense_voice_model_load: n_encoder_attention_heads  = 4
+sense_voice_model_load: n_encoder_layers = 50
+sense_voice_model_load: n_mels  = 80
+sense_voice_model_load: ftype  = 12
+sense_voice_model_load: vocab[25055] loaded
+sense_voice_default_buffer_type: using device Metal (Apple M1 Pro)
+sense_voice_model_load: Metal total size =   181.86 MB
+sense_voice_model_load: n_tensors: 1212
+sense_voice_model_load: load SenseVoiceSmall takes 0.338000 second 
+sense_voice_backend_init_gpu: using Metal backend
+ggml_metal_init: allocating
+ggml_metal_init: found device: Apple M1 Pro
+ggml_metal_init: picking default device: Apple M1 Pro
+ggml_metal_init: using embedded metal library
+ggml_metal_init: GPU name:   Apple M1 Pro
+ggml_metal_init: GPU family: MTLGPUFamilyApple7  (1007)
+ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003)
+ggml_metal_init: GPU family: MTLGPUFamilyMetal3  (5001)
+...
+sense_voice_backend_init: using BLAS backend
+sense_voice_backend_init: using CPU backend
+sense_voice_init_state: kv pad  size  =    3.67 MB
+sense_voice_init_state: compute buffer (encoder)   =    3.09 MB
+sense_voice_init_state: compute buffer (encoder)   =   17.53 MB
+sense_voice_init_state: compute buffer (decoder)   =    7.99 MB
+
+system_info: n_threads = 1 / 8 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | COREML = 0 | OPENVINO = 0
+
+main: processing audio (88747 samples, 5.54669 sec) , 1 threads, 1 processors, lang = auto...
+
+[0.96-5.18] <|zh|><|NEUTRAL|><|Speech|><|withitn|>欢迎大家来体验达摩院推出的语音识别模型。
+
+main: decoder audio use 0.103725 s, rtf is 0.018700. 
 ```
 ### 流式语音识别识别
-
+流式识别的 VAD 基于信号处理实现，而非流式识别的 VAD 基于模型（silero-vad）实现。
+```bash
+usage: ./bin/sense-voice-stream [options]
+
+options:
+  -t N,     --threads N         [4      ] [SenseVoice] 解码使用的线程数
+            --chunk_size        [100    ] vad chunk 大小(单位ms)
+  -mmc      --min-mute-chunks   [10     ] 静音片段最小chunk数量
+  -mnc      --max-nomute-chunks [80     ] 最大非静音chunk数量
+            --use-vad           [false  ] 是否使用vad
+            --use-prefix        [false  ] 是否使用 sensevoice的额外信息（语种、情感、事件、是否itn）
+  -c ID,    --capture ID        [-1     ] [Device] capture device ID
+  -l LANG,  --language LANG     [auto   ] [SenseVoice] 语言代码 ('auto' 为自动检测), 支持 [`zh`, `en`, `yue`, `ja`, `ko`]，分别对应中文、英文、粤语、日语、韩语
+  -m FNAME, --model FNAME       [models/sense-voice-small-q4_k.gguf] [SenseVoice] 模型路径
+  -ng,      --no-gpu            [false  ] 不使用GPU
+  -fa,      --flash-attn        [false  ] 使用flash attention 解码
+  -itn,     --use-itn           [false  ] 使用逆文本正则化，包括标点。
+ 
+```
 
 ```bash
 sudo apt install libsdl2-dev
diff --git a/sense-voice/csrc/main.cc b/sense-voice/csrc/main.cc
index 7b67e4c..c9c2e82 100644
--- a/sense-voice/csrc/main.cc
+++ b/sense-voice/csrc/main.cc
@@ -72,6 +72,7 @@ struct sense_voice_params {
     bool use_gpu         = true;
     bool flash_attn      = false;
     bool use_itn         = false;
+    bool use_prefix      = false;
 
     std::string language  = "auto";
     std::string prompt;
@@ -149,10 +150,7 @@ static void sense_voice_print_usage(int /*argc*/, char ** argv, const sense_voic
     fprintf(stderr, "  -tdrz,     --tinydiarize       [%-7s] enable tinydiarize (requires a tdrz model)\n",     params.tinydiarize ? "true" : "false");
     fprintf(stderr, "  -nf,       --no-fallback       [%-7s] do not use temperature fallback while decoding\n", params.no_fallback ? "true" : "false");
     fprintf(stderr, "  -otxt,     --output-txt        [%-7s] output result in a text file\n",                   params.output_txt ? "true" : "false");
-    fprintf(stderr, "  -ovtt,     --output-vtt        [%-7s] output result in a vtt file\n",                    params.output_vtt ? "true" : "false");
     fprintf(stderr, "  -osrt,     --output-srt        [%-7s] output result in a srt file\n",                    params.output_srt ? "true" : "false");
-    fprintf(stderr, "  -olrc,     --output-lrc        [%-7s] output result in a lrc file\n",                    params.output_lrc ? "true" : "false");
-    fprintf(stderr, "  -owts,     --output-words      [%-7s] output script for generating karaoke video\n",     params.output_wts ? "true" : "false");
     fprintf(stderr, "  -ocsv,     --output-csv        [%-7s] output result in a CSV file\n",                    params.output_csv ? "true" : "false");
     fprintf(stderr, "  -oj,       --output-json       [%-7s] output result in a JSON file\n",                   params.output_jsn ? "true" : "false");
     fprintf(stderr, "  -ojf,      --output-json-full  [%-7s] include more information in the JSON file\n",      params.output_jsn_full ? "true" : "false");
@@ -175,6 +173,7 @@ static void sense_voice_print_usage(int /*argc*/, char ** argv, const sense_voic
     fprintf(stderr, "  -ng,       --no-gpu            [%-7s] disable GPU\n",                                    params.use_gpu ? "false" : "true");
     fprintf(stderr, "  -fa,       --flash-attn        [%-7s] flash attention\n",                                params.flash_attn ? "true" : "false");
     fprintf(stderr, "  -itn,      --use-itn           [%-7s] use itn\n",                                        params.use_itn ? "true" : "false");
+    fprintf(stderr, "  -prefix,   --use-prefix        [%-7s] output extra info: language, emotion, event, itn\n", params.use_prefix ? "true" : "false");
     fprintf(stderr, "\n");
 }
 
@@ -265,6 +264,7 @@ static bool sense_voice_params_parse(int argc, char ** argv, sense_voice_params
         else if (arg == "-fa"   || arg == "--flash-attn")      { params.flash_attn      = true; }
         else if (                  arg == "--grammar-penalty") { params.grammar_penalty = std::stof(argv[++i]); }
         else if (arg == "-itn"  || arg == "--use-itn")         { params.use_itn         = true; }
+        else if (arg == "-prefix"  || arg == "--use-prefix")   { params.use_prefix      = true; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             sense_voice_print_usage(argc, argv, params);
@@ -590,7 +590,7 @@ int main(int argc, char ** argv) {
                                 fprintf(stderr, "%s: failed to process audio\n", argv[0]);
                                 return 10;
                             }
-                            sense_voice_print_output(ctx, true, params.use_itn, false);
+                            sense_voice_print_output(ctx, params.use_prefix, params.use_itn, false);
                             current_speech_end = current_speech_start = 0;
                             if (next_start < prev_end) {
                                 triggered = false;
@@ -639,7 +639,7 @@ int main(int argc, char ** argv) {
                                     fprintf(stderr, "%s: failed to process audio\n", argv[0]);
                                     return 10;
                                 }
-                                sense_voice_print_output(ctx, true, params.use_itn, false);
+                                sense_voice_print_output(ctx, params.use_prefix, params.use_itn, false);
                                 current_speech_end = current_speech_start = 0;
                             }
                             prev_end = next_start = 0;
@@ -663,7 +663,7 @@ int main(int argc, char ** argv) {
                     fprintf(stderr, "%s: failed to process audio\n", argv[0]);
                     return 10;
                 }
-                sense_voice_print_output(ctx, true, params.use_itn, false);
+                sense_voice_print_output(ctx, params.use_prefix, params.use_itn, false);
             }
         }
         SENSE_VOICE_LOG_INFO("\n%s: decoder audio use %f s, rtf is %f. \n\n",