diff --git a/BUILD.gn b/BUILD.gn
index 7b4dd6d..2734d19 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -8,6 +8,7 @@ rtc_library("krisp_processor") {
configs += [ "../modules/audio_processing:apm_debug_dump", ]
configs += [ "//build/config/android:hide_all_but_jni" ]
defines = ["WEBRTC_ANDROID", "WEBRTC_POSIX",]
+ include_dirs = [ "krisp_c_api_include" ]
sources = [
"krisp_processor.cc",
"krisp_processor_jni.cc",
@@ -24,12 +25,12 @@ rtc_library("krisp_processor") {
rtc_android_library("krisp_java") {
visibility = [ "*" ]
sources = [
- "java/src/org/webrtc/KrispAudioProcessingImpl.java",]
+ "java/src/org/webrtc/KrispAudioProcessingFactory.java",]
deps = ["//sdk/android:peerconnection_java",]
}
generate_jni("generated_krisp_jni") {
- sources = [ "java/src/org/webrtc/KrispAudioProcessingImpl.java",]
+ sources = [ "java/src/org/webrtc/KrispAudioProcessingFactory.java",]
namespace = "Krisp"
jni_generator_include = "//sdk/android/src/jni/jni_generator_helper.h"
}
diff --git a/README.md b/README.md
index 45627e3..2c6114c 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,9 @@
1.1. include the `libwebrtc.aar` into the Android project.
-1.2. `import org.webrtc.KrispAudioProcessingImpl`
+1.2. `import org.webrtc.KrispAudioProcessingFactory`
-1.3. `
var audioProcessorModule = KrispAudioProcessingImpl()`
+1.3. `var audioProcessorModule = KrispAudioProcessingFactory()`
### 2 Load dependencies
2.1. Load the stdlib required by Krisp Audio SDK. Load it before using Krisp.
@@ -21,14 +21,23 @@
`System.loadLibrary("jingle_peerconnection_so")`
-### 3. Load the Krisp Dynamic Library with the model
+### 3. Load the Krisp Dynamic Library
+Load the Krisp dynamic library once before initializing a model.
+```
+val krispDllpath = "libkrisp-audio-sdk.so"
+val loaded = KrispAudioProcessingFactory.LoadKrisp(krispDllpath)
+if (!loaded) {
+ // report error, read the logs for the details
+}
+```
+
+### 4. Initialize the Krisp model
#### 3.1. Using the model file path
You can load Krisp model specifying file path. For this scenario you should make sure the Android app has access to the file resource.
```
-val modelFilePath = “c6.f.s.ced125.kw”
-val krispDllpath = "libkrisp-audio-sdk.so"
-var retValue = audioProcessorModule.Init(modelFilePath, krispDllpath)
+val modelFilePath = "krisp-nc-o-med-v7.kef"
+var retValue = audioProcessorModule.Init(modelFilePath)
if (!retValue) {
// report error, read the logs for the details
}
@@ -39,26 +48,31 @@ Make sure to specify correct file paths, these are hard coded sample values.
Alternatively, you can load Krisp model by specifying model data content loaded into the memory.
```
var modelData: ByteArray // = load the model into the memory
-audioProcessorModule.InitWithData(modelData, krispDllpath)
+val retValue = audioProcessorModule.Init(modelData)
if (!retValue) {
return null
}
```
-### 4. Enable, disable Krisp NC during runtime
+### 5. Enable, disable Krisp NC during runtime
to enable Krisp NC during runtime
`audioProcessorModule.Enable(true)`
to disable Krisp NC
`audioProcessorModule.Enable(false)`
-### 5. Integrate Krisp Module into the WebRTC PeerConnectionFactory
+### 6. Integrate Krisp Module into the WebRTC PeerConnectionFactory
```
PeerConnectionFactory
.builder()
.setAudioProcessingFactory(audioProcessorModule)
```
+### 7. Unload Krisp when done
+```
+KrispAudioProcessingFactory.UnloadKrisp()
+```
+
## Build Instructions
diff --git a/inc/krisp-audio-sdk-nc-stats.hpp b/inc/krisp-audio-sdk-nc-stats.hpp
deleted file mode 100644
index 2df4026..0000000
--- a/inc/krisp-audio-sdk-nc-stats.hpp
+++ /dev/null
@@ -1,202 +0,0 @@
-///
-/// Copyright Krisp, Inc
-///
-
-#ifndef KRISP_AUDIO_SDK_NC_STATS_HPP_
-#define KRISP_AUDIO_SDK_NC_STATS_HPP_
-
-#include "krisp-audio-sdk.hpp"
-
-/*!
- * Cleaned secondary speech states
- */
-typedef enum {
- UNDEFINED = 0,
- DETECTED = 1,
- NOT_DETECTED = 2
-} KrispAudioCleanedSecondarySpeechStatus;
-
-/*!
- * Krisp audio per-frame info
- */
-typedef struct krispAudioNcPerFrameInfo_t {
- /*!
- * Voice energy level in the processed frame. Value range [0,100]
- */
- unsigned int voiceEnergy;
-
- /*!
- * Noise energy level in the processed frame. Value range [0,100]
- */
- unsigned int noiseEnergy;
-
- /*!
- * BVC specific feature.
- * Returns the removed secondary speech state, e.g. in case if secondary speech detected and removed returns TRUE,
- * otherwise returns FALSE.
- * UNDEFINED will be returned in case of running the NC.
- */
- KrispAudioCleanedSecondarySpeechStatus cleanedSecondarySpeechStatus;
-} KrispAudioNcPerFrameInfo;
-
-/*!
- * Krisp audio voice stats
- */
-typedef struct krispAudioNcVoiceStats_t {
- unsigned int talkTimeMs;
-} KrispAudioNcVoiceStats;
-
-/*!
- * Krisp audio noise stats based on the noise intensity level
- */
-typedef struct krispAudioNcNoiseStats_t {
- unsigned int noNoiseMs;
- unsigned int lowNoiseMs;
- unsigned int mediumNoiseMs;
- unsigned int highNoiseMs;
-} KrispAudioNcNoiseStats;
-
-/*!
- * Krisp audio noise/voice stats
- */
-typedef struct krispAudioNcStats_t {
- KrispAudioNcVoiceStats voiceStats;
- KrispAudioNcNoiseStats noiseStats;
-} KrispAudioNcStats;
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*!
- * @brief This function creates Speech Enhance(Noise Canceler NC with stats) session object
- *
- * @param[in] inputSampleRate Sampling frequency of the input data
- * @param[in] outputSampleRate Sampling frequency of the output data
- * @param[in] frameDuration Frame duration
- * @param[in] modelName The session ties to this model, and cleans the future frames using it.
- * If modelName is \em nullptr then the SDK auto-detects the model based on input sampleRate
- * @attention Always provide modelName explicitly to avoid ambiguity
- *
- * @return created session handle
- */
-KRISP_AUDIO_API KrispAudioSessionID
-krispAudioNcWithStatsCreateSession(KrispAudioSamplingRate inputSampleRate,
- KrispAudioSamplingRate outputSampleRate,
- KrispAudioFrameDuration frameDuration,
- const char* modelName);
-
-/*!
- * @brief This function releases all data tied to this particular session, closes the given NC session
- *
- * @param[in, out] pSession Handle to the NC with stats session to be closed
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcWithStatsCloseSession(KrispAudioSessionID pSession);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- *
- * @param[in] pSession The NC With Stats Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100
- * @param[out] energyInfo Returns voice and noise energy levels of the current frame
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcWithStatsCleanAmbientNoiseInt16(KrispAudioSessionID pSession,
- const short* pFrameIn,
- unsigned int frameInSize,
- short* pFrameOut,
- unsigned int frameOutSize,
- KrispAudioNcPerFrameInfo* energyInfo);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame,if there is no ringtone. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- *
- * @param[in] pSession The NC With Stats Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100
- * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer.
- * @param[out] energyInfo Returns voice and noise energy levels of the current frame if ringtone is false otherwise 0.
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcWithStatsCleanAmbientNoiseWithRingtoneInt16(KrispAudioSessionID pSession,
- const short* pFrameIn,
- unsigned int frameInSize,
- short* pFrameOut,
- unsigned int frameOutSize,
- bool ringtone,
- KrispAudioNcPerFrameInfo* energyInfo);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame. Works with floats with values normalized in range [-1,1]
- * @param[in] pSession The NC With Stats Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100
- * @param[out] energyInfo Returns voice and noise energy levels of the current frame
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcWithStatsCleanAmbientNoiseFloat(KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize,
- float* pFrameOut,
- unsigned int frameOutSize,
- KrispAudioNcPerFrameInfo* energyInfo);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame, if there is no ringtone. Works with floats with values normalized in range [-1,1]
- *
- * @param[in] pSession The NC With Stats Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100
- * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer. * @retval 0 Success
- * @param[out] energyInfo Returns voice and noise energy levels of the current frame if ringtone is false otherwise 0.
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcWithStatsCleanAmbientNoiseWithRingtoneFloat(KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize,
- float* pFrameOut,
- unsigned int frameOutSize,
- bool ringtone,
- KrispAudioNcPerFrameInfo* energyInfo);
-
-/*!
- * @brief This function used to retrieve the noise/voice stats while processing noise canceler.
- * The recommended stats retrieval frequency is bigger or equal to 200ms.
- * If it's required only at the end of the noise canceler processing (end of the call/audio stream)
- * function should be called before pSession becomes invalid, i.e. after closing the specified session.
- *
- * @param[in] pSession The NC With Stats Session to which the stats belongs
- * @param[out] pStats Noise/Voice stats returned
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcWithStatsRetrieveStats(KrispAudioSessionID pSession,
- KrispAudioNcStats* pStats);
-
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif // KRISP_AUDIO_SDK_NC_STATS_HPP_
diff --git a/inc/krisp-audio-sdk-nc.hpp b/inc/krisp-audio-sdk-nc.hpp
deleted file mode 100644
index 736f840..0000000
--- a/inc/krisp-audio-sdk-nc.hpp
+++ /dev/null
@@ -1,137 +0,0 @@
-///
-/// Copyright Krisp, Inc
-///
-
-#ifndef KRISP_AUDIO_SDK_NC_HPP_
-#define KRISP_AUDIO_SDK_NC_HPP_
-
-#include "krisp-audio-sdk.hpp"
-#include
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*!
- * @brief This function creates Speech Enhance(Noise Canceler NC) session object
- *
- * @param[in] inputSampleRate Sampling frequency of the input data
- * @param[in] outputSampleRate Sampling frequency of the output data
- * @param[in] frameDuration Frame duration
- * @param[in] modelName The session ties to this model, and cleans the future frames using it.
- * If modelName is \em nullptr than the sdk auto-detecs the model based on input sampleRate
- * @attention Always provide modelName explicitly to avoid ambiguity
- *
- * @return created session handle
- */
-KRISP_AUDIO_API KrispAudioSessionID
-krispAudioNcCreateSession(KrispAudioSamplingRate inputSampleRate,
- KrispAudioSamplingRate outputSampleRate,
- KrispAudioFrameDuration frameDuration,
- const char* modelName);
-
-/*!
- * @brief This function releases all data tied to this particular session, closes the given NC session
- *
- * @param[in, out] pSession Handle to the NC session to be closed
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcCloseSession(KrispAudioSessionID pSession);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- *
- * @param[in] pSession The NC Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcCleanAmbientNoiseInt16(KrispAudioSessionID pSession,
- const short* pFrameIn,
- unsigned int frameInSize,
- short* pFrameOut,
- unsigned int frameOutSize);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame,if there is no ringtone. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- *
- * @param[in] pSession The NC Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100
- * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer.
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcCleanAmbientNoiseWithRingtoneInt16(KrispAudioSessionID pSession,
- const short* pFrameIn,
- unsigned int frameInSize,
- short* pFrameOut,
- unsigned int frameOutSize,
- bool ringtone);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame. Works with floats with values normalized in range [-1,1]
- *
- * @param[in] pSession The NC Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcCleanAmbientNoiseFloat(KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize,
- float* pFrameOut,
- unsigned int frameOutSize);
-
-/*!
- * @brief This function cleans the ambient noise for the given single frame, if there is no ringtone. Works with floats with values normalized in range [-1,1]
- *
- * @param[in] pSession The NC Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size
- * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100
- * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer. * @retval 0 Success
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioNcCleanAmbientNoiseWithRingtoneFloat(KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize,
- float* pFrameOut,
- unsigned int frameOutSize,
- bool ringtone);
-
-/*!
- * @brief This function turns on/off background speaker fix feature.
- *
- * @param[in] pSession The NC Session to which the frame belongs
- * @param[in] on on/off background speaker fix feature
- * @return 0 Value was set successfully
- * @return 1 Background speaker fix feature missing for this type of noise_cleaner
- *
- * @return -1, -2, -3, 2 Errors
- */
-KRISP_AUDIO_API bool
-krispAudioNcBackgroundSpeakerFixOnOff(KrispAudioSessionID pSession,
- bool on);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif //// KRISP_AUDIO_SDK_NC_HPP_
diff --git a/inc/krisp-audio-sdk-rt.hpp b/inc/krisp-audio-sdk-rt.hpp
deleted file mode 100644
index 2e9cb80..0000000
--- a/inc/krisp-audio-sdk-rt.hpp
+++ /dev/null
@@ -1,72 +0,0 @@
-///
-/// Copyright Krisp, Inc
-///
-
-#ifndef KRISP_AUDIO_SDK_RT_HPP_
-#define KRISP_AUDIO_SDK_RT_HPP_
-
-#include "krisp-audio-sdk.hpp"
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*!
- * @brief This function creates Ringtone detection session object
- *
- * @param[in] inputSampleRate Sampling frequency of the input data.
- * @param[in] frameDuration Frame duration
- * @param[in] modelName The session ties to this model, and processes the future frames using it
- * If modelName is \em nullptr then the SDK auto-detects the model based on input sampleRate.
- * @attention Always provide modelName explicitly to avoid ambiguity
- *
- * @return created session handle
- */
-KRISP_AUDIO_API KrispAudioSessionID
-krispAudioRingtoneCreateSession(KrispAudioSamplingRate inputSampleRate,
- KrispAudioFrameDuration frameDuration,
- const char* modelName);
-
-/*!
- * @brief This function releases all data tied to this particular session, closes the given Ringtone session
- *
- * @param[in, out] pSession Handle to the Ringtone session to be closed
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioRingtoneCloseSession(KrispAudioSessionID pSession);
-
-/*!
- * @brief This function processes the given frame and returns the Ringtone detection value. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- *
- * @param[in] pSession The Ringtone Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000
- *
- * @return Value in range [0,1].
- */
-KRISP_AUDIO_API float
-krispAudioDetectRingtoneFrameInt16(KrispAudioSessionID pSession,
- const short* pFrameIn,
- unsigned int frameInSize);
-
-/*!
- * @brief This function processes the given frame and returns the Ringtone detection value. Works with float values normalized in range [-1,1]
- *
- * @param[in] pSession The Ringtone Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000
- *
- * @return Value in range [0,1].
- */
-KRISP_AUDIO_API float
-krispAudioDetectRingtoneFrameFloat(KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif //// KRISP_AUDIO_SDK_RT_HPP_
diff --git a/inc/krisp-audio-sdk-vad.hpp b/inc/krisp-audio-sdk-vad.hpp
deleted file mode 100644
index 70368b8..0000000
--- a/inc/krisp-audio-sdk-vad.hpp
+++ /dev/null
@@ -1,133 +0,0 @@
-///
-/// Copyright Krisp, Inc
-///
-
-#ifndef KRISP_AUDIO_SDK_VAD_HPP_
-#define KRISP_AUDIO_SDK_VAD_HPP_
-
-#include "krisp-audio-sdk.hpp"
-
-/*!
- * Krisp Audio bandwidth values
- */
-typedef enum {
- BAND_WIDTH_UNKNOWN = 0,
- BAND_WIDTH_4000HZ = 1,
- BAND_WIDTH_8000HZ = 2,
- BAND_WIDTH_16000HZ = 3,
-} KrispAudioBandWidth;
-
-/*!
- * Krisp Audio real bandwidth info struct used by krispAudioVadFrameInt16Ex() and
- * krispAudioVadFrameFloatEx() APIs
- */
-typedef struct KrispAudioBandWidthInfo_t {
- /* [out] Predicted real bandwidth, one of the @KrispAudioBandWidth values */
- KrispAudioBandWidth realBandwidth;
- /* [in] Algorithm processing start point */
- int procStartDelayMs;
- /* [in] Algorithm processing duration counted from the procStartDelayMs */
- int procDurationMs;
- int reserved;
-} KrispAudioBandWidthInfo;
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*!
- * @brief This function creates Voice Activity Detection session object ( VAD )
- *
- * @param[in] inputSampleRate Sampling frequency of the input data.
- * @param[in] frameDuration Frame duration
- * @param[in] modelName The session ties to this model, and processes the future frames using it
- * If modelName is \em nullptr then the SDK auto-detects the model based on input sampleRate.
- * @attention Always provide modelName explicitly to avoid ambiguity
- *
- * @return created session handle
- */
-KRISP_AUDIO_API KrispAudioSessionID
-krispAudioVadCreateSession(KrispAudioSamplingRate inputSampleRate,
- KrispAudioFrameDuration frameDuration,
- const char* modelName);
-
-/*!
- * @brief This function releases all data tied to this particular session, closes the given VAD session
- *
- * @param[in, out] pSession Handle to the VAD session to be closed
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioVadCloseSession(KrispAudioSessionID pSession);
-
-/*!
- * @brief This function processes the given frame and returns the VAD detection value. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- *
- * @param[in] pSession The VAD Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000
- *
- * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples).
- * The Threshold needs to be adjusted to fit a particular use case.
- */
-KRISP_AUDIO_API float
-krispAudioVadFrameInt16(KrispAudioSessionID pSession,
- const short* pFrameIn,
- unsigned int frameInSize);
-
-/*!
- * @brief This function processes the given frame and returns the VAD detection value. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- *
- * @param[in] pSession The VAD Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] bandwidthInfo Returns BAND_WIDTH_UNKNOWN if still not predicted, otherwise the real bandwidth: one of the KrispAudioBandWidth values
- *
- * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples).
- * The Threshold needs to be adjusted to fit a particular use case.
- */
-KRISP_AUDIO_API float
-krispAudioVadFrameInt16Ex(KrispAudioSessionID pSession,
- const short* pFrameIn,
- unsigned int frameInSize,
- KrispAudioBandWidthInfo* bandwidthInfo);
-
-/*!
- * @brief This function processes the given frame and returns the VAD detection value. Works with float values normalized in range [-1,1]
- *
- * @param[in] pSession The VAD Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000
- *
- * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples).
- * The Threshold needs to be adjusted to fit a particular use case.
- */
-KRISP_AUDIO_API float
-krispAudioVadFrameFloat(KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize);
-
-
-/*!
- * @brief This function processes the given frame and returns the VAD detection value. Works with float values normalized in range [-1,1]
- *
- * @param[in] pSession The VAD Session to which the frame belongs
- * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000
- * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000
- * @param[in,out] bandwidthInfo Returns BAND_WIDTH_UNKNOWN if still not predicted, otherwise the real bandwidth: one of the KrispAudioBandWidth values
- *
- * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples).
- * The Threshold needs to be adjusted to fit a particular use case.
- */
-KRISP_AUDIO_API float
-krispAudioVadFrameFloatEx(KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize,
- KrispAudioBandWidthInfo* bandwidthInfo);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif //// KRISP_AUDIO_SDK_VAD_HPP_
diff --git a/inc/krisp-audio-sdk.hpp b/inc/krisp-audio-sdk.hpp
deleted file mode 100644
index 49f8947..0000000
--- a/inc/krisp-audio-sdk.hpp
+++ /dev/null
@@ -1,152 +0,0 @@
-///
-/// Copyright Krisp, Inc
-///
-
-#ifndef KRISP_AUDIO_SDK_HPP_
-#define KRISP_AUDIO_SDK_HPP_
-#if defined _WIN32 || defined __CYGWIN__
- #ifdef KRISP_AUDIO_STATIC
- #define KRISP_AUDIO_API
- #else
- #ifdef KRISP_AUDIO_EXPORTS
- #ifdef __GNUC__
- #define KRISP_AUDIO_API __attribute__ ((dllexport))
- #else
- #define KRISP_AUDIO_API __declspec(dllexport) // Note: actually gcc seems to also support this syntax.
- #endif
- #else
- #ifdef __GNUC__
- #define KRISP_AUDIO_API __attribute__ ((dllimport))
- #else
- #define KRISP_AUDIO_API __declspec(dllimport) // Note: actually gcc seems to also support this syntax.
- #endif
- #endif
- #endif
-#else
- #if __GNUC__ >= 4
- #define KRISP_AUDIO_API __attribute__ ((visibility ("default")))
- #else
- #define KRISP_AUDIO_API
- #endif
-#endif
-
-typedef void* KrispAudioSessionID;
-
-typedef enum {
- KRISP_AUDIO_SAMPLING_RATE_8000HZ=8000,
- KRISP_AUDIO_SAMPLING_RATE_16000HZ=16000,
- KRISP_AUDIO_SAMPLING_RATE_24000HZ=24000,
- KRISP_AUDIO_SAMPLING_RATE_32000HZ=32000,
- KRISP_AUDIO_SAMPLING_RATE_44100HZ=44100,
- KRISP_AUDIO_SAMPLING_RATE_48000HZ=48000,
- KRISP_AUDIO_SAMPLING_RATE_88200HZ=88200,
- KRISP_AUDIO_SAMPLING_RATE_96000HZ=96000
-} KrispAudioSamplingRate;
-
-typedef enum {
- KRISP_AUDIO_FRAME_DURATION_10MS=10
-} KrispAudioFrameDuration;
-
-typedef struct krispAudioVersionInfo_t {
- unsigned short major;
- unsigned short minor;
- unsigned short patch;
- unsigned short build;
-} KrispAudioVersionInfo;
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*!
- * @brief This function initializes the global data needed for the SDK
- *
- * @param[in] workingPath The path to the working directory. Can be nullptr to have the default behavior.
- *
- * @retval 0 success, negative on error
-*/
-KRISP_AUDIO_API int
-krispAudioGlobalInit(const wchar_t* workingPath);
-
-
-/*!
- * @brief This function frees all global resources allocated by SDK. The session's data will also be released and can't be used in future.
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioGlobalDestroy();
-
-/*!
- * @brief This function populates the versionInfo structure with API version information upon successful completion.
- *
- * @param[in,out] versionInfo The structure that gets populated upon successful completion of this call.
- * Contains major , minor , patch and build components.
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioGetVersion(KrispAudioVersionInfo* versionInfo);
-
-/*!
- * @brief This function sets the Krisp model to be used. The weight file for provided model must exist. Several models can be set.
- * The specified model is later tied to specific session during the session creation process.
- *
- * @param[in] weightFilePath The Krisp model weight file associated with the model
- * @param[in] modelName Model name alias that allows to later distinguish between different models that have been set by this function call
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioSetModel(const wchar_t* weightFilePath,
- const char* modelName);
-
-/*!
- * @brief This function sets the Krisp model by giving weight-config blob data. Weight blob data must be valid.
- * The model specified model is later tied to specific session during the session creation process.
- * @param[in] weightBlob The Krisp model weight blob data
- * @param[in] blobSize Blob data size
- * @param[in] modelName Model name alias that allows to later distinguish between different models that have been set by this function call
- * @retval 0 Success
- * @retval -1 Error
- */
-KRISP_AUDIO_API int krispAudioSetModelBlob(const void* weightBlob, unsigned int blobSize, const char* modelName);
-
-/*!
- * @brief This function removes the Krisp model.
- *
- * @param[in] modelName Model name alias that allows to remove model that has been set.
- *
- * @retval 0 success, negative on error
- */
-KRISP_AUDIO_API int
-krispAudioRemoveModel(const char* modelName);
-
-/*!
- * @brief This function returns the energy amount for the given frame. Works with floats with values normalized in range [-1,1]
- * Note: It may be used without initializing global SDK context by krispAudioGlobalInit()
- * @param[in] pFrameIn pFrameIn Pointer to input frame.
- * @param[in] frameInSize This is buffer size.
- * @return Value in range [0, 100].
- */
-KRISP_AUDIO_API unsigned int
-krispAudioGetFrameEnergyFloat(const float* pFrameIn,
- unsigned int frameInSize);
-
-/*!
- * @brief This function returns the energy amount for the given frame. Works with shorts (int16) with value in range [-2^15+1, 2^15]
- * Note: It may be used without initializing global SDK context by krispAudioGlobalInit()
- * @param[in] pFrameIn pFrameIn Pointer to input frame.
- * @param[in] frameInSize This is buffer size.
- * @return Value in range [0, 100].
- */
-KRISP_AUDIO_API unsigned int
-krispAudioGetFrameEnergyInt16(const short* pFrameIn,
- unsigned int frameInSize);
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif //// KRISP_AUDIO_SDK_HPP_
diff --git a/java/src/org/webrtc/KrispAudioProcessingFactory.java b/java/src/org/webrtc/KrispAudioProcessingFactory.java
new file mode 100644
index 0000000..f246622
--- /dev/null
+++ b/java/src/org/webrtc/KrispAudioProcessingFactory.java
@@ -0,0 +1,97 @@
+package org.webrtc;
+import org.webrtc.AudioProcessingFactory;
+
+public class KrispAudioProcessingFactory implements AudioProcessingFactory {
+    /** Handle of the native Krisp module; 0 until an Init call succeeds and again after Destroy. */
+    private long nativeModule;
+    /** Set by Destroy; afterwards every instance method throws IllegalStateException. */
+    private boolean destroyed;
+
+    /** Returns the native handle, or throws if this factory is destroyed or not yet initialized. */
+    private long requireNativeModule(String caller) {
+        if (destroyed) {
+            throw new IllegalStateException("KrispAudioProcessingFactory is destroyed");
+        }
+        if (nativeModule == 0) {
+            throw new IllegalStateException("Call Init method before " + caller);
+        }
+        return nativeModule;
+    }
+
+    @Override
+    public long createNative() {
+        return nativeGetAudioProcessorModule(requireNativeModule("createNative()"));
+    }
+
+    /** Loads the Krisp dynamic library from dllPath; returns false on failure. */
+    public static boolean LoadKrisp(String dllPath) {
+        return nativeLoadKrisp(dllPath);
+    }
+
+    /** Counterpart of LoadKrisp. */
+    public static boolean UnloadKrisp() {
+        return nativeUnloadKrisp();
+    }
+
+    /** Creates the native module from a model file on first use; later calls re-initialize it. */
+    public boolean Init(String modelPath) {
+        if (destroyed) {
+            throw new IllegalStateException("KrispAudioProcessingFactory is destroyed");
+        }
+        if (nativeModule == 0) {
+            nativeModule = nativeCreateModuleWithModelPath(modelPath);
+            return nativeModule != 0;
+        }
+        return nativeInit(nativeModule, modelPath);
+    }
+
+    public boolean Init(byte[] modelData) {
+        if (destroyed) {
+            throw new IllegalStateException("KrispAudioProcessingFactory is destroyed");
+        }
+        if (nativeModule == 0) {
+            nativeModule = nativeCreateModuleWithModelData(modelData);
+            return nativeModule != 0;
+        }
+        return nativeInitWithData(nativeModule, modelData);
+    }
+
+    public void Enable(boolean enable) {
+        long module = requireNativeModule("Enable");
+        nativeEnable(module, enable);
+    }
+
+    public boolean IsEnabled() {
+        long module = requireNativeModule("IsEnabled");
+        return nativeIsEnabled(module);
+    }
+
+    public void Destroy() {
+        long module = requireNativeModule("Destroy");
+        nativeDestroy(module);
+        nativeModule = 0;
+        destroyed = true;
+    }
+
+    private static native void nativeEnable(long nativeModule, boolean enable);
+
+    private static native boolean nativeIsEnabled(long nativeModule);
+
+    private static native boolean nativeInit(long nativeModule, String modelPath);
+
+    private static native boolean nativeInitWithData(long nativeModule, byte[] modelData);
+
+    private static native boolean nativeLoadKrisp(String dllPath);
+
+    private static native boolean nativeUnloadKrisp();
+
+    private static native void nativeDestroy(long nativeModule);
+
+    private static native long nativeCreateModule();
+
+    private static native long nativeGetAudioProcessorModule(long nativeModule);
+
+    private static native long nativeCreateModuleWithModelPath(String modelPath);
+
+    private static native long nativeCreateModuleWithModelData(byte[] modelData);
+}
diff --git a/java/src/org/webrtc/KrispAudioProcessingImpl.java b/java/src/org/webrtc/KrispAudioProcessingImpl.java
deleted file mode 100644
index b37f523..0000000
--- a/java/src/org/webrtc/KrispAudioProcessingImpl.java
+++ /dev/null
@@ -1,43 +0,0 @@
-package org.webrtc;
-import org.webrtc.AudioProcessingFactory;
-
-public class KrispAudioProcessingImpl implements AudioProcessingFactory {
- @Override
- public long createNative() {
- return nativeGetAudioProcessorModule();
- }
-
- public boolean Init(String modelPath, String dllPath) {
- return nativeInit(modelPath, dllPath);
- }
-
- public boolean InitWithData(byte[] modelData, String dllPath) {
- return nativeInitWithData(modelData, dllPath);
- }
-
- public void Enable(boolean enable) {
- nativeEnable(enable);
- }
-
- public boolean IsEnabled() {
- return nativeIsEnabled();
- }
-
- public void Destroy() {
- nativeDestroy();
- }
-
- private static native void nativeEnable(boolean disable);
-
- private static native boolean nativeIsEnabled();
-
- private static native boolean nativeInit(String modelPath, String dllPath);
-
- private static native boolean nativeInitWithData(byte[] dataData, String dllPath);
-
- private static native void nativeDestroy();
-
- private static native long nativeGetAudioProcessorModule();
-}
-
-
diff --git a/krisp_c_api_include/krisp-audio-api-definitions-c.h b/krisp_c_api_include/krisp-audio-api-definitions-c.h
new file mode 100644
index 0000000..2b20a15
--- /dev/null
+++ b/krisp_c_api_include/krisp-audio-api-definitions-c.h
@@ -0,0 +1,105 @@
+#pragma once
+
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined _WIN32 || defined __CYGWIN__
+#ifdef KRISP_AUDIO_STATIC
+#define KRISP_AUDIO_API
+#else
+#ifdef KRISP_AUDIO_EXPORTS
+#ifdef __GNUC__
+#define KRISP_AUDIO_API __attribute__((dllexport))
+#else
+#define KRISP_AUDIO_API __declspec(dllexport) // Note: actually gcc seems to also support this syntax.
+#endif
+#else
+#ifdef __GNUC__
+#define KRISP_AUDIO_API __attribute__((dllimport))
+#else
+#define KRISP_AUDIO_API __declspec(dllimport) // Note: actually gcc seems to also support this syntax.
+#endif
+#endif
+#endif
+#else
+#if __GNUC__ >= 4 || __clang__
+#define KRISP_AUDIO_API __attribute__((visibility("default")))
+#else
+#define KRISP_AUDIO_API
+#endif
+#endif
+
+/// @brief Sampling frequency of the audio frame
+typedef enum
+{
+ Sr8000Hz = 8000,
+ Sr16000Hz = 16000,
+ Sr24000Hz = 24000,
+ Sr32000Hz = 32000,
+ Sr44100Hz = 44100,
+ Sr48000Hz = 48000,
+ Sr88200Hz = 88200,
+ Sr96000Hz = 96000
+} KrispSamplingRate;
+
+/// @brief Input audio frame duration in ms
+typedef enum
+{
+ Fd10ms = 10,
+ Fd15ms = 15,
+ Fd20ms = 20,
+ Fd30ms = 30,
+ Fd32ms = 32,
+} KrispFrameDuration;
+
+/// @brief Version information
+typedef struct
+{
+ uint16_t major;
+ uint16_t minor;
+ uint16_t patch;
+ uint32_t build;
+} KrispVersionInfo;
+
+/// @brief Model Info containing path to the model or its content blob.
+typedef struct
+{
+ /// @brief Path to the model file
+ const wchar_t* path;
+
+ /// @brief Model file content as a blob
+ struct
+ {
+ const uint8_t* data;
+ size_t size;
+ } blob;
+} KrispModelInfo;
+
+/// @brief Return results of the API calls
+typedef enum
+{
+ KrispRetValSuccess = 0,
+ KrispRetValUnknowError = 1,
+ KrispRetValInternalError = 2,
+ KrispRetValInvalidInput = 3,
+} KrispRetVal;
+
+/// @brief The log levels.
+typedef enum
+{
+ LogLevelTrace = 0,
+ LogLevelDebug = 1,
+ LogLevelInfo = 2,
+ LogLevelWarn = 3,
+ LogLevelErr = 4,
+ LogLevelCritical = 5,
+ LogLevelOff = 6
+} KrispLogLevel;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/krisp_c_api_include/krisp-audio-sdk-c.h b/krisp_c_api_include/krisp-audio-sdk-c.h
new file mode 100644
index 0000000..e461b86
--- /dev/null
+++ b/krisp_c_api_include/krisp-audio-sdk-c.h
@@ -0,0 +1,32 @@
+///
+/// Copyright Krisp, Inc
+///
+#pragma once
+
+#include "krisp-audio-api-definitions-c.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/// @brief Initializes the global data needed for the SDK
+/// @param[in] workingPath The path to the working directory. Can be empty for using default execution directory.
+/// @param[in] logCallback The callback to call when a log message is emitted.
+/// @param[in] logLevel Log level.
+/// @retval KrispRetValSuccess on success
+KRISP_AUDIO_API KrispRetVal krispGlobalInit(
+ const wchar_t* workingPath, void (*logCallback)(const char*, KrispLogLevel), KrispLogLevel logLevel);
+
+/// @brief Frees all the global resources allocated by SDK.
+/// @retval KrispRetValSuccess on success
+KRISP_AUDIO_API KrispRetVal krispGlobalDestroy();
+
+/// @brief Populates the versionInfo structure with API version information upon successful completion.
+/// @param[in,out] versionInfo The structure that gets populated upon successful completion of this call.
+/// @retval KrispRetValSuccess on success
+KRISP_AUDIO_API KrispRetVal krispGetVersion(KrispVersionInfo* versionInfo);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/krisp_c_api_include/krisp-audio-sdk-nc-c.h b/krisp_c_api_include/krisp-audio-sdk-nc-c.h
new file mode 100644
index 0000000..f670cb9
--- /dev/null
+++ b/krisp_c_api_include/krisp-audio-sdk-nc-c.h
@@ -0,0 +1,214 @@
+///
+/// Copyright Krisp, Inc
+///
+#pragma once
+
+#include "krisp-audio-api-definitions-c.h"
+#include
+#include
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @brief Ringtone configuration used with inbound NC models to keep ringtones.
+typedef struct
+{
+ /// @brief Ringtone model configuration.
+ KrispModelInfo modelInfo;
+} KrispNcRingtoneCfg;
+
+/// @brief NC session configuration.
+typedef struct
+{
+ /// @brief Sampling frequency of the input data.
+ KrispSamplingRate inputSampleRate;
+
+ /// @brief Input audio frame duration.
+ KrispFrameDuration inputFrameDuration;
+
+ /// @brief Sampling frequency of the output data.
+ KrispSamplingRate outputSampleRate;
+
+ /// @brief NC model configuration.
+ KrispModelInfo* modelInfo;
+
+ /// @brief Set true to enable collection of NC session statistics
+ bool enableSessionStats;
+
+ /// @brief Optional: Ringtone configuration that may be provided with inbound NC models to retain ringtones.
+ /// Pass NULL to skip the ringtone retention feature.
+ KrispNcRingtoneCfg* ringtoneCfg;
+} KrispNcSessionConfig;
+
+/// @brief Audio frame energy information struct describing noise/voice energy values
+typedef struct
+{
+ /// @brief Voice energy level, range [0,100]
+ uint8_t voiceEnergy;
+
+ /// @brief Noise energy level, range [0,100]
+ uint8_t noiseEnergy;
+} KrispNcEnergyInfo;
+
+/// @brief Cleaned secondary speech status enum
+typedef enum
+{
+ /// @brief Cleaned secondary speech algorithm is not available (if non BVC model provided)
+ Undefined = 0,
+
+ /// @brief Cleaned secondary speech detected in the processed frame
+ Detected = 1,
+
+ /// @brief Cleaned secondary speech is not detected in the processed frame
+ NotDetected = 2
+} KrispNcCleanedSecondarySpeechStatus;
+
+/// @brief Per-frame information returned after NC processing of the given frame
+typedef struct
+{
+ /// @brief Voice and noise energy info.
+ KrispNcEnergyInfo energy;
+
+ /// @brief BVC specific feature.
+ /// Returns the state of the removed secondary speech.
+ /// If secondary speech is detected and removed, it returns Detected; otherwise, it returns NotDetected.
+ /// Undefined is returned when a non-BVC (plain NC) model is running.
+ KrispNcCleanedSecondarySpeechStatus cleanedSecondarySpeechStatus;
+} KrispNcPerFrameStats;
+
+/// @brief Voice stats
+typedef struct
+{
+ /// @brief Voice duration in ms
+ uint32_t talkTimeMs;
+} KrispNcVoiceStats;
+
+/// @brief Noise stats based on the noise intensity levels
+typedef struct
+{
+ /// @brief No noise duration in ms
+ uint32_t noNoiseMs;
+
+ /// @brief Low intensity noise duration in ms
+ uint32_t lowNoiseMs;
+
+ /// @brief Medium intensity noise duration in ms
+ uint32_t mediumNoiseMs;
+
+ /// @brief High intensity noise duration in ms
+ uint32_t highNoiseMs;
+
+ /// @brief Cleaned secondary speech detected duration in ms
+ uint32_t cleanedSecondarySpeechMs;
+
+ /// @brief Cleaned secondary speech not detected duration in ms
+ uint32_t cleanedSecondarySpeechNotDetectedMs;
+
+ /// @brief Cleaned secondary speech undefined duration in ms (non BVC use-case)
+ uint32_t cleanedSecondarySpeechUndefinedMs;
+} KrispNcNoiseStats;
+
+/// @brief NC stats containing noise and voice information
+typedef struct
+{
+ /// @brief Voice stats
+ KrispNcVoiceStats voiceStats;
+
+ /// @brief Noise stats
+ KrispNcNoiseStats noiseStats;
+} KrispNcSessionStats;
+
+typedef uint64_t krispNcHandle;
+
+/// @brief Creates a new instance of Nc session for int16 stream processing.
+/// AI technology removes background noises, reverb, and background voices from the main speaker's voice
+/// in real-time, while also providing noise and voice statistics for the audio stream and frame
+/// @param[in] config Configuration for the Nc Session.
+/// @retval Non-zero session handle on success, otherwise 0.
+KRISP_AUDIO_API krispNcHandle krispCreateNcInt16(const KrispNcSessionConfig* config);
+
+/// @brief Creates a new instance of Nc session for float stream processing.
+/// AI technology removes background noises, reverb, and background voices from the main speaker's voice
+/// in real-time, while also providing noise and voice statistics for the audio stream and frame
+/// @param[in] config Configuration for the Nc Session.
+/// @retval Non-zero session handle on success, otherwise 0.
+KRISP_AUDIO_API krispNcHandle krispCreateNcFloat(const KrispNcSessionConfig* config);
+
+/// @brief Destroys the Nc instance.
+/// Should be called if the Nc instance is no longer needed, before krispGlobalDestroy()
+/// @param nc The Nc instance to destroy.
+/// @retval KrispRetValSuccess on success
+KRISP_AUDIO_API KrispRetVal krispDestroyNc(const krispNcHandle nc);
+
+/// @brief Processes an input frame of audio data with int16 samples.
+/// @param[in] nc The handle of Nc instance to process the audio data.
+/// @param[in] inputSamples Pointer to the input buffer containing audio samples.
+/// The buffer should hold enough samples to fill a frame of audio data,
+/// calculated as frameDuration * inputSampleRate / 1000 of FrameDataType samples.
+/// @param[in] numInputSamples The number of samples in the input buffer.
+/// Must be sufficient to match the expected input frame size.
+/// @param[out] outputSamples Pointer to the buffer for the processed audio samples.
+/// The caller must allocate a buffer of sufficient size to handle
+/// a frame of output samples, calculated as frameDuration * outputSampleRate / 1000 of
+/// FrameDataType samples.
+/// @param[in] numOutputSamples The number of samples the output buffer can handle.
+/// Must be sufficient to match the expected output frame size.
+/// @param[in] noiseSuppressionLevel Noise suppression level in the range [0, 100]%
+/// Used to adjust the intensity of the applied noise suppression.
+/// - 0% indicates no noise suppression.
+/// - 100% indicates full noise suppression.
+/// @param[out] frameStats Optional: Frame statistics calculated during NC processing.
+/// Pass NULL to skip calculation, or provide a valid pointer to receive the statistics.
+/// @retval KrispRetValSuccess on success
+KRISP_AUDIO_API KrispRetVal krispProcessNcInt16(
+ const krispNcHandle nc,
+ const int16_t* inputSamples,
+ size_t numInputSamples,
+ int16_t* outputSamples,
+ size_t numOutputSamples,
+ float noiseSuppressionLevel,
+ KrispNcPerFrameStats* frameStats);
+
+/// @brief Processes an input frame of audio data with float samples.
+/// @param[in] nc The handle of Nc instance to process the audio data.
+/// @param[in] inputSamples Pointer to the input buffer containing audio samples.
+/// The buffer should hold enough samples to fill a frame of audio data,
+/// calculated as frameDuration * inputSampleRate / 1000 of FrameDataType samples.
+/// @param[in] numInputSamples The number of samples in the input buffer.
+/// Must be sufficient to match the expected input frame size.
+/// @param[out] outputSamples Pointer to the buffer for the processed audio samples.
+/// The caller must allocate a buffer of sufficient size to handle
+/// a frame of output samples, calculated as frameDuration * outputSampleRate / 1000 of
+/// FrameDataType samples.
+/// @param[in] numOutputSamples The number of samples the output buffer can handle.
+/// Must be sufficient to match the expected output frame size.
+/// @param[in] noiseSuppressionLevel Noise suppression level in the range [0, 100]%
+/// Used to adjust the intensity of the applied noise suppression.
+/// - 0% indicates no noise suppression.
+/// - 100% indicates full noise suppression.
+/// @param[out] frameStats Optional: Frame statistics calculated during NC processing.
+/// Pass NULL to skip calculation, or provide a valid pointer to receive the statistics.
+/// @retval KrispRetValSuccess on success
+KRISP_AUDIO_API KrispRetVal krispProcessNcFloat(
+ const krispNcHandle nc,
+ const float* inputSamples,
+ size_t numInputSamples,
+ float* outputSamples,
+ size_t numOutputSamples,
+ float noiseSuppressionLevel,
+ KrispNcPerFrameStats* frameStats);
+
+/// @brief Retrieves noise and voice statistics calculated from the start of NC processing.
+/// To enable statistics collection, ensure that KrispNcSessionConfig::enableSessionStats is set when creating the NC session.
+/// The recommended frequency for retrieving stats is 200ms or more.
+/// If it's required only at the end of the NC session, call this function once
+/// before the NC session is destroyed with krispDestroyNc().
+/// @param stats Session statistics
+/// @retval KrispRetValSuccess on success
+KRISP_AUDIO_API KrispRetVal krispGetNcSessionStats(const krispNcHandle nc, KrispNcSessionStats* stats);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/krisp_processor.cc b/krisp_processor.cc
index 98715db..30c848a 100644
--- a/krisp_processor.cc
+++ b/krisp_processor.cc
@@ -1,19 +1,149 @@
#include "krisp_processor.hpp"
+#include
+#include
+#include
+#include
+
#include
+#include "krisp-audio-api-definitions-c.h"
+#include "krisp-audio-sdk-nc-c.h"
#include "rtc_base/time_utils.h"
-#include "inc/krisp-audio-sdk.hpp"
-#include "inc/krisp-audio-sdk-nc.hpp"
-
#include "krisp_sdk.h"
namespace Krisp
{
-KrispProcessor* KrispProcessor::_singleton = nullptr;
+// Forwards one Krisp SDK log message to syslog, mapping the SDK level to the
+// closest syslog priority. Each message is written exactly once.
+static void logCallback(const char* message, KrispLogLevel level)
+{
+    switch (level) {
+    case LogLevelTrace:  // syslog has no trace priority; share LOG_DEBUG
+    case LogLevelDebug:
+        syslog(LOG_DEBUG, "KrispProcessor::logCallback: %s", message);
+        break;
+    case LogLevelInfo:
+        syslog(LOG_INFO, "KrispProcessor::logCallback: %s", message);
+        break;
+    case LogLevelWarn:
+        syslog(LOG_WARNING, "KrispProcessor::logCallback: %s", message);
+        break;
+    case LogLevelErr:
+        syslog(LOG_ERR, "KrispProcessor::logCallback: %s", message);
+        break;
+    case LogLevelCritical:
+        syslog(LOG_CRIT, "KrispProcessor::logCallback: %s", message);
+        break;
+    case LogLevelOff:
+        // Logging is switched off for this message: drop it.
+        break;
+    }
+}
+
+// Loads the Krisp shared library from krispDllPath and initializes the SDK globals.
+// Returns false, after logging to syslog, when either step fails.
+bool LoadKrisp(const char* krispDllPath)
+{
+    if (!KrispSDK::LoadDll(krispDllPath)) {
+        syslog(LOG_ERR, "KrispProcessor::Init: Unable to load Krisp DLL");
+        return false;
+    }
+    if (KrispSDK::GlobalInit(nullptr, logCallback, KrispLogLevel::LogLevelTrace)) {
+        return true;
+    }
+    syslog(LOG_ERR, "KrispProcessor::Init: Failed to initialize Krisp globals");
+    return false;
+}
+
+// Destroys the SDK globals, then unloads the library; a failed destroy leaves it loaded.
+bool UnloadKrisp()
+{
+    if (KrispSDK::GlobalDestroy() == KrispRetValSuccess) {
+        KrispSDK::UnloadDll();
+        return true;
+    }
+    syslog(LOG_ERR, "KrispProcessor::Unload: Failed to destroy Krisp globals");
+    return false;
+}
+
+// Maps a rate in Hz to KrispSamplingRate; unsupported rates are logged and fall back to 48 kHz.
+static KrispSamplingRate GetSampleRate(size_t sampleRate)
+{
+    switch (sampleRate) {
+    case 8000:
+        return KrispSamplingRate::Sr8000Hz;
+    case 16000:
+        return KrispSamplingRate::Sr16000Hz;
+    case 24000:
+        return KrispSamplingRate::Sr24000Hz;
+    case 32000:
+        return KrispSamplingRate::Sr32000Hz;
+    case 44100:
+        return KrispSamplingRate::Sr44100Hz;
+    case 48000:
+        return KrispSamplingRate::Sr48000Hz;
+    case 88200:
+        return KrispSamplingRate::Sr88200Hz;
+    case 96000:
+        return KrispSamplingRate::Sr96000Hz;
+    default:
+        syslog(LOG_INFO, "KrispProcessor::GetSampleRate: The input sampling rate: %zu "
+                         "is not supported. Using default 48khz.", sampleRate);
+        return KrispSamplingRate::Sr48000Hz;
+    }
+}
+
+// True when modelInfo names a non-empty model path or carries a non-empty blob.
+static bool IsModelSet(const KrispModelInfo& modelInfo)
+{
+    const bool pathSet = modelInfo.path != nullptr && modelInfo.path[0] != L'\0';
+    return pathSet || (modelInfo.blob.data != nullptr && modelInfo.blob.size > 0);
+}
+
+// Checks that modelPath names a non-empty file whose first byte is readable; logs each outcome.
+static bool ValidateModelPath(const char* modelPath)
+{
+    if (!modelPath || modelPath[0] == '\0') {
+        syslog(LOG_ERR, "KrispProcessor::Init: model path is empty");
+        return false;
+    }
+    syslog(LOG_INFO, "KrispProcessor::Init: model path: %s", modelPath);
+
+    struct stat st;
+    if (stat(modelPath, &st) != 0) {
+        syslog(LOG_ERR, "KrispProcessor::Init: stat failed for %s: %s",
+               modelPath, strerror(errno));
+        return false;
+    }
+    if (st.st_size <= 0) {
+        syslog(LOG_ERR, "KrispProcessor::Init: model file is empty: %s", modelPath);
+        return false;
+    }
+
+    FILE* file = std::fopen(modelPath, "rb");
+    if (!file) {
+        syslog(LOG_ERR, "KrispProcessor::Init: fopen failed for %s: %s",
+               modelPath, strerror(errno));
+        return false;
+    }
+    unsigned char byte = 0;
+    const size_t bytesRead = std::fread(&byte, 1, 1, file);
+    const int readErrno = errno;  // capture before fclose() can overwrite it
+    std::fclose(file);
+    if (bytesRead != 1) {
+        syslog(LOG_ERR, "KrispProcessor::Init: fread failed for %s: %s",
+               modelPath, strerror(readErrno));
+        return false;
+    }
+    syslog(LOG_INFO, "KrispProcessor::Init: model file size: %lld bytes",
+           static_cast<long long>(st.st_size));
+    return true;
+}
inline std::wstring convertMBString2WString(const std::string& str)
{
@@ -21,125 +151,130 @@ inline std::wstring convertMBString2WString(const std::string& str)
return w;
}
-KrispProcessor::KrispProcessor() :
+KrispNoiseFilter::KrispNoiseFilter() :
m_isEnabled(false),
- m_session(nullptr),
- m_sampleRate(KRISP_AUDIO_SAMPLING_RATE_16000HZ),
m_numberOfChannels(1),
m_lastTimeStamp(0),
+ m_modelPath(),
+ m_modelData(),
m_bufferIn(),
m_bufferOut()
{
+ m_modelInfo.path = L"";
+ m_modelInfo.blob.data = nullptr;
+ m_modelInfo.blob.size = 0;
+ m_sessionConfig.enableSessionStats = false;
+ m_sessionConfig.inputSampleRate = KrispSamplingRate::Sr16000Hz;
+ m_sessionConfig.inputFrameDuration = KrispFrameDuration::Fd10ms;
+ m_sessionConfig.outputSampleRate = KrispSamplingRate::Sr16000Hz;
+ m_sessionConfig.modelInfo = &m_modelInfo;
+ m_sessionConfig.ringtoneCfg = nullptr;
+ m_ncCachedHandle = 0;
}
-KrispProcessor::~KrispProcessor()
+KrispNoiseFilter::~KrispNoiseFilter()
{
syslog(LOG_INFO,"KrispProcessor::~KrispProcessor()");
DeInit();
}
-KrispProcessor* KrispProcessor::GetInstance()
-{
- if(_singleton == nullptr)
+void KrispNoiseFilter::DeInit() {
+ if (m_ncCachedHandle)
{
- _singleton = new KrispProcessor();
+ KrispSDK::DestroyNcFloat(m_ncCachedHandle);
+ m_ncCachedHandle = 0;
}
- return _singleton;
+ m_modelPath.clear();
+ m_modelPath.shrink_to_fit();
+ m_modelData.clear();
+ m_modelData.shrink_to_fit();
+ m_modelInfo.path = nullptr;
+ m_modelInfo.blob.data = nullptr;
+ m_modelInfo.blob.size = 0;
}
-void KrispProcessor::DeInit() {
- if (m_session)
- {
- KrispSDK::NcCloseSession(m_session);
- m_session = nullptr;
- }
- KrispSDK::RemoveModel("default");
- KrispSDK::GlobalDestroy();
- KrispSDK::UnloadDll();
-}
-
-bool KrispProcessor::Init(const char* modelPath, const char* krispDllPath)
+bool KrispNoiseFilter::Init(const char* modelPath)
{
- if (!KrispSDK::LoadDll(krispDllPath))
- {
- syslog(LOG_ERR, "KrispProcessor::Init: Unable to load Krisp DLL");
+ if (!ValidateModelPath(modelPath)) {
return false;
}
-
- if (!KrispSDK::GlobalInit(nullptr))
+ m_modelPath = convertMBString2WString(modelPath);
+ m_modelInfo.path = m_modelPath.c_str();
+ m_modelInfo.blob.data = nullptr;
+ m_modelInfo.blob.size = 0;
+ m_modelData.clear();
+ if (m_ncCachedHandle)
{
- syslog(LOG_ERR, "KrispProcessor::Init: Failed to initialize Krisp globals");
- return false;
- }
-
- if (KrispSDK::SetModel(convertMBString2WString(modelPath).c_str(), "default") != 0)
+ KrispSDK::DestroyNcFloat(m_ncCachedHandle);
+ m_ncCachedHandle = 0;
+ }
+ m_ncCachedHandle = KrispSDK::CreateNcFloat(&m_sessionConfig);
+ if (m_ncCachedHandle == 0)
{
- syslog(LOG_ERR, "KrispProcessor::Init: Failed to set model file %s", modelPath);
+ syslog(LOG_ERR, "KrispProcessor::Init: Failed to create Krisp NC session");
return false;
- }
-
+ }
return true;
}
-bool KrispProcessor::Init(const void* modelAddr, unsigned int modelSize, const char* krispDllPath)
+// Copies the model blob into m_modelData and (re)creates the NC session from it; false if creation fails.
+bool KrispNoiseFilter::Init(const void* modelAddr, unsigned int modelSize)
{
- if (!KrispSDK::LoadDll(krispDllPath))
- {
- syslog(LOG_ERR, "KrispProcessor::Init: Unable to find Krisp DLL");
+ m_modelData.resize(modelSize);
+ std::memcpy(m_modelData.data(), modelAddr, modelSize);
+ m_modelInfo.path = L"";
+ m_modelInfo.blob.data = m_modelData.data();
+ m_modelInfo.blob.size = modelSize;
+ if (m_ncCachedHandle) KrispSDK::DestroyNcFloat(m_ncCachedHandle); // do not leak the previous session on re-init
+ m_ncCachedHandle = KrispSDK::CreateNcFloat(&m_sessionConfig);
+ if (m_ncCachedHandle == 0) {
+ syslog(LOG_ERR, "KrispProcessor::Init: Failed to create Krisp NC session");
return false;
}
-
- if (!KrispSDK::GlobalInit(nullptr))
- {
- syslog(LOG_ERR, "KrispProcessor::Init Failed to initialize Krisp globals");
- return false;
- }
-
- if (KrispSDK::SetModelBlob(modelAddr, modelSize, "default") != 0)
- {
- syslog(LOG_ERR, "KrispProcessor::Init: Krisp failed to set model via blob api");
- return false;
- }
-
return true;
}
-void KrispProcessor::Enable(bool isEnable)
+void KrispNoiseFilter::Enable(bool isEnable)
{
- m_isEnabled = isEnable;
+ m_isEnabled.store(isEnable, std::memory_order_release);
}
-bool KrispProcessor::IsEnabled() const
+bool KrispNoiseFilter::IsEnabled() const
{
- return m_isEnabled;
+ return m_isEnabled.load(std::memory_order_acquire);
}
-void KrispProcessor::Initialize(int sampleRate, int numberOfChannels)
+void KrispNoiseFilter::InitializeSession(int sampleRate, int numberOfChannels)
{
syslog(LOG_INFO, "KrispProcessor::Initialize: sampleRate: %i\
numberOfChannels: %i", sampleRate, numberOfChannels);
+
m_numberOfChannels = numberOfChannels;
- if (m_sampleRate != sampleRate || m_session == nullptr)
+ m_sessionConfig.inputSampleRate = GetSampleRate(sampleRate);
+ m_sessionConfig.outputSampleRate = m_sessionConfig.inputSampleRate;
+
+ if (!IsModelSet(m_modelInfo)) {
+ syslog(LOG_INFO, "KrispProcessor::Initialize: model not loaded yet");
+ return;
+ }
+
+ krispNcHandle newNcHandle = KrispSDK::CreateNcFloat(&m_sessionConfig);
+ if (newNcHandle == 0)
{
- if (m_session)
- {
- KrispSDK::NcCloseSession(m_session);
- }
- m_session = CreateAudioSession(sampleRate);
- m_sampleRate = sampleRate;
- if (m_session == nullptr)
- {
- // TODO: throw a valid WebRTC exception for error handling
- syslog(LOG_ERR, "KrispProcessor::Initialize: Failed creating Krisp AudioSession");
- return;
- }
+ syslog(LOG_ERR, "KrispProcessor::Initialize: Failed to create Krisp NC session");
+ return;
+ }
+ if (m_ncCachedHandle && KrispSDK::DestroyNcFloat(m_ncCachedHandle) != KrispRetValSuccess) {
+ syslog(LOG_ERR, "KrispProcessor::Initialize: Failed to destroy Krisp NC session");
+ // TODO: handle memory leak
}
+ m_ncCachedHandle = newNcHandle;
}
-void KrispProcessor::Process(webrtc::AudioBuffer* audioBuffer)
+void KrispNoiseFilter::ProcessFrame(webrtc::AudioBuffer* audioBuffer)
{
- if(!KrispProcessor::IsEnabled())
+ if(!KrispNoiseFilter::IsEnabled())
{
syslog(LOG_DEBUG, "KrispProcessor::Process: Bypassing NoiseSuppressor::Process");
return;
@@ -154,37 +289,51 @@ void KrispProcessor::Process(webrtc::AudioBuffer* audioBuffer)
m_lastTimeStamp = now;
}
- int audioBufferSampleRate = audioBuffer->num_frames() * 1000;
- if(audioBufferSampleRate != m_sampleRate)
+ int audioBufferSampleRate = audioBuffer->num_frames() * 100;
+ if(audioBufferSampleRate != static_cast(m_sessionConfig.inputSampleRate))
{
- if (m_session)
+ m_sessionConfig.inputSampleRate = GetSampleRate(audioBufferSampleRate);
+ m_sessionConfig.outputSampleRate = m_sessionConfig.inputSampleRate;
+ krispNcHandle newNcHandle = KrispSDK::CreateNcFloat(&m_sessionConfig);
+ if (newNcHandle == 0)
{
- KrispSDK::NcCloseSession(m_session);
+ syslog(LOG_ERR, "KrispProcessor::Process: Failed to create Krisp NC session");
+ return;
}
- m_session = CreateAudioSession(audioBufferSampleRate);
- m_sampleRate = audioBufferSampleRate;
- if (m_session == nullptr)
+ if (m_ncCachedHandle)
{
- syslog(LOG_ERR, "KrispProcessor::Process: Failed creating AudioSession");
- return;
- }
+ if (KrispSDK::DestroyNcFloat(m_ncCachedHandle) != KrispRetValSuccess) {
+ syslog(LOG_ERR, "KrispProcessor::Process: Failed to destroy Krisp NC session");
+ // TODO: handle memory leak
+ }
+ }
+ m_ncCachedHandle = newNcHandle;
}
- constexpr size_t kNsFrameSize = 160;
- size_t bufferSize = kNsFrameSize * audioBuffer->num_bands();
- m_bufferIn.resize(bufferSize);
- m_bufferOut.resize(bufferSize);
+ if (!m_ncCachedHandle) {
+ syslog(LOG_DEBUG, "KrispProcessor::Process: Krisp session is not initialized");
+ return;
+ }
+
+ size_t bufferSize = audioBuffer->num_frames();
+ if (m_bufferIn.size() != bufferSize) {
+ m_bufferIn.resize(bufferSize);
+ }
+ if (m_bufferOut.size() != bufferSize) {
+ m_bufferOut.resize(bufferSize);
+ }
for (size_t i = 0; i < bufferSize; ++i)
{
m_bufferIn[i] = audioBuffer->channels()[0][i] / 32768.f;
}
- auto returnCode = KrispSDK::NcCleanAmbientNoiseFloat(
- m_session, m_bufferIn.data(), bufferSize,
- m_bufferOut.data(), bufferSize);
+ auto returnCode = KrispSDK::ProcessNcFloat(
+ m_ncCachedHandle,
+ m_bufferIn.data(), bufferSize,
+ m_bufferOut.data(), bufferSize, 100.0f, nullptr);
- if (returnCode != 0)
+ if (returnCode != KrispRetValSuccess)
{
syslog(LOG_INFO, "KrispProcessor::Process: Krisp noise cleanup error");
return;
@@ -196,60 +345,77 @@ void KrispProcessor::Process(webrtc::AudioBuffer* audioBuffer)
}
}
-std::string KrispProcessor::ToString() const
+
+KrispAdapter::KrispAdapter(const std::shared_ptr& krispProcessor) :
+ m_krispProcessor(krispProcessor)
{
- return "KrispAudioProcessor";
}
-void KrispProcessor::SetRuntimeSetting(webrtc::AudioProcessing::RuntimeSetting setting)
+void KrispAdapter::Initialize(int sampleRate, int numOfChannels)
{
+ m_krispProcessor->InitializeSession(sampleRate, numOfChannels);
}
-static KrispAudioSamplingRate GetSampleRate(size_t sampleRate)
+void KrispAdapter::Process(webrtc::AudioBuffer* audioBuffer)
{
- switch (sampleRate)
- {
- case 8000:
- return KRISP_AUDIO_SAMPLING_RATE_8000HZ;
- case 16000:
- return KRISP_AUDIO_SAMPLING_RATE_16000HZ;
- case 24000:
- return KRISP_AUDIO_SAMPLING_RATE_24000HZ;
- case 32000:
- return KRISP_AUDIO_SAMPLING_RATE_32000HZ;
- case 44100:
- return KRISP_AUDIO_SAMPLING_RATE_44100HZ;
- case 48000:
- return KRISP_AUDIO_SAMPLING_RATE_48000HZ;
- case 88200:
- return KRISP_AUDIO_SAMPLING_RATE_88200HZ;
- case 96000:
- return KRISP_AUDIO_SAMPLING_RATE_96000HZ;
- default:
- syslog(LOG_INFO, "KrispProcessor::GetSampleRate: The input sampling rate: %zu \
- is not supported. Using default 48khz.", sampleRate);
- return KRISP_AUDIO_SAMPLING_RATE_48000HZ;
- }
+ m_krispProcessor->ProcessFrame(audioBuffer);
+}
+
+std::string KrispAdapter::ToString() const
+{
+ return "KrispAudioProcessor";
}
-static KrispAudioFrameDuration GetFrameDuration(size_t duration)
+void KrispAdapter::SetRuntimeSetting(webrtc::AudioProcessing::RuntimeSetting setting)
{
- switch (duration)
+ if (setting.type() ==
+ webrtc::AudioProcessing::RuntimeSetting::Type::kCaptureOutputUsed)
{
- case 10:
- return KRISP_AUDIO_FRAME_DURATION_10MS;
- default:
- syslog(LOG_INFO, "KrispProcessor::GetFrameDuration: Frame duration: %zu \
- is not supported. Switching to default 10ms", duration);
- return KRISP_AUDIO_FRAME_DURATION_10MS;
+ bool enable = false;
+ setting.GetBool(&enable);
+ m_krispProcessor->Enable(enable);
+ }
+}
+
+static std::unique_ptr BuildModule(
+ const std::shared_ptr& proc)
+{
+ auto m = std::make_unique();
+ m->proc = proc;
+ m->apm = webrtc::AudioProcessingBuilder()
+ .SetCapturePostProcessing(std::make_unique(m->proc))
+ .Create();
+ webrtc::AudioProcessing::Config config;
+ config.echo_canceller.enabled = false;
+ config.echo_canceller.mobile_mode = true;
+ m->apm->ApplyConfig(config);
+ return m;
+}
+
+std::unique_ptr NativeKrispModule::Create()
+{
+ return BuildModule(std::make_shared());
+}
+
+std::unique_ptr NativeKrispModule::CreateWithModelPath(
+ const char* modelPath)
+{
+ auto proc = std::make_shared();
+ if (!proc->Init(modelPath)) {
+ return nullptr;
}
+ return BuildModule(proc);
}
-void * KrispProcessor::CreateAudioSession(int sampleRate)
+std::unique_ptr NativeKrispModule::CreateWithModelData(
+ const void* modelData, unsigned int modelSize)
{
- auto krispSampleRate = GetSampleRate(sampleRate);
- auto krispFrameDuration = GetFrameDuration(KRISP_AUDIO_FRAME_DURATION_10MS);
- return KrispSDK::NcCreateSession(krispSampleRate, krispSampleRate, krispFrameDuration, "default");
+ auto proc = std::make_shared();
+ if (!proc->Init(modelData, modelSize)) {
+ return nullptr;
+ }
+ return BuildModule(proc);
}
+
}
diff --git a/krisp_processor.hpp b/krisp_processor.hpp
index d837887..0e44f16 100644
--- a/krisp_processor.hpp
+++ b/krisp_processor.hpp
@@ -1,47 +1,86 @@
+#include
+#include
+#include
+#include
+
#include "modules/audio_processing/include/audio_processing.h"
-#include "modules/audio_processing/audio_processing_impl.h"
#include "modules/audio_processing/audio_buffer.h"
+#include "krisp-audio-sdk-nc-c.h"
namespace Krisp
{
-class KrispProcessor : public webrtc::CustomProcessing
-{
-public:
+// Load Krisp DLL before using Krisp API
+bool LoadKrisp(const char* krispDllPath);
- KrispProcessor(const KrispProcessor&) = delete;
- KrispProcessor(KrispProcessor&&) = delete;
- KrispProcessor& operator=(const KrispProcessor&) = delete;
- KrispProcessor& operator=(KrispProcessor&&) = delete;
- ~KrispProcessor();
+// Unload Krisp DLL only after disposing all KrispNoiseFilter instances
+bool UnloadKrisp();
- static KrispProcessor* GetInstance();
+class KrispNoiseFilter
+{
+public:
+ KrispNoiseFilter();
+ KrispNoiseFilter(const KrispNoiseFilter&) = delete;
+ KrispNoiseFilter(KrispNoiseFilter&&) = delete;
+ KrispNoiseFilter& operator=(const KrispNoiseFilter&) = delete;
+ KrispNoiseFilter& operator=(KrispNoiseFilter&&) = delete;
+ virtual ~KrispNoiseFilter();
- bool Init(const char* modelPath, const char* krispDllPath);
- bool Init(const void* modelAddr, unsigned int modelSize, const char* krispDllPath);
+ bool Init(const char* modelPath);
+ bool Init(const void* modelAddr, unsigned int modelSize);
void DeInit();
void Enable(bool isEnable);
bool IsEnabled() const;
-private:
- KrispProcessor();
-
- static KrispProcessor* _singleton;
+ // Call this when sample rate changes.
+ // Call this when audio stream changes.
+ // Call this after the end of the call, or before the next call.
+ void InitializeSession(int sampleRate, int numberOfChannels);
+ void ProcessFrame(webrtc::AudioBuffer* audioBuffer);
- bool m_isEnabled;
- void* m_session;
- int m_sampleRate;
+private:
+ std::atomic m_isEnabled;
int m_numberOfChannels;
long m_lastTimeStamp;
+ std::wstring m_modelPath;
+ std::vector m_modelData;
std::vector m_bufferIn;
std::vector m_bufferOut;
+ KrispModelInfo m_modelInfo;
+ KrispNcSessionConfig m_sessionConfig;
+ krispNcHandle m_ncCachedHandle;
+};
- static void * CreateAudioSession(int sampleRate);
-
+// webrtc::CustomProcessing implementation that holds a shared KrispNoiseFilter
+// so the filter can be plugged into an AudioProcessing pipeline.
+class KrispAdapter : public webrtc::CustomProcessing
+{
+public:
+    explicit KrispAdapter(const std::shared_ptr<KrispNoiseFilter>& krispProcessor);
+    // Do not allow copy
+    KrispAdapter(const KrispAdapter&) = delete;
+    KrispAdapter& operator=(const KrispAdapter&) = delete;
+    // Allow move
+    KrispAdapter(KrispAdapter&&) = default;
+    KrispAdapter& operator=(KrispAdapter&&) = default;
+    ~KrispAdapter() override = default;
+private:
void Initialize(int sampleRate, int numOfChannels) override ;
void Process(webrtc::AudioBuffer* audioBuffer) override;
std::string ToString() const override;
void SetRuntimeSetting(webrtc::AudioProcessing::RuntimeSetting setting) override;
+
+    // Shared with the NativeKrispModule that created this adapter.
+    std::shared_ptr<KrispNoiseFilter> m_krispProcessor;
};
+
+// Pairs a Krisp noise filter with the AudioProcessing instance that hosts it.
+struct NativeKrispModule {
+    // The noise filter; also referenced by the KrispAdapter installed in `apm`.
+    std::shared_ptr<KrispNoiseFilter> proc;
+    // AudioProcessing instance with the adapter as capture post-processing.
+    rtc::scoped_refptr<webrtc::AudioProcessing> apm;
+
+    // Factories. The WithModel* variants return nullptr when the filter's
+    // Init() fails.
+    static std::unique_ptr<NativeKrispModule> Create();
+    static std::unique_ptr<NativeKrispModule> CreateWithModelPath(const char* modelPath);
+    static std::unique_ptr<NativeKrispModule> CreateWithModelData(
+        const void* modelData, unsigned int modelSize);
+};
+
+
}
diff --git a/krisp_processor_jni.cc b/krisp_processor_jni.cc
index 0b308b5..97e0cb7 100644
--- a/krisp_processor_jni.cc
+++ b/krisp_processor_jni.cc
@@ -4,80 +4,154 @@
#include
#include
-#include "rtc_base/time_utils.h"
-#include "rtc_base/checks.h"
-#include "rtc_base/ref_counted_object.h"
-#include "rtc_base/thread.h"
#include "sdk/android/src/jni/jni_helpers.h"
-#include "webrtc-android-krisp-module/generated_krisp_jni/KrispAudioProcessingImpl_jni.h"
+// Forward declaration so Module's method signatures can name JavaParamRef
+// before the generated JNI header is included.
+namespace jni_zero {
+template <typename T>
+class JavaParamRef;
+}
+
+namespace Krisp {
+// JNI-facing owner of one NativeKrispModule. Instances are created with `new`
+// by the factory functions in this file and deleted by Destroy().
+class Module {
+public:
+    explicit Module(std::unique_ptr<NativeKrispModule> module)
+        : module_(std::move(module)) {}
+
+    // Returns the wrapped AudioProcessing pointer as a jlong, or 0 if absent.
+    jlong GetAudioProcessorModule(JNIEnv* env);
+    // (Re)initialise the noise filter from a model file path / model bytes.
+    jboolean Init(JNIEnv* env, const jni_zero::JavaParamRef<jstring>& modelPathRef);
+    jboolean InitWithData(JNIEnv* env, const jni_zero::JavaParamRef<jbyteArray>& modelDataRef);
+    // `enable` is forwarded as the enable flag (true turns the filter on).
+    void Enable(JNIEnv* env, jboolean enable);
+    jboolean IsEnabled(JNIEnv* env);
+    // Deletes this object; the handle must not be used afterwards.
+    void Destroy(JNIEnv* env);
+
+private:
+    std::unique_ptr<NativeKrispModule> module_;
+};
+}
+
+#if __has_include("webrtc-android-krisp-module/generated_krisp_jni/KrispAudioProcessingFactory_jni.h")
+#include "webrtc-android-krisp-module/generated_krisp_jni/KrispAudioProcessingFactory_jni.h"
+#endif
+
namespace Krisp
{
-static webrtc::AudioProcessing* apmPtr;
+#if defined(__GNUC__)
+#define JNI_UNUSED __attribute__((unused))
+#else
+#define JNI_UNUSED
+#endif
-static jlong JNI_KrispAudioProcessingImpl_GetAudioProcessorModule(JNIEnv* env)
+// Allocates a Module around NativeKrispModule::Create() and returns it to Java
+// as an opaque jlong handle.
+static jlong JNI_UNUSED JNI_KrispAudioProcessingFactory_CreateModule(JNIEnv* env)
{
+    Module* wrapper = new Module(NativeKrispModule::Create());
+    return webrtc::jni::jlongFromPointer(wrapper);
+}
- std::unique_ptr krisp_processor(
- KrispProcessor::GetInstance());
- auto apm = webrtc::AudioProcessingBuilder()
- .SetCapturePostProcessing(std::move(krisp_processor))
- .Create();
- webrtc::AudioProcessing::Config config;
- config.echo_canceller.enabled = false;
- config.echo_canceller.mobile_mode = true;
- apm->ApplyConfig(config);
- apmPtr = apm.release();
- return webrtc::jni::jlongFromPointer(apmPtr);
-
+// Creates a Module whose filter is initialised from the model file named by
+// `modelPathRef`. Returns 0 when the Java string is null, its UTF-8 characters
+// cannot be obtained, or the native module cannot be created.
+static jlong JNI_UNUSED JNI_KrispAudioProcessingFactory_CreateModuleWithModelPath(
+    JNIEnv* env,
+    const webrtc::JavaParamRef<jstring>& modelPathRef)
+{
+    jstring javaModelPath = modelPathRef.obj();
+    if (javaModelPath == nullptr) {
+        return 0;
+    }
+    const char *modelFilePath = env->GetStringUTFChars(javaModelPath, nullptr);
+    if (modelFilePath == nullptr) {
+        // GetStringUTFChars failed; there is nothing to release.
+        return 0;
+    }
+    auto module = NativeKrispModule::CreateWithModelPath(modelFilePath);
+    env->ReleaseStringUTFChars(javaModelPath, modelFilePath);
+    if (!module) {
+        return 0;
+    }
+    return webrtc::jni::jlongFromPointer(new Module(std::move(module)));
}
-static void JNI_KrispAudioProcessingImpl_Enable(JNIEnv* env, jboolean disable)
+// Creates a Module whose filter is initialised from the model bytes in
+// `modelDataRef`. Returns 0 when the array is null, its elements cannot be
+// obtained, or the native module cannot be created.
+static jlong JNI_UNUSED JNI_KrispAudioProcessingFactory_CreateModuleWithModelData(
+    JNIEnv* env,
+    const webrtc::JavaParamRef<jbyteArray>& modelDataRef)
{
- KrispProcessor::GetInstance()->Enable(disable);
+    jbyteArray javaByteArray = modelDataRef.obj();
+    if (javaByteArray == nullptr) {
+        return 0;
+    }
+    jsize javaModelSize = env->GetArrayLength(javaByteArray);
+    jbyte *javaModelData = env->GetByteArrayElements(javaByteArray, nullptr);
+    if (javaModelData == nullptr) {
+        // GetByteArrayElements failed; there is nothing to release.
+        return 0;
+    }
+    auto module = NativeKrispModule::CreateWithModelData(
+        javaModelData, static_cast<unsigned int>(javaModelSize));
+    // JNI_ABORT: the buffer was only read, so nothing is copied back.
+    env->ReleaseByteArrayElements(javaByteArray, javaModelData, JNI_ABORT);
+    if (!module) {
+        return 0;
+    }
+    return webrtc::jni::jlongFromPointer(new Module(std::move(module)));
}
-static jboolean JNI_KrispAudioProcessingImpl_IsEnabled(JNIEnv* env)
+// Returns the wrapped AudioProcessing pointer as a jlong, or 0 when this Module
+// holds no native module or no AudioProcessing instance. The pointer comes from
+// apm.get(), so module_ keeps its own reference.
+jlong Module::GetAudioProcessorModule(JNIEnv* env)
{
- return KrispProcessor::GetInstance()->IsEnabled();
+    const bool hasApm = module_ && module_->apm;
+    if (!hasApm) {
+        return 0;
+    }
+    return webrtc::jni::jlongFromPointer(module_->apm.get());
}
-static jboolean JNI_KrispAudioProcessingImpl_Init(JNIEnv* env,
- const webrtc::JavaParamRef& modelPathRef,
+// Loads the Krisp dynamic library named by `krispDllPath`. Returns false when
+// the Java string is null, its UTF-8 characters cannot be obtained, or
+// LoadKrisp() fails. Declared static like the other JNI entry points here.
+static jboolean JNI_UNUSED JNI_KrispAudioProcessingFactory_LoadKrisp(
+    JNIEnv* env,
const webrtc::JavaParamRef& krispDllPath)
{
+    jstring javaDllPath = krispDllPath.obj();
+    if (javaDllPath == nullptr) {
+        return static_cast<jboolean>(false);
+    }
+    const char *dllPath = env->GetStringUTFChars(javaDllPath, nullptr);
+    if (dllPath == nullptr) {
+        // GetStringUTFChars failed; there is nothing to release.
+        return static_cast<jboolean>(false);
+    }
+    bool retValue = LoadKrisp(dllPath);
+    env->ReleaseStringUTFChars(javaDllPath, dllPath);
+    return static_cast<jboolean>(retValue);
+}
+
+// Unloads the Krisp dynamic library and reports UnloadKrisp()'s result.
+// Declared static like the other JNI entry points in this file.
+static jboolean JNI_UNUSED JNI_KrispAudioProcessingFactory_UnloadKrisp(JNIEnv* env)
+{
+    return static_cast<jboolean>(UnloadKrisp());
+}
+
+// Turns noise filtering on (true) or off (false) by posting a
+// capture-output-used runtime setting to the AudioProcessing instance.
+// The parameter was previously named `disable` although its value is forwarded
+// unchanged as the enable flag. Does nothing when no APM is held.
+void Module::Enable(JNIEnv* env, jboolean enable)
+{
+    if (!module_ || !module_->apm) {
+        return;
+    }
+    module_->apm->SetRuntimeSetting(
+        webrtc::AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(
+            enable != JNI_FALSE));
+}
+
+// Reports the noise filter's enabled state; false when this Module holds no
+// native module or no filter.
+jboolean Module::IsEnabled(JNIEnv* env)
+{
+    const bool hasFilter = module_ && module_->proc;
+    if (hasFilter) {
+        return module_->proc->IsEnabled();
+    }
+    return false;
+}
+
+// Initialises the noise filter from the model file named by `modelPathRef`.
+// Returns false when no filter is held, the Java string is null, its UTF-8
+// characters cannot be obtained, or the filter's Init() fails.
+jboolean Module::Init(JNIEnv* env, const jni_zero::JavaParamRef<jstring>& modelPathRef)
+{
+    if (!module_ || !module_->proc || modelPathRef.obj() == nullptr) {
+        return false;
+    }
jstring javaModelPath = modelPathRef.obj();
- jstring javaDllPath = krispDllPath.obj();
const char *modelFilePath = env->GetStringUTFChars(javaModelPath, nullptr);
- const char *dllPath = env->GetStringUTFChars(javaDllPath, nullptr);
- bool retValue = KrispProcessor::GetInstance()->Init(modelFilePath, dllPath);
+    if (modelFilePath == nullptr) {
+        // GetStringUTFChars failed; there is nothing to release.
+        return false;
+    }
+    bool retValue = module_->proc->Init(modelFilePath);
env->ReleaseStringUTFChars(javaModelPath, modelFilePath);
return static_cast(retValue);
}
-static jboolean JNI_KrispAudioProcessingImpl_InitWithData(JNIEnv* env,
- const webrtc::JavaParamRef& modelDataRef,
- const webrtc::JavaParamRef& krispDllPath)
+// Initialises the noise filter from the model bytes in `modelDataRef`.
+// Returns false when no filter is held, the array is null, its elements cannot
+// be obtained, or the filter's Init() fails.
+jboolean Module::InitWithData(JNIEnv* env,
+    const jni_zero::JavaParamRef<jbyteArray>& modelDataRef)
{
+    if (!module_ || !module_->proc || modelDataRef.obj() == nullptr) {
+        return false;
+    }
jbyteArray javaByteArray = modelDataRef.obj();
- jstring javaDllPath = krispDllPath.obj();
jsize javaModelSize = env->GetArrayLength(javaByteArray);
jbyte *javaModelData = env->GetByteArrayElements(javaByteArray, nullptr);
- const char *dllPath = env->GetStringUTFChars(javaDllPath, nullptr);
+    if (javaModelData == nullptr) {
+        // GetByteArrayElements failed; memcpy below must not read from null.
+        return false;
+    }
size_t arraySize = static_cast(javaModelSize);
std::unique_ptr modelData(new char[arraySize]);
std::memcpy(modelData.get(), javaModelData, arraySize);
- bool retValue = KrispProcessor::GetInstance()->Init(modelData.get(), arraySize, dllPath);
+    bool retValue = module_->proc->Init(modelData.get(), arraySize);
env->ReleaseByteArrayElements(javaByteArray, javaModelData, JNI_ABORT);
return static_cast(retValue);
}
-static void JNI_KrispAudioProcessingImpl_Destroy(JNIEnv* env)
+void Module::Destroy(JNIEnv* env)
{
- delete apmPtr;
- apmPtr = nullptr;
+ delete this;
}
+#undef JNI_UNUSED
+
}
diff --git a/krisp_sdk.cc b/krisp_sdk.cc
index ff5ec5b..ba077f8 100644
--- a/krisp_sdk.cc
+++ b/krisp_sdk.cc
@@ -1,63 +1,57 @@
#include "krisp_sdk.h"
-#include
+// required by dlopen, dlclose, dlsym
#include
+// required by syslog
#include
-
+// required by std::array
#include
+// required by std::call_once
+#include
namespace KrispSDK {
enum class KrispFunctionId
{
- krispAudioGlobalInit = 0,
- krispAudioGlobalDestroy = 1,
- krispAudioSetModel = 2,
- krispAudioSetModelBlob = 3,
- krispAudioRemoveModel = 4,
- krispAudioNcCreateSession = 5,
- krispAudioNcCloseSession = 6,
- krispAudioNcCleanAmbientNoiseFloat = 7
+ krispGlobalInit = 0,
+ krispGlobalDestroy = 1,
+ krispCreateNcFloat = 2,
+ krispDestroyNc = 3,
+ krispProcessNcFloat = 4,
};
-class KrispAudioSdkDllGate
-{
-public:
-
- static KrispAudioSdkDllGate * singleton()
- {
- if (!_singleton)
- {
- _singleton = new KrispAudioSdkDllGate;
- }
- return _singleton;
- }
-
- bool LoadDll(const char* krispDllPath)
- {
- dlerror();
- _dllHandle = dlopen(krispDllPath, RTLD_LAZY);
- if (!_dllHandle) {
- syslog(LOG_ERR, "KrispSDK::LoadDll: Failed to load the library = %s\n", krispDllPath);
- return false;
- }
- syslog(LOG_INFO, "Krisp DLL loaded: %s", krispDllPath);
- return LoadFunctions();
- }
-
- void UnloadDll()
- {
- if (_dllHandle)
- {
- dlclose(_dllHandle);
- _dllHandle = nullptr;
- }
- for (auto & functionPtr : _functionPointers)
- {
- functionPtr = nullptr;
- }
- }
+class KrispAudioSdkDllGate {
+ public:
+    // Returns the shared gate instance, allocating it on the first call.
+    // std::call_once runs the allocation a single time.
+    static KrispAudioSdkDllGate* singleton() {
+        const auto createInstance = [] { _singleton = new KrispAudioSdkDllGate; };
+        std::call_once(_initFlag, createInstance);
+        return _singleton;
+    }
+
+    // Opens the Krisp shared library at `krispDllPath` and resolves the SDK
+    // entry points through LoadFunctions().
+    // Returns false when the path is null, when dlopen fails (the dlerror()
+    // text is logged), or when LoadFunctions() returns false.
+    bool LoadDll(const char* krispDllPath) {
+        if (krispDllPath == nullptr) {
+            // dlopen(nullptr) would succeed with a handle to the main program.
+            syslog(LOG_ERR, "KrispSDK::LoadDll: library path is null");
+            return false;
+        }
+        dlerror();  // clear any stale error so the next dlerror() is about this dlopen
+        _dllHandle = dlopen(krispDllPath, RTLD_LAZY);
+        if (!_dllHandle) {
+            const char* reason = dlerror();
+            syslog(LOG_ERR,
+                   "KrispSDK::LoadDll: Failed to load the library = %s\n",
+                   krispDllPath);
+            syslog(LOG_ERR, "KrispSDK::LoadDll: dlopen error: %s",
+                   reason ? reason : "unknown");
+            return false;
+        }
+        syslog(LOG_INFO, "Krisp DLL loaded: %s", krispDllPath);
+        return LoadFunctions();
+    }
+
+    // Closes the library handle if one is open, then clears every cached
+    // function pointer.
+    void UnloadDll() {
+        if (_dllHandle != nullptr) {
+            dlclose(_dllHandle);
+            _dllHandle = nullptr;
+        }
+        _functionPointers.fill(nullptr);
+    }
template
ReturnType InvokeFunction(KrispFunctionId functionId, Args... args)
@@ -93,23 +87,22 @@ class KrispAudioSdkDllGate
}
static KrispAudioSdkDllGate * _singleton;
+ static std::once_flag _initFlag;
void* _dllHandle = nullptr;
- static constexpr unsigned int _functionCount = 8;
+ static constexpr unsigned int _functionCount = 5;
static constexpr std::array _functionNames =
{
- "krispAudioGlobalInit",
- "krispAudioGlobalDestroy",
- "krispAudioSetModel",
- "krispAudioSetModelBlob",
- "krispAudioRemoveModel",
- "krispAudioNcCreateSession",
- "krispAudioNcCloseSession",
- "krispAudioNcCleanAmbientNoiseFloat"
+ "krispGlobalInit",
+ "krispGlobalDestroy",
+ "krispCreateNcFloat",
+ "krispDestroyNc",
+ "krispProcessNcFloat"
};
std::array _functionPointers = {};
};
KrispAudioSdkDllGate * KrispAudioSdkDllGate::_singleton = nullptr;
+std::once_flag KrispAudioSdkDllGate::_initFlag;
template
ReturnType InvokeFunction(KrispFunctionId functionId, Args... args)
@@ -127,72 +120,44 @@ void UnloadDll()
KrispAudioSdkDllGate::singleton()->UnloadDll();
}
-bool GlobalInit(void* param)
-{
- int result = InvokeFunction(KrispFunctionId::krispAudioGlobalInit, param);
- return result == 0 ? true: false;
-}
-
-int SetModel(const wchar_t* weightFilePath, const char* modelName)
-{
- int result = InvokeFunction(KrispFunctionId::krispAudioSetModel, weightFilePath, modelName);
- return result;
-}
-
-int SetModelBlob(const void* modelAddress, unsigned int modelSize, const char* modelName)
-{
- int result = InvokeFunction(KrispFunctionId::krispAudioSetModelBlob, modelAddress, modelSize, modelName);
- return result;
-}
-
-int RemoveModel(const char* modelName)
+// Forwards to the SDK's krispGlobalInit entry point and returns its result.
+KrispRetVal GlobalInit(const wchar_t* workingPath,
+                       void (*logCallback)(const char*, KrispLogLevel),
+                       KrispLogLevel logLevel)
{
- return InvokeFunction(KrispFunctionId::krispAudioRemoveModel, modelName);
+    return InvokeFunction<KrispRetVal>(KrispFunctionId::krispGlobalInit, workingPath, logCallback, logLevel);
}
-int GlobalDestroy()
+// Forwards to the SDK's krispGlobalDestroy entry point and returns its result.
+KrispRetVal GlobalDestroy()
{
- int result = InvokeFunction(KrispFunctionId::krispAudioGlobalDestroy);
- return result;
+    return InvokeFunction<KrispRetVal>(KrispFunctionId::krispGlobalDestroy);
}
-int NcCloseSession(void* session)
+// Forwards to the SDK's krispCreateNcFloat entry point and returns the handle
+// it produces.
+krispNcHandle CreateNcFloat(const KrispNcSessionConfig* config)
{
- int result = InvokeFunction(KrispFunctionId::krispAudioNcCloseSession, session);
- return result;
+    return InvokeFunction<krispNcHandle>(KrispFunctionId::krispCreateNcFloat, config);
}
-KrispAudioSessionID NcCreateSession(
- KrispAudioSamplingRate inputSampleRate,
- KrispAudioSamplingRate outputSampleRate,
- KrispAudioFrameDuration frameDuration,
- const char* modelName)
+// Forwards to the SDK's krispDestroyNc entry point and returns its result.
+KrispRetVal DestroyNcFloat(const krispNcHandle session)
{
-
- KrispAudioSessionID result = InvokeFunction(KrispFunctionId::krispAudioNcCreateSession,
- inputSampleRate,
- outputSampleRate,
- frameDuration,
- modelName);
-
- return result;
+    return InvokeFunction<KrispRetVal>(KrispFunctionId::krispDestroyNc, session);
}
-int NcCleanAmbientNoiseFloat(
- KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize,
- float* pFrameOut,
- unsigned int frameOutSize)
+// Forwards one frame to the SDK's krispProcessNcFloat entry point, passing
+// every argument through unchanged, and returns its result.
+KrispRetVal ProcessNcFloat(const krispNcHandle session,
+                           const float* inputSamples,
+                           size_t numInputSamples,
+                           float* outputSamples,
+                           size_t numOutputSamples,
+                           float noiseSuppressionLevel,
+                           KrispNcPerFrameStats* frameStats)
{
- int result = InvokeFunction(KrispFunctionId::krispAudioNcCleanAmbientNoiseFloat,
- pSession,
- pFrameIn,
- frameInSize,
- pFrameOut,
- frameOutSize
- );
- return result;
+    return InvokeFunction<KrispRetVal>(KrispFunctionId::krispProcessNcFloat,
+                                       session,
+                                       inputSamples,
+                                       numInputSamples,
+                                       outputSamples,
+                                       numOutputSamples,
+                                       noiseSuppressionLevel,
+                                       frameStats);
}
}
\ No newline at end of file
diff --git a/krisp_sdk.h b/krisp_sdk.h
index f359b24..f271a34 100644
--- a/krisp_sdk.h
+++ b/krisp_sdk.h
@@ -1,26 +1,31 @@
-#include "inc/krisp-audio-sdk.hpp"
-#include "inc/krisp-audio-sdk-nc.hpp"
+// required by KrispRetVal, KrispLogLevel, KrispVersionInfo
+#include "krisp-audio-sdk-c.h"
+// required by KrispNcSessionConfig, KrispNcHandle, KrispNcPerFrameStats
+#include "krisp-audio-sdk-nc-c.h"
namespace KrispSDK
{
- bool LoadDll(const char* dllPath);
- void UnloadDll();
- bool GlobalInit(void* param);
- int SetModel(const wchar_t* weightFilePath, const char* modelName);
- int SetModelBlob(const void* weightBlob, unsigned int blobSize, const char* modelName);
- int RemoveModel(const char* modelName);
- int GlobalDestroy();
- int NcCloseSession(void* m_session);
- KrispAudioSessionID NcCreateSession(
- KrispAudioSamplingRate inputSampleRate,
- KrispAudioSamplingRate outputSampleRate,
- KrispAudioFrameDuration frameDuration,
- const char* modelName);
- int NcCleanAmbientNoiseFloat(
- KrispAudioSessionID pSession,
- const float* pFrameIn,
- unsigned int frameInSize,
- float* pFrameOut,
- unsigned int frameOutSize);
+ // Loads the Krisp SDK shared library from the provided path (dlopen with RTLD_LAZY); returns false if it cannot be opened.
+ bool LoadDll(const char* dllPath);
+ void UnloadDll(); // closes the library handle if open and clears the cached function pointers
+
+ // Global lifecycle: thin wrappers over the SDK's krispGlobalInit / krispGlobalDestroy entry points.
+ KrispRetVal GlobalInit(const wchar_t* workingPath,
+ void (*logCallback)(const char*, KrispLogLevel),
+ KrispLogLevel logLevel);
+ KrispRetVal GlobalDestroy();
+
+ // Noise cancellation session management: wrappers over krispCreateNcFloat / krispDestroyNc.
+ krispNcHandle CreateNcFloat(const KrispNcSessionConfig* config);
+ KrispRetVal DestroyNcFloat(const krispNcHandle session);
+
+ // Per-frame processing: wrapper over krispProcessNcFloat; the last two parameters are optional.
+ KrispRetVal ProcessNcFloat(const krispNcHandle session,
+ const float* inputSamples,
+ size_t numInputSamples,
+ float* outputSamples,
+ size_t numOutputSamples,
+ float noiseSuppressionLevel = 100.0f, // forwarded to the SDK unchanged
+ KrispNcPerFrameStats* frameStats = nullptr); // pass nullptr when per-frame stats are not needed - TODO confirm against SDK docs
}