diff --git a/BUILD.gn b/BUILD.gn index 7b4dd6d..2734d19 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -8,6 +8,7 @@ rtc_library("krisp_processor") { configs += [ "../modules/audio_processing:apm_debug_dump", ] configs += [ "//build/config/android:hide_all_but_jni" ] defines = ["WEBRTC_ANDROID", "WEBRTC_POSIX",] + include_dirs = [ "krisp_c_api_include" ] sources = [ "krisp_processor.cc", "krisp_processor_jni.cc", @@ -24,12 +25,12 @@ rtc_library("krisp_processor") { rtc_android_library("krisp_java") { visibility = [ "*" ] sources = [ - "java/src/org/webrtc/KrispAudioProcessingImpl.java",] + "java/src/org/webrtc/KrispAudioProcessingFactory.java",] deps = ["//sdk/android:peerconnection_java",] } generate_jni("generated_krisp_jni") { - sources = [ "java/src/org/webrtc/KrispAudioProcessingImpl.java",] + sources = [ "java/src/org/webrtc/KrispAudioProcessingFactory.java",] namespace = "Krisp" jni_generator_include = "//sdk/android/src/jni/jni_generator_helper.h" } diff --git a/README.md b/README.md index 45627e3..2c6114c 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ 1.1. include the `libwebrtc.aar` into the Android project. -1.2. `import org.webrtc.KrispAudioProcessingImpl` +1.2. `import org.webrtc.KrispAudioProcessingFactory` -1.3. `
var audioProcessorModule = KrispAudioProcessingImpl()` +1.3. `var audioProcessorModule = KrispAudioProcessingFactory()` ### 2 Load dependencies 2.1. Load the stdlib required by Krisp Audio SDK. Load it before using Krisp. @@ -21,14 +21,23 @@ `System.loadLibrary("jingle_peerconnection_so")` -### 3. Load the Krisp Dynamic Library with the model +### 3. Load the Krisp Dynamic Library +Load the Krisp dynamic library once before initializing a model. +``` +val krispDllpath = "libkrisp-audio-sdk.so" +val loaded = KrispAudioProcessingFactory.LoadKrisp(krispDllpath) +if (!loaded) { + // report error, read the logs for the details +} +``` + +### 4. Initialize the Krisp model #### 3.1. Using the model file path You can load Krisp model specifying file path. For this scenario you should make sure the Android app has access to the file resource. ``` -val modelFilePath = “c6.f.s.ced125.kw” -val krispDllpath = "libkrisp-audio-sdk.so"
 -var retValue = audioProcessorModule.Init(modelFilePath, krispDllpath) +val modelFilePath = "krisp-nc-o-med-v7.kef" +var retValue = audioProcessorModule.Init(modelFilePath) if (!retValue) { // report error, read the logs for the details } @@ -39,26 +48,31 @@ Make sure to specify correct file paths, these are hard coded sample values. Alternatively, you can load Krisp model by specifying model data content loaded into the memory. ``` var modelData: ByteArray // = load the model into the memory -audioProcessorModule.InitWithData(modelData, krispDllpath) +val retValue = audioProcessorModule.Init(modelData) if (!retValue) { return null } ``` -### 4. Enable, disable Krisp NC during runtime +### 5. Enable, disable Krisp NC during runtime to enable Krisp NC during runtime `audioProcessorModule.Enable(true)` to disable Krisp NC `audioProcessorModule.Enable(false)` -### 5. Integrate Krisp Module into the WebRTC PeerConnectionFactory +### 6. Integrate Krisp Module into the WebRTC PeerConnectionFactory ``` PeerConnectionFactory .builder() .setAudioProcessingFactory(audioProcessorModule) ``` +### 7. Unload Krisp when done +``` +KrispAudioProcessingFactory.UnloadKrisp() +``` + ## Build Instructions diff --git a/inc/krisp-audio-sdk-nc-stats.hpp b/inc/krisp-audio-sdk-nc-stats.hpp deleted file mode 100644 index 2df4026..0000000 --- a/inc/krisp-audio-sdk-nc-stats.hpp +++ /dev/null @@ -1,202 +0,0 @@ -/// -/// Copyright Krisp, Inc -/// - -#ifndef KRISP_AUDIO_SDK_NC_STATS_HPP_ -#define KRISP_AUDIO_SDK_NC_STATS_HPP_ - -#include "krisp-audio-sdk.hpp" - -/*! - * Cleaned secondary speech states - */ -typedef enum { - UNDEFINED = 0, - DETECTED = 1, - NOT_DETECTED = 2 -} KrispAudioCleanedSecondarySpeechStatus; - -/*! - * Krisp audio per-frame info - */ -typedef struct krispAudioNcPerFrameInfo_t { - /*! - * Voice energy level in the processed frame. Value range [0,100] - */ - unsigned int voiceEnergy; - - /*! - * Noise energy level in the processed frame. 
Value range [0,100] - */ - unsigned int noiseEnergy; - - /*! - * BVC specific feature. - * Returns the removed secondary speech state, e.g. in case if secondary speech detected and removed returns TRUE, - * otherwise returns FALSE. - * UNDEFINED will be returned in case of running the NC. - */ - KrispAudioCleanedSecondarySpeechStatus cleanedSecondarySpeechStatus; -} KrispAudioNcPerFrameInfo; - -/*! - * Krisp audio voice stats - */ -typedef struct krispAudioNcVoiceStats_t { - unsigned int talkTimeMs; -} KrispAudioNcVoiceStats; - -/*! - * Krisp audio noise stats based on the noise intensity level - */ -typedef struct krispAudioNcNoiseStats_t { - unsigned int noNoiseMs; - unsigned int lowNoiseMs; - unsigned int mediumNoiseMs; - unsigned int highNoiseMs; -} KrispAudioNcNoiseStats; - -/*! - * Krisp audio noise/voice stats - */ -typedef struct krispAudioNcStats_t { - KrispAudioNcVoiceStats voiceStats; - KrispAudioNcNoiseStats noiseStats; -} KrispAudioNcStats; - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/*! - * @brief This function creates Speech Enhance(Noise Canceler NC with stats) session object - * - * @param[in] inputSampleRate Sampling frequency of the input data - * @param[in] outputSampleRate Sampling frequency of the output data - * @param[in] frameDuration Frame duration - * @param[in] modelName The session ties to this model, and cleans the future frames using it. - * If modelName is \em nullptr then the SDK auto-detects the model based on input sampleRate - * @attention Always provide modelName explicitly to avoid ambiguity - * - * @return created session handle - */ -KRISP_AUDIO_API KrispAudioSessionID -krispAudioNcWithStatsCreateSession(KrispAudioSamplingRate inputSampleRate, - KrispAudioSamplingRate outputSampleRate, - KrispAudioFrameDuration frameDuration, - const char* modelName); - -/*! 
- * @brief This function releases all data tied to this particular session, closes the given NC session - * - * @param[in, out] pSession Handle to the NC with stats session to be closed - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcWithStatsCloseSession(KrispAudioSessionID pSession); - -/*! - * @brief This function cleans the ambient noise for the given single frame. Works with shorts (int16) with value in range [-2^15+1, 2^15] - * - * @param[in] pSession The NC With Stats Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100 - * @param[out] energyInfo Returns voice and noise energy levels of the current frame - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcWithStatsCleanAmbientNoiseInt16(KrispAudioSessionID pSession, - const short* pFrameIn, - unsigned int frameInSize, - short* pFrameOut, - unsigned int frameOutSize, - KrispAudioNcPerFrameInfo* energyInfo); - -/*! - * @brief This function cleans the ambient noise for the given single frame,if there is no ringtone. Works with shorts (int16) with value in range [-2^15+1, 2^15] - * - * @param[in] pSession The NC With Stats Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. 
The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100 - * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer. - * @param[out] energyInfo Returns voice and noise energy levels of the current frame if ringtone is false otherwise 0. - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcWithStatsCleanAmbientNoiseWithRingtoneInt16(KrispAudioSessionID pSession, - const short* pFrameIn, - unsigned int frameInSize, - short* pFrameOut, - unsigned int frameOutSize, - bool ringtone, - KrispAudioNcPerFrameInfo* energyInfo); - -/*! - * @brief This function cleans the ambient noise for the given single frame. Works with floats with values normalized in range [-1,1] - * @param[in] pSession The NC With Stats Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100 - * @param[out] energyInfo Returns voice and noise energy levels of the current frame - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcWithStatsCleanAmbientNoiseFloat(KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize, - float* pFrameOut, - unsigned int frameOutSize, - KrispAudioNcPerFrameInfo* energyInfo); - -/*! - * @brief This function cleans the ambient noise for the given single frame, if there is no ringtone. 
Works with floats with values normalized in range [-1,1] - * - * @param[in] pSession The NC With Stats Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100 - * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer. * @retval 0 Success - * @param[out] energyInfo Returns voice and noise energy levels of the current frame if ringtone is false otherwise 0. - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcWithStatsCleanAmbientNoiseWithRingtoneFloat(KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize, - float* pFrameOut, - unsigned int frameOutSize, - bool ringtone, - KrispAudioNcPerFrameInfo* energyInfo); - -/*! - * @brief This function used to retrieve the noise/voice stats while processing noise canceler. - * The recommended stats retrieval frequency is bigger or equal to 200ms. - * If it's required only at the end of the noise canceler processing (end of the call/audio stream) - * function should be called before pSession becomes invalid, i.e. after closing the specified session. 
- * - * @param[in] pSession The NC With Stats Session to which the stats belongs - * @param[out] pStats Noise/Voice stats returned - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcWithStatsRetrieveStats(KrispAudioSessionID pSession, - KrispAudioNcStats* pStats); - - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif // KRISP_AUDIO_SDK_NC_STATS_HPP_ diff --git a/inc/krisp-audio-sdk-nc.hpp b/inc/krisp-audio-sdk-nc.hpp deleted file mode 100644 index 736f840..0000000 --- a/inc/krisp-audio-sdk-nc.hpp +++ /dev/null @@ -1,137 +0,0 @@ -/// -/// Copyright Krisp, Inc -/// - -#ifndef KRISP_AUDIO_SDK_NC_HPP_ -#define KRISP_AUDIO_SDK_NC_HPP_ - -#include "krisp-audio-sdk.hpp" -#include - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/*! - * @brief This function creates Speech Enhance(Noise Canceler NC) session object - * - * @param[in] inputSampleRate Sampling frequency of the input data - * @param[in] outputSampleRate Sampling frequency of the output data - * @param[in] frameDuration Frame duration - * @param[in] modelName The session ties to this model, and cleans the future frames using it. - * If modelName is \em nullptr than the sdk auto-detecs the model based on input sampleRate - * @attention Always provide modelName explicitly to avoid ambiguity - * - * @return created session handle - */ -KRISP_AUDIO_API KrispAudioSessionID -krispAudioNcCreateSession(KrispAudioSamplingRate inputSampleRate, - KrispAudioSamplingRate outputSampleRate, - KrispAudioFrameDuration frameDuration, - const char* modelName); - -/*! - * @brief This function releases all data tied to this particular session, closes the given NC session - * - * @param[in, out] pSession Handle to the NC session to be closed - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcCloseSession(KrispAudioSessionID pSession); - -/*! - * @brief This function cleans the ambient noise for the given single frame. 
Works with shorts (int16) with value in range [-2^15+1, 2^15] - * - * @param[in] pSession The NC Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100 - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcCleanAmbientNoiseInt16(KrispAudioSessionID pSession, - const short* pFrameIn, - unsigned int frameInSize, - short* pFrameOut, - unsigned int frameOutSize); - -/*! - * @brief This function cleans the ambient noise for the given single frame,if there is no ringtone. Works with shorts (int16) with value in range [-2^15+1, 2^15] - * - * @param[in] pSession The NC Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize : this is output buffer size which must be frameDuration * outputSampleRate / 100 - * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer. - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcCleanAmbientNoiseWithRingtoneInt16(KrispAudioSessionID pSession, - const short* pFrameIn, - unsigned int frameInSize, - short* pFrameOut, - unsigned int frameOutSize, - bool ringtone); - -/*! 
- * @brief This function cleans the ambient noise for the given single frame. Works with floats with values normalized in range [-1,1] - * - * @param[in] pSession The NC Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100 - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcCleanAmbientNoiseFloat(KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize, - float* pFrameOut, - unsigned int frameOutSize); - -/*! - * @brief This function cleans the ambient noise for the given single frame, if there is no ringtone. Works with floats with values normalized in range [-1,1] - * - * @param[in] pSession The NC Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is input buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] pFrameOut Processed frames. The caller should allocate a buffer of at least frameDuration * outputSampleRate / 1000 size - * @param[in] frameOutSize This is output buffer size which must be frameDuration * outputSampleRate / 100 - * @param[in] ringtone : This specifies whether there is ringtone in the pFrameIn input buffer. 
* @retval 0 Success - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioNcCleanAmbientNoiseWithRingtoneFloat(KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize, - float* pFrameOut, - unsigned int frameOutSize, - bool ringtone); - -/*! - * @brief This function turns on/off background speaker fix feature. - * - * @param[in] pSession The NC Session to which the frame belongs - * @param[in] on on/off background speaker fix feature - * @return 0 Value was set successfully - * @return 1 Background speaker fix feature missing for this type of noise_cleaner - * - * @return -1, -2, -3, 2 Errors - */ -KRISP_AUDIO_API bool -krispAudioNcBackgroundSpeakerFixOnOff(KrispAudioSessionID pSession, - bool on); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif //// KRISP_AUDIO_SDK_NC_HPP_ diff --git a/inc/krisp-audio-sdk-rt.hpp b/inc/krisp-audio-sdk-rt.hpp deleted file mode 100644 index 2e9cb80..0000000 --- a/inc/krisp-audio-sdk-rt.hpp +++ /dev/null @@ -1,72 +0,0 @@ -/// -/// Copyright Krisp, Inc -/// - -#ifndef KRISP_AUDIO_SDK_RT_HPP_ -#define KRISP_AUDIO_SDK_RT_HPP_ - -#include "krisp-audio-sdk.hpp" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/*! - * @brief This function creates Ringtone detection session object - * - * @param[in] inputSampleRate Sampling frequency of the input data. - * @param[in] frameDuration Frame duration - * @param[in] modelName The session ties to this model, and processes the future frames using it - * If modelName is \em nullptr then the SDK auto-detects the model based on input sampleRate. - * @attention Always provide modelName explicitly to avoid ambiguity - * - * @return created session handle - */ -KRISP_AUDIO_API KrispAudioSessionID -krispAudioRingtoneCreateSession(KrispAudioSamplingRate inputSampleRate, - KrispAudioFrameDuration frameDuration, - const char* modelName); - -/*! 
- * @brief This function releases all data tied to this particular session, closes the given Ringtone session - * - * @param[in, out] pSession Handle to the Ringtone session to be closed - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioRingtoneCloseSession(KrispAudioSessionID pSession); - -/*! - * @brief This function processes the given frame and returns the Ringtone detection value. Works with shorts (int16) with value in range [-2^15+1, 2^15] - * - * @param[in] pSession The Ringtone Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000 - * - * @return Value in range [0,1]. - */ -KRISP_AUDIO_API float -krispAudioDetectRingtoneFrameInt16(KrispAudioSessionID pSession, - const short* pFrameIn, - unsigned int frameInSize); - -/*! - * @brief This function processes the given frame and returns the Ringtone detection value. Works with float values normalized in range [-1,1] - * - * @param[in] pSession The Ringtone Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000 - * - * @return Value in range [0,1]. 
- */ -KRISP_AUDIO_API float -krispAudioDetectRingtoneFrameFloat(KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif //// KRISP_AUDIO_SDK_RT_HPP_ diff --git a/inc/krisp-audio-sdk-vad.hpp b/inc/krisp-audio-sdk-vad.hpp deleted file mode 100644 index 70368b8..0000000 --- a/inc/krisp-audio-sdk-vad.hpp +++ /dev/null @@ -1,133 +0,0 @@ -/// -/// Copyright Krisp, Inc -/// - -#ifndef KRISP_AUDIO_SDK_VAD_HPP_ -#define KRISP_AUDIO_SDK_VAD_HPP_ - -#include "krisp-audio-sdk.hpp" - -/*! - * Krisp Audio bandwidth values - */ -typedef enum { - BAND_WIDTH_UNKNOWN = 0, - BAND_WIDTH_4000HZ = 1, - BAND_WIDTH_8000HZ = 2, - BAND_WIDTH_16000HZ = 3, -} KrispAudioBandWidth; - -/*! - * Krisp Audio real bandwidth info struct used by krispAudioVadFrameInt16Ex() and - * krispAudioVadFrameFloatEx() APIs - */ -typedef struct KrispAudioBandWidthInfo_t { - /* [out] Predicted real bandwidth, one of the @KrispAudioBandWidth values */ - KrispAudioBandWidth realBandwidth; - /* [in] Algorithm processing start point */ - int procStartDelayMs; - /* [in] Algorithm processing duration counted from the procStartDelayMs */ - int procDurationMs; - int reserved; -} KrispAudioBandWidthInfo; - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/*! - * @brief This function creates Voice Activity Detection session object ( VAD ) - * - * @param[in] inputSampleRate Sampling frequency of the input data. - * @param[in] frameDuration Frame duration - * @param[in] modelName The session ties to this model, and processes the future frames using it - * If modelName is \em nullptr then the SDK auto-detects the model based on input sampleRate. 
- * @attention Always provide modelName explicitly to avoid ambiguity - * - * @return created session handle - */ -KRISP_AUDIO_API KrispAudioSessionID -krispAudioVadCreateSession(KrispAudioSamplingRate inputSampleRate, - KrispAudioFrameDuration frameDuration, - const char* modelName); - -/*! - * @brief This function releases all data tied to this particular session, closes the given VAD session - * - * @param[in, out] pSession Handle to the VAD session to be closed - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioVadCloseSession(KrispAudioSessionID pSession); - -/*! - * @brief This function processes the given frame and returns the VAD detection value. Works with shorts (int16) with value in range [-2^15+1, 2^15] - * - * @param[in] pSession The VAD Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000 - * - * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples). - * The Threshold needs to be adjusted to fit a particular use case. - */ -KRISP_AUDIO_API float -krispAudioVadFrameInt16(KrispAudioSessionID pSession, - const short* pFrameIn, - unsigned int frameInSize); - -/*! - * @brief This function processes the given frame and returns the VAD detection value. Works with shorts (int16) with value in range [-2^15+1, 2^15] - * - * @param[in] pSession The VAD Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. 
It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] bandwidthInfo Returns BAND_WIDTH_UNKNOWN if still not predicted, otherwise the real bandwidth: one of the KrispAudioBandWidth values - * - * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples). - * The Threshold needs to be adjusted to fit a particular use case. - */ -KRISP_AUDIO_API float -krispAudioVadFrameInt16Ex(KrispAudioSessionID pSession, - const short* pFrameIn, - unsigned int frameInSize, - KrispAudioBandWidthInfo* bandwidthInfo); - -/*! - * @brief This function processes the given frame and returns the VAD detection value. Works with float values normalized in range [-1,1] - * - * @param[in] pSession The VAD Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000 - * - * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples). - * The Threshold needs to be adjusted to fit a particular use case. - */ -KRISP_AUDIO_API float -krispAudioVadFrameFloat(KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize); - - -/*! - * @brief This function processes the given frame and returns the VAD detection value. Works with float values normalized in range [-1,1] - * - * @param[in] pSession The VAD Session to which the frame belongs - * @param[in] pFrameIn Pointer to input frame. 
It's a continuous buffer with overall size of frameDuration * inputSampleRate / 1000 - * @param[in] frameInSize This is buffer size which must be frameDuration * inputSampleRate / 1000 - * @param[in,out] bandwidthInfo Returns BAND_WIDTH_UNKNOWN if still not predicted, otherwise the real bandwidth: one of the KrispAudioBandWidth values - * - * @return Value in range [0,1]. The scale is adjusted so that 0.5 corresponds to the best F1 score on our test dataset (based on TIMIT core test dataset speech examples). - * The Threshold needs to be adjusted to fit a particular use case. - */ -KRISP_AUDIO_API float -krispAudioVadFrameFloatEx(KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize, - KrispAudioBandWidthInfo* bandwidthInfo); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif //// KRISP_AUDIO_SDK_VAD_HPP_ diff --git a/inc/krisp-audio-sdk.hpp b/inc/krisp-audio-sdk.hpp deleted file mode 100644 index 49f8947..0000000 --- a/inc/krisp-audio-sdk.hpp +++ /dev/null @@ -1,152 +0,0 @@ -/// -/// Copyright Krisp, Inc -/// - -#ifndef KRISP_AUDIO_SDK_HPP_ -#define KRISP_AUDIO_SDK_HPP_ -#if defined _WIN32 || defined __CYGWIN__ - #ifdef KRISP_AUDIO_STATIC - #define KRISP_AUDIO_API - #else - #ifdef KRISP_AUDIO_EXPORTS - #ifdef __GNUC__ - #define KRISP_AUDIO_API __attribute__ ((dllexport)) - #else - #define KRISP_AUDIO_API __declspec(dllexport) // Note: actually gcc seems to also support this syntax. - #endif - #else - #ifdef __GNUC__ - #define KRISP_AUDIO_API __attribute__ ((dllimport)) - #else - #define KRISP_AUDIO_API __declspec(dllimport) // Note: actually gcc seems to also support this syntax. 
- #endif - #endif - #endif -#else - #if __GNUC__ >= 4 - #define KRISP_AUDIO_API __attribute__ ((visibility ("default"))) - #else - #define KRISP_AUDIO_API - #endif -#endif - -typedef void* KrispAudioSessionID; - -typedef enum { - KRISP_AUDIO_SAMPLING_RATE_8000HZ=8000, - KRISP_AUDIO_SAMPLING_RATE_16000HZ=16000, - KRISP_AUDIO_SAMPLING_RATE_24000HZ=24000, - KRISP_AUDIO_SAMPLING_RATE_32000HZ=32000, - KRISP_AUDIO_SAMPLING_RATE_44100HZ=44100, - KRISP_AUDIO_SAMPLING_RATE_48000HZ=48000, - KRISP_AUDIO_SAMPLING_RATE_88200HZ=88200, - KRISP_AUDIO_SAMPLING_RATE_96000HZ=96000 -} KrispAudioSamplingRate; - -typedef enum { - KRISP_AUDIO_FRAME_DURATION_10MS=10 -} KrispAudioFrameDuration; - -typedef struct krispAudioVersionInfo_t { - unsigned short major; - unsigned short minor; - unsigned short patch; - unsigned short build; -} KrispAudioVersionInfo; - - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -/*! - * @brief This function initializes the global data needed for the SDK - * - * @param[in] workingPath The path to the working directory. Can be nullptr to have the default behavior. - * - * @retval 0 success, negative on error -*/ -KRISP_AUDIO_API int -krispAudioGlobalInit(const wchar_t* workingPath); - - -/*! - * @brief This function frees all global resources allocated by SDK. The session's data will also be released and can't be used in future. - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioGlobalDestroy(); - -/*! - * @brief This function populates the versionInfo structure with API version information upon successful completion. - * - * @param[in,out] versionInfo The structure that gets populated upon successful completion of this call. - * Contains major , minor , patch and build components. - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioGetVersion(KrispAudioVersionInfo* versionInfo); - -/*! - * @brief This function sets the Krisp model to be used. 
The weight file for provided model must exist. Several models can be set. - * The specified model is later tied to specific session during the session creation process. - * - * @param[in] weightFilePath The Krisp model weight file associated with the model - * @param[in] modelName Model name alias that allows to later distinguish between different models that have been set by this function call - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioSetModel(const wchar_t* weightFilePath, - const char* modelName); - -/*! - * @brief This function sets the Krisp model by giving weight-config blob data. Weight blob data must be valid. - * The model specified model is later tied to specific session during the session creation process. - * @param[in] weightBlob The Krisp model weight blob data - * @param[in] blobSize Blob data size - * @param[in] modelName Model name alias that allows to later distinguish between different models that have been set by this function call - * @retval 0 Success - * @retval -1 Error - */ -KRISP_AUDIO_API int krispAudioSetModelBlob(const void* weightBlob, unsigned int blobSize, const char* modelName); - -/*! - * @brief This function removes the Krisp model. - * - * @param[in] modelName Model name alias that allows to remove model that has been set. - * - * @retval 0 success, negative on error - */ -KRISP_AUDIO_API int -krispAudioRemoveModel(const char* modelName); - -/*! - * @brief This function returns the energy amount for the given frame. Works with floats with values normalized in range [-1,1] - * Note: It may be used without initializing global SDK context by krispAudioGlobalInit() - * @param[in] pFrameIn pFrameIn Pointer to input frame. - * @param[in] frameInSize This is buffer size. - * @return Value in range [0, 100]. - */ -KRISP_AUDIO_API unsigned int -krispAudioGetFrameEnergyFloat(const float* pFrameIn, - unsigned int frameInSize); - -/*! 
- * @brief This function returns the energy amount for the given frame. Works with shorts (int16) with value in range [-2^15+1, 2^15] - * Note: It may be used without initializing global SDK context by krispAudioGlobalInit() - * @param[in] pFrameIn pFrameIn Pointer to input frame. - * @param[in] frameInSize This is buffer size. - * @return Value in range [0, 100]. - */ -KRISP_AUDIO_API unsigned int -krispAudioGetFrameEnergyInt16(const short* pFrameIn, - unsigned int frameInSize); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif //// KRISP_AUDIO_SDK_HPP_ diff --git a/java/src/org/webrtc/KrispAudioProcessingFactory.java b/java/src/org/webrtc/KrispAudioProcessingFactory.java new file mode 100644 index 0000000..f246622 --- /dev/null +++ b/java/src/org/webrtc/KrispAudioProcessingFactory.java @@ -0,0 +1,111 @@ +package org.webrtc; +import org.webrtc.AudioProcessingFactory; + +/** + * AudioProcessingFactory implementation backed by a native Krisp module. + * The native module is created by Init() when none exists yet and released by Destroy(); + * after Destroy() every instance method except Destroy() throws IllegalStateException. + */ +public class KrispAudioProcessingFactory implements AudioProcessingFactory { + // Native module handle; 0 until Init() creates it and again after Destroy(). + private long nativeModule; + private boolean destroyed; + + // Returns the native handle, or throws if the factory is destroyed or not yet initialized. + private long requireNativeModule(String caller) { + if (destroyed) { + throw new IllegalStateException("KrispAudioProcessingFactory is destroyed"); + } + if (nativeModule == 0) { + throw new IllegalStateException("Call Init method before " + caller); + } + return nativeModule; + } + + @Override + public long createNative() { + return nativeGetAudioProcessorModule(requireNativeModule("createNative()")); + } + + public static boolean LoadKrisp(String dllPath) { + return nativeLoadKrisp(dllPath); + } + + public static boolean UnloadKrisp() { + return nativeUnloadKrisp(); + } + + // Loads the model from a file path; returns false on failure or when modelPath is null. + public boolean Init(String modelPath) { + if (destroyed) { + throw new IllegalStateException("KrispAudioProcessingFactory is destroyed"); + } + if (modelPath == null) { + return false; + } + if (nativeModule == 0) { + nativeModule = 
nativeCreateModuleWithModelPath(modelPath); + return nativeModule != 0; + } + return nativeInit(nativeModule, modelPath); + } + + // Loads the model from an in-memory blob; returns false on failure or when modelData is null. + public boolean Init(byte[] modelData) { + if (destroyed) { + throw new IllegalStateException("KrispAudioProcessingFactory is destroyed"); + } + if (modelData == null) { + return false; + } + if (nativeModule == 0) { + nativeModule = nativeCreateModuleWithModelData(modelData); + return nativeModule != 0; + } + return nativeInitWithData(nativeModule, modelData); + } + + public void Enable(boolean enable) { + long module = requireNativeModule("Enable"); + nativeEnable(module, enable); + } + + public boolean IsEnabled() { + long module = requireNativeModule("IsEnabled"); + return nativeIsEnabled(module); + } + + // Releases the native module; a no-op when already destroyed, and safe when Init() never succeeded. + public void Destroy() { + if (destroyed) { + return; + } + if (nativeModule != 0) { + nativeDestroy(nativeModule); + nativeModule = 0; + } + destroyed = true; + } + + private static native void nativeEnable(long nativeModule, boolean enable); + + private static native boolean nativeIsEnabled(long nativeModule); + + private static native boolean nativeInit(long nativeModule, String modelPath); + + private static native boolean nativeInitWithData(long nativeModule, byte[] modelData); + + private static native boolean nativeLoadKrisp(String dllPath); + + private static native boolean nativeUnloadKrisp(); + + private static native void nativeDestroy(long nativeModule); + + private static native long nativeCreateModule(); + + private static native long nativeGetAudioProcessorModule(long nativeModule); + + private static native long nativeCreateModuleWithModelPath(String modelPath); + + private static native long nativeCreateModuleWithModelData(byte[] modelData); +} diff --git a/java/src/org/webrtc/KrispAudioProcessingImpl.java b/java/src/org/webrtc/KrispAudioProcessingImpl.java deleted file mode 100644 index b37f523..0000000 --- a/java/src/org/webrtc/KrispAudioProcessingImpl.java +++ /dev/null @@ -1,43 +0,0 @@ -package org.webrtc; -import org.webrtc.AudioProcessingFactory; - -public class 
KrispAudioProcessingImpl implements AudioProcessingFactory { - @Override - public long createNative() { - return nativeGetAudioProcessorModule(); - } - - public boolean Init(String modelPath, String dllPath) { - return nativeInit(modelPath, dllPath); - } - - public boolean InitWithData(byte[] modelData, String dllPath) { - return nativeInitWithData(modelData, dllPath); - } - - public void Enable(boolean enable) { - nativeEnable(enable); - } - - public boolean IsEnabled() { - return nativeIsEnabled(); - } - - public void Destroy() { - nativeDestroy(); - } - - private static native void nativeEnable(boolean disable); - - private static native boolean nativeIsEnabled(); - - private static native boolean nativeInit(String modelPath, String dllPath); - - private static native boolean nativeInitWithData(byte[] dataData, String dllPath); - - private static native void nativeDestroy(); - - private static native long nativeGetAudioProcessorModule(); -} - - diff --git a/krisp_c_api_include/krisp-audio-api-definitions-c.h b/krisp_c_api_include/krisp-audio-api-definitions-c.h new file mode 100644 index 0000000..2b20a15 --- /dev/null +++ b/krisp_c_api_include/krisp-audio-api-definitions-c.h @@ -0,0 +1,105 @@ +#pragma once + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined _WIN32 || defined __CYGWIN__ +#ifdef KRISP_AUDIO_STATIC +#define KRISP_AUDIO_API +#else +#ifdef KRISP_AUDIO_EXPORTS +#ifdef __GNUC__ +#define KRISP_AUDIO_API __attribute__((dllexport)) +#else +#define KRISP_AUDIO_API __declspec(dllexport) // Note: actually gcc seems to also support this syntax. +#endif +#else +#ifdef __GNUC__ +#define KRISP_AUDIO_API __attribute__((dllimport)) +#else +#define KRISP_AUDIO_API __declspec(dllimport) // Note: actually gcc seems to also support this syntax. 
+#endif +#endif +#endif +#else +#if __GNUC__ >= 4 || __clang__ +#define KRISP_AUDIO_API __attribute__((visibility("default"))) +#else +#define KRISP_AUDIO_API +#endif +#endif + +/// @brief Sampling frequency of the audio frame +typedef enum +{ + Sr8000Hz = 8000, + Sr16000Hz = 16000, + Sr24000Hz = 24000, + Sr32000Hz = 32000, + Sr44100Hz = 44100, + Sr48000Hz = 48000, + Sr88200Hz = 88200, + Sr96000Hz = 96000 +} KrispSamplingRate; + +/// @brief Input audio frame duration in ms +typedef enum +{ + Fd10ms = 10, + Fd15ms = 15, + Fd20ms = 20, + Fd30ms = 30, + Fd32ms = 32, +} KrispFrameDuration; + +/// @brief Version information +typedef struct +{ + uint16_t major; + uint16_t minor; + uint16_t patch; + uint32_t build; +} KrispVersionInfo; + +/// @brief Model Info containing path to the model or its content blob. +typedef struct +{ + /// @brief Path to the model file + const wchar_t* path; + + /// @brief Model file content as a blob + struct + { + const uint8_t* data; + size_t size; + } blob; +} KrispModelInfo; + +/// @brief Return results of the API calls +typedef enum +{ + KrispRetValSuccess = 0, + KrispRetValUnknowError = 1, + KrispRetValInternalError = 2, + KrispRetValInvalidInput = 3, +} KrispRetVal; + +/// @brief The log levels. +typedef enum +{ + LogLevelTrace = 0, + LogLevelDebug = 1, + LogLevelInfo = 2, + LogLevelWarn = 3, + LogLevelErr = 4, + LogLevelCritical = 5, + LogLevelOff = 6 +} KrispLogLevel; + +#ifdef __cplusplus +} // extern "c" +#endif diff --git a/krisp_c_api_include/krisp-audio-sdk-c.h b/krisp_c_api_include/krisp-audio-sdk-c.h new file mode 100644 index 0000000..e461b86 --- /dev/null +++ b/krisp_c_api_include/krisp-audio-sdk-c.h @@ -0,0 +1,32 @@ +/// +/// Copyright Krisp, Inc +/// +#pragma once + +#include "krisp-audio-api-definitions-c.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/// @brief Initializes the global data needed for the SDK +/// @param[in] workingPath The path to the working directory. 
Can be empty for using default execution directory. +/// @param[in] logCallback The callback to call when a log message is emitted. +/// @param[in] logLevel Log level. +/// @retval KrispRetValSuccess on success +KRISP_AUDIO_API KrispRetVal krispGlobalInit( + const wchar_t* workingPath, void (*logCallback)(const char*, KrispLogLevel), KrispLogLevel logLevel); + +/// @brief Frees all the global resources allocated by SDK. +/// @retval KrispRetValSuccess on success +KRISP_AUDIO_API KrispRetVal krispGlobalDestroy(); + +/// @brief Populates the versionInfo structure with API version information upon successful completion. +/// @param[in,out] versionInfo The structure that gets populated upon successful completion of this call. +/// @retval KrispRetValSuccess on success +KRISP_AUDIO_API KrispRetVal krispGetVersion(KrispVersionInfo* versionInfo); + +#ifdef __cplusplus +} // extern "c" +#endif diff --git a/krisp_c_api_include/krisp-audio-sdk-nc-c.h b/krisp_c_api_include/krisp-audio-sdk-nc-c.h new file mode 100644 index 0000000..f670cb9 --- /dev/null +++ b/krisp_c_api_include/krisp-audio-sdk-nc-c.h @@ -0,0 +1,214 @@ +/// +/// Copyright Krisp, Inc +/// +#pragma once + +#include "krisp-audio-api-definitions-c.h" +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// @brief Ringtone configuration used with inbound NC models to keep ringtones. +typedef struct +{ + /// @brief Ringtone model configuration. + KrispModelInfo modelInfo; +} KrispNcRingtoneCfg; + +/// @brief NC session configuration. +typedef struct +{ + /// @brief Sampling frequency of the input data. + KrispSamplingRate inputSampleRate; + + /// @brief Input audio frame duration. + KrispFrameDuration inputFrameDuration; + + /// @brief Sampling frequency of the output data. + KrispSamplingRate outputSampleRate; + + /// @brief NC model configuration. 
+ KrispModelInfo* modelInfo; + + /// @brief Set true to enable collection of NC session statistics + bool enableSessionStats; + + /// @brief Optional: Ringtone configuration that may be provided with inbound NC models to retain ringtones. + /// Pass NULL to skip the ringtone retention feature. + KrispNcRingtoneCfg* ringtoneCfg; +} KrispNcSessionConfig; + +/// @brief Audio frame energy information struct describing noise/voice energy values +typedef struct +{ + /// @brief Voice energy level, range [0,100] + uint8_t voiceEnergy; + + /// @brief Noise energy level, range [0,100] + uint8_t noiseEnergy; +} KrispNcEnergyInfo; + +/// @brief Cleaned secondary speech status enum +typedef enum +{ + /// @brief Cleaned secondary speech algorithm is not available (if non BVC model provided) + Undefined = 0, + + /// @brief Cleaned secondary speech detected in the processed frame + Detected = 1, + + /// @brief Cleaned secondary speech is not detected in the processed frame + NotDetected = 2 +} KrispNcCleanedSecondarySpeechStatus; + +/// @brief Per-frame information returned after NC processing of the given frame +typedef struct +{ + /// @brief Voice and noise energy info. + KrispNcEnergyInfo energy; + + /// @brief BVC specific feature. + /// Returns the state of the removed secondary speech. + /// If secondary speech is detected and removed, it returns Detected otherwise, it returns NotDetected. + // Undefined will be returned in case of running the NC. 
+ KrispNcCleanedSecondarySpeechStatus cleanedSecondarySpeechStatus; +} KrispNcPerFrameStats; + +/// @brief Voice stats +typedef struct +{ + /// @brief Voice duration in ms + uint32_t talkTimeMs; +} KrispNcVoiceStats; + +/// @brief Noise stats based on the noise intensity levels +typedef struct +{ + /// @brief No noise duration in ms + uint32_t noNoiseMs; + + /// @brief Low intensity noise duration in ms + uint32_t lowNoiseMs; + + /// @brief Medium intensity noise duration in ms + uint32_t mediumNoiseMs; + + /// @brief High intensity noise duration in ms + uint32_t highNoiseMs; + + /// @brief Cleaned secondary speech detected duration in ms + uint32_t cleanedSecondarySpeechMs; + + /// @brief Cleaned secondary speech not detected duration in ms + uint32_t cleanedSecondarySpeechNotDetectedMs; + + /// @brief Cleaned secondary speech undefined duration in ms (non BVC use-case) + uint32_t cleanedSecondarySpeechUndefinedMs; +} KrispNcNoiseStats; + +/// @brief NC stats containing noise and voice information +typedef struct +{ + /// @brief Voice stats + KrispNcVoiceStats voiceStats; + + /// @brief Noise stats + KrispNcNoiseStats noiseStats; +} KrispNcSessionStats; + +typedef uint64_t krispNcHandle; + +/// @brief Creates a new instance of Nc session for int16 stream processing. +/// AI technology removes background noises, reverb, and background voices from the main speaker's voice +/// in real-time, while also providing noise and voice statistics for the audio stream and frame +/// @param[in] config Configuration for the Nc Session. +/// @retval Valid pointer on success, otherwise NULL. +KRISP_AUDIO_API krispNcHandle krispCreateNcInt16(const KrispNcSessionConfig* config); + +/// @brief Creates a new instance of Nc session for float stream processing. 
+/// AI technology removes background noises, reverb, and background voices from the main speaker's voice +/// in real-time, while also providing noise and voice statistics for the audio stream and frame +/// @param[in] config Configuration for the Nc Session. +/// @retval Valid pointer on success, otherwise NULL. +KRISP_AUDIO_API krispNcHandle krispCreateNcFloat(const KrispNcSessionConfig* config); + +/// @brief Destroys the Nc instance. +/// Should be called if the Nc instance is no longer needed, before krispGlobalDestroy() +/// @param nc The Nc instance to destroy. +/// @retval KrispRetValSuccess on success +KRISP_AUDIO_API KrispRetVal krispDestroyNc(const krispNcHandle nc); + +/// @brief Processes an input frame of audio data with int16 samples. +/// @param[in] nc The handle of Nc instance to process the audio data. +/// @param[in] inputSamples Pointer to the input buffer containing audio samples. +/// The buffer should hold enough samples to fill a frame of audio data, +/// calculated as frameDuration * inputSampleRate / 1000 of FrameDataType samples. +/// @param[in] numInputSamples The number of samples in the input buffer. +/// Must be sufficient to match the expected input frame size. +/// @param[out] outputSamples Pointer to the buffer for the processed audio samples. +/// The caller must allocate a buffer of sufficient size to handle +/// a frame of output samples, calculated as frameDuration * outputSampleRate / 1000 of +/// FrameDataType samples. +/// @param[in] numOutputSamples The number of samples the output buffer can handle. +/// Must be sufficient to match the expected output frame size. +/// @param[in] noiseSuppressionLevel Noise suppression level in the range [0, 100]% +/// Used to adjust the intensity of the applied noise suppression. +/// - 0% indicates no noise suppression. +/// - 100% indicates full noise suppression. +/// @param[out] frameStats Optional: Frame statistics calculated during NC processing. 
+/// Pass NULL to skip calculation, or provide a valid pointer to receive the statistics. +/// @retval KrispRetValSuccess on success +KRISP_AUDIO_API KrispRetVal krispProcessNcInt16( + const krispNcHandle nc, + const int16_t* inputSamples, + size_t numInputSamples, + int16_t* outputSamples, + size_t numOutputSamples, + float noiseSuppressionLevel, + KrispNcPerFrameStats* frameStats); + +/// @brief Processes an input frame of audio data with float samples. +/// @param[in] nc The handle of Nc instance to process the audio data. +/// @param[in] inputSamples Pointer to the input buffer containing audio samples. +/// The buffer should hold enough samples to fill a frame of audio data, +/// calculated as frameDuration * inputSampleRate / 1000 of FrameDataType samples. +/// @param[in] numInputSamples The number of samples in the input buffer. +/// Must be sufficient to match the expected input frame size. +/// @param[out] outputSamples Pointer to the buffer for the processed audio samples. +/// The caller must allocate a buffer of sufficient size to handle +/// a frame of output samples, calculated as frameDuration * outputSampleRate / 1000 of +/// FrameDataType samples. +/// @param[in] numOutputSamples The number of samples the output buffer can handle. +/// Must be sufficient to match the expected output frame size. +/// @param[in] noiseSuppressionLevel Noise suppression level in the range [0, 100]% +/// Used to adjust the intensity of the applied noise suppression. +/// - 0% indicates no noise suppression. +/// - 100% indicates full noise suppression. +/// @param[out] frameStats Optional: Frame statistics calculated during NC processing. +/// Pass NULL to skip calculation, or provide a valid pointer to receive the statistics. 
+/// @retval KrispRetValSuccess on success +KRISP_AUDIO_API KrispRetVal krispProcessNcFloat( + const krispNcHandle nc, + const float* inputSamples, + size_t numInputSamples, + float* outputSamples, + size_t numOutputSamples, + float noiseSuppressionLevel, + KrispNcPerFrameStats* frameStats); + +/// @brief Retrieves noise and voice statistics calculated from the start of NC processing. +/// To enable statistics collection, ensure that KrispNcSessionConfig::enableSessionStats is set when creating the NC object. +/// The recommended frequency for retrieving stats is 200ms or more. +/// If it's required only at the end of the NC session, call this function once +/// before the NC class object is destroyed. +/// @param stats Session statistics +/// @retval KrispRetValSuccess on success +KRISP_AUDIO_API KrispRetVal krispGetNcSessionStats(const krispNcHandle nc, KrispNcSessionStats* stats); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/krisp_processor.cc b/krisp_processor.cc index 98715db..30c848a 100644 --- a/krisp_processor.cc +++ b/krisp_processor.cc @@ -1,19 +1,153 @@ #include "krisp_processor.hpp" +#include +#include +#include +#include + #include +#include "krisp-audio-api-definitions-c.h" +#include "krisp-audio-sdk-nc-c.h" #include "rtc_base/time_utils.h" -#include "inc/krisp-audio-sdk.hpp" -#include "inc/krisp-audio-sdk-nc.hpp" - #include "krisp_sdk.h" namespace Krisp { -KrispProcessor* KrispProcessor::_singleton = nullptr; +// Forwards a Krisp SDK log message to syslog exactly once, at the syslog priority matching its Krisp level; LogLevelOff messages are dropped. +static void logCallback(const char* message, KrispLogLevel level) +{ + switch (level) { + case LogLevelTrace: + syslog(LOG_DEBUG, "KrispProcessor::logCallback: %s", message); + break; + case LogLevelDebug: + syslog(LOG_DEBUG, "KrispProcessor::logCallback: %s", message); + break; + case LogLevelInfo: + syslog(LOG_INFO, "KrispProcessor::logCallback: %s", message); + break; + case LogLevelWarn: + syslog(LOG_WARNING, "KrispProcessor::logCallback: %s", message); + break; + case 
LogLevelErr: + syslog(LOG_ERR, "KrispProcessor::logCallback: %s", message); + break; + case LogLevelCritical: + syslog(LOG_CRIT, "KrispProcessor::logCallback: %s", message); + break; + case LogLevelOff: + break; + } +} + +// Loads the Krisp dynamic library, then initializes the SDK globals with logCallback at trace level. +// Returns false if either step fails. NOTE(review): when GlobalInit fails the library stays loaded - confirm this is intended. +bool LoadKrisp(const char* krispDllPath) +{ + if (!KrispSDK::LoadDll(krispDllPath)) + { + syslog(LOG_ERR, "KrispProcessor::Init: Unable to load Krisp DLL"); + return false; + } + + if (!KrispSDK::GlobalInit(nullptr, logCallback, KrispLogLevel::LogLevelTrace)) + { + syslog(LOG_ERR, "KrispProcessor::Init: Failed to initialize Krisp globals"); + return false; + } + return true; +} + +// Destroys the SDK globals and then unloads the library; if GlobalDestroy fails the library is left loaded and false is returned. +bool UnloadKrisp() +{ + if (KrispSDK::GlobalDestroy() != KrispRetValSuccess) + { + syslog(LOG_ERR, "KrispProcessor::Unload: Failed to destroy Krisp globals"); + return false; + } + KrispSDK::UnloadDll(); + return true; +} + +// Maps a sample rate in Hz to the matching KrispSamplingRate value; unsupported rates are logged and mapped to 48 kHz. +static KrispSamplingRate GetSampleRate(size_t sampleRate) +{ + switch (sampleRate) + { + case 8000: + return KrispSamplingRate::Sr8000Hz; + case 16000: + return KrispSamplingRate::Sr16000Hz; + case 24000: + return KrispSamplingRate::Sr24000Hz; + case 32000: + return KrispSamplingRate::Sr32000Hz; + case 44100: + return KrispSamplingRate::Sr44100Hz; + case 48000: + return KrispSamplingRate::Sr48000Hz; + case 88200: + return KrispSamplingRate::Sr88200Hz; + case 96000: + return KrispSamplingRate::Sr96000Hz; + default: + syslog(LOG_INFO, "KrispProcessor::GetSampleRate: The input sampling rate: %zu \ + is not supported. 
Using default 48khz.", sampleRate); + return KrispSamplingRate::Sr48000Hz; + } +} + +static bool IsModelSet(const KrispModelInfo& modelInfo) +{ + const bool hasPath = modelInfo.path != nullptr && modelInfo.path[0] != L'\0'; + const bool hasBlob = modelInfo.blob.data != nullptr && modelInfo.blob.size > 0; + return hasPath || hasBlob; +} + +static bool ValidateModelPath(const char* modelPath) +{ + if (!modelPath || modelPath[0] == '\0') { + syslog(LOG_ERR, "KrispProcessor::Init: model path is empty"); + return false; + } + syslog(LOG_INFO, "KrispProcessor::Init: model path: %s", modelPath); + + struct stat st; + if (stat(modelPath, &st) != 0) { + syslog(LOG_ERR, "KrispProcessor::Init: stat failed for %s: %s", + modelPath, strerror(errno)); + return false; + } + + if (st.st_size <= 0) { + syslog(LOG_ERR, "KrispProcessor::Init: model file is empty: %s", modelPath); + return false; + } + + FILE* file = std::fopen(modelPath, "rb"); + if (!file) { + syslog(LOG_ERR, "KrispProcessor::Init: fopen failed for %s: %s", + modelPath, strerror(errno)); + return false; + } + unsigned char byte = 0; + size_t read = std::fread(&byte, 1, 1, file); + std::fclose(file); + if (read != 1) { + syslog(LOG_ERR, "KrispProcessor::Init: fread failed for %s: %s", + modelPath, strerror(errno)); + return false; + } + + syslog(LOG_INFO, "KrispProcessor::Init: model file size: %lld bytes", + static_cast(st.st_size)); + return true; +} inline std::wstring convertMBString2WString(const std::string& str) { @@ -21,125 +151,130 @@ inline std::wstring convertMBString2WString(const std::string& str) return w; } -KrispProcessor::KrispProcessor() : +KrispNoiseFilter::KrispNoiseFilter() : m_isEnabled(false), - m_session(nullptr), - m_sampleRate(KRISP_AUDIO_SAMPLING_RATE_16000HZ), m_numberOfChannels(1), m_lastTimeStamp(0), + m_modelPath(), + m_modelData(), m_bufferIn(), m_bufferOut() { + m_modelInfo.path = L""; + m_modelInfo.blob.data = nullptr; + m_modelInfo.blob.size = 0; + m_sessionConfig.enableSessionStats 
= false; + m_sessionConfig.inputSampleRate = KrispSamplingRate::Sr16000Hz; + m_sessionConfig.inputFrameDuration = KrispFrameDuration::Fd10ms; + m_sessionConfig.outputSampleRate = KrispSamplingRate::Sr16000Hz; + m_sessionConfig.modelInfo = &m_modelInfo; + m_sessionConfig.ringtoneCfg = nullptr; + m_ncCachedHandle = 0; } -KrispProcessor::~KrispProcessor() +KrispNoiseFilter::~KrispNoiseFilter() { syslog(LOG_INFO,"KrispProcessor::~KrispProcessor()"); DeInit(); } -KrispProcessor* KrispProcessor::GetInstance() -{ - if(_singleton == nullptr) +void KrispNoiseFilter::DeInit() { + if (m_ncCachedHandle) { - _singleton = new KrispProcessor(); + KrispSDK::DestroyNcFloat(m_ncCachedHandle); + m_ncCachedHandle = 0; } - return _singleton; + m_modelPath.clear(); + m_modelPath.shrink_to_fit(); + m_modelData.clear(); + m_modelData.shrink_to_fit(); + m_modelInfo.path = nullptr; + m_modelInfo.blob.data = nullptr; + m_modelInfo.blob.size = 0; } -void KrispProcessor::DeInit() { - if (m_session) - { - KrispSDK::NcCloseSession(m_session); - m_session = nullptr; - } - KrispSDK::RemoveModel("default"); - KrispSDK::GlobalDestroy(); - KrispSDK::UnloadDll(); -} - -bool KrispProcessor::Init(const char* modelPath, const char* krispDllPath) +bool KrispNoiseFilter::Init(const char* modelPath) { - if (!KrispSDK::LoadDll(krispDllPath)) - { - syslog(LOG_ERR, "KrispProcessor::Init: Unable to load Krisp DLL"); + if (!ValidateModelPath(modelPath)) { return false; } - - if (!KrispSDK::GlobalInit(nullptr)) + m_modelPath = convertMBString2WString(modelPath); + m_modelInfo.path = m_modelPath.c_str(); + m_modelInfo.blob.data = nullptr; + m_modelInfo.blob.size = 0; + m_modelData.clear(); + if (m_ncCachedHandle) { - syslog(LOG_ERR, "KrispProcessor::Init: Failed to initialize Krisp globals"); - return false; - } - - if (KrispSDK::SetModel(convertMBString2WString(modelPath).c_str(), "default") != 0) + KrispSDK::DestroyNcFloat(m_ncCachedHandle); + m_ncCachedHandle = 0; + } + m_ncCachedHandle = 
KrispSDK::CreateNcFloat(&m_sessionConfig); + if (m_ncCachedHandle == 0) { - syslog(LOG_ERR, "KrispProcessor::Init: Failed to set model file %s", modelPath); + syslog(LOG_ERR, "KrispProcessor::Init: Failed to create Krisp NC session"); return false; - } - + } return true; } -bool KrispProcessor::Init(const void* modelAddr, unsigned int modelSize, const char* krispDllPath) +// Copies the model blob into m_modelData and creates the NC session from it. +// Rejects empty input, and destroys any session left by an earlier Init() so its handle is not leaked. +bool KrispNoiseFilter::Init(const void* modelAddr, unsigned int modelSize) { - if (!KrispSDK::LoadDll(krispDllPath)) - { - syslog(LOG_ERR, "KrispProcessor::Init: Unable to find Krisp DLL"); + if (modelAddr == nullptr || modelSize == 0) + { + syslog(LOG_ERR, "KrispProcessor::Init: model data is empty"); + return false; + } + // Destroy the old session before its model buffer is overwritten below. + if (m_ncCachedHandle) + { + KrispSDK::DestroyNcFloat(m_ncCachedHandle); + m_ncCachedHandle = 0; + } + m_modelPath.clear(); + m_modelData.resize(modelSize); + std::memcpy(m_modelData.data(), modelAddr, modelSize); + m_modelInfo.path = L""; + m_modelInfo.blob.data = m_modelData.data(); + m_modelInfo.blob.size = modelSize; + + m_ncCachedHandle = KrispSDK::CreateNcFloat(&m_sessionConfig); + if (m_ncCachedHandle == 0) + { + syslog(LOG_ERR, "KrispProcessor::Init: Failed to create Krisp NC session"); return false; } - - if (!KrispSDK::GlobalInit(nullptr)) - { - syslog(LOG_ERR, "KrispProcessor::Init Failed to initialize Krisp globals"); - return false; - } - - if (KrispSDK::SetModelBlob(modelAddr, modelSize, "default") != 0) - { - syslog(LOG_ERR, "KrispProcessor::Init: Krisp failed to set model via blob api"); - return false; - } - return true; } -void KrispProcessor::Enable(bool isEnable) +void KrispNoiseFilter::Enable(bool isEnable) { - m_isEnabled = isEnable; + m_isEnabled.store(isEnable, std::memory_order_release); } -bool KrispProcessor::IsEnabled() const +bool KrispNoiseFilter::IsEnabled() const { - return m_isEnabled; + return m_isEnabled.load(std::memory_order_acquire); } -void KrispProcessor::Initialize(int sampleRate, int numberOfChannels) +void KrispNoiseFilter::InitializeSession(int sampleRate, int numberOfChannels) { syslog(LOG_INFO, "KrispProcessor::Initialize: sampleRate: %i\ numberOfChannels: %i", sampleRate, numberOfChannels); + m_numberOfChannels = numberOfChannels; - if (m_sampleRate != sampleRate || m_session == 
nullptr) + m_sessionConfig.inputSampleRate = GetSampleRate(sampleRate); + m_sessionConfig.outputSampleRate = m_sessionConfig.inputSampleRate; + + if (!IsModelSet(m_modelInfo)) { + syslog(LOG_INFO, "KrispProcessor::Initialize: model not loaded yet"); + return; + } + + krispNcHandle newNcHandle = KrispSDK::CreateNcFloat(&m_sessionConfig); + if (newNcHandle == 0) { - if (m_session) - { - KrispSDK::NcCloseSession(m_session); - } - m_session = CreateAudioSession(sampleRate); - m_sampleRate = sampleRate; - if (m_session == nullptr) - { - // TODO: throw a valid WebRTC exception for error handling - syslog(LOG_ERR, "KrispProcessor::Initialize: Failed creating Krisp AudioSession"); - return; - } + syslog(LOG_ERR, "KrispProcessor::Initialize: Failed to create Krisp NC session"); + return; + } + if (m_ncCachedHandle && KrispSDK::DestroyNcFloat(m_ncCachedHandle) != KrispRetValSuccess) { + syslog(LOG_ERR, "KrispProcessor::Initialize: Failed to destroy Krisp NC session"); + // TODO: handle memory leak } + m_ncCachedHandle = newNcHandle; } -void KrispProcessor::Process(webrtc::AudioBuffer* audioBuffer) +void KrispNoiseFilter::ProcessFrame(webrtc::AudioBuffer* audioBuffer) { - if(!KrispProcessor::IsEnabled()) + if(!KrispNoiseFilter::IsEnabled()) { syslog(LOG_DEBUG, "KrispProcessor::Process: Bypassing NoiseSuppressor::Process"); return; @@ -154,37 +289,51 @@ void KrispProcessor::Process(webrtc::AudioBuffer* audioBuffer) m_lastTimeStamp = now; } - int audioBufferSampleRate = audioBuffer->num_frames() * 1000; - if(audioBufferSampleRate != m_sampleRate) + int audioBufferSampleRate = audioBuffer->num_frames() * 100; + if(audioBufferSampleRate != static_cast(m_sessionConfig.inputSampleRate)) { - if (m_session) + m_sessionConfig.inputSampleRate = GetSampleRate(audioBufferSampleRate); + m_sessionConfig.outputSampleRate = m_sessionConfig.inputSampleRate; + krispNcHandle newNcHandle = KrispSDK::CreateNcFloat(&m_sessionConfig); + if (newNcHandle == 0) { - KrispSDK::NcCloseSession(m_session); 
+ syslog(LOG_ERR, "KrispProcessor::Process: Failed to create Krisp NC session"); + return; } - m_session = CreateAudioSession(audioBufferSampleRate); - m_sampleRate = audioBufferSampleRate; - if (m_session == nullptr) + if (m_ncCachedHandle) { - syslog(LOG_ERR, "KrispProcessor::Process: Failed creating AudioSession"); - return; - } + if (KrispSDK::DestroyNcFloat(m_ncCachedHandle) != KrispRetValSuccess) { + syslog(LOG_ERR, "KrispProcessor::Process: Failed to destroy Krisp NC session"); + // TODO: handle memory leak + } + } + m_ncCachedHandle = newNcHandle; } - constexpr size_t kNsFrameSize = 160; - size_t bufferSize = kNsFrameSize * audioBuffer->num_bands(); - m_bufferIn.resize(bufferSize); - m_bufferOut.resize(bufferSize); + if (!m_ncCachedHandle) { + syslog(LOG_DEBUG, "KrispProcessor::Process: Krisp session is not initialized"); + return; + } + + size_t bufferSize = audioBuffer->num_frames(); + if (m_bufferIn.size() != bufferSize) { + m_bufferIn.resize(bufferSize); + } + if (m_bufferOut.size() != bufferSize) { + m_bufferOut.resize(bufferSize); + } for (size_t i = 0; i < bufferSize; ++i) { m_bufferIn[i] = audioBuffer->channels()[0][i] / 32768.f; } - auto returnCode = KrispSDK::NcCleanAmbientNoiseFloat( - m_session, m_bufferIn.data(), bufferSize, - m_bufferOut.data(), bufferSize); + auto returnCode = KrispSDK::ProcessNcFloat( + m_ncCachedHandle, + m_bufferIn.data(), bufferSize, + m_bufferOut.data(), bufferSize, 100.0f, nullptr); - if (returnCode != 0) + if (returnCode != KrispRetValSuccess) { syslog(LOG_INFO, "KrispProcessor::Process: Krisp noise cleanup error"); return; @@ -196,60 +345,77 @@ void KrispProcessor::Process(webrtc::AudioBuffer* audioBuffer) } } -std::string KrispProcessor::ToString() const + +KrispAdapter::KrispAdapter(const std::shared_ptr& krispProcessor) : + m_krispProcessor(krispProcessor) { - return "KrispAudioProcessor"; } -void KrispProcessor::SetRuntimeSetting(webrtc::AudioProcessing::RuntimeSetting setting) +void KrispAdapter::Initialize(int 
sampleRate, int numOfChannels) { + m_krispProcessor->InitializeSession(sampleRate, numOfChannels); } -static KrispAudioSamplingRate GetSampleRate(size_t sampleRate) +void KrispAdapter::Process(webrtc::AudioBuffer* audioBuffer) { - switch (sampleRate) - { - case 8000: - return KRISP_AUDIO_SAMPLING_RATE_8000HZ; - case 16000: - return KRISP_AUDIO_SAMPLING_RATE_16000HZ; - case 24000: - return KRISP_AUDIO_SAMPLING_RATE_24000HZ; - case 32000: - return KRISP_AUDIO_SAMPLING_RATE_32000HZ; - case 44100: - return KRISP_AUDIO_SAMPLING_RATE_44100HZ; - case 48000: - return KRISP_AUDIO_SAMPLING_RATE_48000HZ; - case 88200: - return KRISP_AUDIO_SAMPLING_RATE_88200HZ; - case 96000: - return KRISP_AUDIO_SAMPLING_RATE_96000HZ; - default: - syslog(LOG_INFO, "KrispProcessor::GetSampleRate: The input sampling rate: %zu \ - is not supported. Using default 48khz.", sampleRate); - return KRISP_AUDIO_SAMPLING_RATE_48000HZ; - } + m_krispProcessor->ProcessFrame(audioBuffer); +} + +std::string KrispAdapter::ToString() const +{ + return "KrispAudioProcessor"; } -static KrispAudioFrameDuration GetFrameDuration(size_t duration) +void KrispAdapter::SetRuntimeSetting(webrtc::AudioProcessing::RuntimeSetting setting) { - switch (duration) + if (setting.type() == + webrtc::AudioProcessing::RuntimeSetting::Type::kCaptureOutputUsed) { - case 10: - return KRISP_AUDIO_FRAME_DURATION_10MS; - default: - syslog(LOG_INFO, "KrispProcessor::GetFrameDuration: Frame duration: %zu \ - is not supported. 
Switching to default 10ms", duration); - return KRISP_AUDIO_FRAME_DURATION_10MS; + bool enable = false; + setting.GetBool(&enable); + m_krispProcessor->Enable(enable); + } +} + +// Creates the module object holding proc and an AudioProcessing instance whose capture post-processing stage is constructed from proc, with the echo canceller disabled. +// Returns nullptr if the AudioProcessing instance could not be created. +static std::unique_ptr BuildModule( + const std::shared_ptr& proc) +{ + auto m = std::make_unique(); + m->proc = proc; + m->apm = webrtc::AudioProcessingBuilder() + .SetCapturePostProcessing(std::make_unique(m->proc)) + .Create(); + if (!m->apm) + { + syslog(LOG_ERR, "KrispProcessor::BuildModule: Failed to create AudioProcessing"); + return nullptr; + } + webrtc::AudioProcessing::Config config; + config.echo_canceller.enabled = false; + config.echo_canceller.mobile_mode = true; + m->apm->ApplyConfig(config); + return m; +} + +std::unique_ptr NativeKrispModule::Create() +{ + return BuildModule(std::make_shared()); +} + +std::unique_ptr NativeKrispModule::CreateWithModelPath( + const char* modelPath) +{ + auto proc = std::make_shared(); + if (!proc->Init(modelPath)) { + return nullptr; } + return BuildModule(proc); } -void * KrispProcessor::CreateAudioSession(int sampleRate) +std::unique_ptr NativeKrispModule::CreateWithModelData( + const void* modelData, unsigned int modelSize) { - auto krispSampleRate = GetSampleRate(sampleRate); - auto krispFrameDuration = GetFrameDuration(KRISP_AUDIO_FRAME_DURATION_10MS); - return KrispSDK::NcCreateSession(krispSampleRate, krispSampleRate, krispFrameDuration, "default"); + auto proc = std::make_shared(); + if (!proc->Init(modelData, modelSize)) { + return nullptr; + } + return BuildModule(proc); } + } diff --git a/krisp_processor.hpp b/krisp_processor.hpp index d837887..0e44f16 100644 --- a/krisp_processor.hpp +++ b/krisp_processor.hpp @@ -1,47 +1,88 @@ +#pragma once + +#include +#include +#include +#include + #include "modules/audio_processing/include/audio_processing.h" -#include "modules/audio_processing/audio_processing_impl.h" #include "modules/audio_processing/audio_buffer.h" +#include "krisp-audio-sdk-nc-c.h" namespace Krisp { -class KrispProcessor : public webrtc::CustomProcessing -{ -public: +// Load Krisp DLL before using Krisp API +bool LoadKrisp(const char* krispDllPath); - 
KrispProcessor(const KrispProcessor&) = delete; - KrispProcessor(KrispProcessor&&) = delete; - KrispProcessor& operator=(const KrispProcessor&) = delete; - KrispProcessor& operator=(KrispProcessor&&) = delete; - ~KrispProcessor(); +// Unload Krisp DLL only after disposing all KrispNoiseFilter instances +bool UnloadKrisp(); - static KrispProcessor* GetInstance(); +class KrispNoiseFilter +{ +public: + KrispNoiseFilter(); + KrispNoiseFilter(const KrispNoiseFilter&) = delete; + KrispNoiseFilter(KrispNoiseFilter&&) = delete; + KrispNoiseFilter& operator=(const KrispNoiseFilter&) = delete; + KrispNoiseFilter& operator=(KrispNoiseFilter&&) = delete; + virtual ~KrispNoiseFilter(); - bool Init(const char* modelPath, const char* krispDllPath); - bool Init(const void* modelAddr, unsigned int modelSize, const char* krispDllPath); + bool Init(const char* modelPath); + bool Init(const void* modelAddr, unsigned int modelSize); void DeInit(); void Enable(bool isEnable); bool IsEnabled() const; -private: - KrispProcessor(); - - static KrispProcessor* _singleton; + // Call this when sample rate changes. + // Call this when audio stream changes. + // Call this after the end of the call, or before the next call. 
+ void InitializeSession(int sampleRate, int numberOfChannels); + void ProcessFrame(webrtc::AudioBuffer* audioBuffer); - bool m_isEnabled; - void* m_session; - int m_sampleRate; +private: + std::atomic m_isEnabled; int m_numberOfChannels; long m_lastTimeStamp; + std::wstring m_modelPath; + std::vector m_modelData; std::vector m_bufferIn; std::vector m_bufferOut; + KrispModelInfo m_modelInfo; + KrispNcSessionConfig m_sessionConfig; + krispNcHandle m_ncCachedHandle; +}; - static void * CreateAudioSession(int sampleRate); - +class KrispAdapter : public webrtc::CustomProcessing +{ +public: + explicit KrispAdapter(const std::shared_ptr& krispProcessor); + // Do not allow copy + KrispAdapter(const KrispAdapter&) = delete; + KrispAdapter& operator=(const KrispAdapter&) = delete; + // Allow move + KrispAdapter(KrispAdapter&&) = default; + KrispAdapter& operator=(KrispAdapter&&) = default; + virtual ~KrispAdapter() = default; +private: void Initialize(int sampleRate, int numOfChannels) override ; void Process(webrtc::AudioBuffer* audioBuffer) override; std::string ToString() const override; void SetRuntimeSetting(webrtc::AudioProcessing::RuntimeSetting setting) override; + + std::shared_ptr m_krispProcessor; }; + +struct NativeKrispModule { + std::shared_ptr proc; + rtc::scoped_refptr apm; + + static std::unique_ptr Create(); + static std::unique_ptr CreateWithModelPath(const char* modelPath); + static std::unique_ptr CreateWithModelData( + const void* modelData, unsigned int modelSize); +}; + + } diff --git a/krisp_processor_jni.cc b/krisp_processor_jni.cc index 0b308b5..97e0cb7 100644 --- a/krisp_processor_jni.cc +++ b/krisp_processor_jni.cc @@ -4,80 +4,154 @@ #include #include -#include "rtc_base/time_utils.h" -#include "rtc_base/checks.h" -#include "rtc_base/ref_counted_object.h" -#include "rtc_base/thread.h" #include "sdk/android/src/jni/jni_helpers.h" -#include "webrtc-android-krisp-module/generated_krisp_jni/KrispAudioProcessingImpl_jni.h" +namespace jni_zero { 
+template +class JavaParamRef; +} + +namespace Krisp { +class Module { +public: + explicit Module(std::unique_ptr module) + : module_(std::move(module)) {} + + jlong GetAudioProcessorModule(JNIEnv* env); + jboolean Init(JNIEnv* env, const jni_zero::JavaParamRef& modelPathRef); + jboolean InitWithData(JNIEnv* env, const jni_zero::JavaParamRef& modelDataRef); + void Enable(JNIEnv* env, jboolean disable); + jboolean IsEnabled(JNIEnv* env); + void Destroy(JNIEnv* env); + +private: + std::unique_ptr module_; +}; +} + +#if __has_include("webrtc-android-krisp-module/generated_krisp_jni/KrispAudioProcessingFactory_jni.h") +#include "webrtc-android-krisp-module/generated_krisp_jni/KrispAudioProcessingFactory_jni.h" +#endif + namespace Krisp { -static webrtc::AudioProcessing* apmPtr; +#if defined(__GNUC__) +#define JNI_UNUSED __attribute__((unused)) +#else +#define JNI_UNUSED +#endif -static jlong JNI_KrispAudioProcessingImpl_GetAudioProcessorModule(JNIEnv* env) +static jlong JNI_UNUSED JNI_KrispAudioProcessingFactory_CreateModule(JNIEnv* env) { + auto module = NativeKrispModule::Create(); + return webrtc::jni::jlongFromPointer(new Module(std::move(module))); +} - std::unique_ptr krisp_processor( - KrispProcessor::GetInstance()); - auto apm = webrtc::AudioProcessingBuilder() - .SetCapturePostProcessing(std::move(krisp_processor)) - .Create(); - webrtc::AudioProcessing::Config config; - config.echo_canceller.enabled = false; - config.echo_canceller.mobile_mode = true; - apm->ApplyConfig(config); - apmPtr = apm.release(); - return webrtc::jni::jlongFromPointer(apmPtr); - +static jlong JNI_UNUSED JNI_KrispAudioProcessingFactory_CreateModuleWithModelPath( + JNIEnv* env, + const webrtc::JavaParamRef& modelPathRef) +{ + const char *modelFilePath = env->GetStringUTFChars(modelPathRef.obj(), nullptr); + auto module = NativeKrispModule::CreateWithModelPath(modelFilePath); + env->ReleaseStringUTFChars(modelPathRef.obj(), modelFilePath); + if (!module) { + return 0; + } + return 
webrtc::jni::jlongFromPointer(new Module(std::move(module))); } -static void JNI_KrispAudioProcessingImpl_Enable(JNIEnv* env, jboolean disable) +static jlong JNI_UNUSED JNI_KrispAudioProcessingFactory_CreateModuleWithModelData( + JNIEnv* env, + const webrtc::JavaParamRef& modelDataRef) { - KrispProcessor::GetInstance()->Enable(disable); + jbyteArray javaByteArray = modelDataRef.obj(); + jsize javaModelSize = env->GetArrayLength(javaByteArray); + jbyte *javaModelData = env->GetByteArrayElements(javaByteArray, nullptr); + auto module = NativeKrispModule::CreateWithModelData( + javaModelData, static_cast(javaModelSize)); + env->ReleaseByteArrayElements(javaByteArray, javaModelData, JNI_ABORT); + if (!module) { + return 0; + } + return webrtc::jni::jlongFromPointer(new Module(std::move(module))); } -static jboolean JNI_KrispAudioProcessingImpl_IsEnabled(JNIEnv* env) +jlong Module::GetAudioProcessorModule(JNIEnv* env) { - return KrispProcessor::GetInstance()->IsEnabled(); + if (!module_ || !module_->apm) { + return 0; + } + return webrtc::jni::jlongFromPointer(module_->apm.get()); } -static jboolean JNI_KrispAudioProcessingImpl_Init(JNIEnv* env, - const webrtc::JavaParamRef& modelPathRef, +jboolean JNI_UNUSED JNI_KrispAudioProcessingFactory_LoadKrisp( + JNIEnv* env, const webrtc::JavaParamRef& krispDllPath) { + const char *dllPath = env->GetStringUTFChars(krispDllPath.obj(), nullptr); + bool retValue = LoadKrisp(dllPath); + env->ReleaseStringUTFChars(krispDllPath.obj(), dllPath); + return static_cast(retValue); +} + +jboolean JNI_UNUSED JNI_KrispAudioProcessingFactory_UnloadKrisp(JNIEnv* env) +{ + return static_cast(UnloadKrisp()); +} + +void Module::Enable(JNIEnv* env, jboolean disable) +{ + if (!module_ || !module_->apm) { + return; + } + module_->apm->SetRuntimeSetting( + webrtc::AudioProcessing::RuntimeSetting::CreateCaptureOutputUsedSetting(disable)); +} + +jboolean Module::IsEnabled(JNIEnv* env) +{ + if (!module_ || !module_->proc) { + return false; + } + return 
module_->proc->IsEnabled(); +} + +jboolean Module::Init(JNIEnv* env, const jni_zero::JavaParamRef& modelPathRef) +{ + if (!module_ || !module_->proc) { + return false; + } jstring javaModelPath = modelPathRef.obj(); - jstring javaDllPath = krispDllPath.obj(); const char *modelFilePath = env->GetStringUTFChars(javaModelPath, nullptr); - const char *dllPath = env->GetStringUTFChars(javaDllPath, nullptr); - bool retValue = KrispProcessor::GetInstance()->Init(modelFilePath, dllPath); + bool retValue = module_->proc->Init(modelFilePath); env->ReleaseStringUTFChars(javaModelPath, modelFilePath); return static_cast(retValue); } -static jboolean JNI_KrispAudioProcessingImpl_InitWithData(JNIEnv* env, - const webrtc::JavaParamRef& modelDataRef, - const webrtc::JavaParamRef& krispDllPath) +jboolean Module::InitWithData(JNIEnv* env, + const jni_zero::JavaParamRef& modelDataRef) { + if (!module_ || !module_->proc) { + return false; + } jbyteArray javaByteArray = modelDataRef.obj(); - jstring javaDllPath = krispDllPath.obj(); jsize javaModelSize = env->GetArrayLength(javaByteArray); jbyte *javaModelData = env->GetByteArrayElements(javaByteArray, nullptr); - const char *dllPath = env->GetStringUTFChars(javaDllPath, nullptr); size_t arraySize = static_cast(javaModelSize); std::unique_ptr modelData(new char[arraySize]); std::memcpy(modelData.get(), javaModelData, arraySize); - bool retValue = KrispProcessor::GetInstance()->Init(modelData.get(), arraySize, dllPath); + bool retValue = module_->proc->Init(modelData.get(), arraySize); env->ReleaseByteArrayElements(javaByteArray, javaModelData, JNI_ABORT); return static_cast(retValue); } -static void JNI_KrispAudioProcessingImpl_Destroy(JNIEnv* env) +void Module::Destroy(JNIEnv* env) { - delete apmPtr; - apmPtr = nullptr; + delete this; } +#undef JNI_UNUSED + } diff --git a/krisp_sdk.cc b/krisp_sdk.cc index ff5ec5b..ba077f8 100644 --- a/krisp_sdk.cc +++ b/krisp_sdk.cc @@ -1,63 +1,57 @@ #include "krisp_sdk.h" -#include +// required by 
dlopen, dlclose, dlsym #include +// required by syslog #include - +// required by std::array #include +// required by std::call_once +#include namespace KrispSDK { enum class KrispFunctionId { - krispAudioGlobalInit = 0, - krispAudioGlobalDestroy = 1, - krispAudioSetModel = 2, - krispAudioSetModelBlob = 3, - krispAudioRemoveModel = 4, - krispAudioNcCreateSession = 5, - krispAudioNcCloseSession = 6, - krispAudioNcCleanAmbientNoiseFloat = 7 + krispGlobalInit = 0, + krispGlobalDestroy = 1, + krispCreateNcFloat = 2, + krispDestroyNc = 3, + krispProcessNcFloat = 4, }; -class KrispAudioSdkDllGate -{ -public: - - static KrispAudioSdkDllGate * singleton() - { - if (!_singleton) - { - _singleton = new KrispAudioSdkDllGate; - } - return _singleton; - } - - bool LoadDll(const char* krispDllPath) - { - dlerror(); - _dllHandle = dlopen(krispDllPath, RTLD_LAZY); - if (!_dllHandle) { - syslog(LOG_ERR, "KrispSDK::LoadDll: Failed to load the library = %s\n", krispDllPath); - return false; - } - syslog(LOG_INFO, "Krisp DLL loaded: %s", krispDllPath); - return LoadFunctions(); - } - - void UnloadDll() - { - if (_dllHandle) - { - dlclose(_dllHandle); - _dllHandle = nullptr; - } - for (auto & functionPtr : _functionPointers) - { - functionPtr = nullptr; - } - } +class KrispAudioSdkDllGate { + public: + static KrispAudioSdkDllGate* singleton() { + std::call_once(_initFlag, []() { + _singleton = new KrispAudioSdkDllGate; + }); + return _singleton; + } + + bool LoadDll(const char* krispDllPath) { + dlerror(); + _dllHandle = dlopen(krispDllPath, RTLD_LAZY); + if (!_dllHandle) { + syslog(LOG_ERR, + "KrispSDK::LoadDll: Failed to load the library = %s\n", + krispDllPath); + return false; + } + syslog(LOG_INFO, "Krisp DLL loaded: %s", krispDllPath); + return LoadFunctions(); + } + + void UnloadDll() { + if (_dllHandle) { + dlclose(_dllHandle); + _dllHandle = nullptr; + } + for (auto& functionPtr : _functionPointers) { + functionPtr = nullptr; + } + } template ReturnType 
InvokeFunction(KrispFunctionId functionId, Args... args) @@ -93,23 +87,22 @@ class KrispAudioSdkDllGate } static KrispAudioSdkDllGate * _singleton; + static std::once_flag _initFlag; void* _dllHandle = nullptr; - static constexpr unsigned int _functionCount = 8; + static constexpr unsigned int _functionCount = 5; static constexpr std::array _functionNames = { - "krispAudioGlobalInit", - "krispAudioGlobalDestroy", - "krispAudioSetModel", - "krispAudioSetModelBlob", - "krispAudioRemoveModel", - "krispAudioNcCreateSession", - "krispAudioNcCloseSession", - "krispAudioNcCleanAmbientNoiseFloat" + "krispGlobalInit", + "krispGlobalDestroy", + "krispCreateNcFloat", + "krispDestroyNc", + "krispProcessNcFloat" }; std::array _functionPointers = {}; }; KrispAudioSdkDllGate * KrispAudioSdkDllGate::_singleton = nullptr; +std::once_flag KrispAudioSdkDllGate::_initFlag; template ReturnType InvokeFunction(KrispFunctionId functionId, Args... args) @@ -127,72 +120,44 @@ void UnloadDll() KrispAudioSdkDllGate::singleton()->UnloadDll(); } -bool GlobalInit(void* param) -{ - int result = InvokeFunction(KrispFunctionId::krispAudioGlobalInit, param); - return result == 0 ? 
true: false; -} - -int SetModel(const wchar_t* weightFilePath, const char* modelName) -{ - int result = InvokeFunction(KrispFunctionId::krispAudioSetModel, weightFilePath, modelName); - return result; -} - -int SetModelBlob(const void* modelAddress, unsigned int modelSize, const char* modelName) -{ - int result = InvokeFunction(KrispFunctionId::krispAudioSetModelBlob, modelAddress, modelSize, modelName); - return result; -} - -int RemoveModel(const char* modelName) +KrispRetVal GlobalInit(const wchar_t* workingPath, + void (*logCallback)(const char*, KrispLogLevel), + KrispLogLevel logLevel) { - return InvokeFunction(KrispFunctionId::krispAudioRemoveModel, modelName); + return InvokeFunction(KrispFunctionId::krispGlobalInit, workingPath, logCallback, logLevel); } -int GlobalDestroy() +KrispRetVal GlobalDestroy() { - int result = InvokeFunction(KrispFunctionId::krispAudioGlobalDestroy); - return result; + return InvokeFunction(KrispFunctionId::krispGlobalDestroy); } -int NcCloseSession(void* session) +krispNcHandle CreateNcFloat(const KrispNcSessionConfig* config) { - int result = InvokeFunction(KrispFunctionId::krispAudioNcCloseSession, session); - return result; + return InvokeFunction(KrispFunctionId::krispCreateNcFloat, config); } -KrispAudioSessionID NcCreateSession( - KrispAudioSamplingRate inputSampleRate, - KrispAudioSamplingRate outputSampleRate, - KrispAudioFrameDuration frameDuration, - const char* modelName) +KrispRetVal DestroyNcFloat(const krispNcHandle session) { - - KrispAudioSessionID result = InvokeFunction(KrispFunctionId::krispAudioNcCreateSession, - inputSampleRate, - outputSampleRate, - frameDuration, - modelName); - - return result; + return InvokeFunction(KrispFunctionId::krispDestroyNc, session); } -int NcCleanAmbientNoiseFloat( - KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize, - float* pFrameOut, - unsigned int frameOutSize) +KrispRetVal ProcessNcFloat(const krispNcHandle session, + const float* 
inputSamples, + size_t numInputSamples, + float* outputSamples, + size_t numOutputSamples, + float noiseSuppressionLevel, + KrispNcPerFrameStats* frameStats) { - int result = InvokeFunction(KrispFunctionId::krispAudioNcCleanAmbientNoiseFloat, - pSession, - pFrameIn, - frameInSize, - pFrameOut, - frameOutSize - ); - return result; + return InvokeFunction(KrispFunctionId::krispProcessNcFloat, + session, + inputSamples, + numInputSamples, + outputSamples, + numOutputSamples, + noiseSuppressionLevel, + frameStats); } } \ No newline at end of file diff --git a/krisp_sdk.h b/krisp_sdk.h index f359b24..f271a34 100644 --- a/krisp_sdk.h +++ b/krisp_sdk.h @@ -1,26 +1,31 @@ -#include "inc/krisp-audio-sdk.hpp" -#include "inc/krisp-audio-sdk-nc.hpp" +// required by KrispRetVal, KrispLogLevel, KrispVersionInfo +#include "krisp-audio-sdk-c.h" +// required by KrispNcSessionConfig, KrispNcHandle, KrispNcPerFrameStats +#include "krisp-audio-sdk-nc-c.h" namespace KrispSDK { - bool LoadDll(const char* dllPath); - void UnloadDll(); - bool GlobalInit(void* param); - int SetModel(const wchar_t* weightFilePath, const char* modelName); - int SetModelBlob(const void* weightBlob, unsigned int blobSize, const char* modelName); - int RemoveModel(const char* modelName); - int GlobalDestroy(); - int NcCloseSession(void* m_session); - KrispAudioSessionID NcCreateSession( - KrispAudioSamplingRate inputSampleRate, - KrispAudioSamplingRate outputSampleRate, - KrispAudioFrameDuration frameDuration, - const char* modelName); - int NcCleanAmbientNoiseFloat( - KrispAudioSessionID pSession, - const float* pFrameIn, - unsigned int frameInSize, - float* pFrameOut, - unsigned int frameOutSize); + // Loads the Krisp SDK shared library from the provided path. + bool LoadDll(const char* dllPath); + void UnloadDll(); + + // Global lifecycle. 
+ KrispRetVal GlobalInit(const wchar_t* workingPath, + void (*logCallback)(const char*, KrispLogLevel), + KrispLogLevel logLevel); + KrispRetVal GlobalDestroy(); + + // Noise cancellation session management. + krispNcHandle CreateNcFloat(const KrispNcSessionConfig* config); + KrispRetVal DestroyNcFloat(const krispNcHandle session); + + // Per-frame processing. + KrispRetVal ProcessNcFloat(const krispNcHandle session, + const float* inputSamples, + size_t numInputSamples, + float* outputSamples, + size_t numOutputSamples, + float noiseSuppressionLevel = 100.0f, + KrispNcPerFrameStats* frameStats = nullptr); }