Windows Audio Session API 상세 가이드
Windows Audio Session API: Vista 이후 저수준 오디오 API
- ✅ 낮은 지연 (DirectSound 대비 10배 빠름)
- ✅ 높은 품질 (비트 퍼펙트)
- ✅ 프로세스별 제어
- ✅ Exclusive 모드 지원
Discord/Chrome → WASAPI Loopback → 16kHz PCM → STT
IMMDeviceEnumerator (디바이스 목록)
↓
IMMDevice (특정 디바이스)
↓
IAudioClient (오디오 세션)
↓
IAudioCaptureClient (데이터 읽기)
역할: 오디오 디바이스 목록 관리
IMMDeviceEnumerator* enumerator = NULL;
CoCreateInstance(__uuidof(MMDeviceEnumerator), ...);
// 기본 스피커 가져오기
IMMDevice* device = NULL;
enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &device);

역할: 특정 디바이스 표현
// IAudioClient 활성화
IAudioClient* audioClient = NULL;
device->Activate(__uuidof(IAudioClient), CLSCTX_ALL,
NULL, (void**)&audioClient);

역할: 오디오 세션 관리
// 초기화
WAVEFORMATEX format = {...};
audioClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_LOOPBACK,
0, 0, &format, NULL
);
// 시작/정지
audioClient->Start();
audioClient->Stop();

역할: 실제 오디오 데이터 읽기
IAudioCaptureClient* captureClient = NULL;
audioClient->GetService(__uuidof(IAudioCaptureClient),
(void**)&captureClient);
// 데이터 읽기
BYTE* data;
UINT32 numFrames;
captureClient->GetBuffer(&data, &numFrames, &flags, NULL, NULL);
// ... 처리 ...
captureClient->ReleaseBuffer(numFrames);

CoInitializeEx(NULL, COINIT_MULTITHREADED);

IMMDeviceEnumerator* enumerator;
CoCreateInstance(__uuidof(MMDeviceEnumerator), ...);
IMMDevice* device;
enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &device);

// Process Loopback용 (비동기)
AUDIOCLIENT_PROCESS_LOOPBACK_PARAMS params = {...};
ActivateAudioInterfaceAsync(...);
WaitForSingleObject(hEvent, INFINITE);

WAVEFORMATEX format = {};
format.wFormatTag = WAVE_FORMAT_PCM;
format.nChannels = 1; // Mono
format.nSamplesPerSec = 16000; // 16kHz
format.wBitsPerSample = 16;
format.nBlockAlign = 2;
format.nAvgBytesPerSec = 32000;
audioClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
0, 0, &format, NULL
);

IAudioCaptureClient* captureClient;
audioClient->GetService(__uuidof(IAudioCaptureClient),
(void**)&captureClient);
HANDLE hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
audioClient->SetEventHandle(hEvent);

audioClient->Start();

일반 Loopback:
시스템 전체 오디오 (Discord + YouTube + 알림 + ...)
Process Loopback:
특정 앱만 (Discord만!)
// 1. Process Loopback 파라미터
AUDIOCLIENT_PROCESS_LOOPBACK_PARAMS loopbackParams = {};
loopbackParams.TargetProcessId = discordPid;
loopbackParams.ProcessLoopbackMode =
PROCESS_LOOPBACK_MODE_INCLUDE_TARGET_PROCESS_TREE;
// 2. Activation 파라미터
AUDIOCLIENT_ACTIVATION_PARAMS activationParams = {};
activationParams.ActivationType =
AUDIOCLIENT_ACTIVATION_TYPE_PROCESS_LOOPBACK;
activationParams.ProcessLoopbackParams = &loopbackParams;
// 3. PROPVARIANT 래핑
PROPVARIANT activateParams = {};
activateParams.vt = VT_BLOB;
activateParams.blob.cbSize = sizeof(activationParams);
activateParams.blob.pBlobData = (BYTE*)&activationParams;

ActivateAudioInterfaceAsync(
deviceIdString,
__uuidof(IAudioClient),
&activateParams,
pCompletionHandler,
&pAsyncOp
);
WaitForSingleObject(hCompletionEvent, INFINITE);

audioClient->Start();
while (!stopped) {
// 이벤트 대기
WaitForSingleObject(hCaptureEvent, 2000);
UINT32 packetLength;
captureClient->GetNextPacketSize(&packetLength);
while (packetLength > 0) {
BYTE* data;
UINT32 numFrames;
DWORD flags;
captureClient->GetBuffer(&data, &numFrames, &flags, NULL, NULL);
// 무음 아니면 처리
if (!(flags & AUDCLNT_BUFFERFLAGS_SILENT)) {
ProcessAudio(data, numFrames * blockAlign);
}
captureClient->ReleaseBuffer(numFrames);
captureClient->GetNextPacketSize(&packetLength);
}
}
audioClient->Stop();

| 플래그 | 값 | 의미 |
|---|---|---|
| AUDCLNT_BUFFERFLAGS_SILENT | 0x2 | 무음 구간 |
| AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY | 0x1 | 데이터 불연속 |
// ❌ 독점 모드
audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, ...);
// ✅ 공유 모드
audioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, ...);

// 포맷 지원 확인
WAVEFORMATEX* closestMatch;
HRESULT hr = audioClient->IsFormatSupported(
AUDCLNT_SHAREMODE_SHARED,
&requestedFormat,
&closestMatch
);
if (hr == S_FALSE) {
// closestMatch 사용
}

// 10ms 버퍼 (100ns 단위)
REFERENCE_TIME bufferDuration = 100000;
audioClient->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_LOOPBACK,
bufferDuration,
0, &format, NULL
);

DWORD taskIndex;
HANDLE hTask = AvSetMmThreadCharacteristics(TEXT("Audio"), &taskIndex);
// ... 캡처 루프 ...
AvRevertMmThreadCharacteristics(hTask);

// 오디오 스레드
queue.enqueue(audioData); // 빠름!
// Worker 스레드
if (queue.try_dequeue(data)) {
SendToServer(data); // 느린 작업
}