Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.19)

# Audio Sending is implemented, but not performant enough yet
add_compile_definitions(SEND_AUDIO=0)
add_compile_definitions(SEND_VIDEO=1)

if(NOT IDF_TARGET STREQUAL linux)
if(NOT DEFINED ENV{WIFI_SSID} OR NOT DEFINED ENV{WIFI_PASSWORD})
Expand Down
2 changes: 2 additions & 0 deletions src/idf_component.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
dependencies:
espressif/esp32-camera: "^2.0.12"
espressif/esp_h264: "^1.0.4"
idf:
version: ">=4.1.0"
18 changes: 18 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <peer.h>

#ifndef LINUX_BUILD
#include <esp_h264_enc_single_sw.h>

#include "nvs_flash.h"

extern "C" void app_main(void) {
Expand All @@ -18,8 +20,24 @@ extern "C" void app_main(void) {

ESP_ERROR_CHECK(esp_event_loop_create_default());
peer_init();

#if SEND_AUDIO
lk_init_audio_capture();
lk_init_audio_decoder();
#endif

// SEND_VIDEO is supplied by the build as a 0/1 value (like SEND_AUDIO), so
// test its value rather than its mere existence: with #ifdef, SEND_VIDEO=0
// would still bring up the camera and the encoder.
#if SEND_VIDEO
  if (lk_init_video_capture() != ESP_OK) {
    printf("Camera Init Failed\n");
    return;
  }

  if (lk_init_video_encoder() != ESP_H264_ERR_OK) {
    printf("Video Encoder failed to start\n");
    return;
  }
#endif

lk_wifi();
lk_websocket(LIVEKIT_URL, LIVEKIT_TOKEN);
}
Expand Down
6 changes: 5 additions & 1 deletion src/main.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@ void lk_websocket(const char *url, const char *token);
void lk_wifi(void);
void lk_init_audio_capture(void);
void lk_init_audio_decoder(void);
void lk_populate_answer(char *answer, size_t answer_size, int include_audio);
void lk_populate_answer(char *answer, size_t answer_size, int include_media);
void lk_publisher_peer_connection_task(void *user_data);
void lk_subscriber_peer_connection_task(void *user_data);
void lk_audio_encoder_task(void *arg);
void lk_audio_decode(uint8_t *data, size_t size);
void lk_init_audio_encoder();
void lk_send_audio(PeerConnection *peer_connection);

// Grabs one camera frame, H.264-encodes it and sends it over
// peer_connection. Failures are reported with printf; nothing is returned.
void lk_send_video(PeerConnection *peer_connection);
// Initialises the camera driver. Returns the esp_camera_init() result, which
// callers compare against ESP_OK.
int lk_init_video_capture(void);
// Creates and opens the H.264 encoder. Returns ESP_H264_ERR_OK on success,
// otherwise the error code of the step that failed.
int lk_init_video_encoder(void);
161 changes: 161 additions & 0 deletions src/media.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
#include <driver/i2s.h>
#include <driver/i2s_pdm.h>
#include <driver/i2s_std.h>
#include <esp_camera.h>
#include <esp_h264_alloc.h>
#include <esp_h264_enc_single.h>
#include <esp_h264_enc_single_sw.h>
#include <opus.h>

#include "main.h"
Expand All @@ -16,6 +22,59 @@
#define OPUS_ENCODER_BITRATE 30000
#define OPUS_ENCODER_COMPLEXITY 0

#define CAM_PIN_PWDN -1
#define CAM_PIN_RESET -1
#define CAM_PIN_XCLK 15
#define CAM_PIN_SIOD 4
#define CAM_PIN_SIOC 5
#define CAM_PIN_D7 16
#define CAM_PIN_D6 17
#define CAM_PIN_D5 18
#define CAM_PIN_D4 12
#define CAM_PIN_D3 10
#define CAM_PIN_D2 8
#define CAM_PIN_D1 9
#define CAM_PIN_D0 11
#define CAM_PIN_VSYNC 6
#define CAM_PIN_HREF 7
#define CAM_PIN_PCLK 13

/**
 * Zero-initialised, aligned allocation helper used for H.264 buffers.
 *
 * @param alignment   Required alignment of the returned buffer, in bytes.
 * @param n           Number of elements.
 * @param size        Size of one element, in bytes.
 * @param actual_size Out: n * size rounded up to a multiple of alignment.
 * @param caps        heap_caps capability flags (e.g. MALLOC_CAP_SPIRAM).
 * @return The buffer returned by heap_caps_aligned_calloc(); callers in this
 *         file release it with heap_caps_free().
 */
void *esp_h264_aligned_calloc(uint32_t alignment, uint32_t n, uint32_t size,
                              uint32_t *actual_size, uint32_t caps) {
  const uint32_t padded_size = ALIGN_UP(n * size, alignment);
  *actual_size = padded_size;

  // Allocate the padded size that is reported through actual_size. Allocating
  // only n * size would let a caller that trusts actual_size write past the
  // end of the buffer whenever n * size is not a multiple of alignment.
  return heap_caps_aligned_calloc((size_t)alignment, (size_t)1,
                                  (size_t)padded_size, caps);
}

/*
* SPDX-FileCopyrightText: 2021 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <errno.h>
#include <unistd.h>

#include "sdkconfig.h"

#ifdef CONFIG_FREERTOS_UNICORE
#define CPU_NUM 1
#else
#define CPU_NUM CONFIG_SOC_CPU_CORES_NUM
#endif

/*
 * Minimal sysconf() replacement: only the processor-count queries are
 * answered (with CPU_NUM); every other request fails with EINVAL.
 */
long sysconf(int arg) {
  const int is_cpu_count_query =
      (arg == _SC_NPROCESSORS_CONF) || (arg == _SC_NPROCESSORS_ONLN);

  if (is_cpu_count_query) {
    return CPU_NUM;
  }

  errno = EINVAL;
  return -1;
}

void lk_init_audio_capture() {
i2s_config_t i2s_config_out = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX),
Expand Down Expand Up @@ -104,6 +163,7 @@ void lk_audio_decode(uint8_t *data, size_t size) {
OpusEncoder *opus_encoder = NULL;
opus_int16 *encoder_input_buffer = NULL;
uint8_t *encoder_output_buffer = NULL;
esp_h264_enc_handle_t h264_encoder = NULL;

void lk_init_audio_encoder() {
int encoder_error;
Expand Down Expand Up @@ -140,3 +200,104 @@ void lk_send_audio(PeerConnection *peer_connection) {
peer_connection_send_audio(peer_connection, encoder_output_buffer,
encoded_size);
}

// File-scope state used by lk_send_video().
camera_fb_t *fb = NULL;
esp_h264_enc_in_frame_t in_frame;
int ret;

/**
 * Captures one camera frame, H.264-encodes it and sends the result over
 * peer_connection.
 *
 * Any failure (capture, allocation, encode, send) is reported with printf and
 * the frame is dropped. The camera frame buffer and the temporary output
 * buffer are released on every path that acquired them.
 *
 * @param peer_connection Connection handed to peer_connection_send_video().
 */
void lk_send_video(PeerConnection *peer_connection) {
  fb = esp_camera_fb_get();
  if (!fb) {
    // Nothing was acquired, so there is no frame buffer to give back.
    printf("Camera capture failed\n");
    return;
  }

  in_frame.raw_data.len = fb->len;
  in_frame.raw_data.buffer = fb->buf;

  // Zero-initialise so no field of the output descriptor starts as garbage.
  esp_h264_enc_out_frame_t out_frame = {};

  // Output buffer sized at 2 bytes per pixel of the captured frame.
  out_frame.raw_data.len = fb->width * fb->height * 2;
  out_frame.raw_data.buffer = (uint8_t *)esp_h264_aligned_calloc(
      16, 1, out_frame.raw_data.len, &out_frame.raw_data.len,
      MALLOC_CAP_SPIRAM);

  if (out_frame.raw_data.buffer == NULL) {
    // Do not hand a NULL output buffer to the encoder.
    printf("failed to allocate video output buffer\n");
    esp_camera_fb_return(fb);
    return;
  }

  ret = esp_h264_enc_process(h264_encoder, &in_frame, &out_frame);
  if (ret != ESP_H264_ERR_OK) {
    printf("failed to encode %d\n", ret);
  } else {
    ret = peer_connection_send_video(peer_connection,
                                     (uint8_t *)out_frame.raw_data.buffer,
                                     (int)out_frame.length);
    if (ret < 1) {
      printf("failed to send video %d\n", ret);
    }
  }

  // Single release point for both resources acquired above.
  heap_caps_free(out_frame.raw_data.buffer);
  esp_camera_fb_return(fb);
}

int lk_init_video_encoder() {
esp_h264_enc_cfg_sw_t cfg;
cfg.gop = 20;
cfg.fps = 20;
cfg.res.width = 96;
cfg.res.height = 96;
cfg.rc.bitrate = cfg.res.width * cfg.res.height * cfg.fps / 20;
cfg.rc.qp_min = 10;
cfg.rc.qp_max = 10;
cfg.pic_type = ESP_H264_RAW_FMT_YUYV;

int ret;

if ((ret = esp_h264_enc_sw_new(&cfg, &h264_encoder)) != ESP_H264_ERR_OK) {
return ret;
}

if ((ret = esp_h264_enc_open(h264_encoder)) != ESP_H264_ERR_OK) {
return ret;
}

return ESP_H264_ERR_OK;
}

int lk_init_video_capture() {
static camera_config_t camera_config = {
.pin_pwdn = CAM_PIN_PWDN,
.pin_reset = CAM_PIN_RESET,

.pin_xclk = CAM_PIN_XCLK,

.pin_sccb_sda = CAM_PIN_SIOD,
.pin_sccb_scl = CAM_PIN_SIOC,

.pin_d7 = CAM_PIN_D7,
.pin_d6 = CAM_PIN_D6,
.pin_d5 = CAM_PIN_D5,
.pin_d4 = CAM_PIN_D4,
.pin_d3 = CAM_PIN_D3,
.pin_d2 = CAM_PIN_D2,
.pin_d1 = CAM_PIN_D1,
.pin_d0 = CAM_PIN_D0,

.pin_vsync = CAM_PIN_VSYNC,
.pin_href = CAM_PIN_HREF,

.pin_pclk = CAM_PIN_PCLK,

.xclk_freq_hz = 16000000,

.ledc_timer = LEDC_TIMER_0,
.ledc_channel = LEDC_CHANNEL_0,

.pixel_format = PIXFORMAT_YUV422, // PIXFORMAT_YUV422,
.frame_size = FRAMESIZE_96X96,
.jpeg_quality = 10,
.fb_count = 2,
.grab_mode = CAMERA_GRAB_WHEN_EMPTY};

return esp_camera_init(&camera_config);
}
14 changes: 11 additions & 3 deletions src/webrtc.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#ifndef LINUX_BUILD
#include <driver/i2s.h>
#include <driver/i2s_pdm.h>
#include <driver/i2s_std.h>
#include <opus.h>
#endif

Expand Down Expand Up @@ -146,7 +147,9 @@ void lk_subscriber_peer_connection_task(void *user_data) {

void lk_publisher_peer_connection_task(void *user_data) {
#ifndef LINUX_BUILD
#if SEND_AUDIO
lk_init_audio_encoder();
#endif
#endif

while (1) {
Expand All @@ -166,7 +169,12 @@ void lk_publisher_peer_connection_task(void *user_data) {
}

#ifndef LINUX_BUILD
#if SEND_AUDIO
lk_send_audio(publisher_peer_connection);
#endif
#if SEND_VIDEO
lk_send_video(publisher_peer_connection);
#endif
#endif

peer_connection_loop(publisher_peer_connection);
Expand All @@ -177,8 +185,8 @@ void lk_publisher_peer_connection_task(void *user_data) {
PeerConnection *lk_create_peer_connection(int isPublisher) {
PeerConfiguration peer_connection_config = {
.ice_servers = {},
.audio_codec = CODEC_OPUS,
.video_codec = CODEC_NONE,
.audio_codec = CODEC_NONE,
.video_codec = CODEC_H264,
.datachannel = isPublisher ? DATA_CHANNEL_NONE : DATA_CHANNEL_STRING,
.onaudiotrack = [](uint8_t *data, size_t size, void *userdata) -> void {
#ifndef LINUX_BUILD
Expand Down
9 changes: 8 additions & 1 deletion src/websocket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,13 +304,20 @@ void lk_websocket(const char *room_url, const char *token) {

while (true) {
if (xSemaphoreTake(g_mutex, portMAX_DELAY) == pdTRUE) {
if (get_publisher_status() == 1 && SEND_AUDIO) {
if (get_publisher_status() == 1 && (SEND_AUDIO || SEND_VIDEO)) {
Livekit__SignalRequest r = LIVEKIT__SIGNAL_REQUEST__INIT;
Livekit__AddTrackRequest a = LIVEKIT__ADD_TRACK_REQUEST__INIT;

#if SEND_AUDIO
a.cid = (char *)"microphone";
a.name = (char *)"microphone";
a.source = LIVEKIT__TRACK_SOURCE__MICROPHONE;
#endif
#if SEND_VIDEO
a.cid = (char *)"camera";
a.name = (char *)"camera";
a.source = LIVEKIT__TRACK_SOURCE__CAMERA;
#endif

r.add_track = &a;
r.message_case = LIVEKIT__SIGNAL_REQUEST__MESSAGE_ADD_TRACK;
Expand Down