Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
"everyNthFrame": 6,
"minIntervalUsec": 200000,
"queueDepth": 8,
"normalize": {
"width": 0,
"height": 0,
"pixelFmt": ""
},
"snapshots": {
"enabled": false,
"dir": "./recordings/snapshots",
Expand Down
5 changes: 5 additions & 0 deletions config.rtsp.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
"everyNthFrame": 6,
"minIntervalUsec": 200000,
"queueDepth": 8,
"normalize": {
"width": 0,
"height": 0,
"pixelFmt": ""
},
"snapshots": {
"enabled": true,
"dir": "./recordings/snapshots",
Expand Down
5 changes: 5 additions & 0 deletions packaging/config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
"everyNthFrame": 6,
"minIntervalUsec": 200000,
"queueDepth": 8,
"normalize": {
"width": 0,
"height": 0,
"pixelFmt": ""
},
"snapshots": {
"enabled": false,
"dir": "./recordings/snapshots",
Expand Down
5 changes: 5 additions & 0 deletions packaging/config.rtsp.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
"everyNthFrame": 6,
"minIntervalUsec": 200000,
"queueDepth": 8,
"normalize": {
"width": 0,
"height": 0,
"pixelFmt": ""
},
"snapshots": {
"enabled": true,
"dir": "./recordings/snapshots",
Expand Down
9 changes: 9 additions & 0 deletions src/server/internal/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,15 @@ ConfigLoadResult loadConfigResult(const std::string& path)
c.vision.everyNthFrame = v.value("everyNthFrame", c.vision.everyNthFrame);
c.vision.minIntervalUsec = v.value("minIntervalUsec", c.vision.minIntervalUsec);
c.vision.queueDepth = v.value("queueDepth", c.vision.queueDepth);
if (v.contains("normalize")) {
auto& normalize = v["normalize"];
c.vision.normalize.width =
normalize.value("width", c.vision.normalize.width);
c.vision.normalize.height =
normalize.value("height", c.vision.normalize.height);
c.vision.normalize.pixelFmt =
normalize.value("pixelFmt", c.vision.normalize.pixelFmt);
}
if (v.contains("motion")) {
auto& motion = v["motion"];
c.vision.motionGridWidth = motion.value("gridWidth", c.vision.motionGridWidth);
Expand Down
8 changes: 8 additions & 0 deletions src/server/internal/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ struct Config

struct VisionConfig
{
// Configuration for the vision frame normalizer stage.
// Defaults of 0 / empty string appear to mean "leave that dimension/format
// unchanged" (passthrough) — TODO confirm against vision::FrameNormalizer.
struct NormalizerConfig
{
// Target output width in pixels; 0 presumably keeps the source width — verify in FrameNormalizer.
int width = 0;
// Target output height in pixels; 0 presumably keeps the source height — verify in FrameNormalizer.
int height = 0;
// Target pixel format name; empty presumably disables pixel-format conversion — verify in FrameNormalizer.
std::string pixelFmt;
};

struct SnapshotConfig
{
bool enabled = false;
Expand All @@ -49,6 +56,7 @@ struct Config
uint32_t everyNthFrame = 6;
int64_t minIntervalUsec = 200000;
int queueDepth = 8;
NormalizerConfig normalize;
uint32_t motionGridWidth = 32;
uint32_t motionGridHeight = 18;
uint32_t motionWarmupFrames = 2;
Expand Down
32 changes: 30 additions & 2 deletions src/server/internal/media.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ std::string intelligenceSourceLabel(const Config& config, const std::string& pee
return config.source.empty() ? peerId : config.source;
}


// Build the stable stream identifier used by the intelligence pipeline
// for a given peer: the peer id prefixed with "stream/".
std::string intelligenceStreamId(const std::string& peerId)
{
    std::string id{"stream/"};
    id += peerId;
    return id;
}

} // namespace


Expand Down Expand Up @@ -425,6 +431,12 @@ json::Value MediaSession::intelligenceStatus() const
vision["sampledFrames"] = stats.forwarded;
vision["sampledDropped"] = stats.dropped;
}
if (_visionNormalizer) {
const auto stats = _visionNormalizer->stats();
vision["normalizedFrames"] = stats.emitted;
vision["normalizerDropped"] = stats.dropped;
vision["normalizerConverted"] = stats.converted;
}
if (_visionQueue) {
vision["queueDepth"] = static_cast<std::uint64_t>(_visionQueue->size());
vision["queueDropped"] = static_cast<std::uint64_t>(_visionQueue->dropped());
Expand Down Expand Up @@ -577,6 +589,8 @@ void MediaSession::startStreaming()

if (_visionSampler)
_visionSampler->reset();
if (_visionNormalizer)
_visionNormalizer->reset();
if (_visionDetector)
_visionDetector->reset();
if (_visionArtifacts)
Expand Down Expand Up @@ -605,6 +619,7 @@ void MediaSession::setupIntelligence()
return;

const auto sourceLabel = intelligenceSourceLabel(_config, _peerId);
const auto streamId = intelligenceStreamId(_peerId);

if (_config.vision.enabled) {
_visionArtifacts = std::make_unique<VisionArtifacts>(
Expand All @@ -623,9 +638,18 @@ void MediaSession::setupIntelligence()
.everyNthFrame = _config.vision.everyNthFrame,
.minIntervalUsec = _config.vision.minIntervalUsec,
});
_visionNormalizer = std::make_shared<vision::FrameNormalizer>(
vision::FrameNormalizerConfig{
.sourceId = sourceLabel,
.streamId = streamId,
.width = _config.vision.normalize.width,
.height = _config.vision.normalize.height,
.pixelFmt = _config.vision.normalize.pixelFmt,
});
_visionQueue = std::make_shared<vision::DetectionQueue>(_config.vision.queueDepth);
_visionDetector = std::make_unique<vision::MotionDetector>(vision::MotionDetectorConfig{
.source = sourceLabel,
.streamId = streamId,
.detectorName = "motion",
.gridWidth = _config.vision.motionGridWidth,
.gridHeight = _config.vision.motionGridHeight,
Expand All @@ -644,12 +668,16 @@ void MediaSession::setupIntelligence()
_visionSampler->process(packet);
};
_visionSampler->emitter += [this](IPacket& packet) {
auto* frame = dynamic_cast<av::PlanarVideoPacket*>(&packet);
if (_visionNormalizer)
_visionNormalizer->process(packet);
};
_visionNormalizer->emitter += [this](IPacket& packet) {
auto* frame = dynamic_cast<vision::VisionFramePacket*>(&packet);
if (frame && _visionQueue)
_visionQueue->process(*frame);
};
_visionQueue->emitter += [this](IPacket& packet) {
auto* frame = dynamic_cast<av::PlanarVideoPacket*>(&packet);
auto* frame = dynamic_cast<vision::VisionFramePacket*>(&packet);
if (frame && _visionDetector)
_visionDetector->process(*frame);
};
Expand Down
2 changes: 2 additions & 0 deletions src/server/internal/media.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "icy/speech/voiceactivitydetector.h"
#include "icy/symple/server.h"
#include "icy/vision/detectionqueue.h"
#include "icy/vision/framenormalizer.h"
#include "icy/vision/framesampler.h"
#include "icy/vision/motiondetector.h"
#include "icy/webrtc/peersession.h"
Expand Down Expand Up @@ -113,6 +114,7 @@ class MediaSession : public std::enable_shared_from_this<MediaSession>
std::shared_ptr<av::VideoPacketEncoder> _videoEncoder;
std::shared_ptr<av::AudioPacketEncoder> _audioEncoder;
std::shared_ptr<vision::FrameSampler> _visionSampler;
std::shared_ptr<vision::FrameNormalizer> _visionNormalizer;
std::shared_ptr<vision::DetectionQueue> _visionQueue;
std::unique_ptr<vision::MotionDetector> _visionDetector;
std::shared_ptr<speech::SpeechQueue> _speechQueue;
Expand Down
27 changes: 18 additions & 9 deletions src/server/internal/visionartifacts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ double fpsFromWindow(uint64_t frames, int64_t firstUsec, int64_t lastUsec)
(static_cast<long double>(frames - 1) * 1000000.0L) / durationUsec);
}


// Resolve the timestamp to associate with a vision event: prefer the
// frame's presentation timestamp when it carries a positive value,
// otherwise fall back to the time the event was emitted.
int64_t eventFrameTimeUsec(const vision::VisionEvent& event)
{
    const int64_t pts = event.frame.ptsUsec;
    const int64_t emitted = event.emittedAtUsec;
    return (pts > 0) ? pts : emitted;
}

} // namespace


Expand Down Expand Up @@ -152,22 +160,23 @@ void VisionArtifacts::onFrame(const av::PlanarVideoPacket& packet)
VisionArtifactResult VisionArtifacts::onEvent(const vision::VisionEvent& event)
{
std::lock_guard lock(_mutex);
const int64_t frameTimeUsec = eventFrameTimeUsec(event);

VisionArtifactResult result;
result.latencyUsec = latencyForFrameLocked(event.frame.timeUsec);
result.latencyUsec = latencyForFrameLocked(frameTimeUsec);
_lastLatencyUsec = result.latencyUsec;

if (_config.snapshotsEnabled) {
const int64_t sinceLastSnapshot = event.frame.timeUsec - _lastSnapshotTimeUsec;
const int64_t sinceLastSnapshot = frameTimeUsec - _lastSnapshotTimeUsec;
if (_lastSnapshotTimeUsec == 0 ||
sinceLastSnapshot >= _config.snapshotMinIntervalUsec) {
if (auto* frame = bestFrameLocked(event.frame.timeUsec)) {
const auto path = makeSnapshotPathLocked(event.frame.timeUsec);
if (auto* frame = bestFrameLocked(frameTimeUsec)) {
const auto path = makeSnapshotPathLocked(frameTimeUsec);
if (writeSnapshotLocked(*frame, path)) {
result.snapshotPath = path;
result.snapshotUrl = artifactUrlFor(
fs::makePath("snapshots", fs::filename(path)));
_lastSnapshotTimeUsec = event.frame.timeUsec;
_lastSnapshotTimeUsec = frameTimeUsec;
_lastSnapshotPath = result.snapshotPath;
_lastSnapshotUrl = result.snapshotUrl;
++_snapshotsWritten;
Expand All @@ -180,14 +189,14 @@ VisionArtifactResult VisionArtifacts::onEvent(const vision::VisionEvent& event)
}

if (_config.clipsEnabled) {
if (_clip && event.frame.timeUsec <= _clip->deadlineUsec) {
if (_clip && frameTimeUsec <= _clip->deadlineUsec) {
_clip->deadlineUsec = std::max(
_clip->deadlineUsec,
event.frame.timeUsec + _config.clipPostRollUsec);
frameTimeUsec + _config.clipPostRollUsec);
} else {
finishClipLocked();
startClipLocked(event.frame.timeUsec);
flushBufferedFramesLocked(event.frame.timeUsec - _config.clipPreRollUsec);
startClipLocked(frameTimeUsec);
flushBufferedFramesLocked(frameTimeUsec - _config.clipPreRollUsec);
}

if (_clip) {
Expand Down
10 changes: 9 additions & 1 deletion web/src/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,15 @@ function buildArtifactLinks (event) {
}

function formatEventTime (event) {
const usec = Number(event?.time || event?.audio?.time || event?.frame?.time || 0)
const usec = Number(
event?.frame?.ptsUsec ??
event?.audio?.timeUsec ??
event?.time ??
event?.audio?.time ??
event?.frame?.time ??
event?.emittedAtUsec ??
0
)
if (!Number.isFinite(usec) || usec <= 0) {
return 'live'
}
Expand Down
Loading