Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions README.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,46 @@ Min/avg/max FPS: 167.926 / 1598.4 / 6024.1

50/90/95/99 percentiles (us): 410 / 434 / 589 / 5018
----


Accuracy
--------

When available, https://www.opengl.org/registry/specs/ARB/timer_query.txt[OpenGL timer query objects]
are used to provide more accurate results:
for each frame, the GPU timestamp -- at the moment when all GPU commands are finished --
is used to calculate the frametime.

When not available, the CPU timestamp -- just before the asynchronous swap buffers function is called --
is used to calculate the frametime.
To force using CPU timestamps, set the environment variable `LIBFRAMETIME_TIMERQUERIES_DISABLED=1`.

The improvement of using GPU timestamps over CPU timestamps is quite dramatic,
the former resembles the visual experience much more.
Now the frametimes can be used to perform variance analysis more reliably.
For reference, below figure compares both methods in a single run of the Unigine Valley benchmark.

image::https://cdn.pbrd.co/images/rjm9xX9Bm.png[Figure which shows the asynchronous nature of swap buffers when using CPU timestamps results in a lot of jitter compared to using GPU timestamps]

*Warning*: some driver implementations have a bug where the drift of the GPU timer is significant (up to (-)40ms per second).
Currently, Intel Broadwell and Skylake on Mesa are known to be affected by this problem.
Run the `arb_timer_query-timestamp-get` test of https://piglit.freedesktop.org/[the piglit framework]
to see whether you're affected.
Alternatively, just perform a run of a benchmark with timer queries enabled vs disabled,
and compare avg frametimes to see whether there's a significant difference.
If you're affected, but the drift is consistent between runs,
you can still use the results after having applied following scaling factor
(positive drift means GPU timer is slower than CPU timer; CPU timer is considered correct):
----
corrected_frametime = reported_frametime * 1000000000ns / (drift + 1000000000ns)
----

The use of timer queries should not degrade performance of the to-be-benchmarked application.
If you think it does, set the environment variable `LIBFRAMETIME_TIMERQUERIES_DEBUG=1`
and verify the following is not printed to stderr during the benchmark:
----
libframetime: Warning: timer query result not yet available
----
If it is printed, please report this as a GitHub issue on this project page,
in that case it would also be interesting to run
https://github.com/Eliasvan/find-min-required-number-of-timer-queries[find-min-required-number-of-timer-queries].
237 changes: 193 additions & 44 deletions frametime.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
libframetime, a frame time dumper for all GL applications
Copyright (C) 2013 Lauri Kasanen
Copyright (C) 2016 Elias Vanderstuyft

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
Expand Down Expand Up @@ -47,7 +48,32 @@ static FILE *f = NULL;
static struct timeval oldtime;
static u8 firstdone = 0;

static void timing() {
#ifdef GL_ARB_timer_query
static PFNGLGETSTRINGIPROC glGetStringi = NULL;
static PFNGLGETQUERYIVPROC glGetQueryiv = NULL;
static PFNGLGENQUERIESPROC glGenQueries = NULL;
static PFNGLQUERYCOUNTERPROC glQueryCounter = NULL;
static PFNGLGETQUERYOBJECTIVPROC glGetQueryObjectiv = NULL;
static PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v = NULL;

static u8 timerqueries_disabled = 0;
static u8 timerqueries_debug = 0;
/*
The following constant must be set high enough such that
after TIMERQUERIES_NUM frames the first query result should be available.
Tested with Skylake and nouveau GTX 760;
run with LIBFRAMETIME_TIMERQUERIES_DEBUG=1 to verify this works (= stderr clear) on your system.
*/
#define TIMERQUERIES_NUM 4
static GLuint timerqueries[TIMERQUERIES_NUM];
static u8 timerqueries_idxinit = 0, timerqueries_idxback = 0, timerqueries_idxfront = 1;
static GLuint64 timerqueries_oldtime;
static GLint timerqueries_numbits = 0;
#else
static const u8 timerqueries_disabled = 1;
#endif

static void timing_cpuclock() {

if (!firstdone) {
gettimeofday(&oldtime, NULL);
Expand All @@ -66,6 +92,115 @@ static void timing() {
}
}

#ifdef GL_ARB_timer_query
static int init_timerqueries() {

# define INIT_GLEXT_AND_RETURN_ON_NULL(function_type, function_name) \
do { \
function_name = (function_type)real_glXGetProcAddressARB((GLubyte *)#function_name); \
if (!function_name) { \
fprintf(stderr, PREFIX "Error: function pointer to %s is NULL\n", #function_name); \
return -1; \
} \
} while (0)

// If this function is not available (v3.0+), GL_ARB_timer_query isn't either (v3.2+).
INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETSTRINGIPROC, glGetStringi);

const GLubyte *extension;
int i = 0;
while ((extension = glGetStringi(GL_EXTENSIONS, i++)) && strcmp((const char *)extension, "GL_ARB_timer_query"));
if (!extension) {
fprintf(stderr, PREFIX "Error: extension GL_ARB_timer_query is not available\n");
return -1;
}

INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETQUERYIVPROC, glGetQueryiv);
INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGENQUERIESPROC, glGenQueries);
INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLQUERYCOUNTERPROC, glQueryCounter);
INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETQUERYOBJECTIVPROC, glGetQueryObjectiv);
INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETQUERYOBJECTUI64VPROC, glGetQueryObjectui64v);

# undef INIT_GLEXT_AND_RETURN_ON_NULL

glGetQueryiv(GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &timerqueries_numbits);
if (timerqueries_debug)
printf(PREFIX "Number of bits of timer query counters: %d\n", timerqueries_numbits);
if (timerqueries_numbits < 30) {
// According to the spec, implementations with less than 30 bits (frame of ~ 1s) are not allowed.
fprintf(stderr, PREFIX "Error: number of bits of timer query counters (%d) is too small\n", timerqueries_numbits);
return -1;
}

glGenQueries(TIMERQUERIES_NUM, timerqueries);
return 0;
}

static void timing_timerqueries() {

if (!timerqueries_idxinit) {
// Initialize timer queries now, because we now have a valid OpenGL context.
if (init_timerqueries()) {
// Degrade gracefully upon failure by using timing_cpuclock() instead.
fprintf(stderr, PREFIX "Failed to init timer queries, therefore using cpu clock instead\n");
timerqueries_disabled = 1;
return;
}
}

// Schedule a record of the GPU timestamp after the GPU commands submitted for this frame (i.e. at idx back).
glQueryCounter(timerqueries[timerqueries_idxback], GL_TIMESTAMP);

if (timerqueries_idxinit < TIMERQUERIES_NUM - 1) {
// Don't query any result yet, since not all timer queries are scheduled yet.
++timerqueries_idxinit;
} else {
if (timerqueries_debug) {
// Verify assumption "result is available"; if not true, querying the result would stall the pipeline.
GLint available = 0;
glGetQueryObjectiv(timerqueries[timerqueries_idxfront], GL_QUERY_RESULT_AVAILABLE, &available);
if (!available)
fprintf(stderr, PREFIX "Warning: timer query result not yet available, "
"TIMERQUERIES_NUM (%d) is too low => please report this issue!\n", TIMERQUERIES_NUM);
}

// Query the oldest result (i.e. at idx front), we assume the result is available by now.
GLuint64 timerqueries_newtime = 0;
glGetQueryObjectui64v(timerqueries[timerqueries_idxfront], GL_QUERY_RESULT, &timerqueries_newtime);

if (timerqueries_idxinit < TIMERQUERIES_NUM) {
// timerqueries_oldtime is not yet valid, so we can't calculate a delta yet.
timerqueries_idxinit = TIMERQUERIES_NUM;
} else {
// Calculate delta, consider timer query bits, and perform integer rounding to convert from ns to us.
const GLuint64 nsec = (timerqueries_newtime - timerqueries_oldtime) &
(timerqueries_numbits < 64 ?
((GLuint64) 1 << timerqueries_numbits) - 1 : (GLuint64) -1);
const GLuint64 usec = (nsec + 500) / 1000;

fprintf(f, "Frametime %" PRIu64 " us\n", usec);
fflush(f);
}
timerqueries_oldtime = timerqueries_newtime;
}

// 'Swap'/rotate idx pointers to timer queries.
timerqueries_idxback = timerqueries_idxfront;
timerqueries_idxfront = (timerqueries_idxfront + 1) % TIMERQUERIES_NUM;
}
#endif

static void timing() {

# ifdef GL_ARB_timer_query
if (!timerqueries_disabled)
timing_timerqueries();
# endif

if (timerqueries_disabled)
timing_cpuclock();
}

static void die(const char fmt[], ...) {

va_list ap;
Expand Down Expand Up @@ -93,14 +228,19 @@ static void init_dlsym() {
void *dlsym(void *handle, const char *symbol) {
// High evil wrapping this function.

if (symbol && !strcmp(symbol, "glXGetProcAddressARB"))
return glXGetProcAddressARB;
if (symbol && !strcmp(symbol, "glXSwapBuffers"))
return glXSwapBuffers;
#ifndef NO_EGL
if (symbol && !strcmp(symbol, "eglSwapBuffers"))
return eglSwapBuffers;
#endif
# define RETURN_ON_MATCH(function_name) \
do { \
if (symbol && !strcmp(symbol, #function_name)) \
return function_name; \
} while (0)

RETURN_ON_MATCH(glXGetProcAddressARB);
RETURN_ON_MATCH(glXSwapBuffers);
# ifndef NO_EGL
RETURN_ON_MATCH(eglSwapBuffers);
# endif

# undef RETURN_ON_MATCH

if (!real_dlsym)
init_dlsym();
Expand Down Expand Up @@ -132,56 +272,65 @@ EGLBoolean eglSwapBuffers(EGLDisplay display, EGLSurface surface) {
}
#endif

static void *load_func_from_next(const char * const function_name) {

dlerror();
void * const real_function = real_dlsym(RTLD_NEXT, function_name);

const char * const err = dlerror();
if (err)
die(PREFIX "dlsym failed: %s\n", err);

return real_function;
}

static void *load_func_from_lib(const char * const function_name, const char * const library_name) {

// If possible, load from the next object such that we don't have to load the lib.
void *real_function = load_func_from_next(function_name);

if (!real_function) {
void * const lib = dlopen(library_name, RTLD_LAZY);
if (!lib)
die(PREFIX "Failed to open %s\n", library_name);

real_function = real_dlsym(lib, function_name);
if (!real_function)
die(PREFIX "Failed to find %s in %s\n", function_name, library_name);
}

return real_function;
}

static void init() __attribute__((constructor));
static void deinit() __attribute__((destructor));

static void init() {

const char * const env = getenv("LIBFRAMETIME_FILE");
const char *env = getenv("LIBFRAMETIME_FILE");
const char * const name = env ? env : "/tmp/libframetime.out";

f = fopen(name, "w");

if (!f)
die(PREFIX "Failed to open %s for writing\n", name);

if (!real_dlsym)
init_dlsym();

dlerror();
real_glXSwapBuffers = real_dlsym(RTLD_NEXT, "glXSwapBuffers");

const char *err = dlerror();
if (err)
die(PREFIX "dlsym failed: %s\n", err);

real_glXGetProcAddressARB = real_dlsym(RTLD_NEXT, "glXGetProcAddressARB");

// If the app loads libs dynamically, the symbol may be NULL.
if (!real_glXSwapBuffers) {
void *libgl = dlopen("libGL.so", RTLD_LAZY);
if (!libgl)
die(PREFIX "dynamic libGL failed\n");
real_glXSwapBuffers = real_dlsym(libgl, "glXSwapBuffers");
real_glXGetProcAddressARB = real_dlsym(libgl, "glXGetProcAddressARB");
}
# ifdef GL_ARB_timer_query
env = getenv("LIBFRAMETIME_TIMERQUERIES_DISABLED");
timerqueries_disabled = (env && !strcmp(env, "1"));

#ifndef NO_EGL
dlerror();
real_eglSwapBuffers = real_dlsym(RTLD_NEXT, "eglSwapBuffers");
env = getenv("LIBFRAMETIME_TIMERQUERIES_DEBUG");
timerqueries_debug = (env && !strcmp(env, "1"));
# endif

err = dlerror();
if (err)
die(PREFIX "dlsym failed: %s\n", err);
if (!real_dlsym)
init_dlsym();

// If the app loads libs dynamically, the symbol may be NULL.
if (!real_eglSwapBuffers) {
void *libegl = dlopen("libEGL.so", RTLD_LAZY);
if (!libegl)
die(PREFIX "dynamic libEGL failed\n");
real_eglSwapBuffers = real_dlsym(libegl, "eglSwapBuffers");
}
#endif
real_glXGetProcAddressARB = load_func_from_lib("glXGetProcAddressARB", "libGL.so");
real_glXSwapBuffers = load_func_from_lib("glXSwapBuffers", "libGL.so");
# ifndef NO_EGL
real_eglSwapBuffers = load_func_from_lib("eglSwapBuffers", "libEGL.so");
# endif
}

static void deinit() {
Expand Down