From b415a359f361f6ba4d59e467756023df0f38d569 Mon Sep 17 00:00:00 2001 From: Elias Vanderstuyft Date: Tue, 18 Oct 2016 22:18:06 +0200 Subject: [PATCH 1/5] Remove code duplication by using C macros: add and use RETURN_ON_MATCH(function_name) in dlsym() --- frametime.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/frametime.c b/frametime.c index 5dc3e4f..e44e1a8 100644 --- a/frametime.c +++ b/frametime.c @@ -93,14 +93,19 @@ static void init_dlsym() { void *dlsym(void *handle, const char *symbol) { // High evil wrapping this function. - if (symbol && !strcmp(symbol, "glXGetProcAddressARB")) - return glXGetProcAddressARB; - if (symbol && !strcmp(symbol, "glXSwapBuffers")) - return glXSwapBuffers; -#ifndef NO_EGL - if (symbol && !strcmp(symbol, "eglSwapBuffers")) - return eglSwapBuffers; -#endif +# define RETURN_ON_MATCH(function_name) \ + do { \ + if (symbol && !strcmp(symbol, #function_name)) \ + return function_name; \ + } while (0) + + RETURN_ON_MATCH(glXGetProcAddressARB); + RETURN_ON_MATCH(glXSwapBuffers); +# ifndef NO_EGL + RETURN_ON_MATCH(eglSwapBuffers); +# endif + +# undef RETURN_ON_MATCH if (!real_dlsym) init_dlsym(); From 4490ca6e68b3c3bf794b07d8ab1e8d6dd8d3fb59 Mon Sep 17 00:00:00 2001 From: Elias Vanderstuyft Date: Sun, 2 Oct 2016 17:08:06 +0200 Subject: [PATCH 2/5] Refactor the code by adding some currently unneeded error handling to init(), which will go away with the later patches by using C macros --- frametime.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/frametime.c b/frametime.c index e44e1a8..476a51d 100644 --- a/frametime.c +++ b/frametime.c @@ -154,13 +154,26 @@ static void init() { init_dlsym(); dlerror(); - real_glXSwapBuffers = real_dlsym(RTLD_NEXT, "glXSwapBuffers"); + real_glXGetProcAddressARB = real_dlsym(RTLD_NEXT, "glXGetProcAddressARB"); const char *err = dlerror(); if (err) die(PREFIX "dlsym failed: %s\n", err); - real_glXGetProcAddressARB = real_dlsym(RTLD_NEXT, "glXGetProcAddressARB"); + // If the app loads libs dynamically, the symbol may be NULL. + if (!real_glXGetProcAddressARB) { + void *libgl = dlopen("libGL.so", RTLD_LAZY); + if (!libgl) + die(PREFIX "dynamic libGL failed\n"); + real_glXGetProcAddressARB = real_dlsym(libgl, "glXGetProcAddressARB"); + } + + dlerror(); + real_glXSwapBuffers = real_dlsym(RTLD_NEXT, "glXSwapBuffers"); + + err = dlerror(); + if (err) + die(PREFIX "dlsym failed: %s\n", err); // If the app loads libs dynamically, the symbol may be NULL. if (!real_glXSwapBuffers) { @@ -168,7 +181,6 @@ static void init() { if (!libgl) die(PREFIX "dynamic libGL failed\n"); real_glXSwapBuffers = real_dlsym(libgl, "glXSwapBuffers"); - real_glXGetProcAddressARB = real_dlsym(libgl, "glXGetProcAddressARB"); } #ifndef NO_EGL From acf1625fabae10825036c5eb329bccffeccdd27f Mon Sep 17 00:00:00 2001 From: Elias Vanderstuyft Date: Sun, 2 Oct 2016 17:20:29 +0200 Subject: [PATCH 3/5] Error out when symbol was not found in dynamic library --- frametime.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/frametime.c b/frametime.c index 476a51d..633489e 100644 --- a/frametime.c +++ b/frametime.c @@ -166,6 +166,8 @@ static void init() { if (!libgl) die(PREFIX "dynamic libGL failed\n"); real_glXGetProcAddressARB = real_dlsym(libgl, "glXGetProcAddressARB"); + if (!real_glXGetProcAddressARB) + die(PREFIX "Failed to find glXGetProcAddressARB in libGL.so\n"); } dlerror(); @@ -181,6 +183,8 @@ static void init() { if (!libgl) die(PREFIX "dynamic libGL failed\n"); real_glXSwapBuffers = real_dlsym(libgl, "glXSwapBuffers"); + if (!real_glXSwapBuffers) + die(PREFIX "Failed to find glXSwapBuffers in libGL.so\n"); } #ifndef NO_EGL @@ -197,6 +201,8 @@ static void init() { if (!libegl) die(PREFIX "dynamic libEGL failed\n"); real_eglSwapBuffers = real_dlsym(libegl, "eglSwapBuffers"); + if (!real_eglSwapBuffers) + die(PREFIX "Failed to find eglSwapBuffers in libEGL.so\n"); } #endif } From dda8962dadcae8ee511a87d48752814b520531d4 Mon Sep 17 00:00:00 2001 From: Elias Vanderstuyft Date: Sun, 30 Oct 2016 22:39:13 +0100 Subject: [PATCH 4/5] Remove code duplication: add load_func_from_next() and load_func_from_lib() and use them in init() --- frametime.c | 87 +++++++++++++++++++++-------------------------------- 1 file changed, 35 insertions(+), 52 deletions(-) diff --git a/frametime.c b/frametime.c index 633489e..26312fe 100644 --- a/frametime.c +++ b/frametime.c @@ -137,6 +137,36 @@ EGLBoolean eglSwapBuffers(EGLDisplay display, EGLSurface surface) { } #endif +static void *load_func_from_next(const char * const function_name) { + + dlerror(); + void * const real_function = real_dlsym(RTLD_NEXT, function_name); + + const char * const err = dlerror(); + if (err) + die(PREFIX "dlsym failed: %s\n", err); + + return real_function; +} + +static void *load_func_from_lib(const char * const function_name, const char * const library_name) { + + // If possible, load from the next object such that we don't have to load the lib. + void *real_function = load_func_from_next(function_name); + + if (!real_function) { + void * const lib = dlopen(library_name, RTLD_LAZY); + if (!lib) + die(PREFIX "Failed to open %s\n", library_name); + + real_function = real_dlsym(lib, function_name); + if (!real_function) + die(PREFIX "Failed to find %s in %s\n", function_name, library_name); + } + + return real_function; +} + static void init() __attribute__((constructor)); static void deinit() __attribute__((destructor)); @@ -153,58 +183,11 @@ static void init() { if (!real_dlsym) init_dlsym(); - dlerror(); - real_glXGetProcAddressARB = real_dlsym(RTLD_NEXT, "glXGetProcAddressARB"); - - const char *err = dlerror(); - if (err) - die(PREFIX "dlsym failed: %s\n", err); - - // If the app loads libs dynamically, the symbol may be NULL. - if (!real_glXGetProcAddressARB) { - void *libgl = dlopen("libGL.so", RTLD_LAZY); - if (!libgl) - die(PREFIX "dynamic libGL failed\n"); - real_glXGetProcAddressARB = real_dlsym(libgl, "glXGetProcAddressARB"); - if (!real_glXGetProcAddressARB) - die(PREFIX "Failed to find glXGetProcAddressARB in libGL.so\n"); - } - - dlerror(); - real_glXSwapBuffers = real_dlsym(RTLD_NEXT, "glXSwapBuffers"); - - err = dlerror(); - if (err) - die(PREFIX "dlsym failed: %s\n", err); - - // If the app loads libs dynamically, the symbol may be NULL. - if (!real_glXSwapBuffers) { - void *libgl = dlopen("libGL.so", RTLD_LAZY); - if (!libgl) - die(PREFIX "dynamic libGL failed\n"); - real_glXSwapBuffers = real_dlsym(libgl, "glXSwapBuffers"); - if (!real_glXSwapBuffers) - die(PREFIX "Failed to find glXSwapBuffers in libGL.so\n"); - } - -#ifndef NO_EGL - dlerror(); - real_eglSwapBuffers = real_dlsym(RTLD_NEXT, "eglSwapBuffers"); - - err = dlerror(); - if (err) - die(PREFIX "dlsym failed: %s\n", err); - - // If the app loads libs dynamically, the symbol may be NULL. - if (!real_eglSwapBuffers) { - void *libegl = dlopen("libEGL.so", RTLD_LAZY); - if (!libegl) - die(PREFIX "dynamic libEGL failed\n"); - real_eglSwapBuffers = real_dlsym(libegl, "eglSwapBuffers"); - if (!real_eglSwapBuffers) - die(PREFIX "Failed to find eglSwapBuffers in libEGL.so\n"); - } -#endif + real_glXGetProcAddressARB = load_func_from_lib("glXGetProcAddressARB", "libGL.so"); + real_glXSwapBuffers = load_func_from_lib("glXSwapBuffers", "libGL.so"); +# ifndef NO_EGL + real_eglSwapBuffers = load_func_from_lib("eglSwapBuffers", "libEGL.so"); +# endif } static void deinit() { From 00817ec378ac5adc75f7a68cab5bf708e8b0779d Mon Sep 17 00:00:00 2001 From: Elias Vanderstuyft Date: Tue, 15 Nov 2016 21:20:08 +0100 Subject: [PATCH 5/5] Add support for OpenGL timer queries, and update README accordingly --- README.asciidoc | 43 ++++++++++++++ frametime.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 188 insertions(+), 2 deletions(-) diff --git a/README.asciidoc b/README.asciidoc index b99156c..f3c3dd8 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -49,3 +49,46 @@ Min/avg/max FPS: 167.926 / 1598.4 / 6024.1 50/90/95/99 percentiles (us): 410 / 434 / 589 / 5018 ---- + + +Accuracy +-------- + +When available, https://www.opengl.org/registry/specs/ARB/timer_query.txt[OpenGL timer query objects] +are used to provide more accurate results: +for each frame, the GPU timestamp -- at the moment when all GPU commands are finished -- +is used to calculate the frametime. + +When not available, the CPU timestamp -- just before the asynchronous swap buffers function is called -- +is used to calculate the frametime. +To force using CPU timestamps, set the environment variable `LIBFRAMETIME_TIMERQUERIES_DISABLED=1`. + +The improvement of using GPU timestamps over CPU timestamps is quite dramatic, +the former resembles the visual experience much more. +Now the frametimes can be used to perform variance analysis more reliably. +For reference, below figure compares both methods in a single run of the Unigine Valley benchmark. + +image::https://cdn.pbrd.co/images/rjm9xX9Bm.png[Figure which shows the asynchronous nature of swap buffers when using CPU timestamps results in a lot of jitter compared to using GPU timestamps] + +*Warning*: some driver implementations have a bug where the drift of the GPU timer is significant (up to (-)40ms per second). +Currently, Intel Broadwell and Skylake on Mesa are known to be affected by this problem. +Run the `arb_timer_query-timestamp-get` test of https://piglit.freedesktop.org/[the piglit framework] +to see whether you're affected. +Alternatively, just perform a run of a benchmark with timer queries enabled vs disabled, +and compare avg frametimes to see whether there's a significant difference. +If you're affected, but the drift is consistent between runs, +you can still use the results after having applied following scaling factor +(positive drift means GPU timer is slower than CPU timer; CPU timer is considered correct): +---- +corrected_frametime = reported_frametime * 1000000000ns / (drift + 1000000000ns) +---- + +The use of timer queries should not degrade performance of the to-be-benchmarked application. +If you think it does, set the environment variable `LIBFRAMETIME_TIMERQUERIES_DEBUG=1` +and verify the following is not printed to stderr during the benchmark: +---- +libframetime: Warning: timer query result not yet available +---- +If it is printed, please report this as a GitHub issue on this project page, +in that case it would also be interesting to run +https://github.com/Eliasvan/find-min-required-number-of-timer-queries[find-min-required-number-of-timer-queries]. diff --git a/frametime.c b/frametime.c index 26312fe..211d012 100644 --- a/frametime.c +++ b/frametime.c @@ -1,6 +1,7 @@ /* libframetime, a frame time dumper for all GL applications Copyright (C) 2013 Lauri Kasanen +Copyright (C) 2016 Elias Vanderstuyft This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by @@ -47,7 +48,32 @@ static FILE *f = NULL; static struct timeval oldtime; static u8 firstdone = 0; -static void timing() { +#ifdef GL_ARB_timer_query +static PFNGLGETSTRINGIPROC glGetStringi = NULL; +static PFNGLGETQUERYIVPROC glGetQueryiv = NULL; +static PFNGLGENQUERIESPROC glGenQueries = NULL; +static PFNGLQUERYCOUNTERPROC glQueryCounter = NULL; +static PFNGLGETQUERYOBJECTIVPROC glGetQueryObjectiv = NULL; +static PFNGLGETQUERYOBJECTUI64VPROC glGetQueryObjectui64v = NULL; + +static u8 timerqueries_disabled = 0; +static u8 timerqueries_debug = 0; +/* +The following constant must be set high enough such that +after TIMERQUERIES_NUM frames the first query result should be available. +Tested with Skylake and nouveau GTX 760; +run with LIBFRAMETIME_TIMERQUERIES_DEBUG=1 to verify this works (= stderr clear) on your system. +*/ +#define TIMERQUERIES_NUM 4 +static GLuint timerqueries[TIMERQUERIES_NUM]; +static u8 timerqueries_idxinit = 0, timerqueries_idxback = 0, timerqueries_idxfront = 1; +static GLuint64 timerqueries_oldtime; +static GLint timerqueries_numbits = 0; +#else +static const u8 timerqueries_disabled = 1; +#endif + +static void timing_cpuclock() { if (!firstdone) { gettimeofday(&oldtime, NULL); @@ -66,6 +92,115 @@ static void timing() { } } +#ifdef GL_ARB_timer_query +static int init_timerqueries() { + +# define INIT_GLEXT_AND_RETURN_ON_NULL(function_type, function_name) \ + do { \ + function_name = (function_type)real_glXGetProcAddressARB((GLubyte *)#function_name); \ + if (!function_name) { \ + fprintf(stderr, PREFIX "Error: function pointer to %s is NULL\n", #function_name); \ + return -1; \ + } \ + } while (0) + + // If this function is not available (v3.0+), GL_ARB_timer_query isn't either (v3.2+). + INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETSTRINGIPROC, glGetStringi); + + const GLubyte *extension; + int i = 0; + while ((extension = glGetStringi(GL_EXTENSIONS, i++)) && strcmp((const char *)extension, "GL_ARB_timer_query")); + if (!extension) { + fprintf(stderr, PREFIX "Error: extension GL_ARB_timer_query is not available\n"); + return -1; + } + + INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETQUERYIVPROC, glGetQueryiv); + INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGENQUERIESPROC, glGenQueries); + INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLQUERYCOUNTERPROC, glQueryCounter); + INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETQUERYOBJECTIVPROC, glGetQueryObjectiv); + INIT_GLEXT_AND_RETURN_ON_NULL(PFNGLGETQUERYOBJECTUI64VPROC, glGetQueryObjectui64v); + +# undef INIT_GLEXT_AND_RETURN_ON_NULL + + glGetQueryiv(GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &timerqueries_numbits); + if (timerqueries_debug) + printf(PREFIX "Number of bits of timer query counters: %d\n", timerqueries_numbits); + if (timerqueries_numbits < 30) { + // According to the spec, implementations with less than 30 bits (frame of ~ 1s) are not allowed. + fprintf(stderr, PREFIX "Error: number of bits of timer query counters (%d) is too small\n", timerqueries_numbits); + return -1; + } + + glGenQueries(TIMERQUERIES_NUM, timerqueries); + return 0; +} + +static void timing_timerqueries() { + + if (!timerqueries_idxinit) { + // Initialize timer queries now, because we now have a valid OpenGL context. + if (init_timerqueries()) { + // Degrade gracefully upon failure by using timing_cpuclock() instead. + fprintf(stderr, PREFIX "Failed to init timer queries, therefore using cpu clock instead\n"); + timerqueries_disabled = 1; + return; + } + } + + // Schedule a record of the GPU timestamp after the GPU commands submitted for this frame (i.e. at idx back). + glQueryCounter(timerqueries[timerqueries_idxback], GL_TIMESTAMP); + + if (timerqueries_idxinit < TIMERQUERIES_NUM - 1) { + // Don't query any result yet, since not all timer queries are scheduled yet. + ++timerqueries_idxinit; + } else { + if (timerqueries_debug) { + // Verify assumption "result is available"; if not true, querying the result would stall the pipeline. + GLint available = 0; + glGetQueryObjectiv(timerqueries[timerqueries_idxfront], GL_QUERY_RESULT_AVAILABLE, &available); + if (!available) + fprintf(stderr, PREFIX "Warning: timer query result not yet available, " + "TIMERQUERIES_NUM (%d) is too low => please report this issue!\n", TIMERQUERIES_NUM); + } + + // Query the oldest result (i.e. at idx front), we assume the result is available by now. + GLuint64 timerqueries_newtime = 0; + glGetQueryObjectui64v(timerqueries[timerqueries_idxfront], GL_QUERY_RESULT, &timerqueries_newtime); + + if (timerqueries_idxinit < TIMERQUERIES_NUM) { + // timerqueries_oldtime is not yet valid, so we can't calculate a delta yet. + timerqueries_idxinit = TIMERQUERIES_NUM; + } else { + // Calculate delta, consider timer query bits, and perform integer rounding to convert from ns to us. + const GLuint64 nsec = (timerqueries_newtime - timerqueries_oldtime) & + (timerqueries_numbits < 64 ? + ((GLuint64) 1 << timerqueries_numbits) - 1 : (GLuint64) -1); + const GLuint64 usec = (nsec + 500) / 1000; + + fprintf(f, "Frametime %" PRIu64 " us\n", usec); + fflush(f); + } + timerqueries_oldtime = timerqueries_newtime; + } + + // 'Swap'/rotate idx pointers to timer queries. + timerqueries_idxback = timerqueries_idxfront; + timerqueries_idxfront = (timerqueries_idxfront + 1) % TIMERQUERIES_NUM; +} +#endif + +static void timing() { + +# ifdef GL_ARB_timer_query + if (!timerqueries_disabled) + timing_timerqueries(); +# endif + + if (timerqueries_disabled) + timing_cpuclock(); +} + static void die(const char fmt[], ...) { va_list ap; @@ -172,7 +307,7 @@ static void deinit() __attribute__((destructor)); static void init() { - const char * const env = getenv("LIBFRAMETIME_FILE"); + const char *env = getenv("LIBFRAMETIME_FILE"); const char * const name = env ? env : "/tmp/libframetime.out"; f = fopen(name, "w"); @@ -180,6 +315,14 @@ static void init() { if (!f) die(PREFIX "Failed to open %s for writing\n", name); +# ifdef GL_ARB_timer_query + env = getenv("LIBFRAMETIME_TIMERQUERIES_DISABLED"); + timerqueries_disabled = (env && !strcmp(env, "1")); + + env = getenv("LIBFRAMETIME_TIMERQUERIES_DEBUG"); + timerqueries_debug = (env && !strcmp(env, "1")); +# endif + if (!real_dlsym) init_dlsym();