From 74f764fec2d7a043af47837e492af46775a14028 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Tue, 30 Mar 2021 23:56:45 -0700 Subject: [PATCH 1/3] Processes only need steps for large speedup I measured this as reducing CPU usage from ~78% to ~45%. Almost doubling the speed, which is expected as described below. Tg processes the audio in a series of steps that double in size: 2, 4, 8, and then 16 seconds long. This is the one to four dots shown in the display. Tg starts at step 1 (2 seconds) and goes up, stopping if a step doesn't pass. Data from smaller steps isn't used if a larger step passes. This means when running at a constant step 4 with good signal, CPU usage is almost double what it needs to be, since steps 1-3 are processed and unused and add up to 14 seconds, almost as much as step 4's 16 seconds. Change this to start at the previous iteration's step. With good signal, only step 4 will be processed and CPU usage is cut almost in half. If the previous step fails, it will try smaller steps. If it passes and is not yet the top step, it will try larger steps. End result should end up on the same step as before, but get there sooner. It could be slower if the step drops a lot, e.g. from 4 to 0, but this happens far less often than the step staying the same or nearly the same. To do this, I moved the step logic out of analyze_pa_data() and entirely into compute_update(). analyze_pa_data() will now just process one step and compute_update() decides which step(s). Previously, analyze_pa_data() did all steps and compute_update() decided which step to actually use. There is a small change to the algorithm. To be good, a step needs to pass a number of checks done in process(). Then there is one more check, of sigma vs period, done in compute_update(). Previously a step didn't need to pass the sigma check and it still counted enough to increase last_tic and show up as a signal dot in the display.
But the data wasn't actually used unless it passed the sigma check too. I no longer keep track of "partially" passing steps like this. Either it passes all checks, including the sigma check, or not. last_tic and signal level only count fully passing steps. I see no practical difference in my tests, but I think it could show up with some kind of marginal signal that has a high error in period estimation with the longer steps. --- src/audio.c | 29 ++++++++++------------------- src/computer.c | 45 ++++++++++++++++++++++++++++++++++++--------- src/tg.h | 6 +++++- 3 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/audio.c b/src/audio.c index 05574fe..d8a86db 100644 --- a/src/audio.c +++ b/src/audio.c @@ -164,7 +164,7 @@ uint64_t get_timestamp(int light) return ts; } -static void fill_buffers(struct processing_buffers *ps, int light) +void fill_buffers(struct processing_buffers *ps, int light) { pthread_mutex_lock(&audio_mutex); uint64_t ts = timestamp; @@ -187,26 +187,17 @@ static void fill_buffers(struct processing_buffers *ps, int light) } } -int analyze_pa_data(struct processing_data *pd, int bph, double la, uint64_t events_from) +/* Returns if buffer was processed ok */ +bool analyze_pa_data(struct processing_data *pd, int step, int bph, double la, uint64_t events_from) { - struct processing_buffers *p = pd->buffers; - fill_buffers(p, pd->is_light); + struct processing_buffers *p = &pd->buffers[step]; - int i; - debug("\nSTART OF COMPUTATION CYCLE\n\n"); - for(i=0; ilast_tic; - p[i].events_from = events_from; - process(&p[i], bph, la, pd->is_light); - if( !p[i].ready ) break; - debug("step %d : %f +- %f\n",i,p[i].period/p[i].sample_rate,p[i].sigma/p[i].sample_rate); - } - if(i) { - pd->last_tic = p[i-1].last_tic; - debug("%f +- %f\n",p[i-1].period/p[i-1].sample_rate,p[i-1].sigma/p[i-1].sample_rate); - } else - debug("---\n"); - return i; + p->last_tic = pd->last_tic; + p->events_from = events_from; + process(p, bph, la, pd->is_light); + debug("step
%d : %f +- %f\n", step, p->period/p->sample_rate, p->sigma/p->sample_rate); + + return p->ready; } int analyze_pa_data_cal(struct processing_data *pd, struct calibration_data *cd) diff --git a/src/computer.c b/src/computer.c index 03942d3..938785e 100644 --- a/src/computer.c +++ b/src/computer.c @@ -91,19 +91,45 @@ static void compute_update_cal(struct computer *c) static void compute_update(struct computer *c) { - int signal = analyze_pa_data(c->pdata, c->actv->bph, c->actv->la, c->actv->events_from); - struct processing_buffers *p = c->pdata->buffers; - int i; - for(i=0; i=0 && p[i].sigma > p[i].period / 10000; i--); - if(i>=0) { + struct processing_data *pd = c->pdata; + struct processing_buffers *ps = pd->buffers; + int step = pd->last_step; + + pd->last_step = 0; + /* Do all buffers at once so that all computation interval(s) use the + * same data. Buffers for some intervals will probably not be used, but + * it's not expensive to fill them. Processing is the slow part. */ + fill_buffers(ps, pd->is_light); + + debug("\nSTART OF COMPUTATION CYCLE\n\n"); + unsigned int stepmask = BITMASK(NSTEPS); // Mask of available steps + do { + stepmask &= ~BIT(step); + analyze_pa_data(c->pdata, step, c->actv->bph, c->actv->la, c->actv->events_from); + + if (ps[step].ready && ps[step].sigma < ps[step].period / 10000) { + // Try next step if it's available + if (stepmask & BIT(step+1)) step++; + } else { + // This step didn't pass, try a lesser step + step--; + } + } while(step >= 0 && stepmask & BIT(step)); + + if (step >= 0) { + debug("%f +- %f\n", ps[step].period/ps[step].sample_rate, ps[step].sigma/ps[step].sample_rate); + pd->last_tic = ps[step].last_tic; + pd->last_step = step; + if(c->actv->pb) pb_destroy_clone(c->actv->pb); - c->actv->pb = pb_clone(&p[i]); + c->actv->pb = pb_clone(&ps[step]); c->actv->is_old = 0; - c->actv->signal = i == NSTEPS-1 && p[i].amp < 0 ? signal-1 : signal; + /* Signal's range is 0 to NSTEPS, while step is -1 to NSTEPS-1, i.e. 
signal = step+1 */ + c->actv->signal = step == NSTEPS-1 && ps[step].amp < 0 ? step : step+1; } else { + debug("---\n"); c->actv->is_old = 1; - c->actv->signal = -signal; + c->actv->signal = 0; } } @@ -251,6 +277,7 @@ struct computer *start_computer(int nominal_sr, int bph, double la, int cal, int pd->buffers = p; pd->last_tic = 0; pd->is_light = light; + pd->last_step = 0; struct calibration_data *cd = malloc(sizeof(struct calibration_data)); setup_cal_data(cd); diff --git a/src/tg.h b/src/tg.h index 789f66c..04f3e11 100644 --- a/src/tg.h +++ b/src/tg.h @@ -75,6 +75,8 @@ #endif #define UNUSED(X) (void)(X) +#define BIT(n) (1u << (n)) +#define BITMASK(n) ((1u << (n)) - 1u) /* algo.c */ struct processing_buffers { @@ -122,13 +124,15 @@ int process_cal(struct processing_buffers *p, struct calibration_data *cd); struct processing_data { struct processing_buffers *buffers; uint64_t last_tic; + int last_step; //!< Guess of step (buffers index) to try first, based on last iteration int is_light; }; int start_portaudio(int *nominal_sample_rate, double *real_sample_rate); int terminate_portaudio(); uint64_t get_timestamp(int light); -int analyze_pa_data(struct processing_data *pd, int bph, double la, uint64_t events_from); +void fill_buffers(struct processing_buffers *ps, int light); +bool analyze_pa_data(struct processing_data *pd, int step, int bph, double la, uint64_t events_from); int analyze_pa_data_cal(struct processing_data *pd, struct calibration_data *cd); void set_audio_light(bool light); From 87e06226bc5b945ab3fa242ff6f6bc8fec7bbc23 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Thu, 1 Apr 2021 16:15:57 -0700 Subject: [PATCH 2/3] Draw partial signal dot for no amplitude Previously max signal level (4 dots) but with no amplitude measured was counted as signal level 3. But level 3 or lower with no amplitude still counts as the same level.
Have compute_update() no longer do this signal level adjustment and just report the level used, which indicates the averaging interval. The dot graphic will now indicate "no amplitude" by using a hollow dot for the final signal level's dot. This way the number of dots always shows the averaging interval and a hollow dot always shows that the signal is too poor to measure amplitude. --- src/computer.c | 2 +- src/output_panel.c | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/computer.c b/src/computer.c index 938785e..3f86ea3 100644 --- a/src/computer.c +++ b/src/computer.c @@ -125,7 +125,7 @@ static void compute_update(struct computer *c) c->actv->pb = pb_clone(&ps[step]); c->actv->is_old = 0; /* Signal's range is 0 to NSTEPS, while step is -1 to NSTEPS-1, i.e. signal = step+1 */ - c->actv->signal = step == NSTEPS-1 && ps[step].amp < 0 ? step : step+1; + c->actv->signal = step+1; } else { debug("---\n"); c->actv->is_old = 1; diff --git a/src/output_panel.c b/src/output_panel.c index 0e7fc8b..835e4f8 100644 --- a/src/output_panel.c +++ b/src/output_panel.c @@ -112,11 +112,20 @@ static double amplitude_to_time(double lift_angle, double amp) return asin(lift_angle / (2 * amp)) / M_PI; } -static double draw_watch_icon(cairo_t *c, int signal, int happy, int light) +/** Draw the watch graphic that has status info. + * + * @param[in,out] c Cairo context to use. + * @param signal Signal level, i.e. dots, 0 to NSTEPS inclusive. + * @param partial Specified signal level is only partially achieved. + * @param happy Green happy face or red frowny face. + * @param light Indicate light sampling mode. + * @return Y coordinate of top margin. + */ + +static double draw_watch_icon(cairo_t *c, int signal, bool partial, bool happy, bool light) { - happy = !!happy; - cairo_set_line_width(c,3); - cairo_set_source(c,happy?green:red); + cairo_set_line_width(c, 3); + cairo_set_source(c, happy ?
green : red); cairo_move_to(c, OUTPUT_WINDOW_HEIGHT * 0.5, OUTPUT_WINDOW_HEIGHT * 0.5); cairo_line_to(c, OUTPUT_WINDOW_HEIGHT * 0.75, OUTPUT_WINDOW_HEIGHT * (0.75 - 0.5*happy)); cairo_move_to(c, OUTPUT_WINDOW_HEIGHT * 0.5, OUTPUT_WINDOW_HEIGHT * 0.5); @@ -126,7 +135,7 @@ static double draw_watch_icon(cairo_t *c, int signal, int happy, int light) cairo_stroke(c); int l = OUTPUT_WINDOW_HEIGHT * 0.8 / (2*NSTEPS - 1); int i; - cairo_set_line_width(c,1); + cairo_set_line_width(c, 1); for(i = 0; i < signal; i++) { cairo_move_to(c, OUTPUT_WINDOW_HEIGHT + 0.5*l, OUTPUT_WINDOW_HEIGHT * 0.9 - 2*i*l); cairo_line_to(c, OUTPUT_WINDOW_HEIGHT + 1.5*l, OUTPUT_WINDOW_HEIGHT * 0.9 - 2*i*l); @@ -134,7 +143,7 @@ static double draw_watch_icon(cairo_t *c, int signal, int happy, int light) cairo_line_to(c, OUTPUT_WINDOW_HEIGHT + 0.5*l, OUTPUT_WINDOW_HEIGHT * 0.9 - (2*i+1)*l); cairo_line_to(c, OUTPUT_WINDOW_HEIGHT + 0.5*l, OUTPUT_WINDOW_HEIGHT * 0.9 - 2*i*l); cairo_stroke_preserve(c); - cairo_fill(c); + if (i < signal-1 || !partial) cairo_fill(c); } if(light) { int l = OUTPUT_WINDOW_HEIGHT * 0.15; @@ -194,7 +203,8 @@ static gboolean output_draw_event(GtkWidget *widget, cairo_t *c, struct output_p struct processing_buffers *p = snst->pb; int old = snst->is_old; - double x = draw_watch_icon(c,snst->signal,snst->calibrate ? snst->signal==NSTEPS : snst->signal, snst->is_light); + double x = draw_watch_icon(c, snst->signal, snst->amp <= 0, + snst->signal >= (snst->calibrate ? NSTEPS : 1), snst->is_light); cairo_text_extents_t extents; From cbd3a00c4328c28fbae4043cd14ce70f80bb2546 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Sun, 18 Apr 2021 16:43:49 -0700 Subject: [PATCH 3/3] Change one sample sigma estimation to 0 When there is just one period in the processing buffer it is not possible to calculate the sample standard deviation (sigma). In this case, the period value was being used as the sigma estimate, which effectively gives a huge sigma when there is 1 period in the buffer. 
This results in the processing buffer being rejected as bad and a larger buffer, which would have multiple periods, is never tried. One period per buffer would happen with a combination of long period and short buffer, e.g. a small BPH in light mode, since light mode uses buffers half as long as normal mode. Use 0 as the sigma value for 1 sample. This means the buffer will pass the sigma check and a larger buffer will be tried. --- src/algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/algo.c b/src/algo.c index 1c744cb..fa52137 100644 --- a/src/algo.c +++ b/src/algo.c @@ -452,7 +452,7 @@ static int compute_period(struct processing_buffers *b, int bph) if(count > 1) b->sigma = sqrt((sq_sum - count * estimate * estimate)/ (count-1)); else - b->sigma = b->period; + b->sigma = 0; // No std. dev. estimate possible with just 1 sample return 0; }