Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/collections/id_map/direct_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ impl<E: Eq + Hash + From<u64> + Into<u64> + Copy, I: From<u64> + Into<u64> + Cop

#[inline(always)]
pub fn insert_with_new_id(&mut self, internal_id: I) -> Option<E> {
let higher_order_bits: u64 = self.generate_id() as u64;
let higher_order_bits = self.generate_id() as u64;
// Use random number for higher order bits and the offset for lower order bits.
let external_id: u64 = higher_order_bits << 32 | <I as Into<u64>>::into(internal_id);
let external_id = higher_order_bits << 32 | <I as Into<u64>>::into(internal_id);
Some(external_id.into())
}

Expand All @@ -39,7 +39,7 @@ impl<E: Eq + Hash + From<u64> + Into<u64> + Copy, I: From<u64> + Into<u64> + Cop
}

fn mask_id(external_id: &E) -> I {
let masked_id: u32 = <E as Into<u64>>::into(*external_id) as u32;
let masked_id = <E as Into<u64>>::into(*external_id) as u32;
<I as From<u64>>::from(masked_id as u64)
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/collections/id_map/indirect_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ impl<E: Eq + Hash + From<u64> + Into<u64> + Copy, I: From<u64> + Into<u64> + Cop
pub fn insert_with_new_id(&mut self, internal_id: I) -> Option<E> {
// Otherwise, allocate a new external id.
for _ in 0..MAX_RETRIES_ID_ALLOC {
let external_id: E = E::from(self.generate_id());
let external_id = E::from(self.generate_id());
if let Entry::Vacant(e) = self.ids.entry(external_id) {
e.insert(internal_id);
return Some(external_id);
Expand Down Expand Up @@ -76,7 +76,7 @@ impl<E: Eq + Hash + From<u32> + Into<u32> + Copy, I: From<u32> + Into<u32> + Cop
/// until we find an unused id (up to a maximum number of tries).
pub fn insert_with_new_id(&mut self, internal_id: I) -> Option<E> {
for _ in 0..MAX_RETRIES_ID_ALLOC {
let external_id: E = E::from(self.generate_id());
let external_id = E::from(self.generate_id());
if let Entry::Vacant(e) = self.ids.entry(external_id) {
e.insert(internal_id);
return Some(external_id);
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ macro_rules! expect_ok {
macro_rules! timer {
($name:expr) => {
#[cfg(feature = "profiler")]
let _guard = $crate::perftools::profiler::PROFILER.with(|p| p.clone().create_and_enter_sync_scope($name));
let _guard = $crate::perftools::profiler::PROFILER.with(|p| p.clone().enter_sync($name));
};
}

Expand Down
118 changes: 60 additions & 58 deletions src/perftools/profiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use ::std::{
ops::{Deref, DerefMut},
pin::Pin,
thread,
time::{Duration, SystemTime},
time::SystemTime,
};

//======================================================================================================================
Expand All @@ -45,10 +45,10 @@ thread_local!(
/// thread-local instance of `Profiler` in [`PROFILER`](constant.PROFILER.html), so it is not possible to manually
/// create an instance of `Profiler`.
pub struct Profiler {
root_scopes: Vec<SharedScope>,
current_sync_scope: Option<SharedScope>,
current_async_scope: Option<SharedScope>,
perf_callback: Option<demi_callback_t>,
roots: Vec<SharedScope>,
current_sync: Option<SharedScope>,
current_async: Option<SharedScope>,
callback: Option<demi_callback_t>,
}

#[derive(Clone)]
Expand All @@ -62,8 +62,8 @@ pub fn reset() {
PROFILER.with(|p| p.clone().reset());
}

pub fn set_callback(perf_callback: demi_callback_t) {
PROFILER.with(|p| p.clone().set_callback(perf_callback));
pub fn set_callback(callback: demi_callback_t) {
PROFILER.with(|p| p.clone().set_callback(callback));
}

/// Create a special async scopes that is rooted because it does not run under other scopes.
Expand All @@ -77,48 +77,47 @@ impl Profiler {
let thread_id = thread::current().id();
let ns_per_cycle = Self::measure_ns_per_cycle();

// Header row
writeln!(
out,
"call_depth,thread_id,function_name,num_calls,cycles_per_call,nanoseconds_per_call,total_duration,total_duration_exclusive"
)?;
const HEADER_ROW: &'static str = "call_depth,thread_id,function_name,num_calls,cycles_per_call,nanoseconds_per_call,total_duration,total_duration_exclusive";
writeln!(out, "{}", HEADER_ROW)?;

for s in self.root_scopes.iter() {
s.write_recursive(out, thread_id, 0, ns_per_cycle)?;
for scope in self.roots.iter() {
scope.write_recursive(out, thread_id, 0, ns_per_cycle)?;
}

out.flush()
}

fn measure_ns_per_cycle() -> f64 {
let start: SystemTime = SystemTime::now();
let start_cycle: u64 = unsafe { x86::time::rdtscp().0 };
let start_ts = SystemTime::now();
let start_cycles = unsafe { x86::time::rdtscp().0 };

test::black_box((0..10000).fold(0, |old, new| old ^ new)); // dummy calculations for measurement
// dummy calculations for measurement
test::black_box((0..10000).fold(0, |old, new| old ^ new));

let end_cycle: u64 = unsafe { x86::time::rdtscp().0 };
let since_the_epoch: Duration = SystemTime::now().duration_since(start).expect("Time went backwards");
let in_ns: u64 = since_the_epoch.as_secs() * 1_000_000_000 + since_the_epoch.subsec_nanos() as u64;
let end_cycles = unsafe { x86::time::rdtscp().0 };
let duration = SystemTime::now().duration_since(start_ts).expect("Time went backwards");
let duration_in_ns = duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64;
let ns_per_cycle = duration_in_ns as f64 / (end_cycles - start_cycles) as f64;

in_ns as f64 / (end_cycle - start_cycle) as f64
ns_per_cycle
}
}

impl SharedProfiler {
pub fn set_callback(&mut self, perf_callback: demi_callback_t) {
self.perf_callback = Some(perf_callback)
pub fn set_callback(&mut self, callback: demi_callback_t) {
self.callback = Some(callback)
}

fn find_or_create_new_scope(
scopes: &mut Vec<SharedScope>,
name: &'static str,
parent_scope: Option<SharedScope>,
perf_callback: Option<demi_callback_t>,
parent: Option<SharedScope>,
callback: Option<demi_callback_t>,
) -> SharedScope {
match scopes.iter().find(|s| s.name == name) {
Some(existing_scope) => existing_scope.clone(),
Some(s) => s.clone(),
None => {
let new_scope: SharedScope = SharedScope::new(name, parent_scope, perf_callback);
let new_scope = SharedScope::new(name, parent, callback);
scopes.push(new_scope.clone());
new_scope
},
Expand All @@ -128,57 +127,60 @@ impl SharedProfiler {
/// Create and enter a syncronous scope. Returns a [`Guard`](struct.Guard.html) that should be dropped upon
/// leaving the scope. Usually, this method will be called by the [`profile`](macro.profile.html) macro,
/// so it does not need to be used directly.
fn create_scope(&mut self, current_scope: &mut Option<SharedScope>, name: &'static str) -> SharedScope {
let perf_callback: Option<demi_callback_t> = self.perf_callback;
match current_scope {
Some(current_scope) => {
let parent_scope: Option<SharedScope> = Some(current_scope.clone());
Self::find_or_create_new_scope(&mut current_scope.children_scopes, name, parent_scope, perf_callback)
fn create_scope(&mut self, current: &mut Option<SharedScope>, name: &'static str) -> SharedScope {
let callback = self.callback;
match current {
Some(current) => {
let parent = Some(current.clone());
Self::find_or_create_new_scope(&mut current.children, name, parent, callback)
},
None => Self::find_or_create_new_scope(&mut self.root_scopes, name, None, perf_callback),
None => Self::find_or_create_new_scope(&mut self.roots, name, None, callback),
}
}

pub fn create_and_enter_sync_scope(&mut self, name: &'static str) -> SyncScopeGuard {
let mut current_scope: Option<SharedScope> = self.current_sync_scope.clone();
let scope = self.create_scope(&mut current_scope, name);
self.current_sync_scope = Some(scope.clone());
scope.enter_sync_scope()
pub fn enter_sync(&mut self, name: &'static str) -> SyncScopeGuard {
let mut current = self.current_sync.clone();
let scope = self.create_scope(&mut current, name);
self.current_sync = Some(scope.clone());
scope.enter_sync()
}

#[inline]
fn leave_sync_scope(&mut self, duration: u64) {
fn leave_sync(&mut self, duration: u64) {
// Note that we could now still be anywhere in the previous profiling
// tree, so we can not simply reset `self.current`. However, as the
// frame comes to an end we will eventually leave a root node, at which
// point `self.current` will be set to `None`.
self.current_sync_scope = if let Some(mut current_scope) = self.current_sync_scope.take() {
current_scope.add_duration(duration);
current_scope.parent_scope.as_ref().cloned()
self.current_sync = if let Some(mut current) = self.current_sync.take() {
current.add_duration(duration);
let parent = current.parent.as_ref().cloned();
parent
} else {
// This should not happen with proper usage.
unreachable!("Called perftools::profiler::leave() while not in any scope");
unreachable!("Called perftools::profiler::leave_sync() while not in any scope");
};
}

pub fn create_and_enter_async_scope(&mut self, name: &'static str) {
let mut current_scope: Option<SharedScope> = self.current_async_scope.clone();
let scope = self.create_scope(&mut current_scope, name);
self.current_async_scope = Some(scope.clone());
pub fn enter_async(&mut self, name: &'static str) {
let mut current = self.current_async.clone();
let scope = self.create_scope(&mut current, name);
self.current_async = Some(scope.clone());
}

#[inline]
fn leave_async_scope(&mut self, duration: u64) {
self.current_async_scope = if let Some(mut current_scope) = self.current_async_scope.take() {
current_scope.add_duration(duration);
current_scope.parent_scope.as_ref().cloned()
fn leave_async(&mut self, duration: u64) {
self.current_async = if let Some(mut current) = self.current_async.take() {
current.add_duration(duration);
let parent = current.parent.as_ref().cloned();
parent
} else {
// This should not happen with proper usage.
unreachable!("Called perftools::profiler::leave() while not in any scope");
unreachable!("Called perftools::profiler::leave_async() while not in any scope");
};
}

fn reset(&mut self) {
self.root_scopes.clear();
self.roots.clear();
}
}

Expand All @@ -203,10 +205,10 @@ impl DerefMut for SharedProfiler {
impl Default for SharedProfiler {
fn default() -> Self {
Self(SharedObject::new(Profiler {
root_scopes: Vec::new(),
current_sync_scope: None,
current_async_scope: None,
perf_callback: None,
roots: Vec::new(),
current_sync: None,
current_async: None,
callback: None,
}))
}
}
Expand Down
56 changes: 26 additions & 30 deletions src/perftools/profiler/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ use ::std::{
/// profiled blocks.
pub struct Scope {
pub name: &'static str,
pub parent_scope: Option<SharedScope>,
pub children_scopes: Vec<SharedScope>,
pub parent: Option<SharedScope>,
pub children: Vec<SharedScope>,
/// Callback to report statistics. If this is set to None, we collect averages by default.
pub perf_callback: Option<demi_callback_t>,
pub callback: Option<demi_callback_t>,
pub num_calls: usize,
/// In total, how much time has been spent in this scope?
pub duration_sum: u64,
Expand All @@ -57,32 +57,28 @@ pub struct AsyncScope<'a, F: Future> {
//======================================================================================================================

impl SharedScope {
pub fn new(
name: &'static str,
parent_scope: Option<SharedScope>,
perf_callback: Option<demi_callback_t>,
) -> SharedScope {
pub fn new(name: &'static str, parent: Option<SharedScope>, callback: Option<demi_callback_t>) -> SharedScope {
Self(SharedObject::new(Scope {
name,
parent_scope,
children_scopes: Vec::new(),
parent,
children: Vec::new(),
num_calls: 0,
duration_sum: 0,
perf_callback,
callback,
}))
}

/// Enter this scope. Returns a `Guard` instance that should be dropped when leaving the scope.
#[inline]
pub fn enter_sync_scope(&self) -> SyncScopeGuard {
pub fn enter_sync(&self) -> SyncScopeGuard {
SyncScopeGuard::enter()
}

/// Leave this scope. Called automatically by the `Guard` instance.
#[inline]
pub fn add_duration(&mut self, duration: u64) {
if let Some(callback_fn) = self.perf_callback {
callback_fn(self.name.as_ptr() as *const i8, self.name.len() as u32, duration);
if let Some(callback) = self.callback {
callback(self.name.as_ptr() as *const i8, self.name.len() as u32, duration);
} else {
self.num_calls += 1;
// Even though this is extremely unlikely, let's not panic on overflow.
Expand All @@ -91,10 +87,10 @@ impl SharedScope {
}

pub fn compute_exclusive_duration(&self) -> u64 {
let mut children_total_duration: u64 = 0;
let mut children_total_duration = 0;

for s in &self.children_scopes {
children_total_duration += s.duration_sum;
for scope in &self.children {
children_total_duration += scope.duration_sum;
}

self.duration_sum - children_total_duration
Expand All @@ -107,7 +103,7 @@ impl SharedScope {
depth: usize,
ns_per_cycle: f64,
) -> io::Result<()> {
let duration_sum: f64 = self.duration_sum as f64;
let duration_sum = self.duration_sum as f64;

// Write markers.
let mut markers = "+".to_string();
Expand All @@ -126,8 +122,8 @@ impl SharedScope {
self.compute_exclusive_duration(),
)?;

for child_scope in &self.children_scopes {
child_scope.write_recursive(out, thread_id, depth + 1, ns_per_cycle)?;
for child in &self.children {
child.write_recursive(out, thread_id, depth + 1, ns_per_cycle)?;
}

Ok(())
Expand All @@ -143,7 +139,7 @@ impl<'a, F: Future> AsyncScope<'a, F> {
impl SyncScopeGuard {
#[inline]
pub fn enter() -> Self {
let now: u64 = unsafe { x86::time::rdtscp().0 };
let now = unsafe { x86::time::rdtscp().0 };
Self { enter_time: now }
}
}
Expand Down Expand Up @@ -176,24 +172,24 @@ impl<'a, F: Future> Future for AsyncScope<'a, F> {
type Output = F::Output;

fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Self::Output> {
let self_: &mut Self = self.get_mut();
let self_ = self.get_mut();

PROFILER.with(|p| p.clone().create_and_enter_async_scope(self_.name));
let start: u64 = unsafe { x86::time::rdtscp().0 };
PROFILER.with(|p| p.clone().enter_async(self_.name));
let start = unsafe { x86::time::rdtscp().0 };
let result = Future::poll(self_.future.as_mut(), ctx);
let end: u64 = unsafe { x86::time::rdtscp().0 };
let duration: u64 = end - start;
PROFILER.with(|p| p.clone().leave_async_scope(duration));
let end = unsafe { x86::time::rdtscp().0 };
let duration = end - start;
PROFILER.with(|p| p.clone().leave_async(duration));
result
}
}

impl Drop for SyncScopeGuard {
#[inline]
fn drop(&mut self) {
let now: u64 = unsafe { x86::time::rdtscp().0 };
let duration: u64 = now - self.enter_time;
let now = unsafe { x86::time::rdtscp().0 };
let duration = now - self.enter_time;

PROFILER.with(|p| p.clone().leave_sync_scope(duration));
PROFILER.with(|p| p.clone().leave_sync(duration));
}
}
Loading
Loading