subscriber: remove user-space thread local from filters (#578)

## Motivation

Currently, `tracing-subscriber`'s `EnvFilter` type uses a user-space
thread local for storing any entered spans which are relevant to
filters. This was intended to support allowing multiple `EnvFilter`s to
coexist in the same subscriber, which was never implemented. It turns
out this has a noticeable performance impact versus a proper
thread-local.

## Solution 

I replaced the user-space thread local type with a proper thread-local.

## Benchmark Results

(vs master, note that there was some noise in the benchmark environment,
as evidenced by the "regression" in the baseline tests)

```
     Running target/release/deps/filter-56ce39a1aef6b2a0
static/baseline_single_threaded
                        time:   [98.622 ns 101.54 ns 104.78 ns]
                        change: [+2.5820% +6.1667% +9.7976%] (p = 0.00 < 0.05)
                        Performance has regressed.
Found 7 outliers among 100 measurements (7.00%)
  6 (6.00%) high mild
  1 (1.00%) high severe
static/single_threaded  time:   [50.699 ns 51.776 ns 52.896 ns]
                        change: [-2.8933% -1.4141% +0.1262%] (p = 0.07 > 0.05)
                        No change in performance detected.
Found 7 outliers among 100 measurements (7.00%)
  5 (5.00%) high mild
  2 (2.00%) high severe
static/baseline_multithreaded
                        time:   [9.5372 us 9.7739 us 10.025 us]
                        change: [-22.208% -16.587% -10.758%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  3 (3.00%) high mild
  3 (3.00%) high severe
static/multithreaded    time:   [9.3605 us 9.6779 us 10.031 us]
                        change: [-31.493% -26.773% -21.761%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild

dynamic/baseline_single_threaded
                        time:   [165.50 ns 166.57 ns 167.68 ns]
                        change: [-10.102% -8.6136% -7.1789%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 4 outliers among 100 measurements (4.00%)
  4 (4.00%) high mild
dynamic/single_threaded time:   [539.01 ns 542.44 ns 546.11 ns]
                        change: [-41.078% -40.104% -39.195%] (p = 0.00 < 0.05)
                        Performance has improved.
dynamic/baseline_multithreaded
                        time:   [9.3314 us 9.7205 us 10.222 us]
                        change: [-45.458% -41.873% -37.948%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 9 outliers among 100 measurements (9.00%)
  4 (4.00%) high mild
  5 (5.00%) high severe
dynamic/multithreaded   time:   [10.095 us 10.295 us 10.520 us]
                        change: [-66.323% -63.666% -61.130%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
  1 (1.00%) low mild
  7 (7.00%) high mild
  3 (3.00%) high severe
  ```

Signed-off-by: Eliza Weisman <eliza@buoyant.io>
This commit is contained in:
Eliza Weisman 2020-02-11 17:31:53 -08:00 committed by GitHub
parent eb23155d15
commit 263a154e92
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -15,9 +15,8 @@ use crate::{
filter::LevelFilter,
layer::{Context, Layer},
sync::RwLock,
thread,
};
use std::{collections::HashMap, env, error::Error, fmt, str::FromStr};
use std::{cell::RefCell, collections::HashMap, env, error::Error, fmt, str::FromStr};
use tracing_core::{
callsite,
field::Field,
@ -95,9 +94,6 @@ use tracing_core::{
#[cfg_attr(docsrs, doc(cfg(feature = "env-filter")))]
#[derive(Debug)]
pub struct EnvFilter {
// TODO: eventually, this should be exposed by the registry.
scope: thread::Local<Vec<LevelFilter>>,
statics: directive::Statics,
dynamics: directive::Dynamics,
@ -105,6 +101,10 @@ pub struct EnvFilter {
by_cs: RwLock<HashMap<callsite::Identifier, directive::CallsiteMatcher>>,
}
thread_local! {
static SCOPE: RefCell<Vec<LevelFilter>> = RefCell::new(Vec::new());
}
type FieldMap<T> = HashMap<Field, T>;
#[cfg(feature = "smallvec")]
@ -232,7 +232,6 @@ impl EnvFilter {
}
Self {
scope: thread::Local::new(),
statics,
dynamics,
by_id: RwLock::new(HashMap::new()),
@ -277,22 +276,20 @@ impl<S: Subscriber> Layer<S> for EnvFilter {
fn enabled(&self, metadata: &Metadata<'_>, _: Context<'_, S>) -> bool {
let level = metadata.level();
self.scope
.with(|scope| {
for filter in scope.iter() {
if filter >= level {
return true;
}
SCOPE.with(|scope| {
for filter in scope.borrow().iter() {
if filter >= level {
return true;
}
}
// Otherwise, fall back to checking if the callsite is
// statically enabled.
// TODO(eliza): we *might* want to check this only if the `log`
// feature is enabled, since if this is a `tracing` event with a
// real callsite, it would already have been statically enabled...
self.statics.enabled(metadata)
})
.unwrap_or_else(|| self.statics.enabled(metadata))
// Otherwise, fall back to checking if the callsite is
// statically enabled.
// TODO(eliza): we *might* want to check this only if the `log`
// feature is enabled, since if this is a `tracing` event with a
// real callsite, it would already have been statically enabled...
self.statics.enabled(metadata)
})
}
fn new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, _: Context<'_, S>) {
@ -314,13 +311,13 @@ impl<S: Subscriber> Layer<S> for EnvFilter {
// that to allow changing the filter while a span is already entered.
// But that might be much less efficient...
if let Some(span) = try_lock!(self.by_id.read()).get(id) {
self.scope.with(|scope| scope.push(span.level()));
SCOPE.with(|scope| scope.borrow_mut().push(span.level()));
}
}
fn on_exit(&self, id: &span::Id, _: Context<'_, S>) {
if self.cares_about_span(id) {
self.scope.with(|scope| scope.pop());
SCOPE.with(|scope| scope.borrow_mut().pop());
}
}