From 8e7ed8ae21e448c664d42b48091472d86bbbc2c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dawid=20Ci=C4=99=C5=BCarkiewicz?=
Date: Fri, 4 Nov 2016 21:48:34 -0700
Subject: [PATCH 1/4] Add some benchmarks.

To be moved into a separate repo.
---
 benches/latency.rs | 105 +++++++++++++++++++++++++++++++++++++++++++++
 benches/mio-ops.rs |  57 ++++++++++++++++++++++++
 2 files changed, 162 insertions(+)
 create mode 100644 benches/latency.rs
 create mode 100644 benches/mio-ops.rs

diff --git a/benches/latency.rs b/benches/latency.rs
new file mode 100644
index 000000000..a4b91bb5b
--- /dev/null
+++ b/benches/latency.rs
@@ -0,0 +1,105 @@
+#![feature(test)]
+
+extern crate test;
+extern crate futures;
+#[macro_use]
+extern crate tokio_core;
+
+use std::io;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::net::SocketAddr;
+
+use futures::{Future, Poll};
+use tokio_core::net::UdpSocket;
+use tokio_core::reactor::Core;
+
+use test::Bencher;
+use std::thread;
+use std::time::Duration;
+
+/// UDP echo server
+struct Server {
+    socket : UdpSocket,
+    buf : Vec<u8>,
+    to_send : Option<(usize, SocketAddr)>,
+    stop : Arc<AtomicBool>,
+}
+
+impl Server {
+    fn new(s : UdpSocket, stop : Arc<AtomicBool>) -> Self {
+        Server {
+            socket: s,
+            to_send: None,
+            buf: vec![0u8; 1600],
+            stop: stop,
+        }
+    }
+}
+
+impl Future for Server {
+    type Item = ();
+    type Error = io::Error;
+
+    fn poll(&mut self) -> Poll<(), io::Error> {
+        loop {
+            if self.stop.load(Ordering::SeqCst) {
+                return Ok(futures::Async::Ready(()))
+            }
+
+            if let Some((size, peer)) = self.to_send.take() {
+                match self.socket.send_to(&self.buf[..size], &peer) {
+                    Err(e) => {
+                        self.to_send = Some((size, peer));
+                        return try_nb!(Err(e));
+                    },
+                    Ok(_) => { }
+                }
+            }
+
+            self.to_send = Some(
+                try_nb!(self.socket.recv_from(&mut self.buf))
+                );
+        }
+    }
+}
+#[bench]
+fn udp_echo_latency(b: &mut Bencher) {
+    let server_addr= "127.0.0.1:7398".to_string();
+    let server_addr = server_addr.parse::<SocketAddr>().unwrap();
+    let client_addr= "127.0.0.1:7399".to_string();
+    let client_addr = client_addr.parse::<SocketAddr>().unwrap();
+
+    let stop = Arc::new(AtomicBool::new(false));
+    let stop2 = stop.clone();
+
+    let child = thread::spawn(move || {
+        let mut l = Core::new().unwrap();
+        let handle = l.handle();
+
+        let socket = tokio_core::net::UdpSocket::bind(&server_addr, &handle).unwrap();
+
+        let server = Server::new(socket, stop);
+
+        l.run(server).unwrap();
+    });
+
+    // TODO: More reliable way to bind server socket and start server
+    // first
+    thread::sleep(Duration::from_millis(100));
+
+    let client = std::net::UdpSocket::bind(client_addr).unwrap();
+
+    let mut buf = [0u8; 1000];
+    b.iter(|| {
+        client.send_to(&buf, &server_addr).unwrap();
+        let _ = client.recv_from(&mut buf).unwrap();
+    });
+
+    // Stop the server; TODO: Use better method
+    stop2.store(true, Ordering::SeqCst);
+    thread::sleep(Duration::from_millis(1));
+    client.send_to(&buf, &server_addr).unwrap();
+
+    child.join().unwrap();
+}
diff --git a/benches/mio-ops.rs b/benches/mio-ops.rs
new file mode 100644
index 000000000..43de5f777
--- /dev/null
+++ b/benches/mio-ops.rs
@@ -0,0 +1,57 @@
+// Measure cost of different operations
+// to get a sense of performance tradeoffs
+#![feature(test)]
+
+extern crate test;
+extern crate mio;
+
+use test::Bencher;
+
+use mio::tcp::TcpListener;
+use mio::{Token, Ready, PollOpt};
+
+
+#[bench]
+fn mio_register_deregister(b: &mut Bencher) {
+    let addr = "127.0.0.1:13265".parse().unwrap();
+    // Setup the server socket
+    let sock = TcpListener::bind(&addr).unwrap();
+    let poll = mio::Poll::new().unwrap();
+
+    const CLIENT: Token = Token(1);
+
+    b.iter(|| {
+        poll.register(&sock, CLIENT, Ready::readable(),
+                      PollOpt::edge()).unwrap();
+        poll.deregister(&sock).unwrap();
+    });
+}
+
+#[bench]
+fn mio_reregister(b: &mut Bencher) {
+    let addr = "127.0.0.1:13265".parse().unwrap();
+    // Setup the server socket
+    let sock = TcpListener::bind(&addr).unwrap();
+    let poll = mio::Poll::new().unwrap();
+
+    const CLIENT: Token = Token(1);
+    poll.register(&sock, CLIENT, Ready::readable(),
+                  PollOpt::edge()).unwrap();
+
+    b.iter(|| {
+        poll.reregister(&sock, CLIENT, Ready::readable(),
+                        PollOpt::edge()).unwrap();
+    });
+    poll.deregister(&sock).unwrap();
+}
+
+#[bench]
+fn mio_poll(b: &mut Bencher) {
+    let poll = mio::Poll::new().unwrap();
+    let timeout = std::time::Duration::new(0, 0);
+    let mut events = mio::Events::with_capacity(1024);
+
+    b.iter(|| {
+        poll.poll(&mut events, Some(timeout)).unwrap();
+    });
+}

From d1a2a9d3242ab9e93bde1fa43219f1159f2eee4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dawid=20Ci=C4=99=C5=BCarkiewicz?=
Date: Tue, 8 Nov 2016 19:01:37 -0800
Subject: [PATCH 2/4] Add `channel_latency` and fix previous issues.

A warmup round before actually performing the test seems to eliminate
variance.
---
 benches/latency.rs | 123 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 118 insertions(+), 5 deletions(-)

diff --git a/benches/latency.rs b/benches/latency.rs
index a4b91bb5b..4de7d2ee1 100644
--- a/benches/latency.rs
+++ b/benches/latency.rs
@@ -18,17 +18,19 @@ use test::Bencher;
 use std::thread;
 use std::time::Duration;
 
+use futures::stream::Stream;
+
 /// UDP echo server
-struct Server {
+struct EchoServer {
     socket : UdpSocket,
     buf : Vec<u8>,
     to_send : Option<(usize, SocketAddr)>,
     stop : Arc<AtomicBool>,
 }
 
-impl Server {
+impl EchoServer {
     fn new(s : UdpSocket, stop : Arc<AtomicBool>) -> Self {
-        Server {
+        EchoServer {
             socket: s,
             to_send: None,
             buf: vec![0u8; 1600],
@@ -37,7 +39,7 @@ impl Server {
     }
 }
 
-impl Future for Server {
+impl Future for EchoServer {
     type Item = ();
     type Error = io::Error;
 
@@ -63,6 +65,55 @@ impl Future for Server {
         }
     }
 }
+
+/// UDP echo server
+struct ChanServer {
+    rx : tokio_core::channel::Receiver<usize>,
+    tx : tokio_core::channel::Sender<usize>,
+    buf : Option<usize>,
+    stop : Arc<AtomicBool>,
+}
+
+impl ChanServer {
+    fn new(tx : tokio_core::channel::Sender<usize>, rx : tokio_core::channel::Receiver<usize>, stop : Arc<AtomicBool>) -> Self {
+        ChanServer {
+            rx: rx,
+            tx: tx,
+            buf: None,
+            stop: stop,
+        }
+    }
+}
+
+impl Future for ChanServer {
+    type Item = ();
+    type Error = io::Error;
+
+    fn poll(&mut self) -> Poll<(), io::Error> {
+        loop {
+            if self.stop.load(Ordering::SeqCst) {
+                return Ok(futures::Async::Ready(()))
+            }
+
+            if let Some(u) = self.buf.take() {
+                match self.tx.send(u) {
+                    Err(e) => {
+                        self.buf = Some(u);
+                        return try_nb!(Err(e));
+                    },
+                    Ok(_) => { }
+                }
+            }
+
+            match self.rx.poll() {
+                Ok(futures::Async::Ready(None)) => return Ok(futures::Async::Ready(())),
+                Ok(futures::Async::Ready(Some(t))) => { self.buf = Some(t) },
+                Ok(futures::Async::NotReady) => return Ok(futures::Async::NotReady),
+                Err(e) => return try_nb!(Err(e)),
+            }
+        }
+    }
+}
 #[bench]
 fn udp_echo_latency(b: &mut Bencher) {
     let server_addr= "127.0.0.1:7398".to_string();
@@ -79,7 +130,7 @@ fn udp_echo_latency(b: &mut Bencher) {
 
         let socket = tokio_core::net::UdpSocket::bind(&server_addr, &handle).unwrap();
 
-        let server = Server::new(socket, stop);
+        let server = EchoServer::new(socket, stop);
 
         l.run(server).unwrap();
     });
@@ -91,6 +142,16 @@ fn udp_echo_latency(b: &mut Bencher) {
     let client = std::net::UdpSocket::bind(client_addr).unwrap();
 
     let mut buf = [0u8; 1000];
+
+    // warmup phase; for some reason initial couple of
+    // rounds is much slower
+    //
+    // TODO: Describe the exact reasons; caching? branch predictor? lazy closures?
+    for _ in 0..1000 {
+        client.send_to(&buf, &server_addr).unwrap();
+        let _ = client.recv_from(&mut buf).unwrap();
+    }
+
     b.iter(|| {
         client.send_to(&buf, &server_addr).unwrap();
         let _ = client.recv_from(&mut buf).unwrap();
@@ -103,3 +164,55 @@ fn udp_echo_latency(b: &mut Bencher) {
 
     child.join().unwrap();
 }
+
+#[bench]
+fn channel_latency(b: &mut Bencher) {
+    let stop = Arc::new(AtomicBool::new(false));
+    let stop2 = stop.clone();
+
+    // TODO: Any way to start Loop on a separate thread and yet get
+    // a tokio_core::channel to it?
+    let (tx, rx) = std::sync::mpsc::channel();
+
+    let child = thread::spawn(move || {
+        let mut l = Core::new().unwrap();
+        let handle = l.handle();
+
+        let (in_tx, in_rx) = tokio_core::channel::channel(&handle).unwrap();
+        let (out_tx, out_rx) = tokio_core::channel::channel(&handle).unwrap();
+
+        let server = ChanServer::new(out_tx, in_rx, stop);
+
+        tx.send((in_tx, out_rx)).unwrap();
+        l.run(server).unwrap();
+    });
+
+    let (in_tx, out_rx) = rx.recv().unwrap();
+
+    // TODO: More reliable way to bind server socket and start server
+    // first
+    thread::sleep(Duration::from_millis(100));
+
+    let mut rx_iter = out_rx.wait();
+
+    // warmup phase; for some reason initial couple of
+    // rounds is much slower
+    //
+    // TODO: Describe the exact reasons; caching? branch predictor? lazy closures?
+    for _ in 0..1000 {
+        in_tx.send(1usize).unwrap();
+        let _ = rx_iter.next();
+    }
+
+    b.iter(|| {
+        in_tx.send(1usize).unwrap();
+        let _ = rx_iter.next();
+    });
+
+    // Stop the server; TODO: Use better method
+    stop2.store(true, Ordering::SeqCst);
+    thread::sleep(Duration::from_millis(1));
+    in_tx.send(1usize).unwrap();
+
+    child.join().unwrap();
+}

From 307ba7a8679023180f10f0d951fcf25edcd1ac95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dawid=20Ci=C4=99=C5=BCarkiewicz?=
Date: Tue, 8 Nov 2016 19:09:26 -0800
Subject: [PATCH 3/4] travis: Run `cargo bench` on `nightly`

---
 .travis.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 9637e42a9..6a2e8fa48 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,6 +11,8 @@ script:
   - cargo build
   - cargo test
   - cargo doc --no-deps
+  - if [ "$TRAVIS_RUST_VERSION" == "nightly" ]; then cargo bench ; fi
+
 after_success:
   - travis-cargo --only nightly doc-upload
 env:

From 7ae124077e840bdb1d585ab4c5c7088fe010773c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dawid=20Ci=C4=99=C5=BCarkiewicz?=
Date: Thu, 10 Nov 2016 14:11:06 -0800
Subject: [PATCH 4/4] Improve latency benchmarks.

---
 benches/latency.rs | 77 ++++++++++++++++------------------------------
 1 file changed, 26 insertions(+), 51 deletions(-)

diff --git a/benches/latency.rs b/benches/latency.rs
index 4de7d2ee1..ed76b73dd 100644
--- a/benches/latency.rs
+++ b/benches/latency.rs
@@ -6,8 +6,6 @@ extern crate futures;
 extern crate tokio_core;
 
 use std::io;
-use std::sync::Arc;
-use std::sync::atomic::{AtomicBool, Ordering};
 use std::net::SocketAddr;
 
 use futures::{Future, Poll};
@@ -16,20 +14,20 @@ use tokio_core::reactor::Core;
 
 use test::Bencher;
 use std::thread;
-use std::time::Duration;
 
 use futures::stream::Stream;
+use futures::{oneshot, Oneshot};
 
 /// UDP echo server
 struct EchoServer {
     socket : UdpSocket,
     buf : Vec<u8>,
     to_send : Option<(usize, SocketAddr)>,
-    stop : Arc<AtomicBool>,
+    stop : Oneshot<()>,
 }
 
 impl EchoServer {
-    fn new(s : UdpSocket, stop : Arc<AtomicBool>) -> Self {
+    fn new(s : UdpSocket, stop : Oneshot<()>) -> Self {
         EchoServer {
             socket: s,
             to_send: None,
@@ -45,19 +43,14 @@ impl Future for EchoServer {
 
     fn poll(&mut self) -> Poll<(), io::Error> {
         loop {
-            if self.stop.load(Ordering::SeqCst) {
+            if self.stop.poll() != Ok(futures::Async::NotReady) {
                 return Ok(futures::Async::Ready(()))
             }
 
-            if let Some((size, peer)) = self.to_send.take() {
-                match self.socket.send_to(&self.buf[..size], &peer) {
-                    Err(e) => {
-                        self.to_send = Some((size, peer));
-                        return try_nb!(Err(e));
-                    },
-                    Ok(_) => { }
-                }
+            if let Some(&(size, peer)) = self.to_send.as_ref() {
+                let _ = try_nb!(self.socket.send_to(&self.buf[..size], &peer));
             }
+            self.to_send = None;
 
             self.to_send = Some(
                 try_nb!(self.socket.recv_from(&mut self.buf))
@@ -67,20 +60,20 @@ impl Future for EchoServer {
 }
 
 /// UDP echo server
+///
+/// TODO: This may be replace-able with the newly-minted Sink::send_all function in the futures crate
 struct ChanServer {
     rx : tokio_core::channel::Receiver<usize>,
     tx : tokio_core::channel::Sender<usize>,
     buf : Option<usize>,
-    stop : Arc<AtomicBool>,
 }
 
 impl ChanServer {
-    fn new(tx : tokio_core::channel::Sender<usize>, rx : tokio_core::channel::Receiver<usize>, stop : Arc<AtomicBool>) -> Self {
+    fn new(tx : tokio_core::channel::Sender<usize>, rx : tokio_core::channel::Receiver<usize>) -> Self {
         ChanServer {
             rx: rx,
             tx: tx,
             buf: None,
-            stop: stop,
         }
     }
 }
@@ -91,10 +84,6 @@ impl Future for ChanServer {
 
     fn poll(&mut self) -> Poll<(), io::Error> {
         loop {
-            if self.stop.load(Ordering::SeqCst) {
-                return Ok(futures::Async::Ready(()))
-            }
-
             if let Some(u) = self.buf.take() {
                 match self.tx.send(u) {
                     Err(e) => {
@@ -114,15 +103,17 @@ impl Future for ChanServer {
         }
     }
 }
+
 #[bench]
 fn udp_echo_latency(b: &mut Bencher) {
-    let server_addr= "127.0.0.1:7398".to_string();
+    let server_addr= "127.0.0.1:0".to_string();
     let server_addr = server_addr.parse::<SocketAddr>().unwrap();
-    let client_addr= "127.0.0.1:7399".to_string();
+    let client_addr= "127.0.0.1:0".to_string();
     let client_addr = client_addr.parse::<SocketAddr>().unwrap();
 
-    let stop = Arc::new(AtomicBool::new(false));
-    let stop2 = stop.clone();
+    let (stop_c, stop_p) = oneshot::<()>();
+
+    let (tx, rx) = std::sync::mpsc::channel();
 
     let child = thread::spawn(move || {
         let mut l = Core::new().unwrap();
@@ -130,24 +121,23 @@ fn udp_echo_latency(b: &mut Bencher) {
 
         let socket = tokio_core::net::UdpSocket::bind(&server_addr, &handle).unwrap();
 
-        let server = EchoServer::new(socket, stop);
+        tx.send(socket.local_addr().unwrap()).unwrap();
 
+        let server = EchoServer::new(socket, stop_p);
         l.run(server).unwrap();
     });
 
-    // TODO: More reliable way to bind server socket and start server
-    // first
-    thread::sleep(Duration::from_millis(100));
 
     let client = std::net::UdpSocket::bind(client_addr).unwrap();
+    let server_addr = rx.recv().unwrap();
 
     let mut buf = [0u8; 1000];
 
     // warmup phase; for some reason initial couple of
-    // rounds is much slower
+    // runs are much slower
     //
     // TODO: Describe the exact reasons; caching? branch predictor? lazy closures?
-    for _ in 0..1000 {
+    for _ in 0..8 {
         client.send_to(&buf, &server_addr).unwrap();
         let _ = client.recv_from(&mut buf).unwrap();
     }
@@ -157,21 +147,14 @@ fn udp_echo_latency(b: &mut Bencher) {
         let _ = client.recv_from(&mut buf).unwrap();
     });
 
-    // Stop the server; TODO: Use better method
-    stop2.store(true, Ordering::SeqCst);
-    thread::sleep(Duration::from_millis(1));
-    client.send_to(&buf, &server_addr).unwrap();
+    stop_c.complete(());
 
     child.join().unwrap();
 }
 
 #[bench]
 fn channel_latency(b: &mut Bencher) {
-    let stop = Arc::new(AtomicBool::new(false));
-    let stop2 = stop.clone();
 
-    // TODO: Any way to start Loop on a separate thread and yet get
-    // a tokio_core::channel to it?
     let (tx, rx) = std::sync::mpsc::channel();
 
     let child = thread::spawn(move || {
@@ -181,7 +164,7 @@ fn channel_latency(b: &mut Bencher) {
         let (in_tx, in_rx) = tokio_core::channel::channel(&handle).unwrap();
         let (out_tx, out_rx) = tokio_core::channel::channel(&handle).unwrap();
 
-        let server = ChanServer::new(out_tx, in_rx, stop);
+        let server = ChanServer::new(out_tx, in_rx);
 
         tx.send((in_tx, out_rx)).unwrap();
         l.run(server).unwrap();
@@ -189,17 +172,12 @@ fn channel_latency(b: &mut Bencher) {
 
     let (in_tx, out_rx) = rx.recv().unwrap();
 
-    // TODO: More reliable way to bind server socket and start server
-    // first
-    thread::sleep(Duration::from_millis(100));
-
     let mut rx_iter = out_rx.wait();
 
-    // warmup phase; for some reason initial couple of
-    // rounds is much slower
+    // warmup phase; for some reason initial couple of runs are much slower
     //
     // TODO: Describe the exact reasons; caching? branch predictor? lazy closures?
-    for _ in 0..1000 {
+    for _ in 0..8 {
         in_tx.send(1usize).unwrap();
         let _ = rx_iter.next();
     }
@@ -209,10 +187,7 @@ fn channel_latency(b: &mut Bencher) {
         let _ = rx_iter.next();
     });
 
-    // Stop the server; TODO: Use better method
-    stop2.store(true, Ordering::SeqCst);
-    thread::sleep(Duration::from_millis(1));
-    in_tx.send(1usize).unwrap();
+    drop(in_tx);
 
     child.join().unwrap();
 }
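
A note on the `try_nb!` macro that both `EchoServer::poll` and `ChanServer::poll` lean on: it is tokio-core's helper for doing nonblocking I/O inside `Future::poll`. From memory it expands to roughly the sketch below (a paraphrase, not the verbatim macro), which explains the `return try_nb!(Err(e))` idiom in these patches: an error of kind `WouldBlock` turns into `Async::NotReady`, while any other error is returned as a real failure.

    // Paraphrased sketch of tokio-core's try_nb! (not the verbatim source):
    // unwrap an io::Result inside Future::poll, mapping the WouldBlock
    // error kind to Async::NotReady and propagating every other error.
    macro_rules! try_nb {
        ($e:expr) => (match $e {
            Ok(t) => t,
            Err(ref e) if e.kind() == ::std::io::ErrorKind::WouldBlock => {
                // The resource just isn't ready yet; ask to be polled again.
                return Ok(::futures::Async::NotReady);
            }
            Err(e) => return Err(::std::convert::From::from(e)),
        })
    }

The same polling model is what lets [PATCH 4/4] retire the `AtomicBool`-plus-dummy-packet shutdown: `EchoServer` now exits as soon as polling its `Oneshot<()>` returns anything other than `NotReady` (triggered by `stop_c.complete(())`), and `ChanServer` exits when `drop(in_tx)` makes `self.rx.poll()` yield `Ready(None)`, so the `thread::sleep` calls and extra wake-up sends from the earlier patches are no longer needed.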