mirror of
https://github.com/tokio-rs/tokio.git
synced 2025-09-28 12:10:37 +00:00
Add max line length to LinesCodec
(#632)
## Motivation Currently, there is a potential denial of service vulnerability in the `lines` codec. Since there is no bound on the buffer that holds data before it is split into a new line, an attacker could send an unbounded amount of data without sending a `\n` character. ## Solution This branch adds a `new_with_max_length` constructor for `LinesCodec` that configures a limit on the maximum number of bytes per line. When the limit is reached, the the overly long line will be discarded (in `max_length`-sized increments until a newline character or the end of the buffer is reached. It was also necessary to add some special-case logic to avoid creating an empty line when the length limit is reached at the character immediately _before_ a `\n` character. Additionally, this branch adds new tests for this function, including a test for changing the line limit in-flight. ## Notes This branch makes the following changes from my original PR with this change (#590): - The whole too-long line is discarded at once in the first call to `decode` that encounters it. - Only one error is emitted per too-long line. - Made all the changes requested by @carllerche in https://github.com/tokio-rs/tokio/pull/590#issuecomment-420735023 Fixes: #186 Signed-off-by: Eliza Weisman <eliza@buoyant.io>
This commit is contained in:
parent
be67eda117
commit
3dd95a9ff1
@ -1,6 +1,6 @@
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use tokio_io::_tokio_codec::{Encoder, Decoder};
|
||||
use std::{io, str};
|
||||
use std::{cmp, io, str, usize};
|
||||
|
||||
/// A simple `Codec` implementation that splits up data into lines.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
|
||||
@ -12,12 +12,93 @@ pub struct LinesCodec {
|
||||
// The next time `decode` is called with `abcde\n`, the method will
|
||||
// only look at `de\n` before returning.
|
||||
next_index: usize,
|
||||
|
||||
/// The maximum length for a given line. If `usize::MAX`, lines will be
|
||||
/// read until a `\n` character is reached.
|
||||
max_length: usize,
|
||||
|
||||
/// Are we currently discarding the remainder of a line which was over
|
||||
/// the length limit?
|
||||
is_discarding: bool,
|
||||
}
|
||||
|
||||
impl LinesCodec {
|
||||
/// Returns a `LinesCodec` for splitting up data into lines.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// The returned `LinesCodec` will not have an upper bound on the length
|
||||
/// of a buffered line. See the documentation for [`new_with_max_length`]
|
||||
/// for information on why this could be a potential security risk.
|
||||
///
|
||||
/// [`new_with_max_length`]: #method.new_with_max_length
|
||||
pub fn new() -> LinesCodec {
|
||||
LinesCodec { next_index: 0 }
|
||||
LinesCodec {
|
||||
next_index: 0,
|
||||
max_length: usize::MAX,
|
||||
is_discarding: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a `LinesCodec` with a maximum line length limit.
|
||||
///
|
||||
/// If this is set, calls to `LinesCodec::decode` will return a
|
||||
/// [`LengthError`] when a line exceeds the length limit. Subsequent calls
|
||||
/// will discard up to `limit` bytes from that line until a newline
|
||||
/// character is reached, returning `None` until the line over the limit
|
||||
/// has been fully discarded. After that point, calls to `decode` will
|
||||
/// function as normal.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// Setting a length limit is highly recommended for any `LinesCodec` which
|
||||
/// will be exposed to untrusted input. Otherwise, the size of the buffer
|
||||
/// that holds the line currently being read is unbounded. An attacker could
|
||||
/// exploit this unbounded buffer by sending an unbounded amount of input
|
||||
/// without any `\n` characters, causing unbounded memory consumption.
|
||||
///
|
||||
/// [`LengthError`]: ../struct.LengthError
|
||||
pub fn new_with_max_length(max_length: usize) -> Self {
|
||||
LinesCodec {
|
||||
max_length,
|
||||
..LinesCodec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the maximum line length when decoding.
|
||||
///
|
||||
/// ```
|
||||
/// use std::usize;
|
||||
/// use tokio_codec::LinesCodec;
|
||||
///
|
||||
/// let codec = LinesCodec::new();
|
||||
/// assert_eq!(codec.max_length(), usize::MAX);
|
||||
/// ```
|
||||
/// ```
|
||||
/// use tokio_codec::LinesCodec;
|
||||
///
|
||||
/// let codec = LinesCodec::new_with_max_length(256);
|
||||
/// assert_eq!(codec.max_length(), 256);
|
||||
/// ```
|
||||
pub fn max_length(&self) -> usize {
|
||||
self.max_length
|
||||
}
|
||||
|
||||
fn discard(&mut self, newline_offset: Option<usize>, read_to: usize, buf: &mut BytesMut) {
|
||||
let discard_to = if let Some(offset) = newline_offset {
|
||||
// If we found a newline, discard up to that offset and
|
||||
// then stop discarding. On the next iteration, we'll try
|
||||
// to read a line normally.
|
||||
self.is_discarding = false;
|
||||
offset + self.next_index + 1
|
||||
} else {
|
||||
// Otherwise, we didn't find a newline, so we'll discard
|
||||
// everything we read. On the next iteration, we'll continue
|
||||
// discarding up to max_len bytes unless we find a newline.
|
||||
read_to
|
||||
};
|
||||
buf.advance(discard_to);
|
||||
self.next_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -38,22 +119,49 @@ fn without_carriage_return(s: &[u8]) -> &[u8] {
|
||||
|
||||
impl Decoder for LinesCodec {
|
||||
type Item = String;
|
||||
// TODO: in the next breaking change, this should be changed to a custom
|
||||
// error type that indicates the "max length exceeded" condition better.
|
||||
type Error = io::Error;
|
||||
|
||||
fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<String>, io::Error> {
|
||||
if let Some(newline_offset) =
|
||||
buf[self.next_index..].iter().position(|b| *b == b'\n')
|
||||
{
|
||||
let newline_index = newline_offset + self.next_index;
|
||||
let line = buf.split_to(newline_index + 1);
|
||||
let line = &line[..line.len()-1];
|
||||
let line = without_carriage_return(line);
|
||||
let line = utf8(line)?;
|
||||
self.next_index = 0;
|
||||
Ok(Some(line.to_string()))
|
||||
} else {
|
||||
self.next_index = buf.len();
|
||||
Ok(None)
|
||||
loop {
|
||||
// Determine how far into the buffer we'll search for a newline. If
|
||||
// there's no max_length set, we'll read to the end of the buffer.
|
||||
let read_to = cmp::min(self.max_length.saturating_add(1), buf.len());
|
||||
|
||||
let newline_offset = buf[self.next_index..read_to]
|
||||
.iter()
|
||||
.position(|b| *b == b'\n');
|
||||
|
||||
if self.is_discarding {
|
||||
self.discard(newline_offset, read_to, buf);
|
||||
} else {
|
||||
return if let Some(offset) = newline_offset {
|
||||
// Found a line!
|
||||
let newline_index = offset + self.next_index;
|
||||
self.next_index = 0;
|
||||
let line = buf.split_to(newline_index + 1);
|
||||
let line = &line[..line.len() - 1];
|
||||
let line = without_carriage_return(line);
|
||||
let line = utf8(line)?;
|
||||
|
||||
Ok(Some(line.to_string()))
|
||||
} else if buf.len() > self.max_length {
|
||||
// Reached the maximum length without finding a
|
||||
// newline, return an error and start discarding on the
|
||||
// next call.
|
||||
self.is_discarding = true;
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"line length limit exceeded"
|
||||
))
|
||||
} else {
|
||||
// We didn't find a line or reach the length limit, so the next
|
||||
// call will resume searching at the current offset.
|
||||
self.next_index = read_to;
|
||||
Ok(None)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -55,6 +55,116 @@ fn lines_decoder() {
|
||||
assert_eq!(None, codec.decode_eof(buf).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lines_decoder_max_length() {
|
||||
const MAX_LENGTH: usize = 6;
|
||||
|
||||
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
|
||||
let buf = &mut BytesMut::new();
|
||||
|
||||
buf.reserve(200);
|
||||
buf.put("line 1 is too long\nline 2\nline 3\r\nline 4\n\r\n\r");
|
||||
|
||||
assert!(codec.decode(buf).is_err());
|
||||
|
||||
let line = codec.decode(buf).unwrap().unwrap();
|
||||
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
|
||||
assert_eq!("line 2", line);
|
||||
|
||||
assert!(codec.decode(buf).is_err());
|
||||
|
||||
let line = codec.decode(buf).unwrap().unwrap();
|
||||
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
|
||||
assert_eq!("line 4", line);
|
||||
|
||||
let line = codec.decode(buf).unwrap().unwrap();
|
||||
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
|
||||
assert_eq!("", line);
|
||||
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
assert_eq!(None, codec.decode_eof(buf).unwrap());
|
||||
buf.put("k");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
|
||||
let line = codec.decode_eof(buf).unwrap().unwrap();
|
||||
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
|
||||
assert_eq!("\rk", line);
|
||||
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
assert_eq!(None, codec.decode_eof(buf).unwrap());
|
||||
|
||||
// Line that's one character too long. This could cause an out of bounds
|
||||
// error if we peek at the next characters using slice indexing.
|
||||
// buf.put("aaabbbc");
|
||||
// assert!(codec.decode(buf).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lines_decoder_max_length_underrun() {
|
||||
const MAX_LENGTH: usize = 6;
|
||||
|
||||
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
|
||||
let buf = &mut BytesMut::new();
|
||||
|
||||
buf.reserve(200);
|
||||
buf.put("line ");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
buf.put("too l");
|
||||
assert!(codec.decode(buf).is_err());
|
||||
buf.put("ong\n");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
|
||||
buf.put("line 2");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
buf.put("\n");
|
||||
assert_eq!("line 2", codec.decode(buf).unwrap().unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lines_decoder_max_length_bursts() {
|
||||
const MAX_LENGTH: usize = 10;
|
||||
|
||||
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
|
||||
let buf = &mut BytesMut::new();
|
||||
|
||||
buf.reserve(200);
|
||||
buf.put("line ");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
buf.put("too l");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
buf.put("ong\n");
|
||||
assert!(codec.decode(buf).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lines_decoder_max_length_big_burst() {
|
||||
const MAX_LENGTH: usize = 10;
|
||||
|
||||
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
|
||||
let buf = &mut BytesMut::new();
|
||||
|
||||
buf.reserve(200);
|
||||
buf.put("line ");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
buf.put("too long!\n");
|
||||
assert!(codec.decode(buf).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lines_decoder_max_length_newline_between_decodes() {
|
||||
const MAX_LENGTH: usize = 5;
|
||||
|
||||
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
|
||||
let buf = &mut BytesMut::new();
|
||||
|
||||
buf.reserve(200);
|
||||
buf.put("hello");
|
||||
assert_eq!(None, codec.decode(buf).unwrap());
|
||||
|
||||
buf.put("\nworld");
|
||||
assert_eq!("hello", codec.decode(buf).unwrap().unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lines_encoder() {
|
||||
let mut codec = LinesCodec::new();
|
||||
|
Loading…
x
Reference in New Issue
Block a user