Add max line length to LinesCodec (#632)

## Motivation

Currently, there is a potential denial-of-service vulnerability in the
`lines` codec: since there is no bound on the buffer that holds data
before it is split at a newline, an attacker could send an unbounded
amount of data without ever sending a `\n` character.

## Solution

This branch adds a `new_with_max_length` constructor for `LinesCodec`
that configures a limit on the maximum number of bytes per line. When
the limit is exceeded, the overly long line is discarded, in
`max_length`-sized increments, until a newline character or the end of the
buffer is reached. It was also necessary to add some special-case logic
to avoid emitting an empty line when the length limit is reached on the
character immediately _before_ a `\n` character.
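
For illustration, here is a minimal sketch of the resulting behavior, assuming
the `tokio-codec` and `bytes` crates as used in this branch (the 8-byte limit
and the input strings are arbitrary examples):

```rust
extern crate bytes;
extern crate tokio_codec;

use bytes::{BufMut, BytesMut};
use tokio_codec::{Decoder, LinesCodec};

fn main() {
    // Hypothetical example: limit each decoded line to 8 bytes.
    let mut codec = LinesCodec::new_with_max_length(8);

    let mut buf = BytesMut::with_capacity(64);
    buf.put("this line is far too long\nshort\n");

    // The over-long line yields an error...
    assert!(codec.decode(&mut buf).is_err());

    // ...and is then discarded, so decoding resumes with the next line.
    assert_eq!(Some("short".to_string()), codec.decode(&mut buf).unwrap());
}
```

The second `decode` call both discards the remainder of the offending line and
returns the next complete line, which is the same pattern exercised by the
`lines_decoder_max_length` test below.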

Additionally, this branch adds new tests for this function, including a
test for changing the line limit in-flight.

## Notes

This branch makes the following changes relative to my original PR for
this feature (#590):

- The whole too-long line is discarded at once in the first call to `decode`
  that encounters it.
- Only one error is emitted per too-long line.
- Made all the changes requested by @carllerche in
  https://github.com/tokio-rs/tokio/pull/590#issuecomment-420735023

Fixes: #186 

Signed-off-by: Eliza Weisman <eliza@buoyant.io>

@@ -1,6 +1,6 @@
use bytes::{BufMut, BytesMut};
use tokio_io::_tokio_codec::{Encoder, Decoder};
use std::{io, str};
use std::{cmp, io, str, usize};
/// A simple `Codec` implementation that splits up data into lines.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
@@ -12,12 +12,93 @@ pub struct LinesCodec {
// The next time `decode` is called with `abcde\n`, the method will
// only look at `de\n` before returning.
next_index: usize,
/// The maximum length for a given line. If `usize::MAX`, lines will be
/// read until a `\n` character is reached.
max_length: usize,
/// Are we currently discarding the remainder of a line which was over
/// the length limit?
is_discarding: bool,
}
impl LinesCodec {
/// Returns a `LinesCodec` for splitting up data into lines.
///
/// # Note
///
/// The returned `LinesCodec` will not have an upper bound on the length
/// of a buffered line. See the documentation for [`new_with_max_length`]
/// for information on why this could be a potential security risk.
///
/// [`new_with_max_length`]: #method.new_with_max_length
pub fn new() -> LinesCodec {
LinesCodec { next_index: 0 }
LinesCodec {
next_index: 0,
max_length: usize::MAX,
is_discarding: false,
}
}
/// Returns a `LinesCodec` with a maximum line length limit.
///
/// If this is set, calls to `LinesCodec::decode` will return a
/// [`LengthError`] when a line exceeds the length limit. Subsequent calls
/// will discard up to `limit` bytes from that line until a newline
/// character is reached, returning `None` until the line over the limit
/// has been fully discarded. After that point, calls to `decode` will
/// function as normal.
///
/// # Note
///
/// Setting a length limit is highly recommended for any `LinesCodec` which
/// will be exposed to untrusted input. Otherwise, the size of the buffer
/// that holds the line currently being read is unbounded. An attacker could
/// exploit this unbounded buffer by sending an unbounded amount of input
/// without any `\n` characters, causing unbounded memory consumption.
///
/// [`LengthError`]: ../struct.LengthError
pub fn new_with_max_length(max_length: usize) -> Self {
LinesCodec {
max_length,
..LinesCodec::new()
}
}
/// Returns the maximum line length when decoding.
///
/// ```
/// use std::usize;
/// use tokio_codec::LinesCodec;
///
/// let codec = LinesCodec::new();
/// assert_eq!(codec.max_length(), usize::MAX);
/// ```
/// ```
/// use tokio_codec::LinesCodec;
///
/// let codec = LinesCodec::new_with_max_length(256);
/// assert_eq!(codec.max_length(), 256);
/// ```
pub fn max_length(&self) -> usize {
self.max_length
}
fn discard(&mut self, newline_offset: Option<usize>, read_to: usize, buf: &mut BytesMut) {
let discard_to = if let Some(offset) = newline_offset {
// If we found a newline, discard up to that offset and
// then stop discarding. On the next iteration, we'll try
// to read a line normally.
self.is_discarding = false;
offset + self.next_index + 1
} else {
// Otherwise, we didn't find a newline, so we'll discard
// everything we read. On the next iteration, we'll continue
// discarding up to max_len bytes unless we find a newline.
read_to
};
buf.advance(discard_to);
self.next_index = 0;
}
}
@@ -38,22 +119,49 @@ fn without_carriage_return(s: &[u8]) -> &[u8] {
impl Decoder for LinesCodec {
type Item = String;
// TODO: in the next breaking change, this should be changed to a custom
// error type that indicates the "max length exceeded" condition better.
type Error = io::Error;
fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<String>, io::Error> {
if let Some(newline_offset) =
buf[self.next_index..].iter().position(|b| *b == b'\n')
{
let newline_index = newline_offset + self.next_index;
let line = buf.split_to(newline_index + 1);
let line = &line[..line.len()-1];
let line = without_carriage_return(line);
let line = utf8(line)?;
self.next_index = 0;
Ok(Some(line.to_string()))
} else {
self.next_index = buf.len();
Ok(None)
loop {
// Determine how far into the buffer we'll search for a newline. If
// there's no max_length set, we'll read to the end of the buffer.
let read_to = cmp::min(self.max_length.saturating_add(1), buf.len());
let newline_offset = buf[self.next_index..read_to]
.iter()
.position(|b| *b == b'\n');
if self.is_discarding {
self.discard(newline_offset, read_to, buf);
} else {
return if let Some(offset) = newline_offset {
// Found a line!
let newline_index = offset + self.next_index;
self.next_index = 0;
let line = buf.split_to(newline_index + 1);
let line = &line[..line.len() - 1];
let line = without_carriage_return(line);
let line = utf8(line)?;
Ok(Some(line.to_string()))
} else if buf.len() > self.max_length {
// Reached the maximum length without finding a
// newline, return an error and start discarding on the
// next call.
self.is_discarding = true;
Err(io::Error::new(
io::ErrorKind::Other,
"line length limit exceeded"
))
} else {
// We didn't find a line or reach the length limit, so the next
// call will resume searching at the current offset.
self.next_index = read_to;
Ok(None)
};
}
}
}

@@ -55,6 +55,116 @@ fn lines_decoder() {
assert_eq!(None, codec.decode_eof(buf).unwrap());
}
#[test]
fn lines_decoder_max_length() {
const MAX_LENGTH: usize = 6;
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
let buf = &mut BytesMut::new();
buf.reserve(200);
buf.put("line 1 is too long\nline 2\nline 3\r\nline 4\n\r\n\r");
assert!(codec.decode(buf).is_err());
let line = codec.decode(buf).unwrap().unwrap();
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
assert_eq!("line 2", line);
assert!(codec.decode(buf).is_err());
let line = codec.decode(buf).unwrap().unwrap();
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
assert_eq!("line 4", line);
let line = codec.decode(buf).unwrap().unwrap();
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
assert_eq!("", line);
assert_eq!(None, codec.decode(buf).unwrap());
assert_eq!(None, codec.decode_eof(buf).unwrap());
buf.put("k");
assert_eq!(None, codec.decode(buf).unwrap());
let line = codec.decode_eof(buf).unwrap().unwrap();
assert!(line.len() <= MAX_LENGTH, "{:?}.len() <= {:?}", line, MAX_LENGTH);
assert_eq!("\rk", line);
assert_eq!(None, codec.decode(buf).unwrap());
assert_eq!(None, codec.decode_eof(buf).unwrap());
// Line that's one character too long. This could cause an out of bounds
// error if we peek at the next characters using slice indexing.
// buf.put("aaabbbc");
// assert!(codec.decode(buf).is_err());
}
#[test]
fn lines_decoder_max_length_underrun() {
const MAX_LENGTH: usize = 6;
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
let buf = &mut BytesMut::new();
buf.reserve(200);
buf.put("line ");
assert_eq!(None, codec.decode(buf).unwrap());
buf.put("too l");
assert!(codec.decode(buf).is_err());
buf.put("ong\n");
assert_eq!(None, codec.decode(buf).unwrap());
buf.put("line 2");
assert_eq!(None, codec.decode(buf).unwrap());
buf.put("\n");
assert_eq!("line 2", codec.decode(buf).unwrap().unwrap());
}
#[test]
fn lines_decoder_max_length_bursts() {
const MAX_LENGTH: usize = 10;
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
let buf = &mut BytesMut::new();
buf.reserve(200);
buf.put("line ");
assert_eq!(None, codec.decode(buf).unwrap());
buf.put("too l");
assert_eq!(None, codec.decode(buf).unwrap());
buf.put("ong\n");
assert!(codec.decode(buf).is_err());
}
#[test]
fn lines_decoder_max_length_big_burst() {
const MAX_LENGTH: usize = 10;
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
let buf = &mut BytesMut::new();
buf.reserve(200);
buf.put("line ");
assert_eq!(None, codec.decode(buf).unwrap());
buf.put("too long!\n");
assert!(codec.decode(buf).is_err());
}
#[test]
fn lines_decoder_max_length_newline_between_decodes() {
const MAX_LENGTH: usize = 5;
let mut codec = LinesCodec::new_with_max_length(MAX_LENGTH);
let buf = &mut BytesMut::new();
buf.reserve(200);
buf.put("hello");
assert_eq!(None, codec.decode(buf).unwrap());
buf.put("\nworld");
assert_eq!("hello", codec.decode(buf).unwrap().unwrap());
}
#[test]
fn lines_encoder() {
let mut codec = LinesCodec::new();