From 79db7d4fe72d3a650a29158822ac04190ef38f0d Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 4 Sep 2017 12:02:00 -0700 Subject: [PATCH] Optimize deserialize_ignored_any --- src/de.rs | 219 +++++++++++++++++++++++++++++++++++++++++++++++++- src/read.rs | 108 ++++++++++++++++++++++++- tests/test.rs | 10 ++- 3 files changed, 330 insertions(+), 7 deletions(-) diff --git a/src/de.rs b/src/de.rs index 9ec5165..fd2a489 100644 --- a/src/de.rs +++ b/src/de.rs @@ -532,6 +532,215 @@ impl<'de, R: Read<'de>> Deserializer { None => Err(self.peek_error(ErrorCode::EofWhileParsingObject)), } } + + fn ignore_value(&mut self) -> Result<()> { + let peek = match try!(self.parse_whitespace()) { + Some(b) => b, + None => { + return Err(self.peek_error(ErrorCode::EofWhileParsingValue)); + } + }; + + match peek { + b'n' => { + self.eat_char(); + self.parse_ident(b"ull") + } + b't' => { + self.eat_char(); + self.parse_ident(b"rue") + } + b'f' => { + self.eat_char(); + self.parse_ident(b"alse") + } + b'-' => { + self.eat_char(); + self.ignore_integer() + } + b'0'...b'9' => { + self.ignore_integer() + } + b'"' => { + self.eat_char(); + self.read.ignore_str() + } + b'[' => { + self.remaining_depth -= 1; + if self.remaining_depth == 0 { + return Err(self.peek_error(ErrorCode::RecursionLimitExceeded)); + } + + self.eat_char(); + let res = self.ignore_seq(); + self.remaining_depth += 1; + res + } + b'{' => { + self.remaining_depth -= 1; + if self.remaining_depth == 0 { + return Err(self.peek_error(ErrorCode::RecursionLimitExceeded)); + } + + self.eat_char(); + let res = self.ignore_map(); + self.remaining_depth += 1; + res + } + _ => { + Err(self.peek_error(ErrorCode::ExpectedSomeValue)) + } + } + } + + fn ignore_integer(&mut self) -> Result<()> { + match try!(self.next_char_or_null()) { + b'0' => { + // There can be only one leading '0'. + if let b'0'...b'9' = try!(self.peek_or_null()) { + return Err(self.peek_error(ErrorCode::InvalidNumber)); + } + } + b'1'...b'9' => { + while let b'0'...b'9' = try!(self.peek_or_null()) { + self.eat_char(); + } + } + _ => { + return Err(self.error(ErrorCode::InvalidNumber)); + } + } + + match try!(self.peek_or_null()) { + b'.' => self.ignore_decimal(), + b'e' | b'E' => self.ignore_exponent(), + _ => Ok(()), + } + } + + fn ignore_decimal(&mut self) -> Result<()> { + self.eat_char(); + + let mut at_least_one_digit = false; + while let b'0'...b'9' = try!(self.peek_or_null()) { + self.eat_char(); + at_least_one_digit = true; + } + + if !at_least_one_digit { + return Err(self.peek_error(ErrorCode::InvalidNumber)); + } + + match try!(self.peek_or_null()) { + b'e' | b'E' => self.ignore_exponent(), + _ => Ok(()), + } + } + + fn ignore_exponent(&mut self) -> Result<()> { + self.eat_char(); + + match try!(self.peek_or_null()) { + b'+' | b'-' => self.eat_char(), + _ => {} + } + + // Make sure a digit follows the exponent place. + match try!(self.next_char_or_null()) { + b'0'...b'9' => {} + _ => { + return Err(self.error(ErrorCode::InvalidNumber)); + } + } + + while let b'0'...b'9' = try!(self.peek_or_null()) { + self.eat_char(); + } + + Ok(()) + } + + fn ignore_seq(&mut self) -> Result<()> { + let mut first = true; + + loop { + match try!(self.parse_whitespace()) { + Some(b']') => { + self.eat_char(); + return Ok(()); + } + Some(b',') if !first => { + self.eat_char(); + } + Some(_) => { + if first { + first = false; + } else { + return Err(self.peek_error(ErrorCode::ExpectedListCommaOrEnd)); + } + } + None => { + return Err(self.peek_error(ErrorCode::EofWhileParsingList)); + } + } + + try!(self.ignore_value()); + } + } + + fn ignore_map(&mut self) -> Result<()> { + let mut first = true; + + loop { + let peek = match try!(self.parse_whitespace()) { + Some(b'}') => { + self.eat_char(); + return Ok(()); + } + Some(b',') if !first => { + self.eat_char(); + try!(self.parse_whitespace()) + } + Some(b) => { + if first { + first = false; + Some(b) + } else { + return Err(self.peek_error(ErrorCode::ExpectedObjectCommaOrEnd)); + } + } + None => { + return Err(self.peek_error(ErrorCode::EofWhileParsingObject)); + } + }; + + match peek { + Some(b'"') => { + self.eat_char(); + try!(self.read.ignore_str()); + } + Some(_) => { + return Err(self.peek_error(ErrorCode::KeyMustBeAString)); + } + None => { + return Err(self.peek_error(ErrorCode::EofWhileParsingObject)); + } + } + + match try!(self.parse_whitespace()) { + Some(b':') => { + self.eat_char(); + try!(self.ignore_value()); + } + Some(_) => { + return Err(self.peek_error(ErrorCode::ExpectedColon)); + } + None => { + return Err(self.peek_error(ErrorCode::EofWhileParsingObject)); + } + } + } + } } #[cfg_attr(rustfmt, rustfmt_skip)] @@ -750,9 +959,17 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer { self.deserialize_bytes(visitor) } + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + try!(self.ignore_value()); + visitor.visit_unit() + } + forward_to_deserialize_any! { bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 char str string unit - unit_struct seq tuple tuple_struct map struct identifier ignored_any + unit_struct seq tuple tuple_struct map struct identifier } } diff --git a/src/read.rs b/src/read.rs index a3aa6bf..02db8f7 100644 --- a/src/read.rs +++ b/src/read.rs @@ -71,6 +71,11 @@ pub trait Read<'de>: private::Sealed { &'s mut self, scratch: &'s mut Vec, ) -> Result>; + + /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped + /// string until the next quotation mark but discards the data. + #[doc(hidden)] + fn ignore_str(&mut self) -> Result<()>; } pub struct Position { @@ -257,6 +262,26 @@ where self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) .map(Reference::Copied) } + + fn ignore_str(&mut self) -> Result<()> { + loop { + let ch = try!(next_or_eof(self)); + if !ESCAPE[ch as usize] { + continue; + } + match ch { + b'"' => { + return Ok(()); + } + b'\\' => { + try!(ignore_escape(self)); + } + _ => { + return error(self, ErrorCode::InvalidUnicodeCodePoint); + } + } + } + } } ////////////////////////////////////////////////////////////////////////////// @@ -402,6 +427,30 @@ impl<'a> Read<'a> for SliceRead<'a> { ) -> Result> { self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) } + + fn ignore_str(&mut self) -> Result<()> { + loop { + while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] { + self.index += 1; + } + if self.index == self.slice.len() { + return error(self, ErrorCode::EofWhileParsingString); + } + match self.slice[self.index] { + b'"' => { + self.index += 1; + return Ok(()); + } + b'\\' => { + self.index += 1; + try!(ignore_escape(self)); + } + _ => { + return error(self, ErrorCode::InvalidUnicodeCodePoint); + } + } + } + } } ////////////////////////////////////////////////////////////////////////////// @@ -460,6 +509,10 @@ impl<'a> Read<'a> for StrRead<'a> { ) -> Result> { self.delegate.parse_str_raw(scratch) } + + fn ignore_str(&mut self) -> Result<()> { + self.delegate.ignore_str() + } } ////////////////////////////////////////////////////////////////////////////// @@ -492,14 +545,14 @@ static ESCAPE: [bool; 256] = [ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, // F ]; -fn next_or_eof<'de, R: Read<'de>>(read: &mut R) -> Result { +fn next_or_eof<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result { match try!(read.next().map_err(Error::io)) { Some(b) => Ok(b), None => error(read, ErrorCode::EofWhileParsingString), } } -fn error<'de, R: Read<'de>, T>(read: &R, reason: ErrorCode) -> Result { +fn error<'de, R: ?Sized + Read<'de>, T>(read: &R, reason: ErrorCode) -> Result { let pos = read.position(); Err(Error::syntax(reason, pos.line, pos.column)) } @@ -546,7 +599,7 @@ fn parse_escape<'de, R: Read<'de>>(read: &mut R, scratch: &mut Vec) -> Resul let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; - match char::from_u32(n as u32) { + match char::from_u32(n) { Some(c) => c, None => { return error(read, ErrorCode::InvalidUnicodeCodePoint); @@ -578,7 +631,54 @@ fn parse_escape<'de, R: Read<'de>>(read: &mut R, scratch: &mut Vec) -> Resul Ok(()) } -fn decode_hex_escape<'de, R: Read<'de>>(read: &mut R) -> Result { +/// Parses a JSON escape sequence and discards the value. Assumes the previous +/// byte read was a backslash. +fn ignore_escape<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<()> { + let ch = try!(next_or_eof(read)); + + match ch { + b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {} + b'u' => { + let n = match try!(decode_hex_escape(read)) { + 0xDC00...0xDFFF => { + return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape); + } + + // Non-BMP characters are encoded as a sequence of + // two hex escapes, representing UTF-16 surrogates. + n1 @ 0xD800...0xDBFF => { + if try!(next_or_eof(read)) != b'\\' { + return error(read, ErrorCode::UnexpectedEndOfHexEscape); + } + if try!(next_or_eof(read)) != b'u' { + return error(read, ErrorCode::UnexpectedEndOfHexEscape); + } + + let n2 = try!(decode_hex_escape(read)); + + if n2 < 0xDC00 || n2 > 0xDFFF { + return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape); + } + + (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000 + } + + n => n as u32, + }; + + if char::from_u32(n).is_none() { + return error(read, ErrorCode::InvalidUnicodeCodePoint); + } + } + _ => { + return error(read, ErrorCode::InvalidEscape); + } + } + + Ok(()) +} + +fn decode_hex_escape<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result { let mut n = 0; for _ in 0..4 { n = match try!(next_or_eof(read)) { diff --git a/tests/test.rs b/tests/test.rs index 89b1d78..f8ccd0c 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -34,7 +34,7 @@ use std::iter; use std::marker::PhantomData; use std::{u8, u16, u32, u64}; -use serde::de::{self, Deserialize}; +use serde::de::{self, Deserialize, IgnoredAny}; use serde::ser::{self, Serialize, Serializer}; use serde_bytes::{ByteBuf, Bytes}; @@ -594,6 +594,12 @@ where // Make sure we can round trip back to `Value`. let json_value2: Value = from_value(json_value.clone()).unwrap(); assert_eq!(json_value2, json_value); + + // Make sure we can fully ignore. + let twoline = s.to_owned() + "\n3735928559"; + let mut de = Deserializer::from_str(&twoline); + IgnoredAny::deserialize(&mut de).unwrap(); + assert_eq!(0xDEAD_BEEF, u64::deserialize(&mut de).unwrap()); } } @@ -1844,4 +1850,4 @@ fn test_borrow() { fn null_invalid_type() { let err = serde_json::from_str::("null").unwrap_err(); assert_eq!(format!("{}", err), String::from("invalid type: null, expected a string at line 1 column 4")); -} \ No newline at end of file +}