Merge pull request #356 from serde-rs/ignore

Optimize deserialize_ignored_any
This commit is contained in:
David Tolnay
2017-09-04 13:41:24 -07:00
committed by GitHub
3 changed files with 330 additions and 7 deletions

219
src/de.rs
View File

@@ -532,6 +532,215 @@ impl<'de, R: Read<'de>> Deserializer<R> {
None => Err(self.peek_error(ErrorCode::EofWhileParsingObject)),
}
}
/// Skips over exactly one JSON value without materialising it.
///
/// Leading whitespace is consumed first. The first significant byte selects
/// how the rest of the value is skipped: a literal (`null`, `true`,
/// `false`), a number, a string, an array or an object.
///
/// Returns `EofWhileParsingValue` if the input ends before any value starts,
/// `ExpectedSomeValue` if the first byte cannot begin a value, and
/// `RecursionLimitExceeded` when entering a nested array or object exhausts
/// `remaining_depth`.
fn ignore_value(&mut self) -> Result<()> {
    let first = match try!(self.parse_whitespace()) {
        None => return Err(self.peek_error(ErrorCode::EofWhileParsingValue)),
        Some(byte) => byte,
    };

    match first {
        b'n' => {
            self.eat_char();
            self.parse_ident(b"ull")
        }
        b't' => {
            self.eat_char();
            self.parse_ident(b"rue")
        }
        b'f' => {
            self.eat_char();
            self.parse_ident(b"alse")
        }
        b'-' | b'0'...b'9' => {
            // Only the sign is consumed here; the digits are left for
            // `ignore_integer`.
            if first == b'-' {
                self.eat_char();
            }
            self.ignore_integer()
        }
        b'"' => {
            self.eat_char();
            self.read.ignore_str()
        }
        b'[' | b'{' => {
            // Spend one level of the nesting budget for this container.
            self.remaining_depth -= 1;
            if self.remaining_depth == 0 {
                return Err(self.peek_error(ErrorCode::RecursionLimitExceeded));
            }
            self.eat_char();
            let outcome = if first == b'[' {
                self.ignore_seq()
            } else {
                self.ignore_map()
            };
            // Give the level back whether or not the contents were valid.
            self.remaining_depth += 1;
            outcome
        }
        _ => Err(self.peek_error(ErrorCode::ExpectedSomeValue)),
    }
}
/// Skips the integer part of a JSON number and then any fraction or
/// exponent that follows it.
///
/// Consumes the first digit itself; `ignore_value` has already consumed a
/// leading `-` before calling this. Returns `InvalidNumber` when the first
/// byte is not a digit or when a leading `0` is followed by another digit.
fn ignore_integer(&mut self) -> Result<()> {
    let lead = try!(self.next_char_or_null());
    match lead {
        b'0' => {
            // There can be only one leading '0'.
            match try!(self.peek_or_null()) {
                b'0'...b'9' => {
                    return Err(self.peek_error(ErrorCode::InvalidNumber));
                }
                _ => {}
            }
        }
        b'1'...b'9' => loop {
            match try!(self.peek_or_null()) {
                b'0'...b'9' => self.eat_char(),
                _ => break,
            }
        },
        _ => {
            return Err(self.error(ErrorCode::InvalidNumber));
        }
    }

    // Hand over to the fraction / exponent skippers when one follows.
    let upcoming = try!(self.peek_or_null());
    if upcoming == b'.' {
        self.ignore_decimal()
    } else if upcoming == b'e' || upcoming == b'E' {
        self.ignore_exponent()
    } else {
        Ok(())
    }
}
/// Skips the fractional part of a JSON number.
///
/// The first byte consumed is the `.` that `ignore_integer` peeked. At least
/// one digit must follow it, otherwise `InvalidNumber` is returned. An
/// exponent after the digits is skipped as well.
fn ignore_decimal(&mut self) -> Result<()> {
    // Consume the decimal point itself.
    self.eat_char();

    let mut saw_digit = false;
    loop {
        match try!(self.peek_or_null()) {
            b'0'...b'9' => {
                self.eat_char();
                saw_digit = true;
            }
            _ => break,
        }
    }
    if !saw_digit {
        return Err(self.peek_error(ErrorCode::InvalidNumber));
    }

    let upcoming = try!(self.peek_or_null());
    if upcoming == b'e' || upcoming == b'E' {
        self.ignore_exponent()
    } else {
        Ok(())
    }
}
fn ignore_exponent(&mut self) -> Result<()> {
self.eat_char();
match try!(self.peek_or_null()) {
b'+' | b'-' => self.eat_char(),
_ => {}
}
// Make sure a digit follows the exponent place.
match try!(self.next_char_or_null()) {
b'0'...b'9' => {}
_ => {
return Err(self.error(ErrorCode::InvalidNumber));
}
}
while let b'0'...b'9' = try!(self.peek_or_null()) {
self.eat_char();
}
Ok(())
}
/// Skips the remainder of a JSON array, up to and including the closing `]`.
///
/// `ignore_value` consumes the opening `[` before calling this. Elements
/// are skipped one by one via `ignore_value`; every element after the first
/// must be preceded by a comma.
///
/// Returns `EofWhileParsingList` if the input ends inside the array and
/// `ExpectedListCommaOrEnd` if two elements are not separated by a comma.
fn ignore_seq(&mut self) -> Result<()> {
    let mut expecting_first = true;
    loop {
        let next = match try!(self.parse_whitespace()) {
            Some(byte) => byte,
            None => {
                return Err(self.peek_error(ErrorCode::EofWhileParsingList));
            }
        };

        if next == b']' {
            self.eat_char();
            return Ok(());
        }

        if expecting_first {
            // No separator before the very first element.
            expecting_first = false;
        } else if next == b',' {
            self.eat_char();
        } else {
            return Err(self.peek_error(ErrorCode::ExpectedListCommaOrEnd));
        }

        try!(self.ignore_value());
    }
}
/// Skips the remainder of a JSON object, up to and including the closing `}`.
///
/// `ignore_value` consumes the opening `{` before calling this. Each entry
/// must be a string key, a colon, and a value; every entry after the first
/// must be preceded by a comma.
///
/// Returns `EofWhileParsingObject` if the input ends inside the object,
/// `ExpectedObjectCommaOrEnd` if two entries are not separated by a comma,
/// `KeyMustBeAString` if a key does not start with a quotation mark, and
/// `ExpectedColon` if the key is not followed by `:`.
fn ignore_map(&mut self) -> Result<()> {
    let mut expecting_first = true;
    loop {
        let seen = match try!(self.parse_whitespace()) {
            Some(byte) => byte,
            None => {
                return Err(self.peek_error(ErrorCode::EofWhileParsingObject));
            }
        };

        if seen == b'}' {
            self.eat_char();
            return Ok(());
        }

        // Work out which byte (if any) begins the next key.
        let key_start = if expecting_first {
            expecting_first = false;
            Some(seen)
        } else if seen == b',' {
            self.eat_char();
            try!(self.parse_whitespace())
        } else {
            return Err(self.peek_error(ErrorCode::ExpectedObjectCommaOrEnd));
        };

        match key_start {
            None => {
                return Err(self.peek_error(ErrorCode::EofWhileParsingObject));
            }
            Some(b'"') => {
                self.eat_char();
                try!(self.read.ignore_str());
            }
            Some(_) => {
                return Err(self.peek_error(ErrorCode::KeyMustBeAString));
            }
        }

        match try!(self.parse_whitespace()) {
            None => {
                return Err(self.peek_error(ErrorCode::EofWhileParsingObject));
            }
            Some(b':') => {
                self.eat_char();
                try!(self.ignore_value());
            }
            Some(_) => {
                return Err(self.peek_error(ErrorCode::ExpectedColon));
            }
        }
    }
}
}
#[cfg_attr(rustfmt, rustfmt_skip)]
@@ -750,9 +959,17 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> {
self.deserialize_bytes(visitor)
}
/// Skips the next JSON value in the input and then reports a unit to the
/// visitor. Any error from skipping the value is returned unchanged and the
/// visitor is not called.
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
where
    V: de::Visitor<'de>,
{
    match self.ignore_value() {
        Ok(()) => visitor.visit_unit(),
        Err(err) => Err(err),
    }
}
forward_to_deserialize_any! {
bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 char str string unit
unit_struct seq tuple tuple_struct map struct identifier ignored_any
unit_struct seq tuple tuple_struct map struct identifier
}
}

View File

@@ -71,6 +71,11 @@ pub trait Read<'de>: private::Sealed {
&'s mut self,
scratch: &'s mut Vec<u8>,
) -> Result<Reference<'de, 's, [u8]>>;
/// Skips the rest of a JSON string without keeping its contents.
///
/// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
/// string until the next quotation mark but discards the data.
#[doc(hidden)]
fn ignore_str(&mut self) -> Result<()>;
}
pub struct Position {
@@ -257,6 +262,26 @@ where
self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
.map(Reference::Copied)
}
/// Reads and discards bytes one at a time until the closing quotation mark
/// of the current string has been consumed.
///
/// Bytes not flagged in the `ESCAPE` table are skipped outright. A flagged
/// byte is the terminating `"`, a backslash (whose escape sequence is
/// checked and dropped by `ignore_escape`), or anything else, which is
/// reported as `InvalidUnicodeCodePoint`.
fn ignore_str(&mut self) -> Result<()> {
    loop {
        let byte = try!(next_or_eof(self));
        if ESCAPE[byte as usize] {
            if byte == b'"' {
                return Ok(());
            }
            if byte != b'\\' {
                return error(self, ErrorCode::InvalidUnicodeCodePoint);
            }
            try!(ignore_escape(self));
        }
    }
}
}
//////////////////////////////////////////////////////////////////////////////
@@ -402,6 +427,30 @@ impl<'a> Read<'a> for SliceRead<'a> {
) -> Result<Reference<'a, 's, [u8]>> {
self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
}
/// Advances `index` past the closing quotation mark of the current string
/// without copying any of the string's bytes.
///
/// Returns `EofWhileParsingString` if the slice ends before the closing
/// quote, and `InvalidUnicodeCodePoint` if a byte flagged in the `ESCAPE`
/// table is neither `"` nor a backslash.
fn ignore_str(&mut self) -> Result<()> {
    loop {
        // Jump straight to the next byte flagged in `ESCAPE`, if any.
        let offset = self.slice[self.index..]
            .iter()
            .position(|&byte| ESCAPE[byte as usize]);
        match offset {
            Some(skip) => self.index += skip,
            None => {
                // Ran off the end of the input: report the error there.
                self.index = self.slice.len();
                return error(self, ErrorCode::EofWhileParsingString);
            }
        }

        let flagged = self.slice[self.index];
        if flagged == b'"' {
            self.index += 1;
            return Ok(());
        }
        if flagged != b'\\' {
            return error(self, ErrorCode::InvalidUnicodeCodePoint);
        }
        // Step over the backslash, then over the escape that follows it.
        self.index += 1;
        try!(ignore_escape(self));
    }
}
}
//////////////////////////////////////////////////////////////////////////////
@@ -460,6 +509,10 @@ impl<'a> Read<'a> for StrRead<'a> {
) -> Result<Reference<'a, 's, [u8]>> {
self.delegate.parse_str_raw(scratch)
}
/// Skips the rest of the current JSON string by forwarding to the wrapped
/// `delegate` reader's `ignore_str`; no extra work is done here.
fn ignore_str(&mut self) -> Result<()> {
self.delegate.ignore_str()
}
}
//////////////////////////////////////////////////////////////////////////////
@@ -492,14 +545,14 @@ static ESCAPE: [bool; 256] = [
O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, // F
];
fn next_or_eof<'de, R: Read<'de>>(read: &mut R) -> Result<u8> {
fn next_or_eof<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<u8> {
match try!(read.next().map_err(Error::io)) {
Some(b) => Ok(b),
None => error(read, ErrorCode::EofWhileParsingString),
}
}
fn error<'de, R: Read<'de>, T>(read: &R, reason: ErrorCode) -> Result<T> {
fn error<'de, R: ?Sized + Read<'de>, T>(read: &R, reason: ErrorCode) -> Result<T> {
let pos = read.position();
Err(Error::syntax(reason, pos.line, pos.column))
}
@@ -546,7 +599,7 @@ fn parse_escape<'de, R: Read<'de>>(read: &mut R, scratch: &mut Vec<u8>) -> Resul
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
match char::from_u32(n as u32) {
match char::from_u32(n) {
Some(c) => c,
None => {
return error(read, ErrorCode::InvalidUnicodeCodePoint);
@@ -578,7 +631,54 @@ fn parse_escape<'de, R: Read<'de>>(read: &mut R, scratch: &mut Vec<u8>) -> Resul
Ok(())
}
fn decode_hex_escape<'de, R: Read<'de>>(read: &mut R) -> Result<u16> {
/// Parses a JSON escape sequence and discards the value. Assumes the previous
/// byte read was a backslash.
fn ignore_escape<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<()> {
let ch = try!(next_or_eof(read));
match ch {
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
b'u' => {
let n = match try!(decode_hex_escape(read)) {
0xDC00...0xDFFF => {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}
// Non-BMP characters are encoded as a sequence of
// two hex escapes, representing UTF-16 surrogates.
n1 @ 0xD800...0xDBFF => {
if try!(next_or_eof(read)) != b'\\' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}
if try!(next_or_eof(read)) != b'u' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}
let n2 = try!(decode_hex_escape(read));
if n2 < 0xDC00 || n2 > 0xDFFF {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}
(((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000
}
n => n as u32,
};
if char::from_u32(n).is_none() {
return error(read, ErrorCode::InvalidUnicodeCodePoint);
}
}
_ => {
return error(read, ErrorCode::InvalidEscape);
}
}
Ok(())
}
fn decode_hex_escape<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<u16> {
let mut n = 0;
for _ in 0..4 {
n = match try!(next_or_eof(read)) {

View File

@@ -34,7 +34,7 @@ use std::iter;
use std::marker::PhantomData;
use std::{u8, u16, u32, u64};
use serde::de::{self, Deserialize};
use serde::de::{self, Deserialize, IgnoredAny};
use serde::ser::{self, Serialize, Serializer};
use serde_bytes::{ByteBuf, Bytes};
@@ -594,6 +594,12 @@ where
// Make sure we can round trip back to `Value`.
let json_value2: Value = from_value(json_value.clone()).unwrap();
assert_eq!(json_value2, json_value);
// Make sure we can fully ignore.
let twoline = s.to_owned() + "\n3735928559";
let mut de = Deserializer::from_str(&twoline);
IgnoredAny::deserialize(&mut de).unwrap();
assert_eq!(0xDEAD_BEEF, u64::deserialize(&mut de).unwrap());
}
}
@@ -1844,4 +1850,4 @@ fn test_borrow() {
fn null_invalid_type() {
let err = serde_json::from_str::<String>("null").unwrap_err();
assert_eq!(format!("{}", err), String::from("invalid type: null, expected a string at line 1 column 4"));
}
}