mirror of
https://github.com/rust-lang/rust.git
synced 2025-10-27 02:53:43 +00:00
1032 lines
41 KiB
Rust
1032 lines
41 KiB
Rust
//! # Token Streams
|
|
//!
|
|
//! `TokenStream`s represent syntactic objects before they are converted into ASTs.
|
|
//! A `TokenStream` is, roughly speaking, a sequence of [`TokenTree`]s,
|
|
//! which are themselves a single [`Token`] or a `Delimited` subsequence of tokens.
|
|
//!
|
|
//! ## Ownership
|
|
//!
|
|
//! `TokenStream`s are persistent data structures constructed as ropes with reference
|
|
//! counted-children. In general, this means that calling an operation on a `TokenStream`
|
|
//! (such as `slice`) produces an entirely new `TokenStream` from the borrowed reference to
|
|
//! the original. This essentially coerces `TokenStream`s into "views" of their subparts,
|
|
//! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
|
|
//! ownership of the original.
|
|
|
|
use std::borrow::Cow;
|
|
use std::ops::Range;
|
|
use std::sync::Arc;
|
|
use std::{cmp, fmt, iter, mem};
|
|
|
|
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
|
use rustc_data_structures::sync;
|
|
use rustc_macros::{Decodable, Encodable, HashStable_Generic, Walkable};
|
|
use rustc_serialize::{Decodable, Encodable};
|
|
use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym};
|
|
use thin_vec::ThinVec;
|
|
|
|
use crate::ast::AttrStyle;
|
|
use crate::ast_traits::{HasAttrs, HasTokens};
|
|
use crate::token::{self, Delimiter, Token, TokenKind};
|
|
use crate::{AttrVec, Attribute};
|
|
|
|
/// Part of a `TokenStream`.
|
|
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
|
|
pub enum TokenTree {
|
|
/// A single token. Should never be `OpenDelim` or `CloseDelim`, because
|
|
/// delimiters are implicitly represented by `Delimited`.
|
|
Token(Token, Spacing),
|
|
/// A delimited sequence of token trees.
|
|
Delimited(DelimSpan, DelimSpacing, Delimiter, TokenStream),
|
|
}
|
|
|
|
// Ensure all fields of `TokenTree` are `DynSend` and `DynSync`.
|
|
fn _dummy()
|
|
where
|
|
Token: sync::DynSend + sync::DynSync,
|
|
Spacing: sync::DynSend + sync::DynSync,
|
|
DelimSpan: sync::DynSend + sync::DynSync,
|
|
Delimiter: sync::DynSend + sync::DynSync,
|
|
TokenStream: sync::DynSend + sync::DynSync,
|
|
{
|
|
}
|
|
|
|
impl TokenTree {
|
|
/// Checks if this `TokenTree` is equal to the other, regardless of span/spacing information.
|
|
pub fn eq_unspanned(&self, other: &TokenTree) -> bool {
|
|
match (self, other) {
|
|
(TokenTree::Token(token, _), TokenTree::Token(token2, _)) => token.kind == token2.kind,
|
|
(TokenTree::Delimited(.., delim, tts), TokenTree::Delimited(.., delim2, tts2)) => {
|
|
delim == delim2
|
|
&& tts.len() == tts2.len()
|
|
&& tts.iter().zip(tts2.iter()).all(|(a, b)| a.eq_unspanned(b))
|
|
}
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
/// Retrieves the `TokenTree`'s span.
|
|
pub fn span(&self) -> Span {
|
|
match self {
|
|
TokenTree::Token(token, _) => token.span,
|
|
TokenTree::Delimited(sp, ..) => sp.entire(),
|
|
}
|
|
}
|
|
|
|
/// Create a `TokenTree::Token` with alone spacing.
|
|
pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
|
|
TokenTree::Token(Token::new(kind, span), Spacing::Alone)
|
|
}
|
|
|
|
/// Create a `TokenTree::Token` with joint spacing.
|
|
pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
|
|
TokenTree::Token(Token::new(kind, span), Spacing::Joint)
|
|
}
|
|
|
|
/// Create a `TokenTree::Token` with joint-hidden spacing.
|
|
pub fn token_joint_hidden(kind: TokenKind, span: Span) -> TokenTree {
|
|
TokenTree::Token(Token::new(kind, span), Spacing::JointHidden)
|
|
}
|
|
|
|
pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
|
|
match self {
|
|
TokenTree::Token(token, spacing) => match token.uninterpolate() {
|
|
Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
|
|
Cow::Borrowed(_) => Cow::Borrowed(self),
|
|
},
|
|
_ => Cow::Borrowed(self),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<CTX> HashStable<CTX> for TokenStream
|
|
where
|
|
CTX: crate::HashStableContext,
|
|
{
|
|
fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
|
|
for sub_tt in self.iter() {
|
|
sub_tt.hash_stable(hcx, hasher);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
|
|
/// `AttrTokenStream` until it is needed.
|
|
#[derive(Clone)]
|
|
pub struct LazyAttrTokenStream(Arc<LazyAttrTokenStreamInner>);
|
|
|
|
impl LazyAttrTokenStream {
|
|
pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
|
|
LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Direct(stream)))
|
|
}
|
|
|
|
pub fn new_pending(
|
|
start_token: (Token, Spacing),
|
|
cursor_snapshot: TokenCursor,
|
|
num_calls: u32,
|
|
break_last_token: u32,
|
|
node_replacements: ThinVec<NodeReplacement>,
|
|
) -> LazyAttrTokenStream {
|
|
LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending {
|
|
start_token,
|
|
cursor_snapshot,
|
|
num_calls,
|
|
break_last_token,
|
|
node_replacements,
|
|
}))
|
|
}
|
|
|
|
pub fn to_attr_token_stream(&self) -> AttrTokenStream {
|
|
self.0.to_attr_token_stream()
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for LazyAttrTokenStream {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "LazyAttrTokenStream({:?})", self.to_attr_token_stream())
|
|
}
|
|
}
|
|
|
|
impl<S: SpanEncoder> Encodable<S> for LazyAttrTokenStream {
    /// Encoding a `LazyAttrTokenStream` is not supported; this always panics.
    fn encode(&self, _encoder: &mut S) {
        panic!("Attempted to encode LazyAttrTokenStream")
    }
}
|
|
|
|
impl<D: SpanDecoder> Decodable<D> for LazyAttrTokenStream {
    /// Decoding a `LazyAttrTokenStream` is not supported; this always panics.
    fn decode(_decoder: &mut D) -> Self {
        panic!("Attempted to decode LazyAttrTokenStream")
    }
}
|
|
|
|
impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
|
|
fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
|
|
panic!("Attempted to compute stable hash for LazyAttrTokenStream");
|
|
}
|
|
}
|
|
|
|
/// A range of token positions measured against the full token stream of a `Parser`.
#[derive(Clone, Debug)]
pub struct ParserRange(pub Range<u32>);
|
|
|
|
/// A range of token positions measured against a single AST node's (lazy) token
/// stream, i.e. relative to the first token of that node. This is a separate
/// type from `ParserRange` so that the two kinds of range cannot be confused.
#[derive(Clone, Debug)]
pub struct NodeRange(pub Range<u32>);
|
|
|
|
/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
|
|
/// (replacement) or be replaced by nothing (deletion). This is used in two
|
|
/// places during token collection.
|
|
///
|
|
/// 1. Replacement. During the parsing of an AST node that may have a
|
|
/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
|
|
/// or `#[cfg_attr]`, we replace the entire inner AST node with
|
|
/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
|
|
/// `AttrTokenStream`.
|
|
///
|
|
/// 2. Deletion. We delete inner attributes from all collected token streams,
|
|
/// and instead track them through the `attrs` field on the AST node. This
|
|
/// lets us manipulate them similarly to outer attributes. When we create a
|
|
/// `TokenStream`, the inner attributes are inserted into the proper place
|
|
/// in the token stream.
|
|
///
|
|
/// Each replacement starts off in `ParserReplacement` form but is converted to
|
|
/// `NodeReplacement` form when it is attached to a single AST node, via
|
|
/// `LazyAttrTokenStreamImpl`.
|
|
pub type ParserReplacement = (ParserRange, Option<AttrsTarget>);
|
|
|
|
/// See the comment on `ParserReplacement`.
|
|
pub type NodeReplacement = (NodeRange, Option<AttrsTarget>);
|
|
|
|
impl NodeRange {
|
|
// Converts a range within a parser's tokens to a range within a
|
|
// node's tokens beginning at `start_pos`.
|
|
//
|
|
// For example, imagine a parser with 50 tokens in its token stream, a
|
|
// function that spans `ParserRange(20..40)` and an inner attribute within
|
|
// that function that spans `ParserRange(30..35)`. We would find the inner
|
|
// attribute's range within the function's tokens by subtracting 20, which
|
|
// is the position of the function's start token. This gives
|
|
// `NodeRange(10..15)`.
|
|
pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
|
|
assert!(!parser_range.is_empty());
|
|
assert!(parser_range.start >= start_pos);
|
|
NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
|
|
}
|
|
}
|
|
|
|
enum LazyAttrTokenStreamInner {
|
|
// The token stream has already been produced.
|
|
Direct(AttrTokenStream),
|
|
|
|
// From a value of this type we can reconstruct the `TokenStream` seen by
|
|
// the `f` callback passed to a call to `Parser::collect_tokens`, by
|
|
// replaying the getting of the tokens. This saves us producing a
|
|
// `TokenStream` if it is never needed, e.g. a captured `macro_rules!`
|
|
// argument that is never passed to a proc macro. In practice, token stream
|
|
// creation happens rarely compared to calls to `collect_tokens` (see some
|
|
// statistics in #78736) so we are doing as little up-front work as
|
|
// possible.
|
|
//
|
|
// This also makes `Parser` very cheap to clone, since there is no
|
|
// intermediate collection buffer to clone.
|
|
Pending {
|
|
start_token: (Token, Spacing),
|
|
cursor_snapshot: TokenCursor,
|
|
num_calls: u32,
|
|
break_last_token: u32,
|
|
node_replacements: ThinVec<NodeReplacement>,
|
|
},
|
|
}
|
|
|
|
impl LazyAttrTokenStreamInner {
|
|
fn to_attr_token_stream(&self) -> AttrTokenStream {
|
|
match self {
|
|
LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
|
|
LazyAttrTokenStreamInner::Pending {
|
|
start_token,
|
|
cursor_snapshot,
|
|
num_calls,
|
|
break_last_token,
|
|
node_replacements,
|
|
} => {
|
|
// The token produced by the final call to `{,inlined_}next` was not
|
|
// actually consumed by the callback. The combination of chaining the
|
|
// initial token and using `take` produces the desired result - we
|
|
// produce an empty `TokenStream` if no calls were made, and omit the
|
|
// final token otherwise.
|
|
let mut cursor_snapshot = cursor_snapshot.clone();
|
|
let tokens = iter::once(FlatToken::Token(*start_token))
|
|
.chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
|
|
.take(*num_calls as usize);
|
|
|
|
if node_replacements.is_empty() {
|
|
make_attr_token_stream(tokens, *break_last_token)
|
|
} else {
|
|
let mut tokens: Vec<_> = tokens.collect();
|
|
let mut node_replacements = node_replacements.to_vec();
|
|
node_replacements.sort_by_key(|(range, _)| range.0.start);
|
|
|
|
#[cfg(debug_assertions)]
|
|
for [(node_range, tokens), (next_node_range, next_tokens)] in
|
|
node_replacements.array_windows()
|
|
{
|
|
assert!(
|
|
node_range.0.end <= next_node_range.0.start
|
|
|| node_range.0.end >= next_node_range.0.end,
|
|
"Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
|
|
node_range,
|
|
tokens,
|
|
next_node_range,
|
|
next_tokens,
|
|
);
|
|
}
|
|
|
|
// Process the replace ranges, starting from the highest start
|
|
// position and working our way back. If have tokens like:
|
|
//
|
|
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
|
|
//
|
|
// Then we will generate replace ranges for both
|
|
// the `#[cfg(FALSE)] field: bool` and the entire
|
|
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
|
|
//
|
|
// By starting processing from the replace range with the greatest
|
|
// start position, we ensure that any (outer) replace range which
|
|
// encloses another (inner) replace range will fully overwrite the
|
|
// inner range's replacement.
|
|
for (node_range, target) in node_replacements.into_iter().rev() {
|
|
assert!(
|
|
!node_range.0.is_empty(),
|
|
"Cannot replace an empty node range: {:?}",
|
|
node_range.0
|
|
);
|
|
|
|
// Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
|
|
// plus enough `FlatToken::Empty`s to fill up the rest of the range. This
|
|
// keeps the total length of `tokens` constant throughout the replacement
|
|
// process, allowing us to do all replacements without adjusting indices.
|
|
let target_len = target.is_some() as usize;
|
|
tokens.splice(
|
|
(node_range.0.start as usize)..(node_range.0.end as usize),
|
|
target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
|
|
iter::repeat(FlatToken::Empty)
|
|
.take(node_range.0.len() - target_len),
|
|
),
|
|
);
|
|
}
|
|
make_attr_token_stream(tokens.into_iter(), *break_last_token)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A helper struct used when building an `AttrTokenStream` from
|
|
/// a `LazyAttrTokenStream`. Both delimiter and non-delimited tokens
|
|
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
|
|
/// is then 'parsed' to build up an `AttrTokenStream` with nested
|
|
/// `AttrTokenTree::Delimited` tokens.
|
|
#[derive(Debug, Clone)]
|
|
enum FlatToken {
|
|
/// A token - this holds both delimiter (e.g. '{' and '}')
|
|
/// and non-delimiter tokens
|
|
Token((Token, Spacing)),
|
|
/// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
|
|
/// directly into the constructed `AttrTokenStream` as an
|
|
/// `AttrTokenTree::AttrsTarget`.
|
|
AttrsTarget(AttrsTarget),
|
|
/// A special 'empty' token that is ignored during the conversion
|
|
/// to an `AttrTokenStream`. This is used to simplify the
|
|
/// handling of replace ranges.
|
|
Empty,
|
|
}
|
|
|
|
/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
|
|
/// information about the tokens for attribute targets. This is used
|
|
/// during expansion to perform early cfg-expansion, and to process attributes
|
|
/// during proc-macro invocations.
|
|
#[derive(Clone, Debug, Default, Encodable, Decodable)]
|
|
pub struct AttrTokenStream(pub Arc<Vec<AttrTokenTree>>);
|
|
|
|
/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
|
|
/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
|
|
/// close delims.
|
|
fn make_attr_token_stream(
|
|
iter: impl Iterator<Item = FlatToken>,
|
|
break_last_token: u32,
|
|
) -> AttrTokenStream {
|
|
#[derive(Debug)]
|
|
struct FrameData {
|
|
// This is `None` for the first frame, `Some` for all others.
|
|
open_delim_sp: Option<(Delimiter, Span, Spacing)>,
|
|
inner: Vec<AttrTokenTree>,
|
|
}
|
|
// The stack always has at least one element. Storing it separately makes for shorter code.
|
|
let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
|
|
let mut stack_rest = vec![];
|
|
for flat_token in iter {
|
|
match flat_token {
|
|
FlatToken::Token((token @ Token { kind, span }, spacing)) => {
|
|
if let Some(delim) = kind.open_delim() {
|
|
stack_rest.push(mem::replace(
|
|
&mut stack_top,
|
|
FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
|
|
));
|
|
} else if let Some(delim) = kind.close_delim() {
|
|
let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
|
|
let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
|
|
assert!(
|
|
open_delim.eq_ignoring_invisible_origin(&delim),
|
|
"Mismatched open/close delims: open={open_delim:?} close={span:?}"
|
|
);
|
|
let dspan = DelimSpan::from_pair(open_sp, span);
|
|
let dspacing = DelimSpacing::new(open_spacing, spacing);
|
|
let stream = AttrTokenStream::new(frame_data.inner);
|
|
let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
|
|
stack_top.inner.push(delimited);
|
|
} else {
|
|
stack_top.inner.push(AttrTokenTree::Token(token, spacing))
|
|
}
|
|
}
|
|
FlatToken::AttrsTarget(target) => {
|
|
stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
|
|
}
|
|
FlatToken::Empty => {}
|
|
}
|
|
}
|
|
|
|
if break_last_token > 0 {
|
|
let last_token = stack_top.inner.pop().unwrap();
|
|
if let AttrTokenTree::Token(last_token, spacing) = last_token {
|
|
let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();
|
|
|
|
// Tokens are always ASCII chars, so we can use byte arithmetic here.
|
|
let mut first_span = last_token.span.shrink_to_lo();
|
|
first_span =
|
|
first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));
|
|
|
|
stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
|
|
} else {
|
|
panic!("Unexpected last token {last_token:?}")
|
|
}
|
|
}
|
|
AttrTokenStream::new(stack_top.inner)
|
|
}
|
|
|
|
/// Like `TokenTree`, but for `AttrTokenStream`.
|
|
#[derive(Clone, Debug, Encodable, Decodable)]
|
|
pub enum AttrTokenTree {
|
|
Token(Token, Spacing),
|
|
Delimited(DelimSpan, DelimSpacing, Delimiter, AttrTokenStream),
|
|
/// Stores the attributes for an attribute target,
|
|
/// along with the tokens for that attribute target.
|
|
/// See `AttrsTarget` for more information
|
|
AttrsTarget(AttrsTarget),
|
|
}
|
|
|
|
impl AttrTokenStream {
|
|
pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
|
|
AttrTokenStream(Arc::new(tokens))
|
|
}
|
|
|
|
/// Converts this `AttrTokenStream` to a plain `Vec<TokenTree>`. During
|
|
/// conversion, any `AttrTokenTree::AttrsTarget` gets "flattened" back to a
|
|
/// `TokenStream`, as described in the comment on
|
|
/// `attrs_and_tokens_to_token_trees`.
|
|
pub fn to_token_trees(&self) -> Vec<TokenTree> {
|
|
let mut res = Vec::with_capacity(self.0.len());
|
|
for tree in self.0.iter() {
|
|
match tree {
|
|
AttrTokenTree::Token(inner, spacing) => {
|
|
res.push(TokenTree::Token(inner.clone(), *spacing));
|
|
}
|
|
AttrTokenTree::Delimited(span, spacing, delim, stream) => {
|
|
res.push(TokenTree::Delimited(
|
|
*span,
|
|
*spacing,
|
|
*delim,
|
|
TokenStream::new(stream.to_token_trees()),
|
|
))
|
|
}
|
|
AttrTokenTree::AttrsTarget(target) => {
|
|
attrs_and_tokens_to_token_trees(&target.attrs, &target.tokens, &mut res);
|
|
}
|
|
}
|
|
}
|
|
res
|
|
}
|
|
}
|
|
|
|
// Converts multiple attributes and the tokens for a target AST node into token trees, and appends
|
|
// them to `res`.
|
|
//
|
|
// Example: if the AST node is "fn f() { blah(); }", then:
|
|
// - Simple if no attributes are present, e.g. "fn f() { blah(); }"
|
|
// - Simple if only outer attribute are present, e.g. "#[outer1] #[outer2] fn f() { blah(); }"
|
|
// - Trickier if inner attributes are present, because they must be moved within the AST node's
|
|
// tokens, e.g. "#[outer] fn f() { #![inner] blah() }"
|
|
fn attrs_and_tokens_to_token_trees(
|
|
attrs: &[Attribute],
|
|
target_tokens: &LazyAttrTokenStream,
|
|
res: &mut Vec<TokenTree>,
|
|
) {
|
|
let idx = attrs.partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer));
|
|
let (outer_attrs, inner_attrs) = attrs.split_at(idx);
|
|
|
|
// Add outer attribute tokens.
|
|
for attr in outer_attrs {
|
|
res.extend(attr.token_trees());
|
|
}
|
|
|
|
// Add target AST node tokens.
|
|
res.extend(target_tokens.to_attr_token_stream().to_token_trees());
|
|
|
|
// Insert inner attribute tokens.
|
|
if !inner_attrs.is_empty() {
|
|
let found = insert_inner_attrs(inner_attrs, res);
|
|
assert!(found, "Failed to find trailing delimited group in: {res:?}");
|
|
}
|
|
|
|
// Inner attributes are only supported on blocks, functions, impls, and
|
|
// modules. All of these have their inner attributes placed at the
|
|
// beginning of the rightmost outermost braced group:
|
|
// e.g. `fn foo() { #![my_attr] }`. (Note: the braces may be within
|
|
// invisible delimiters.)
|
|
//
|
|
// Therefore, we can insert them back into the right location without
|
|
// needing to do any extra position tracking.
|
|
//
|
|
// Note: Outline modules are an exception - they can have attributes like
|
|
// `#![my_attr]` at the start of a file. Support for custom attributes in
|
|
// this position is not properly implemented - we always synthesize fake
|
|
// tokens, so we never reach this code.
|
|
fn insert_inner_attrs(inner_attrs: &[Attribute], tts: &mut Vec<TokenTree>) -> bool {
|
|
for tree in tts.iter_mut().rev() {
|
|
if let TokenTree::Delimited(span, spacing, Delimiter::Brace, stream) = tree {
|
|
// Found it: the rightmost, outermost braced group.
|
|
let mut tts = vec![];
|
|
for inner_attr in inner_attrs {
|
|
tts.extend(inner_attr.token_trees());
|
|
}
|
|
tts.extend(stream.0.iter().cloned());
|
|
let stream = TokenStream::new(tts);
|
|
*tree = TokenTree::Delimited(*span, *spacing, Delimiter::Brace, stream);
|
|
return true;
|
|
} else if let TokenTree::Delimited(span, spacing, Delimiter::Invisible(src), stream) =
|
|
tree
|
|
{
|
|
// Recurse inside invisible delimiters.
|
|
let mut vec: Vec<_> = stream.iter().cloned().collect();
|
|
if insert_inner_attrs(inner_attrs, &mut vec) {
|
|
*tree = TokenTree::Delimited(
|
|
*span,
|
|
*spacing,
|
|
Delimiter::Invisible(*src),
|
|
TokenStream::new(vec),
|
|
);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Stores the tokens for an attribute target, along
|
|
/// with its attributes.
|
|
///
|
|
/// This is constructed during parsing when we need to capture
|
|
/// tokens, for `cfg` and `cfg_attr` attributes.
|
|
///
|
|
/// For example, `#[cfg(FALSE)] struct Foo {}` would
|
|
/// have an `attrs` field containing the `#[cfg(FALSE)]` attr,
|
|
/// and a `tokens` field storing the (unparsed) tokens `struct Foo {}`
|
|
///
|
|
/// The `cfg`/`cfg_attr` processing occurs in
|
|
/// `StripUnconfigured::configure_tokens`.
|
|
#[derive(Clone, Debug, Encodable, Decodable)]
|
|
pub struct AttrsTarget {
|
|
/// Attributes, both outer and inner.
|
|
/// These are stored in the original order that they were parsed in.
|
|
pub attrs: AttrVec,
|
|
/// The underlying tokens for the attribute target that `attrs`
|
|
/// are applied to
|
|
pub tokens: LazyAttrTokenStream,
|
|
}
|
|
|
|
/// A `TokenStream` is an abstract sequence of tokens, organized into [`TokenTree`]s.
|
|
#[derive(Clone, Debug, Default, Encodable, Decodable)]
|
|
pub struct TokenStream(pub(crate) Arc<Vec<TokenTree>>);
|
|
|
|
/// Indicates whether a token can join with the following token to form a
|
|
/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
|
|
/// guide pretty-printing, which is where the `JointHidden` value (which isn't
|
|
/// part of `proc_macro::Spacing`) comes in useful.
|
|
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
|
|
pub enum Spacing {
|
|
/// The token cannot join with the following token to form a compound
|
|
/// token.
|
|
///
|
|
/// In token streams parsed from source code, the compiler will use `Alone`
|
|
/// for any token immediately followed by whitespace, a non-doc comment, or
|
|
/// EOF.
|
|
///
|
|
/// When constructing token streams within the compiler, use this for each
|
|
/// token that (a) should be pretty-printed with a space after it, or (b)
|
|
/// is the last token in the stream. (In the latter case the choice of
|
|
/// spacing doesn't matter because it is never used for the last token. We
|
|
/// arbitrarily use `Alone`.)
|
|
///
|
|
/// Converts to `proc_macro::Spacing::Alone`, and
|
|
/// `proc_macro::Spacing::Alone` converts back to this.
|
|
Alone,
|
|
|
|
/// The token can join with the following token to form a compound token.
|
|
///
|
|
/// In token streams parsed from source code, the compiler will use `Joint`
|
|
/// for any token immediately followed by punctuation (as determined by
|
|
/// `Token::is_punct`).
|
|
///
|
|
/// When constructing token streams within the compiler, use this for each
|
|
/// token that (a) should be pretty-printed without a space after it, and
|
|
/// (b) is followed by a punctuation token.
|
|
///
|
|
/// Converts to `proc_macro::Spacing::Joint`, and
|
|
/// `proc_macro::Spacing::Joint` converts back to this.
|
|
Joint,
|
|
|
|
/// The token can join with the following token to form a compound token,
|
|
/// but this will not be visible at the proc macro level. (This is what the
|
|
/// `Hidden` means; see below.)
|
|
///
|
|
/// In token streams parsed from source code, the compiler will use
|
|
/// `JointHidden` for any token immediately followed by anything not
|
|
/// covered by the `Alone` and `Joint` cases: an identifier, lifetime,
|
|
/// literal, delimiter, doc comment.
|
|
///
|
|
/// When constructing token streams, use this for each token that (a)
|
|
/// should be pretty-printed without a space after it, and (b) is followed
|
|
/// by a non-punctuation token.
|
|
///
|
|
/// Converts to `proc_macro::Spacing::Alone`, but
|
|
/// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`.
|
|
/// Because of that, pretty-printing of `TokenStream`s produced by proc
|
|
/// macros is unavoidably uglier (with more whitespace between tokens) than
|
|
/// pretty-printing of `TokenStream`'s produced by other means (i.e. parsed
|
|
/// source code, internally constructed token streams, and token streams
|
|
/// produced by declarative macros).
|
|
JointHidden,
|
|
}
|
|
|
|
impl TokenStream {
|
|
/// Given a `TokenStream` with a `Stream` of only two arguments, return a new `TokenStream`
|
|
/// separating the two arguments with a comma for diagnostic suggestions.
|
|
pub fn add_comma(&self) -> Option<(TokenStream, Span)> {
|
|
// Used to suggest if a user writes `foo!(a b);`
|
|
let mut suggestion = None;
|
|
let mut iter = self.0.iter().enumerate().peekable();
|
|
while let Some((pos, ts)) = iter.next() {
|
|
if let Some((_, next)) = iter.peek() {
|
|
let sp = match (&ts, &next) {
|
|
(_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
|
|
(
|
|
TokenTree::Token(token_left, Spacing::Alone),
|
|
TokenTree::Token(token_right, _),
|
|
) if (token_left.is_non_reserved_ident() || token_left.is_lit())
|
|
&& (token_right.is_non_reserved_ident() || token_right.is_lit()) =>
|
|
{
|
|
token_left.span
|
|
}
|
|
(TokenTree::Delimited(sp, ..), _) => sp.entire(),
|
|
_ => continue,
|
|
};
|
|
let sp = sp.shrink_to_hi();
|
|
let comma = TokenTree::token_alone(token::Comma, sp);
|
|
suggestion = Some((pos, comma, sp));
|
|
}
|
|
}
|
|
if let Some((pos, comma, sp)) = suggestion {
|
|
let mut new_stream = Vec::with_capacity(self.0.len() + 1);
|
|
let parts = self.0.split_at(pos + 1);
|
|
new_stream.extend_from_slice(parts.0);
|
|
new_stream.push(comma);
|
|
new_stream.extend_from_slice(parts.1);
|
|
return Some((TokenStream::new(new_stream), sp));
|
|
}
|
|
None
|
|
}
|
|
}
|
|
|
|
impl FromIterator<TokenTree> for TokenStream {
|
|
fn from_iter<I: IntoIterator<Item = TokenTree>>(iter: I) -> Self {
|
|
TokenStream::new(iter.into_iter().collect::<Vec<TokenTree>>())
|
|
}
|
|
}
|
|
|
|
// Marker impl only; the comparison itself is defined by the `PartialEq` impl for `TokenStream`.
impl Eq for TokenStream {}
|
|
|
|
impl PartialEq<TokenStream> for TokenStream {
    /// Two streams are equal when their token trees are pairwise equal and they have the same
    /// length.
    fn eq(&self, other: &TokenStream) -> bool {
        Iterator::eq(self.iter(), other.iter())
    }
}
|
|
|
|
impl TokenStream {
|
|
pub fn new(tts: Vec<TokenTree>) -> TokenStream {
|
|
TokenStream(Arc::new(tts))
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.0.is_empty()
|
|
}
|
|
|
|
pub fn len(&self) -> usize {
|
|
self.0.len()
|
|
}
|
|
|
|
pub fn get(&self, index: usize) -> Option<&TokenTree> {
|
|
self.0.get(index)
|
|
}
|
|
|
|
pub fn iter(&self) -> TokenStreamIter<'_> {
|
|
TokenStreamIter::new(self)
|
|
}
|
|
|
|
/// Create a token stream containing a single token with alone spacing. The
|
|
/// spacing used for the final token in a constructed stream doesn't matter
|
|
/// because it's never used. In practice we arbitrarily use
|
|
/// `Spacing::Alone`.
|
|
pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
|
|
TokenStream::new(vec![TokenTree::token_alone(kind, span)])
|
|
}
|
|
|
|
pub fn from_ast(node: &(impl HasAttrs + HasTokens + fmt::Debug)) -> TokenStream {
|
|
let tokens = node.tokens().unwrap_or_else(|| panic!("missing tokens for node: {:?}", node));
|
|
let mut tts = vec![];
|
|
attrs_and_tokens_to_token_trees(node.attrs(), tokens, &mut tts);
|
|
TokenStream::new(tts)
|
|
}
|
|
|
|
// If `vec` is not empty, try to glue `tt` onto its last token. The return
|
|
// value indicates if gluing took place.
|
|
fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
|
|
if let Some(TokenTree::Token(last_tok, Spacing::Joint | Spacing::JointHidden)) = vec.last()
|
|
&& let TokenTree::Token(tok, spacing) = tt
|
|
&& let Some(glued_tok) = last_tok.glue(tok)
|
|
{
|
|
// ...then overwrite the last token tree in `vec` with the
|
|
// glued token, and skip the first token tree from `stream`.
|
|
*vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// Push `tt` onto the end of the stream, possibly gluing it to the last
|
|
/// token. Uses `make_mut` to maximize efficiency.
|
|
pub fn push_tree(&mut self, tt: TokenTree) {
|
|
let vec_mut = Arc::make_mut(&mut self.0);
|
|
|
|
if Self::try_glue_to_last(vec_mut, &tt) {
|
|
// nothing else to do
|
|
} else {
|
|
vec_mut.push(tt);
|
|
}
|
|
}
|
|
|
|
/// Push `stream` onto the end of the stream, possibly gluing the first
|
|
/// token tree to the last token. (No other token trees will be glued.)
|
|
/// Uses `make_mut` to maximize efficiency.
|
|
pub fn push_stream(&mut self, stream: TokenStream) {
|
|
let vec_mut = Arc::make_mut(&mut self.0);
|
|
|
|
let stream_iter = stream.0.iter().cloned();
|
|
|
|
if let Some(first) = stream.0.first()
|
|
&& Self::try_glue_to_last(vec_mut, first)
|
|
{
|
|
// Now skip the first token tree from `stream`.
|
|
vec_mut.extend(stream_iter.skip(1));
|
|
} else {
|
|
// Append all of `stream`.
|
|
vec_mut.extend(stream_iter);
|
|
}
|
|
}
|
|
|
|
pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
|
|
self.0.chunks(chunk_size)
|
|
}
|
|
|
|
/// Desugar doc comments like `/// foo` in the stream into `#[doc =
|
|
/// r"foo"]`. Modifies the `TokenStream` via `Arc::make_mut`, but as little
|
|
/// as possible.
|
|
pub fn desugar_doc_comments(&mut self) {
|
|
if let Some(desugared_stream) = desugar_inner(self.clone()) {
|
|
*self = desugared_stream;
|
|
}
|
|
|
|
// The return value is `None` if nothing in `stream` changed.
|
|
fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
|
|
let mut i = 0;
|
|
let mut modified = false;
|
|
while let Some(tt) = stream.0.get(i) {
|
|
match tt {
|
|
&TokenTree::Token(
|
|
Token { kind: token::DocComment(_, attr_style, data), span },
|
|
_spacing,
|
|
) => {
|
|
let desugared = desugared_tts(attr_style, data, span);
|
|
let desugared_len = desugared.len();
|
|
Arc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
|
|
modified = true;
|
|
i += desugared_len;
|
|
}
|
|
|
|
&TokenTree::Token(..) => i += 1,
|
|
|
|
&TokenTree::Delimited(sp, spacing, delim, ref delim_stream) => {
|
|
if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
|
|
let new_tt =
|
|
TokenTree::Delimited(sp, spacing, delim, desugared_delim_stream);
|
|
Arc::make_mut(&mut stream.0)[i] = new_tt;
|
|
modified = true;
|
|
}
|
|
i += 1;
|
|
}
|
|
}
|
|
}
|
|
if modified { Some(stream) } else { None }
|
|
}
|
|
|
|
fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
|
|
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
|
|
// required to wrap the text. E.g.
|
|
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
|
|
// - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
|
|
// - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
|
|
let mut num_of_hashes = 0;
|
|
let mut count = 0;
|
|
for ch in data.as_str().chars() {
|
|
count = match ch {
|
|
'"' => 1,
|
|
'#' if count > 0 => count + 1,
|
|
_ => 0,
|
|
};
|
|
num_of_hashes = cmp::max(num_of_hashes, count);
|
|
}
|
|
|
|
// `/// foo` becomes `[doc = r"foo"]`.
|
|
let delim_span = DelimSpan::from_single(span);
|
|
let body = TokenTree::Delimited(
|
|
delim_span,
|
|
DelimSpacing::new(Spacing::JointHidden, Spacing::Alone),
|
|
Delimiter::Bracket,
|
|
[
|
|
TokenTree::token_alone(token::Ident(sym::doc, token::IdentIsRaw::No), span),
|
|
TokenTree::token_alone(token::Eq, span),
|
|
TokenTree::token_alone(
|
|
TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
|
|
span,
|
|
),
|
|
]
|
|
.into_iter()
|
|
.collect::<TokenStream>(),
|
|
);
|
|
|
|
if attr_style == AttrStyle::Inner {
|
|
vec![
|
|
TokenTree::token_joint(token::Pound, span),
|
|
TokenTree::token_joint_hidden(token::Bang, span),
|
|
body,
|
|
]
|
|
} else {
|
|
vec![TokenTree::token_joint_hidden(token::Pound, span), body]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct TokenStreamIter<'t> {
|
|
stream: &'t TokenStream,
|
|
index: usize,
|
|
}
|
|
|
|
impl<'t> TokenStreamIter<'t> {
|
|
fn new(stream: &'t TokenStream) -> Self {
|
|
TokenStreamIter { stream, index: 0 }
|
|
}
|
|
|
|
// Peeking could be done via `Peekable`, but most iterators need peeking,
|
|
// and this is simple and avoids the need to use `peekable` and `Peekable`
|
|
// at all the use sites.
|
|
pub fn peek(&self) -> Option<&'t TokenTree> {
|
|
self.stream.0.get(self.index)
|
|
}
|
|
}
|
|
|
|
impl<'t> Iterator for TokenStreamIter<'t> {
|
|
type Item = &'t TokenTree;
|
|
|
|
fn next(&mut self) -> Option<&'t TokenTree> {
|
|
self.stream.0.get(self.index).map(|tree| {
|
|
self.index += 1;
|
|
tree
|
|
})
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct TokenTreeCursor {
|
|
stream: TokenStream,
|
|
/// Points to the current token tree in the stream. In `TokenCursor::curr`,
|
|
/// this can be any token tree. In `TokenCursor::stack`, this is always a
|
|
/// `TokenTree::Delimited`.
|
|
index: usize,
|
|
}
|
|
|
|
impl TokenTreeCursor {
|
|
#[inline]
|
|
pub fn new(stream: TokenStream) -> Self {
|
|
TokenTreeCursor { stream, index: 0 }
|
|
}
|
|
|
|
#[inline]
|
|
pub fn curr(&self) -> Option<&TokenTree> {
|
|
self.stream.get(self.index)
|
|
}
|
|
|
|
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
|
|
self.stream.get(self.index + n)
|
|
}
|
|
|
|
#[inline]
|
|
pub fn bump(&mut self) {
|
|
self.index += 1;
|
|
}
|
|
}
|
|
|
|
/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
|
|
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
|
|
/// use this type to emit them as a linear sequence. But a linear sequence is
|
|
/// what the parser expects, for the most part.
|
|
#[derive(Clone, Debug)]
|
|
pub struct TokenCursor {
|
|
// Cursor for the current (innermost) token stream. The index within the
|
|
// cursor can point to any token tree in the stream (or one past the end).
|
|
// The delimiters for this token stream are found in `self.stack.last()`;
|
|
// if that is `None` we are in the outermost token stream which never has
|
|
// delimiters.
|
|
pub curr: TokenTreeCursor,
|
|
|
|
// Token streams surrounding the current one. The index within each cursor
|
|
// always points to a `TokenTree::Delimited`.
|
|
pub stack: Vec<TokenTreeCursor>,
|
|
}
|
|
|
|
impl TokenCursor {
|
|
pub fn next(&mut self) -> (Token, Spacing) {
|
|
self.inlined_next()
|
|
}
|
|
|
|
/// This always-inlined version should only be used on hot code paths.
|
|
#[inline(always)]
|
|
pub fn inlined_next(&mut self) -> (Token, Spacing) {
|
|
loop {
|
|
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
|
|
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
|
|
// below can be removed.
|
|
if let Some(tree) = self.curr.curr() {
|
|
match tree {
|
|
&TokenTree::Token(token, spacing) => {
|
|
debug_assert!(!token.kind.is_delim());
|
|
let res = (token, spacing);
|
|
self.curr.bump();
|
|
return res;
|
|
}
|
|
&TokenTree::Delimited(sp, spacing, delim, ref tts) => {
|
|
let trees = TokenTreeCursor::new(tts.clone());
|
|
self.stack.push(mem::replace(&mut self.curr, trees));
|
|
if !delim.skip() {
|
|
return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open);
|
|
}
|
|
// No open delimiter to return; continue on to the next iteration.
|
|
}
|
|
};
|
|
} else if let Some(parent) = self.stack.pop() {
|
|
// We have exhausted this token stream. Move back to its parent token stream.
|
|
let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
|
|
panic!("parent should be Delimited")
|
|
};
|
|
self.curr = parent;
|
|
self.curr.bump(); // move past the `Delimited`
|
|
if !delim.skip() {
|
|
return (Token::new(delim.as_close_token_kind(), span.close), spacing.close);
|
|
}
|
|
// No close delimiter to return; continue on to the next iteration.
|
|
} else {
|
|
// We have exhausted the outermost token stream. The use of
|
|
// `Spacing::Alone` is arbitrary and immaterial, because the
|
|
// `Eof` token's spacing is never used.
|
|
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic, Walkable)]
|
|
pub struct DelimSpan {
|
|
pub open: Span,
|
|
pub close: Span,
|
|
}
|
|
|
|
impl DelimSpan {
|
|
pub fn from_single(sp: Span) -> Self {
|
|
DelimSpan { open: sp, close: sp }
|
|
}
|
|
|
|
pub fn from_pair(open: Span, close: Span) -> Self {
|
|
DelimSpan { open, close }
|
|
}
|
|
|
|
pub fn dummy() -> Self {
|
|
Self::from_single(DUMMY_SP)
|
|
}
|
|
|
|
pub fn entire(self) -> Span {
|
|
self.open.with_hi(self.close.hi())
|
|
}
|
|
}
|
|
|
|
#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
|
|
pub struct DelimSpacing {
|
|
pub open: Spacing,
|
|
pub close: Spacing,
|
|
}
|
|
|
|
impl DelimSpacing {
|
|
pub fn new(open: Spacing, close: Spacing) -> DelimSpacing {
|
|
DelimSpacing { open, close }
|
|
}
|
|
}
|
|
|
|
// Some types are used a lot. Make sure they don't unintentionally get bigger.
|
|
#[cfg(target_pointer_width = "64")]
|
|
mod size_asserts {
|
|
use rustc_data_structures::static_assert_size;
|
|
|
|
use super::*;
|
|
// tidy-alphabetical-start
|
|
static_assert_size!(AttrTokenStream, 8);
|
|
static_assert_size!(AttrTokenTree, 32);
|
|
static_assert_size!(LazyAttrTokenStream, 8);
|
|
static_assert_size!(LazyAttrTokenStreamInner, 88);
|
|
static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
|
|
static_assert_size!(TokenStream, 8);
|
|
static_assert_size!(TokenTree, 32);
|
|
// tidy-alphabetical-end
|
|
}
|