Mirror of https://github.com/rust-lang/rust.git, synced 2025-10-31 04:57:19 +00:00

Commit ccffea5b6b

StringReader is an internal abstraction which at the moment changes a lot, so these unit tests cause quite a bit of friction. Moving them to rustc_lexer and to a more integrated-testing style should make them much less annoying, hopefully without decreasing their usefulness much. Note that the coloncolon tests are removed (it's unclear what they were testing). The \r\n tests are removed as well, as we normalize line endings even before lexing.
		
			
				
	
	
		
288 lines · 9.0 KiB · Rust

use super::*;

use expect_test::{expect, Expect};

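// Helper: lexes `s` as a raw string (the `r` prefix is prepended here and
// consumed via `bump`) and checks the hash count and any error against the
// expectations.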
fn check_raw_str(s: &str, expected_hashes: u16, expected_err: Option<RawStrError>) {
    let s = &format!("r{}", s);
    let mut cursor = Cursor::new(s);
    cursor.bump();
    let (n_hashes, err) = cursor.raw_double_quoted_string(0);
    assert_eq!(n_hashes, expected_hashes);
    assert_eq!(err, expected_err);
}

#[test]
fn test_naked_raw_str() {
    check_raw_str(r#""abc""#, 0, None);
}

#[test]
fn test_raw_no_start() {
    check_raw_str(r##""abc"#"##, 0, None);
}

#[test]
fn test_too_many_terminators() {
    // this error is handled in the parser later
    check_raw_str(r###"#"abc"##"###, 1, None);
}

#[test]
fn test_unterminated() {
    check_raw_str(
        r#"#"abc"#,
        1,
        Some(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
    );
    check_raw_str(
        r###"##"abc"#"###,
        2,
        Some(RawStrError::NoTerminator {
            expected: 2,
            found: 1,
            possible_terminator_offset: Some(7),
        }),
    );
    // We're looking for "# not just any #
    check_raw_str(
        r###"##"abc#"###,
        2,
        Some(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
    )
}

#[test]
fn test_invalid_start() {
    check_raw_str(r##"#~"abc"#"##, 1, Some(RawStrError::InvalidStarter { bad_char: '~' }));
}

#[test]
fn test_unterminated_no_pound() {
    // https://github.com/rust-lang/rust/issues/70677
    check_raw_str(
        r#"""#,
        0,
        Some(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
    );
}

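// Shebang stripping: `strip_shebang` only treats `#!` on the very first line
// as a shebang, and only when what follows could not instead begin an inner
// attribute (`#![...]`), as the tests below exercise.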
#[test]
fn test_valid_shebang() {
    // https://github.com/rust-lang/rust/issues/70528
    let input = "#!/usr/bin/rustrun\nlet x = 5;";
    assert_eq!(strip_shebang(input), Some(18));
}

#[test]
fn test_invalid_shebang_valid_rust_syntax() {
    // https://github.com/rust-lang/rust/issues/70528
    let input = "#!    [bad_attribute]";
    assert_eq!(strip_shebang(input), None);
}

#[test]
fn test_shebang_second_line() {
    // Because shebangs are interpreted by the kernel, they must be on the first line
    let input = "\n#!/bin/bash";
    assert_eq!(strip_shebang(input), None);
}

#[test]
fn test_shebang_space() {
    let input = "#!    /bin/bash";
    assert_eq!(strip_shebang(input), Some(input.len()));
}

#[test]
fn test_shebang_empty_shebang() {
    let input = "#!    \n[attribute(foo)]";
    assert_eq!(strip_shebang(input), None);
}

#[test]
fn test_invalid_shebang_comment() {
    let input = "#!//bin/ami/a/comment\n[";
    assert_eq!(strip_shebang(input), None)
}

#[test]
fn test_invalid_shebang_another_comment() {
    let input = "#!/*bin/ami/a/comment*/\n[attribute";
    assert_eq!(strip_shebang(input), None)
}

#[test]
fn test_shebang_valid_rust_after() {
    let input = "#!/*bin/ami/a/comment*/\npub fn main() {}";
    assert_eq!(strip_shebang(input), Some(23))
}

#[test]
fn test_shebang_followed_by_attrib() {
    let input = "#!/bin/rust-scripts\n#![allow_unused(true)]";
    assert_eq!(strip_shebang(input), Some(19));
}

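// Helper: tokenizes `src` and compares the debug-formatted token stream
// against an `expect_test` snapshot.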
fn check_lexing(src: &str, expect: Expect) {
    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
    expect.assert_eq(&actual)
}

#[test]
fn smoke_test() {
    check_lexing(
        "/* my source file */ fn main() { println!(\"zebra\"); }\n",
        expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 2 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 4 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: OpenBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 7 }
            Token { kind: Bang, len: 1 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 7 }, len: 7 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Semi, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: CloseBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

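// Note: `////` and `/***` are ordinary comments, not doc comments, which is
// why their `doc_style` is `None` in the expected output below.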
#[test]
fn comment_flavors() {
    check_lexing(
        r"
// line
//// line as well
/// outer doc line
//! inner doc line
/* block */
/**/
/*** also block */
/** outer doc block */
/*! inner doc block */
",
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 7 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 17 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

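// Block comments nest in Rust, so `/* /* */ */` lexes as a single comment.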
#[test]
fn nested_block_comments() {
    check_lexing(
        "/* /* */ */'a'",
        expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
        "#]],
    )
}

#[test]
fn characters() {
    check_lexing(
        "'a' ' ' '\\n'",
        expect![[r#"
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 4 }, len: 4 }
        "#]],
    );
}

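// A `'` followed by an identifier and no closing quote is a lifetime token,
// not an unterminated character literal.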
#[test]
fn lifetime() {
    check_lexing(
        "'abc",
        expect![[r#"
            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
        "#]],
    );
}

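// Raw strings perform no escape processing, so the backslash and the NUL
// byte in the input below are ordinary string contents.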
#[test]
fn raw_string() {
    check_lexing(
        "r###\"\"#a\\b\x00c\"\"###",
        expect![[r#"
            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
        "#]],
    )
}

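// The lexer accepts any identifier-like literal suffix (e.g. the bogus `us`
// below, recorded via `suffix_start`); suffix validity is checked later, in
// the parser.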
#[test]
fn literal_suffixes() {
    check_lexing(
        r####"
'a'
b'a'
"a"
b"a"
1234
0b101
0xABC
1.0
1.0e10
2us
r###"raw"###suffix
br###"raw"###suffix
"####,
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Byte { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: ByteStr { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Binary, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Hexadecimal, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 6 }, len: 6 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}