mirror of
				https://github.com/rust-lang/rust.git
				synced 2025-10-31 21:16:44 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			504 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			504 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
| use std::str::pattern::*;
 | |
| 
 | |
| // This macro makes it easier to write
 | |
| // tests that do a series of iterations
 | |
| macro_rules! search_asserts {
 | |
|     ($haystack:expr, $needle:expr, $testname:expr, [$($func:ident),*], $result:expr) => {
 | |
|         let mut searcher = $needle.into_searcher($haystack);
 | |
|         let arr = [$( Step::from(searcher.$func()) ),*];
 | |
|         assert_eq!(&arr[..], &$result, $testname);
 | |
|     }
 | |
| }
 | |
| 
 | |
/// Single result type for every searcher method exercised by these tests.
///
/// `Matches`, `Rejects` and `Done` correspond to the variants of `SearchStep`
/// (returned by `next()` and friends); `InRange` carries the `(start, end)`
/// pair from the `Option`-returning `next_match()`/`next_reject()` family,
/// whose `None` is also represented as `Done`.
#[derive(Debug, Eq, PartialEq)]
enum Step {
    // The three payload variants deliberately share one name length so that
    // tables of expected values line up in columns.
    Matches(usize, usize),
    Rejects(usize, usize),
    InRange(usize, usize),
    Done,
}
 | |
| 
 | |
| use self::Step::*;
 | |
| 
 | |
| impl From<SearchStep> for Step {
 | |
|     fn from(x: SearchStep) -> Self {
 | |
|         match x {
 | |
|             SearchStep::Match(a, b) => Matches(a, b),
 | |
|             SearchStep::Reject(a, b) => Rejects(a, b),
 | |
|             SearchStep::Done => Done,
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| impl From<Option<(usize, usize)>> for Step {
 | |
|     fn from(x: Option<(usize, usize)>) -> Self {
 | |
|         match x {
 | |
|             Some((a, b)) => InRange(a, b),
 | |
|             None => Done,
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| // FIXME(Manishearth) these tests focus on single-character searching (CharSearcher)
 | |
| // and on next()/next_match(), not next_reject(). This is because
 | |
| // the memchr changes make next_match() for single chars complex, but next_reject()
 | |
| // continues to use next() under the hood. We should add more test cases for all
 | |
| // of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
 | |
| 
 | |
| #[test]
 | |
| fn test_simple_iteration() {
 | |
|     search_asserts!(
 | |
|         "abcdeabcd",
 | |
|         'a',
 | |
|         "forward iteration for ASCII string",
 | |
|         // a            b              c              d              e              a              b              c              d              EOF
 | |
|         [next, next, next, next, next, next, next, next, next, next],
 | |
|         [
 | |
|             Matches(0, 1),
 | |
|             Rejects(1, 2),
 | |
|             Rejects(2, 3),
 | |
|             Rejects(3, 4),
 | |
|             Rejects(4, 5),
 | |
|             Matches(5, 6),
 | |
|             Rejects(6, 7),
 | |
|             Rejects(7, 8),
 | |
|             Rejects(8, 9),
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         "abcdeabcd",
 | |
|         'a',
 | |
|         "reverse iteration for ASCII string",
 | |
|         // d            c              b              a            e                d              c              b              a             EOF
 | |
|         [
 | |
|             next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back,
 | |
|             next_back, next_back
 | |
|         ],
 | |
|         [
 | |
|             Rejects(8, 9),
 | |
|             Rejects(7, 8),
 | |
|             Rejects(6, 7),
 | |
|             Matches(5, 6),
 | |
|             Rejects(4, 5),
 | |
|             Rejects(3, 4),
 | |
|             Rejects(2, 3),
 | |
|             Rejects(1, 2),
 | |
|             Matches(0, 1),
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         "我爱我的猫",
 | |
|         '我',
 | |
|         "forward iteration for Chinese string",
 | |
|         // 我           愛             我             的              貓               EOF
 | |
|         [next, next, next, next, next, next],
 | |
|         [Matches(0, 3), Rejects(3, 6), Matches(6, 9), Rejects(9, 12), Rejects(12, 15), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         "我的猫说meow",
 | |
|         'm',
 | |
|         "forward iteration for mixed string",
 | |
|         // 我           的             猫             说              m                e                o                w                EOF
 | |
|         [next, next, next, next, next, next, next, next, next],
 | |
|         [
 | |
|             Rejects(0, 3),
 | |
|             Rejects(3, 6),
 | |
|             Rejects(6, 9),
 | |
|             Rejects(9, 12),
 | |
|             Matches(12, 13),
 | |
|             Rejects(13, 14),
 | |
|             Rejects(14, 15),
 | |
|             Rejects(15, 16),
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         "我的猫说meow",
 | |
|         '猫',
 | |
|         "reverse iteration for mixed string",
 | |
|         // w             o                 e                m                说              猫             的             我             EOF
 | |
|         [
 | |
|             next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back,
 | |
|             next_back
 | |
|         ],
 | |
|         [
 | |
|             Rejects(15, 16),
 | |
|             Rejects(14, 15),
 | |
|             Rejects(13, 14),
 | |
|             Rejects(12, 13),
 | |
|             Rejects(9, 12),
 | |
|             Matches(6, 9),
 | |
|             Rejects(3, 6),
 | |
|             Rejects(0, 3),
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| }
 | |
| 
 | |
| #[test]
 | |
| fn test_simple_search() {
 | |
|     search_asserts!(
 | |
|         "abcdeabcdeabcde",
 | |
|         'a',
 | |
|         "next_match for ASCII string",
 | |
|         [next_match, next_match, next_match, next_match],
 | |
|         [InRange(0, 1), InRange(5, 6), InRange(10, 11), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         "abcdeabcdeabcde",
 | |
|         'a',
 | |
|         "next_match_back for ASCII string",
 | |
|         [next_match_back, next_match_back, next_match_back, next_match_back],
 | |
|         [InRange(10, 11), InRange(5, 6), InRange(0, 1), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         "abcdeab",
 | |
|         'a',
 | |
|         "next_reject for ASCII string",
 | |
|         [next_reject, next_reject, next_match, next_reject, next_reject],
 | |
|         [InRange(1, 2), InRange(2, 3), InRange(5, 6), InRange(6, 7), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         "abcdeabcdeabcde",
 | |
|         'a',
 | |
|         "next_reject_back for ASCII string",
 | |
|         [
 | |
|             next_reject_back,
 | |
|             next_reject_back,
 | |
|             next_match_back,
 | |
|             next_reject_back,
 | |
|             next_reject_back,
 | |
|             next_reject_back
 | |
|         ],
 | |
|         [
 | |
|             InRange(14, 15),
 | |
|             InRange(13, 14),
 | |
|             InRange(10, 11),
 | |
|             InRange(9, 10),
 | |
|             InRange(8, 9),
 | |
|             InRange(7, 8)
 | |
|         ]
 | |
|     );
 | |
| }
 | |
| 
 | |
// Haystack for the multi-byte searching tests, built around the byte 0x81:
//
// * Á, 각, ก and 😁 all have 0x81 as the last byte of their UTF-8 encoding.
// * 🁀 and ᘀ do not end in 0x81; the original note describes both as
//   containing that byte elsewhere (NOTE(review): confirm this for ᘀ).
// * ꁁ has 0x81 as both its second and its third byte.
//
// The memchr-based next_match and next_match_back implementations
// temporarily break the invariant that the search position sits on a
// character boundary. That is acceptable provided the broken state is never
// observed by next() or next_back(), which is why the tests below also call
// next()/next_back() right after next_match()/next_match_back().
const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
 | |
| 
 | |
| #[test]
 | |
| fn test_stress_indices() {
 | |
|     // this isn't really a test, more of documentation on the indices of each character in the stresstest string
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'x',
 | |
|         "Indices of characters in stress test",
 | |
|         [
 | |
|             next, next, next, next, next, next, next, next, next, next, next, next, next, next,
 | |
|             next, next, next, next, next, next, next
 | |
|         ],
 | |
|         [
 | |
|             Rejects(0, 2),   // Á
 | |
|             Rejects(2, 3),   // a
 | |
|             Rejects(3, 7),   // 🁀
 | |
|             Rejects(7, 8),   // b
 | |
|             Rejects(8, 10),  // Á
 | |
|             Rejects(10, 13), // ꁁ
 | |
|             Rejects(13, 14), // f
 | |
|             Rejects(14, 15), // g
 | |
|             Rejects(15, 19), // 😀
 | |
|             Rejects(19, 22), // 각
 | |
|             Rejects(22, 25), // ก
 | |
|             Rejects(25, 28), // ᘀ
 | |
|             Rejects(28, 31), // 각
 | |
|             Rejects(31, 32), // a
 | |
|             Rejects(32, 34), // Á
 | |
|             Rejects(34, 37), // 각
 | |
|             Rejects(37, 40), // ꁁ
 | |
|             Rejects(40, 43), // ก
 | |
|             Rejects(43, 47), // 😀
 | |
|             Rejects(47, 48), // a
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| }
 | |
| 
 | |
| #[test]
 | |
| fn test_forward_search_shared_bytes() {
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'Á',
 | |
|         "Forward search for two-byte Latin character",
 | |
|         [next_match, next_match, next_match, next_match],
 | |
|         [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'Á',
 | |
|         "Forward search for two-byte Latin character; check if next() still works",
 | |
|         [next_match, next, next_match, next, next_match, next, next_match],
 | |
|         [
 | |
|             InRange(0, 2),
 | |
|             Rejects(2, 3),
 | |
|             InRange(8, 10),
 | |
|             Rejects(10, 13),
 | |
|             InRange(32, 34),
 | |
|             Rejects(34, 37),
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '각',
 | |
|         "Forward search for three-byte Hangul character",
 | |
|         [next_match, next, next_match, next_match, next_match],
 | |
|         [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '각',
 | |
|         "Forward search for three-byte Hangul character; check if next() still works",
 | |
|         [next_match, next, next_match, next, next_match, next, next_match],
 | |
|         [
 | |
|             InRange(19, 22),
 | |
|             Rejects(22, 25),
 | |
|             InRange(28, 31),
 | |
|             Rejects(31, 32),
 | |
|             InRange(34, 37),
 | |
|             Rejects(37, 40),
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ก',
 | |
|         "Forward search for three-byte Thai character",
 | |
|         [next_match, next, next_match, next, next_match],
 | |
|         [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ก',
 | |
|         "Forward search for three-byte Thai character; check if next() still works",
 | |
|         [next_match, next, next_match, next, next_match],
 | |
|         [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '😁',
 | |
|         "Forward search for four-byte emoji",
 | |
|         [next_match, next, next_match, next, next_match],
 | |
|         [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '😁',
 | |
|         "Forward search for four-byte emoji; check if next() still works",
 | |
|         [next_match, next, next_match, next, next_match],
 | |
|         [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ꁁ',
 | |
|         "Forward search for three-byte Yi character with repeated bytes",
 | |
|         [next_match, next, next_match, next, next_match],
 | |
|         [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ꁁ',
 | |
|         "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
 | |
|         [next_match, next, next_match, next, next_match],
 | |
|         [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
 | |
|     );
 | |
| }
 | |
| 
 | |
| #[test]
 | |
| fn test_reverse_search_shared_bytes() {
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'Á',
 | |
|         "Reverse search for two-byte Latin character",
 | |
|         [next_match_back, next_match_back, next_match_back, next_match_back],
 | |
|         [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'Á',
 | |
|         "Reverse search for two-byte Latin character; check if next_back() still works",
 | |
|         [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
 | |
|         [InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '각',
 | |
|         "Reverse search for three-byte Hangul character",
 | |
|         [next_match_back, next_back, next_match_back, next_match_back, next_match_back],
 | |
|         [InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '각',
 | |
|         "Reverse search for three-byte Hangul character; check if next_back() still works",
 | |
|         [
 | |
|             next_match_back,
 | |
|             next_back,
 | |
|             next_match_back,
 | |
|             next_back,
 | |
|             next_match_back,
 | |
|             next_back,
 | |
|             next_match_back
 | |
|         ],
 | |
|         [
 | |
|             InRange(34, 37),
 | |
|             Rejects(32, 34),
 | |
|             InRange(28, 31),
 | |
|             Rejects(25, 28),
 | |
|             InRange(19, 22),
 | |
|             Rejects(15, 19),
 | |
|             Done
 | |
|         ]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ก',
 | |
|         "Reverse search for three-byte Thai character",
 | |
|         [next_match_back, next_back, next_match_back, next_back, next_match_back],
 | |
|         [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ก',
 | |
|         "Reverse search for three-byte Thai character; check if next_back() still works",
 | |
|         [next_match_back, next_back, next_match_back, next_back, next_match_back],
 | |
|         [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '😁',
 | |
|         "Reverse search for four-byte emoji",
 | |
|         [next_match_back, next_back, next_match_back, next_back, next_match_back],
 | |
|         [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '😁',
 | |
|         "Reverse search for four-byte emoji; check if next_back() still works",
 | |
|         [next_match_back, next_back, next_match_back, next_back, next_match_back],
 | |
|         [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ꁁ',
 | |
|         "Reverse search for three-byte Yi character with repeated bytes",
 | |
|         [next_match_back, next_back, next_match_back, next_back, next_match_back],
 | |
|         [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
 | |
|     );
 | |
| 
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ꁁ',
 | |
|         "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
 | |
|         [next_match_back, next_back, next_match_back, next_back, next_match_back],
 | |
|         [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
 | |
|     );
 | |
| }
 | |
| 
 | |
| #[test]
 | |
| fn double_ended_regression_test() {
 | |
|     // https://github.com/rust-lang/rust/issues/47175
 | |
|     // Ensures that double ended searching comes to a convergence
 | |
|     search_asserts!(
 | |
|         "abcdeabcdeabcde",
 | |
|         'a',
 | |
|         "alternating double ended search",
 | |
|         [next_match, next_match_back, next_match, next_match_back],
 | |
|         [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
 | |
|     );
 | |
|     search_asserts!(
 | |
|         "abcdeabcdeabcde",
 | |
|         'a',
 | |
|         "triple double ended search for a",
 | |
|         [next_match, next_match_back, next_match_back, next_match_back],
 | |
|         [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
 | |
|     );
 | |
|     search_asserts!(
 | |
|         "abcdeabcdeabcde",
 | |
|         'd',
 | |
|         "triple double ended search for d",
 | |
|         [next_match, next_match_back, next_match_back, next_match_back],
 | |
|         [InRange(3, 4), InRange(13, 14), InRange(8, 9), Done]
 | |
|     );
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'Á',
 | |
|         "Double ended search for two-byte Latin character",
 | |
|         [next_match, next_match_back, next_match, next_match_back],
 | |
|         [InRange(0, 2), InRange(32, 34), InRange(8, 10), Done]
 | |
|     );
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '각',
 | |
|         "Reverse double ended search for three-byte Hangul character",
 | |
|         [next_match_back, next_back, next_match, next, next_match_back, next_match],
 | |
|         [InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done]
 | |
|     );
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ก',
 | |
|         "Double ended search for three-byte Thai character",
 | |
|         [next_match, next_back, next, next_match_back, next_match],
 | |
|         [InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done]
 | |
|     );
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         '😁',
 | |
|         "Double ended search for four-byte emoji",
 | |
|         [next_match_back, next, next_match, next_back, next_match],
 | |
|         [InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done]
 | |
|     );
 | |
|     search_asserts!(
 | |
|         STRESS,
 | |
|         'ꁁ',
 | |
|         "Double ended search for three-byte Yi character with repeated bytes",
 | |
|         [next_match, next, next_match_back, next_back, next_match],
 | |
|         [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done]
 | |
|     );
 | |
| }
 | 
