From daf1ee1f24ac6b2313419e31177b652b5ef34b70 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 27 Sep 2015 00:59:19 -0400 Subject: [PATCH] Lots of polish. Docs. Refactoring. Simplifying. --- .travis.yml | 9 + COPYING | 3 + Cargo.toml | 2 +- LICENSE-APACHE | 201 -------- LICENSE-MIT | 40 +- README.md | 137 +++++- UNLICENSE | 24 + appveyor.yml | 18 + compare/nftw.c | 25 + compare/walk.py | 10 + examples/walkdir.rs | 53 ++- src/lib.rs | 1075 +++++++++++++++++++++++++++++++------------ src/same_file.rs | 24 +- src/tests.rs | 141 +++--- 14 files changed, 1133 insertions(+), 629 deletions(-) create mode 100644 .travis.yml create mode 100644 COPYING delete mode 100644 LICENSE-APACHE create mode 100644 UNLICENSE create mode 100644 appveyor.yml create mode 100644 compare/nftw.c create mode 100644 compare/walk.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..e1ccf11 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,9 @@ +language: rust +rust: + - 1.3.0 + - beta + - nightly +script: + - cargo build --verbose + - cargo test --verbose + - cargo doc diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..bb9c20a --- /dev/null +++ b/COPYING @@ -0,0 +1,3 @@ +This project is dual-licensed under the Unlicense and MIT licenses. + +You may use this code under the terms of either license. 
diff --git a/Cargo.toml b/Cargo.toml index ae51f7a..2a7adfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ homepage = "https://github.com/BurntSushi/walkdir" repository = "https://github.com/BurntSushi/walkdir" readme = "README.md" keywords = ["directory", "recursive", "walk", "iterator"] -license = "MIT/Apache-2.0" +license = "Unlicense/MIT" [dependencies] libc = "0.1" diff --git a/LICENSE-APACHE b/LICENSE-APACHE deleted file mode 100644 index 16fe87b..0000000 --- a/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT index 39d4bdb..3b0a5dc 100644 --- a/LICENSE-MIT +++ b/LICENSE-MIT @@ -1,25 +1,21 @@ -Copyright (c) 2014 The Rust Project Developers +The MIT License (MIT) -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: +Copyright (c) 2015 Andrew Gallant -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md index eea52ef..92ba931 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,139 @@ walkdir ======= +A cross platform Rust library for efficiently walking a directory recursively. +Comes with support for following symbolic links, controlling the number of file +descriptors and efficient mechanisms for pruning the entries in the directory +tree. -A Rust library for efficiently walking a directory recursively. 
+[![Build status](https://api.travis-ci.org/BurntSushi/walkdir.png)](https://travis-ci.org/BurntSushi/walkdir) +[![](http://meritbadge.herokuapp.com/walkdir)](https://crates.io/crates/walkdir) -This is a work in progress and is (hopefully) destined for `std::fs`. +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + +### Documentation + +[http://burntsushi.net/rustdoc/walkdir/](http://burntsushi.net/rustdoc/walkdir/) + +### Usage + +To use this crate, add `walkdir` as a dependency to your project's +`Cargo.toml`: + +``` +[dependencies] +walkdir = "0.1" +``` + +### Example + +The following code recursively iterates over the directory given and prints +the path for each entry: + +```rust,no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo") { + let entry = entry.unwrap(); + println!("{}", entry.path().display()); +} +``` + +Or, if you'd like to iterate over all entries and ignore any errors that may +arise, use `filter_map`. (e.g., This code below will silently skip directories +that the owner of the running process does not have permission to access.) 
+ +```rust,no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { + println!("{}", entry.path().display()); +} +``` + +### Example: follow symbolic links + +The same code as above, except `follow_links` is enabled: + +```rust,no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo").follow_links(true) { + let entry = entry.unwrap(); + println!("{}", entry.path().display()); +} +``` + +### Example: skip hidden files and directories efficiently on unix + +This uses the `filter_entry` iterator adapter to avoid yielding hidden files +and directories efficiently: + +```rust,no_run +use walkdir::{DirEntry, WalkDir, WalkDirIterator}; + +fn is_hidden(entry: &DirEntry) -> bool { + entry.file_name() + .to_str() + .map(|s| s.starts_with(".")) + .unwrap_or(false) +} + +let walker = WalkDir::new("foo").into_iter(); +for entry in walker.filter_entry(|e| !is_hidden(e)) { + let entry = entry.unwrap(); + println!("{}", entry.path().display()); +} +``` + +### Motivation + +`std::fs` has an unstable `walk_dir` implementation that needed some design +work. I started off on that task, but it quickly became apparent that walking +a directory recursively is quite complex and may not be a good fit for `std` +right away. + +This should at least resolve most or all of the issues reported here (and then +some): + +* https://github.com/rust-lang/rust/issues/27707 +* https://github.com/rust-lang/rust/issues/23715 + +### Performance + +The short story is that performance is comparable with `find` and glibc's +`nftw` on both a warm and cold file cache. In fact, I cannot observe any +performance difference after running `find /`, `walkdir /` and `nftw /` on my +local file system (SSD, ~3 million entries). More precisely, I am reasonably +confident that this crate makes as few system calls and close to as few +allocations as possible. 
+ +I haven't recorded any benchmarks, but here are some things you can try with a +local checkout of `walkdir`: + +``` +# The directory you want to recursively walk: +DIR=$HOME + +# If you want to observe perf on a cold file cache, run this before *each* +# command: +sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' + +# To warm the caches +find $HOME + +# Test speed of `find` on warm cache: +time find $HOME + +# Compile and test speed of `walkdir` crate: +cargo build --release --example walkdir +time ./target/release/examples/walkdir $DIR + +# Compile and test speed of glibc's `nftw`: +gcc -O3 -o nftw ./compare/nftw.c +time ./nftw $DIR + +# For shits and giggles, test speed of Python's (2 or 3) os.walk: +time python ./compare/walk.py $DIR +``` + +On my system, the performance of `walkdir`, `find` and `nftw` is comparable. diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..fe87403 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,18 @@ +environment: + matrix: + - TARGET: x86_64-pc-windows-msvc + - TARGET: i686-pc-windows-msvc + - TARGET: i686-pc-windows-gnu +install: + - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" + - rust-nightly-%TARGET%.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin + - SET PATH=%PATH%;C:\MinGW\bin + - rustc -V + - cargo -V + +build: false + +test_script: + - cargo build --verbose + - cargo test --verbose diff --git a/compare/nftw.c b/compare/nftw.c new file mode 100644 index 0000000..7d36e2f --- /dev/null +++ b/compare/nftw.c @@ -0,0 +1,25 @@ +#define _XOPEN_SOURCE 500 +#include <ftw.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +static int +display_info(const char *fpath, const struct stat *sb, + int tflag, struct FTW *ftwbuf) +{ + printf("%s\n", fpath); + return 0; +} + +int +main(int argc, char *argv[]) +{ + int flags = FTW_PHYS; + if (nftw((argc < 2) ? "."
: argv[1], display_info, 20, flags) == -1) { + perror("nftw"); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); +} diff --git a/compare/walk.py b/compare/walk.py new file mode 100644 index 0000000..303d323 --- /dev/null +++ b/compare/walk.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import, division, print_function + +import os +import sys + +for dirpath, dirnames, filenames in os.walk(sys.argv[1]): + for n in dirnames: + print(os.path.join(dirpath, n)) + for n in filenames: + print(os.path.join(dirpath, n)) diff --git a/examples/walkdir.rs b/examples/walkdir.rs index abdd50f..eff0d26 100644 --- a/examples/walkdir.rs +++ b/examples/walkdir.rs @@ -2,6 +2,8 @@ extern crate docopt; extern crate rustc_serialize; extern crate walkdir; +use std::io::{self, Write}; + use docopt::Docopt; use walkdir::WalkDir; @@ -12,11 +14,11 @@ Usage: Options: -h, --help -L, --follow-links Follow symlinks. - -d, --depth Traverse contents of directories first. --min-depth NUM Minimum depth. --max-depth NUM Maximum depth. -n, --fd-max NUM Maximum open file descriptors. [default: 32] --tree Show output as a tree. + -q, --ignore-errors Ignore errors. "; #[derive(Debug, RustcDecodable)] @@ -27,37 +29,50 @@ struct Args { flag_min_depth: Option, flag_max_depth: Option, flag_fd_max: usize, - flag_depth: bool, flag_tree: bool, + flag_ignore_errors: bool, } +macro_rules! 
wout { ($($tt:tt)*) => { {writeln!($($tt)*)}.unwrap() } } + fn main() { let args: Args = Docopt::new(USAGE).and_then(|d| d.decode()) .unwrap_or_else(|e| e.exit()); let mind = args.flag_min_depth.unwrap_or(0); let maxd = args.flag_max_depth.unwrap_or(::std::usize::MAX); - let mut it = WalkDir::new(args.arg_dir.unwrap_or(".".to_owned())) - .max_open(args.flag_fd_max) - .follow_links(args.flag_follow_links) - .contents_first(args.flag_depth) - .min_depth(mind) - .max_depth(maxd) - .into_iter(); + let it = WalkDir::new(args.arg_dir.clone().unwrap_or(".".to_owned())) + .max_open(args.flag_fd_max) + .follow_links(args.flag_follow_links) + .min_depth(mind) + .max_depth(maxd) + .into_iter(); + let mut out = io::BufWriter::new(io::stdout()); + let mut eout = io::stderr(); if args.flag_tree { - loop { - let dent = match it.next() { - None => break, - Some(Err(err)) => { println!("ERROR: {}", err); continue } - Some(Ok(dent)) => dent, - }; - let name = dent.file_name().into_string().unwrap(); - println!("{}{}", indent(it.depth()), name); + for dent in it { + match dent { + Err(err) => { + out.flush().unwrap(); + wout!(eout, "ERROR: {}", err); + } + Ok(dent) => { + let name = dent.file_name().to_string_lossy(); + wout!(out, "{}{}", indent(dent.depth()), name); + } + } + } + } else if args.flag_ignore_errors { + for dent in it.filter_map(|e| e.ok()) { + wout!(out, "{}", dent.path().display()); } } else { for dent in it { match dent { - Ok(dent) => println!("{}", dent.path().display()), - Err(err) => println!("ERROR: {}", err), + Err(err) => { + out.flush().unwrap(); + wout!(eout, "ERROR: {}", err); + } + Ok(dent) => wout!(out, "{}", dent.path().display()), } } } diff --git a/src/lib.rs b/src/lib.rs index 3fec5bd..577c491 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,101 @@ +/*! +Crate `walkdir` provides an efficient and cross platform implementation +of recursive directory traversal. 
Several options are exposed to control +iteration, such as whether to follow symbolic links (default off), limit the +maximum number of simultaneous open file descriptors and the ability to +efficiently skip descending into directories. + +To use this crate, add `walkdir` as a dependency to your project's +`Cargo.toml`: + +``` +[dependencies] +walkdir = "0.1" +``` + +# From the top + +The `WalkDir` type builds iterators. The `WalkDirIterator` trait provides +methods for directory iterator adapters, such as efficiently pruning entries +during traversal. The `DirEntry` type describes values yielded by the iterator. +Finally, the `Error` type is a small wrapper around `std::io::Error` with +additional information, such as if a loop was detected while following symbolic +links (not enabled by default). + +# Example + +The following code recursively iterates over the directory given and prints +the path for each entry: + +```rust,no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo") { + let entry = entry.unwrap(); + println!("{}", entry.path().display()); +} +``` + +Or, if you'd like to iterate over all entries and ignore any errors that may +arise, use `filter_map`. (e.g., This code below will silently skip directories +that the owner of the running process does not have permission to access.) 
+ +```rust,no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { + println!("{}", entry.path().display()); +} +``` + +# Example: follow symbolic links + +The same code as above, except `follow_links` is enabled: + +```rust,no_run +use walkdir::WalkDir; + +for entry in WalkDir::new("foo").follow_links(true) { + let entry = entry.unwrap(); + println!("{}", entry.path().display()); +} +``` + +# Example: skip hidden files and directories efficiently on unix + +This uses the `filter_entry` iterator adapter to avoid yielding hidden files +and directories efficiently: + +```rust,no_run +use walkdir::{DirEntry, WalkDir, WalkDirIterator}; + +fn is_hidden(entry: &DirEntry) -> bool { + entry.file_name() + .to_str() + .map(|s| s.starts_with(".")) + .unwrap_or(false) +} + +let walker = WalkDir::new("foo").into_iter(); +for entry in walker.filter_entry(|e| !is_hidden(e)) { + let entry = entry.unwrap(); + println!("{}", entry.path().display()); +} +``` + +*/ #[cfg(windows)] extern crate libc; #[cfg(test)] extern crate quickcheck; #[cfg(test)] extern crate rand; use std::cmp::min; -use std::borrow::Cow; use std::error; use std::fmt; -use std::fs::{self, ReadDir}; +use std::fs::{self, FileType, ReadDir}; use std::io; -use std::ffi::{OsStr, OsString}; +use std::ffi::OsStr; use std::path::{Path, PathBuf}; +use std::result; use std::vec; use same_file::is_same_file; @@ -17,414 +103,815 @@ use same_file::is_same_file; mod same_file; #[cfg(test)] mod tests; -/// Create an iterator to recursively walk a directory. -pub struct WalkDir

{ - root: P, +/// Like try, but for iterators that return `Option<Result<_, _>>`. +macro_rules! itry { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(err) => return Some(Err(From::from(err))), + } + } +} + +/// A result type for walkdir operations. +/// +/// Note that this result type embeds the error type in this crate. This +/// is only useful if you care about the additional information provided by +/// the error (such as the path associated with the error or whether a loop +/// was detected). If you want things to Just Work, then you can use +/// `io::Result` instead since the error type in this package will +/// automatically convert to an `io::Result` when using the `try!` macro. +pub type Result<T> = ::std::result::Result<T, Error>; + +/// A builder to create an iterator for recursively walking a directory. +/// +/// Results are returned in depth first fashion, with directories yielded +/// before their contents. The order is unspecified. Directory entries `.` +/// and `..` are always omitted. +/// +/// If an error occurs at any point during iteration, then it is returned in +/// place of its corresponding directory entry and iteration continues as +/// normal. If an error occurs while opening a directory for reading, it +/// is skipped. Iteration may be stopped at any time. When the iterator is +/// destroyed, all resources associated with it are freed. +/// +/// # Usage +/// +/// This type implements `IntoIterator` so that it may be used as the subject +/// of a `for` loop. You may need to call `into_iter` explicitly if you want +/// to use iterator adapters such as `filter_entry`. +/// +/// Idiomatic use of this type should use method chaining to set desired +/// options.
For example, this only shows entries with a depth of `1`, `2` +/// or `3` (relative to `foo`): +/// +/// ```rust,no_run +/// use walkdir::WalkDir; +/// +/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) { +/// let entry = entry.unwrap(); +/// println!("{}", entry.path().display()); +/// } +/// ``` +/// +/// Note that the iterator by default includes the top-most directory. Since +/// this is the only directory yielded with depth `0`, it is easy to ignore it +/// with the `min_depth` setting: +/// +/// ```rust,no_run +/// use walkdir::WalkDir; +/// +/// for entry in WalkDir::new("foo").min_depth(1) { +/// let entry = entry.unwrap(); +/// println!("{}", entry.path().display()); +/// } +/// ``` +/// +/// This will only return descendents of the `foo` directory and not `foo` +/// itself. +/// +/// # Loops +/// +/// This iterator (like most/all recursive directory iterators) assumes that +/// no loops can be made with *hard* links on your file system. In particular, +/// this would require creating a hard link to a directory such that it creates +/// a loop. On most platforms, this operation is illegal. +/// +/// Note that when following symbolic/soft links, loops are detected and an +/// error is reported. +pub struct WalkDir { opts: WalkDirOptions, + root: PathBuf, } struct WalkDirOptions { follow_links: bool, max_open: usize, - contents_first: bool, min_depth: usize, max_depth: usize, } -impl> WalkDir

{ - pub fn new(root: P) -> Self { +impl WalkDir { + /// Create a builder for a recursive directory iterator starting at the + /// file path `root`. If `root` is a directory, then it is the first item + /// yielded by the iterator. If `root` is a file, then it is the first + /// and only item yielded by the iterator. + pub fn new<P: AsRef<Path>>(root: P) -> Self { WalkDir { - root: root, opts: WalkDirOptions { follow_links: false, - max_open: 32, - contents_first: false, + max_open: 10, min_depth: 0, max_depth: ::std::usize::MAX, - } + }, + root: root.as_ref().to_path_buf(), } } + /// Set the minimum depth of entries yielded by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + pub fn min_depth(mut self, depth: usize) -> Self { + self.opts.min_depth = depth; + if self.opts.min_depth > self.opts.max_depth { + self.opts.min_depth = self.opts.max_depth; + } + self + } + + /// Set the maximum depth of entries yielded by the iterator. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on this type. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + /// + /// Note that this will not simply filter the entries of the iterator, but + /// it will actually avoid descending into directories when the depth is + /// exceeded. + pub fn max_depth(mut self, depth: usize) -> Self { + self.opts.max_depth = depth; + if self.opts.max_depth < self.opts.min_depth { + self.opts.max_depth = self.opts.min_depth; + } + self + } + + /// Follow symbolic links. By default, this is disabled. + /// + /// When `yes` is `true`, symbolic links are followed as if they were + /// normal directories and files. If a symbolic link is broken or is + /// involved in a loop, an error is yielded.
+ /// + /// When enabled, the yielded `DirEntry` values represent the target of + /// the link while the path corresponds to the link. See the `DirEntry` + /// type for more details. + /// + /// # Warning: bug with junctions on Windows + /// + /// There [is a bug](https://github.com/rust-lang/rust/issues/28528) + /// that may affect following symbolic links on Windows when using + /// junctions. + pub fn follow_links(mut self, yes: bool) -> Self { + self.opts.follow_links = yes; + self + } + + /// Set the maximum number of simultaneously open file descriptors used + /// by the iterator. + /// + /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set + /// to `1` automatically. If this is not set, then it defaults to some + /// reasonably low number. + /// + /// This setting has no impact on the results yielded by the iterator + /// (even when `n` is `1`). Instead, this setting represents a trade off + /// between scarce resources (file descriptors) and memory. Namely, when + /// the maximum number of file descriptors is reached and a new directory + /// needs to be opened to continue iteration, then a previous directory + /// handle is closed and has its unyielded entries stored in memory. In + /// practice, this is a satisfying trade off because it scales with respect + /// to the *depth* of your file tree. Therefore, low values (even `1`) are + /// acceptable. + /// + /// Note that this value does not impact the number of system calls made by + /// an exhausted iterator. pub fn max_open(mut self, mut n: usize) -> Self { - // A value of 0 is nonsensical and will prevent the file walker from - // working in any meaningful sense. So just set the limit to 1. 
if n == 0 { n = 1; } self.opts.max_open = n; self } - - pub fn follow_links(mut self, yes: bool) -> Self { - self.opts.follow_links = yes; - self - } - - pub fn contents_first(mut self, yes: bool) -> Self { - self.opts.contents_first = yes; - self - } - - pub fn min_depth(mut self, depth: usize) -> Self { - self.opts.min_depth = depth; - self - } - - pub fn max_depth(mut self, depth: usize) -> Self { - self.opts.max_depth = depth; - self - } } -impl> IntoIterator for WalkDir

{ - type Item = Result; - type IntoIter = WalkDirIter; +impl IntoIterator for WalkDir { + type Item = Result; + type IntoIter = Iter; - fn into_iter(self) -> WalkDirIter { - assert!(self.opts.min_depth <= self.opts.max_depth); - WalkDirIter { + fn into_iter(self) -> Iter { + Iter { opts: self.opts, - start: Some(self.root.as_ref().to_path_buf()), - stack: vec![], + start: Some(self.root), + stack_list: vec![], + stack_path: vec![], oldest_opened: 0, depth: 0, } } } -pub struct WalkDirIter { +/// A trait for recursive directory iterators. +pub trait WalkDirIterator: Iterator { + /// Skips the current directory. + /// + /// This causes the iterator to stop traversing the contents of the least + /// recently yielded directory. This means any remaining entries in that + /// directory will be skipped (including sub-directories). + /// + /// Note that the ergonomics of this method are questionable since it + /// borrows the iterator mutably. Namely, you must write out the looping + /// condition manually. For example, to skip hidden entries efficiently on + /// unix systems: + /// + /// ```rust,no_run + /// use walkdir::{DirEntry, WalkDir, WalkDirIterator}; + /// + /// fn is_hidden(entry: &DirEntry) -> bool { + /// entry.file_name() + /// .to_str() + /// .map(|s| s.starts_with(".")) + /// .unwrap_or(false) + /// } + /// + /// let mut it = WalkDir::new("foo").into_iter(); + /// loop { + /// let entry = match it.next() { + /// None => break, + /// Some(Err(err)) => panic!("ERROR: {}", err), + /// Some(Ok(entry)) => entry, + /// }; + /// if is_hidden(&entry) { + /// if entry.file_type().is_dir() { + /// it.skip_current_dir(); + /// } + /// continue; + /// } + /// println!("{}", entry.path().display()); + /// } + /// ``` + /// + /// You may find it more convenient to use the `filter_entry` iterator + /// adapter. (See its documentation for the same example functionality as + /// above.) 
+ fn skip_current_dir(&mut self); + + /// Yields only entries which satisfy the given predicate and skips + /// descending into directories that do not satisfy the given predicate. + /// + /// The predicate is applied to all entries. If the predicate is + /// true, iteration carries on as normal. If the predicate is false, the + /// entry is ignored and if it is a directory, it is not descended into. + /// + /// This is often more convenient to use than `skip_current_dir`. For + /// example, to skip hidden files and directories efficiently on unix + /// systems: + /// + /// ```rust,no_run + /// use walkdir::{DirEntry, WalkDir, WalkDirIterator}; + /// + /// fn is_hidden(entry: &DirEntry) -> bool { + /// entry.file_name() + /// .to_str() + /// .map(|s| s.starts_with(".")) + /// .unwrap_or(false) + /// } + /// + /// for entry in WalkDir::new("foo") + /// .into_iter() + /// .filter_entry(|e| !is_hidden(e)) { + /// let entry = entry.unwrap(); + /// println!("{}", entry.path().display()); + /// } + /// ``` + /// + /// Note that the iterator will still yield errors for reading entries that + /// may not satisfy the predicate. + /// + /// Note that entries skipped with `min_depth` and `max_depth` are not + /// passed to this predicate. + fn filter_entry

(self, predicate: P) -> IterFilterEntry + where Self: Sized, P: FnMut(&DirEntry) -> bool { + IterFilterEntry { it: self, predicate: predicate } + } +} + +/// An iterator for recursively descending into a directory. +/// +/// A value with this type must be constructed with the `WalkDir` type, which +/// uses a builder pattern to set options such as min/max depth, max open file +/// descriptors and whether the iterator should follow symbolic links. +/// +/// The order of elements yielded by this iterator is unspecified. +pub struct Iter { + /// Options specified in the builder. Depths, max fds, etc. opts: WalkDirOptions, + /// The start path. + /// + /// This is only `Some(...)` at the beginning. After the first iteration, + /// this is always `None`. start: Option, - stack: Vec, + /// A stack of open (up to max fd) or closed handles to directories. + /// An open handle is a plain `fs::ReadDir` while a closed handle is + /// a `Vec` corresponding to the as-of-yet unconsumed entries. + stack_list: Vec, + /// A stack of file paths. + /// + /// This is *only* used when `follow_links` is enabled. In all other cases + /// this stack is empty. + stack_path: Vec, + /// An index into `stack_list` that points to the oldest open directory + /// handle. If the maximum fd limit is reached and a new directory needs + /// to be read, the handle at this index is closed before the new directory + /// is opened. oldest_opened: usize, + /// The current depth of iteration (the length of the stack at the + /// beginning of each iteration). depth: usize, } -pub struct DirEntry(DirEntryInner); - -enum DirEntryInner { - Raw(fs::DirEntry), - Meta { path: PathBuf, meta: fs::Metadata }, -} - -struct StackEntry { - dir: Dir, - list: DirList, -} - -enum Dir { - Path(PathBuf), - Entry(DirEntry), -} - +/// A sequence of unconsumed directory entries. +/// +/// This represents the opened or closed state of a directory handle. 
When +/// open, future entries are read by iterating over the raw `fs::ReadDir`. +/// When closed, all future entries are read into memory. Iteration then +/// proceeds over a `Vec`. enum DirList { - Opened(Result>), - Closed(vec::IntoIter>), + /// An opened handle. + /// + /// This includes the depth of the handle itself. + /// + /// If there was an error with the initial `fs::read_dir` call, then it is + /// stored here. (We use an `Option<...>` to make yielding the error + /// exactly once simpler.) + Opened { depth: usize, it: result::Result> }, + /// A closed handle. + /// + /// All remaining directory entries are read into memory. + Closed(vec::IntoIter>), } -impl Iterator for WalkDirIter { - type Item = Result; +/// A directory entry. +/// +/// This is the type of value that is yielded from the iterators defined in +/// this crate. +/// +/// # Differences with `std::fs::DirEntry` +/// +/// This type mostly mirrors the type by the same name in `std::fs`. There are +/// some differences however: +/// +/// * All recursive directory iterators must inspect the entry's type. +/// Therefore, the value is stored and its access is guaranteed to be cheap and +/// successful. +/// * `path` and `file_name` return borrowed variants. +/// * If `follow_links` was enabled on the originating iterator, then all +/// operations except for `path` operate on the link target. Otherwise, all +/// operations operate on the symbolic link. +pub struct DirEntry { + /// The path as reported by the `fs::ReadDir` iterator (even if it's a + /// symbolic link). + path: PathBuf, + /// The file type. Necessary for recursive iteration, so store it. + ty: FileType, + /// Is set when this entry was created from a symbolic link and the user + /// expects the iterator to follow symbolic links. + follow_link: bool, + /// The underlying `fs::DirEntry`, if one exists. This is really only + /// useful to provide some operations (e.g., `metadata`) cheaply when + /// possible. 
+ entry: Option, + /// The depth at which this entry was generated relative to the root. + depth: usize, +} - fn next(&mut self) -> Option> { - macro_rules! walk_try { - ($dent:expr, $e:expr) => { - match $e { - Ok(v) => v, - Err(err) => { - let err = WalkDirError::from_io($dent.path(), err); - return Some(Err(err)); - } - } - } - } - - macro_rules! skip { - ($walkdir:expr, $ret:expr) => {{ - let d = $walkdir.depth; - if d < $walkdir.opts.min_depth || d > $walkdir.opts.max_depth { - continue; - } else { - return $ret; - } - }} - } +impl Iterator for Iter { + type Item = Result; + fn next(&mut self) -> Option> { if let Some(start) = self.start.take() { - self.push_path(start, None); - } - while !self.stack.is_empty() { - self.depth = self.stack.len() - 1; - let mut dent = match self.stack.last_mut().and_then(|v| v.next()) { - None => { - if let Dir::Entry(dent) = self.pop().dir { - self.depth = self.depth.saturating_sub(1); - skip!(self, Some(Ok(dent))); - } else { - continue; - } - } - Some(Err(err)) => return Some(Err(err)), - Some(Ok(dent)) => dent, - }; - // On both Windows and most unixes, this should not require a - // syscall. But it's not guaranteed, so only call it once. ---AG - let mut ty = walk_try!(dent, dent.file_type()); - if ty.is_symlink() { - if !self.opts.follow_links { - skip!(self, Some(Ok(dent))); - } else { - let p = dent.path(); - dent = walk_try!(dent, DirEntry::from_path(&p)); - ty = walk_try!(dent, dent.file_type()); - assert!(!ty.is_symlink()); - // The only way a symlink can cause a loop is if it points - // to a directory. Otherwise, it always points to a leaf - // and we can omit any loop checks. 
- if ty.is_dir() { - let looperr = walk_try!(dent, self.loop_error(p)); - if let Some(err) = looperr { - return Some(Err(err)); - } - } - } + let dent = itry!(DirEntry::from_path(0, start)); + if let Some(result) = self.handle_entry(dent) { + return Some(result); } - if ty.is_dir() { - if self.depth == self.opts.max_depth { - // Don't descend into this directory, just return it. - // Since min_depth <= max_depth, we don't need to check - // if we're skipping here. - // - // Note that this is a perf optimization and is not - // required for correctness. - return Some(Ok(dent)); - } else if let Some(dent) = self.push(dent) { - skip!(self, Some(Ok(dent))); + } + while !self.stack_list.is_empty() { + self.depth = self.stack_list.len(); + match self.stack_list.last_mut().unwrap().next() { + None => self.pop(), + Some(Err(err)) => return Some(Err(err)), + Some(Ok(dent)) => { + let dent = itry!(DirEntry::from_entry(self.depth, dent)); + if let Some(result) = self.handle_entry(dent) { + return Some(result); + } } - } else { - skip!(self, Some(Ok(dent))); } } None } } -impl WalkDirIter { - pub fn skip_current_dir(&mut self) { - if !self.stack.is_empty() { - self.stack.pop(); +impl WalkDirIterator for Iter { + fn skip_current_dir(&mut self) { + if !self.stack_list.is_empty() { + self.stack_list.pop(); } - } - - pub fn depth(&self) -> usize { - self.depth - } - - fn push(&mut self, dent: DirEntry) -> Option { - self.push_path(dent.path(), Some(dent)) - } - - fn push_path( - &mut self, - p: PathBuf, - dent: Option, - ) -> Option { - // Make room for another open file descriptor if we've hit the max. - if self.stack.len() - self.oldest_opened == self.opts.max_open { - self.stack[self.oldest_opened].close(); - self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); + if !self.stack_path.is_empty() { + self.stack_path.pop(); } - // Open a handle to reading the directory's entries. 
- let list = DirList::Opened(fs::read_dir(&p).map_err(|err| { - Some(WalkDirError::from_io(&p, err)) - })); - // If we have a dir entry (the only time we don't is when pushing the - // initial path) and we are enumerating the contents of a directory - // before the directory itself, then we need to hang on to that dir - // entry in the stack. Otherwise, we pass the dir entry back to the - // caller and hang on to a path to the directory instead. - if self.opts.contents_first && dent.is_some() { - self.stack.push(StackEntry { - dir: Dir::Entry(dent.expect("DirEntry")), - list: list, - }); - None - } else { - self.stack.push(StackEntry { - dir: Dir::Path(p), - list: list, - }); - dent - } - } - - fn pop(&mut self) -> StackEntry { - let ent = self.stack.pop().expect("cannot pop from empty stack"); - // If everything in the stack is already closed, then there is - // room for at least one more open descriptor and it will - // always be at the top of the stack. - self.oldest_opened = min(self.oldest_opened, self.stack.len()); - ent - } - - fn loop_error(&self, child: PathBuf) -> io::Result> { - for ent in self.stack.iter().rev() { - let ancestor = ent.dir.path(); - if try!(is_same_file(&ancestor, &child)) { - return Ok(Some(WalkDirError::Loop { - ancestor: ancestor.into_owned(), - child: child, - })); - } - } - Ok(None) } } -impl StackEntry { +impl Iter { + fn handle_entry( + &mut self, + mut dent: DirEntry, + ) -> Option> { + if self.opts.follow_links && dent.file_type().is_symlink() { + dent = itry!(self.follow(dent)); + } + if dent.file_type().is_dir() && self.depth < self.opts.max_depth { + self.push(&dent); + } + if self.skippable() { None } else { Some(Ok(dent)) } + } + + fn push(&mut self, dent: &DirEntry) { + // Make room for another open file descriptor if we've hit the max. 
+ if self.stack_list.len() - self.oldest_opened == self.opts.max_open { + self.stack_list[self.oldest_opened].close(); + self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); + } + // Open a handle to reading the directory's entries. + let rd = fs::read_dir(dent.path()).map_err(|err| { + Some(Error::from_path(self.depth, dent.path().to_path_buf(), err)) + }); + self.stack_list.push(DirList::Opened { depth: self.depth, it: rd }); + if self.opts.follow_links { + self.stack_path.push(dent.path().to_path_buf()); + } + } + + fn pop(&mut self) { + self.stack_list.pop().expect("cannot pop from empty stack"); + if self.opts.follow_links { + self.stack_path.pop().expect("BUG: list/path stacks out of sync"); + } + // If everything in the stack is already closed, then there is + // room for at least one more open descriptor and it will + // always be at the top of the stack. + self.oldest_opened = min(self.oldest_opened, self.stack_list.len()); + } + + fn follow(&self, mut dent: DirEntry) -> Result { + dent = try!(DirEntry::from_link(self.depth, + dent.path().to_path_buf())); + // The only way a symlink can cause a loop is if it points + // to a directory. Otherwise, it always points to a leaf + // and we can omit any loop checks. 
+ if dent.file_type().is_dir() { + try!(self.check_loop(dent.path())); + } + Ok(dent) + } + + fn check_loop>(&self, child: P) -> Result<()> { + for ancestor in self.stack_path.iter().rev() { + let same = try!(is_same_file(ancestor, &child).map_err(|err| { + Error::from_io(self.depth, err) + })); + if same { + return Err(Error { + depth: self.depth, + inner: ErrorInner::Loop { + ancestor: ancestor.to_path_buf(), + child: child.as_ref().to_path_buf(), + }, + }); + } + } + Ok(()) + } + + fn skippable(&self) -> bool { + self.depth < self.opts.min_depth || self.depth > self.opts.max_depth + } +} + +impl DirList { fn close(&mut self) { - if let DirList::Opened(_) = self.list { - self.list = DirList::Closed(self.collect::>().into_iter()); + if let DirList::Opened { .. } = *self { + *self = DirList::Closed(self.collect::>().into_iter()); } else { unreachable!("BUG: entry already closed"); } } } -impl Dir { - fn path(&self) -> Cow { +impl Iterator for DirList { + type Item = Result; + + fn next(&mut self) -> Option> { match *self { - Dir::Path(ref p) => Cow::Borrowed(p), - Dir::Entry(ref dent) => Cow::Owned(dent.path()), - } - } -} - -impl Iterator for StackEntry { - type Item = Result; - - fn next(&mut self) -> Option> { - match self.list { DirList::Closed(ref mut it) => it.next(), - DirList::Opened(ref mut rd) => match *rd { + DirList::Opened { depth, ref mut it } => match *it { Err(ref mut err) => err.take().map(Err), - Ok(ref mut rd) => match rd.next() { - None => None, - Some(Ok(dent)) => Some(Ok(dent.into())), - Some(Err(err)) => { - let p = self.dir.path().to_path_buf(); - Some(Err(WalkDirError::from_io(p, err))) - } - } + Ok(ref mut rd) => rd.next().map(|r| r.map_err(|err| { + Error::from_io(depth + 1, err) + })), } } } } impl DirEntry { - pub fn path(&self) -> PathBuf { - match self.0 { - DirEntryInner::Raw(ref dent) => dent.path(), - DirEntryInner::Meta { ref path, .. } => path.clone(), - } + /// The full path that this entry represents. 
+ /// + /// The full path is created by joining the parents of this entry up to the + /// root initially given to `WalkDir::new` with the file name of this + /// entry. + /// + /// Note that this *always* returns the path reported by the underlying + /// directory entry, even when symbolic links are followed. To get the + /// target path, use `path_is_symbolic_link` to (cheaply) check if + /// this entry corresponds to a symbolic link, and `std::fs::read_link` to + /// resolve the target. + pub fn path(&self) -> &Path { + &self.path } - pub fn metadata(&self) -> io::Result { - match self.0 { - DirEntryInner::Raw(ref dent) => dent.metadata(), - DirEntryInner::Meta { ref path, .. } => fs::metadata(path), - } + /// Returns `true` if and only if this entry was created from a symbolic + /// link. This is unaffected by the `follow_links` setting. + /// + /// When `true`, the value returned by the `path` method is a + /// symbolic link name. To get the full target path, you must call + /// `std::fs::read_link(entry.path())`. + pub fn path_is_symbolic_link(&self) -> bool { + self.ty.is_symlink() || self.follow_link } - pub fn file_type(&self) -> io::Result { - match self.0 { - DirEntryInner::Raw(ref dent) => dent.file_type(), - DirEntryInner::Meta { ref meta, .. } => Ok(meta.file_type()), - } + /// Return the metadata for the file that this entry points to. + /// + /// This will follow symbolic links if and only if the `WalkDir` value + /// has `follow_links` enabled. + /// + /// # Platform behavior + /// + /// On Windows, this function requires no additional system calls. On Unix, + /// this always calls `std::fs::symlink_metadata`. + /// + /// If this entry is a symbolic link and `follow_links` is enabled, then + /// `std::fs::metadata` is called regardless of platform. 
+ pub fn metadata(&self) -> Result { + if let Some(dent) = self.entry.as_ref() { + dent.metadata() + } else if self.follow_link { + fs::metadata(&self.path) + } else { + fs::symlink_metadata(&self.path) + }.map_err(|err| Error::from_entry(self, err)) } - pub fn file_name(&self) -> OsString { - match self.0 { - DirEntryInner::Raw(ref dent) => dent.file_name(), - DirEntryInner::Meta { ref path, .. } => { - // We never create dir entries with "." or "..", so `file_name` - // is only `None` when the path is `/`. - path.file_name().unwrap_or(OsStr::new("")).to_os_string() - } - } + /// Return the file type for the file that this entry points to. + /// + /// If this is a symbolic link and `follow_links` is `true`, then this + /// returns the type of the target. + /// + /// This never makes any system calls. + pub fn file_type(&self) -> fs::FileType { + self.ty } - fn from_path>(p: P) -> io::Result { - let pb = p.as_ref().to_path_buf(); - let md = try!(fs::metadata(&pb)); - Ok(DirEntry(DirEntryInner::Meta { path: pb, meta: md })) + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.path.file_name().unwrap_or_else(|| self.path.as_os_str()) + } + + /// Returns the depth at which this entry was created relative to the root. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on `WalkDir`. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. 
+ pub fn depth(&self) -> usize { + self.depth + } + + fn from_entry(depth: usize, ent: fs::DirEntry) -> Result { + let ty = try!(ent.file_type().map_err(|err| { + Error::from_path(depth, ent.path(), err) + })); + Ok(DirEntry { + path: ent.path(), + ty: ty, + follow_link: false, + entry: Some(ent), + depth: depth, + }) + } + + fn from_link(depth: usize, pb: PathBuf) -> Result { + let md = try!(fs::metadata(&pb).map_err(|err| { + Error::from_path(depth, pb.clone(), err) + })); + Ok(DirEntry { + path: pb, + ty: md.file_type(), + follow_link: true, + entry: None, + depth: depth, + }) + } + + fn from_path(depth: usize, pb: PathBuf) -> Result { + let md = try!(fs::symlink_metadata(&pb).map_err(|err| { + Error::from_path(depth, pb.clone(), err) + })); + Ok(DirEntry { + path: pb, + ty: md.file_type(), + follow_link: false, + entry: None, + depth: depth, + }) } } -impl From for DirEntry { - fn from(dent: fs::DirEntry) -> DirEntry { - DirEntry(DirEntryInner::Raw(dent)) +impl Clone for DirEntry { + fn clone(&self) -> DirEntry { + DirEntry { + path: self.path.clone(), + ty: self.ty, + follow_link: self.follow_link, + entry: None, + depth: self.depth, + } } } impl fmt::Debug for DirEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.0 { - DirEntryInner::Raw(ref dent) => { - write!(f, "DirEntry({:?})", dent.path()) - } - DirEntryInner::Meta { ref path, .. } => { - write!(f, "DirEntry({:?})", path) + write!(f, "DirEntry({:?})", self.path) + } +} + +/// A recursive directory iterator that skips entries. +/// +/// Directories that fail the predicate `P` are skipped. Namely, they are +/// never yielded and never descended into. +/// +/// Entries that are skipped with the `min_depth` and `max_depth` options are +/// not passed through this filter. +/// +/// If opening a handle to a directory resulted in an error, then it is yielded +/// and no corresponding call to the predicate is made. 
+/// +/// Type parameter `I` refers to the underlying iterator and `P` refers to the +/// predicate, which is usually `FnMut(&DirEntry) -> bool`. +pub struct IterFilterEntry { + it: I, + predicate: P, +} + +impl Iterator for IterFilterEntry + where I: WalkDirIterator>, + P: FnMut(&DirEntry) -> bool { + type Item = Result; + + fn next(&mut self) -> Option> { + loop { + let dent = match self.it.next() { + None => return None, + Some(result) => itry!(result), + }; + if !(self.predicate)(&dent) { + self.it.skip_current_dir(); + continue; } + return Some(Ok(dent)); } } } +impl WalkDirIterator for IterFilterEntry + where I: WalkDirIterator>, + P: FnMut(&DirEntry) -> bool { + fn skip_current_dir(&mut self) { + self.it.skip_current_dir(); + } +} + +/// An error produced by recursively walking a directory. +/// +/// This error type is a light wrapper around `std::io::Error`. In particular, +/// it adds the following information: +/// +/// * The depth at which the error occurred in the file tree, relative to the +/// root. +/// * The path, if any, associated with the IO error. +/// * An indication that a loop occurred when following symbolic links. In this +/// case, there is no underlying IO error. +/// +/// To maintain good ergonomics, this type has a +/// `impl From for std::io::Error` defined so that you may use an +/// `io::Result` with methods in this crate if you don't care about accessing +/// the underlying error data in a structured form. +#[derive(Debug)] +pub struct Error { + depth: usize, + inner: ErrorInner, +} + #[derive(Debug)] -pub enum WalkDirError { - Io { path: PathBuf, err: io::Error }, +enum ErrorInner { + Io { path: Option, err: io::Error }, Loop { ancestor: PathBuf, child: PathBuf }, } -impl WalkDirError { - fn from_io>(p: P, err: io::Error) -> Self { - WalkDirError::Io { - path: p.as_ref().to_path_buf(), - err: err, +impl Error { + /// Returns the path associated with this error if one exists. 
+ /// + /// For example, if an error occurred while opening a directory handle, + /// the error will include the path passed to `std::fs::read_dir`. + pub fn path(&self) -> Option<&Path> { + match self.inner { + ErrorInner::Io { path: None, .. } => None, + ErrorInner::Io { path: Some(ref path), .. } => Some(path), + ErrorInner::Loop { ref child, .. } => Some(child), } } - pub fn path(&self) -> &Path { - match *self { - WalkDirError::Io { ref path, .. } => path, - WalkDirError::Loop { ref child, .. } => child, + /// Returns the path at which a cycle was detected. + /// + /// If no cycle was detected, `None` is returned. + /// + /// A cycle is detected when a directory entry is equivalent to one of + /// its ancestors. + /// + /// To get the path to the child directory entry in the cycle, use the + /// `path` method. + pub fn loop_ancestor(&self) -> Option<&Path> { + match self.inner { + ErrorInner::Loop { ref ancestor, .. } => Some(ancestor), + _ => None, + } + } + + /// Returns the depth at which this error occurred relative to the root. + /// + /// The smallest depth is `0` and always corresponds to the path given + /// to the `new` function on `WalkDir`. Its direct descendents have depth + /// `1`, and their descendents have depth `2`, and so on. + pub fn depth(&self) -> usize { + self.depth + } + + fn from_path(depth: usize, pb: PathBuf, err: io::Error) -> Self { + Error { + depth: depth, + inner: ErrorInner::Io { path: Some(pb), err: err }, + } + } + + fn from_entry(dent: &DirEntry, err: io::Error) -> Self { + Error { + depth: dent.depth, + inner: ErrorInner::Io { + path: Some(dent.path().to_path_buf()), + err: err, + }, + } + } + + fn from_io(depth: usize, err: io::Error) -> Self { + Error { + depth: depth, + inner: ErrorInner::Io { path: None, err: err }, } } } -impl error::Error for WalkDirError { +impl error::Error for Error { fn description(&self) -> &str { - match *self { - WalkDirError::Io { ref err, .. } => err.description(), - WalkDirError::Loop { .. 
} => "file system loop found", + match self.inner { + ErrorInner::Io { ref err, .. } => err.description(), + ErrorInner::Loop { .. } => "file system loop found", } } fn cause(&self) -> Option<&error::Error> { - match *self { - WalkDirError::Io { ref err, .. } => Some(err), - WalkDirError::Loop { .. } => None, + match self.inner { + ErrorInner::Io { ref err, .. } => Some(err), + ErrorInner::Loop { .. } => None, } } } -impl fmt::Display for WalkDirError { +impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - WalkDirError::Io { ref path, ref err } => { + match self.inner { + ErrorInner::Io { path: None, ref err } => { + err.fmt(f) + } + ErrorInner::Io { path: Some(ref path), ref err } => { write!(f, "IO error for operation on {}: {}", path.display(), err) } - WalkDirError::Loop { ref ancestor, ref child } => { + ErrorInner::Loop { ref ancestor, ref child } => { write!(f, "File system loop found: \ {} points to an ancestor {}", child.display(), ancestor.display()) @@ -433,11 +920,11 @@ impl fmt::Display for WalkDirError { } } -impl From for io::Error { - fn from(err: WalkDirError) -> io::Error { +impl From for io::Error { + fn from(err: Error) -> io::Error { match err { - WalkDirError::Io { err, .. } => err, - err @ WalkDirError::Loop { .. } => { + Error { inner: ErrorInner::Io { err, .. }, .. } => err, + err @ Error { inner: ErrorInner::Loop { .. }, .. } => { io::Error::new(io::ErrorKind::Other, err) } } diff --git a/src/same_file.rs b/src/same_file.rs index afc1d92..71f2062 100644 --- a/src/same_file.rs +++ b/src/same_file.rs @@ -1,9 +1,9 @@ use std::io; use std::path::Path; -// Below are platform specific functions for testing the equality of two -// files. Namely, we want to know whether the two paths points to precisely -// the same underlying file object. +// Below are platform specific functions for testing the equality of two files. 
+// Namely, we want to know whether two paths point to precisely the same +// underlying file object. // // In our particular use case, the paths should only be directories. If we're // assuming that directories cannot be hard linked, then it seems like equality @@ -11,7 +11,8 @@ use std::path::Path; // // I'd also note that other popular libraries (Java's NIO and Boost) expose // a function like `is_same_file` whose implementation is similar. (i.e., check -// dev/inode on Unix and check `nFileIndex{High,Low}` on Windows.) +// dev/inode on Unix and check `nFileIndex{High,Low}` on Windows.) So this may +// be a candidate for extracting into a separate crate. // // ---AG @@ -119,9 +120,9 @@ where P: AsRef, Q: AsRef { s.as_os_str().encode_wide().chain(Some(0)).collect() } - // For correctness, it is critical that both file handles remain open - // while their attributes are checked for equality. In particular, - // the file index numbers are not guaranteed to remain stable over time. + // For correctness, it is critical that both file handles remain open while + // their attributes are checked for equality. In particular, the file index + // numbers are not guaranteed to remain stable over time. // // See the docs and remarks on MSDN: // https://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx @@ -133,8 +134,8 @@ where P: AsRef, Q: AsRef { // https://msdn.microsoft.com/en-us/library/windows/desktop/hh802691(v=vs.85).aspx // // It seems straight-forward enough to modify this code to use - // `FILE_ID_INFO` when available (minimum Windows Server 2012), but - // I don't have access to such Windows machines. + // `FILE_ID_INFO` when available (minimum Windows Server 2012), but I don't + // have access to such Windows machines. // // Two notes. // @@ -144,7 +145,7 @@ where P: AsRef, Q: AsRef { // `nFileIndex{Low,High}` are not unique. // // 2. 
LLVM has a bug where they fetch the id of a file and continue to use - // it even after the file has been closed, so that uniqueness is no + // it even after the handle has been closed, so that uniqueness is no // longer guaranteed (when `nFileIndex{Low,High}` are unique). // bug report: http://lists.llvm.org/pipermail/llvm-bugs/2014-December/037218.html // @@ -156,7 +157,8 @@ where P: AsRef, Q: AsRef { // In the case where this code is erroneous, two files will be reported // as equivalent when they are in fact distinct. This will cause the loop // detection code to report a false positive, which will prevent descending - // into the offending directory. + // into the offending directory. As far as failure modes goes, this isn't + // that bad. let h1 = try!(open_read_attr(&p1)); let h2 = try!(open_read_attr(&p2)); let i1 = try!(file_info(&h1)); diff --git a/src/tests.rs b/src/tests.rs index b8c6263..a853728 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,7 +1,6 @@ -#![allow(dead_code, unused_imports)] +#![cfg_attr(windows, allow(dead_code, unused_imports))] use std::env; -use std::fmt; use std::fs::{self, File}; use std::io; use std::path::{Path, PathBuf}; @@ -9,7 +8,7 @@ use std::path::{Path, PathBuf}; use quickcheck::{Arbitrary, Gen, QuickCheck, StdGen}; use rand::{self, Rng}; -use super::{DirEntry, WalkDir, WalkDirError, WalkDirIter}; +use super::{DirEntry, WalkDir, WalkDirIterator, Iter, Error, ErrorInner}; #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] enum Tree { @@ -19,15 +18,11 @@ enum Tree { } impl Tree { - fn from_walk>(p: P) -> io::Result { - Tree::from_walk_with(p, |wd| wd) - } - fn from_walk_with( p: P, f: F, ) -> io::Result - where P: AsRef, F: FnOnce(WalkDir

<P>) -> WalkDir<P>

{ + where P: AsRef, F: FnOnce(WalkDir) -> WalkDir { let mut stack = vec![Tree::Dir(p.as_ref().to_path_buf(), vec![])]; let it: WalkEventIter = f(WalkDir::new(p)).into(); for ev in it { @@ -43,7 +38,7 @@ impl Tree { stack.push(Tree::Dir(pb(dent.file_name()), vec![])); } WalkEvent::File(dent) => { - let node = if try!(dent.file_type()).is_symlink() { + let node = if dent.file_type().is_symlink() { let src = try!(fs::read_link(dent.path())); let dst = pb(dent.file_name()); Tree::Symlink(src, dst) @@ -222,42 +217,6 @@ impl Arbitrary for Tree { } } -/* -impl fmt::Debug for Tree { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fn rep(c: char, n: usize) -> String { - ::std::iter::repeat(c).take(n).collect() - } - - fn fmt( - f: &mut fmt::Formatter, - tree: &Tree, - depth: usize, - ) -> fmt::Result { - match *tree { - Tree::File(ref pb) => { - writeln!(f, "{}{}", rep(' ', 2 * depth), pb.display()) - } - Tree::Symlink(ref src, ref dst) => { - writeln!(f, "{}{} -> {}", - rep(' ', 2 * depth), - dst.display(), src.display()) - } - Tree::Dir(ref pb, ref children) => { - try!(writeln!(f, "{}{}", - rep(' ', 2 * depth), pb.display())); - for c in children { - try!(fmt(f, c, depth + 1)); - } - Ok(()) - } - } - } - fmt(f, self, 0) - } -} -*/ - #[derive(Debug)] enum WalkEvent { Dir(DirEntry), @@ -267,12 +226,12 @@ enum WalkEvent { struct WalkEventIter { depth: usize, - it: WalkDirIter, - next: Option>, + it: Iter, + next: Option>, } -impl> From> for WalkEventIter { - fn from(it: WalkDir

) -> WalkEventIter { +impl From for WalkEventIter { + fn from(it: WalkDir) -> WalkEventIter { WalkEventIter { depth: 0, it: it.into_iter(), next: None } } } @@ -282,26 +241,26 @@ impl Iterator for WalkEventIter { fn next(&mut self) -> Option> { let dent = self.next.take().or_else(|| self.it.next()); - if self.it.depth() < self.depth { + let depth = match dent { + None => 0, + Some(Ok(ref dent)) => dent.depth(), + Some(Err(ref err)) => err.depth(), + }; + if depth < self.depth { self.depth -= 1; self.next = dent; return Some(Ok(WalkEvent::Exit)); } - self.depth = self.it.depth(); + self.depth = depth; match dent { None => None, Some(Err(err)) => Some(Err(From::from(err))), Some(Ok(dent)) => { - match dent.file_type() { - Err(err) => Some(Err(err)), - Ok(ty) => { - if ty.is_dir() { - self.depth += 1; - Some(Ok(WalkEvent::Dir(dent))) - } else { - Some(Ok(WalkEvent::File(dent))) - } - } + if dent.file_type().is_dir() { + self.depth += 1; + Some(Ok(WalkEvent::Dir(dent))) + } else { + Some(Ok(WalkEvent::File(dent))) } } } @@ -311,10 +270,6 @@ impl Iterator for WalkEventIter { struct TempDir(PathBuf); impl TempDir { - fn join(&self, path: &str) -> PathBuf { - (&*self.0).join(path) - } - fn path<'a>(&'a self) -> &'a Path { &self.0 } @@ -335,11 +290,11 @@ fn tmpdir() -> TempDir { } fn dir_setup_with(t: &Tree, f: F) -> (TempDir, Tree) - where F: FnOnce(WalkDir<&Path>) -> WalkDir<&Path> { + where F: FnOnce(WalkDir) -> WalkDir { let tmp = tmpdir(); t.create_in(tmp.path()).unwrap(); let got = Tree::from_walk_with(tmp.path(), f).unwrap(); - (tmp, got.unwrap_singleton()) + (tmp, got.unwrap_singleton().unwrap_singleton()) } fn dir_setup(t: &Tree) -> (TempDir, Tree) { @@ -366,6 +321,10 @@ fn soft_link, Q: AsRef>( symlink(src, dst) } +// TODO: Figure out how to do symlinks on windows. +// Windows differentiates dir and file symlinks. +// We may need to tweak the `Tree` data type to +// split links into dir/file. 
#[cfg(windows)] fn soft_link, Q: AsRef>( _src: P, @@ -498,10 +457,10 @@ fn walk_dir_sym_detect_loop() { .collect::, _>>(); match got { Ok(x) => panic!("expected loop error, got no error: {:?}", x), - Err(WalkDirError::Io { .. }) => { + Err(Error { inner: ErrorInner::Io { .. }, .. }) => { panic!("expected loop error, got generic IO error"); } - Err(WalkDirError::Loop { .. }) => {} + Err(Error { inner: ErrorInner::Loop { .. }, .. }) => {} } } @@ -517,10 +476,10 @@ fn walk_dir_sym_infinite() { .collect::, _>>(); match got { Ok(x) => panic!("expected IO error, got no error: {:?}", x), - Err(WalkDirError::Loop { .. }) => { + Err(Error { inner: ErrorInner::Loop { .. }, .. }) => { panic!("expected IO error, but got loop error"); } - Err(WalkDirError::Io { .. }) => {} + Err(Error { inner: ErrorInner::Io { .. }, .. }) => {} } } @@ -536,7 +495,7 @@ fn walk_dir_min_depth_2() { let exp = td("foo", vec![tf("bar"), tf("baz")]); let tmp = tmpdir(); exp.create_in(tmp.path()).unwrap(); - let got = Tree::from_walk_with(tmp.path(), |wd| wd.min_depth(1)) + let got = Tree::from_walk_with(tmp.path(), |wd| wd.min_depth(2)) .unwrap().unwrap_dir(); assert_tree_eq!(exp, td("foo", got)); } @@ -550,7 +509,7 @@ fn walk_dir_min_depth_3() { ]); let tmp = tmpdir(); exp.create_in(tmp.path()).unwrap(); - let got = Tree::from_walk_with(tmp.path(), |wd| wd.min_depth(2)) + let got = Tree::from_walk_with(tmp.path(), |wd| wd.min_depth(3)) .unwrap().unwrap_dir(); assert_eq!(vec![tf("xyz")], got); } @@ -558,14 +517,14 @@ fn walk_dir_min_depth_3() { #[test] fn walk_dir_max_depth_1() { let exp = td("foo", vec![tf("bar")]); - let (_tmp, got) = dir_setup_with(&exp, |wd| wd.max_depth(0)); + let (_tmp, got) = dir_setup_with(&exp, |wd| wd.max_depth(1)); assert_tree_eq!(td("foo", vec![]), got); } #[test] fn walk_dir_max_depth_2() { let exp = td("foo", vec![tf("bar"), tf("baz")]); - let (_tmp, got) = dir_setup_with(&exp, |wd| wd.max_depth(0)); + let (_tmp, got) = dir_setup_with(&exp, |wd| wd.max_depth(1)); 
assert_tree_eq!(td("foo", vec![]), got); } @@ -581,7 +540,7 @@ fn walk_dir_max_depth_3() { td("abc", vec![]), tf("baz"), ]); - let (_tmp, got) = dir_setup_with(&exp, |wd| wd.max_depth(1)); + let (_tmp, got) = dir_setup_with(&exp, |wd| wd.max_depth(2)); assert_tree_eq!(exp_trimmed, got); } @@ -595,7 +554,7 @@ fn walk_dir_min_max_depth() { let tmp = tmpdir(); exp.create_in(tmp.path()).unwrap(); let got = Tree::from_walk_with(tmp.path(), - |wd| wd.min_depth(1).max_depth(1)) + |wd| wd.min_depth(2).max_depth(2)) .unwrap().unwrap_dir(); assert_tree_eq!( td("foo", vec![tf("bar"), td("abc", vec![]), tf("baz")]), @@ -612,13 +571,13 @@ fn walk_dir_skip() { let tmp = tmpdir(); exp.create_in(tmp.path()).unwrap(); let mut got = vec![]; - let mut it = WalkDir::new(tmp.path()).into_iter(); + let mut it = WalkDir::new(tmp.path()).min_depth(1).into_iter(); loop { let dent = match it.next().map(|x| x.unwrap()) { None => break, Some(dent) => dent, }; - let name = dent.file_name().into_string().unwrap(); + let name = dent.file_name().to_str().unwrap().to_owned(); if name == "abc" { it.skip_current_dir(); } @@ -628,6 +587,30 @@ fn walk_dir_skip() { assert_eq!(got, vec!["abc", "bar", "baz", "foo"]); // missing xyz! } +#[test] +fn walk_dir_filter() { + let exp = td("foo", vec![ + tf("bar"), + td("abc", vec![tf("fit")]), + tf("faz"), + ]); + let tmp = tmpdir(); + let tmp_path = tmp.path().to_path_buf(); + exp.create_in(tmp.path()).unwrap(); + let it = WalkDir::new(tmp.path()).min_depth(1) + .into_iter() + .filter_entry(move |d| { + let n = d.file_name().to_string_lossy().into_owned(); + !d.file_type().is_dir() + || n.starts_with("f") + || d.path() == &*tmp_path + }); + let mut got = it.map(|d| d.unwrap().file_name().to_str().unwrap().into()) + .collect::>(); + got.sort(); + assert_eq!(got, vec!["bar", "faz", "foo"]); +} + #[test] fn qc_roundtrip() { fn p(exp: Tree) -> bool {