D7928: rust-status: add function for sequential traversal of the working directory

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
6 messages Options
Reply | Threaded
Open this post in threaded view
|

D7928: rust-status: add function for sequential traversal of the working directory

marmoute (Pierre-Yves David)
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This change also introduces helper structs to make things clearer.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7928

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/dirstate/status.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs
--- a/rust/hg-cpython/src/dirstate/status.rs
+++ b/rust/hg-cpython/src/dirstate/status.rs
@@ -20,7 +20,7 @@
     matchers::{AlwaysMatcher, FileMatcher},
     status,
     utils::{files::get_path_from_bytes, hg_path::HgPath},
-    StatusResult,
+    DirstateStatus,
 };
 use std::borrow::Borrow;
 
@@ -114,7 +114,7 @@
 
 fn build_response(
     lookup: Vec<&HgPath>,
-    status_res: StatusResult,
+    status_res: DirstateStatus,
     py: Python,
 ) -> PyResult<(PyList, PyList, PyList, PyList, PyList, PyList, PyList)> {
     let modified = collect_pybytes_list(py, status_res.modified.as_ref());
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -13,7 +13,7 @@
     dirs_multiset::{DirsMultiset, DirsMultisetIter},
     dirstate_map::DirstateMap,
     parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
-    status::{status, StatusResult},
+    status::{status, DirstateStatus},
     CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
     StateMap, StateMapIter,
 };
diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -11,20 +11,39 @@
 
 use crate::{
     dirstate::SIZE_FROM_OTHER_PARENT,
-    matchers::Matcher,
+    matchers::{Matcher, VisitChildrenSet},
     utils::{
         files::HgMetadata,
         hg_path::{
             hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
         },
     },
-    CopyMap, DirstateEntry, DirstateMap, EntryState,
+    CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
 };
 use rayon::prelude::*;
-use std::collections::HashSet;
+use std::borrow::Cow;
+use std::collections::{HashSet, VecDeque};
 use std::fs::{read_dir, DirEntry};
+use std::io::ErrorKind;
+use std::ops::Deref;
 use std::path::Path;
 
+#[derive(Debug)]
+pub enum BadType {
+    CharacterDevice,
+    BlockDevice,
+    FIFO,
+    Socket,
+    Directory,
+    Unknown,
+}
+
+#[derive(Debug)]
+pub enum BadMatch {
+    OsError(i32),
+    BadType(BadType),
+}
+
 /// Marker enum used to dispatch new status entries into the right collections.
 /// Is similar to `crate::EntryState`, but represents the transient state of
 /// entries during the lifetime of a command.
@@ -36,6 +55,11 @@
     Deleted,
     Clean,
     Unknown,
+    Ignored,
+    /// Empty dispatch, the file is not worth listing
+    None,
+    /// Was explicitly matched but cannot be found/accessed
+    Bad(BadMatch),
 }
 
 type IoResult<T> = std::io::Result<T>;
@@ -81,9 +105,7 @@
     entry: DirstateEntry,
     metadata: HgMetadata,
     copy_map: &CopyMap,
-    check_exec: bool,
-    list_clean: bool,
-    last_normal_time: i64,
+    options: StatusOptions,
 ) -> Dispatch {
     let DirstateEntry {
         state,
@@ -103,7 +125,7 @@
         EntryState::Normal => {
             let size_changed = mod_compare(size, st_size as i32);
             let mode_changed =
-                (mode ^ st_mode as i32) & 0o100 != 0o000 && check_exec;
+                (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
             let metadata_changed = size >= 0 && (size_changed || mode_changed);
             let other_parent = size == SIZE_FROM_OTHER_PARENT;
             if metadata_changed
@@ -113,14 +135,14 @@
                 Dispatch::Modified
             } else if mod_compare(mtime, st_mtime as i32) {
                 Dispatch::Unsure
-            } else if st_mtime == last_normal_time {
+            } else if st_mtime == options.last_normal_time {
                 // the file may have just been marked as normal and
                 // it may have changed in the same second without
                 // changing its size. This can happen if we quickly
                 // do multiple commits. Force lookup, so we don't
                 // miss such a racy file change.
                 Dispatch::Unsure
-            } else if list_clean {
+            } else if options.list_clean {
                 Dispatch::Clean
             } else {
                 Dispatch::Unknown
@@ -153,9 +175,7 @@
     files: &'a HashSet<&HgPath>,
     dmap: &'a DirstateMap,
     root_dir: impl AsRef<Path> + Sync + Send,
-    check_exec: bool,
-    list_clean: bool,
-    last_normal_time: i64,
+    options: StatusOptions,
 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
     files.par_iter().filter_map(move |filename| {
         // TODO normalization
@@ -179,9 +199,7 @@
                                 *entry,
                                 HgMetadata::from_metadata(meta),
                                 &dmap.copy_map,
-                                check_exec,
-                                list_clean,
-                                last_normal_time,
+                                options,
                             ),
                         )));
                     }
@@ -204,14 +222,181 @@
     })
 }
 
+#[derive(Debug, Copy, Clone)]
+pub struct StatusOptions {
+    pub last_normal_time: i64,
+    pub check_exec: bool,
+    pub list_clean: bool,
+    pub list_ignored: bool,
+    pub list_unknown: bool,
+}
+
+/// Dispatch a single file found during `traverse`.
+/// If `file` is a folder that needs to be traversed, it will be pushed into
+/// `work`.
+fn traverse_worker<'a>(
+    work: &mut VecDeque<HgPathBuf>,
+    matcher: &impl Matcher,
+    dmap: &DirstateMap,
+    filename: impl AsRef<HgPath>,
+    dir_entry: &DirEntry,
+    ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
+    options: StatusOptions,
+) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
+    let file_type = match dir_entry.file_type() {
+        Ok(x) => x,
+        Err(e) => return Some(Err(e.into())),
+    };
+    let filename = filename.as_ref();
+    let entry_option = dmap.get(filename);
+
+    if file_type.is_dir() {
+        // Do we need to traverse it?
+        if !ignore_fn(&filename) {
+            work.push_front(filename.to_owned());
+        } else {
+            if options.list_ignored {
+                work.push_front(filename.to_owned());
+            }
+        }
+        // Nested `if` until `rust-lang/rust#53668` is stable
+        if let Some(entry) = entry_option {
+            // Used to be a file, is now a folder
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_missing(entry.state),
+                )));
+            }
+        }
+    } else if file_type.is_file() || file_type.is_symlink() {
+        if let Some(entry) = entry_option {
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                let metadata = match dir_entry.metadata() {
+                    Ok(x) => x,
+                    Err(e) => return Some(Err(e.into())),
+                };
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_found(
+                        &filename,
+                        *entry,
+                        HgMetadata::from_metadata(metadata),
+                        &dmap.copy_map,
+                        options,
+                    ),
+                )));
+            }
+        } else if (matcher.matches_everything() || matcher.matches(&filename))
+            && !ignore_fn(&filename)
+        {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Unknown,
+            )));
+        } else if ignore_fn(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Ignored,
+            )));
+        }
+    } else if let Some(entry) = entry_option {
+        // Used to be a file or a folder, now something else.
+        if matcher.matches_everything() || matcher.matches(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                dispatch_missing(entry.state),
+            )));
+        }
+    }
+    None
+}
+
+/// Walk the working directory recursively to look for changes compared to the
+/// current `DirstateMap`.
+fn traverse<'a>(
+    matcher: &(impl Matcher + Sync),
+    root_dir: impl AsRef<Path>,
+    dmap: &DirstateMap,
+    path: impl AsRef<HgPath>,
+    old_results: FastHashMap<Cow<'a, HgPath>, Dispatch>,
+    ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+    options: StatusOptions,
+) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
+    let root_dir = root_dir.as_ref();
+    let mut new_results = FastHashMap::default();
+
+    let mut work = VecDeque::new();
+    work.push_front(path.as_ref().to_owned());
+
+    while let Some(ref directory) = work.pop_front() {
+        if directory.as_bytes() == b".hg" {
+            continue;
+        }
+        let visit_entries = match matcher.visit_children_set(directory) {
+            VisitChildrenSet::Empty => continue,
+            VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
+            VisitChildrenSet::Set(set) => Some(set),
+        };
+        let buf = hg_path_to_path_buf(directory)?;
+        let dir_path = root_dir.join(buf);
+
+        let skip_dot_hg = !directory.as_bytes().is_empty();
+        let entries = match list_directory(dir_path, skip_dot_hg) {
+            Err(e) => match e.kind() {
+                ErrorKind::NotFound | ErrorKind::PermissionDenied => {
+                    new_results.insert(
+                        Cow::Owned(directory.to_owned()),
+                        Dispatch::Bad(BadMatch::OsError(
+                            // Unwrapping here is OK because the error always
+                            // is a real os error
+                            e.raw_os_error().unwrap(),
+                        )),
+                    );
+                    continue;
+                }
+                _ => return Err(e),
+            },
+            Ok(entries) => entries,
+        };
+
+        for (filename, dir_entry) in entries {
+            if let Some(ref set) = visit_entries {
+                if !set.contains(filename.deref()) {
+                    continue;
+                }
+            }
+            // TODO normalize
+            let filename = if directory.is_empty() {
+                filename.to_owned()
+            } else {
+                directory.join(&filename)
+            };
+
+            if !old_results.contains_key(filename.deref()) {
+                if let Some((res, dispatch)) = traverse_worker(
+                    &mut work, matcher, &dmap, &filename, &dir_entry,
+                    &ignore_fn, options,
+                )
+                .transpose()?
+                {
+                    new_results.insert(res, dispatch);
+                }
+            }
+        }
+    }
+
+    new_results.extend(old_results.into_iter());
+
+    Ok(new_results)
+}
+
 /// Stat all entries in the `DirstateMap` and mark them for dispatch into
 /// the relevant collections.
 fn stat_dmap_entries(
     dmap: &DirstateMap,
     root_dir: impl AsRef<Path> + Sync + Send,
-    check_exec: bool,
-    list_clean: bool,
-    last_normal_time: i64,
+    options: StatusOptions,
 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
     dmap.par_iter().map(move |(filename, entry)| {
         let filename: &HgPath = filename;
@@ -232,9 +417,7 @@
                     *entry,
                     HgMetadata::from_metadata(m),
                     &dmap.copy_map,
-                    check_exec,
-                    list_clean,
-                    last_normal_time,
+                    options,
                 ),
             )),
             Err(ref e)
@@ -252,47 +435,59 @@
     })
 }
 
-pub struct StatusResult<'a> {
+pub struct DirstateStatus<'a> {
     pub modified: Vec<&'a HgPath>,
     pub added: Vec<&'a HgPath>,
     pub removed: Vec<&'a HgPath>,
     pub deleted: Vec<&'a HgPath>,
     pub clean: Vec<&'a HgPath>,
+    pub ignored: Vec<&'a HgPath>,
+    pub unknown: Vec<&'a HgPath>,
+    pub bad: Vec<(&'a HgPath, BadMatch)>,
     /* TODO ignored
      * TODO unknown */
 }
 
 fn build_response<'a>(
     results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>,
-) -> IoResult<(Vec<&'a HgPath>, StatusResult<'a>)> {
+) -> IoResult<(Vec<&'a HgPath>, DirstateStatus<'a>)> {
     let mut lookup = vec![];
     let mut modified = vec![];
     let mut added = vec![];
     let mut removed = vec![];
     let mut deleted = vec![];
     let mut clean = vec![];
+    let mut ignored = vec![];
+    let mut unknown = vec![];
+    let mut bad = vec![];
 
     for res in results.into_iter() {
         let (filename, dispatch) = res?;
         match dispatch {
-            Dispatch::Unknown => {}
+            Dispatch::Unknown => unknown.push(filename),
             Dispatch::Unsure => lookup.push(filename),
             Dispatch::Modified => modified.push(filename),
             Dispatch::Added => added.push(filename),
             Dispatch::Removed => removed.push(filename),
             Dispatch::Deleted => deleted.push(filename),
             Dispatch::Clean => clean.push(filename),
+            Dispatch::Ignored => ignored.push(filename),
+            Dispatch::None => {}
+            Dispatch::Bad(reason) => bad.push((filename, reason)),
         }
     }
 
     Ok((
         lookup,
-        StatusResult {
+        DirstateStatus {
             modified,
             added,
             removed,
             deleted,
             clean,
+            ignored,
+            unknown,
+            bad,
         },
     ))
 }
@@ -301,31 +496,16 @@
     dmap: &'a DirstateMap,
     matcher: &'b (impl Matcher),
     root_dir: impl AsRef<Path> + Sync + Send + Copy,
-    list_clean: bool,
-    last_normal_time: i64,
-    check_exec: bool,
-) -> IoResult<(Vec<&'c HgPath>, StatusResult<'c>)> {
+    options: StatusOptions,
+) -> IoResult<(Vec<&'c HgPath>, DirstateStatus<'c>)> {
     let files = matcher.file_set();
     let mut results = vec![];
     if let Some(files) = files {
-        results.par_extend(walk_explicit(
-            &files,
-            &dmap,
-            root_dir,
-            check_exec,
-            list_clean,
-            last_normal_time,
-        ));
+        results.par_extend(walk_explicit(&files, &dmap, root_dir, options));
     }
 
     if !matcher.is_exact() {
-        let stat_results = stat_dmap_entries(
-            &dmap,
-            root_dir,
-            check_exec,
-            list_clean,
-            last_normal_time,
-        );
+        let stat_results = stat_dmap_entries(&dmap, root_dir, options);
         results.par_extend(stat_results);
     }
 



To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D7928: rust-status: add function for sequential traversal of the working directory

marmoute (Pierre-Yves David)
This revision now requires changes to proceed.
marmoute added a comment.
marmoute requested changes to this revision.


  There is a lot going on in this changeset. I am counting at least:
 
  - new `BadType/BadMatch` structs,
  - `StatusResult` → `DirstateStatus` change,
  - new `StatusOptions` struct
  - new `traverse_worker` function.
 
  Can we get the smaller cleanups as separate changesets beforehand? It would make things clearer in my opinion.

INLINE COMMENTS

> status.rs:61-62
> +    None,
> +    /// Was explicitly matched but cannot be found/accessed
> +    Bad(BadMatch),
>  }

nit: It might be worth adding an extra comment to the BadType/BadMatch declarations to clarify their intent.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7928/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7928

To: Alphare, #hg-reviewers, marmoute
Cc: marmoute, durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D7928: rust-status: add function for sequential traversal of the working directory

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare added a comment.
Alphare marked an inline comment as done.


  In D7928#119428 <https://phab.mercurial-scm.org/D7928#119428>, @marmoute wrote:
 
  > There is a lot going on in this changeset. I am counting at least:
  >
  > - new `BadType/BadMatch` structs,
  > - `StatusResult` → `DirstateStatus` change,
  > - new `StatusOptions` struct
  > - new `traverse_worker` function.
  >
  > Can we get the smaller cleanups as separate changesets beforehand? It would make things clearer in my opinion.
 
  I don't think the `BadType/BadMatch` structs warrant a new changeset. I'll split it into 3 then.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7928/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7928

To: Alphare, #hg-reviewers, marmoute
Cc: marmoute, durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D7928: rust-status: add function for sequential traversal of the working directory

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare updated this revision to Diff 19946.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7928?vs=19417&id=19946

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7928/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7928

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -11,18 +11,21 @@
 
 use crate::{
     dirstate::SIZE_FROM_OTHER_PARENT,
-    matchers::Matcher,
+    matchers::{Matcher, VisitChildrenSet},
     utils::{
         files::HgMetadata,
         hg_path::{
             hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
         },
     },
-    CopyMap, DirstateEntry, DirstateMap, EntryState,
+    CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
 };
 use rayon::prelude::*;
-use std::collections::HashSet;
+use std::borrow::Cow;
+use std::collections::{HashSet, VecDeque};
 use std::fs::{read_dir, DirEntry};
+use std::io::ErrorKind;
+use std::ops::Deref;
 use std::path::Path;
 
 /// Wrong type of file from a `BadMatch`
@@ -229,6 +232,168 @@
     pub last_normal_time: i64,
     pub check_exec: bool,
     pub list_clean: bool,
+    pub list_unknown: bool,
+    pub list_ignored: bool,
+}
+
+/// Dispatch a single file found during `traverse`.
+/// If `file` is a folder that needs to be traversed, it will be pushed into
+/// `work`.
+fn traverse_worker<'a>(
+    work: &mut VecDeque<HgPathBuf>,
+    matcher: &impl Matcher,
+    dmap: &DirstateMap,
+    filename: impl AsRef<HgPath>,
+    dir_entry: &DirEntry,
+    ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
+    options: StatusOptions,
+) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
+    let file_type = match dir_entry.file_type() {
+        Ok(x) => x,
+        Err(e) => return Some(Err(e.into())),
+    };
+    let filename = filename.as_ref();
+    let entry_option = dmap.get(filename);
+
+    if file_type.is_dir() {
+        // Do we need to traverse it?
+        if !ignore_fn(&filename) {
+            work.push_front(filename.to_owned());
+        } else {
+            if options.list_ignored {
+                work.push_front(filename.to_owned());
+            }
+        }
+        // Nested `if` until `rust-lang/rust#53668` is stable
+        if let Some(entry) = entry_option {
+            // Used to be a file, is now a folder
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_missing(entry.state),
+                )));
+            }
+        }
+    } else if file_type.is_file() || file_type.is_symlink() {
+        if let Some(entry) = entry_option {
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                let metadata = match dir_entry.metadata() {
+                    Ok(x) => x,
+                    Err(e) => return Some(Err(e.into())),
+                };
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_found(
+                        &filename,
+                        *entry,
+                        HgMetadata::from_metadata(metadata),
+                        &dmap.copy_map,
+                        options,
+                    ),
+                )));
+            }
+        } else if (matcher.matches_everything() || matcher.matches(&filename))
+            && !ignore_fn(&filename)
+        {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Unknown,
+            )));
+        } else if ignore_fn(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Ignored,
+            )));
+        }
+    } else if let Some(entry) = entry_option {
+        // Used to be a file or a folder, now something else.
+        if matcher.matches_everything() || matcher.matches(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                dispatch_missing(entry.state),
+            )));
+        }
+    }
+    None
+}
+
+/// Walk the working directory recursively to look for changes compared to the
+/// current `DirstateMap`.
+fn traverse<'a>(
+    matcher: &(impl Matcher + Sync),
+    root_dir: impl AsRef<Path>,
+    dmap: &DirstateMap,
+    path: impl AsRef<HgPath>,
+    old_results: FastHashMap<Cow<'a, HgPath>, Dispatch>,
+    ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+    options: StatusOptions,
+) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
+    let root_dir = root_dir.as_ref();
+    let mut new_results = FastHashMap::default();
+
+    let mut work = VecDeque::new();
+    work.push_front(path.as_ref().to_owned());
+
+    while let Some(ref directory) = work.pop_front() {
+        if directory.as_bytes() == b".hg" {
+            continue;
+        }
+        let visit_entries = match matcher.visit_children_set(directory) {
+            VisitChildrenSet::Empty => continue,
+            VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
+            VisitChildrenSet::Set(set) => Some(set),
+        };
+        let buf = hg_path_to_path_buf(directory)?;
+        let dir_path = root_dir.join(buf);
+
+        let skip_dot_hg = !directory.as_bytes().is_empty();
+        let entries = match list_directory(dir_path, skip_dot_hg) {
+            Err(e) => match e.kind() {
+                ErrorKind::NotFound | ErrorKind::PermissionDenied => {
+                    new_results.insert(
+                        Cow::Owned(directory.to_owned()),
+                        Dispatch::Bad(BadMatch::OsError(
+                            // Unwrapping here is OK because the error always
+                            // is a real os error
+                            e.raw_os_error().unwrap(),
+                        )),
+                    );
+                    continue;
+                }
+                _ => return Err(e),
+            },
+            Ok(entries) => entries,
+        };
+
+        for (filename, dir_entry) in entries {
+            if let Some(ref set) = visit_entries {
+                if !set.contains(filename.deref()) {
+                    continue;
+                }
+            }
+            // TODO normalize
+            let filename = if directory.is_empty() {
+                filename.to_owned()
+            } else {
+                directory.join(&filename)
+            };
+
+            if !old_results.contains_key(filename.deref()) {
+                if let Some((res, dispatch)) = traverse_worker(
+                    &mut work, matcher, &dmap, &filename, &dir_entry,
+                    &ignore_fn, options,
+                )
+                .transpose()?
+                {
+                    new_results.insert(res, dispatch);
+                }
+            }
+        }
+    }
+
+    new_results.extend(old_results.into_iter());
+
+    Ok(new_results)
 }
 
 /// Stat all entries in the `DirstateMap` and mark them for dispatch into



To: Alphare, #hg-reviewers, marmoute
Cc: marmoute, durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D7928: rust-status: add function for sequential traversal of the working directory

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare updated this revision to Diff 20047.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7928?vs=19946&id=20047

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7928/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7928

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -11,18 +11,21 @@
 
 use crate::{
     dirstate::SIZE_FROM_OTHER_PARENT,
-    matchers::Matcher,
+    matchers::{Matcher, VisitChildrenSet},
     utils::{
         files::HgMetadata,
         hg_path::{
             hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
         },
     },
-    CopyMap, DirstateEntry, DirstateMap, EntryState,
+    CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
 };
 use rayon::prelude::*;
-use std::collections::HashSet;
+use std::borrow::Cow;
+use std::collections::{HashSet, VecDeque};
 use std::fs::{read_dir, DirEntry};
+use std::io::ErrorKind;
+use std::ops::Deref;
 use std::path::Path;
 
 /// Wrong type of file from a `BadMatch`
@@ -233,6 +236,168 @@
     /// Whether we are on a filesystem with UNIX-like exec flags
     pub check_exec: bool,
     pub list_clean: bool,
+    pub list_unknown: bool,
+    pub list_ignored: bool,
+}
+
+/// Dispatch a single file found during `traverse`.
+/// If `file` is a folder that needs to be traversed, it will be pushed into
+/// `work`.
+fn traverse_worker<'a>(
+    work: &mut VecDeque<HgPathBuf>,
+    matcher: &impl Matcher,
+    dmap: &DirstateMap,
+    filename: impl AsRef<HgPath>,
+    dir_entry: &DirEntry,
+    ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
+    options: StatusOptions,
+) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
+    let file_type = match dir_entry.file_type() {
+        Ok(x) => x,
+        Err(e) => return Some(Err(e.into())),
+    };
+    let filename = filename.as_ref();
+    let entry_option = dmap.get(filename);
+
+    if file_type.is_dir() {
+        // Do we need to traverse it?
+        if !ignore_fn(&filename) {
+            work.push_front(filename.to_owned());
+        } else {
+            if options.list_ignored {
+                work.push_front(filename.to_owned());
+            }
+        }
+        // Nested `if` until `rust-lang/rust#53668` is stable
+        if let Some(entry) = entry_option {
+            // Used to be a file, is now a folder
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_missing(entry.state),
+                )));
+            }
+        }
+    } else if file_type.is_file() || file_type.is_symlink() {
+        if let Some(entry) = entry_option {
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                let metadata = match dir_entry.metadata() {
+                    Ok(x) => x,
+                    Err(e) => return Some(Err(e.into())),
+                };
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_found(
+                        &filename,
+                        *entry,
+                        HgMetadata::from_metadata(metadata),
+                        &dmap.copy_map,
+                        options,
+                    ),
+                )));
+            }
+        } else if (matcher.matches_everything() || matcher.matches(&filename))
+            && !ignore_fn(&filename)
+        {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Unknown,
+            )));
+        } else if ignore_fn(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Ignored,
+            )));
+        }
+    } else if let Some(entry) = entry_option {
+        // Used to be a file or a folder, now something else.
+        if matcher.matches_everything() || matcher.matches(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                dispatch_missing(entry.state),
+            )));
+        }
+    }
+    None
+}
+
+/// Walk the working directory recursively to look for changes compared to the
+/// current `DirstateMap`.
+fn traverse<'a>(
+    matcher: &(impl Matcher + Sync),
+    root_dir: impl AsRef<Path>,
+    dmap: &DirstateMap,
+    path: impl AsRef<HgPath>,
+    old_results: FastHashMap<Cow<'a, HgPath>, Dispatch>,
+    ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+    options: StatusOptions,
+) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
+    let root_dir = root_dir.as_ref();
+    let mut new_results = FastHashMap::default();
+
+    let mut work = VecDeque::new();
+    work.push_front(path.as_ref().to_owned());
+
+    while let Some(ref directory) = work.pop_front() {
+        if directory.as_bytes() == b".hg" {
+            continue;
+        }
+        let visit_entries = match matcher.visit_children_set(directory) {
+            VisitChildrenSet::Empty => continue,
+            VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
+            VisitChildrenSet::Set(set) => Some(set),
+        };
+        let buf = hg_path_to_path_buf(directory)?;
+        let dir_path = root_dir.join(buf);
+
+        let skip_dot_hg = !directory.as_bytes().is_empty();
+        let entries = match list_directory(dir_path, skip_dot_hg) {
+            Err(e) => match e.kind() {
+                ErrorKind::NotFound | ErrorKind::PermissionDenied => {
+                    new_results.insert(
+                        Cow::Owned(directory.to_owned()),
+                        Dispatch::Bad(BadMatch::OsError(
+                            // Unwrapping here is OK because the error always
+                            // is a real os error
+                            e.raw_os_error().unwrap(),
+                        )),
+                    );
+                    continue;
+                }
+                _ => return Err(e),
+            },
+            Ok(entries) => entries,
+        };
+
+        for (filename, dir_entry) in entries {
+            if let Some(ref set) = visit_entries {
+                if !set.contains(filename.deref()) {
+                    continue;
+                }
+            }
+            // TODO normalize
+            let filename = if directory.is_empty() {
+                filename.to_owned()
+            } else {
+                directory.join(&filename)
+            };
+
+            if !old_results.contains_key(filename.deref()) {
+                if let Some((res, dispatch)) = traverse_worker(
+                    &mut work, matcher, &dmap, &filename, &dir_entry,
+                    &ignore_fn, options,
+                )
+                .transpose()?
+                {
+                    new_results.insert(res, dispatch);
+                }
+            }
+        }
+    }
+
+    new_results.extend(old_results.into_iter());
+
+    Ok(new_results)
 }
 
 /// Stat all entries in the `DirstateMap` and mark them for dispatch into



To: Alphare, #hg-reviewers, marmoute
Cc: marmoute, durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D7928: rust-status: add function for sequential traversal of the working directory

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare updated this revision to Diff 20187.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7928?vs=20047&id=20187

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7928/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7928

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -11,18 +11,21 @@
 
 use crate::{
     dirstate::SIZE_FROM_OTHER_PARENT,
-    matchers::Matcher,
+    matchers::{Matcher, VisitChildrenSet},
     utils::{
         files::HgMetadata,
         hg_path::{
             hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
         },
     },
-    CopyMap, DirstateEntry, DirstateMap, EntryState,
+    CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
 };
 use rayon::prelude::*;
-use std::collections::HashSet;
+use std::borrow::Cow;
+use std::collections::{HashSet, VecDeque};
 use std::fs::{read_dir, DirEntry};
+use std::io::ErrorKind;
+use std::ops::Deref;
 use std::path::Path;
 
 /// Wrong type of file from a `BadMatch`
@@ -238,6 +241,168 @@
     /// Whether we are on a filesystem with UNIX-like exec flags
     pub check_exec: bool,
     pub list_clean: bool,
+    pub list_unknown: bool,
+    pub list_ignored: bool,
+}
+
+/// Dispatch a single directory entry found during `traverse`; `None` means
+/// nothing to report. If `filename` is a folder that needs to be traversed,
+/// it is pushed to the front of `work`.
+fn traverse_worker<'a>(
+    work: &mut VecDeque<HgPathBuf>,
+    matcher: &impl Matcher,
+    dmap: &DirstateMap,
+    filename: impl AsRef<HgPath>,
+    dir_entry: &DirEntry,
+    ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
+    options: StatusOptions,
+) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
+    let file_type = match dir_entry.file_type() {
+        Ok(x) => x,
+        Err(e) => return Some(Err(e.into())),
+    };
+    let filename = filename.as_ref();
+    let entry_option = dmap.get(filename);
+
+    if file_type.is_dir() {
+        // Queue it for traversal unless ignored and `list_ignored` is off
+        if !ignore_fn(&filename) {
+            work.push_front(filename.to_owned());
+        } else {
+            if options.list_ignored {
+                work.push_front(filename.to_owned());
+            }
+        }
+        // Nested `if` until `rust-lang/rust#53668` is stable
+        if let Some(entry) = entry_option {
+            // Tracked as a file in `dmap`, but is now a folder on disk
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_missing(entry.state),
+                )));
+            }
+        }
+    } else if file_type.is_file() || file_type.is_symlink() {
+        if let Some(entry) = entry_option {
+            if matcher.matches_everything() || matcher.matches(&filename) {
+                let metadata = match dir_entry.metadata() {
+                    Ok(x) => x,
+                    Err(e) => return Some(Err(e.into())),
+                };
+                return Some(Ok((
+                    Cow::Owned(filename.to_owned()),
+                    dispatch_found(
+                        &filename,
+                        *entry,
+                        HgMetadata::from_metadata(metadata),
+                        &dmap.copy_map,
+                        options,
+                    ),
+                )));
+            }
+        } else if (matcher.matches_everything() || matcher.matches(&filename))
+            && !ignore_fn(&filename)
+        {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Unknown,
+            )));
+        } else if ignore_fn(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                Dispatch::Ignored,
+            )));
+        }
+    } else if let Some(entry) = entry_option {
+        // Tracked in `dmap`, but is now neither a file, symlink nor folder
+        if matcher.matches_everything() || matcher.matches(&filename) {
+            return Some(Ok((
+                Cow::Owned(filename.to_owned()),
+                dispatch_missing(entry.state),
+            )));
+        }
+    }
+    None
+}
+
+/// Walk the working directory from `path`, dispatching entries against the
+/// `DirstateMap`; entries already in `old_results` keep their old dispatch.
+fn traverse<'a>(
+    matcher: &(impl Matcher + Sync),
+    root_dir: impl AsRef<Path>,
+    dmap: &DirstateMap,
+    path: impl AsRef<HgPath>,
+    old_results: FastHashMap<Cow<'a, HgPath>, Dispatch>,
+    ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+    options: StatusOptions,
+) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
+    let root_dir = root_dir.as_ref();
+    let mut new_results = FastHashMap::default();
+
+    let mut work = VecDeque::new();
+    work.push_front(path.as_ref().to_owned());
+
+    while let Some(ref directory) = work.pop_front() {
+        if directory.as_bytes() == b".hg" {
+            continue;
+        }
+        let visit_entries = match matcher.visit_children_set(directory) {
+            VisitChildrenSet::Empty => continue,
+            VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
+            VisitChildrenSet::Set(set) => Some(set),
+        };
+        let buf = hg_path_to_path_buf(directory)?;
+        let dir_path = root_dir.join(buf);
+
+        let skip_dot_hg = !directory.as_bytes().is_empty();
+        let entries = match list_directory(dir_path, skip_dot_hg) {
+            Err(e) => match e.kind() {
+                ErrorKind::NotFound | ErrorKind::PermissionDenied => {
+                    new_results.insert(
+                        Cow::Owned(directory.to_owned()),
+                        Dispatch::Bad(BadMatch::OsError(
+                            // Unwrapping assumes the error is always a real
+                            // OS error; verify for `list_directory`
+                            e.raw_os_error().unwrap(),
+                        )),
+                    );
+                    continue;
+                }
+                _ => return Err(e),
+            },
+            Ok(entries) => entries,
+        };
+
+        for (filename, dir_entry) in entries {
+            if let Some(ref set) = visit_entries {
+                if !set.contains(filename.deref()) {
+                    continue;
+                }
+            }
+            // TODO normalize
+            let filename = if directory.is_empty() {
+                filename.to_owned()
+            } else {
+                directory.join(&filename)
+            };
+
+            if !old_results.contains_key(filename.deref()) {
+                if let Some((res, dispatch)) = traverse_worker(
+                    &mut work, matcher, &dmap, &filename, &dir_entry,
+                    &ignore_fn, options,
+                )
+                .transpose()?
+                {
+                    new_results.insert(res, dispatch);
+                }
+            }
+        }
+    }
+
+    new_results.extend(old_results.into_iter());
+
+    Ok(new_results)
 }
 
 /// Stat all entries in the `DirstateMap` and mark them for dispatch into



To: Alphare, #hg-reviewers, marmoute
Cc: marmoute, durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel