D8110: rust-dirstatemap: cache non normal and other parent set

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
10 messages Options
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
marmoute created this revision.
marmoute added a comment.
Herald added subscribers: mercurial-devel, kevincox.
Herald added a reviewer: hg-reviewers.


  INTENDED FOR STABLE

REVISION SUMMARY
  Performance of `hg update` was significantly worse since the introduction of
  the Rust `dirstatemap`. This regression was noticed by Valentin Gatien-Baron
  when working on a large repository, as it goes unnoticed for smaller
  repositories like Mercurial itself.
 
  This fix introduces the same getter/setter mechanism at `hg-core` level as
  for `set/get_dirs`.
 
  While this technique is, as previously discussed, quite suboptimal, it fixes an
  important enough problem. Refactoring `hg-core` to use the typestate
  pattern could be a good approach to improving code quality in a future patch.
 
  This is a graft of stable of 83b2b829c94e <https://phab.mercurial-scm.org/rHG83b2b829c94ee984b95e34b4f38beb99b7f805e2>

REPOSITORY
  rHG Mercurial

BRANCH
  stable

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

AFFECTED FILES
  rust/hg-core/src/dirstate/dirstate_map.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -34,8 +34,8 @@
     file_fold_map: Option<FileFoldMap>,
     pub dirs: Option<DirsMultiset>,
     pub all_dirs: Option<DirsMultiset>,
-    non_normal_set: HashSet<HgPathBuf>,
-    other_parent_set: HashSet<HgPathBuf>,
+    non_normal_set: Option<HashSet<HgPathBuf>>,
+    other_parent_set: Option<HashSet<HgPathBuf>>,
     parents: Option<DirstateParents>,
     dirty_parents: bool,
 }
@@ -69,8 +69,8 @@
         self.state_map.clear();
         self.copy_map.clear();
         self.file_fold_map = None;
-        self.non_normal_set.clear();
-        self.other_parent_set.clear();
+        self.non_normal_set = None;
+        self.other_parent_set = None;
         self.set_parents(&DirstateParents {
             p1: NULL_ID,
             p2: NULL_ID,
@@ -98,11 +98,19 @@
         self.state_map.insert(filename.to_owned(), entry.to_owned());
 
         if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
-            self.non_normal_set.insert(filename.to_owned());
+            self.get_non_normal_other_parent_entries()
+                .0
+                .as_mut()
+                .unwrap()
+                .insert(filename.to_owned());
         }
 
         if entry.size == SIZE_FROM_OTHER_PARENT {
-            self.other_parent_set.insert(filename.to_owned());
+            self.get_non_normal_other_parent_entries()
+                .1
+                .as_mut()
+                .unwrap()
+                .insert(filename.to_owned());
         }
         Ok(())
     }
@@ -142,7 +150,11 @@
                 mtime: 0,
             },
         );
-        self.non_normal_set.insert(filename.to_owned());
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .insert(filename.to_owned());
         Ok(())
     }
 
@@ -168,7 +180,11 @@
         if let Some(ref mut file_fold_map) = self.file_fold_map {
             file_fold_map.remove(&normalize_case(filename));
         }
-        self.non_normal_set.remove(filename);
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .remove(filename);
 
         Ok(exists)
     }
@@ -193,14 +209,55 @@
                     }
                 });
             if changed {
-                self.non_normal_set.insert(filename.to_owned());
+                self.get_non_normal_other_parent_entries()
+                    .0
+                    .as_mut()
+                    .unwrap()
+                    .insert(filename.to_owned());
             }
         }
     }
 
-    pub fn non_normal_other_parent_entries(
-        &self,
-    ) -> (HashSet<HgPathBuf>, HashSet<HgPathBuf>) {
+    pub fn non_normal_entries_remove(
+        &mut self,
+        key: impl AsRef<HgPath>,
+    ) -> bool {
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .remove(key.as_ref())
+    }
+    pub fn non_normal_entries_union(
+        &mut self,
+        other: HashSet<HgPathBuf>,
+    ) -> Vec<HgPathBuf> {
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .union(&other)
+            .map(|e| e.to_owned())
+            .collect()
+    }
+
+    pub fn get_non_normal_other_parent_entries(
+        &mut self,
+    ) -> (
+        &mut Option<HashSet<HgPathBuf>>,
+        &mut Option<HashSet<HgPathBuf>>,
+    ) {
+        self.set_non_normal_other_parent_entries(false);
+        (&mut self.non_normal_set, &mut self.other_parent_set)
+    }
+
+    pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
+        if !force
+            && self.non_normal_set.is_some()
+            && self.other_parent_set.is_some()
+        {
+            return;
+        }
         let mut non_normal = HashSet::new();
         let mut other_parent = HashSet::new();
 
@@ -219,8 +276,8 @@
                 other_parent.insert(filename.to_owned());
             }
         }
-
-        (non_normal, other_parent)
+        self.non_normal_set = Some(non_normal);
+        self.other_parent_set = Some(other_parent);
     }
 
     /// Both of these setters and their uses appear to be the simplest way to
@@ -325,9 +382,7 @@
 
         self.dirty_parents = false;
 
-        let result = self.non_normal_other_parent_entries();
-        self.non_normal_set = result.0;
-        self.other_parent_set = result.1;
+        self.set_non_normal_other_parent_entries(true);
         Ok(packed)
     }
 
@@ -385,13 +440,27 @@
         .unwrap();
 
         assert_eq!(1, map.len());
-        assert_eq!(0, map.non_normal_set.len());
-        assert_eq!(0, map.other_parent_set.len());
+        assert_eq!(
+            0,
+            map.get_non_normal_other_parent_entries()
+                .0
+                .as_ref()
+                .unwrap()
+                .len()
+        );
+        assert_eq!(
+            0,
+            map.get_non_normal_other_parent_entries()
+                .1
+                .as_ref()
+                .unwrap()
+                .len()
+        );
     }
 
     #[test]
     fn test_non_normal_other_parent_entries() {
-        let map: DirstateMap = [
+        let mut map: DirstateMap = [
             (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
             (b"f2", (EntryState::Normal, 1337, 1337, -1)),
             (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
@@ -427,10 +496,11 @@
 
         let mut other_parent = HashSet::new();
         other_parent.insert(HgPathBuf::from_bytes(b"f4"));
+        let entries = map.get_non_normal_other_parent_entries();
 
         assert_eq!(
-            (non_normal, other_parent),
-            map.non_normal_other_parent_entries()
+            (Some(non_normal), Some(other_parent)),
+            (entries.0.to_owned(), entries.1.to_owned())
         );
     }
 }



To: marmoute, #hg-reviewers
Cc: kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
kevincox added inline comments.

INLINE COMMENTS

> dirstate_map.rs:38
> +    non_normal_set: Option<HashSet<HgPathBuf>>,
> +    other_parent_set: Option<HashSet<HgPathBuf>>,
>      parents: Option<DirstateParents>,

I don't understand why an `Option<HashSet>` is faster than a `HashSet`. Could you add some explanation to the commit message? Is this to avoid attempting to initialize the entry multiple times?

> dirstate_map.rs:251
> +        self.set_non_normal_other_parent_entries(false);
> +        (&mut self.non_normal_set, &mut self.other_parent_set)
> +    }

If we have just set the fields to Some(..) in the previous line can't we do the unwrap here where it is obviously correct?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers
Cc: kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
marmoute added a comment.
marmoute added a subscriber: Alphare.


  This is supposed to be a graft of something already accepted on default. So unless I did a mistake on the graft (cc @Alphare for checking) any feedback on this also apply on the default one.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare added a comment.


  In D8110#120671 <https://phab.mercurial-scm.org/D8110#120671>, @marmoute wrote:
 
  > This is supposed to be a graft of something already accepted on default. So unless I did a mistake on the graft (cc @Alphare for checking) any feedback on this also apply on the default one.
 
  This looks good (in that it looks exactly as bad as it should), thanks for the graft.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare added inline comments.

INLINE COMMENTS

> kevincox wrote in dirstate_map.rs:38
> I don't understand why an `Option<HashSet>` is faster than a `HashSet`. Could you add some explanation to the commit message? Is this to avoid attempting to initialize the entry multiple times?

Using `Option` adds a layer of indirection so that any endpoint that needs the set can initialize it without having to doubt if it was already initialized.

> kevincox wrote in dirstate_map.rs:251
> If we have just set the fields to Some(..) in the previous line can't we do the unwrap here where it is obviously correct?

Unwrapping creates a temporary value that is dropped instantly, so unless I'm not seeing something, I don't think we can.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Closed by commit rHG58c74a517a00: rust-dirstatemap: cache non normal and other parent set (authored by Alphare).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D8110?vs=20177&id=20181

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

AFFECTED FILES
  rust/hg-core/src/dirstate/dirstate_map.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -34,8 +34,8 @@
     file_fold_map: Option<FileFoldMap>,
     pub dirs: Option<DirsMultiset>,
     pub all_dirs: Option<DirsMultiset>,
-    non_normal_set: HashSet<HgPathBuf>,
-    other_parent_set: HashSet<HgPathBuf>,
+    non_normal_set: Option<HashSet<HgPathBuf>>,
+    other_parent_set: Option<HashSet<HgPathBuf>>,
     parents: Option<DirstateParents>,
     dirty_parents: bool,
 }
@@ -69,8 +69,8 @@
         self.state_map.clear();
         self.copy_map.clear();
         self.file_fold_map = None;
-        self.non_normal_set.clear();
-        self.other_parent_set.clear();
+        self.non_normal_set = None;
+        self.other_parent_set = None;
         self.set_parents(&DirstateParents {
             p1: NULL_ID,
             p2: NULL_ID,
@@ -98,11 +98,19 @@
         self.state_map.insert(filename.to_owned(), entry.to_owned());
 
         if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
-            self.non_normal_set.insert(filename.to_owned());
+            self.get_non_normal_other_parent_entries()
+                .0
+                .as_mut()
+                .unwrap()
+                .insert(filename.to_owned());
         }
 
         if entry.size == SIZE_FROM_OTHER_PARENT {
-            self.other_parent_set.insert(filename.to_owned());
+            self.get_non_normal_other_parent_entries()
+                .1
+                .as_mut()
+                .unwrap()
+                .insert(filename.to_owned());
         }
         Ok(())
     }
@@ -142,7 +150,11 @@
                 mtime: 0,
             },
         );
-        self.non_normal_set.insert(filename.to_owned());
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .insert(filename.to_owned());
         Ok(())
     }
 
@@ -168,7 +180,11 @@
         if let Some(ref mut file_fold_map) = self.file_fold_map {
             file_fold_map.remove(&normalize_case(filename));
         }
-        self.non_normal_set.remove(filename);
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .remove(filename);
 
         Ok(exists)
     }
@@ -193,14 +209,55 @@
                     }
                 });
             if changed {
-                self.non_normal_set.insert(filename.to_owned());
+                self.get_non_normal_other_parent_entries()
+                    .0
+                    .as_mut()
+                    .unwrap()
+                    .insert(filename.to_owned());
             }
         }
     }
 
-    pub fn non_normal_other_parent_entries(
-        &self,
-    ) -> (HashSet<HgPathBuf>, HashSet<HgPathBuf>) {
+    pub fn non_normal_entries_remove(
+        &mut self,
+        key: impl AsRef<HgPath>,
+    ) -> bool {
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .remove(key.as_ref())
+    }
+    pub fn non_normal_entries_union(
+        &mut self,
+        other: HashSet<HgPathBuf>,
+    ) -> Vec<HgPathBuf> {
+        self.get_non_normal_other_parent_entries()
+            .0
+            .as_mut()
+            .unwrap()
+            .union(&other)
+            .map(|e| e.to_owned())
+            .collect()
+    }
+
+    pub fn get_non_normal_other_parent_entries(
+        &mut self,
+    ) -> (
+        &mut Option<HashSet<HgPathBuf>>,
+        &mut Option<HashSet<HgPathBuf>>,
+    ) {
+        self.set_non_normal_other_parent_entries(false);
+        (&mut self.non_normal_set, &mut self.other_parent_set)
+    }
+
+    pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
+        if !force
+            && self.non_normal_set.is_some()
+            && self.other_parent_set.is_some()
+        {
+            return;
+        }
         let mut non_normal = HashSet::new();
         let mut other_parent = HashSet::new();
 
@@ -219,8 +276,8 @@
                 other_parent.insert(filename.to_owned());
             }
         }
-
-        (non_normal, other_parent)
+        self.non_normal_set = Some(non_normal);
+        self.other_parent_set = Some(other_parent);
     }
 
     /// Both of these setters and their uses appear to be the simplest way to
@@ -325,9 +382,7 @@
 
         self.dirty_parents = false;
 
-        let result = self.non_normal_other_parent_entries();
-        self.non_normal_set = result.0;
-        self.other_parent_set = result.1;
+        self.set_non_normal_other_parent_entries(true);
         Ok(packed)
     }
 
@@ -385,13 +440,27 @@
         .unwrap();
 
         assert_eq!(1, map.len());
-        assert_eq!(0, map.non_normal_set.len());
-        assert_eq!(0, map.other_parent_set.len());
+        assert_eq!(
+            0,
+            map.get_non_normal_other_parent_entries()
+                .0
+                .as_ref()
+                .unwrap()
+                .len()
+        );
+        assert_eq!(
+            0,
+            map.get_non_normal_other_parent_entries()
+                .1
+                .as_ref()
+                .unwrap()
+                .len()
+        );
     }
 
     #[test]
     fn test_non_normal_other_parent_entries() {
-        let map: DirstateMap = [
+        let mut map: DirstateMap = [
             (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
             (b"f2", (EntryState::Normal, 1337, 1337, -1)),
             (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
@@ -427,10 +496,11 @@
 
         let mut other_parent = HashSet::new();
         other_parent.insert(HgPathBuf::from_bytes(b"f4"));
+        let entries = map.get_non_normal_other_parent_entries();
 
         assert_eq!(
-            (non_normal, other_parent),
-            map.non_normal_other_parent_entries()
+            (Some(non_normal), Some(other_parent)),
+            (entries.0.to_owned(), entries.1.to_owned())
         );
     }
 }



To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
kevincox added inline comments.

INLINE COMMENTS

> Alphare wrote in dirstate_map.rs:251
> Unwrapping creates a temporary value that is dropped instantly, so unless I'm not seeing something, I don't think we can.

You should be able to use `.as_mut()` just like you are currently doing in the caller.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare added inline comments.

INLINE COMMENTS

> kevincox wrote in dirstate_map.rs:251
> You should be able to use `.as_mut()` just like you are currently doing in the caller.

`as_mut()` only does `&mut Option<T> -> Option<&mut T>`, which isn't always what we want (however you can argue that it would save a few keystrokes), but calling `unwrap()` circles back to the same `error[E0515]: cannot return value referencing temporary value`.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
kevincox added inline comments.

INLINE COMMENTS

> Alphare wrote in dirstate_map.rs:251
> `as_mut()` only does `&mut Option<T> -> Option<&mut T>`, which isn't always what we want (however you can argue that it would save a few keystrokes), but calling `unwrap()` circles back to the same `error[E0515]: cannot return value referencing temporary value`.

It works fine for me: https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=f74f3771641d1e29bdd4de1444c43324

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D8110: rust-dirstatemap: cache non normal and other parent set

marmoute (Pierre-Yves David)
In reply to this post by marmoute (Pierre-Yves David)
Alphare added inline comments.

INLINE COMMENTS

> kevincox wrote in dirstate_map.rs:251
> It works fine for me: https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=f74f3771641d1e29bdd4de1444c43324

Aaah I still had `&mut`, and the auto-deref rules didn't work there. I see that this is much better. I will send a follow-up on `default`, as that is just code cleanup and does not constitute a bug fix.

Thanks!

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8110/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8110

To: marmoute, #hg-reviewers, Alphare
Cc: Alphare, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel