D5945: rust: itering less on MissingAncestors.bases for max()

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
9 messages Options
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Instead of iterating on the whole `self.bases` each time to find
  its max, we keep the latter in a separate member attribute and
  keep it up to date in `add_bases()`
 
  On a perfdiscovery done on PyPy, with repos prepared with
  `contrib/discovery-helper.sh 50 100`, this gives a slight
  improvement (around 0.5% on wall time, but 10% on CPU)
 
  before:
  ! wall 0.172801 comb 0.180000 user 0.180000 sys 0.000000 (median of 541)
  after:
  ! wall 0.171798 comb 0.160000 user 0.160000 sys 0.000000 (median of 551)
 
  (perf command run time upped because of bigger variability during this test).

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

AFFECTED FILES
  contrib/perf.py
  rust/hg-core/src/ancestors.rs
  rust/hg-core/src/dagops.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dagops.rs b/rust/hg-core/src/dagops.rs
--- a/rust/hg-core/src/dagops.rs
+++ b/rust/hg-core/src/dagops.rs
@@ -46,7 +46,9 @@
     let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
     heads.remove(&NULL_REVISION);
     for rev in iter_revs {
-        remove_parents(graph, *rev, &mut heads)?;
+        if *rev != NULL_REVISION {
+            remove_parents(graph, *rev, &mut heads)?;
+        }
     }
     Ok(heads)
 }
@@ -71,7 +73,9 @@
     // mutating
     let as_vec: Vec<Revision> = revs.iter().cloned().collect();
     for rev in as_vec {
-        remove_parents(graph, rev, revs)?;
+        if rev != NULL_REVISION {
+            remove_parents(graph, rev, revs)?;
+        }
     }
     Ok(())
 }
diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs
--- a/rust/hg-core/src/ancestors.rs
+++ b/rust/hg-core/src/ancestors.rs
@@ -38,6 +38,7 @@
 pub struct MissingAncestors<G: Graph> {
     graph: G,
     bases: HashSet<Revision>,
+    max_base: Option<Revision>,
 }
 
 impl<G: Graph> AncestorsIterator<G> {
@@ -209,7 +210,13 @@
 
 impl<G: Graph> MissingAncestors<G> {
     pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
-        MissingAncestors { graph: graph, bases: bases.into_iter().collect() }
+        let mut created = MissingAncestors {
+            graph: graph,
+            bases: HashSet::new(),
+            max_base: None,
+        };
+        created.add_bases(bases);
+        created
     }
 
     pub fn has_bases(&self) -> bool {
@@ -237,12 +244,26 @@
         Ok(self.bases)
     }
 
+    /// Add some revisions to `self.bases`
+    ///
+    /// Takes care of keeping `self.max_base` up to date.
     pub fn add_bases(
         &mut self,
         new_bases: impl IntoIterator<Item = Revision>,
     ) {
-        self.bases.extend(new_bases);
+        // even if the type of Revision changes,
+        // NULL_REVISION would keep being the smallest
+        let mut max_base = self.max_base.unwrap_or(NULL_REVISION);
+        self.bases.extend(new_bases.into_iter().map(|r| {
+            if r > max_base {
+                max_base = r;
+            }
+            r
+        }));
         self.bases.remove(&NULL_REVISION);
+        if max_base != NULL_REVISION {
+            self.max_base = Some(max_base);
+        }
     }
 
     /// Remove all ancestors of self.bases from the revs set (in place)
@@ -261,14 +282,11 @@
         }
         // anything in revs > start is definitely not an ancestor of bases
         // revs <= start need to be investigated
-        // TODO optim: if a missingancestors is to be used several times,
-        // we shouldn't need to iterate each time on bases
-        let start = match self.bases.iter().cloned().max() {
-            Some(m) => m,
-            None => {  // self.bases is empty
-                return Ok(());
-            }
-        };
+
+        let start = match self.max_base {
+            Some(r) => r,
+            None => {return Ok(());}};
+
         // whatever happens, we'll keep at least keepcount of them
         // knowing this gives us a earlier stop condition than
         // going all the way to the root
@@ -285,12 +303,17 @@
         Ok(())
     }
 
-    /// Add rev's parents to self.bases
+    /// Add the parents of `rev` to `self.bases`
+    ///
+    /// This has no effect on `self.max_base`
     #[inline]
     fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
-        // No need to bother the set with inserting NULL_REVISION over and
-        // over
+        if rev == NULL_REVISION {
+            return Ok(());
+        }
         for p in self.graph.parents(rev)?.iter().cloned() {
+            // No need to bother the set with inserting NULL_REVISION over and
+            // over
             if p != NULL_REVISION {
                 self.bases.insert(p);
             }
@@ -320,12 +343,8 @@
         if revs_visit.is_empty() {
             return Ok(Vec::new());
         }
-
-        let max_bases =
-            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let max_revs =
-            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let start = max(max_bases, max_revs);
+        let max_revs = revs_visit.iter().cloned().max().unwrap();
+        let start = max(self.max_base.unwrap_or(max_revs), max_revs);
 
         // TODO heuristics for with_capacity()?
         let mut missing: Vec<Revision> = Vec::new();
@@ -571,11 +590,13 @@
             missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5]);
+        assert_eq!(missing_ancestors.max_base, Some(5));
 
         missing_ancestors.add_bases([3, 7, 8].iter().cloned());
         as_vec = missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+        assert_eq!(missing_ancestors.max_base, Some(8));
 
         as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
         as_vec.sort();
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -310,9 +310,9 @@
         count += 1
         results.append(item[0])
         cstop = util.timer()
-        if cstop - begin > 3 and count >= 100:
+        if cstop - begin > 3 and count >= 1000:
             break
-        if cstop - begin > 10 and count >= 3:
+        if cstop - begin > 90 and count >= 300:
             break
 
     formatone(fm, results, title=title, result=r,



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
kevincox accepted this revision.
kevincox added inline comments.

INLINE COMMENTS

> ancestors.rs:41
>      bases: HashSet<Revision>,
> +    max_base: Option<Revision>,
>  }

Does it make sense to just default this to -1 and remove the option?

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
In reply to this post by pulkit (Pulkit Goyal)
gracinet updated this revision to Diff 14046.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5945?vs=14042&id=14046

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

AFFECTED FILES
  rust/hg-core/src/ancestors.rs
  rust/hg-core/src/dagops.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/dagops.rs b/rust/hg-core/src/dagops.rs
--- a/rust/hg-core/src/dagops.rs
+++ b/rust/hg-core/src/dagops.rs
@@ -46,7 +46,9 @@
     let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
     heads.remove(&NULL_REVISION);
     for rev in iter_revs {
-        remove_parents(graph, *rev, &mut heads)?;
+        if *rev != NULL_REVISION {
+            remove_parents(graph, *rev, &mut heads)?;
+        }
     }
     Ok(heads)
 }
@@ -71,7 +73,9 @@
     // mutating
     let as_vec: Vec<Revision> = revs.iter().cloned().collect();
     for rev in as_vec {
-        remove_parents(graph, rev, revs)?;
+        if rev != NULL_REVISION {
+            remove_parents(graph, rev, revs)?;
+        }
     }
     Ok(())
 }
diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs
--- a/rust/hg-core/src/ancestors.rs
+++ b/rust/hg-core/src/ancestors.rs
@@ -38,6 +38,7 @@
 pub struct MissingAncestors<G: Graph> {
     graph: G,
     bases: HashSet<Revision>,
+    max_base: Option<Revision>,
 }
 
 impl<G: Graph> AncestorsIterator<G> {
@@ -209,7 +210,13 @@
 
 impl<G: Graph> MissingAncestors<G> {
     pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
-        MissingAncestors { graph: graph, bases: bases.into_iter().collect() }
+        let mut created = MissingAncestors {
+            graph: graph,
+            bases: HashSet::new(),
+            max_base: None,
+        };
+        created.add_bases(bases);
+        created
     }
 
     pub fn has_bases(&self) -> bool {
@@ -237,12 +244,26 @@
         Ok(self.bases)
     }
 
+    /// Add some revisions to `self.bases`
+    ///
+    /// Takes care of keeping `self.max_base` up to date.
     pub fn add_bases(
         &mut self,
         new_bases: impl IntoIterator<Item = Revision>,
     ) {
-        self.bases.extend(new_bases);
+        // even if the type of Revision changes,
+        // NULL_REVISION would keep being the smallest
+        let mut max_base = self.max_base.unwrap_or(NULL_REVISION);
+        self.bases.extend(new_bases.into_iter().map(|r| {
+            if r > max_base {
+                max_base = r;
+            }
+            r
+        }));
         self.bases.remove(&NULL_REVISION);
+        if max_base != NULL_REVISION {
+            self.max_base = Some(max_base);
+        }
     }
 
     /// Remove all ancestors of self.bases from the revs set (in place)
@@ -261,14 +282,11 @@
         }
         // anything in revs > start is definitely not an ancestor of bases
         // revs <= start need to be investigated
-        // TODO optim: if a missingancestors is to be used several times,
-        // we shouldn't need to iterate each time on bases
-        let start = match self.bases.iter().cloned().max() {
-            Some(m) => m,
-            None => {  // self.bases is empty
-                return Ok(());
-            }
-        };
+
+        let start = match self.max_base {
+            Some(r) => r,
+            None => {return Ok(());}};
+
         // whatever happens, we'll keep at least keepcount of them
         // knowing this gives us a earlier stop condition than
         // going all the way to the root
@@ -285,12 +303,17 @@
         Ok(())
     }
 
-    /// Add rev's parents to self.bases
+    /// Add the parents of `rev` to `self.bases`
+    ///
+    /// This has no effect on `self.max_base`
     #[inline]
     fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
-        // No need to bother the set with inserting NULL_REVISION over and
-        // over
+        if rev == NULL_REVISION {
+            return Ok(());
+        }
         for p in self.graph.parents(rev)?.iter().cloned() {
+            // No need to bother the set with inserting NULL_REVISION over and
+            // over
             if p != NULL_REVISION {
                 self.bases.insert(p);
             }
@@ -320,12 +343,8 @@
         if revs_visit.is_empty() {
             return Ok(Vec::new());
         }
-
-        let max_bases =
-            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let max_revs =
-            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let start = max(max_bases, max_revs);
+        let max_revs = revs_visit.iter().cloned().max().unwrap();
+        let start = max(self.max_base.unwrap_or(max_revs), max_revs);
 
         // TODO heuristics for with_capacity()?
         let mut missing: Vec<Revision> = Vec::new();
@@ -571,11 +590,13 @@
             missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5]);
+        assert_eq!(missing_ancestors.max_base, Some(5));
 
         missing_ancestors.add_bases([3, 7, 8].iter().cloned());
         as_vec = missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+        assert_eq!(missing_ancestors.max_base, Some(8));
 
         as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
         as_vec.sort();



To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
In reply to this post by pulkit (Pulkit Goyal)
gracinet added inline comments.

INLINE COMMENTS

> kevincox wrote in ancestors.rs:41
> Does it make sense to just default this to -1 and remove the option?

I must confess to have hesitated a bit on that one. On one hand, it would work, but semantically if ever a new "special" revision -2 is introduced, this could become problematic. On the other hand, there are probably a few areas that already depend on the fact that NULL_REVISION is the smallest one.

From the graph side of things, NULL_REVISION being a common ancestor of all revisions, so it must be smaller or equal than all, so it doesn't matter much.

Option<Revision> was merely a simple way to be sure not to be wrong, I'm open to suggestions.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
In reply to this post by pulkit (Pulkit Goyal)
kevincox added inline comments.

INLINE COMMENTS

> gracinet wrote in ancestors.rs:41
> I must confess to have hesitated a bit on that one. On one hand, it would work, but semantically if ever a new "special" revision -2 is introduced, this could become problematic. On the other hand, there are probably a few areas that already depend on the fact that NULL_REVISION is the smallest one.
>
> From the graph side of things, NULL_REVISION being a common ancestor of all revisions, so it must be smaller or equal than all, so it doesn't matter much.
>
> Option<Revision> was merely a simple way to be sure not to be wrong, I'm open to suggestions.

If you don't want to depend on the value of NULL_REVISION you can use the minimum value for the type backing revision.

Other special revisions shouldn't be relevant because you shouldn't be comparing them as the max anyways.

That being said the currents state is fine. So if you think it makes more sense that sounds good to me.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
In reply to this post by pulkit (Pulkit Goyal)
gracinet added inline comments.

INLINE COMMENTS

> kevincox wrote in ancestors.rs:41
> If you don't want to depend on the value of NULL_REVISION you can use the minimum value for the type backing revision.
>
> Other special revisions shouldn't be relevant because you shouldn't be comparing them as the max anyways.
>
> That being said the currents state is fine. So if you think it makes more sense that sounds good to me.

Ok, I've made up my mind: whatever the representation, `NULL_REVISION` must be smaller than all other `Revision`. Using it, rather than `-1`will indeed make this patch simpler.

Also, in subsequent (not yet submitted) works, I'm already liberally using this fact that `NULL_REVISION` is the smallest of all, so…

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
In reply to this post by pulkit (Pulkit Goyal)
gracinet updated this revision to Diff 14057.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5945?vs=14046&id=14057

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

AFFECTED FILES
  rust/hg-core/src/ancestors.rs
  rust/hg-core/src/dagops.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -14,6 +14,11 @@
 /// 4 bytes, and are liberally converted to ints, whence the i32
 pub type Revision = i32;
 
+
+/// Marker expressing the absence of a parent
+///
+/// Independently of the actual representation, `NULL_REVISION` is guaranteed
+/// to be smaller that all existing revisions.
 pub const NULL_REVISION: Revision = -1;
 
 /// Same as `mercurial.node.wdirrev`
diff --git a/rust/hg-core/src/dagops.rs b/rust/hg-core/src/dagops.rs
--- a/rust/hg-core/src/dagops.rs
+++ b/rust/hg-core/src/dagops.rs
@@ -46,7 +46,9 @@
     let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
     heads.remove(&NULL_REVISION);
     for rev in iter_revs {
-        remove_parents(graph, *rev, &mut heads)?;
+        if *rev != NULL_REVISION {
+            remove_parents(graph, *rev, &mut heads)?;
+        }
     }
     Ok(heads)
 }
@@ -71,7 +73,9 @@
     // mutating
     let as_vec: Vec<Revision> = revs.iter().cloned().collect();
     for rev in as_vec {
-        remove_parents(graph, rev, revs)?;
+        if rev != NULL_REVISION {
+            remove_parents(graph, rev, revs)?;
+        }
     }
     Ok(())
 }
diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs
--- a/rust/hg-core/src/ancestors.rs
+++ b/rust/hg-core/src/ancestors.rs
@@ -38,6 +38,7 @@
 pub struct MissingAncestors<G: Graph> {
     graph: G,
     bases: HashSet<Revision>,
+    max_base: Revision,
 }
 
 impl<G: Graph> AncestorsIterator<G> {
@@ -209,7 +210,13 @@
 
 impl<G: Graph> MissingAncestors<G> {
     pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
-        MissingAncestors { graph: graph, bases: bases.into_iter().collect() }
+        let mut created = MissingAncestors {
+            graph: graph,
+            bases: HashSet::new(),
+            max_base: NULL_REVISION,
+        };
+        created.add_bases(bases);
+        created
     }
 
     pub fn has_bases(&self) -> bool {
@@ -232,17 +239,33 @@
     }
 
     /// Consumes the object and returns the relative heads of its bases.
-    pub fn into_bases_heads(mut self) -> Result<HashSet<Revision>, GraphError> {
+    pub fn into_bases_heads(
+        mut self,
+    ) -> Result<HashSet<Revision>, GraphError> {
         dagops::retain_heads(&self.graph, &mut self.bases)?;
         Ok(self.bases)
     }
 
+    /// Add some revisions to `self.bases`
+    ///
+    /// Takes care of keeping `self.max_base` up to date.
     pub fn add_bases(
         &mut self,
         new_bases: impl IntoIterator<Item = Revision>,
     ) {
-        self.bases
-            .extend(new_bases.into_iter().filter(|&rev| rev != NULL_REVISION));
+        let mut max_base = self.max_base;
+        self.bases.extend(
+            new_bases
+                .into_iter()
+                .filter(|&rev| rev != NULL_REVISION)
+                .map(|r| {
+                    if r > max_base {
+                        max_base = r;
+                    }
+                    r
+                }),
+        );
+        self.max_base = max_base;
     }
 
     /// Remove all ancestors of self.bases from the revs set (in place)
@@ -261,20 +284,16 @@
         }
         // anything in revs > start is definitely not an ancestor of bases
         // revs <= start need to be investigated
-        // TODO optim: if a missingancestors is to be used several times,
-        // we shouldn't need to iterate each time on bases
-        let start = match self.bases.iter().cloned().max() {
-            Some(m) => m,
-            None => {  // self.bases is empty
-                return Ok(());
-            }
-        };
+        if self.max_base == NULL_REVISION {
+            return Ok(());
+        }
+
         // whatever happens, we'll keep at least keepcount of them
         // knowing this gives us a earlier stop condition than
         // going all the way to the root
-        let keepcount = revs.iter().filter(|r| **r > start).count();
+        let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
 
-        let mut curr = start;
+        let mut curr = self.max_base;
         while curr != NULL_REVISION && revs.len() > keepcount {
             if self.bases.contains(&curr) {
                 revs.remove(&curr);
@@ -285,12 +304,17 @@
         Ok(())
     }
 
-    /// Add rev's parents to self.bases
+    /// Add the parents of `rev` to `self.bases`
+    ///
+    /// This has no effect on `self.max_base`
     #[inline]
     fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
-        // No need to bother the set with inserting NULL_REVISION over and
-        // over
+        if rev == NULL_REVISION {
+            return Ok(());
+        }
         for p in self.graph.parents(rev)?.iter().cloned() {
+            // No need to bother the set with inserting NULL_REVISION over and
+            // over
             if p != NULL_REVISION {
                 self.bases.insert(p);
             }
@@ -320,12 +344,8 @@
         if revs_visit.is_empty() {
             return Ok(Vec::new());
         }
-
-        let max_bases =
-            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let max_revs =
-            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let start = max(max_bases, max_revs);
+        let max_revs = revs_visit.iter().cloned().max().unwrap();
+        let start = max(self.max_base, max_revs);
 
         // TODO heuristics for with_capacity()?
         let mut missing: Vec<Revision> = Vec::new();
@@ -571,11 +591,13 @@
             missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5]);
+        assert_eq!(missing_ancestors.max_base, 5);
 
         missing_ancestors.add_bases([3, 7, 8].iter().cloned());
         as_vec = missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+        assert_eq!(missing_ancestors.max_base, 8);
 
         as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
         as_vec.sort();



To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
In reply to this post by pulkit (Pulkit Goyal)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG9060af281be7: rust: itering less on MissingAncestors.bases for max() (authored by gracinet, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5945?vs=14057&id=14127

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

AFFECTED FILES
  rust/hg-core/src/ancestors.rs
  rust/hg-core/src/dagops.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -13,6 +13,11 @@
 /// 4 bytes, and are liberally converted to ints, whence the i32
 pub type Revision = i32;
 
+
+/// Marker expressing the absence of a parent
+///
+/// Independently of the actual representation, `NULL_REVISION` is guaranteed
+/// to be smaller that all existing revisions.
 pub const NULL_REVISION: Revision = -1;
 
 /// Same as `mercurial.node.wdirrev`
diff --git a/rust/hg-core/src/dagops.rs b/rust/hg-core/src/dagops.rs
--- a/rust/hg-core/src/dagops.rs
+++ b/rust/hg-core/src/dagops.rs
@@ -46,7 +46,9 @@
     let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
     heads.remove(&NULL_REVISION);
     for rev in iter_revs {
-        remove_parents(graph, *rev, &mut heads)?;
+        if *rev != NULL_REVISION {
+            remove_parents(graph, *rev, &mut heads)?;
+        }
     }
     Ok(heads)
 }
@@ -71,7 +73,9 @@
     // mutating
     let as_vec: Vec<Revision> = revs.iter().cloned().collect();
     for rev in as_vec {
-        remove_parents(graph, rev, revs)?;
+        if rev != NULL_REVISION {
+            remove_parents(graph, rev, revs)?;
+        }
     }
     Ok(())
 }
diff --git a/rust/hg-core/src/ancestors.rs b/rust/hg-core/src/ancestors.rs
--- a/rust/hg-core/src/ancestors.rs
+++ b/rust/hg-core/src/ancestors.rs
@@ -38,6 +38,7 @@
 pub struct MissingAncestors<G: Graph> {
     graph: G,
     bases: HashSet<Revision>,
+    max_base: Revision,
 }
 
 impl<G: Graph> AncestorsIterator<G> {
@@ -209,7 +210,13 @@
 
 impl<G: Graph> MissingAncestors<G> {
     pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
-        MissingAncestors { graph: graph, bases: bases.into_iter().collect() }
+        let mut created = MissingAncestors {
+            graph: graph,
+            bases: HashSet::new(),
+            max_base: NULL_REVISION,
+        };
+        created.add_bases(bases);
+        created
     }
 
     pub fn has_bases(&self) -> bool {
@@ -232,17 +239,33 @@
     }
 
     /// Consumes the object and returns the relative heads of its bases.
-    pub fn into_bases_heads(mut self) -> Result<HashSet<Revision>, GraphError> {
+    pub fn into_bases_heads(
+        mut self,
+    ) -> Result<HashSet<Revision>, GraphError> {
         dagops::retain_heads(&self.graph, &mut self.bases)?;
         Ok(self.bases)
     }
 
+    /// Add some revisions to `self.bases`
+    ///
+    /// Takes care of keeping `self.max_base` up to date.
     pub fn add_bases(
         &mut self,
         new_bases: impl IntoIterator<Item = Revision>,
     ) {
-        self.bases
-            .extend(new_bases.into_iter().filter(|&rev| rev != NULL_REVISION));
+        let mut max_base = self.max_base;
+        self.bases.extend(
+            new_bases
+                .into_iter()
+                .filter(|&rev| rev != NULL_REVISION)
+                .map(|r| {
+                    if r > max_base {
+                        max_base = r;
+                    }
+                    r
+                }),
+        );
+        self.max_base = max_base;
     }
 
     /// Remove all ancestors of self.bases from the revs set (in place)
@@ -261,20 +284,16 @@
         }
         // anything in revs > start is definitely not an ancestor of bases
         // revs <= start need to be investigated
-        // TODO optim: if a missingancestors is to be used several times,
-        // we shouldn't need to iterate each time on bases
-        let start = match self.bases.iter().cloned().max() {
-            Some(m) => m,
-            None => {  // self.bases is empty
-                return Ok(());
-            }
-        };
+        if self.max_base == NULL_REVISION {
+            return Ok(());
+        }
+
         // whatever happens, we'll keep at least keepcount of them
         // knowing this gives us a earlier stop condition than
         // going all the way to the root
-        let keepcount = revs.iter().filter(|r| **r > start).count();
+        let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
 
-        let mut curr = start;
+        let mut curr = self.max_base;
         while curr != NULL_REVISION && revs.len() > keepcount {
             if self.bases.contains(&curr) {
                 revs.remove(&curr);
@@ -285,12 +304,17 @@
         Ok(())
     }
 
-    /// Add rev's parents to self.bases
+    /// Add the parents of `rev` to `self.bases`
+    ///
+    /// This has no effect on `self.max_base`
     #[inline]
     fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
-        // No need to bother the set with inserting NULL_REVISION over and
-        // over
+        if rev == NULL_REVISION {
+            return Ok(());
+        }
         for p in self.graph.parents(rev)?.iter().cloned() {
+            // No need to bother the set with inserting NULL_REVISION over and
+            // over
             if p != NULL_REVISION {
                 self.bases.insert(p);
             }
@@ -320,12 +344,8 @@
         if revs_visit.is_empty() {
             return Ok(Vec::new());
         }
-
-        let max_bases =
-            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let max_revs =
-            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
-        let start = max(max_bases, max_revs);
+        let max_revs = revs_visit.iter().cloned().max().unwrap();
+        let start = max(self.max_base, max_revs);
 
         // TODO heuristics for with_capacity()?
         let mut missing: Vec<Revision> = Vec::new();
@@ -571,11 +591,13 @@
             missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5]);
+        assert_eq!(missing_ancestors.max_base, 5);
 
         missing_ancestors.add_bases([3, 7, 8].iter().cloned());
         as_vec = missing_ancestors.get_bases().iter().cloned().collect();
         as_vec.sort();
         assert_eq!(as_vec, [1, 3, 5, 7, 8]);
+        assert_eq!(missing_ancestors.max_base, 8);
 
         as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
         as_vec.sort();



To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|

D5945: rust: itering less on MissingAncestors.bases for max()

pulkit (Pulkit Goyal)
In reply to this post by pulkit (Pulkit Goyal)
kevincox added inline comments.

INLINE COMMENTS

> ancestors.rs:347
>          }
> -
> -        let max_bases =
> -            bases_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
> -        let max_revs =
> -            revs_visit.iter().cloned().max().unwrap_or(NULL_REVISION);
> -        let start = max(max_bases, max_revs);
> +        let max_revs = revs_visit.iter().cloned().max().unwrap();
> +        let start = max(self.max_base, max_revs);

You are doing an empty check and an unwrap. I think it would be nice to combine these to make it obvious that there is no panic chance here.

  let max_revs = revs_visit.iter().cloned().max();
  let max_revs = match max_revs {
    Some(max) => max,
    None => return Ok(Vec::new()),
  };

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5945

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel