Quantcast

[PATCH 1 of 6] clone: improve locking pattern for hg clone

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
15 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 1 of 6] clone: improve locking pattern for hg clone

Durham Goode
# HG changeset patch
# User Durham Goode <[hidden email]>
# Date 1495129620 25200
#      Thu May 18 10:47:00 2017 -0700
# Node ID 19be454ee16925d01da8f9d329cb48556083da1b
# Parent  8a87bfc5bebbbe0ac996ac8e047a029eb931af45
clone: improve locking pattern for hg clone

Previously, in some cases hg.clone() would take the lock via unconventional
means (since the repo didn't exist yet). If something later on tried to take the
lock via normal means (repo.lock()), it would enter a permanent deadlock since
the lock was held by the process, but the repo object didn't realize it since
the lock was initially taken without using repo.lock().

The fix is to release the lock once we've boostrapped the clone and switch to a
normal repo.lock version.

diff --git a/mercurial/hg.py b/mercurial/hg.py
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -586,6 +586,8 @@ def clone(ui, peeropts, source, dest=Non
             destpeer = peer(srcrepo, peeropts, dest)
             srcrepo.hook('outgoing', source='clone',
                           node=node.hex(node.nullid))
+            release(destlock)
+            destlock = None
         else:
             try:
                 destpeer = peer(srcrepo or ui, peeropts, dest, create=True)
@@ -628,52 +630,58 @@ def clone(ui, peeropts, source, dest=Non
 
         destrepo = destpeer.local()
         if destrepo:
-            template = uimod.samplehgrcs['cloned']
-            fp = destrepo.vfs("hgrc", "w", text=True)
-            u = util.url(abspath)
-            u.passwd = None
-            defaulturl = str(u)
-            fp.write(template % defaulturl)
-            fp.close()
+            wlock = lock = None
+            try:
+                wlock = destrepo.wlock()
+                lock = destrepo.lock()
+                template = uimod.samplehgrcs['cloned']
+                fp = destrepo.vfs("hgrc", "w", text=True)
+                u = util.url(abspath)
+                u.passwd = None
+                defaulturl = str(u)
+                fp.write(template % defaulturl)
+                fp.close()
 
-            destrepo.ui.setconfig('paths', 'default', defaulturl, 'clone')
+                destrepo.ui.setconfig('paths', 'default', defaulturl, 'clone')
 
-            if update:
-                if update is not True:
-                    checkout = srcpeer.lookup(update)
-                uprev = None
-                status = None
-                if checkout is not None:
-                    try:
-                        uprev = destrepo.lookup(checkout)
-                    except error.RepoLookupError:
-                        if update is not True:
+                if update:
+                    if update is not True:
+                        checkout = srcpeer.lookup(update)
+                    uprev = None
+                    status = None
+                    if checkout is not None:
+                        try:
+                            uprev = destrepo.lookup(checkout)
+                        except error.RepoLookupError:
+                            if update is not True:
+                                try:
+                                    uprev = destrepo.lookup(update)
+                                except error.RepoLookupError:
+                                    pass
+                    if uprev is None:
+                        try:
+                            uprev = destrepo._bookmarks['@']
+                            update = '@'
+                            bn = destrepo[uprev].branch()
+                            if bn == 'default':
+                                status = _("updating to bookmark @\n")
+                            else:
+                                status = (_("updating to bookmark @ on "
+                                            "branch %s\n") % bn)
+                        except KeyError:
                             try:
-                                uprev = destrepo.lookup(update)
+                                uprev = destrepo.branchtip('default')
                             except error.RepoLookupError:
-                                pass
-                if uprev is None:
-                    try:
-                        uprev = destrepo._bookmarks['@']
-                        update = '@'
+                                uprev = destrepo.lookup('tip')
+                    if not status:
                         bn = destrepo[uprev].branch()
-                        if bn == 'default':
-                            status = _("updating to bookmark @\n")
-                        else:
-                            status = (_("updating to bookmark @ on branch %s\n")
-                                      % bn)
-                    except KeyError:
-                        try:
-                            uprev = destrepo.branchtip('default')
-                        except error.RepoLookupError:
-                            uprev = destrepo.lookup('tip')
-                if not status:
-                    bn = destrepo[uprev].branch()
-                    status = _("updating to branch %s\n") % bn
-                destrepo.ui.status(status)
-                _update(destrepo, uprev)
-                if update in destrepo._bookmarks:
-                    bookmarks.activate(destrepo, update)
+                        status = _("updating to branch %s\n") % bn
+                    destrepo.ui.status(status)
+                    _update(destrepo, uprev)
+                    if update in destrepo._bookmarks:
+                        bookmarks.activate(destrepo, update)
+            finally:
+                release(lock, wlock)
     finally:
         release(srclock, destlock)
         if cleandir is not None:
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 2 of 6] transaction: use unfiltered changelog length for transaction start

Durham Goode
# HG changeset patch
# User Durham Goode <[hidden email]>
# Date 1495129620 25200
#      Thu May 18 10:47:00 2017 -0700
# Node ID 35235b1cefb101dad0672a4c4ee9486fba8b935b
# Parent  19be454ee16925d01da8f9d329cb48556083da1b
transaction: use unfiltered changelog length for transaction start

Previously we used the normal changelog length for the transaction start. This
triggered the computehidden logic which in a future patch triggers logic in the
new hidden store which may start a transaction. This is circular and results in
broken transaction mechanics, so let's use the unfiltered repo instead to avoid
triggering computehidden during transaction start.

This seems like how it should've been in the first place anyway since recording
the length of the filtered changelog seems odd for a transaction.

This was causing test failures in a future patch, so test coverage will come
from the future patch.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1123,7 +1123,7 @@ class localrepository(object):
         self.vfs.write("journal.branch",
                           encoding.fromlocal(self.dirstate.branch()))
         self.vfs.write("journal.desc",
-                          "%d\n%s\n" % (len(self), desc))
+                          "%d\n%s\n" % (len(self.unfiltered()), desc))
         self.vfs.write("journal.bookmarks",
                           self.vfs.tryread("bookmarks"))
         self.svfs.write("journal.phaseroots",
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 3 of 6] hidden: add rootset class

Durham Goode
In reply to this post by Durham Goode
# HG changeset patch
# User Durham Goode <[hidden email]>
# Date 1495129620 25200
#      Thu May 18 10:47:00 2017 -0700
# Node ID 3c0381b4b5687dcd53d5b05c94ffa989ff3133b2
# Parent  35235b1cefb101dad0672a4c4ee9486fba8b935b
hidden: add rootset class

Adds a rootset class that acts like a set of commits where a given commit being
in the set implies that all descendants are also in the set. It stores this data
in as a series of roots for the set and validates that the set is consistent
when anything is added or removed.

diff --git a/mercurial/hidden.py b/mercurial/hidden.py
new file mode 100644
--- /dev/null
+++ b/mercurial/hidden.py
@@ -0,0 +1,137 @@
+# hidden.py - hidden commit storage and maintenance logic
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from .i18n import _
+from .node import bin, hex
+from . import (
+    error,
+)
+
+class rootset(object):
+    """A commit set like object where a commit being in the set implies that all
+    descendants of that commit are also in the set.
+    """
+    def __init__(self, repo, opener, name):
+        self.repo = repo
+        self.opener = opener
+        self.name = name
+        self.filename = "%s.roots" % name
+        self.path = opener.join(self.filename)
+        self.dirty = False
+        self.roots = None
+        self._cache = None
+
+    def _deserialize(self, raw):
+        repo = self.repo
+        return set(bin(n) for n
+                   in raw.split(',')
+                   if n and bin(n) in repo.changelog.nodemap)
+
+    def _serialize(self):
+        return ','.join(hex(n) for n in sorted(self.roots))
+
+    def _write(self, fp):
+        fp.write(self._serialize())
+        self.dirty = False
+
+    def __nonzero__(self):
+        self._load()
+        return len(self.roots) != 0
+
+    def _load(self):
+        if self.roots is None:
+            if self.opener.exists(self.filename):
+                raw = self.opener.read(self.filename)
+                self.roots = self._deserialize(raw)
+            else:
+                self.roots = set()
+
+            self._fillcache()
+            self.dirty = False
+
+    def _fillcache(self):
+        cl = self.repo.changelog
+        realroots = list(cl.rev(n) for n in self.roots
+                         if n in cl.nodemap)
+        if not realroots:
+            self._cache = set()
+        else:
+            truerevs = cl.descendants(realroots)
+            self._cache = set(cl.node(r) for r in truerevs)
+            self._cache.update(self.roots)
+
+    def __contains__(self, node):
+        self._load()
+        if isinstance(node, int):
+            raise RuntimeError("%s should be a node", node)
+        return node in self._cache
+
+    def set(self, tr, node, value, childmap=None):
+        """Sets the given node to the given True/False value.
+
+        If setting to True, this function checks that the children are already
+        True and throws an exception if not.
+
+        If setting to False, ancestor commits that should now also be False are
+        automatically updated to be False as well.
+
+        The difference in behavior is for performance reasons. Ideally we would
+        automatically update the set when setting to True, but most updates come
+        in topological order (highest rev first), which means repeatedly setting
+        values to True is O(n^2) (since every set has to walk descendants).
+        """
+        if isinstance(node, int):
+            raise RuntimeError("%s should be a node", node)
+        self._load()
+        if value == (node in self._cache):
+            return
+
+        cl = self.repo.changelog
+        # If changing to true
+        if value:
+            # Make it a root and remove old roots
+            if childmap is not None:
+                children = childmap.get(cl.rev(node))
+                childrennodes = set(cl.node(r) for r in children)
+            else:
+                childrennodes = cl.children(node)
+            if any(c for c in childrennodes if c not in self._cache):
+                raise error.Abort(_("cannot make node hidden unless children "
+                                    "are already hidden"))
+            self.roots.difference_update(childrennodes)
+            self.roots.add(node)
+
+            # Update cache
+            self._cache.add(node)
+        else:
+            # If changing to false, move ancestor roots forward
+            newlyvisible = self.repo.revs('%ln::%n', self.roots, node)
+            newlyvisiblenodes = list(cl.node(r) for r in newlyvisible)
+            self._cache.difference_update(newlyvisiblenodes)
+            self.roots.difference_update(newlyvisiblenodes)
+
+            # Add new roots to maintain truthy-ness of revs unrelated to this
+            # change.
+            newroots = self.repo.revs(
+                '(children(%ld) - ancestors(%n)) - descendants(%ln)',
+                newlyvisible, node, self.roots
+            )
+            self.roots.update(cl.node(r) for r in newroots)
+
+        self.dirty = True
+        tr.addfilegenerator(self.name, (self.filename,), self._write,
+                            location='')
+        tr.hookargs['%s_changed' % self.name] = '1'
+
+    def __iter__(self):
+        self._load()
+        for node in self._cache:
+            yield node
+
+    def __len__(self):
+        self._load()
+        return len(self._cache)
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 4 of 6] hidden: add a hidden rootset to the repo instance

Durham Goode
In reply to this post by Durham Goode
# HG changeset patch
# User Durham Goode <[hidden email]>
# Date 1495129620 25200
#      Thu May 18 10:47:00 2017 -0700
# Node ID 2aef3c4be6ba81e9d2453760515517df6fc2b816
# Parent  3c0381b4b5687dcd53d5b05c94ffa989ff3133b2
hidden: add a hidden rootset to the repo instance

As part of moving hiddenness to be persisted, let's add a hidden rootset onto
the localrepo.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -37,6 +37,7 @@ from . import (
     exchange,
     extensions,
     filelog,
+    hidden,
     hook,
     lock as lockmod,
     manifest,
@@ -385,6 +386,10 @@ class localrepository(object):
         # generic mapping between names and nodes
         self.names = namespaces.namespaces()
 
+    @storecache('hidden.roots')
+    def hidden(self):
+        return hidden.rootset(self, self.svfs, 'hidden')
+
     def close(self):
         self._writecaches()
 
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 5 of 6] hidden: add hiddenset and updatevisibility logic

Durham Goode
In reply to this post by Durham Goode
# HG changeset patch
# User Durham Goode <[hidden email]>
# Date 1495129620 25200
#      Thu May 18 10:47:00 2017 -0700
# Node ID 0979ce9d40803103441b6df1ebefafe80bb9a057
# Parent  2aef3c4be6ba81e9d2453760515517df6fc2b816
hidden: add hiddenset and updatevisibility logic

This adds a function that can incrementally update the hidden state for a
repository when given a list of nodes that have been changed. This will be
called by all actions that can affect hiddenness (bookmarks, tags, obsmarkers,
working copy parents). It uses a hash of the inputs that affect the hidden store
to ensure things are up-to-date.

This has the nice property that all logic for propagating hiddenness is
contained to one function, and all the consumers just have to report what
commits they touched. If an external extension wants to modify how hiding is
computed, it can just wrap this one function.

It also is nice because it updates the hidden state incrementally, instead of
recomputing the entire repo at once.

diff --git a/mercurial/hidden.py b/mercurial/hidden.py
--- a/mercurial/hidden.py
+++ b/mercurial/hidden.py
@@ -5,12 +5,38 @@
 
 from __future__ import absolute_import
 
+import hashlib
+import heapq
+
 from .i18n import _
-from .node import bin, hex
+from .node import bin, hex, nullid, nullrev
 from . import (
     error,
+    obsolete,
+    tags as tagsmod,
+    util,
 )
 
+class lazychildset(object):
+    """Data structure for efficiently answering the query 'what are my children'
+    for a series of revisions."""
+    def __init__(self, repo):
+        self.repo = repo.unfiltered()
+        self._childmap = {}
+        self._minprocessed = len(repo.changelog)
+
+    def get(self, rev):
+        childmap = self._childmap
+        cl = self.repo.changelog
+        if rev < self._minprocessed:
+            for r in xrange(rev, self._minprocessed):
+                for p in cl.parentrevs(r):
+                    if p != nullrev:
+                        childmap.setdefault(p, []).append(r)
+            self._minprocessed = rev
+
+        return childmap.get(rev, ())
+
 class rootset(object):
     """A commit set like object where a commit being in the set implies that all
     descendants of that commit are also in the set.
@@ -135,3 +161,168 @@ class rootset(object):
     def __len__(self):
         self._load()
         return len(self._cache)
+
+class hiddenset(rootset):
+    """A set representing what commits should be hidden. It contains logic to
+    update itself based on what nodes are reported as having been changed.
+    """
+    def __init__(self, repo, opener, name):
+        super(hiddenset, self).__init__(repo, opener, name)
+        self._load()
+
+    def _load(self):
+        if self.roots is None:
+            try:
+                cachehash = None
+                raw = self.opener.read(self.filename)
+                if len(raw) >= 20:
+                    cachehash = raw[:20]
+                    raw = raw[20:]
+                else:
+                    raise RuntimeError("invalid cache")
+
+                realhash = self._gethash()
+                if cachehash == realhash:
+                    self.roots = self._deserialize(raw)
+                else:
+                    raise RuntimeError("cache hash mismatch")
+            except (IOError, RuntimeError):
+                # Rebuild the hidden roots from scratch
+                self.roots = set()
+                self._cache = set()
+                revs = self.repo.revs('not public()')
+                self.updatevisibility(revs)
+
+            self._fillcache()
+            self.dirty = False
+
+    def _write(self, fp):
+        fp.write(self._gethash())
+        fp.write(self._serialize())
+        self.dirty = False
+
+    def _gethash(self):
+        hiderevs, nohiderevs = self._gethiderevs()
+        cl = self.repo.changelog
+        hash = hashlib.sha1()
+        hash.update("hide")
+        hash.update(''.join(sorted(cl.node(r) for r in hiderevs)))
+        hash.update("nohide")
+        hash.update(''.join(sorted(cl.node(r) for r in nohiderevs)))
+
+        return hash.digest()
+
+    def _gethiderevs(self):
+        """Returns the set of revs that potentially could be hidden and a set of
+        revs that definitely should not be hidden."""
+        hiderevs = set()
+        nohiderevs = set()
+
+        repo = self.repo
+        cl = repo.changelog
+
+        # Hide obsolete commits
+        hiderevs.update(obsolete.getrevs(repo, 'obsolete'))
+
+        # Don't hide bookmarks
+        nohiderevs.update(cl.rev(node) for name, node in
+                          repo._bookmarks.iteritems())
+
+        # Don't hide workingcopy parents
+        nohiderevs.update(cl.rev(node) for node in repo.dirstate.parents()
+                          if node != nullid and node in cl.nodemap)
+
+        # Don't hide local tags
+        tags = {}
+        tagsmod.readlocaltags(repo.ui, repo, tags, {})
+        nohiderevs.update(cl.rev(t[0]) for t in tags.values()
+                          if t[0] in cl.nodemap)
+
+        # Don't hide rebase commits
+        if util.safehasattr(repo, '_rebaseset'):
+            nohiderevs.update(repo._rebaseset)
+
+        return hiderevs, nohiderevs
+
+    def _shouldhide(self, rev, childmap, hiderevs, nohiderevs):
+        repo = self.repo
+        ispublic = repo._phasecache.phase(repo, rev) == 0
+        cl = repo.changelog
+        return (not ispublic and
+                rev in hiderevs and
+                rev not in nohiderevs and
+                all(cl.node(r) in self for r in childmap.get(rev)))
+
+    def _needupdate(self, revs):
+        repo = self.repo
+        cl = repo.changelog
+
+        hiderevs, nohiderevs = self._gethiderevs()
+
+        childmap = lazychildset(repo)
+        for rev in revs:
+            node = cl.node(rev)
+            # If the node should be visible...
+            if not self._shouldhide(rev, childmap, hiderevs, nohiderevs):
+                # ...but it's hidden
+                if node in self:
+                    return True
+            # Otherwise, the node should be hidden
+            elif node not in self:
+                return True
+
+        return False
+
+    def updatevisibility(self, revs):
+        """Updates the visibility of the given revs, and propagates those
+        updates to the rev parents as necessary."""
+        self._load()
+
+        repo = self.repo
+        seen = set(revs)
+        if not seen or not self._needupdate(seen):
+            # Even if there are no changes, we need to rewrite the file so the
+            # cache hash is up to date with the latest changes.
+            with self.opener(self.filename, 'w+', atomictemp=True) as fp:
+                self._write(fp)
+            return
+
+        # Sort the revs so we can process them in topo order
+        heap = []
+        for rev in seen:
+            heapq.heappush(heap, -rev)
+
+        with repo.lock():
+            with repo.transaction('hidden') as tr:
+                hiderevs, nohiderevs = self._gethiderevs()
+
+                cl = repo.changelog
+                childmap = lazychildset(repo)
+                while heap:
+                    rev = -heapq.heappop(heap)
+
+                    # If it should be visible...
+                    node = cl.node(rev)
+                    if not self._shouldhide(rev, childmap, hiderevs,
+                                            nohiderevs):
+                        # And it's currently invisible...
+                        if node in self:
+                            # Make it visible and propagate
+                            # (propogate first, since otherwise we can't access
+                            # the parents)
+                            for parent in cl.parentrevs(rev):
+                                if parent != nullrev and parent not in seen:
+                                    heapq.heappush(heap, -parent)
+                                    seen.add(parent)
+                            self.set(tr, cl.node(rev), False,
+                                     childmap=childmap)
+
+                    # If it should not be visible, and it's not already hidden..
+                    elif node not in self:
+                        # Hide it and propagate (propogate first, since
+                        # otherwise we can't access the parents)
+                        for parent in cl.parentrevs(rev):
+                            if parent != nullrev and parent not in seen:
+                                heapq.heappush(heap, -parent)
+                                seen.add(parent)
+                        self.set(tr, node, True, childmap=childmap)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -388,7 +388,7 @@ class localrepository(object):
 
     @storecache('hidden.roots')
     def hidden(self):
-        return hidden.rootset(self, self.svfs, 'hidden')
+        return hidden.hiddenset(self, self.svfs, 'hidden')
 
     def close(self):
         self._writecaches()
@@ -1384,6 +1384,10 @@ class localrepository(object):
         l = self._lock(self.svfs, "lock", wait, None,
                        self.invalidate, _('repository %s') % self.origroot)
         self._lockref = weakref.ref(l)
+        # Force the hidden cache to load, so it can validate it's cache hash
+        # against the unmodified bookmark, obsolete, tags, and working copy
+        # states.
+        self.hidden
         return l
 
     def _wlockchecktransaction(self):
diff --git a/tests/test-empty.t b/tests/test-empty.t
--- a/tests/test-empty.t
+++ b/tests/test-empty.t
@@ -26,6 +26,7 @@ Check the basic files created:
 Should be empty:
 
   $ ls .hg/store
+  hidden.roots
 
 Poke at a clone:
 
@@ -49,5 +50,6 @@ Poke at a clone:
 Should be empty:
 
   $ ls .hg/store
+  hidden.roots
 
   $ cd ..
diff --git a/tests/test-fncache.t b/tests/test-fncache.t
--- a/tests/test-fncache.t
+++ b/tests/test-fncache.t
@@ -92,6 +92,7 @@ Non store repo:
   .hg/data/tst.d.hg
   .hg/data/tst.d.hg/foo.i
   .hg/dirstate
+  .hg/hidden.roots
   .hg/last-message.txt
   .hg/phaseroots
   .hg/requires
@@ -129,6 +130,7 @@ Non fncache repo:
   .hg/store/data
   .hg/store/data/tst.d.hg
   .hg/store/data/tst.d.hg/_foo.i
+  .hg/store/hidden.roots
   .hg/store/phaseroots
   .hg/store/undo
   .hg/store/undo.backupfiles
@@ -313,6 +315,7 @@ Aborted transactions can be recovered la
   data/z.i
   $ hg recover
   rolling back interrupted transaction
+  warning: ignoring unknown working parent 4b0f1917ccc5!
   checking changesets
   checking manifests
   crosschecking files in changesets and manifests
diff --git a/tests/test-hardlinks.t b/tests/test-hardlinks.t
--- a/tests/test-hardlinks.t
+++ b/tests/test-hardlinks.t
@@ -48,6 +48,7 @@ Prepare repo r1:
   1 r1/.hg/store/data/d1/f2.i
   1 r1/.hg/store/data/f1.i
   1 r1/.hg/store/fncache
+  1 r1/.hg/store/hidden.roots
   1 r1/.hg/store/phaseroots
   1 r1/.hg/store/undo
   1 r1/.hg/store/undo.backup.fncache
@@ -87,6 +88,7 @@ Repos r1 and r2 should now contain hardl
   2 r1/.hg/store/data/d1/f2.i
   2 r1/.hg/store/data/f1.i
   2 r1/.hg/store/fncache
+  1 r1/.hg/store/hidden.roots
   1 r1/.hg/store/phaseroots
   1 r1/.hg/store/undo
   1 r1/.hg/store/undo.backup.fncache
@@ -99,6 +101,7 @@ Repos r1 and r2 should now contain hardl
   2 r2/.hg/store/data/d1/f2.i
   2 r2/.hg/store/data/f1.i
   2 r2/.hg/store/fncache
+  1 r2/.hg/store/hidden.roots
 
 Repo r3 should not be hardlinked:
 
@@ -108,6 +111,7 @@ Repo r3 should not be hardlinked:
   1 r3/.hg/store/data/d1/f2.i
   1 r3/.hg/store/data/f1.i
   1 r3/.hg/store/fncache
+  1 r3/.hg/store/hidden.roots
   1 r3/.hg/store/phaseroots
   1 r3/.hg/store/undo
   1 r3/.hg/store/undo.backupfiles
@@ -134,6 +138,7 @@ Create a non-inlined filelog in r3:
   1 r3/.hg/store/data/d1/f2.i
   1 r3/.hg/store/data/f1.i
   1 r3/.hg/store/fncache
+  1 r3/.hg/store/hidden.roots
   1 r3/.hg/store/phaseroots
   1 r3/.hg/store/undo
   1 r3/.hg/store/undo.backup.fncache
@@ -167,6 +172,7 @@ Push to repo r1 should break up most har
   1 r2/.hg/store/data/d1/f2.i
   2 r2/.hg/store/data/f1.i
   [12] r2/\.hg/store/fncache (re)
+  1 r2/.hg/store/hidden.roots
 
 #if hardlink-whitelisted
   $ nlinksdir r2/.hg/store/fncache
@@ -197,6 +203,7 @@ Committing a change to f1 in r1 must bre
   1 r2/.hg/store/data/d1/f2.i
   1 r2/.hg/store/data/f1.i
   [12] r2/\.hg/store/fncache (re)
+  1 r2/.hg/store/hidden.roots
 
 #if hardlink-whitelisted
   $ nlinksdir r2/.hg/store/fncache
@@ -245,6 +252,7 @@ r4 has hardlinks in the working dir (not
   2 r4/.hg/store/data/d1/f2.i
   2 r4/.hg/store/data/f1.i
   2 r4/.hg/store/fncache
+  2 r4/.hg/store/hidden.roots
   2 r4/.hg/store/phaseroots
   2 r4/.hg/store/undo
   2 r4/.hg/store/undo.backup.fncache
@@ -294,6 +302,7 @@ Update back to revision 11 in r4 should
   2 r4/.hg/store/data/d1/f2.i
   2 r4/.hg/store/data/f1.i
   2 r4/.hg/store/fncache
+  2 r4/.hg/store/hidden.roots
   2 r4/.hg/store/phaseroots
   2 r4/.hg/store/undo
   2 r4/.hg/store/undo.backup.fncache
diff --git a/tests/test-hook.t b/tests/test-hook.t
--- a/tests/test-hook.t
+++ b/tests/test-hook.t
@@ -190,6 +190,7 @@ more there after
   00manifest.i
   data
   fncache
+  hidden.roots
   journal.phaseroots
   phaseroots
   undo
diff --git a/tests/test-inherit-mode.t b/tests/test-inherit-mode.t
--- a/tests/test-inherit-mode.t
+++ b/tests/test-inherit-mode.t
@@ -79,6 +79,7 @@ new directories are setgid
   00660 ./.hg/store/data/dir/bar.i
   00660 ./.hg/store/data/foo.i
   00660 ./.hg/store/fncache
+  00660 ./.hg/store/hidden.roots
   00660 ./.hg/store/phaseroots
   00660 ./.hg/store/undo
   00660 ./.hg/store/undo.backupfiles
@@ -125,6 +126,7 @@ group can still write everything
   00660 ../push/.hg/store/data/dir/bar.i
   00660 ../push/.hg/store/data/foo.i
   00660 ../push/.hg/store/fncache
+  00660 ../push/.hg/store/hidden.roots
   00660 ../push/.hg/store/undo
   00660 ../push/.hg/store/undo.backupfiles
   00660 ../push/.hg/store/undo.phaseroots
diff --git a/tests/test-upgrade-repo.t b/tests/test-upgrade-repo.t
--- a/tests/test-upgrade-repo.t
+++ b/tests/test-upgrade-repo.t
@@ -195,6 +195,7 @@ Upgrading a repository that is already m
   repository locked and read-only
   creating temporary repository to stage migrated data: $TESTTMP/modern/.hg/upgrade.* (glob)
   (it is safe to interrupt this process any time before data migration completes)
+  copying hidden.roots
   data fully migrated to temporary repository
   marking source repository as being upgraded; clients will be unable to read from repository
   starting in-place swap of repository data
@@ -241,6 +242,7 @@ Upgrading a repository to generaldelta w
   migrating changelog containing 3 revisions (184 bytes in store; 181 bytes tracked data)
   finished migrating 3 changelog revisions; change in size: 0 bytes
   finished migrating 9 total revisions; total change in store size: 0 bytes
+  copying hidden.roots
   copying phaseroots
   data fully migrated to temporary repository
   marking source repository as being upgraded; clients will be unable to read from repository
@@ -277,6 +279,7 @@ store directory has files we expect
   00manifest.i
   data
   fncache
+  hidden.roots
   phaseroots
   undo
   undo.backupfiles
@@ -303,6 +306,7 @@ old store should be backed up
   00manifest.i
   data
   fncache
+  hidden.roots
   phaseroots
   undo
   undo.backup.fncache
@@ -339,6 +343,7 @@ store files with special filenames aren'
   finished migrating 1 changelog revisions; change in size: 0 bytes
   finished migrating 3 total revisions; total change in store size: 0 bytes
   copying .XX_special_filename
+  copying hidden.roots
   copying phaseroots
   data fully migrated to temporary repository
   marking source repository as being upgraded; clients will be unable to read from repository
diff --git a/tests/test-verify.t b/tests/test-verify.t
--- a/tests/test-verify.t
+++ b/tests/test-verify.t
@@ -78,6 +78,7 @@ Entire changelog missing
 
   $ rm .hg/store/00changelog.*
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    0: empty or missing changelog
    manifest@0: d0b6632564d4 not in changesets
    manifest@1: 941fc4534185 not in changesets
@@ -116,6 +117,7 @@ Entire changelog and manifest log missin
   $ rm .hg/store/00changelog.*
   $ rm .hg/store/00manifest.*
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
   warning: orphan revlog 'data/file.i'
   1 warnings encountered!
   $ cp -R .hg/store-full/. .hg/store
@@ -125,6 +127,7 @@ Entire changelog and filelog missing
   $ rm .hg/store/00changelog.*
   $ rm .hg/store/data/file.*
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    0: empty or missing changelog
    manifest@0: d0b6632564d4 not in changesets
    manifest@1: 941fc4534185 not in changesets
@@ -158,6 +161,7 @@ Changelog missing entry
 
   $ cp -f .hg/store-partial/00changelog.* .hg/store
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    manifest@?: rev 1 points to nonexistent changeset 1
    manifest@?: 941fc4534185 not in changesets
    file@?: rev 1 points to nonexistent changeset 1
@@ -193,6 +197,7 @@ Changelog and manifest log missing entry
   $ cp -f .hg/store-partial/00changelog.* .hg/store
   $ cp -f .hg/store-partial/00manifest.* .hg/store
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    file@?: rev 1 points to nonexistent changeset 1
    (expected 0)
    file@?: c10f2164107d not in manifests
@@ -206,6 +211,7 @@ Changelog and filelog missing entry
   $ cp -f .hg/store-partial/00changelog.* .hg/store
   $ cp -f .hg/store-partial/data/file.* .hg/store/data
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    manifest@?: rev 1 points to nonexistent changeset 1
    manifest@?: 941fc4534185 not in changesets
    file@?: manifest refers to unknown revision c10f2164107d
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH 6 of 6] hidden: alert hidden store when obsmarkers are created

Durham Goode
In reply to this post by Durham Goode
# HG changeset patch
# User Durham Goode <[hidden email]>
# Date 1495129620 25200
#      Thu May 18 10:47:00 2017 -0700
# Node ID b09fa310c34bdbad4fc2b5b914e84e0fc9dc4b68
# Parent  0979ce9d40803103441b6df1ebefafe80bb9a057
hidden: alert hidden store when obsmarkers are created

This calls the new updatevisibility function from obsolete.createmarkers(). This
let's us update the currently stored hidden information when markers change.

diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
--- a/mercurial/obsolete.py
+++ b/mercurial/obsolete.py
@@ -1226,6 +1226,7 @@ def createmarkers(repo, relations, flag=
         metadata['user'] = repo.ui.username()
     tr = repo.transaction('add-obsolescence-marker')
     try:
+        affectednodes = set()
         markerargs = []
         for rel in relations:
             prec = rel[0]
@@ -1258,6 +1259,11 @@ def createmarkers(repo, relations, flag=
             repo.obsstore.create(tr, nprec, nsucs, flag, parents=npare,
                                  date=date, metadata=localmetadata)
             repo.filteredrevcache.clear()
+            affectednodes.add(nprec)
+
+        cl = repo.unfiltered().changelog
+        affectedrevs = (cl.rev(n) for n in affectednodes if n in cl.nodemap)
+        repo.hidden.updatevisibility(affectedrevs)
         tr.close()
     finally:
         tr.release()
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 1 of 6] clone: improve locking pattern for hg clone

Durham Goode
In reply to this post by Durham Goode
On 5/18/17 11:23 AM, Durham Goode wrote:

> # HG changeset patch
> # User Durham Goode <[hidden email]>
> # Date 1495129620 25200
> #      Thu May 18 10:47:00 2017 -0700
> # Node ID 19be454ee16925d01da8f9d329cb48556083da1b
> # Parent  8a87bfc5bebbbe0ac996ac8e047a029eb931af45
> clone: improve locking pattern for hg clone
>
> Previously, in some cases hg.clone() would take the lock via unconventional
> means (since the repo didn't exist yet). If something later on tried to take the
> lock via normal means (repo.lock()), it would enter a permanent deadlock since
> the lock was held by the process, but the repo object didn't realize it since
> the lock was initially taken without using repo.lock().
>
> The fix is to release the lock once we've boostrapped the clone and switch to a
> normal repo.lock version.
>

This one is easier to review with whitespace diff off:

diff --git a/mercurial/hg.py b/mercurial/hg.py
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -586,6 +586,8 @@ def clone(ui, peeropts, source, dest=Non
              destpeer = peer(srcrepo, peeropts, dest)
              srcrepo.hook('outgoing', source='clone',
                            node=node.hex(node.nullid))
+            release(destlock)
+            destlock = None
          else:
              try:
                  destpeer = peer(srcrepo or ui, peeropts, dest,
create=True)
@@ -628,6 +630,10 @@ def clone(ui, peeropts, source, dest=Non

          destrepo = destpeer.local()
          if destrepo:
+            wlock = lock = None
+            try:
+                wlock = destrepo.wlock()
+                lock = destrepo.lock()
              template = uimod.samplehgrcs['cloned']
              fp = destrepo.vfs("hgrc", "w", text=True)
              u = util.url(abspath)
@@ -660,8 +666,8 @@ def clone(ui, peeropts, source, dest=Non
                          if bn == 'default':
                              status = _("updating to bookmark @\n")
                          else:
-                            status = (_("updating to bookmark @ on
branch %s\n")
-                                      % bn)
+                                status = (_("updating to bookmark @ on "
+                                            "branch %s\n") % bn)
                      except KeyError:
                          try:
                              uprev = destrepo.branchtip('default')
@@ -675,6 +681,8 @@ def clone(ui, peeropts, source, dest=Non
                  if update in destrepo._bookmarks:
                      bookmarks.activate(destrepo, update)
      finally:
+                release(lock, wlock)
+    finally:
          release(srclock, destlock)
          if cleandir is not None:
              shutil.rmtree(cleandir, True)
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 6 of 6] hidden: alert hidden store when obsmarkers are created

Durham Goode
In reply to this post by Durham Goode
On 5/18/17 11:24 AM, Durham Goode wrote:

> # HG changeset patch
> # User Durham Goode <[hidden email]>
> # Date 1495129620 25200
> #      Thu May 18 10:47:00 2017 -0700
> # Node ID b09fa310c34bdbad4fc2b5b914e84e0fc9dc4b68
> # Parent  0979ce9d40803103441b6df1ebefafe80bb9a057
> hidden: alert hidden store when obsmarkers are created
>
> This calls the new updatevisibility function from obsolete.createmarkers(). This
> let's us update the currently stored hidden information when markers change.
>
> diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
> --- a/mercurial/obsolete.py
> +++ b/mercurial/obsolete.py
> @@ -1226,6 +1226,7 @@ def createmarkers(repo, relations, flag=
>          metadata['user'] = repo.ui.username()
>      tr = repo.transaction('add-obsolescence-marker')
>      try:
> +        affectednodes = set()
>          markerargs = []
>          for rel in relations:
>              prec = rel[0]
> @@ -1258,6 +1259,11 @@ def createmarkers(repo, relations, flag=
>              repo.obsstore.create(tr, nprec, nsucs, flag, parents=npare,
>                                   date=date, metadata=localmetadata)
>              repo.filteredrevcache.clear()
> +            affectednodes.add(nprec)
> +
> +        cl = repo.unfiltered().changelog
> +        affectedrevs = (cl.rev(n) for n in affectednodes if n in cl.nodemap)
> +        repo.hidden.updatevisibility(affectedrevs)
>          tr.close()
>      finally:
>          tr.release()

This is the first half of a series that introduces the concept of an
explicit hidden store.  It is represented as a set, where you can test
if a given commit is hidden or not, and is serialized as a set of roots
of the hidden part of the graph.

It is currently just a cache of the current hidden algorithm, and the
final patch in the series (not sent yet) adds validation that the hidden
store always matches the computehidden result.

Other parts of Mercurial communicate with the hidden store simply by
telling it what nodes they've touched
(`repo.hidden.updatevisibility(affectedrevs)`), so knowledge about the
exact hidden semantics remains confined to hidden.py.

This direction has several benefits:
- moves the computation of hiddeness to write-time instead of read-time,
so we only have to pay it once
- enables external extensions to interact with hidden computations more
easily (they can now observe hidden affecting changes by wrapping the
updatevisibility function, without having to individually monkey patch
into bookmarks/tags/obsmarkers/etc)
- potentially gives us future flexibility on the defining and storing
what is hidden and what is not
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 6 of 6] hidden: alert hidden store when obsmarkers are created

Gregory Szorc


> On May 18, 2017, at 11:34, Durham Goode <[hidden email]> wrote:
>
>> On 5/18/17 11:24 AM, Durham Goode wrote:
>> # HG changeset patch
>> # User Durham Goode <[hidden email]>
>> # Date 1495129620 25200
>> #      Thu May 18 10:47:00 2017 -0700
>> # Node ID b09fa310c34bdbad4fc2b5b914e84e0fc9dc4b68
>> # Parent  0979ce9d40803103441b6df1ebefafe80bb9a057
>> hidden: alert hidden store when obsmarkers are created
>>
>> This calls the new updatevisibility function from obsolete.createmarkers(). This
>> let's us update the currently stored hidden information when markers change.
>>
>> diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
>> --- a/mercurial/obsolete.py
>> +++ b/mercurial/obsolete.py
>> @@ -1226,6 +1226,7 @@ def createmarkers(repo, relations, flag=
>>         metadata['user'] = repo.ui.username()
>>     tr = repo.transaction('add-obsolescence-marker')
>>     try:
>> +        affectednodes = set()
>>         markerargs = []
>>         for rel in relations:
>>             prec = rel[0]
>> @@ -1258,6 +1259,11 @@ def createmarkers(repo, relations, flag=
>>             repo.obsstore.create(tr, nprec, nsucs, flag, parents=npare,
>>                                  date=date, metadata=localmetadata)
>>             repo.filteredrevcache.clear()
>> +            affectednodes.add(nprec)
>> +
>> +        cl = repo.unfiltered().changelog
>> +        affectedrevs = (cl.rev(n) for n in affectednodes if n in cl.nodemap)
>> +        repo.hidden.updatevisibility(affectedrevs)
>>         tr.close()
>>     finally:
>>         tr.release()
>
> This is the first half of a series that introduces the concept of an explicit hidden store.  It is represented as a set, where you can test if a given commit is hidden or not, and is serialized as a set of roots of the hidden part of the graph.
>
> It is currently just a cache of the current hidden algorithm, and the final patch in the series (not sent yet) adds validation that the hidden store always matches the computehidden result.

Do you have the full series (even if not in sendable state) available to pull from? I have a few questions that I think could be answered by seeing the final state...

Only glancing at this, I like the general direction. I do have several nits. But I'll hold off unless you ask for them privately because higher-level discussion is more important.

>
> Other parts of Mercurial communicate with the hidden store simply by telling it what nodes they've touched (`repo.hidden.updatevisibility(affectedrevs)`), so knowledge about the exact hidden semantics remains confined to hidden.py.
>
> This direction has several benefits:
> - moves the computation of hiddeness to write-time instead of read-time, so we only have to pay it once
> - enables external extensions to interact with hidden computations more easily (they can now observe hidden affecting changes by wrapping the updatevisibility function, without having to individually monkey patch into bookmarks/tags/obsmarkers/etc)
> - potentially gives us future flexibility on the defining and storing what is hidden and what is not
> _______________________________________________
> Mercurial-devel mailing list
> [hidden email]
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 6 of 6] hidden: alert hidden store when obsmarkers are created

Durham Goode
On 5/18/17 12:10 PM, Gregory Szorc wrote:

>
>
>> On May 18, 2017, at 11:34, Durham Goode <[hidden email]> wrote:
>>
>>> On 5/18/17 11:24 AM, Durham Goode wrote:
>>> # HG changeset patch
>>> # User Durham Goode <[hidden email]>
>>> # Date 1495129620 25200
>>> #      Thu May 18 10:47:00 2017 -0700
>>> # Node ID b09fa310c34bdbad4fc2b5b914e84e0fc9dc4b68
>>> # Parent  0979ce9d40803103441b6df1ebefafe80bb9a057
>>> hidden: alert hidden store when obsmarkers are created
>>>
>>> This calls the new updatevisibility function from obsolete.createmarkers(). This
>>> let's us update the currently stored hidden information when markers change.
>>>
>>> diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
>>> --- a/mercurial/obsolete.py
>>> +++ b/mercurial/obsolete.py
>>> @@ -1226,6 +1226,7 @@ def createmarkers(repo, relations, flag=
>>>         metadata['user'] = repo.ui.username()
>>>     tr = repo.transaction('add-obsolescence-marker')
>>>     try:
>>> +        affectednodes = set()
>>>         markerargs = []
>>>         for rel in relations:
>>>             prec = rel[0]
>>> @@ -1258,6 +1259,11 @@ def createmarkers(repo, relations, flag=
>>>             repo.obsstore.create(tr, nprec, nsucs, flag, parents=npare,
>>>                                  date=date, metadata=localmetadata)
>>>             repo.filteredrevcache.clear()
>>> +            affectednodes.add(nprec)
>>> +
>>> +        cl = repo.unfiltered().changelog
>>> +        affectedrevs = (cl.rev(n) for n in affectednodes if n in cl.nodemap)
>>> +        repo.hidden.updatevisibility(affectedrevs)
>>>         tr.close()
>>>     finally:
>>>         tr.release()
>>
>> This is the first half of a series that introduces the concept of an explicit hidden store.  It is represented as a set, where you can test if a given commit is hidden or not, and is serialized as a set of roots of the hidden part of the graph.
>>
>> It is currently just a cache of the current hidden algorithm, and the final patch in the series (not sent yet) adds validation that the hidden store always matches the computehidden result.
>
> Do you have the full series (even if not in sendable state) available to pull from? I have a few questions that I think could be answered by seeing the final state...
>
> Only glancing at this, I like the general direction. I do have several nits. But I'll hold off unless you ask for them privately because higher-level discussion is more important.
>
The full series can be seen here:
https://bitbucket.org/DurhamG/hg/commits/branch/hidden

All the core test pass at the tip. The tip of the series includes
warning output if the cache ever doesn't match the old hidden result, so
the fact that the tests pass without giving that warning means the cache
always matches the current hidden result.
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 5 of 6] hidden: add hiddenset and updatevisibility logic

Gregory Szorc
In reply to this post by Durham Goode
On Thu, May 18, 2017 at 11:23 AM, Durham Goode <[hidden email]> wrote:
# HG changeset patch
# User Durham Goode <[hidden email]>
# Date 1495129620 25200
#      Thu May 18 10:47:00 2017 -0700
# Node ID 0979ce9d40803103441b6df1ebefafe80bb9a057
# Parent  2aef3c4be6ba81e9d2453760515517df6fc2b816
hidden: add hiddenset and updatevisibility logic

This adds a function that can incrementally update the hidden state for a
repository when given a list of nodes that have been changed. This will be
called by all actions that can affect hiddenness (bookmarks, tags, obsmarkers,
working copy parents). It uses a hash of the inputs that affect the hidden store
to ensure things are up-to-date.

This has the nice property that all logic for propagating hiddenness is
contained to one function, and all the consumers just have to report what
commits they touched. If an external extension wants to modify how hiding is
computed, it can just wrap this one function.

It also is nice because it updates the hidden state incrementally, instead of
recomputing the entire repo at once.

Most comments inline. My biggest concerns so far in this series are around performance and cache semantics. You said in a separate email (and the code reinforces) that this file is a cache. Yet it isn't in .hg/cache nor does it have a repo requirements entry guarding it. The nearest analogue is the phaseroots file. But that's a canonical store (not a cache) and old clients don't care about it (like caches) so we don't need a requirements file. So there's a bit of work that needs to be done to turn this into a proper cache.
 

diff --git a/mercurial/hidden.py b/mercurial/hidden.py
--- a/mercurial/hidden.py
+++ b/mercurial/hidden.py
@@ -5,12 +5,38 @@

 from __future__ import absolute_import

+import hashlib
+import heapq
+
 from .i18n import _
-from .node import bin, hex
+from .node import bin, hex, nullid, nullrev
 from . import (
     error,
+    obsolete,
+    tags as tagsmod,
+    util,
 )

+class lazychildset(object):
+    """Data structure for efficiently answering the query 'what are my children'
+    for a series of revisions."""
+    def __init__(self, repo):
+        self.repo = repo.unfiltered()
+        self._childmap = {}
+        self._minprocessed = len(repo.changelog)
+
+    def get(self, rev):
+        childmap = self._childmap
+        cl = self.repo.changelog
+        if rev < self._minprocessed:
+            for r in xrange(rev, self._minprocessed):
+                for p in cl.parentrevs(r):
+                    if p != nullrev:
+                        childmap.setdefault(p, []).append(r)
+            self._minprocessed = rev
+
+        return childmap.get(rev, ())
+
 class rootset(object):
     """A commit set like object where a commit being in the set implies that all
     descendants of that commit are also in the set.
@@ -135,3 +161,168 @@ class rootset(object):
     def __len__(self):
         self._load()
         return len(self._cache)
+
+class hiddenset(rootset):
+    """A set representing what commits should be hidden. It contains logic to
+    update itself based on what nodes are reported as having been changed.
+    """
+    def __init__(self, repo, opener, name):
+        super(hiddenset, self).__init__(repo, opener, name)
+        self._load()
+
+    def _load(self):
+        if self.roots is None:
+            try:
+                cachehash = None
+                raw = self.opener.read(self.filename)
+                if len(raw) >= 20:
+                    cachehash = raw[:20]
+                    raw = raw[20:]
+                else:
+                    raise RuntimeError("invalid cache")

RuntimeError feels like the wrong exception here since lots of Python code can raise it and we don't want to be catching and swallowing unrelated errors. Can you invent a dummy type for this or validate the exception in the except block?
 
+
+                realhash = self._gethash()

_gethash() calls _gethiderevs() which queries a bunch of things. I'm worried it is doing too much for a perf critical read path. Specifically, the call to obsolete.getrevs(repo, 'obsolete') requires loading the obsstore. I thought a point of this work was alleviate scaling problems of the obsstore? Note that since the obsstore is append only, perhaps we only need to store and compare the size of the obsstore file for hash verification?

Another concern is loading tags. I'd have to look at the code, but if this needs to resolve .hgtags, that's not great for perf. Related to that, presumably this will get hooked into the visible repoview filter someday. Ideally, we shouldn't have to load tags, bookmarks, etc on the critical path of loading the default repoview.

While I'm here, could v2 please include a large comment block in this file about the caching strategy? That really helps us re-evaluate the roles of caches in the future. See tags.py for inspiration.
 
+                if cachehash == realhash:
+                    self.roots = self._deserialize(raw)
+                else:
+                    raise RuntimeError("cache hash mismatch")
+            except (IOError, RuntimeError):
+                # Rebuild the hidden roots from scratch
+                self.roots = set()
+                self._cache = set()
+                revs = self.repo.revs('not public()')
+                self.updatevisibility(revs)

Calling self.updatevisibility() here is problematic because updatevisibility() takes out a lock unconditionally and repo.hidden could be called by a process that doesn't have write permissions. See tags.py for how cache files should handle permissions errors. You'll also want a test for this scenario. There's one for the tags fnodes cache.

Also, this pattern of cache I/O complete with hash verification is common enough and has been the source of enough bugs over the years that I think it is time to formalize the pattern (possibly at the vfs layer) in order to cut down on bugs...

Finally, since this is a cache, why is it in .hg/store instead of .hg/cache?

 
+
+            self._fillcache()
+            self.dirty = False
+
+    def _write(self, fp):
+        fp.write(self._gethash())
+        fp.write(self._serialize())
+        self.dirty = False
+
+    def _gethash(self):
+        hiderevs, nohiderevs = self._gethiderevs()
+        cl = self.repo.changelog
+        hash = hashlib.sha1()
+        hash.update("hide")
+        hash.update(''.join(sorted(cl.node(r) for r in hiderevs)))
+        hash.update("nohide")
+        hash.update(''.join(sorted(cl.node(r) for r in nohiderevs)))
+
+        return hash.digest()
+
+    def _gethiderevs(self):
+        """Returns the set of revs that potentially could be hidden and a set of
+        revs that definitely should not be hidden."""
+        hiderevs = set()
+        nohiderevs = set()
+
+        repo = self.repo
+        cl = repo.changelog
+
+        # Hide obsolete commits
+        hiderevs.update(obsolete.getrevs(repo, 'obsolete'))
+
+        # Don't hide bookmarks
+        nohiderevs.update(cl.rev(node) for name, node in
+                          repo._bookmarks.iteritems())
+
+        # Don't hide workingcopy parents
+        nohiderevs.update(cl.rev(node) for node in repo.dirstate.parents()
+                          if node != nullid and node in cl.nodemap)
+
+        # Don't hide local tags
+        tags = {}
+        tagsmod.readlocaltags(repo.ui, repo, tags, {})
+        nohiderevs.update(cl.rev(t[0]) for t in tags.values()
+                          if t[0] in cl.nodemap)
+
+        # Don't hide rebase commits
+        if util.safehasattr(repo, '_rebaseset'):
+            nohiderevs.update(repo._rebaseset)
+
+        return hiderevs, nohiderevs
+
+    def _shouldhide(self, rev, childmap, hiderevs, nohiderevs):
+        repo = self.repo
+        ispublic = repo._phasecache.phase(repo, rev) == 0

Nit: use constant in phases module instead of 0
 
+        cl = repo.changelog
+        return (not ispublic and
+                rev in hiderevs and
+                rev not in nohiderevs and
+                all(cl.node(r) in self for r in childmap.get(rev)))
+
+    def _needupdate(self, revs):
+        repo = self.repo
+        cl = repo.changelog
+
+        hiderevs, nohiderevs = self._gethiderevs()
+
+        childmap = lazychildset(repo)
+        for rev in revs:
+            node = cl.node(rev)
+            # If the node should be visible...
+            if not self._shouldhide(rev, childmap, hiderevs, nohiderevs):

Resolving repo.changelog on every invocation inside _shouldhide() can add overhead. I wonder if _shouldhide() should accept an iterable of revs and return a dict...
 
+                # ...but it's hidden
+                if node in self:
+                    return True
+            # Otherwise, the node should be hidden
+            elif node not in self:
+                return True
+
+        return False
+
+    def updatevisibility(self, revs):
+        """Updates the visibility of the given revs, and propagates those
+        updates to the rev parents as necessary."""
+        self._load()
+
+        repo = self.repo
+        seen = set(revs)
+        if not seen or not self._needupdate(seen):
+            # Even if there are no changes, we need to rewrite the file so the
+            # cache hash is up to date with the latest changes.
+            with self.opener(self.filename, 'w+', atomictemp=True) as fp:
+                self._write(fp)

This being a cache, it needs to handle I/O failures gracefully if the user doesn't have permissions.
 
+            return
+
+        # Sort the revs so we can process them in topo order
+        heap = []
+        for rev in seen:
+            heapq.heappush(heap, -rev)
+
+        with repo.lock():
+            with repo.transaction('hidden') as tr:

Do we need a transaction here? We don't use transactions for other caches.
 
+                hiderevs, nohiderevs = self._gethiderevs()
+
+                cl = repo.changelog
+                childmap = lazychildset(repo)
+                while heap:
+                    rev = -heapq.heappop(heap)
+
+                    # If it should be visible...
+                    node = cl.node(rev)
+                    if not self._shouldhide(rev, childmap, hiderevs,
+                                            nohiderevs):

Another repo.changelog perf issue lurking in _shouldhide() inside a loop.
 
+                        # And it's currently invisible...
+                        if node in self:
+                            # Make it visible and propagate
+                            # (propogate first, since otherwise we can't access
+                            # the parents)
+                            for parent in cl.parentrevs(rev):
+                                if parent != nullrev and parent not in seen:
+                                    heapq.heappush(heap, -parent)
+                                    seen.add(parent)
+                            self.set(tr, cl.node(rev), False,
+                                     childmap=childmap)
+
+                    # If it should not be visible, and it's not already hidden..
+                    elif node not in self:
+                        # Hide it and propagate (propogate first, since
+                        # otherwise we can't access the parents)
+                        for parent in cl.parentrevs(rev):
+                            if parent != nullrev and parent not in seen:
+                                heapq.heappush(heap, -parent)
+                                seen.add(parent)
+                        self.set(tr, node, True, childmap=childmap)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -388,7 +388,7 @@ class localrepository(object):

     @storecache('hidden.roots')
     def hidden(self):
-        return hidden.rootset(self, self.svfs, 'hidden')
+        return hidden.hiddenset(self, self.svfs, 'hidden')

     def close(self):
         self._writecaches()
@@ -1384,6 +1384,10 @@ class localrepository(object):
         l = self._lock(self.svfs, "lock", wait, None,
                        self.invalidate, _('repository %s') % self.origroot)
         self._lockref = weakref.ref(l)
+        # Force the hidden cache to load, so it can validate it's cache hash
+        # against the unmodified bookmark, obsolete, tags, and working copy
+        # states.
+        self.hidden
         return l

     def _wlockchecktransaction(self):
diff --git a/tests/test-empty.t b/tests/test-empty.t
--- a/tests/test-empty.t
+++ b/tests/test-empty.t
@@ -26,6 +26,7 @@ Check the basic files created:
 Should be empty:

   $ ls .hg/store
+  hidden.roots

 Poke at a clone:

@@ -49,5 +50,6 @@ Poke at a clone:
 Should be empty:

   $ ls .hg/store
+  hidden.roots

   $ cd ..
diff --git a/tests/test-fncache.t b/tests/test-fncache.t
--- a/tests/test-fncache.t
+++ b/tests/test-fncache.t
@@ -92,6 +92,7 @@ Non store repo:
   .hg/data/tst.d.hg
   .hg/data/tst.d.hg/foo.i
   .hg/dirstate
+  .hg/hidden.roots
   .hg/last-message.txt
   .hg/phaseroots
   .hg/requires
@@ -129,6 +130,7 @@ Non fncache repo:
   .hg/store/data
   .hg/store/data/tst.d.hg
   .hg/store/data/tst.d.hg/_foo.i
+  .hg/store/hidden.roots
   .hg/store/phaseroots
   .hg/store/undo
   .hg/store/undo.backupfiles
@@ -313,6 +315,7 @@ Aborted transactions can be recovered la
   data/z.i
   $ hg recover
   rolling back interrupted transaction
+  warning: ignoring unknown working parent 4b0f1917ccc5!

What's this about? Accessing repo.dirstate as part of cache validation?
 
   checking changesets
   checking manifests
   crosschecking files in changesets and manifests
diff --git a/tests/test-hardlinks.t b/tests/test-hardlinks.t
--- a/tests/test-hardlinks.t
+++ b/tests/test-hardlinks.t
@@ -48,6 +48,7 @@ Prepare repo r1:
   1 r1/.hg/store/data/d1/f2.i
   1 r1/.hg/store/data/f1.i
   1 r1/.hg/store/fncache
+  1 r1/.hg/store/hidden.roots
   1 r1/.hg/store/phaseroots
   1 r1/.hg/store/undo
   1 r1/.hg/store/undo.backup.fncache
@@ -87,6 +88,7 @@ Repos r1 and r2 should now contain hardl
   2 r1/.hg/store/data/d1/f2.i
   2 r1/.hg/store/data/f1.i
   2 r1/.hg/store/fncache
+  1 r1/.hg/store/hidden.roots
   1 r1/.hg/store/phaseroots
   1 r1/.hg/store/undo
   1 r1/.hg/store/undo.backup.fncache
@@ -99,6 +101,7 @@ Repos r1 and r2 should now contain hardl
   2 r2/.hg/store/data/d1/f2.i
   2 r2/.hg/store/data/f1.i
   2 r2/.hg/store/fncache
+  1 r2/.hg/store/hidden.roots

 Repo r3 should not be hardlinked:

@@ -108,6 +111,7 @@ Repo r3 should not be hardlinked:
   1 r3/.hg/store/data/d1/f2.i
   1 r3/.hg/store/data/f1.i
   1 r3/.hg/store/fncache
+  1 r3/.hg/store/hidden.roots
   1 r3/.hg/store/phaseroots
   1 r3/.hg/store/undo
   1 r3/.hg/store/undo.backupfiles
@@ -134,6 +138,7 @@ Create a non-inlined filelog in r3:
   1 r3/.hg/store/data/d1/f2.i
   1 r3/.hg/store/data/f1.i
   1 r3/.hg/store/fncache
+  1 r3/.hg/store/hidden.roots
   1 r3/.hg/store/phaseroots
   1 r3/.hg/store/undo
   1 r3/.hg/store/undo.backup.fncache
@@ -167,6 +172,7 @@ Push to repo r1 should break up most har
   1 r2/.hg/store/data/d1/f2.i
   2 r2/.hg/store/data/f1.i
   [12] r2/\.hg/store/fncache (re)
+  1 r2/.hg/store/hidden.roots

 #if hardlink-whitelisted
   $ nlinksdir r2/.hg/store/fncache
@@ -197,6 +203,7 @@ Committing a change to f1 in r1 must bre
   1 r2/.hg/store/data/d1/f2.i
   1 r2/.hg/store/data/f1.i
   [12] r2/\.hg/store/fncache (re)
+  1 r2/.hg/store/hidden.roots

 #if hardlink-whitelisted
   $ nlinksdir r2/.hg/store/fncache
@@ -245,6 +252,7 @@ r4 has hardlinks in the working dir (not
   2 r4/.hg/store/data/d1/f2.i
   2 r4/.hg/store/data/f1.i
   2 r4/.hg/store/fncache
+  2 r4/.hg/store/hidden.roots
   2 r4/.hg/store/phaseroots
   2 r4/.hg/store/undo
   2 r4/.hg/store/undo.backup.fncache
@@ -294,6 +302,7 @@ Update back to revision 11 in r4 should
   2 r4/.hg/store/data/d1/f2.i
   2 r4/.hg/store/data/f1.i
   2 r4/.hg/store/fncache
+  2 r4/.hg/store/hidden.roots
   2 r4/.hg/store/phaseroots
   2 r4/.hg/store/undo
   2 r4/.hg/store/undo.backup.fncache
diff --git a/tests/test-hook.t b/tests/test-hook.t
--- a/tests/test-hook.t
+++ b/tests/test-hook.t
@@ -190,6 +190,7 @@ more there after
   00manifest.i
   data
   fncache
+  hidden.roots
   journal.phaseroots
   phaseroots
   undo
diff --git a/tests/test-inherit-mode.t b/tests/test-inherit-mode.t
--- a/tests/test-inherit-mode.t
+++ b/tests/test-inherit-mode.t
@@ -79,6 +79,7 @@ new directories are setgid
   00660 ./.hg/store/data/dir/bar.i
   00660 ./.hg/store/data/foo.i
   00660 ./.hg/store/fncache
+  00660 ./.hg/store/hidden.roots
   00660 ./.hg/store/phaseroots
   00660 ./.hg/store/undo
   00660 ./.hg/store/undo.backupfiles
@@ -125,6 +126,7 @@ group can still write everything
   00660 ../push/.hg/store/data/dir/bar.i
   00660 ../push/.hg/store/data/foo.i
   00660 ../push/.hg/store/fncache
+  00660 ../push/.hg/store/hidden.roots
   00660 ../push/.hg/store/undo
   00660 ../push/.hg/store/undo.backupfiles
   00660 ../push/.hg/store/undo.phaseroots
diff --git a/tests/test-upgrade-repo.t b/tests/test-upgrade-repo.t
--- a/tests/test-upgrade-repo.t
+++ b/tests/test-upgrade-repo.t
@@ -195,6 +195,7 @@ Upgrading a repository that is already m
   repository locked and read-only
   creating temporary repository to stage migrated data: $TESTTMP/modern/.hg/upgrade.* (glob)
   (it is safe to interrupt this process any time before data migration completes)
+  copying hidden.roots
   data fully migrated to temporary repository
   marking source repository as being upgraded; clients will be unable to read from repository
   starting in-place swap of repository data
@@ -241,6 +242,7 @@ Upgrading a repository to generaldelta w
   migrating changelog containing 3 revisions (184 bytes in store; 181 bytes tracked data)
   finished migrating 3 changelog revisions; change in size: 0 bytes
   finished migrating 9 total revisions; total change in store size: 0 bytes
+  copying hidden.roots
   copying phaseroots
   data fully migrated to temporary repository
   marking source repository as being upgraded; clients will be unable to read from repository
@@ -277,6 +279,7 @@ store directory has files we expect
   00manifest.i
   data
   fncache
+  hidden.roots
   phaseroots
   undo
   undo.backupfiles
@@ -303,6 +306,7 @@ old store should be backed up
   00manifest.i
   data
   fncache
+  hidden.roots
   phaseroots
   undo
   undo.backup.fncache
@@ -339,6 +343,7 @@ store files with special filenames aren'
   finished migrating 1 changelog revisions; change in size: 0 bytes
   finished migrating 3 total revisions; total change in store size: 0 bytes
   copying .XX_special_filename
+  copying hidden.roots
   copying phaseroots
   data fully migrated to temporary repository
   marking source repository as being upgraded; clients will be unable to read from repository
diff --git a/tests/test-verify.t b/tests/test-verify.t
--- a/tests/test-verify.t
+++ b/tests/test-verify.t
@@ -78,6 +78,7 @@ Entire changelog missing

   $ rm .hg/store/00changelog.*
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    0: empty or missing changelog
    manifest@0: d0b6632564d4 not in changesets
    manifest@1: 941fc4534185 not in changesets
@@ -116,6 +117,7 @@ Entire changelog and manifest log missin
   $ rm .hg/store/00changelog.*
   $ rm .hg/store/00manifest.*
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
   warning: orphan revlog 'data/file.i'
   1 warnings encountered!
   $ cp -R .hg/store-full/. .hg/store
@@ -125,6 +127,7 @@ Entire changelog and filelog missing
   $ rm .hg/store/00changelog.*
   $ rm .hg/store/data/file.*
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    0: empty or missing changelog
    manifest@0: d0b6632564d4 not in changesets
    manifest@1: 941fc4534185 not in changesets
@@ -158,6 +161,7 @@ Changelog missing entry

   $ cp -f .hg/store-partial/00changelog.* .hg/store
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    manifest@?: rev 1 points to nonexistent changeset 1
    manifest@?: 941fc4534185 not in changesets
    file@?: rev 1 points to nonexistent changeset 1
@@ -193,6 +197,7 @@ Changelog and manifest log missing entry
   $ cp -f .hg/store-partial/00changelog.* .hg/store
   $ cp -f .hg/store-partial/00manifest.* .hg/store
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    file@?: rev 1 points to nonexistent changeset 1
    (expected 0)
    file@?: c10f2164107d not in manifests
@@ -206,6 +211,7 @@ Changelog and filelog missing entry
   $ cp -f .hg/store-partial/00changelog.* .hg/store
   $ cp -f .hg/store-partial/data/file.* .hg/store/data
   $ hg verify -q
+  warning: ignoring unknown working parent c5ddb05ab828!
    manifest@?: rev 1 points to nonexistent changeset 1
    manifest@?: 941fc4534185 not in changesets
    file@?: manifest refers to unknown revision c10f2164107d
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 2 of 6] transaction: use unfiltered changelog length for transaction start

Pierre-Yves David-2
In reply to this post by Durham Goode


On 05/18/2017 08:23 PM, Durham Goode wrote:

> # HG changeset patch
> # User Durham Goode <[hidden email]>
> # Date 1495129620 25200
> #      Thu May 18 10:47:00 2017 -0700
> # Node ID 35235b1cefb101dad0672a4c4ee9486fba8b935b
> # Parent  19be454ee16925d01da8f9d329cb48556083da1b
> transaction: use unfiltered changelog length for transaction start
>
> Previously we used the normal changelog length for the transaction start. This
> triggered the computehidden logic which in a future patch triggers logic in the
> new hidden store which may start a transaction. This is circular and results in
> broken transaction mechanics, so let's use the unfiltered repo instead to avoid
> triggering computehidden during transaction start.
>
> This seems like how it should've been in the first place anyway since recording
> the length of the filtered changelog seems odd for a transaction.
>
> This was causing test failures in a future patch, so test coverage will come
> from the future patch.

yes, writing filtered journal seems a bit odd. However I would rather
see the whole _writejournal run unfiltered.

There are a '@unfilteredmethod' decorator for this purpose.

To push things further. I wonder why we are not running the transaction
logic on an unfiltered repository, but I could see potential issue with
hooks running on an unfiltered repository.

I've run the testsuit with a unfiltered transaction and everything pass.
That might be from the lack of testing of the problematic cases.

> diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
> --- a/mercurial/localrepo.py
> +++ b/mercurial/localrepo.py
> @@ -1123,7 +1123,7 @@ class localrepository(object):
>          self.vfs.write("journal.branch",
>                            encoding.fromlocal(self.dirstate.branch()))
>          self.vfs.write("journal.desc",
> -                          "%d\n%s\n" % (len(self), desc))
> +                          "%d\n%s\n" % (len(self.unfiltered()), desc))
>          self.vfs.write("journal.bookmarks",
>                            self.vfs.tryread("bookmarks"))
>          self.svfs.write("journal.phaseroots",


--
Pierre-Yves David
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 5 of 6] hidden: add hiddenset and updatevisibility logic

Pierre-Yves David-2
In reply to this post by Gregory Szorc
On 05/19/2017 05:25 AM, Gregory Szorc wrote:

> On Thu, May 18, 2017 at 11:23 AM, Durham Goode <[hidden email]
> <mailto:[hidden email]>> wrote:
>
>     # HG changeset patch
>     # User Durham Goode <[hidden email] <mailto:[hidden email]>>
>     # Date 1495129620 25200
>     #      Thu May 18 10:47:00 2017 -0700
>     # Node ID 0979ce9d40803103441b6df1ebefafe80bb9a057
>     # Parent  2aef3c4be6ba81e9d2453760515517df6fc2b816
>     hidden: add hiddenset and updatevisibility logic

TL;DR; This series seems to try to address multiple distinct problem.
Multiple of them having much simpler solution (some already available).
I think we should clarify our goals here. The result will probably be to
focus the effort on adding a simple "local-hidding" mechanism using the
existing hiding API.

--

[longer reply]

Having looked at the full series, I've the feeling that there are many
distinct topic that this new class tries to address. I've gathered the
following:

  * hidden API usable by extension,
  * speed improvement (using write time computation),
  * an API to get notified of relevant changes,
  * Local hiding solution,
  * unified storage for hiding.

There might be other I missed. Durham can you clarify your goals if needed?

Trying to tackle that many different problem at the same time worries
me. Addressing each of them independently should be simpler, in
addition, solving issue at a lower level bring benefit to other parts of
the code. For example, skipping obs-store loading just needed a straight
forward cache, easy to update iteratively. Having that cache in now
speeding up all operations that rely on the "obsolete-set" information,
and the cache logic can be reused for other related data.

My short opinion on all this topics are:

  * API: extensible API already exists so I'm a bit confused,
  * Speed: simpler solutions in review,
  * notification: seems redundant with 'tr.changes' and less robust.
  * local-hiding: Good idea, go for it (consider using ranges, not roots)
  * unifying storage: seems unnecessary and dangerous.

Longer opinion available below. I've also included technical comment in
the patch itself.

API for extensions
------------------

Quoting durham email:

> This has the nice property that all logic for propagating hiddenness is
> contained to one function, and all the consumers just have to report what
> commits they touched. If an external extension wants to modify how hiding is
> computed, it can just wrap this one function.

I'm a bit confused about that part of your description, because the
current implementation offer a small surface API already.

There are two functions "hideablerevs" and "_getdynamicblockers"[1].

* "hideablerevs": return the revision we would like to see hidden
   (eg: obsolete revision)

* "_getdynamicblockers": return revision that must be visible
   (eg: bookmarks)

Extensions can already overwrite these two functions and augment/alter
their return to get any wished behavior.

The one difference I see with your proposal is the fact it is two
functions, not one. We could have a single function returning the two
data if we think it is simpler.

So It is not clear to me what this new object improves this area. I'm
even afraid that the complexity of the new object will not helps here.

[1] writing this email made me send a patch to rename it "revealedrevs".

Speed
-----

The current approach for computing hidden can scale well. Actually, I've
two series on the list that speed it up down to 1ms order of magnitude.
The first one[2] is adding a small cache to retrieve the set of obsolete
changeset without loading the obsstore. The second one[3] is updating
the old computehidden code to only performs O(len(visible-mutable))
python operations.

If our goal is to provide local-only hiding, we can get there faster,
with simpler code, by simply feeding the "hideablerevs" function.

[2]
https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-May/098257.html 
(an equivalent is shipped within evolve for multiple weeks)

[3]
https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-May/098308.html

Notification API
----------------

There are a similar efforts already started around the
'transactions.changes' dictionary. It aims at tracking all changes
happening within a transaction. If we needs to track changes, I
recommend reusing that logic instead of duplicating the effort (adding
more odds for bugs and maintenance load).

The 'tr.changes' approach the low level function adding data recording
these changes. This is lower level than the approach in this patch and
will be more robust as We need to update the one function adding data,
instead of all the consumer of that function. We just fixed a bug from
this lack of collaboration last week[3], so it seems important to keep
it simple here.

[3] 99515353c72a

Local hiding solution
---------------------

I think this is the most important goal here. As pointed before, we
could focus on a dedicated storage and rely on existing compute-hidden
mechanism.

The rootset could be used for this. I recommend considering a storage
based on range instead of roots. It would be simpler to compute revs
from them.

unified storage for hiding
--------------------------

Given we can extract and compute hidden information from obsolescence in
the 1ms range, I don't see the advantage of unifying the storage. On the
contrary, hidden properties from obsolescence is a critical part of
changeset-evolution. Mixing the storage will makes it harder to
guarantee this property. So better keeping them separated.


>> This adds a function that can incrementally update the hidden state for a
>> repository when given a list of nodes that have been changed. This will be
>> called by all actions that can affect hiddenness (bookmarks, tags, obsmarkers,
>> working copy parents). It uses a hash of the inputs that affect the hidden store
>> to ensure things are up-to-date.
>>
>> This has the nice property that all logic for propagating hiddenness is
>> contained to one function, and all the consumers just have to report what
>> commits they touched. If an external extension wants to modify how hiding is
>> computed, it can just wrap this one function.
 >>
>> It also is nice because it updates the hidden state incrementally, instead of
>> recomputing the entire repo at once.

> Most comments inline. My biggest concerns so far in this series are
> around performance and cache semantics. You said in a separate email
> (and the code reinforces) that this file is a cache. Yet it isn't in
> .hg/cache nor does it have a repo requirements entry guarding it. The
> nearest analogue is the phaseroots file. But that's a canonical store
> (not a cache) and old clients don't care about it (like caches) so we
> don't need a requirements file. So there's a bit of work that needs to
> be done to turn this into a proper cache.
>
>
>
>     diff --git a/mercurial/hidden.py b/mercurial/hidden.py
>     --- a/mercurial/hidden.py
>     +++ b/mercurial/hidden.py
>     @@ -5,12 +5,38 @@
>
>      from __future__ import absolute_import
>
>     +import hashlib
>     +import heapq
>     +
>      from .i18n import _
>     -from .node import bin, hex
>     +from .node import bin, hex, nullid, nullrev
>      from . import (
>          error,
>     +    obsolete,
>     +    tags as tagsmod,
>     +    util,
>      )
>
>     +class lazychildset(object):
>     +    """Data structure for efficiently answering the query 'what are
>     my children'

Can you elaborate on the intended usage? Using children in Mercurial is
costly, so I tend to avoid algorithm that needs them. However, I'm
certain you know that already so I'm curious about your usage.


>     +    for a series of revisions."""
>     +    def __init__(self, repo):
>     +        self.repo = repo.unfiltered()
>     +        self._childmap = {}
>     +        self._minprocessed = len(repo.changelog)
>     +
>     +    def get(self, rev):
>     +        childmap = self._childmap
>     +        cl = self.repo.changelog
>     +        if rev < self._minprocessed:
>     +            for r in xrange(rev, self._minprocessed):
>     +                for p in cl.parentrevs(r):
>     +                    if p != nullrev:
>     +                        childmap.setdefault(p, []).append(r)
>     +            self._minprocessed = rev
>     +
>     +        return childmap.get(rev, ())
>     +
>      class rootset(object):
>          """A commit set like object where a commit being in the set
>     implies that all
>          descendants of that commit are also in the set.
>     @@ -135,3 +161,168 @@ class rootset(object):
>          def __len__(self):
>              self._load()
>              return len(self._cache)
>     +
>     +class hiddenset(rootset):
>     +    """A set representing what commits should be hidden. It
>     contains logic to
>     +    update itself based on what nodes are reported as having been
>     changed.
>     +    """
>     +    def __init__(self, repo, opener, name):
>     +        super(hiddenset, self).__init__(repo, opener, name)
>     +        self._load()
>     +
>     +    def _load(self):
>     +        if self.roots is None:
>     +            try:
>     +                cachehash = None
>     +                raw = self.opener.read(self.filename)
>     +                if len(raw) >= 20:
>     +                    cachehash = raw[:20]
>     +                    raw = raw[20:]
>     +                else:
>     +                    raise RuntimeError("invalid cache")
>
>
> RuntimeError feels like the wrong exception here since lots of Python
> code can raise it and we don't want to be catching and swallowing
> unrelated errors. Can you invent a dummy type for this or validate the
> exception in the except block?
>
>
>     +
>     +                realhash = self._gethash()
>
>
> _gethash() calls _gethiderevs() which queries a bunch of things. I'm
> worried it is doing too much for a perf critical read path.
> Specifically, the call to obsolete.getrevs(repo, 'obsolete') requires
> loading the obsstore. I thought a point of this work was alleviate
> scaling problems of the obsstore? Note that since the obsstore is append
> only, perhaps we only need to store and compare the size of the obsstore
> file for hash verification?

I've a series on review that adds a good cache key for the obsstore. It
use both the size and some of the content.

https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-May/098257.html

Note that the same series also allows to bypass the obsstore to retrieve
the 'obsolete-set'. The "official" cachekey probably still better.

> Another concern is loading tags. I'd have to look at the code, but if
> this needs to resolve .hgtags, that's not great for perf.

The code only access local tags, so not '.hgtags' resolution here.

> Related to
> that, presumably this will get hooked into the visible repoview filter
> someday. Ideally, we shouldn't have to load tags, bookmarks, etc on the
> critical path of loading the default repoview.

Running with my two speed up enabled, I don't see a large impact of
local tags and bookmarks (but I've few of these). If I'm not mistaken,
parsing of these file are still done in Python so we have rooms for easy
speed up if needed.

> While I'm here, could v2 please include a large comment block in this
> file about the caching strategy? That really helps us re-evaluate the
> roles of caches in the future. See tags.py for inspiration.

Yes please!

>     +                if cachehash == realhash:
>     +                    self.roots = self._deserialize(raw)
>     +                else:
>     +                    raise RuntimeError("cache hash mismatch")
>     +            except (IOError, RuntimeError):
>     +                # Rebuild the hidden roots from scratch
>     +                self.roots = set()
>     +                self._cache = set()
>     +                revs = self.repo.revs('not public()')
>     +                self.updatevisibility(revs)
>
>
> Calling self.updatevisibility() here is problematic because
> updatevisibility() takes out a lock unconditionally and repo.hidden
> could be called by a process that doesn't have write permissions. See
> tags.py for how cache files should handle permissions errors. You'll
> also want a test for this scenario. There's one for the tags fnodes cache.
>
> Also, this pattern of cache I/O complete with hash verification is
> common enough and has been the source of enough bugs over the years that
> I think it is time to formalize the pattern (possibly at the vfs layer)
> in order to cut down on bugs...

I'm curious about your (greg) idea here, There are always some worries
about this lurking somewhere in my head.

> Finally, since this is a cache, why is it in .hg/store instead of .hg/cache?
>
>     +            self._fillcache()
>     +            self.dirty = False
>     +
>     +    def _write(self, fp):
>     +        fp.write(self._gethash())
>     +        fp.write(self._serialize())
>     +        self.dirty = False
>     +
>     +    def _gethash(self):
>     +        hiderevs, nohiderevs = self._gethiderevs()
>     +        cl = self.repo.changelog
>     +        hash = hashlib.sha1()
>     +        hash.update("hide")
>     +        hash.update(''.join(sorted(cl.node(r) for r in hiderevs)))
>     +        hash.update("nohide")
>     +        hash.update(''.join(sorted(cl.node(r) for r in nohiderevs)))
>     +
>     +        return hash.digest()
>     +
>     +    def _gethiderevs(self):
>     +        """Returns the set of revs that potentially could be hidden
>     and a set of
>     +        revs that definitely should not be hidden."""
>     +        hiderevs = set()
>     +        nohiderevs = set()
>     +
>     +        repo = self.repo
>     +        cl = repo.changelog
>     +
>     +        # Hide obsolete commits
>     +        hiderevs.update(obsolete.getrevs(repo, 'obsolete'))
>     +
>     +        # Don't hide bookmarks
>     +        nohiderevs.update(cl.rev(node) for name, node in
>     +                          repo._bookmarks.iteritems())
>     +
>     +        # Don't hide workingcopy parents
>     +        nohiderevs.update(cl.rev(node) for node in
>     repo.dirstate.parents()
>     +                          if node != nullid and node in cl.nodemap)
>     +
>     +        # Don't hide local tags
>     +        tags = {}
>     +        tagsmod.readlocaltags(repo.ui, repo, tags, {})
>     +        nohiderevs.update(cl.rev(t[0]) for t in tags.values()
>     +                          if t[0] in cl.nodemap)
>     +
>     +        # Don't hide rebase commits
>     +        if util.safehasattr(repo, '_rebaseset'):
>     +            nohiderevs.update(repo._rebaseset)
>     +
>     +        return hiderevs, nohiderevs
>     +
>     +    def _shouldhide(self, rev, childmap, hiderevs, nohiderevs):
>     +        repo = self.repo
>     +        ispublic = repo._phasecache.phase(repo, rev) == 0
>
>
> Nit: use constant in phases module instead of 0
>
>
>     +        cl = repo.changelog
>     +        return (not ispublic and
>     +                rev in hiderevs and
>     +                rev not in nohiderevs and
>     +                all(cl.node(r) in self for r in childmap.get(rev)))

Having comment on the various repository method would help to understand
their intention.

For example, this function usage and behavior is not too clear to me (I
get a vague sense). Can you clarify what is this function doing?

>     +
>     +    def _needupdate(self, revs):
>     +        repo = self.repo
>     +        cl = repo.changelog
>     +
>     +        hiderevs, nohiderevs = self._gethiderevs()
>     +
>     +        childmap = lazychildset(repo)
>     +        for rev in revs:
>     +            node = cl.node(rev)
>     +            # If the node should be visible...
>     +            if not self._shouldhide(rev, childmap, hiderevs, nohiderevs):
>
> Resolving repo.changelog on every invocation inside _shouldhide() can
> add overhead. I wonder if _shouldhide() should accept an iterable of
> revs and return a dict...

+1, on reducing the number of function call and attribute access.
Overall, I'm a bit scared about the complexity and the amount of various
roundtrip of this class. I'm a bit afraid it will need large amount of
polish to be fast enough.

>     +                # ...but it's hidden
>     +                if node in self:
>     +                    return True
>     +            # Otherwise, the node should be hidden
>     +            elif node not in self:
>     +                return True
>     +
>     +        return False
>     +
>     +    def updatevisibility(self, revs):
>     +        """Updates the visibility of the given revs, and propagates
>     those
>     +        updates to the rev parents as necessary."""
>     +        self._load()
>     +
>     +        repo = self.repo
>     +        seen = set(revs)
>     +        if not seen or not self._needupdate(seen):
>     +            # Even if there are no changes, we need to rewrite the
>     file so the
>     +            # cache hash is up to date with the latest changes.
>     +            with self.opener(self.filename, 'w+', atomictemp=True)
>     as fp:
>     +                self._write(fp)
>
>
> This being a cache, it needs to handle I/O failures gracefully if the
> user doesn't have permissions.
>
>
>     +            return
>     +
>     +        # Sort the revs so we can process them in topo order
>     +        heap = []
>     +        for rev in seen:
>     +            heapq.heappush(heap, -rev)
>     +
>     +        with repo.lock():
>     +            with repo.transaction('hidden') as tr:
>
>
> Do we need a transaction here? We don't use transactions for other caches.
>
>
>     +                hiderevs, nohiderevs = self._gethiderevs()
>     +
>     +                cl = repo.changelog
>     +                childmap = lazychildset(repo)
>     +                while heap:
>     +                    rev = -heapq.heappop(heap)
>     +
>     +                    # If it should be visible...
>     +                    node = cl.node(rev)
>     +                    if not self._shouldhide(rev, childmap, hiderevs,
>     +                                            nohiderevs):
>
>
> Another repo.changelog perf issue lurking in _shouldhide() inside a loop.
>
>
>     +                        # And it's currently invisible...
>     +                        if node in self:
>     +                            # Make it visible and propagate
>     +                            # (propogate first, since otherwise we
>     can't access
>     +                            # the parents)
>     +                            for parent in cl.parentrevs(rev):
>     +                                if parent != nullrev and parent not
>     in seen:
>     +                                    heapq.heappush(heap, -parent)
>     +                                    seen.add(parent)
>     +                            self.set(tr, cl.node(rev), False,
>     +                                     childmap=childmap)
>     +
>     +                    # If it should not be visible, and it's not
>     already hidden..
>     +                    elif node not in self:
>     +                        # Hide it and propagate (propogate first, since
>     +                        # otherwise we can't access the parents)
>     +                        for parent in cl.parentrevs(rev):
>     +                            if parent != nullrev and parent not in
>     seen:
>     +                                heapq.heappush(heap, -parent)
>     +                                seen.add(parent)
>     +                        self.set(tr, node, True, childmap=childmap)
>     diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
>     --- a/mercurial/localrepo.py
>     +++ b/mercurial/localrepo.py
>     @@ -388,7 +388,7 @@ class localrepository(object):
>
>          @storecache('hidden.roots')
>          def hidden(self):
>     -        return hidden.rootset(self, self.svfs, 'hidden')
>     +        return hidden.hiddenset(self, self.svfs, 'hidden')
>
>          def close(self):
>              self._writecaches()
>     @@ -1384,6 +1384,10 @@ class localrepository(object):
>              l = self._lock(self.svfs, "lock", wait, None,
>                             self.invalidate, _('repository %s') %
>     self.origroot)
>              self._lockref = weakref.ref(l)
>     +        # Force the hidden cache to load, so it can validate it's
>     cache hash
>     +        # against the unmodified bookmark, obsolete, tags, and
>     working copy
>     +        # states.
>     +        self.hidden
>              return l
>
>          def _wlockchecktransaction(self):
>     diff --git a/tests/test-empty.t b/tests/test-empty.t
>     --- a/tests/test-empty.t
>     +++ b/tests/test-empty.t
>     @@ -26,6 +26,7 @@ Check the basic files created:
>      Should be empty:
>
>        $ ls .hg/store
>     +  hidden.roots
>
>      Poke at a clone:
>
>     @@ -49,5 +50,6 @@ Poke at a clone:
>      Should be empty:
>
>        $ ls .hg/store
>     +  hidden.roots
>
>        $ cd ..
>     diff --git a/tests/test-fncache.t b/tests/test-fncache.t
>     --- a/tests/test-fncache.t
>     +++ b/tests/test-fncache.t
>     @@ -92,6 +92,7 @@ Non store repo:
>        .hg/data/tst.d.hg
>        .hg/data/tst.d.hg/foo.i
>        .hg/dirstate
>     +  .hg/hidden.roots
>        .hg/last-message.txt
>        .hg/phaseroots
>        .hg/requires
>     @@ -129,6 +130,7 @@ Non fncache repo:
>        .hg/store/data
>        .hg/store/data/tst.d.hg
>        .hg/store/data/tst.d.hg/_foo.i
>     +  .hg/store/hidden.roots
>        .hg/store/phaseroots
>        .hg/store/undo
>        .hg/store/undo.backupfiles
>     @@ -313,6 +315,7 @@ Aborted transactions can be recovered la
>        data/z.i
>        $ hg recover
>        rolling back interrupted transaction
>     +  warning: ignoring unknown working parent 4b0f1917ccc5!
>
>
> What's this about? Accessing repo.dirstate as part of cache validation?
>
>
>        checking changesets
>        checking manifests
>        crosschecking files in changesets and manifests
>     diff --git a/tests/test-hardlinks.t b/tests/test-hardlinks.t
>     --- a/tests/test-hardlinks.t
>     +++ b/tests/test-hardlinks.t
>     @@ -48,6 +48,7 @@ Prepare repo r1:
>        1 r1/.hg/store/data/d1/f2.i
>        1 r1/.hg/store/data/f1.i
>        1 r1/.hg/store/fncache
>     +  1 r1/.hg/store/hidden.roots
>        1 r1/.hg/store/phaseroots
>        1 r1/.hg/store/undo
>        1 r1/.hg/store/undo.backup.fncache
>     @@ -87,6 +88,7 @@ Repos r1 and r2 should now contain hardl
>        2 r1/.hg/store/data/d1/f2.i
>        2 r1/.hg/store/data/f1.i
>        2 r1/.hg/store/fncache
>     +  1 r1/.hg/store/hidden.roots
>        1 r1/.hg/store/phaseroots
>        1 r1/.hg/store/undo
>        1 r1/.hg/store/undo.backup.fncache
>     @@ -99,6 +101,7 @@ Repos r1 and r2 should now contain hardl
>        2 r2/.hg/store/data/d1/f2.i
>        2 r2/.hg/store/data/f1.i
>        2 r2/.hg/store/fncache
>     +  1 r2/.hg/store/hidden.roots
>
>      Repo r3 should not be hardlinked:
>
>     @@ -108,6 +111,7 @@ Repo r3 should not be hardlinked:
>        1 r3/.hg/store/data/d1/f2.i
>        1 r3/.hg/store/data/f1.i
>        1 r3/.hg/store/fncache
>     +  1 r3/.hg/store/hidden.roots
>        1 r3/.hg/store/phaseroots
>        1 r3/.hg/store/undo
>        1 r3/.hg/store/undo.backupfiles
>     @@ -134,6 +138,7 @@ Create a non-inlined filelog in r3:
>        1 r3/.hg/store/data/d1/f2.i
>        1 r3/.hg/store/data/f1.i
>        1 r3/.hg/store/fncache
>     +  1 r3/.hg/store/hidden.roots
>        1 r3/.hg/store/phaseroots
>        1 r3/.hg/store/undo
>        1 r3/.hg/store/undo.backup.fncache
>     @@ -167,6 +172,7 @@ Push to repo r1 should break up most har
>        1 r2/.hg/store/data/d1/f2.i
>        2 r2/.hg/store/data/f1.i
>        [12] r2/\.hg/store/fncache (re)
>     +  1 r2/.hg/store/hidden.roots
>
>      #if hardlink-whitelisted
>        $ nlinksdir r2/.hg/store/fncache
>     @@ -197,6 +203,7 @@ Committing a change to f1 in r1 must bre
>        1 r2/.hg/store/data/d1/f2.i
>        1 r2/.hg/store/data/f1.i
>        [12] r2/\.hg/store/fncache (re)
>     +  1 r2/.hg/store/hidden.roots
>
>      #if hardlink-whitelisted
>        $ nlinksdir r2/.hg/store/fncache
>     @@ -245,6 +252,7 @@ r4 has hardlinks in the working dir (not
>        2 r4/.hg/store/data/d1/f2.i
>        2 r4/.hg/store/data/f1.i
>        2 r4/.hg/store/fncache
>     +  2 r4/.hg/store/hidden.roots
>        2 r4/.hg/store/phaseroots
>        2 r4/.hg/store/undo
>        2 r4/.hg/store/undo.backup.fncache
>     @@ -294,6 +302,7 @@ Update back to revision 11 in r4 should
>        2 r4/.hg/store/data/d1/f2.i
>        2 r4/.hg/store/data/f1.i
>        2 r4/.hg/store/fncache
>     +  2 r4/.hg/store/hidden.roots
>        2 r4/.hg/store/phaseroots
>        2 r4/.hg/store/undo
>        2 r4/.hg/store/undo.backup.fncache
>     diff --git a/tests/test-hook.t b/tests/test-hook.t
>     --- a/tests/test-hook.t
>     +++ b/tests/test-hook.t
>     @@ -190,6 +190,7 @@ more there after
>        00manifest.i
>        data
>        fncache
>     +  hidden.roots
>        journal.phaseroots
>        phaseroots
>        undo
>     diff --git a/tests/test-inherit-mode.t b/tests/test-inherit-mode.t
>     --- a/tests/test-inherit-mode.t
>     +++ b/tests/test-inherit-mode.t
>     @@ -79,6 +79,7 @@ new directories are setgid
>        00660 ./.hg/store/data/dir/bar.i
>        00660 ./.hg/store/data/foo.i
>        00660 ./.hg/store/fncache
>     +  00660 ./.hg/store/hidden.roots
>        00660 ./.hg/store/phaseroots
>        00660 ./.hg/store/undo
>        00660 ./.hg/store/undo.backupfiles
>     @@ -125,6 +126,7 @@ group can still write everything
>        00660 ../push/.hg/store/data/dir/bar.i
>        00660 ../push/.hg/store/data/foo.i
>        00660 ../push/.hg/store/fncache
>     +  00660 ../push/.hg/store/hidden.roots
>        00660 ../push/.hg/store/undo
>        00660 ../push/.hg/store/undo.backupfiles
>        00660 ../push/.hg/store/undo.phaseroots
>     diff --git a/tests/test-upgrade-repo.t b/tests/test-upgrade-repo.t
>     --- a/tests/test-upgrade-repo.t
>     +++ b/tests/test-upgrade-repo.t
>     @@ -195,6 +195,7 @@ Upgrading a repository that is already m
>        repository locked and read-only
>        creating temporary repository to stage migrated data:
>     $TESTTMP/modern/.hg/upgrade.* (glob)
>        (it is safe to interrupt this process any time before data
>     migration completes)
>     +  copying hidden.roots
>        data fully migrated to temporary repository
>        marking source repository as being upgraded; clients will be
>     unable to read from repository
>        starting in-place swap of repository data
>     @@ -241,6 +242,7 @@ Upgrading a repository to generaldelta w
>        migrating changelog containing 3 revisions (184 bytes in store;
>     181 bytes tracked data)
>        finished migrating 3 changelog revisions; change in size: 0 bytes
>        finished migrating 9 total revisions; total change in store size:
>     0 bytes
>     +  copying hidden.roots
>        copying phaseroots
>        data fully migrated to temporary repository
>        marking source repository as being upgraded; clients will be
>     unable to read from repository
>     @@ -277,6 +279,7 @@ store directory has files we expect
>        00manifest.i
>        data
>        fncache
>     +  hidden.roots
>        phaseroots
>        undo
>        undo.backupfiles
>     @@ -303,6 +306,7 @@ old store should be backed up
>        00manifest.i
>        data
>        fncache
>     +  hidden.roots
>        phaseroots
>        undo
>        undo.backup.fncache
>     @@ -339,6 +343,7 @@ store files with special filenames aren'
>        finished migrating 1 changelog revisions; change in size: 0 bytes
>        finished migrating 3 total revisions; total change in store size:
>     0 bytes
>        copying .XX_special_filename
>     +  copying hidden.roots
>        copying phaseroots
>        data fully migrated to temporary repository
>        marking source repository as being upgraded; clients will be
>     unable to read from repository
>     diff --git a/tests/test-verify.t b/tests/test-verify.t
>     --- a/tests/test-verify.t
>     +++ b/tests/test-verify.t
>     @@ -78,6 +78,7 @@ Entire changelog missing
>
>        $ rm .hg/store/00changelog.*
>        $ hg verify -q
>     +  warning: ignoring unknown working parent c5ddb05ab828!
>         0: empty or missing changelog
>         manifest@0: d0b6632564d4 not in changesets
>         manifest@1: 941fc4534185 not in changesets
>     @@ -116,6 +117,7 @@ Entire changelog and manifest log missin
>        $ rm .hg/store/00changelog.*
>        $ rm .hg/store/00manifest.*
>        $ hg verify -q
>     +  warning: ignoring unknown working parent c5ddb05ab828!
>        warning: orphan revlog 'data/file.i'
>        1 warnings encountered!
>        $ cp -R .hg/store-full/. .hg/store
>     @@ -125,6 +127,7 @@ Entire changelog and filelog missing
>        $ rm .hg/store/00changelog.*
>        $ rm .hg/store/data/file.*
>        $ hg verify -q
>     +  warning: ignoring unknown working parent c5ddb05ab828!
>         0: empty or missing changelog
>         manifest@0: d0b6632564d4 not in changesets
>         manifest@1: 941fc4534185 not in changesets
>     @@ -158,6 +161,7 @@ Changelog missing entry
>
>        $ cp -f .hg/store-partial/00changelog.* .hg/store
>        $ hg verify -q
>     +  warning: ignoring unknown working parent c5ddb05ab828!
>         manifest@?: rev 1 points to nonexistent changeset 1performant
>         manifest@?: 941fc4534185 not in changesets
>         file@?: rev 1 points to nonexistent changeset 1
>     @@ -193,6 +197,7 @@ Changelog and manifest log missing entry
>        $ cp -f .hg/store-partial/00changelog.* .hg/store
>        $ cp -f .hg/store-partial/00manifest.* .hg/store
>        $ hg verify -q
>     +  warning: ignoring unknown working parent c5ddb05ab828!
>         file@?: rev 1 points to nonexistent changeset 1
>         (expected 0)
>         file@?: c10f2164107d not in manifests
>     @@ -206,6 +211,7 @@ Changelog and filelog missing entry
>        $ cp -f .hg/store-partial/00changelog.* .hg/store
>        $ cp -f .hg/store-partial/data/file.* .hg/store/data
>        $ hg verify -q
>     +  warning: ignoring unknown working parent c5ddb05ab828!
>         manifest@?: rev 1 points to nonexistent changeset 1
>         manifest@?: 941fc4534185 not in changesets
>         file@?: manifest refers to unknown revision c10f2164107d
>     _______________________________________________
>     Mercurial-devel mailing list
>     [hidden email]
>     <mailto:[hidden email]>
>     https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>     <https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel>
>
>
>
>
> _______________________________________________
> Mercurial-devel mailing list
> [hidden email]
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>

--
Pierre-Yves David
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 5 of 6] hidden: add hiddenset and updatevisibility logic

Durham Goode
On 5/22/17 2:38 AM, Pierre-Yves David wrote:

> On 05/19/2017 05:25 AM, Gregory Szorc wrote:
>> On Thu, May 18, 2017 at 11:23 AM, Durham Goode <[hidden email]
>> <mailto:[hidden email]>> wrote:
>>
>>     # HG changeset patch
>>     # User Durham Goode <[hidden email] <mailto:[hidden email]>>
>>     # Date 1495129620 25200
>>     #      Thu May 18 10:47:00 2017 -0700
>>     # Node ID 0979ce9d40803103441b6df1ebefafe80bb9a057
>>     # Parent  2aef3c4be6ba81e9d2453760515517df6fc2b816
>>     hidden: add hiddenset and updatevisibility logic
>
> TL;DR; This series seems to try to address multiple distinct problem.
> Multiple of them having much simpler solution (some already available).
> I think we should clarify our goals here. The result will probably be to
> focus the effort on adding a simple "local-hidding" mechanism using the
> existing hiding API.
>
> --
>
> [longer reply]
>
> Having looked at the full series, I've the feeling that there are many
> distinct topic that this new class tries to address. I've gathered the
> following:
>
>  * hidden API usable by extension,
>  * speed improvement (using write time computation),
>  * an API to get notified of relevant changes,
>  * Local hiding solution,
>  * unified storage for hiding.
>
> There might be other I missed. Durham can you clarify your goals if needed?

The overall goal here is to refactor hiding to be in an isolated part of
the code base, so we can reason about it and iterate on it more easily.
All the goals you list become easier to implement if we have a good
abstraction for hiding.

Part of the internal goal of this change is to put pieces in core that
would let us do hash preservation within Facebook, so we could get rid
of the inhibit extension.

> Trying to tackle that many different problem at the same time worries
> me. Addressing each of them independently should be simpler, in
> addition, solving issue at a lower level bring benefit to other parts of
> the code. For example, skipping obs-store loading just needed a straight
> forward cache, easy to update iteratively. Having that cache in now
> speeding up all operations that rely on the "obsolete-set" information,
> and the cache logic can be reused for other related data.

> My short opinion on all this topics are:
>
>  * API: extensible API already exists so I'm a bit confused,
>  * Speed: simpler solutions in review,
>  * notification: seems redundant with 'tr.changes' and less robust.
>  * local-hiding: Good idea, go for it (consider using ranges, not roots)
>  * unifying storage: seems unnecessary and dangerous.
>
> Longer opinion available below. I've also included technical comment in
> the patch itself.
>
> API for extensions
> ------------------
>
> Quoting durham email:
>
>> This has the nice property that all logic for propagating hiddenness is
>> contained to one function, and all the consumers just have to report what
>> commits they touched. If an external extension wants to modify how
>> hiding is
>> computed, it can just wrap this one function.
>
> I'm a bit confused about that part of your description, because the
> current implementation offer a small surface API already.
>
> There are two functions "hideablerevs" and "_getdynamicblockers"[1].
>
> * "hideablerevs": return the revision we would like to see hidden
>   (eg: obsolete revision)
>
> * "_getdynamicblockers": return revision that must be visible
>   (eg: bookmarks)
>
> Extensions can already overwrite these two functions and augment/alter
> their return to get any wished behavior.

The piece that is missing from the current API is infrastructure to
listen for changes that affect hiding at write time. This includes being
notified of a change, as well as having apis available to incrementally
update hiding during that change and a pluggable place to store the new
hidden computation results.

This series doesn't remove computehidden() as an extension point, it
just provides the ability to hook into hiding at more locations (write
time and storage time, in addition to the existing read time).

> Speed
> -----
>
> The current approach for computing hidden can scale well. Actually, I've
> two series on the list that speed it up down to 1ms order of magnitude.
> The first one[2] is adding a small cache to retrieve the set of obsolete
> changeset without loading the obsstore. The second one[3] is updating
> the old computehidden code to only performs O(len(visible-mutable))
> python operations.
>
> If our goal is to provide local-only hiding, we can get there faster,
> with simpler code, by simply feeding the "hideablerevs" function.
>

I eventually want every read operation to be O(number of changes you're
asking about).  If I run 'hg log -r .' and we go perform ancestor
queries on commits that are two months old, then that's too slow. We're
at the point where we're beginning to think about how to avoid loading
even parts of the changelog .i during standard read operations.

This series doesn't get us to O(1) hidden checks, but it puts in place
the hidden abstraction that would let us eventually get there. I don't
think we should have to solve O(1) lookups for all the inputs to hidden
before we can get O(1) hidden lookup time.

> Notification API
> ----------------
>
> There are a similar efforts already started around the
> 'transactions.changes' dictionary. It aims at tracking all changes
> happening within a transaction. If we needs to track changes, I
> recommend reusing that logic instead of duplicating the effort (adding
> more odds for bugs and maintenance load).
>
> The 'tr.changes' approach the low level function adding data recording
> these changes. This is lower level than the approach in this patch and
> will be more robust as We need to update the one function adding data,
> instead of all the consumer of that function. We just fixed a bug from
> this lack of collaboration last week[3], so it seems important to keep
> it simple here.

tr.changes might be the appropriate spot for this. I can definitely look
into that as a replacement for hidden.updatevisibility().

> Local hiding solution
> ---------------------
>
> I think this is the most important goal here. As pointed before, we
> could focus on a dedicated storage and rely on existing compute-hidden
> mechanism.

While local hiding is an important goal, I think it's somewhat
tangential to this series.  Local hiding could be done in the way you
propose, and could be done using my new apis as well. This series just
gives us some more options for when we get to that point.

> unified storage for hiding
> --------------------------
>
> Given we can extract and compute hidden information from obsolescence in
> the 1ms range, I don't see the advantage of unifying the storage. On the
> contrary, hidden properties from obsolescence is a critical part of
> changeset-evolution. Mixing the storage will makes it harder to
> guarantee this property. So better keeping them separated.

As we've seen, there are differing opinions on the future of hiding
commits in Mercurial and this patch opens doors to some options without
breaking evolve. Having this conceptual distinction in core will help
give people more insight into and confidence in the various options
around hiding so we can have a more productive discussion next time.
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH 5 of 6] hidden: add hiddenset and updatevisibility logic

Durham Goode
In reply to this post by Gregory Szorc
On 5/18/17 8:25 PM, Gregory Szorc wrote:

> On Thu, May 18, 2017 at 11:23 AM, Durham Goode <[hidden email]
> <mailto:[hidden email]>> wrote:
>
>     # HG changeset patch
>     # User Durham Goode <[hidden email] <mailto:[hidden email]>>
>     # Date 1495129620 25200
>     #      Thu May 18 10:47:00 2017 -0700
>     # Node ID 0979ce9d40803103441b6df1ebefafe80bb9a057
>     # Parent  2aef3c4be6ba81e9d2453760515517df6fc2b816
>     hidden: add hiddenset and updatevisibility logic
>
>     This adds a function that can incrementally update the hidden state
>     for a
>     repository when given a list of nodes that have been changed. This
>     will be
>     called by all actions that can affect hiddenness (bookmarks, tags,
>     obsmarkers,
>     working copy parents). It uses a hash of the inputs that affect the
>     hidden store
>     to ensure things are up-to-date.
>
>     This has the nice property that all logic for propagating hiddenness is
>     contained to one function, and all the consumers just have to report
>     what
>     commits they touched. If an external extension wants to modify how
>     hiding is
>     computed, it can just wrap this one function.
>
>     It also is nice because it updates the hidden state incrementally,
>     instead of
>     recomputing the entire repo at once.
>
>
> Most comments inline. My biggest concerns so far in this series are
> around performance and cache semantics. You said in a separate email
> (and the code reinforces) that this file is a cache. Yet it isn't in
> .hg/cache nor does it have a repo requirements entry guarding it. The
> nearest analogue is the phaseroots file. But that's a canonical store
> (not a cache) and old clients don't care about it (like caches) so we
> don't need a requirements file. So there's a bit of work that needs to
> be done to turn this into a proper cache.

My long term hopes were that it would stop being a cache, but you are
right. For the interim it should be in .hg/cache.

>     diff --git a/mercurial/hidden.py b/mercurial/hidden.py
>     --- a/mercurial/hidden.py
>     +++ b/mercurial/hidden.py
>     @@ -5,12 +5,38 @@
>
>      from __future__ import absolute_import
>
>     +import hashlib
>     +import heapq
>     +
>      from .i18n import _
>     -from .node import bin, hex
>     +from .node import bin, hex, nullid, nullrev
>      from . import (
>          error,
>     +    obsolete,
>     +    tags as tagsmod,
>     +    util,
>      )
>
>     +class lazychildset(object):
>     +    """Data structure for efficiently answering the query 'what are
>     my children'
>     +    for a series of revisions."""
>     +    def __init__(self, repo):
>     +        self.repo = repo.unfiltered()
>     +        self._childmap = {}
>     +        self._minprocessed = len(repo.changelog)
>     +
>     +    def get(self, rev):
>     +        childmap = self._childmap
>     +        cl = self.repo.changelog
>     +        if rev < self._minprocessed:
>     +            for r in xrange(rev, self._minprocessed):
>     +                for p in cl.parentrevs(r):
>     +                    if p != nullrev:
>     +                        childmap.setdefault(p, []).append(r)
>     +            self._minprocessed = rev
>     +
>     +        return childmap.get(rev, ())
>     +
>      class rootset(object):
>          """A commit set like object where a commit being in the set
>     implies that all
>          descendants of that commit are also in the set.
>     @@ -135,3 +161,168 @@ class rootset(object):
>          def __len__(self):
>              self._load()
>              return len(self._cache)
>     +
>     +class hiddenset(rootset):
>     +    """A set representing what commits should be hidden. It
>     contains logic to
>     +    update itself based on what nodes are reported as having been
>     changed.
>     +    """
>     +    def __init__(self, repo, opener, name):
>     +        super(hiddenset, self).__init__(repo, opener, name)
>     +        self._load()
>     +
>     +    def _load(self):
>     +        if self.roots is None:
>     +            try:
>     +                cachehash = None
>     +                raw = self.opener.read(self.filename)
>     +                if len(raw) >= 20:
>     +                    cachehash = raw[:20]
>     +                    raw = raw[20:]
>     +                else:
>     +                    raise RuntimeError("invalid cache")
>
>
> RuntimeError feels like the wrong exception here since lots of Python
> code can raise it and we don't want to be catching and swallowing
> unrelated errors. Can you invent a dummy type for this or validate the
> exception in the except block?
>
>
>     +
>     +                realhash = self._gethash()
>
>
> _gethash() calls _gethiderevs() which queries a bunch of things. I'm
> worried it is doing too much for a perf critical read path.
> Specifically, the call to obsolete.getrevs(repo, 'obsolete') requires
> loading the obsstore. I thought a point of this work was alleviate
> scaling problems of the obsstore? Note that since the obsstore is append
> only, perhaps we only need to store and compare the size of the obsstore
> file for hash verification?

The first step is to get all the scaling problems into one area (in this
case _gethash), then we can iterate on why _gethash is slow, like using
the obstore's new hash function.

> Another concern is loading tags. I'd have to look at the code, but if
> this needs to resolve .hgtags, that's not great for perf. Related to
> that, presumably this will get hooked into the visible repoview filter
> someday. Ideally, we shouldn't have to load tags, bookmarks, etc on the
> critical path of loading the default repoview.

Pierre-Yves addressed this, but this is only local tags and therefore
doesn't read .hgtags.

> While I'm here, could v2 please include a large comment block in this
> file about the caching strategy? That really helps us re-evaluate the
> roles of caches in the future. See tags.py for inspiration.
>
>
>     +                if cachehash == realhash:
>     +                    self.roots = self._deserialize(raw)
>     +                else:
>     +                    raise RuntimeError("cache hash mismatch")
>     +            except (IOError, RuntimeError):
>     +                # Rebuild the hidden roots from scratch
>     +                self.roots = set()
>     +                self._cache = set()
>     +                revs = self.repo.revs('not public()')
>     +                self.updatevisibility(revs)
>
>
> Calling self.updatevisibility() here is problematic because
> updatevisibility() takes out a lock unconditionally and repo.hidden
> could be called by a process that doesn't have write permissions. See
> tags.py for how cache files should handle permissions errors. You'll
> also want a test for this scenario. There's one for the tags fnodes cache.

True. I can make it gracefully handle permission issues. I'll have to
think about if there's an abstraction that would work here.

> Also, this pattern of cache I/O complete with hash verification is
> common enough and has been the source of enough bugs over the years that
> I think it is time to formalize the pattern (possibly at the vfs layer)
> in order to cut down on bugs...
>
> Finally, since this is a cache, why is it in .hg/store instead of .hg/cache?
>
>
>
>     +
>     +            self._fillcache()
>     +            self.dirty = False
>     +
>     +    def _write(self, fp):
>     +        fp.write(self._gethash())
>     +        fp.write(self._serialize())
>     +        self.dirty = False
>     +
>     +    def _gethash(self):
>     +        hiderevs, nohiderevs = self._gethiderevs()
>     +        cl = self.repo.changelog
>     +        hash = hashlib.sha1()
>     +        hash.update("hide")
>     +        hash.update(''.join(sorted(cl.node(r) for r in hiderevs)))
>     +        hash.update("nohide")
>     +        hash.update(''.join(sorted(cl.node(r) for r in nohiderevs)))
>     +
>     +        return hash.digest()
>     +
>     +    def _gethiderevs(self):
>     +        """Returns the set of revs that potentially could be hidden
>     and a set of
>     +        revs that definitely should not be hidden."""
>     +        hiderevs = set()
>     +        nohiderevs = set()
>     +
>     +        repo = self.repo
>     +        cl = repo.changelog
>     +
>     +        # Hide obsolete commits
>     +        hiderevs.update(obsolete.getrevs(repo, 'obsolete'))
>     +
>     +        # Don't hide bookmarks
>     +        nohiderevs.update(cl.rev(node) for name, node in
>     +                          repo._bookmarks.iteritems())
>     +
>     +        # Don't hide workingcopy parents
>     +        nohiderevs.update(cl.rev(node) for node in
>     repo.dirstate.parents()
>     +                          if node != nullid and node in cl.nodemap)
>     +
>     +        # Don't hide local tags
>     +        tags = {}
>     +        tagsmod.readlocaltags(repo.ui, repo, tags, {})
>     +        nohiderevs.update(cl.rev(t[0]) for t in tags.values()
>     +                          if t[0] in cl.nodemap)
>     +
>     +        # Don't hide rebase commits
>     +        if util.safehasattr(repo, '_rebaseset'):
>     +            nohiderevs.update(repo._rebaseset)
>     +
>     +        return hiderevs, nohiderevs
>     +
>     +    def _shouldhide(self, rev, childmap, hiderevs, nohiderevs):
>     +        repo = self.repo
>     +        ispublic = repo._phasecache.phase(repo, rev) == 0
>
>
> Nit: use constant in phases module instead of 0
>
>
>     +        cl = repo.changelog
>     +        return (not ispublic and
>     +                rev in hiderevs and
>     +                rev not in nohiderevs and
>     +                all(cl.node(r) in self for r in childmap.get(rev)))
>     +
>     +    def _needupdate(self, revs):
>     +        repo = self.repo
>     +        cl = repo.changelog
>     +
>     +        hiderevs, nohiderevs = self._gethiderevs()
>     +
>     +        childmap = lazychildset(repo)
>     +        for rev in revs:
>     +            node = cl.node(rev)
>     +            # If the node should be visible...
>     +            if not self._shouldhide(rev, childmap, hiderevs,
>     nohiderevs):
>
>
> Resolving repo.changelog on every invocation inside _shouldhide() can
> add overhead. I wonder if _shouldhide() should accept an iterable of
> revs and return a dict...
>
>
>     +                # ...but it's hidden
>     +                if node in self:
>     +                    return True
>     +            # Otherwise, the node should be hidden
>     +            elif node not in self:
>     +                return True
>     +
>     +        return False
>     +
>     +    def updatevisibility(self, revs):
>     +        """Updates the visibility of the given revs, and propagates
>     those
>     +        updates to the rev parents as necessary."""
>     +        self._load()
>     +
>     +        repo = self.repo
>     +        seen = set(revs)
>     +        if not seen or not self._needupdate(seen):
>     +            # Even if there are no changes, we need to rewrite the
>     file so the
>     +            # cache hash is up to date with the latest changes.
>     +            with self.opener(self.filename, 'w+', atomictemp=True)
>     as fp:
>     +                self._write(fp)
>
>
> This being a cache, it needs to handle I/O failures gracefully if the
> user doesn't have permissions.
>
>
>     +            return
>     +
>     +        # Sort the revs so we can process them in topo order
>     +        heap = []
>     +        for rev in seen:
>     +            heapq.heappush(heap, -rev)
>     +
>     +        with repo.lock():
>     +            with repo.transaction('hidden') as tr:
>
>
> Do we need a transaction here? We don't use transactions for other caches.

The transaction is nice because it let's us defer writing the cache
until the transaction completes.  I think all writes to disk should be
in transactions, and the fact that our current caches avoid it is just a
symptom of the lack of support in our transaction layer.

>
>     +                hiderevs, nohiderevs = self._gethiderevs()
>     +
>     +                cl = repo.changelog
>     +                childmap = lazychildset(repo)
>     +                while heap:
>     +                    rev = -heapq.heappop(heap)
>     +
>     +                    # If it should be visible...
>     +                    node = cl.node(rev)
>     +                    if not self._shouldhide(rev, childmap, hiderevs,
>     +                                            nohiderevs):
>
>
> Another repo.changelog perf issue lurking in _shouldhide() inside a loop.
>
>
>     +                        # And it's currently invisible...
>     +                        if node in self:
>     +                            # Make it visible and propagate
>     +                            # (propogate first, since otherwise we
>     can't access
>     +                            # the parents)
>     +                            for parent in cl.parentrevs(rev):
>     +                                if parent != nullrev and parent not
>     in seen:
>     +                                    heapq.heappush(heap, -parent)
>     +                                    seen.add(parent)
>     +                            self.set(tr, cl.node(rev), False,
>     +                                     childmap=childmap)
>     +
>     +                    # If it should not be visible, and it's not
>     already hidden..
>     +                    elif node not in self:
>     +                        # Hide it and propagate (propogate first, since
>     +                        # otherwise we can't access the parents)
>     +                        for parent in cl.parentrevs(rev):
>     +                            if parent != nullrev and parent not in
>     seen:
>     +                                heapq.heappush(heap, -parent)
>     +                                seen.add(parent)
>     +                        self.set(tr, node, True, childmap=childmap)
>     diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
>     --- a/mercurial/localrepo.py
>     +++ b/mercurial/localrepo.py
>     @@ -388,7 +388,7 @@ class localrepository(object):
>
>          @storecache('hidden.roots')
>          def hidden(self):
>     -        return hidden.rootset(self, self.svfs, 'hidden')
>     +        return hidden.hiddenset(self, self.svfs, 'hidden')
>
>          def close(self):
>              self._writecaches()
>     @@ -1384,6 +1384,10 @@ class localrepository(object):
>              l = self._lock(self.svfs, "lock", wait, None,
>                             self.invalidate, _('repository %s') %
>     self.origroot)
>              self._lockref = weakref.ref(l)
>     +        # Force the hidden cache to load, so it can validate it's
>     cache hash
>     +        # against the unmodified bookmark, obsolete, tags, and
>     working copy
>     +        # states.
>     +        self.hidden
>              return l
>
>          def _wlockchecktransaction(self):
>     diff --git a/tests/test-empty.t b/tests/test-empty.t
>     --- a/tests/test-empty.t
>     +++ b/tests/test-empty.t
>     @@ -26,6 +26,7 @@ Check the basic files created:
>      Should be empty:
>
>        $ ls .hg/store
>     +  hidden.roots
>
>      Poke at a clone:
>
>     @@ -49,5 +50,6 @@ Poke at a clone:
>      Should be empty:
>
>        $ ls .hg/store
>     +  hidden.roots
>
>        $ cd ..
>     diff --git a/tests/test-fncache.t b/tests/test-fncache.t
>     --- a/tests/test-fncache.t
>     +++ b/tests/test-fncache.t
>     @@ -92,6 +92,7 @@ Non store repo:
>        .hg/data/tst.d.hg
>        .hg/data/tst.d.hg/foo.i
>        .hg/dirstate
>     +  .hg/hidden.roots
>        .hg/last-message.txt
>        .hg/phaseroots
>        .hg/requires
>     @@ -129,6 +130,7 @@ Non fncache repo:
>        .hg/store/data
>        .hg/store/data/tst.d.hg
>        .hg/store/data/tst.d.hg/_foo.i
>     +  .hg/store/hidden.roots
>        .hg/store/phaseroots
>        .hg/store/undo
>        .hg/store/undo.backupfiles
>     @@ -313,6 +315,7 @@ Aborted transactions can be recovered la
>        data/z.i
>        $ hg recover
>        rolling back interrupted transaction
>     +  warning: ignoring unknown working parent 4b0f1917ccc5!
>
>
> What's this about? Accessing repo.dirstate as part of cache validation?

Yup. The issue was there before, this just caused it to show up in a
warning.
_______________________________________________
Mercurial-devel mailing list
[hidden email]
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Loading...