D7886: nodemap: introduce append-only incremental update of the persistent data


D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Rewriting the full nodemap for each transaction has a cost we would like to
  avoid. We introduce a new way to write persistent nodemap data by adding new
  information at the end of the file. Any new or updated block is added at the
  end of the file, and the last block is the new root node.

  With this method, some of the blocks already on disk get "dereferenced" and
  become dead data. In later changesets, we'll start tracking the amount of
  dead data in order to eventually regenerate a full nodemap.
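
  (As a rough illustration only, not the patch's actual code: `append_new_blocks`
  and `BLOCK_SIZE` below are simplified stand-ins for the `_persist_trie` and
  `S_BLOCK` machinery in the diff, and a plain file path stands in for the
  revlog vfs and docket.)

    import os

    BLOCK_SIZE = 16 * 8  # 16 eight-byte slots per block ("q" * 16 in the patch)

    def append_new_blocks(path, new_chunks):
        """Append already-serialized blocks; return the index of the new root.

        `new_chunks` is ordered so that the new root block comes last.  Blocks
        already on disk are never rewritten; once superseded they simply
        become dead data.
        """
        existing = os.path.getsize(path) if os.path.exists(path) else 0
        assert existing % BLOCK_SIZE == 0, "data file must be block aligned"
        with open(path, "ab") as fd:  # append-only write
            for chunk in new_chunks:
                fd.write(chunk)
        total = existing + sum(len(c) for c in new_chunks)
        return total // BLOCK_SIZE - 1  # the last block on disk is the new root

  The real code writes through the revlog opener and records the result in a
  docket file, but the invariant is the same: the current root can always be
  found by reading the last block of the data file.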

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
-    if util.safehasattr(revlog.index, "nodemap_data_all"):
-        data = revlog.index.nodemap_data_all()
+
+    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
+    ondisk_docket = revlog._nodemap_docket
+
+    # first attemp an incremental update of the data
+    if can_incremental and ondisk_docket is not None:
+        target_docket = revlog._nodemap_docket.copy()
+        data = revlog.index.nodemap_data_incremental()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, 'a') as fd:
+            fd.write(data)
     else:
-        data = persistent_data(revlog.index)
-    target_docket = NodeMapDocket()
-    datafile = _rawdata_filepath(revlog, target_docket)
+        # otherwise fallback to a full new export
+        target_docket = NodeMapDocket()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        if util.safehasattr(revlog.index, "nodemap_data_all"):
+            data = revlog.index.nodemap_data_all()
+        else:
+            data = persistent_data(revlog.index)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, 'w') as fd:
+            fd.write(data)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
+        fp.write(target_docket.serialize())
+    revlog._nodemap_docket = target_docket
+    # EXP-TODO: if the transaction abort, we should remove the new data and
+    # reinstall the old one.
+
+    # search for old index file in all cases, some older process might have
+    # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@
 
         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(datafile, 'w') as fd:
-        fd.write(data)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
-        fp.write(target_docket.serialize())
-    revlog._nodemap_docket = target_docket
-    # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@
     return _dump_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+    """return the serialised data of a nodemap for a given index
+    """
+    trie = _update_trie(index, root, last_rev)
+    return _dump_trie(trie, existing_idx=max_idx)
+
+
 S_BLOCK = struct.Struct(">" + ("q" * 16))
 
 NO_ENTRY = -1
@@ -259,6 +284,14 @@
     return root
 
 
+def _update_trie(index, root, last_rev):
+    """consume"""
+    for rev in range(last_rev + 1, len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block
 
@@ -268,6 +301,8 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    if block.ondisk_id is not None:
+        block.ondisk_id = None
     entry = block.get(_to_int(current_hex[level]))
     if entry is None:
         # no entry, simply store the revision number
@@ -289,15 +324,22 @@
         block[_to_int(other_hex[level])] = other_rev
 
 
-def _dump_trie(root):
+def _dump_trie(root, existing_idx=None):
     """serialise a nodemap trie
 
     See `_build_trie` for nodemap trie structure"""
     block_map = {}
+    if existing_idx is not None:
+        base_idx = existing_idx + 1
+    else:
+        base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
-        block_map[id(tn)] = len(chunks)
-        chunks.append(_dump_block(tn, block_map))
+        if tn.ondisk_id is not None:
+            block_map[id(tn)] = tn.ondisk_id
+        else:
+            block_map[id(tn)] = len(chunks) + base_idx
+            chunks.append(_dump_block(tn, block_map))
     return ''.join(chunks)
 
 
@@ -339,11 +381,11 @@
         msg = "nodemap data size is not a multiple of block size (%d): %d"
         raise error.Abort(msg % (S_BLOCK.size, len(data)))
     if not data:
-        return Block()
+        return Block(), None
     block_map = {}
     for i in range(0, len(data), S_BLOCK.size):
         block = _parse_block(block_map, data[i : i + S_BLOCK.size])
-    return block
+    return block, i // S_BLOCK.size
 
 
 def _parse_block(block_map, block_data):
@@ -370,7 +412,7 @@
     """verify that the provided nodemap data are valid for the given idex"""
     ret = 0
     ui.status((b"revision in index:   %d\n") % len(index))
-    root = parse_data(data)
+    root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -156,13 +156,31 @@
         index."""
         return nodemaputil.persistent_data(self)
 
+    def nodemap_data_incremental(self):
+        """Return bytes containing a incremental serialization of a nodemap
+
+        This containst the data for an append-only update of the data provided
+        in the last call to `update_nodemap_data`.
+        """
+        if self._nm_root is None:
+            return None
+        data = nodemaputil.update_persistent_data(
+            self, self._nm_root, self._nm_max_idx, self._nm_rev
+        )
+        self._nm_root = self._nm_max_idx = self._nm_rev = None
+        return data
+
     def update_nodemap_data(self, nm_data):
         """provide full serialiazed data from a nodemap
 
         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
-            nodemaputil.parse_data(nm_data)
+            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
+            if self._nm_root:
+                self._nm_rev = len(self) - 1
+            else:
+                self._nm_root = self._nm_max_idx = self._nm_rev = None
 
 
 class InlinedIndexObject(BaseIndexObject):



To: marmoute, #hg-reviewers
Cc: mercurial-devel

D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
martinvonz added inline comments.

INLINE COMMENTS

> test-persistent-nodemap.t:53
> +
> +(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
> +

Which patch introduced the C code? I have not looked at all patches yet, but I thought the idea was that this entire stack was just pure Python and then you'd add a Rust version (not C) in a separate series.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel

D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
In reply to this post by martinvonz (Martin von Zweigbergk)
marmoute added inline comments.

INLINE COMMENTS

> martinvonz wrote in test-persistent-nodemap.t:53
> Which patch introduced the C code? I have not looked at all patches yet, but I thought the idea was that this entire stack was just pure Python and then you'd add a Rust version (not C) in a separate series.

There is no C code (and none is planned). This should say `the C path` or `the cext policy`.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel

D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
In reply to this post by martinvonz (Martin von Zweigbergk)
marmoute updated this revision to Diff 19767.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7886?vs=19304&id=19767

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
-    if util.safehasattr(revlog.index, "nodemap_data_all"):
-        data = revlog.index.nodemap_data_all()
+
+    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
+    ondisk_docket = revlog._nodemap_docket
+
+    # first attemp an incremental update of the data
+    if can_incremental and ondisk_docket is not None:
+        target_docket = revlog._nodemap_docket.copy()
+        data = revlog.index.nodemap_data_incremental()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, 'a') as fd:
+            fd.write(data)
     else:
-        data = persistent_data(revlog.index)
-    target_docket = NodeMapDocket()
-    datafile = _rawdata_filepath(revlog, target_docket)
+        # otherwise fallback to a full new export
+        target_docket = NodeMapDocket()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        if util.safehasattr(revlog.index, "nodemap_data_all"):
+            data = revlog.index.nodemap_data_all()
+        else:
+            data = persistent_data(revlog.index)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, 'w') as fd:
+            fd.write(data)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
+        fp.write(target_docket.serialize())
+    revlog._nodemap_docket = target_docket
+    # EXP-TODO: if the transaction abort, we should remove the new data and
+    # reinstall the old one.
+
+    # search for old index file in all cases, some older process might have
+    # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@
 
         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(datafile, 'w') as fd:
-        fd.write(data)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
-        fp.write(target_docket.serialize())
-    revlog._nodemap_docket = target_docket
-    # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@
     return _persist_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+    """return the incremental update for persistent nodemap from a given index
+    """
+    trie = _update_trie(index, root, last_rev)
+    return _persist_trie(trie, existing_idx=max_idx)
+
+
 S_BLOCK = struct.Struct(">" + ("q" * 16))
 
 NO_ENTRY = -1
@@ -259,6 +284,14 @@
     return root
 
 
+def _update_trie(index, root, last_rev):
+    """consume"""
+    for rev in range(last_rev + 1, len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block
 
@@ -268,6 +301,8 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    if block.ondisk_id is not None:
+        block.ondisk_id = None
     entry = block.get(_to_int(current_hex[level]))
     if entry is None:
         # no entry, simply store the revision number
@@ -286,15 +321,22 @@
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
 
 
-def _persist_trie(root):
+def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data
 
     See `_build_trie` for nodemap trie structure"""
     block_map = {}
+    if existing_idx is not None:
+        base_idx = existing_idx + 1
+    else:
+        base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
-        block_map[id(tn)] = len(chunks)
-        chunks.append(_persist_block(tn, block_map))
+        if tn.ondisk_id is not None:
+            block_map[id(tn)] = tn.ondisk_id
+        else:
+            block_map[id(tn)] = len(chunks) + base_idx
+            chunks.append(_persist_block(tn, block_map))
     return b''.join(chunks)
 
 
@@ -336,11 +378,11 @@
         msg = "nodemap data size is not a multiple of block size (%d): %d"
         raise error.Abort(msg % (S_BLOCK.size, len(data)))
     if not data:
-        return Block()
+        return Block(), None
     block_map = {}
     for i in range(0, len(data), S_BLOCK.size):
         block = _parse_block(block_map, data[i : i + S_BLOCK.size])
-    return block
+    return block, i // S_BLOCK.size
 
 
 def _parse_block(block_map, block_data):
@@ -367,7 +409,7 @@
     """verify that the provided nodemap data are valid for the given idex"""
     ret = 0
     ui.status((b"revision in index:   %d\n") % len(index))
-    root = parse_data(data)
+    root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -156,13 +156,31 @@
         index."""
         return nodemaputil.persistent_data(self)
 
+    def nodemap_data_incremental(self):
+        """Return bytes containing a incremental update to persistent nodemap
+
+        This containst the data for an append-only update of the data provided
+        in the last call to `update_nodemap_data`.
+        """
+        if self._nm_root is None:
+            return None
+        data = nodemaputil.update_persistent_data(
+            self, self._nm_root, self._nm_max_idx, self._nm_rev
+        )
+        self._nm_root = self._nm_max_idx = self._nm_rev = None
+        return data
+
     def update_nodemap_data(self, nm_data):
         """provide full blokc of persisted binary data for a nodemap
 
         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
-            nodemaputil.parse_data(nm_data)
+            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
+            if self._nm_root:
+                self._nm_rev = len(self) - 1
+            else:
+                self._nm_root = self._nm_max_idx = self._nm_rev = None
 
 
 class InlinedIndexObject(BaseIndexObject):



To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel

D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
In reply to this post by martinvonz (Martin von Zweigbergk)
marmoute updated this revision to Diff 19795.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7886?vs=19767&id=19795

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
-    if util.safehasattr(revlog.index, "nodemap_data_all"):
-        data = revlog.index.nodemap_data_all()
+
+    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
+    ondisk_docket = revlog._nodemap_docket
+
+    # first attemp an incremental update of the data
+    if can_incremental and ondisk_docket is not None:
+        target_docket = revlog._nodemap_docket.copy()
+        data = revlog.index.nodemap_data_incremental()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'a') as fd:
+            fd.write(data)
     else:
-        data = persistent_data(revlog.index)
-    target_docket = NodeMapDocket()
-    datafile = _rawdata_filepath(revlog, target_docket)
+        # otherwise fallback to a full new export
+        target_docket = NodeMapDocket()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        if util.safehasattr(revlog.index, "nodemap_data_all"):
+            data = revlog.index.nodemap_data_all()
+        else:
+            data = persistent_data(revlog.index)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'w') as fd:
+            fd.write(data)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
+        fp.write(target_docket.serialize())
+    revlog._nodemap_docket = target_docket
+    # EXP-TODO: if the transaction abort, we should remove the new data and
+    # reinstall the old one.
+
+    # search for old index file in all cases, some older process might have
+    # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@
 
         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(datafile, b'w') as fd:
-        fd.write(data)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
-        fp.write(target_docket.serialize())
-    revlog._nodemap_docket = target_docket
-    # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@
     return _persist_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+    """return the incremental update for persistent nodemap from a given index
+    """
+    trie = _update_trie(index, root, last_rev)
+    return _persist_trie(trie, existing_idx=max_idx)
+
+
 S_BLOCK = struct.Struct(">" + ("q" * 16))
 
 NO_ENTRY = -1
@@ -259,6 +284,14 @@
     return root
 
 
+def _update_trie(index, root, last_rev):
+    """consume"""
+    for rev in range(last_rev + 1, len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block
 
@@ -268,6 +301,8 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    if block.ondisk_id is not None:
+        block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
     entry = block.get(hex_digit)
     if entry is None:
@@ -287,15 +322,22 @@
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
 
 
-def _persist_trie(root):
+def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data
 
     See `_build_trie` for nodemap trie structure"""
     block_map = {}
+    if existing_idx is not None:
+        base_idx = existing_idx + 1
+    else:
+        base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
-        block_map[id(tn)] = len(chunks)
-        chunks.append(_persist_block(tn, block_map))
+        if tn.ondisk_id is not None:
+            block_map[id(tn)] = tn.ondisk_id
+        else:
+            block_map[id(tn)] = len(chunks) + base_idx
+            chunks.append(_persist_block(tn, block_map))
     return b''.join(chunks)
 
 
@@ -337,11 +379,11 @@
         msg = "nodemap data size is not a multiple of block size (%d): %d"
         raise error.Abort(msg % (S_BLOCK.size, len(data)))
     if not data:
-        return Block()
+        return Block(), None
     block_map = {}
     for i in range(0, len(data), S_BLOCK.size):
         block = _parse_block(block_map, data[i : i + S_BLOCK.size])
-    return block
+    return block, i // S_BLOCK.size
 
 
 def _parse_block(block_map, block_data):
@@ -368,7 +410,7 @@
     """verify that the provided nodemap data are valid for the given idex"""
     ret = 0
     ui.status((b"revision in index:   %d\n") % len(index))
-    root = parse_data(data)
+    root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -156,13 +156,31 @@
         index."""
         return nodemaputil.persistent_data(self)
 
+    def nodemap_data_incremental(self):
+        """Return bytes containing a incremental update to persistent nodemap
+
+        This containst the data for an append-only update of the data provided
+        in the last call to `update_nodemap_data`.
+        """
+        if self._nm_root is None:
+            return None
+        data = nodemaputil.update_persistent_data(
+            self, self._nm_root, self._nm_max_idx, self._nm_rev
+        )
+        self._nm_root = self._nm_max_idx = self._nm_rev = None
+        return data
+
     def update_nodemap_data(self, nm_data):
         """provide full blokc of persisted binary data for a nodemap
 
         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
-            nodemaputil.parse_data(nm_data)
+            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
+            if self._nm_root:
+                self._nm_rev = len(self) - 1
+            else:
+                self._nm_root = self._nm_max_idx = self._nm_rev = None
 
 
 class InlinedIndexObject(BaseIndexObject):



To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel

D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
In reply to this post by martinvonz (Martin von Zweigbergk)
marmoute added a comment.
marmoute updated this revision to Diff 19839.


  small doc update

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7886?vs=19795&id=19839

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=246144, sha256=c0498fb1a78a5776978427bacd92477766c2182f738fbb0125d8a05e6112d43a (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=245760, sha256=e6ee5d59afaab2cb1afae1077715be280578d29df508bd3dd9d74a994bc555e7 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
-    if util.safehasattr(revlog.index, "nodemap_data_all"):
-        data = revlog.index.nodemap_data_all()
+
+    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
+    ondisk_docket = revlog._nodemap_docket
+
+    # first attemp an incremental update of the data
+    if can_incremental and ondisk_docket is not None:
+        target_docket = revlog._nodemap_docket.copy()
+        data = revlog.index.nodemap_data_incremental()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'a') as fd:
+            fd.write(data)
     else:
-        data = persistent_data(revlog.index)
-    target_docket = NodeMapDocket()
-    datafile = _rawdata_filepath(revlog, target_docket)
+        # otherwise fallback to a full new export
+        target_docket = NodeMapDocket()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        if util.safehasattr(revlog.index, "nodemap_data_all"):
+            data = revlog.index.nodemap_data_all()
+        else:
+            data = persistent_data(revlog.index)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'w') as fd:
+            fd.write(data)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
+        fp.write(target_docket.serialize())
+    revlog._nodemap_docket = target_docket
+    # EXP-TODO: if the transaction abort, we should remove the new data and
+    # reinstall the old one.
+
+    # search for old index file in all cases, some older process might have
+    # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@
 
         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(datafile, b'w') as fd:
-        fd.write(data)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
-        fp.write(target_docket.serialize())
-    revlog._nodemap_docket = target_docket
-    # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@
     return _persist_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+    """return the incremental update for persistent nodemap from a given index
+    """
+    trie = _update_trie(index, root, last_rev)
+    return _persist_trie(trie, existing_idx=max_idx)
+
+
 S_BLOCK = struct.Struct(">" + ("q" * 16))
 
 NO_ENTRY = -1
@@ -260,6 +285,14 @@
     return root
 
 
+def _update_trie(index, root, last_rev):
+    """consume"""
+    for rev in range(last_rev + 1, len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block
 
@@ -269,6 +302,8 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    if block.ondisk_id is not None:
+        block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
     entry = block.get(hex_digit)
     if entry is None:
@@ -288,15 +323,22 @@
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
 
 
-def _persist_trie(root):
+def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data
 
     See `_build_trie` for nodemap trie structure"""
     block_map = {}
+    if existing_idx is not None:
+        base_idx = existing_idx + 1
+    else:
+        base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
-        block_map[id(tn)] = len(chunks)
-        chunks.append(_persist_block(tn, block_map))
+        if tn.ondisk_id is not None:
+            block_map[id(tn)] = tn.ondisk_id
+        else:
+            block_map[id(tn)] = len(chunks) + base_idx
+            chunks.append(_persist_block(tn, block_map))
     return b''.join(chunks)
 
 
@@ -338,11 +380,11 @@
         msg = "nodemap data size is not a multiple of block size (%d): %d"
         raise error.Abort(msg % (S_BLOCK.size, len(data)))
     if not data:
-        return Block()
+        return Block(), None
     block_map = {}
     for i in range(0, len(data), S_BLOCK.size):
         block = _parse_block(block_map, data[i : i + S_BLOCK.size])
-    return block
+    return block, i // S_BLOCK.size
 
 
 def _parse_block(block_map, block_data):
@@ -369,7 +411,7 @@
     """verify that the provided nodemap data are valid for the given idex"""
     ret = 0
     ui.status((b"revision in index:   %d\n") % len(index))
-    root = parse_data(data)
+    root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -156,13 +156,31 @@
         index."""
         return nodemaputil.persistent_data(self)
 
+    def nodemap_data_incremental(self):
+        """Return bytes containing a incremental update to persistent nodemap
+
+        This containst the data for an append-only update of the data provided
+        in the last call to `update_nodemap_data`.
+        """
+        if self._nm_root is None:
+            return None
+        data = nodemaputil.update_persistent_data(
+            self, self._nm_root, self._nm_max_idx, self._nm_rev
+        )
+        self._nm_root = self._nm_max_idx = self._nm_rev = None
+        return data
+
     def update_nodemap_data(self, nm_data):
         """provide full blokc of persisted binary data for a nodemap
 
         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
-            nodemaputil.parse_data(nm_data)
+            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
+            if self._nm_root:
+                self._nm_rev = len(self) - 1
+            else:
+                self._nm_root = self._nm_max_idx = self._nm_rev = None
 
 
 class InlinedIndexObject(BaseIndexObject):



To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel

D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
In reply to this post by martinvonz (Martin von Zweigbergk)
marmoute added a comment.
marmoute updated this revision to Diff 19898.


  rebase to latest default

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7886?vs=19839&id=19898

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
-    if util.safehasattr(revlog.index, "nodemap_data_all"):
-        data = revlog.index.nodemap_data_all()
+
+    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
+    ondisk_docket = revlog._nodemap_docket
+
+    # first attemp an incremental update of the data
+    if can_incremental and ondisk_docket is not None:
+        target_docket = revlog._nodemap_docket.copy()
+        data = revlog.index.nodemap_data_incremental()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'a') as fd:
+            fd.write(data)
     else:
-        data = persistent_data(revlog.index)
-    target_docket = NodeMapDocket()
-    datafile = _rawdata_filepath(revlog, target_docket)
+        # otherwise fallback to a full new export
+        target_docket = NodeMapDocket()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        if util.safehasattr(revlog.index, "nodemap_data_all"):
+            data = revlog.index.nodemap_data_all()
+        else:
+            data = persistent_data(revlog.index)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'w') as fd:
+            fd.write(data)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
+        fp.write(target_docket.serialize())
+    revlog._nodemap_docket = target_docket
+    # EXP-TODO: if the transaction abort, we should remove the new data and
+    # reinstall the old one.
+
+    # search for old index file in all cases, some older process might have
+    # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@
 
         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(datafile, b'w') as fd:
-        fd.write(data)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
-        fp.write(target_docket.serialize())
-    revlog._nodemap_docket = target_docket
-    # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@
     return _persist_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+    """return the incremental update for persistent nodemap from a given index
+    """
+    trie = _update_trie(index, root, last_rev)
+    return _persist_trie(trie, existing_idx=max_idx)
+
+
 S_BLOCK = struct.Struct(">" + ("l" * 16))
 
 NO_ENTRY = -1
@@ -260,6 +285,14 @@
     return root
 
 
+def _update_trie(index, root, last_rev):
+    """consume"""
+    for rev in range(last_rev + 1, len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block
 
@@ -269,6 +302,8 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    if block.ondisk_id is not None:
+        block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
     entry = block.get(hex_digit)
     if entry is None:
@@ -288,15 +323,22 @@
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
 
 
-def _persist_trie(root):
+def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data
 
     See `_build_trie` for nodemap trie structure"""
     block_map = {}
+    if existing_idx is not None:
+        base_idx = existing_idx + 1
+    else:
+        base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
-        block_map[id(tn)] = len(chunks)
-        chunks.append(_persist_block(tn, block_map))
+        if tn.ondisk_id is not None:
+            block_map[id(tn)] = tn.ondisk_id
+        else:
+            block_map[id(tn)] = len(chunks) + base_idx
+            chunks.append(_persist_block(tn, block_map))
     return b''.join(chunks)
 
 
@@ -338,7 +380,7 @@
         msg = "nodemap data size is not a multiple of block size (%d): %d"
         raise error.Abort(msg % (S_BLOCK.size, len(data)))
     if not data:
-        return Block()
+        return Block(), None
     block_map = {}
     new_blocks = []
     for i in range(0, len(data), S_BLOCK.size):
@@ -356,7 +398,7 @@
                 b[idx] = block_map[v]
             else:
                 b[idx] = _transform_rev(v)
-    return block
+    return block, i // S_BLOCK.size
 
 
 # debug utility
@@ -366,7 +408,7 @@
     """verify that the provided nodemap data are valid for the given idex"""
     ret = 0
     ui.status((b"revision in index:   %d\n") % len(index))
-    root = parse_data(data)
+    root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -156,13 +156,31 @@
         index."""
         return nodemaputil.persistent_data(self)
 
+    def nodemap_data_incremental(self):
+        """Return bytes containing a incremental update to persistent nodemap
+
+        This containst the data for an append-only update of the data provided
+        in the last call to `update_nodemap_data`.
+        """
+        if self._nm_root is None:
+            return None
+        data = nodemaputil.update_persistent_data(
+            self, self._nm_root, self._nm_max_idx, self._nm_rev
+        )
+        self._nm_root = self._nm_max_idx = self._nm_rev = None
+        return data
+
     def update_nodemap_data(self, nm_data):
         """provide full blokc of persisted binary data for a nodemap
 
         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
-            nodemaputil.parse_data(nm_data)
+            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
+            if self._nm_root:
+                self._nm_rev = len(self) - 1
+            else:
+                self._nm_root = self._nm_max_idx = self._nm_rev = None
 
 
 class InlinedIndexObject(BaseIndexObject):



To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel

D7886: nodemap: introduce append-only incremental update of the persistent data

martinvonz (Martin von Zweigbergk)
In reply to this post by martinvonz (Martin von Zweigbergk)
Closed by commit rHG50ad851efd9b: nodemap: introduce append-only incremental update of the persistent data (authored by marmoute).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7886?vs=19898&id=20119

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code use the debug code that perform incremental update, the C code reencode from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
-    if util.safehasattr(revlog.index, "nodemap_data_all"):
-        data = revlog.index.nodemap_data_all()
+
+    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
+    ondisk_docket = revlog._nodemap_docket
+
+    # first attemp an incremental update of the data
+    if can_incremental and ondisk_docket is not None:
+        target_docket = revlog._nodemap_docket.copy()
+        data = revlog.index.nodemap_data_incremental()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'a') as fd:
+            fd.write(data)
     else:
-        data = persistent_data(revlog.index)
-    target_docket = NodeMapDocket()
-    datafile = _rawdata_filepath(revlog, target_docket)
+        # otherwise fallback to a full new export
+        target_docket = NodeMapDocket()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        if util.safehasattr(revlog.index, "nodemap_data_all"):
+            data = revlog.index.nodemap_data_all()
+        else:
+            data = persistent_data(revlog.index)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'w') as fd:
+            fd.write(data)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
+        fp.write(target_docket.serialize())
+    revlog._nodemap_docket = target_docket
+    # EXP-TODO: if the transaction abort, we should remove the new data and
+    # reinstall the old one.
+
+    # search for old index file in all cases, some older process might have
+    # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@
 
         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(datafile, b'w') as fd:
-        fd.write(data)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
-        fp.write(target_docket.serialize())
-    revlog._nodemap_docket = target_docket
-    # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@
     return _persist_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+    """return the incremental update for persistent nodemap from a given index
+    """
+    trie = _update_trie(index, root, last_rev)
+    return _persist_trie(trie, existing_idx=max_idx)
+
+
 S_BLOCK = struct.Struct(">" + ("l" * 16))
 
 NO_ENTRY = -1
@@ -260,6 +285,14 @@
     return root
 
 
+def _update_trie(index, root, last_rev):
+    """consume"""
+    for rev in range(last_rev + 1, len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block
 
@@ -269,6 +302,8 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    if block.ondisk_id is not None:
+        block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
     entry = block.get(hex_digit)
     if entry is None:
@@ -288,15 +323,22 @@
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
 
 
-def _persist_trie(root):
+def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data
 
     See `_build_trie` for nodemap trie structure"""
     block_map = {}
+    if existing_idx is not None:
+        base_idx = existing_idx + 1
+    else:
+        base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
-        block_map[id(tn)] = len(chunks)
-        chunks.append(_persist_block(tn, block_map))
+        if tn.ondisk_id is not None:
+            block_map[id(tn)] = tn.ondisk_id
+        else:
+            block_map[id(tn)] = len(chunks) + base_idx
+            chunks.append(_persist_block(tn, block_map))
     return b''.join(chunks)
 
 
@@ -338,7 +380,7 @@
         msg = "nodemap data size is not a multiple of block size (%d): %d"
         raise error.Abort(msg % (S_BLOCK.size, len(data)))
     if not data:
-        return Block()
+        return Block(), None
     block_map = {}
     new_blocks = []
     for i in range(0, len(data), S_BLOCK.size):
@@ -356,7 +398,7 @@
                 b[idx] = block_map[v]
             else:
                 b[idx] = _transform_rev(v)
-    return block
+    return block, i // S_BLOCK.size
 
 
 # debug utility
@@ -366,7 +408,7 @@
     """verify that the provided nodemap data are valid for the given idex"""
     ret = 0
     ui.status((b"revision in index:   %d\n") % len(index))
-    root = parse_data(data)
+    root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -156,13 +156,31 @@
         index."""
         return nodemaputil.persistent_data(self)
 
+    def nodemap_data_incremental(self):
+        """Return bytes containing a incremental update to persistent nodemap
+
+        This containst the data for an append-only update of the data provided
+        in the last call to `update_nodemap_data`.
+        """
+        if self._nm_root is None:
+            return None
+        data = nodemaputil.update_persistent_data(
+            self, self._nm_root, self._nm_max_idx, self._nm_rev
+        )
+        self._nm_root = self._nm_max_idx = self._nm_rev = None
+        return data
+
     def update_nodemap_data(self, nm_data):
         """provide full blokc of persisted binary data for a nodemap
 
         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
-            nodemaputil.parse_data(nm_data)
+            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
+            if self._nm_root:
+                self._nm_rev = len(self) - 1
+            else:
+                self._nm_root = self._nm_max_idx = self._nm_rev = None
 
 
 class InlinedIndexObject(BaseIndexObject):



To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel