D7889: nodemap: track the total and unused amount of data in the rawdata file


D7889: nodemap: track the total and unused amount of data in the rawdata file

martinvonz (Martin von Zweigbergk)
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We need to keep this information around:
 
  - the total data length will allow a transaction to start appending new information without confusing other readers.
 
  - the amount of unused data will let us detect when we should regenerate a new rawdata file.
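
  For readers skimming the thread, here is a minimal sketch of the docket layout
  after this change (the pack_docket/unpack_docket helpers are illustrative only,
  they are not part of the patch): a one-byte version, then a header packed as
  ">BQQQ" carrying the uid size, the tip revision and the two new counters,
  followed by the uid bytes. With the 16-byte uid used by the nodemap this adds
  up to 1 + 25 + 16 = 42 bytes, matching the new 00changelog.n size in the test.

    import struct

    # layout introduced by this patch: version byte, ">BQQQ" header
    # (uid size, tip revision, total data length, unused data length), uid bytes
    S_VERSION = struct.Struct(">B")
    S_HEADER = struct.Struct(">BQQQ")
    ONDISK_VERSION = 0

    def pack_docket(uid, tip_rev, data_length, data_unused):
        # illustrative helper mirroring NodeMapDocket.serialize()
        parts = [S_VERSION.pack(ONDISK_VERSION)]
        parts.append(S_HEADER.pack(len(uid), tip_rev, data_length, data_unused))
        parts.append(uid)
        return b''.join(parts)

    def unpack_docket(pdata):
        # illustrative helper mirroring the parsing side of the patch
        offset = S_VERSION.size
        uid_size, tip_rev, data_length, data_unused = S_HEADER.unpack(
            pdata[offset : offset + S_HEADER.size]
        )
        offset += S_HEADER.size
        uid = pdata[offset : offset + uid_size]
        return uid, tip_rev, data_length, data_unused

    # 1 + (1 + 8 + 8 + 8) + 16 == 42, the new size of .hg/store/00changelog.n
    raw = pack_docket(b'0123456789abcdef', 5000, 245760, 0)
    assert len(raw) == 42
    assert unpack_docket(raw) == (b'0123456789abcdef', 5000, 245760, 0)

  As for the second point, a hedged sketch of how the unused counter could later
  drive regeneration; the FakeDocket class, the should_regenerate helper and the
  25% threshold are assumptions for illustration, nothing in this patch implements
  such a policy yet:

    from dataclasses import dataclass

    @dataclass
    class FakeDocket:
        # stand-in for NodeMapDocket, only the two fields added by this patch
        data_length: int = 0
        data_unused: int = 0

    def should_regenerate(docket, max_unused_ratio=0.25):
        # regenerate once too large a share of the rawdata file is dead blocks
        if not docket.data_length:
            return True
        return docket.data_unused / docket.data_length > max_unused_ratio

    assert not should_regenerate(FakeDocket(data_length=246144, data_unused=384))
    assert should_regenerate(FakeDocket())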

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 245760
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 246144
+  data-unused: 384
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 245760
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
 
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, 'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
         # store vfs
         with revlog.opener(datafile, 'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0
 
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new
 
     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)
@@ -236,8 +249,8 @@
 def update_persistent_data(index, root, max_idx, last_rev):
     """return the serialised data of a nodemap for a given index
     """
-    trie = _update_trie(index, root, last_rev)
-    return _dump_trie(trie, existing_idx=max_idx)
+    changed_block, trie = _update_trie(index, root, last_rev)
+    return changed_block * S_BLOCK.size, _dump_trie(trie, existing_idx=max_idx)
 
 
 S_BLOCK = struct.Struct(">" + ("q" * 16))
@@ -293,10 +306,11 @@
 
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root
 
 
 def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -308,6 +322,7 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     entry = block.get(_to_int(current_hex[level]))
@@ -316,7 +331,9 @@
         block[_to_int(current_hex[level])] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
@@ -329,6 +346,7 @@
             level += 1
         block[_to_int(current_hex[level])] = current_rev
         block[_to_int(other_hex[level])] = other_rev
+    return changed
 
 
 def _dump_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data
 
     def update_nodemap_data(self, docket, nm_data):
         """provide full serialiazed data from a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2131,6 +2131,8 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %s\n") % docket.data_length)
+            ui.write((b"data-unused: %s\n") % docket.data_unused)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel

D7889: nodemap: track the total and unused amount of data in the rawdata file

martinvonz (Martin von Zweigbergk)
marmoute updated this revision to Diff 19434.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7889?vs=19307&id=19434

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7889/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 245760
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 246144
+  data-unused: 384
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 245760
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
 
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, 'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
         # store vfs
         with revlog.opener(datafile, 'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0
 
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new
 
     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)
@@ -236,8 +249,8 @@
 def update_persistent_data(index, root, max_idx, last_rev):
     """return the serialised data of a nodemap for a given index
     """
-    trie = _update_trie(index, root, last_rev)
-    return _dump_trie(trie, existing_idx=max_idx)
+    changed_block, trie = _update_trie(index, root, last_rev)
+    return changed_block * S_BLOCK.size, _dump_trie(trie, existing_idx=max_idx)
 
 
 S_BLOCK = struct.Struct(">" + ("q" * 16))
@@ -293,10 +306,11 @@
 
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root
 
 
 def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -308,6 +322,7 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     entry = block.get(_to_int(current_hex[level]))
@@ -316,7 +331,9 @@
         block[_to_int(current_hex[level])] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
@@ -329,6 +346,7 @@
             level += 1
         block[_to_int(current_hex[level])] = current_rev
         block[_to_int(other_hex[level])] = other_rev
+    return changed
 
 
 def _dump_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data
 
     def update_nodemap_data(self, docket, nm_data):
         """provide full serialiazed data from a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2131,6 +2131,8 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %s\n") % docket.data_length)
+            ui.write((b"data-unused: %s\n") % docket.data_unused)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel

D7889: nodemap: track the total and unused amount of data in the rawdata file

martinvonz (Martin von Zweigbergk)
marmoute updated this revision to Diff 19770.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7889?vs=19434&id=19770

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7889/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 245760
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 246144
+  data-unused: 384
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 245760
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
 
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, 'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
         # store vfs
         with revlog.opener(datafile, 'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0
 
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new
 
     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)
@@ -236,8 +249,11 @@
 def update_persistent_data(index, root, max_idx, last_rev):
     """return the incremental update for persistent nodemap from a given index
     """
-    trie = _update_trie(index, root, last_rev)
-    return _persist_trie(trie, existing_idx=max_idx)
+    changed_block, trie = _update_trie(index, root, last_rev)
+    return (
+        changed_block * S_BLOCK.size,
+        _persist_block(trie, existing_idx=max_idx),
+    )
 
 
 S_BLOCK = struct.Struct(">" + ("q" * 16))
@@ -293,10 +309,11 @@
 
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root
 
 
 def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -308,6 +325,7 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     entry = block.get(_to_int(current_hex[level]))
@@ -316,7 +334,9 @@
         block[_to_int(current_hex[level])] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
@@ -326,6 +346,7 @@
         block[_to_int(current_hex[level])] = new
         _insert_into_block(index, level + 1, new, other_rev, other_hex)
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
+    return changed
 
 
 def _persist_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data
 
     def update_nodemap_data(self, docket, nm_data):
         """provide full block of persisted binary data for a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2137,6 +2137,8 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %s\n") % docket.data_length)
+            ui.write((b"data-unused: %s\n") % docket.data_unused)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel

D7889: nodemap: track the total and unused amount of data in the rawdata file

martinvonz (Martin von Zweigbergk)
marmoute updated this revision to Diff 19797.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7889?vs=19770&id=19797

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7889/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 245760
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 246144
+  data-unused: 384
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 245760
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
 
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
         # store vfs
         with revlog.opener(datafile, b'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0
 
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new
 
     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)
@@ -236,8 +249,11 @@
 def update_persistent_data(index, root, max_idx, last_rev):
     """return the incremental update for persistent nodemap from a given index
     """
-    trie = _update_trie(index, root, last_rev)
-    return _persist_trie(trie, existing_idx=max_idx)
+    changed_block, trie = _update_trie(index, root, last_rev)
+    return (
+        changed_block * S_BLOCK.size,
+        _persist_trie(trie, existing_idx=max_idx),
+    )
 
 
 S_BLOCK = struct.Struct(">" + ("q" * 16))
@@ -293,10 +309,11 @@
 
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root
 
 
 def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -308,6 +325,7 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
@@ -317,7 +335,9 @@
         block[hex_digit] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
@@ -327,6 +347,7 @@
         block[hex_digit] = new
         _insert_into_block(index, level + 1, new, other_rev, other_hex)
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
+    return changed
 
 
 def _persist_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data
 
     def update_nodemap_data(self, docket, nm_data):
         """provide full block of persisted binary data for a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2137,6 +2137,8 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %d\n") % docket.data_length)
+            ui.write((b"data-unused: %d\n") % docket.data_unused)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel

D7889: nodemap: track the total and unused amount of data in the rawdata file

martinvonz (Martin von Zweigbergk)
marmoute added a comment.
marmoute updated this revision to Diff 19841.


  small doc update on .#s[1]

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7889?vs=19797&id=19841

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7889/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 245760
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 246144
+  data-unused: 384
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 245760
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
 
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
         # store vfs
         with revlog.opener(datafile, b'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0
 
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new
 
     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)
@@ -236,8 +249,11 @@
 def update_persistent_data(index, root, max_idx, last_rev):
     """return the incremental update for persistent nodemap from a given index
     """
-    trie = _update_trie(index, root, last_rev)
-    return _persist_trie(trie, existing_idx=max_idx)
+    changed_block, trie = _update_trie(index, root, last_rev)
+    return (
+        changed_block * S_BLOCK.size,
+        _persist_trie(trie, existing_idx=max_idx),
+    )
 
 
 S_BLOCK = struct.Struct(">" + ("q" * 16))
@@ -294,10 +310,11 @@
 
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root
 
 
 def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -309,6 +326,7 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
@@ -318,7 +336,9 @@
         block[hex_digit] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
@@ -328,6 +348,7 @@
         block[hex_digit] = new
         _insert_into_block(index, level + 1, new, other_rev, other_hex)
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
+    return changed
 
 
 def _persist_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data
 
     def update_nodemap_data(self, docket, nm_data):
         """provide full block of persisted binary data for a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2137,6 +2137,8 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %d\n") % docket.data_length)
+            ui.write((b"data-unused: %d\n") % docket.data_unused)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel

D7889: nodemap: track the total and unused amount of data in the rawdata file

martinvonz (Martin von Zweigbergk)
marmoute added a comment.
marmoute updated this revision to Diff 19901.


  rebase to latest default

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7889?vs=19841&id=19901

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7889/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 122880
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 123072
+  data-unused: 192
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 122880
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
 
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
         # store vfs
         with revlog.opener(datafile, b'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0
 
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new
 
     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)
@@ -236,8 +249,11 @@
 def update_persistent_data(index, root, max_idx, last_rev):
     """return the incremental update for persistent nodemap from a given index
     """
-    trie = _update_trie(index, root, last_rev)
-    return _persist_trie(trie, existing_idx=max_idx)
+    changed_block, trie = _update_trie(index, root, last_rev)
+    return (
+        changed_block * S_BLOCK.size,
+        _persist_trie(trie, existing_idx=max_idx),
+    )
 
 
 S_BLOCK = struct.Struct(">" + ("l" * 16))
@@ -294,10 +310,11 @@
 
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root
 
 
 def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -309,6 +326,7 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
@@ -318,7 +336,9 @@
         block[hex_digit] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
@@ -328,6 +348,7 @@
         block[hex_digit] = new
         _insert_into_block(index, level + 1, new, other_rev, other_hex)
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
+    return changed
 
 
 def _persist_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data
 
     def update_nodemap_data(self, docket, nm_data):
         """provide full block of persisted binary data for a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2137,6 +2137,8 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %d\n") % docket.data_length)
+            ui.write((b"data-unused: %d\n") % docket.data_unused)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel

D7889: nodemap: track the total and unused amount of data in the rawdata file

martinvonz (Martin von Zweigbergk)
Closed by commit rHG8374b69aef75: nodemap: track the total and unused amount of data in the rawdata file (authored by marmoute).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7889?vs=19901&id=20122

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7889/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7889

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -15,8 +15,10 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  data-length: 122880
+  data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
   $ f --sha256 .hg/store/00changelog-*.nd
   .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
@@ -50,11 +52,22 @@
   $ echo foo > foo
   $ hg add foo
   $ hg ci -m 'foo'
+
+#if pure
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  data-length: 123072
+  data-unused: 192
+#else
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5001
+  data-length: 122880
+  data-unused: 0
+#endif
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=26
+  .hg/store/00changelog.n: size=42
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -37,10 +37,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev = headers
+    uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
+    docket.data_length = data_length
+    docket.data_unused = data_unused
 
     filename = _rawdata_filepath(revlog, docket)
     return docket, revlog.opener.tryread(filename)
@@ -78,12 +80,14 @@
     # first attemp an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
-        data = revlog.index.nodemap_data_incremental()
+        data_changed_count, data = revlog.index.nodemap_data_incremental()
         datafile = _rawdata_filepath(revlog, target_docket)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'a') as fd:
             fd.write(data)
+        target_docket.data_length += len(data)
+        target_docket.data_unused += data_changed_count
     else:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
@@ -96,6 +100,7 @@
         # store vfs
         with revlog.opener(datafile, b'w') as fd:
             fd.write(data)
+        target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
@@ -143,9 +148,8 @@
 
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
-
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQ")
+S_HEADER = struct.Struct(">BQQQ")
 
 ID_SIZE = 8
 
@@ -168,17 +172,26 @@
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
+        self.data_length = None
+        self.data_unused = 0
 
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.data_length = self.data_length
+        new.data_unused = self.data_unused
         return new
 
     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
-        headers = (len(self.uid), self.tip_rev)
+        headers = (
+            len(self.uid),
+            self.tip_rev,
+            self.data_length,
+            self.data_unused,
+        )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)
@@ -236,8 +249,11 @@
 def update_persistent_data(index, root, max_idx, last_rev):
     """return the incremental update for persistent nodemap from a given index
     """
-    trie = _update_trie(index, root, last_rev)
-    return _persist_trie(trie, existing_idx=max_idx)
+    changed_block, trie = _update_trie(index, root, last_rev)
+    return (
+        changed_block * S_BLOCK.size,
+        _persist_trie(trie, existing_idx=max_idx),
+    )
 
 
 S_BLOCK = struct.Struct(">" + ("l" * 16))
@@ -294,10 +310,11 @@
 
 def _update_trie(index, root, last_rev):
     """consume"""
+    changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
-        _insert_into_block(index, 0, root, rev, hex)
-    return root
+        changed += _insert_into_block(index, 0, root, rev, hex)
+    return changed, root
 
 
 def _insert_into_block(index, level, block, current_rev, current_hex):
@@ -309,6 +326,7 @@
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
@@ -318,7 +336,9 @@
         block[hex_digit] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
-        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+        changed += _insert_into_block(
+            index, level + 1, entry, current_rev, current_hex
+        )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entry.
@@ -328,6 +348,7 @@
         block[hex_digit] = new
         _insert_into_block(index, level + 1, new, other_rev, other_hex)
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
+    return changed
 
 
 def _persist_trie(root, existing_idx=None):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -164,11 +164,11 @@
         """
         if self._nm_root is None:
             return None
-        data = nodemaputil.update_persistent_data(
+        changed, data = nodemaputil.update_persistent_data(
             self, self._nm_root, self._nm_max_idx, self._nm_rev
         )
         self._nm_root = self._nm_max_idx = self._nm_rev = None
-        return data
+        return changed, data
 
     def update_nodemap_data(self, docket, nm_data):
         """provide full block of persisted binary data for a nodemap
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -2138,6 +2138,8 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"data-length: %d\n") % docket.data_length)
+            ui.write((b"data-unused: %d\n") % docket.data_unused)
 
 
 @command(



To: marmoute, #hg-reviewers
Cc: mercurial-devel