Hi,
This is the second of two series to implement support for an on-disk format for
storing the reverse index. Note that this depends on the patches in the previous
series [1], which was recently updated).
This version is largely unchanged from the original, with the following
exceptions:
- It has been rebased onto the patches in the first series.
- The operands of two comparisons in 'offset_to_pack_pos()' were swapped so
that the smaller of the two appears on the left-hand side of the comparison.
- A brown-paper-bag bug was fixed in tests so that they pass on Windows (last
night's integration broke 'seen' on Windows).
- The GIT_TEST_WRITE_REV_INDEX mode was enabled in the "all-features" test.
Thanks in advance for your review.
[1]: https://lore.kernel.org/git/cover.1610129796.git.me@ttaylorr.com/
Taylor Blau (8):
packfile: prepare for the existence of '*.rev' files
pack-write.c: prepare to write 'pack-*.rev' files
builtin/index-pack.c: write reverse indexes
builtin/pack-objects.c: respect 'pack.writeReverseIndex'
Documentation/config/pack.txt: advertise 'pack.writeReverseIndex'
t: prepare for GIT_TEST_WRITE_REV_INDEX
t: support GIT_TEST_WRITE_REV_INDEX
pack-revindex: ensure that on-disk reverse indexes are given
precedence
Documentation/config/pack.txt | 7 ++
Documentation/git-index-pack.txt | 20 ++--
Documentation/technical/pack-format.txt | 17 ++++
builtin/index-pack.c | 67 +++++++++++--
builtin/pack-objects.c | 9 ++
builtin/repack.c | 1 +
ci/run-build-and-tests.sh | 1 +
object-store.h | 3 +
pack-revindex.c | 116 ++++++++++++++++++++--
pack-revindex.h | 10 +-
pack-write.c | 123 +++++++++++++++++++++++-
pack.h | 4 +
packfile.c | 13 ++-
packfile.h | 1 +
t/README | 3 +
t/t5319-multi-pack-index.sh | 5 +-
t/t5325-reverse-index.sh | 97 +++++++++++++++++++
t/t5604-clone-reference.sh | 2 +-
t/t5702-protocol-v2.sh | 12 ++-
t/t6500-gc.sh | 6 +-
t/t9300-fast-import.sh | 5 +-
tmp-objdir.c | 4 +-
22 files changed, 485 insertions(+), 41 deletions(-)
create mode 100755 t/t5325-reverse-index.sh
-: ---------- > 1: e1aa89244a pack-revindex: introduce a new API
-: ---------- > 2: 0fca7d5812 write_reuse_object(): convert to new revindex API
-: ---------- > 3: 7676822a54 write_reused_pack_one(): convert to new revindex API
-: ---------- > 4: dd7133fdb7 write_reused_pack_verbatim(): convert to new revindex API
-: ---------- > 5: 8e93ca3886 check_object(): convert to new revindex API
-: ---------- > 6: 084bbf2145 bitmap_position_packfile(): convert to new revindex API
-: ---------- > 7: 68794e9484 show_objects_for_type(): convert to new revindex API
-: ---------- > 8: 31ac6f5703 get_size_by_pos(): convert to new revindex API
-: ---------- > 9: acd80069a2 try_partial_reuse(): convert to new revindex API
-: ---------- > 10: 569acdca7f rebuild_existing_bitmaps(): convert to new revindex API
-: ---------- > 11: 9881637724 get_delta_base_oid(): convert to new revindex API
-: ---------- > 12: df8bb571a5 retry_bad_packed_offset(): convert to new revindex API
-: ---------- > 13: 41b2e00947 packed_object_info(): convert to new revindex API
-: ---------- > 14: 8ad49d231f unpack_entry(): convert to new revindex API
-: ---------- > 15: e757476351 for_each_object_in_pack(): convert to new revindex API
-: ---------- > 16: a500311e33 builtin/gc.c: guess the size of the revindex
-: ---------- > 17: 67d14da04a pack-revindex: remove unused 'find_pack_revindex()'
-: ---------- > 18: 3b5c92be68 pack-revindex: remove unused 'find_revindex_position()'
-: ---------- > 19: cabafce4a1 pack-revindex: hide the definition of 'revindex_entry'
-: ---------- > 20: 8400ff6c96 pack-revindex.c: avoid direct revindex access in 'offset_to_pack_pos()'
1: ddf47a0a50 ! 21: 6742c15c84 packfile: prepare for the existence of '*.rev' files
@@ pack-revindex.c: static void create_pack_revindex(struct packed_git *p)
+
int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
{
- int lo = 0;
+ unsigned lo, hi;
@@ pack-revindex.c: int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos)
@@ pack-revindex.c: int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_
- if (!p->revindex)
+ if (!(p->revindex || p->revindex_data))
BUG("pack_pos_to_index: reverse index not yet loaded");
- if (pos >= p->num_objects)
+ if (p->num_objects <= pos)
BUG("pack_pos_to_index: out-of-bounds object at %"PRIu32, pos);
- return p->revindex[pos].nr;
+
@@ pack-revindex.c: int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_
- if (!p->revindex)
+ if (!(p->revindex || p->revindex_data))
BUG("pack_pos_to_index: reverse index not yet loaded");
- if (pos > p->num_objects)
+ if (p->num_objects < pos)
BUG("pack_pos_to_offset: out-of-bounds object at %"PRIu32, pos);
- return p->revindex[pos].offset;
+
@@ pack-revindex.c: int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_
+ return nth_packed_object_offset(p, pack_pos_to_index(p, pos));
}
+ ## pack-revindex.h ##
+@@ pack-revindex.h: struct packed_git;
+ /*
+ * load_pack_revindex populates the revindex's internal data-structures for the
+ * given pack, returning zero on success and a negative value otherwise.
++ *
++ * If a '.rev' file is present, it is checked for consistency, mmap'd, and
++ * pointers are assigned into it (instead of using the in-memory variant).
+ */
+ int load_pack_revindex(struct packed_git *p);
+
+@@ pack-revindex.h: uint32_t pack_pos_to_index(struct packed_git *p, uint32_t pos);
+ * If the reverse index has not yet been loaded, or the position is out of
+ * bounds, this function aborts.
+ *
+- * This function runs in constant time.
++ * This function runs in constant time under both in-memory and on-disk reverse
++ * indexes, but an additional step is taken to consult the corresponding .idx
++ * file when using the on-disk format.
+ */
+ off_t pack_pos_to_offset(struct packed_git *p, uint32_t pos);
+
+
## packfile.c ##
@@ packfile.c: void close_pack_index(struct packed_git *p)
}
2: 88393e2662 = 22: 8648c87fa7 pack-write.c: prepare to write 'pack-*.rev' files
3: b0a7329824 ! 23: 5b18ada611 builtin/index-pack.c: write reverse indexes
@@ t/t5325-reverse-index.sh (new)
+ rm -f $rev &&
+ conf=$1 &&
+ shift &&
++ # remove the index since Windows won't overwrite an existing file
++ rm $packdir/pack-$pack.idx &&
+ git -c pack.writeReverseIndex=$conf index-pack "$@" \
+ $packdir/pack-$pack.pack
+}
4: e297a31875 = 24: 68bde3ea97 builtin/pack-objects.c: respect 'pack.writeReverseIndex'
5: 5d3e96a498 = 25: 38a253d0ce Documentation/config/pack.txt: advertise 'pack.writeReverseIndex'
6: 2288571fbe < -: ---------- t: prepare for GIT_TEST_WRITE_REV_INDEX
-: ---------- > 26: 12cdf2d67a t: prepare for GIT_TEST_WRITE_REV_INDEX
7: 3525c4d114 ! 27: 6b647d9775 t: support GIT_TEST_WRITE_REV_INDEX
@@ Commit message
Add a new option that unconditionally enables the pack.writeReverseIndex
setting in order to run the whole test suite in a mode that generates
- on-disk reverse indexes.
+ on-disk reverse indexes. Additionally, enable this mode in the second
+ run of tests under linux-gcc in 'ci/run-build-and-tests.sh'.
Once on-disk reverse indexes are proven out over several releases, we
can change the default value of that configuration to 'true', and drop
@@ builtin/pack-objects.c: int cmd_pack_objects(int argc, const char **argv, const
progress = isatty(2);
argc = parse_options(argc, argv, prefix, pack_objects_options,
+ ## ci/run-build-and-tests.sh ##
+@@ ci/run-build-and-tests.sh: linux-gcc)
+ export GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=1
+ export GIT_TEST_MULTI_PACK_INDEX=1
+ export GIT_TEST_ADD_I_USE_BUILTIN=1
++ export GIT_TEST_WRITE_REV_INDEX=1
+ make test
+ ;;
+ linux-clang)
+
## pack-revindex.h ##
@@
- #ifndef PACK_REVINDEX_H
- #define PACK_REVINDEX_H
+ * can be found
+ */
+#define GIT_TEST_WRITE_REV_INDEX "GIT_TEST_WRITE_REV_INDEX"
+
struct packed_git;
- int load_pack_revindex(struct packed_git *p);
+ /*
## t/README ##
@@ t/README: GIT_TEST_DEFAULT_HASH=<hash-algo> specifies which hash algorithm to
8: 6e580d43d1 ! 28: 48926ae182 pack-revindex: ensure that on-disk reverse indexes are given precedence
@@ pack-revindex.c: static void create_pack_revindex(struct packed_git *p)
## pack-revindex.h ##
@@
- #define PACK_REVINDEX_H
+ */
#define GIT_TEST_WRITE_REV_INDEX "GIT_TEST_WRITE_REV_INDEX"
+#define GIT_TEST_REV_INDEX_DIE_IN_MEMORY "GIT_TEST_REV_INDEX_DIE_IN_MEMORY"
@@ t/t5325-reverse-index.sh: test_expect_success 'pack-objects respects pack.writeR
'
+test_expect_success 'reverse index is not generated when available on disk' '
-+ git index-pack --rev-index $packdir/pack-$pack.pack &&
++ test_index_pack true &&
++ test_path_is_file $rev &&
+
+ git rev-parse HEAD >tip &&
+ GIT_TEST_REV_INDEX_DIE_IN_MEMORY=1 git cat-file \
--
2.30.0.138.g6d7191ea01