[PATCH v6 0/5] pack-redundant: new algorithm to find min packs
To
Junio C Hamano
Git List
SZEDER Gábor
Cc
Jiang Xin
Sun Chao
From
Jiang Xin
See Also
Prev
Date
2019-01-12 09:17:49 UTC
> Sun Chao (my former colleague at Huawei) found a bug of
> git-pack-redundant.  If there are too many packs and many of them
> overlap each other, running `git pack-redundant --all` will
> exhaust all memories and the process will be killed by kernel.
> 
> There is a script in commit log of commit 2/5, which can be used to
> create a repository with lots of redundant packs. Running `git
> pack-redundant --all` in it can reproduce this issue.


Junio C Hamano <gitster@pobox.com> 于2019年1月12日周六 上午2:00写道:
> >> Yikes.  Can't "git pack-objects" get the input directly without
> >> overlong printf, something along the lines of...
> >>
> >>         P1=$(git -C .git/objects/pack pack-objects pack <<-EOF
> >>                 $A
> >>                 $B
> >>                 $C
> >>                 ...
> >>                 $R
> >>                 EOF
> >>         )
> >
> > Find that no space before <OID>,  because git-pack-objects not allow that,
> > and mached parentheses should in the same line.
> > So Will write like this:
> >
> >     create_pack_1() {
> >             P1=$(git -C .git/objects/pack pack-objects pack <<-EOF) &&
> >     $T
>
> Isn't the whole point of <<-EOF (notice the leading dash) to allow
> us to indent the here-doc with horizontal tab?

The reason the indentation was not stripped even with `<<-EOF` is that I
mixed tabs and spaces for better alignment; `<<-` strips only leading tabs.

If the heredoc is put outside the parentheses, it fails on macOS, so I
used the syntax Junio previously suggested.


SZEDER Gábor <szeder.dev@gmail.com> 于2019年1月11日周五 上午9:19写道:
> I see that the last patch in this series removes those three
> unused functions, but that patch should be squashed into this one to
> keep Git buildable with '-Werror' or DEVELOPER=1.
>
> Furthermore, after building this patch (without '-Werror'), several
> tests in 't5323-pack-redundant.sh' fail.  To avoid the test failure I
> think the fourth patch ensuring a consistent sort order should be
> squashed in as well.
Patches 3/5 through 5/5 can be squashed into patch 2/5.


## Changes since reroll v5


1:  40fea5d67f ! 1:  7e4e703083 t5323: test cases for git-pack-redundant
    @@ -22,8 +22,7 @@
     +
     +. ./test-lib.sh
     +
    -+create_commits()
    -+{
    ++create_commits() {
     +	parent=
     +	for name in A B C D E F G H I J K L M N O P Q R
     +	do
    @@ -39,54 +38,98 @@
     +		parent=$oid ||
     +		return 1
     +	done
    -+	git update-ref refs/heads/master $M
    ++	git update-ref refs/heads/master $R
     +}
     +
    -+create_pack_1()
    -+{
    -+	P1=$(cd .git/objects/pack; printf "$T\n$A\n$B\n$C\n$D\n$E\n$F\n$R\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_1() {
    ++	P1=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$T
    ++		$A
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$F
    ++		$R
    ++		EOF
    ++	) &&
     +	eval P$P1=P1:$P1
     +}
     +
    -+create_pack_2()
    -+{
    -+	P2=$(cd .git/objects/pack; printf "$B\n$C\n$D\n$E\n$G\n$H\n$I\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_2() {
    ++	P2=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$B
    ++		$C
    ++		$D
    ++		$E
    ++		$G
    ++		$H
    ++		$I
    ++		EOF
    ++	) &&
     +	eval P$P2=P2:$P2
     +}
     +
    -+create_pack_3()
    -+{
    -+	P3=$(cd .git/objects/pack; printf "$F\n$I\n$J\n$K\n$L\n$M\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_3() {
    ++	P3=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$F
    ++		$I
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		EOF
    ++	) &&
     +	eval P$P3=P3:$P3
     +}
     +
    -+create_pack_4()
    -+{
    -+	P4=$(cd .git/objects/pack; printf "$J\n$K\n$L\n$M\n$P\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_4() {
    ++	P4=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$J
    ++		$K
    ++		$L
    ++		$M
    ++		$P
    ++		EOF
    ++	) &&
     +	eval P$P4=P4:$P4
     +}
     +
    -+create_pack_5()
    -+{
    -+	P5=$(cd .git/objects/pack; printf "$G\n$H\n$N\n$O\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_5() {
    ++	P5=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$G
    ++		$H
    ++		$N
    ++		$O
    ++		EOF
    ++	) &&
     +	eval P$P5=P5:$P5
     +}
     +
    -+create_pack_6()
    -+{
    -+	P6=$(cd .git/objects/pack; printf "$N\n$O\n$Q\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_6() {
    ++	P6=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$N
    ++		$O
    ++		$Q
    ++		EOF
    ++	) &&
     +	eval P$P6=P6:$P6
     +}
     +
    -+create_pack_7()
    -+{
    -+	P7=$(cd .git/objects/pack; printf "$P\n$Q\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_7() {
    ++	P7=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$P
    ++		$Q
    ++		EOF
    ++	) &&
     +	eval P$P7=P7:$P7
     +}
     +
    -+create_pack_8()
    -+{
    -+	P8=$(cd .git/objects/pack; printf "$A\n" | git pack-objects pack 2>/dev/null) &&
    ++create_pack_8() {
    ++	P8=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++		$A
    ++		EOF
    ++	) &&
     +	eval P$P8=P8:$P8
     +}
     +
    @@ -110,10 +153,12 @@
     +
     +test_expect_success 'one of pack-2/pack-3 is redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -121,6 +166,7 @@
     +	create_pack_6 && create_pack_7
     +'
     +
    ++# Only after calling create_pack_6, we can use $P6 variable.
     +cat >expected <<EOF
     +P2:$P2
     +P4:$P4
    @@ -129,10 +175,12 @@
     +
     +test_expect_success 'pack 2, 4, and 6 are redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    @@ -147,24 +195,26 @@
     +P8:$P8
     +EOF
     +
    -+test_expect_success 'pack-8, subset of pack-1, is also redundant' '
    ++test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
     +	git pack-redundant --all >out &&
    -+	sed -E -e "s#.*/pack-(.*)\.(idx|pack)#\1#" out | \
    -+		sort -u | \
    -+		while read p; do eval echo "\${P$p}"; done | \
    -+		sort >actual && \
    ++	sed \
    ++		-e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++		-e "s#.*/pack-\(.*\)\.pack#\1#" out |
    ++		sort -u |
    ++		while read p; do eval echo "\${P$p}"; done |
    ++		sort >actual &&
     +	test_cmp expected actual
     +'
     +
    -+test_expect_success 'clear loose objects' '
    ++test_expect_success 'clean loose objects' '
     +	git prune-packed &&
     +	find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
     +	test_must_be_empty out
     +'
     +
    -+test_expect_success 'remove redundant packs' '
    ++test_expect_success 'remove redundant packs and pass fsck' '
     +	git pack-redundant --all | xargs rm &&
    -+	git fsck &&
    ++	git fsck --no-progress &&
     +	git pack-redundant --all >out &&
     +	test_must_be_empty out
     +'
2:  50cd5a5b47 ! 2:  51a9c2d8a5 pack-redundant: new algorithm to find min packs
    @@ -67,7 +67,7 @@
         Original PR and discussions: https://github.com/jiangxin/git/pull/25
     
         Signed-off-by: Sun Chao <sunchao9@huawei.com>
    -    Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
    +    Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
         Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
5:  b7ccdea1ad ! 3:  c5eb21c23c pack-redundant: remove unused functions
    @@ -6,14 +6,14 @@
         `pll_free`, etc.
     
         Signed-off-by: Sun Chao <sunchao9@huawei.com>
    -    Signed-off-by: Jiang Xin <worldhello.net@gmail.com>
    +    Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
         Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
      diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
      --- a/builtin/pack-redundant.c
      +++ b/builtin/pack-redundant.c
     @@
    - 	size_t all_objects_size;
    + 	struct llist *all_objects;
      } *local_packs = NULL, *altodb_packs = NULL;
      
     -struct pll {
    @@ -105,7 +105,7 @@
     -	diff = llist_copy(list);
     -
     -	while (pl) {
    --		llist_sorted_difference_inplace(diff, pl->remaining_objects);
    +-		llist_sorted_difference_inplace(diff, pl->all_objects);
     -		if (diff->size == 0) { /* we're done */
     -			llist_free(diff);
     -			return 1;
3:  6338c6fad4 ! 4:  1acdd0af1e pack-redundant: rename pack_list.all_objects
    @@ -18,16 +18,7 @@
     +	struct llist *remaining_objects;
      } *local_packs = NULL, *altodb_packs = NULL;
      
    - struct pll {
    -@@
    - 	diff = llist_copy(list);
    - 
    - 	while (pl) {
    --		llist_sorted_difference_inplace(diff, pl->all_objects);
    -+		llist_sorted_difference_inplace(diff, pl->remaining_objects);
    - 		if (diff->size == 0) { /* we're done */
    - 			llist_free(diff);
    - 			return 1;
    + static struct llist_item *free_nodes;
     @@
      {
      	struct pack_list *pl_a = *((struct pack_list **)a);
4:  734f4d8a8b ! 5:  306d515cda pack-redundant: consistent sort method
    @@ -26,7 +26,7 @@
     +	size_t all_objects_size;
      } *local_packs = NULL, *altodb_packs = NULL;
      
    - struct pll {
    + static struct llist_item *free_nodes;
     @@
      	return ret;
      }
    @@ -42,20 +42,24 @@
     -	if (sz_a == sz_b)
     -		return 0;
     -	else if (sz_a < sz_b)
    -+	/* if have the same remaining_objects, big pack first */
    -+	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size)
    ++	if (pl_a->remaining_objects->size == pl_b->remaining_objects->size) {
    ++		/* have the same remaining_objects, big pack first */
     +		if (pl_a->all_objects_size == pl_b->all_objects_size)
     +			return 0;
     +		else if (pl_a->all_objects_size < pl_b->all_objects_size)
     +			return 1;
     +		else
     +			return -1;
    -+
    -+	/* sort according to remaining objects, more remaining objects first */
    -+	if (pl_a->remaining_objects->size < pl_b->remaining_objects->size)
    ++	} else if (pl_a->remaining_objects->size < pl_b->remaining_objects->size) {
    ++		/* sort by remaining objects, more objects first */
      		return 1;
    - 	else
    +-	else
    ++	} else {
      		return -1;
    ++	}
    + }
    + 
    + /* Sort pack_list, greater size of remaining_objects first */
     @@
      	for (n = 0, p = *pl; p; p = p->next)
      		ary[n++] = p;

## This reroll has the following commits:

Jiang Xin (3):
  t5323: test cases for git-pack-redundant
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 221 +++++++++++++++-----------------------
 t/t5323-pack-redundant.sh | 207 +++++++++++++++++++++++++++++++++++
 2 files changed, 292 insertions(+), 136 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.0.3.gc45e608566