[PATCH v2 0/4] Check .gitmodules when using packfile URIs
To
git@vger.kernel.org
Cc
Jonathan Tan
avarab@gmail.com
gitster@pobox.com
From
Jonathan Tan
See Also
Prev
Date
2021-02-22 19:20:05 UTC
Here's v2. I think I've addressed all the review comments, including
passing the index-pack args as separate arguments (to avoid the
necessity to somehow encode in order to get rid of spaces), and by using
a custom error function instead of a specific option in fsck.

This applies on master. I mentioned earlier [1] that I was planning to
implement this on Ævar's fsck API improvements, but after looking at the
latest v2, I see that it omits patch 11 from v1 (which is the one I
need), so what I've done is to use a string check in the meantime.

[1] https://lore.kernel.org/git/20210219004612.1181920-1-jonathantanmy@google.com/

Jonathan Tan (4):
  http: allow custom index-pack args
  http-fetch: allow custom index-pack args
  fetch-pack: with packfile URIs, use index-pack arg
  fetch-pack: print and use dangling .gitmodules

 Documentation/git-http-fetch.txt |  10 ++-
 Documentation/git-index-pack.txt |   7 ++-
 builtin/index-pack.c             |  25 +++++++-
 builtin/receive-pack.c           |   2 +-
 fetch-pack.c                     | 103 ++++++++++++++++++++++++++-----
 fsck.c                           |   5 ++
 fsck.h                           |   2 +
 http-fetch.c                     |  20 +++++-
 http.c                           |  15 ++---
 http.h                           |  10 +--
 pack-write.c                     |   8 ++-
 pack.h                           |   2 +-
 t/t5550-http-fetch-dumb.sh       |   5 +-
 t/t5702-protocol-v2.sh           |  58 +++++++++++++++--
 14 files changed, 227 insertions(+), 45 deletions(-)

Range-diff against v1:
-:  ---------- > 1:  b7e376be16 http: allow custom index-pack args
1:  9fba6c9bcc ! 2:  57220ceb84 http-fetch: allow custom index-pack args
    @@ Documentation/git-http-fetch.txt: commit-id::
      
      --packfile=<hash>::
     -	Instead of a commit id on the command line (which is not expected in
    -+	For internal use only. Instead of a commit id on the command line (which is not expected in
    ++	For internal use only. Instead of a commit id on the command
    ++	line (which is not expected in
      	this case), 'git http-fetch' fetches the packfile directly at the given
      	URL and uses index-pack to generate corresponding .idx and .keep files.
      	The hash is used to determine the name of the temporary file and is
    @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      		strvec_pushf(&cmd.args, "--packfile=%.*s",
      			     (int) the_hash_algo->hexsz,
      			     packfile_uris.items[i].string);
    -+		strvec_push(&cmd.args, "--index-pack-args=index-pack --stdin --keep");
    ++		strvec_push(&cmd.args, "--index-pack-arg=index-pack");
    ++		strvec_push(&cmd.args, "--index-pack-arg=--stdin");
    ++		strvec_push(&cmd.args, "--index-pack-arg=--keep");
      		strvec_push(&cmd.args, uri);
      		cmd.git_cmd = 1;
      		cmd.no_stdin = 1;
    @@ http-fetch.c: int cmd_main(int argc, const char **argv)
      	int packfile = 0;
      	int nongit;
      	struct object_id packfile_hash;
    -+	const char *index_pack_args = NULL;
    ++	struct strvec index_pack_args = STRVEC_INIT;
      
      	setup_git_directory_gently(&nongit);
      
    @@ http-fetch.c: int cmd_main(int argc, const char **argv)
      			packfile = 1;
      			if (parse_oid_hex(p, &packfile_hash, &end) || *end)
      				die(_("argument to --packfile must be a valid hash (got '%s')"), p);
    -+		} else if (skip_prefix(argv[arg], "--index-pack-args=", &p)) {
    -+			index_pack_args = p;
    ++		} else if (skip_prefix(argv[arg], "--index-pack-arg=", &p)) {
    ++			strvec_push(&index_pack_args, p);
      		}
      		arg++;
      	}
    @@ http-fetch.c: int cmd_main(int argc, const char **argv)
      
      	if (packfile) {
     -		fetch_single_packfile(&packfile_hash, argv[arg]);
    -+		struct strvec encoded = STRVEC_INIT;
    -+		char **raw;
    -+		int i;
    -+
    -+		if (!index_pack_args)
    ++		if (!index_pack_args.nr)
     +			die(_("--packfile requires --index-pack-args"));
     +
    -+		strvec_split(&encoded, index_pack_args);
    -+
    -+		CALLOC_ARRAY(raw, encoded.nr + 1);
    -+		for (i = 0; i < encoded.nr; i++)
    -+			raw[i] = url_percent_decode(encoded.v[i]);
    -+
     +		fetch_single_packfile(&packfile_hash, argv[arg],
    -+				      (const char **) raw);
    -+
    -+		for (i = 0; i < encoded.nr; i++)
    -+			free(raw[i]);
    -+		free(raw);
    -+		strvec_clear(&encoded);
    ++				      index_pack_args.v);
     +
      		return 0;
      	}
      
    -+	if (index_pack_args)
    ++	if (index_pack_args.nr)
     +		die(_("--index-pack-args can only be used with --packfile"));
     +
      	if (commits_on_stdin) {
    @@ t/t5550-http-fetch-dumb.sh: test_expect_success 'http-fetch --packfile' '
      	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
     -	git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
     +	git -C packfileclient http-fetch --packfile=$ARBITRARY \
    -+		--index-pack-args="index-pack --stdin --keep" "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
    ++		--index-pack-arg=index-pack --index-pack-arg=--stdin \
    ++		--index-pack-arg=--keep \
    ++		"$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
      
      	grep "^keep.[0-9a-f]\{16,\}$" out &&
      	cut -c6- out >packhash &&
2:  7c3244e79f ! 3:  aa87335464 fetch-pack: with packfile URIs, use index-pack arg
    @@ fetch-pack.c: static void write_promisor_file(const char *keep_name,
     - * Pass 1 as "only_packfile" if the pack received is the only pack in this
     - * fetch request (that is, if there were no packfile URIs provided).
     + * If packfile URIs were provided, pass a non-NULL pointer to index_pack_args.
    -+ * The string to pass as the --index-pack-args argument to http-fetch will be
    ++ * The strings to pass as the --index-pack-arg arguments to http-fetch will be
     + * stored there. (It must be freed by the caller.)
       */
      static int get_pack(struct fetch_pack_args *args,
      		    int xd[2], struct string_list *pack_lockfiles,
     -		    int only_packfile,
    -+		    char **index_pack_args,
    ++		    struct strvec *index_pack_args,
      		    struct ref **sought, int nr_sought)
      {
      	struct async demux;
    @@ fetch-pack.c: static int get_pack(struct fetch_pack_args *args,
      	}
      
     +	if (index_pack_args) {
    -+		struct strbuf joined = STRBUF_INIT;
     +		int i;
     +
    -+		for (i = 0; i < cmd.args.nr; i++) {
    -+			if (i)
    -+				strbuf_addch(&joined, ' ');
    -+			strbuf_addstr_urlencode(&joined, cmd.args.v[i],
    -+						is_rfc3986_unreserved);
    -+		}
    -+		*index_pack_args = strbuf_detach(&joined, NULL);
    ++		for (i = 0; i < cmd.args.nr; i++)
    ++			strvec_push(index_pack_args, cmd.args.v[i]);
     +	}
     +
      	cmd.in = demux.out;
    @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      	int seen_ack = 0;
      	struct string_list packfile_uris = STRING_LIST_INIT_DUP;
      	int i;
    -+	char *index_pack_args = NULL;
    ++	struct strvec index_pack_args = STRVEC_INIT;
      
      	negotiator = &negotiator_alloc;
      	fetch_negotiator_init(r, negotiator);
    @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      				die(_("git fetch-pack: fetch failed."));
      			do_check_stateless_delimiter(args, &reader);
      
    +@@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
    + 	}
    + 
    + 	for (i = 0; i < packfile_uris.nr; i++) {
    ++		int j;
    + 		struct child_process cmd = CHILD_PROCESS_INIT;
    + 		char packname[GIT_MAX_HEXSZ + 1];
    + 		const char *uri = packfile_uris.items[i].string +
     @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      		strvec_pushf(&cmd.args, "--packfile=%.*s",
      			     (int) the_hash_algo->hexsz,
      			     packfile_uris.items[i].string);
    --		strvec_push(&cmd.args, "--index-pack-args=index-pack --stdin --keep");
    -+		strvec_pushf(&cmd.args, "--index-pack-args=%s", index_pack_args);
    +-		strvec_push(&cmd.args, "--index-pack-arg=index-pack");
    +-		strvec_push(&cmd.args, "--index-pack-arg=--stdin");
    +-		strvec_push(&cmd.args, "--index-pack-arg=--keep");
    ++		for (j = 0; j < index_pack_args.nr; j++)
    ++			strvec_pushf(&cmd.args, "--index-pack-arg=%s",
    ++				     index_pack_args.v[j]);
      		strvec_push(&cmd.args, uri);
      		cmd.git_cmd = 1;
      		cmd.no_stdin = 1;
    @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      						 packname));
      	}
      	string_list_clear(&packfile_uris, 0);
    -+	FREE_AND_NULL(index_pack_args);
    ++	strvec_clear(&index_pack_args);
      
      	if (negotiator)
      		negotiator->release(negotiator);
3:  384c9d1c73 ! 4:  e8b18d02e6 fetch-pack: print and use dangling .gitmodules
    @@ Documentation/git-index-pack.txt: OPTIONS
      	Specifies the number of threads to spawn when resolving
     
      ## builtin/index-pack.c ##
    +@@ builtin/index-pack.c: static void show_pack_info(int stat_only)
    + 	}
    + }
    + 
    ++static int print_dangling_gitmodules(struct fsck_options *o,
    ++				     const struct object_id *oid,
    ++				     enum object_type object_type,
    ++				     int msg_type, const char *message)
    ++{
    ++	/*
    ++	 * NEEDSWORK: Plumb the MSG_ID (from fsck.c) here and use it
    ++	 * instead of relying on this string check.
    ++	 */
    ++	if (starts_with(message, "gitmodulesMissing")) {
    ++		printf("%s\n", oid_to_hex(oid));
    ++		return 0;
    ++	}
    ++	return fsck_error_function(o, oid, object_type, msg_type, message);
    ++}
    ++
    + int cmd_index_pack(int argc, const char **argv, const char *prefix)
    + {
    + 	int i, fix_thin_pack = 0, verify = 0, stat_only = 0;
     @@ builtin/index-pack.c: int cmd_index_pack(int argc, const char **argv, const char *prefix)
      	else
      		close(input_fd);
    @@ builtin/index-pack.c: int cmd_index_pack(int argc, const char **argv, const char
     -	if (do_fsck_object && fsck_finish(&fsck_options))
     -		die(_("fsck error in pack objects"));
     +	if (do_fsck_object) {
    -+		struct fsck_options fo = FSCK_OPTIONS_STRICT;
    ++		struct fsck_options fo = fsck_options;
     +
    -+		fo.print_dangling_gitmodules = 1;
    ++		fo.error_func = print_dangling_gitmodules;
     +		if (fsck_finish(&fo))
     +			die(_("fsck error in pack objects"));
     +	}
    @@ fetch-pack.c: static void write_promisor_file(const char *keep_name,
     +
      /*
       * If packfile URIs were provided, pass a non-NULL pointer to index_pack_args.
    -  * The string to pass as the --index-pack-args argument to http-fetch will be
    +  * The strings to pass as the --index-pack-arg arguments to http-fetch will be
     @@ fetch-pack.c: static void write_promisor_file(const char *keep_name,
      static int get_pack(struct fetch_pack_args *args,
      		    int xd[2], struct string_list *pack_lockfiles,
    - 		    char **index_pack_args,
    + 		    struct strvec *index_pack_args,
     -		    struct ref **sought, int nr_sought)
     +		    struct ref **sought, int nr_sought,
     +		    struct oidset *gitmodules_oids)
    @@ fetch-pack.c: static struct ref *do_fetch_pack(struct fetch_pack_args *args,
     @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      	struct string_list packfile_uris = STRING_LIST_INIT_DUP;
      	int i;
    - 	char *index_pack_args = NULL;
    + 	struct strvec index_pack_args = STRVEC_INIT;
     +	struct oidset gitmodules_oids = OIDSET_INIT;
      
      	negotiator = &negotiator_alloc;
    @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      		if (finish_command(&cmd))
     @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
      	string_list_clear(&packfile_uris, 0);
    - 	FREE_AND_NULL(index_pack_args);
    + 	strvec_clear(&index_pack_args);
      
     +	fsck_gitmodules_oids(&gitmodules_oids);
     +
    @@ fsck.c: int fsck_error_function(struct fsck_options *o,
      int fsck_finish(struct fsck_options *options)
      {
      	int ret = 0;
    -@@ fsck.c: int fsck_finish(struct fsck_options *options)
    - 		if (!buf) {
    - 			if (is_promisor_object(oid))
    - 				continue;
    --			ret |= report(options,
    --				      oid, OBJ_BLOB,
    --				      FSCK_MSG_GITMODULES_MISSING,
    --				      "unable to read .gitmodules blob");
    -+			if (options->print_dangling_gitmodules)
    -+				printf("%s\n", oid_to_hex(oid));
    -+			else
    -+				ret |= report(options,
    -+					      oid, OBJ_BLOB,
    -+					      FSCK_MSG_GITMODULES_MISSING,
    -+					      "unable to read .gitmodules blob");
    - 			continue;
    - 		}
    - 
     
      ## fsck.h ##
    -@@ fsck.h: struct fsck_options {
    - 	int *msg_type;
    - 	struct oidset skiplist;
    - 	kh_oid_map_t *object_names;
    -+
    -+	/*
    -+	 * If 1, print the hashes of missing .gitmodules blobs instead of
    -+	 * considering them to be errors.
    -+	 */
    -+	unsigned print_dangling_gitmodules:1;
    - };
    - 
    - #define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
     @@ fsck.h: int fsck_walk(struct object *obj, void *data, struct fsck_options *options);
      int fsck_object(struct object *obj, void *data, unsigned long size,
      	struct fsck_options *options);
    @@ pack.h: int verify_pack_index(struct packed_git *);
       * The "hdr" output buffer should be at least this big, which will handle sizes
     
      ## t/t5702-protocol-v2.sh ##
    +@@ t/t5702-protocol-v2.sh: test_expect_success 'part of packfile response provided as URI' '
    + 	test -f hfound &&
    + 	test -f h2found &&
    + 
    +-	# Ensure that there are exactly 6 files (3 .pack and 3 .idx).
    +-	ls http_child/.git/objects/pack/* >filelist &&
    ++	# Ensure that there are exactly 3 packfiles with associated .idx
    ++	ls http_child/.git/objects/pack/*.pack \
    ++	    http_child/.git/objects/pack/*.idx >filelist &&
    + 	test_line_count = 6 filelist
    + '
    + 
    +@@ t/t5702-protocol-v2.sh: test_expect_success 'packfile-uri with transfer.fsckobjects' '
    + 		-c fetch.uriprotocols=http,https \
    + 		clone "$HTTPD_URL/smart/http_parent" http_child &&
    + 
    +-	# Ensure that there are exactly 4 files (2 .pack and 2 .idx).
    +-	ls http_child/.git/objects/pack/* >filelist &&
    ++	# Ensure that there are exactly 2 packfiles with associated .idx
    ++	ls http_child/.git/objects/pack/*.pack \
    ++	    http_child/.git/objects/pack/*.idx >filelist &&
    + 	test_line_count = 4 filelist
    + '
    + 
     @@ t/t5702-protocol-v2.sh: test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object'
      	test_i18ngrep "invalid author/committer line - missing email" error
      '
    @@ t/t5702-protocol-v2.sh: test_expect_success 'packfile-uri with transfer.fsckobje
     +		-c fetch.uriprotocols=http,https \
     +		clone "$HTTPD_URL/smart/http_parent" http_child &&
     +
    -+	# Ensure that there are exactly 4 files (2 .pack and 2 .idx).
    -+	ls http_child/.git/objects/pack/* >filelist &&
    ++	# Ensure that there are exactly 2 packfiles with associated .idx
    ++	ls http_child/.git/objects/pack/*.pack \
    ++	    http_child/.git/objects/pack/*.idx >filelist &&
     +	test_line_count = 4 filelist
     +'
     +
4:  da0d7b38ae < -:  ---------- SQUASH??? test fix
-- 
2.30.0.617.g56c4b15f3c-goog