From 13b9e71b383485885c4823baa466c32511fd20bc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 3 Jun 2019 14:14:17 -0400 Subject: [PATCH 1/3] repo-settings: create core.featureAdoptionRate setting Several advanced config settings are highly recommended for clients using large repositories. Power users learn these one-by-one and enable them as they see fit. This could be made simpler, to allow more users to have access to these almost-always beneficial features (and more beneficial in larger repos). Create a 'core.featureAdoptionRate' config setting that allows integer values. This is a rating from 0 to 10 for the user's willingness to adopt new or experimental features that improve Git performance. The default is 0, meaning "don't change anything!" A value of 10 would mean "I'm willing for some behavior to change to get the best performance I can get, and can take experimental features in their first release." As we integrate this with more config settings, we will make this scale more clear. This config setting only changes the default values of other config settings. If the setting is given explicitly, then take the explicit value. This change adds these two defaults when core.featureAdoptionRate is at least three: * core.commitGraph=true * gc.writeCommitGraph=true The use of "three or higher" for these settings means that a value of 3 means "I'm willing to add optional features that can augment the data on disk in favor of improved performance, but those features should be stable after being included in multiple major releases." To centralize these config options and properly set the defaults, create a repo_settings that contains an int for each config variable. Use -1 as "unset", with 0 for false and 1 for true. The prepare_repo_settings() method ensures that this settings struct has been initialized, and avoids double-scanning the config settings. 
Signed-off-by: Derrick Stolee --- Documentation/config/core.txt | 22 +++++++++++++++++- Documentation/config/gc.txt | 4 ++-- Makefile | 1 + builtin/gc.c | 6 ++--- commit-graph.c | 7 +++--- repo-settings.c | 44 +++++++++++++++++++++++++++++++++++ repo-settings.h | 13 +++++++++++ repository.h | 3 +++ 8 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 repo-settings.c create mode 100644 repo-settings.h diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 75538d27e7e06b..bfe647c76fa126 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -577,7 +577,8 @@ the `GIT_NOTES_REF` environment variable. See linkgit:git-notes[1]. core.commitGraph:: If true, then git will read the commit-graph file (if it exists) - to parse the graph structure of commits. Defaults to false. See + to parse the graph structure of commits. Defaults to false, unless + `core.featureAdoptionRate` is at least three. See linkgit:git-commit-graph[1] for more information. core.useReplaceRefs:: @@ -601,3 +602,22 @@ core.abbrev:: in your repository, which hopefully is enough for abbreviated object names to stay unique for some time. The minimum length is 4. + +core.featureAdoptionRate:: + Set an integer value on a scale from 0 to 10 describing your + desire to adopt new performance features. Defaults to 0. As + the value increases, features are enabled by changing the + default values of other config settings. If a config variable + is specified explicitly, the explicit value will override these + defaults: ++ +If the value is at least 3, then the following defaults are modified. +These represent relatively new features that have existed for multiple +major releases, and may present performance benefits. These benefits +depend on the amount and kind of data in your repo and how you use it. +The settings do not modify the user-facing output of porcelain commands. ++ +* `core.commitGraph=true` enables reading commit-graph files. 
++ +* `gc.writeCommitGraph=true` enables writing commit-graph files during +`git gc`. diff --git a/Documentation/config/gc.txt b/Documentation/config/gc.txt index 02b92b18b5c2cf..898263209c5e6d 100644 --- a/Documentation/config/gc.txt +++ b/Documentation/config/gc.txt @@ -63,8 +63,8 @@ gc.writeCommitGraph:: If true, then gc will rewrite the commit-graph file when linkgit:git-gc[1] is run. When using `git gc --auto` the commit-graph will be updated if housekeeping is - required. Default is false. See linkgit:git-commit-graph[1] - for details. + required. Default is false, unless `core.featureAdoptionRate` + is at least three. See linkgit:git-commit-graph[1] for details. gc.logExpiry:: If the file gc.log exists, then `git gc --auto` will print diff --git a/Makefile b/Makefile index 8a7e2353520ddd..2d3499d7ac203d 100644 --- a/Makefile +++ b/Makefile @@ -967,6 +967,7 @@ LIB_OBJS += refspec.o LIB_OBJS += ref-filter.o LIB_OBJS += remote.o LIB_OBJS += replace-object.o +LIB_OBJS += repo-settings.o LIB_OBJS += repository.o LIB_OBJS += rerere.o LIB_OBJS += resolve-undo.o diff --git a/builtin/gc.c b/builtin/gc.c index 8943bcc300d4a2..6281aad961a5e3 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -27,6 +27,7 @@ #include "pack-objects.h" #include "blob.h" #include "tree.h" +#include "repo-settings.h" #define FAILED_RUN "failed to run %s" @@ -41,7 +42,6 @@ static int aggressive_depth = 50; static int aggressive_window = 250; static int gc_auto_threshold = 6700; static int gc_auto_pack_limit = 50; -static int gc_write_commit_graph; static int detach_auto = 1; static timestamp_t gc_log_expire_time; static const char *gc_log_expire = "1.day.ago"; @@ -148,7 +148,6 @@ static void gc_config(void) git_config_get_int("gc.aggressivedepth", &aggressive_depth); git_config_get_int("gc.auto", &gc_auto_threshold); git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit); - git_config_get_bool("gc.writecommitgraph", &gc_write_commit_graph); git_config_get_bool("gc.autodetach", 
&detach_auto); git_config_get_expiry("gc.pruneexpire", &prune_expire); git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire); @@ -685,7 +684,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) clean_pack_garbage(); } - if (gc_write_commit_graph) + prepare_repo_settings(the_repository); + if (the_repository->settings->gc_write_commit_graph == 1) write_commit_graph_reachable(get_object_directory(), 0, !quiet && !daemonized); diff --git a/commit-graph.c b/commit-graph.c index 7c5e54875fdacd..b09c465a7aa00d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -16,6 +16,7 @@ #include "hashmap.h" #include "replace-object.h" #include "progress.h" +#include "repo-settings.h" #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */ #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ @@ -311,7 +312,6 @@ static void prepare_commit_graph_one(struct repository *r, const char *obj_dir) static int prepare_commit_graph(struct repository *r) { struct object_directory *odb; - int config_value; if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0)) die("dying as requested by the '%s' variable on commit-graph load!", @@ -321,9 +321,10 @@ static int prepare_commit_graph(struct repository *r) return !!r->objects->commit_graph; r->objects->commit_graph_attempted = 1; + prepare_repo_settings(r); + if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - (repo_config_get_bool(r, "core.commitgraph", &config_value) || - !config_value)) + r->settings->core_commit_graph != 1) /* * This repository is not configured to use commit graphs, so * do not load one. 
(But report commit_graph_attempted anyway diff --git a/repo-settings.c b/repo-settings.c new file mode 100644 index 00000000000000..f7fc2a1959acd9 --- /dev/null +++ b/repo-settings.c @@ -0,0 +1,44 @@ +#include "cache.h" +#include "repository.h" +#include "config.h" +#include "repo-settings.h" + +#define UPDATE_DEFAULT(s,v) do { if (s == -1) { s = v; } } while(0) + +static int git_repo_config(const char *key, const char *value, void *cb) +{ + struct repo_settings *rs = (struct repo_settings *)cb; + + if (!strcmp(key, "core.featureadoptionrate")) { + int rate = git_config_int(key, value); + if (rate >= 3) { + UPDATE_DEFAULT(rs->core_commit_graph, 1); + UPDATE_DEFAULT(rs->gc_write_commit_graph, 1); + } + return 0; + } + if (!strcmp(key, "core.commitgraph")) { + rs->core_commit_graph = git_config_bool(key, value); + return 0; + } + if (!strcmp(key, "gc.writecommitgraph")) { + rs->gc_write_commit_graph = git_config_bool(key, value); + return 0; + } + + return 1; +} + +void prepare_repo_settings(struct repository *r) +{ + if (r->settings) + return; + + r->settings = xmalloc(sizeof(*r->settings)); + + /* Defaults */ + r->settings->core_commit_graph = -1; + r->settings->gc_write_commit_graph = -1; + + repo_config(r, git_repo_config, r->settings); +} diff --git a/repo-settings.h b/repo-settings.h new file mode 100644 index 00000000000000..7d44627bf09957 --- /dev/null +++ b/repo-settings.h @@ -0,0 +1,13 @@ +#ifndef REPO_SETTINGS_H +#define REPO_SETTINGS_H + +struct repo_settings { + int core_commit_graph; + int gc_write_commit_graph; +}; + +struct repository; + +void prepare_repo_settings(struct repository *r); + +#endif /* REPO_SETTINGS_H */ diff --git a/repository.h b/repository.h index 4fb6a5885f794d..352afc9cd8eda9 100644 --- a/repository.h +++ b/repository.h @@ -4,6 +4,7 @@ #include "path.h" struct config_set; +struct repo_settings; struct git_hash_algo; struct index_state; struct lock_file; @@ -72,6 +73,8 @@ struct repository { */ char *submodule_prefix; + struct 
repo_settings *settings; + /* Subsystems */ /* * Repository's config which contains key-value pairs from the usual From 4fe896e423b698ef60b208c613252b4847b6cd0a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 3 Jun 2019 15:13:59 -0400 Subject: [PATCH 2/3] repo-settings: use index.version=4 by default If a repo is large, it likely has many paths in its working directory. This means the index could be compressed using version 4. Set this as a default when core.featureAdoptionRate is at least three. Since the index version is written to a file, this is an excellent opportunity to test that the config settings are working correctly with the different precedence rules. Adapt a test from t1600-index.sh to verify the version is set properly with different values of index.version config, core.featureAdoptionRate, and GIT_INDEX_VERSION. Signed-off-by: Derrick Stolee --- Documentation/config/core.txt | 3 +++ Documentation/config/index.txt | 2 ++ read-cache.c | 12 +++++++----- repo-settings.c | 6 ++++++ repo-settings.h | 1 + t/t1600-index.sh | 34 +++++++++++++++++++++++++++++----- 6 files changed, 48 insertions(+), 10 deletions(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index bfe647c76fa126..865252aba906a9 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -621,3 +621,6 @@ The settings do not modify the user-facing output of porcelain commands. + * `gc.writeCommitGraph=true` enables writing commit-graph files during `git gc`. ++ +* `index.version=4` uses prefix-compression to reduce the size of the +.git/index file. diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt index f1815030410689..98a88c30bec570 100644 --- a/Documentation/config/index.txt +++ b/Documentation/config/index.txt @@ -24,3 +24,5 @@ index.threads:: index.version:: Specify the version with which new index files should be initialized. This does not affect existing repositories. 
+ If `core.featureAdoptionRate` is at least three, then the + default value is 4. diff --git a/read-cache.c b/read-cache.c index 22e7b9944e35d2..7fab8ff748ae6b 100644 --- a/read-cache.c +++ b/read-cache.c @@ -25,6 +25,7 @@ #include "fsmonitor.h" #include "thread-utils.h" #include "progress.h" +#include "repo-settings.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -1599,16 +1600,17 @@ struct cache_entry *refresh_cache_entry(struct index_state *istate, #define INDEX_FORMAT_DEFAULT 3 -static unsigned int get_index_format_default(void) +static unsigned int get_index_format_default(struct repository *r) { char *envversion = getenv("GIT_INDEX_VERSION"); char *endp; - int value; unsigned int version = INDEX_FORMAT_DEFAULT; if (!envversion) { - if (!git_config_get_int("index.version", &value)) - version = value; + prepare_repo_settings(r); + + if (r->settings->index_version >= 0) + version = r->settings->index_version; if (version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < version) { warning(_("index.version set, but the value is invalid.\n" "Using version %i"), INDEX_FORMAT_DEFAULT); @@ -2765,7 +2767,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, } if (!istate->version) { - istate->version = get_index_format_default(); + istate->version = get_index_format_default(the_repository); if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0)) init_split_index(istate); } diff --git a/repo-settings.c b/repo-settings.c index f7fc2a1959acd9..5753153a84ffe6 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -14,6 +14,7 @@ static int git_repo_config(const char *key, const char *value, void *cb) if (rate >= 3) { UPDATE_DEFAULT(rs->core_commit_graph, 1); UPDATE_DEFAULT(rs->gc_write_commit_graph, 1); + UPDATE_DEFAULT(rs->index_version, 4); } return 0; } @@ -25,6 +26,10 @@ static int git_repo_config(const char *key, const char *value, void *cb) rs->gc_write_commit_graph = git_config_bool(key, value); return 0; } + if (!strcmp(key, 
"index.version")) { + rs->index_version = git_config_int(key, value); + return 0; + } return 1; } @@ -39,6 +44,7 @@ void prepare_repo_settings(struct repository *r) /* Defaults */ r->settings->core_commit_graph = -1; r->settings->gc_write_commit_graph = -1; + r->settings->index_version = -1; repo_config(r, git_repo_config, r->settings); } diff --git a/repo-settings.h b/repo-settings.h index 7d44627bf09957..b752dfe8b4f2ea 100644 --- a/repo-settings.h +++ b/repo-settings.h @@ -4,6 +4,7 @@ struct repo_settings { int core_commit_graph; int gc_write_commit_graph; + int index_version; }; struct repository; diff --git a/t/t1600-index.sh b/t/t1600-index.sh index 42962ed7d46f6d..74f56e2769dfc8 100755 --- a/t/t1600-index.sh +++ b/t/t1600-index.sh @@ -59,17 +59,41 @@ test_expect_success 'out of bounds index.version issues warning' ' ) ' -test_expect_success 'GIT_INDEX_VERSION takes precedence over config' ' +test_index_version () { + INDEX_VERSION_CONFIG=$1 && + REPO_ADOPTION_RATE=$2 && + ENV_VAR_VERSION=$3 + EXPECTED_OUTPUT_VERSION=$4 && ( rm -f .git/index && - GIT_INDEX_VERSION=4 && - export GIT_INDEX_VERSION && - git config --add index.version 2 && + rm -f .git/config && + if test "$INDEX_VERSION_CONFIG" -ne 0 + then + git config --add index.version $INDEX_VERSION_CONFIG + fi && + if test "$REPO_ADOPTION_RATE" -ne 0 + then + git config --add core.featureAdoptionRate $REPO_ADOPTION_RATE + fi && + if test "$ENV_VAR_VERSION" -ne 0 + then + GIT_INDEX_VERSION=$ENV_VAR_VERSION && + export GIT_INDEX_VERSION + else + unset GIT_INDEX_VERSION + fi && git add a 2>&1 && - echo 4 >expect && + echo $EXPECTED_OUTPUT_VERSION >expect && test-tool index-version <.git/index >actual && test_cmp expect actual ) +} + +test_expect_success 'index version config precedence' ' + test_index_version 2 0 4 4 && + test_index_version 2 3 0 2 && + test_index_version 0 3 0 4 && + test_index_version 0 3 2 2 ' test_done From d080065a9208852a7e551cc8bef7d326576c076d Mon Sep 17 00:00:00 2001 From: Derrick 
Stolee Date: Mon, 3 Jun 2019 15:56:00 -0400 Subject: [PATCH 3/3] repo-settings: pack.useSparse=true If a repo is large, then it probably has a very large working directory. In this case, a typical developer's edits usually impact many fewer paths than the full path set. The sparse treewalk algorithm is optimized for this case, speeding up 'git push' calls. Use pack.useSparse=true when core.featureAdoptionRate is at least five. This is the first setting where the feature has only been out for a single major version. This could be moved to the "at least three" category after another major version. Signed-off-by: Derrick Stolee --- Documentation/config/core.txt | 9 +++++++++ Documentation/config/pack.txt | 3 ++- builtin/pack-objects.c | 9 +++++---- repo-settings.c | 8 ++++++++ repo-settings.h | 1 + 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 865252aba906a9..60356102a8b41b 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -624,3 +624,12 @@ The settings do not modify the user-facing output of porcelain commands. + * `index.version=4` uses prefix-compression to reduce the size of the .git/index file. ++ +If the value is at least 5, then all of the defaults above are included, +plus the defaults below. These represent new features that present +significant performance benefits, but may not have been released for +multiple major versions. ++ +* `pack.useSparse=true` uses the sparse tree-walk algorithm, which is +optimized for enumerating objects during linkgit:git-push[1] from a +client machine. diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index 9cdcfa73247842..9c4f8ea9ff703a 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -112,7 +112,8 @@ pack.useSparse:: objects. This can have significant performance benefits when computing a pack to send a small change. 
However, it is possible that extra objects are added to the pack-file if the included - commits contain certain types of direct renames. + commits contain certain types of direct renames. Defaults to + false, unless `core.featureAdoptionRate` is at least five. pack.writeBitmaps (deprecated):: This is a deprecated synonym for `repack.writeBitmaps`. diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 41d7fc59830c09..f26b3f28926646 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -34,6 +34,7 @@ #include "dir.h" #include "midx.h" #include "trace2.h" +#include "repo-settings.h" #define IN_PACK(obj) oe_in_pack(&to_pack, obj) #define SIZE(obj) oe_size(&to_pack, obj) @@ -2707,10 +2708,6 @@ static int git_pack_config(const char *k, const char *v, void *cb) use_bitmap_index_default = git_config_bool(k, v); return 0; } - if (!strcmp(k, "pack.usesparse")) { - sparse = git_config_bool(k, v); - return 0; - } if (!strcmp(k, "pack.threads")) { delta_search_threads = git_config_int(k, v); if (delta_search_threads < 0) @@ -3330,6 +3327,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) read_replace_refs = 0; sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0); + prepare_repo_settings(the_repository); + if (!sparse && the_repository->settings->pack_use_sparse != -1) + sparse = the_repository->settings->pack_use_sparse; + reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); diff --git a/repo-settings.c b/repo-settings.c index 5753153a84ffe6..c700edc286cd91 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -16,6 +16,9 @@ static int git_repo_config(const char *key, const char *value, void *cb) UPDATE_DEFAULT(rs->gc_write_commit_graph, 1); UPDATE_DEFAULT(rs->index_version, 4); } + if (rate >= 5) { + UPDATE_DEFAULT(rs->pack_use_sparse, 1); + } return 0; } if (!strcmp(key, "core.commitgraph")) { @@ -26,6 +29,10 @@ static int git_repo_config(const char *key, const char *value, void *cb) 
rs->gc_write_commit_graph = git_config_bool(key, value); return 0; } + if (!strcmp(key, "pack.usesparse")) { + rs->pack_use_sparse = git_config_bool(key, value); + return 0; + } if (!strcmp(key, "index.version")) { rs->index_version = git_config_int(key, value); return 0; @@ -44,6 +51,7 @@ void prepare_repo_settings(struct repository *r) /* Defaults */ r->settings->core_commit_graph = -1; r->settings->gc_write_commit_graph = -1; + r->settings->pack_use_sparse = -1; r->settings->index_version = -1; repo_config(r, git_repo_config, r->settings); diff --git a/repo-settings.h b/repo-settings.h index b752dfe8b4f2ea..1151c2193a5e0b 100644 --- a/repo-settings.h +++ b/repo-settings.h @@ -4,6 +4,7 @@ struct repo_settings { int core_commit_graph; int gc_write_commit_graph; + int pack_use_sparse; int index_version; };