Skip to content

rev-list and list-objects blob filtering #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Documentation/git-rev-list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,13 @@ SYNOPSIS
[ --fixed-strings | -F ]
[ --date=<format>]
[ [ --objects | --objects-edge | --objects-edge-aggressive ]
[ --unpacked ] ]
[ --unpacked ]
[ [ --filter-omit-all-blobs |
--filter-omit-large-blobs=<n>[kmg] |
--filter-use-blob=<blob-ish> |
--filter-use-path=<path> ]
[ --filter-print-manifest ] ] ]
[ --filter-relax ]
[ --pretty | --header ]
[ --bisect ]
[ --bisect-vars ]
Expand Down
26 changes: 26 additions & 0 deletions Documentation/rev-list-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,32 @@ ifdef::git-rev-list[]
--unpacked::
Only useful with `--objects`; print the object IDs that are not
in packs.

--filter-omit-all-blobs::
Only useful with one of the `--objects*` options; omits all blobs
from the printed list of objects.

--filter-omit-large-blobs=<n>[kmg]::
Only useful with one of the `--objects*` options; omits blobs larger
than <n> bytes from the printed list of objects. <n> may optionally
be followed by a 'k', 'm', or 'g' unit suffix, and may be zero. Special
files (matching ".git*") are always included, regardless of size.

--filter-use-blob=<blob-ish>::
--filter-use-path=<path>::
Only useful with one of the `--objects*` options; uses a sparse-checkout
specification contained in the given object or file to filter the
result to only contain blobs referenced by such a sparse-checkout.

--filter-print-manifest::
Only useful with one of the above `--filter*` options; prints a manifest
of the omitted objects. Each object ID is prefixed with a ``~''
character and is followed by the object type and, when known, the
object size.

--filter-relax::
Relax consistency checking for missing blobs. Do not die on
missing blobs during normal (non-filtering) object traversal
following an earlier partial/narrow clone or fetch.
endif::git-rev-list[]

--no-walk[=(sorted|unsorted)]::
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,9 @@ LIB_OBJS += levenshtein.o
LIB_OBJS += line-log.o
LIB_OBJS += line-range.o
LIB_OBJS += list-objects.o
LIB_OBJS += list-objects-filter-all.o
LIB_OBJS += list-objects-filter-large.o
LIB_OBJS += list-objects-filter-sparse.o
LIB_OBJS += ll-merge.o
LIB_OBJS += lockfile.o
LIB_OBJS += log-tree.o
Expand All @@ -815,7 +818,9 @@ LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o
LIB_OBJS += notes-utils.o
LIB_OBJS += object.o
LIB_OBJS += object-filter.o
LIB_OBJS += oidset.o
LIB_OBJS += oidset2.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-check.o
Expand Down
50 changes: 48 additions & 2 deletions builtin/rev-list.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ static const char rev_list_usage[] =

static struct progress *progress;
static unsigned progress_counter;
static struct object_filter_options filter_options;

static void finish_commit(struct commit *commit, void *data);
static void show_commit(struct commit *commit, void *data)
Expand Down Expand Up @@ -179,8 +180,20 @@ static void finish_commit(struct commit *commit, void *data)
/*
 * Called with an object from the traversal and the rev_list_info passed
 * as cb_data.  Dies if a blob is not present in the object store, unless
 * filter_options.relax is set, and parses non-commit objects that are
 * not yet parsed when revs->verify_objects is set.
 */
static void finish_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
/*
 * NOTE(review): the next two lines look like the old and the new form
 * of the same test as rendered by the diff view -- confirm.
 */
if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid))
if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) {
if (filter_options.relax) {
/*
 * Relax consistency checks: do not complain about
 * omitted blobs (presumably left out by an earlier
 * use of the object filtering feature).
 *
 * Note that this is independent of any filtering that
 * we are doing in this run.
 */
return;
}
die("missing blob object '%s'", oid_to_hex(&obj->oid));
}
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
parse_object(&obj->oid);
}
Expand All @@ -200,6 +213,17 @@ static void show_edge(struct commit *commit)
printf("-%s\n", oid_to_hex(&commit->object.oid));
}

/*
 * Callback handed to the filtered traversal when a manifest of omitted
 * objects was requested.  Prints one line per omitted object:
 *
 *     ~<oid> <type>            when the length is unknown (-1)
 *     ~<oid> <type> <length>   otherwise
 *
 * 'i', 'i_limit' and 'cb_data' are part of the callback signature but
 * are not needed here.
 */
static void print_omitted_object(int i, int i_limit, struct oidset2_entry *e, void *cb_data)
{
	const char *tn = typename(e->type);

	if (e->object_length == -1)
		printf("~%s %s\n", oid_to_hex(&e->oid), tn);
	else
		/*
		 * object_length is a signed quantity (it uses -1 as a
		 * sentinel); cast so the argument matches PRIuMAX.
		 */
		printf("~%s %s %"PRIuMAX"\n", oid_to_hex(&e->oid), tn,
		       (uintmax_t)e->object_length);
}

static void print_var_str(const char *var, const char *val)
{
printf("%s='%s'\n", var, val);
Expand Down Expand Up @@ -277,6 +301,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
int bisect_find_all = 0;
int use_bitmap_index = 0;
const char *show_progress = NULL;
oidset2_foreach_cb fn_filter_print = NULL;

if (argc == 2 && !strcmp(argv[1], "-h"))
usage(rev_list_usage);
Expand Down Expand Up @@ -333,6 +358,14 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
show_progress = arg;
continue;
}
if (object_filter_hand_parse_arg(&filter_options, arg, 1, 1)) {
if (!revs.blob_objects)
die(_("object filtering requires --objects"));
if (filter_options.use_blob &&
!oidcmp(&filter_options.sparse_oid, &null_oid))
die(_("invalid sparse value"));
continue;
}
usage(rev_list_usage);

}
Expand All @@ -357,6 +390,11 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
if (revs.show_notes)
die(_("rev-list does not support display of notes"));

if (object_filter_enabled(&filter_options)) {
if (use_bitmap_index)
die(_("cannot combine --use-bitmap-index with object filtering"));
}

save_commit_buffer = (revs.verbose_header ||
revs.grep_filter.pattern_list ||
revs.grep_filter.header_list);
Expand Down Expand Up @@ -401,7 +439,15 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
return show_bisect_vars(&info, reaches, all);
}

traverse_commit_list(&revs, show_commit, show_object, &info);
if (filter_options.print_manifest)
fn_filter_print = print_omitted_object;

if (object_filter_enabled(&filter_options))
traverse_commit_list_filtered(&filter_options, &revs,
show_commit, show_object,
fn_filter_print, &info);
else
traverse_commit_list(&revs, show_commit, show_object, &info);

stop_progress(&progress);

Expand Down
53 changes: 51 additions & 2 deletions dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,11 @@ static void invalidate_directory(struct untracked_cache *uc,
dir->dirs[i]->recurse = 0;
}

/*
 * Forward declaration: add_excludes() below hands the buffer it has read
 * to this helper, which is defined after it.
 */
static int add_excludes_from_buffer(
char *buf, size_t size,
const char *base, int baselen,
struct exclude_list *el);

/*
* Given a file with name "fname", read it (either from disk, or from
* an index if 'istate' is non-null), parse it and store the
Expand All @@ -754,9 +759,9 @@ static int add_excludes(const char *fname, const char *base, int baselen,
struct sha1_stat *sha1_stat)
{
struct stat st;
int fd, i, lineno = 1;
int fd;
size_t size = 0;
char *buf, *entry;
char *buf;

fd = open(fname, O_RDONLY);
if (fd < 0 || fstat(fd, &st) < 0) {
Expand Down Expand Up @@ -813,6 +818,18 @@ static int add_excludes(const char *fname, const char *base, int baselen,
}
}

add_excludes_from_buffer(buf, size, base, baselen, el);
return 0;
}

static int add_excludes_from_buffer(
char *buf, size_t size,
const char *base, int baselen,
struct exclude_list *el)
{
int i, lineno = 1;
char *entry;

el->filebuf = buf;

if (skip_utf8_bom(&buf, size))
Expand Down Expand Up @@ -841,6 +858,38 @@ int add_excludes_from_file_to_list(const char *fname, const char *base,
return add_excludes(fname, base, baselen, el, istate, NULL);
}

int add_excludes_from_blob_to_list(
struct object_id *oid,
const char *base, int baselen,
struct exclude_list *el)
{
char *buf;
unsigned long size;
enum object_type type;

buf = read_sha1_file(oid->hash, &type, &size);
if (!buf)
return -1;

if (type != OBJ_BLOB) {
free(buf);
return -1;
}

if (size == 0) {
free(buf);
return 0;
}

if (buf[size - 1] != '\n') {
buf = xrealloc(buf, st_add(size, 1));
buf[size++] = '\n';
}

add_excludes_from_buffer(buf, size, base, baselen, el);
return 0;
}

struct exclude_list *add_exclude_list(struct dir_struct *dir,
int group_type, const char *src)
{
Expand Down
4 changes: 4 additions & 0 deletions dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir,
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
struct exclude_list *el, struct index_state *istate);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
/*
 * Like add_excludes_from_file_to_list(), but takes the contents from the
 * blob named by 'oid'.  Returns -1 if the object cannot be read or is
 * not a blob, 0 otherwise.
 */
extern int add_excludes_from_blob_to_list(
struct object_id *oid,
const char *base, int baselen,
struct exclude_list *el);
extern void parse_exclude_pattern(const char **string, int *patternlen, unsigned *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base,
int baselen, struct exclude_list *el, int srcpos);
Expand Down
85 changes: 85 additions & 0 deletions list-objects-filter-all.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#include "cache.h"
#include "dir.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "diff.h"
#include "tree-walk.h"
#include "revision.h"
#include "list-objects.h"
#include "list-objects-filter-all.h"

/*
 * A filter for list-objects to omit ALL blobs from the traversal.
 *
 * This is the per-traversal state handed to the filter callback.
 */
struct filter_omit_all_blobs_data {
/* every blob omitted so far; reported and cleared after the walk */
struct oidset2 omits;
};

/*
 * Filter callback that keeps every tree and omits every blob.
 *
 * filter_type   which traversal event is being reported
 * obj           the tree or blob the event refers to
 * pathname,
 * filename      unused by this filter
 * filter_data_  a struct filter_omit_all_blobs_data
 *
 * Returns LOFR_MARK_SEEN | LOFR_SHOW when a tree is entered, LOFR_ZERO
 * when a tree is left, and LOFR_MARK_SEEN (without LOFR_SHOW) for a
 * blob, after recording the blob in filter_data->omits.
 */
static list_objects_filter_result filter_omit_all_blobs(
	list_objects_filter_type filter_type,
	struct object *obj,
	const char *pathname,
	const char *filename,
	void *filter_data_)
{
	struct filter_omit_all_blobs_data *filter_data = filter_data_;
	int64_t object_length = -1;
	unsigned long s;
	int t;

	switch (filter_type) {
	default:
		die("unknown filter_type");
		return LOFR_ZERO;

	case LOFT_BEGIN_TREE:
		assert(obj->type == OBJ_TREE);
		/* always include all tree objects */
		return LOFR_MARK_SEEN | LOFR_SHOW;

	case LOFT_END_TREE:
		assert(obj->type == OBJ_TREE);
		return LOFR_ZERO;

	case LOFT_BLOB:
		assert(obj->type == OBJ_BLOB);
		assert((obj->flags & SEEN) == 0);

		/*
		 * Since we always omit all blobs (and never provisionally omit),
		 * we should never see a blob twice.
		 */
		assert(!oidset2_contains(&filter_data->omits, &obj->oid));

		/*
		 * The blob may not be available locally (e.g. after an
		 * earlier filtered fetch).  In that case 's' is not set, so
		 * keep the "unknown length" sentinel (-1) instead of reading
		 * it.  NOTE(review): relies on sha1_object_info() returning
		 * a negative value on failure -- confirm.
		 */
		t = sha1_object_info(obj->oid.hash, &s);
		if (t >= 0) {
			assert(t == OBJ_BLOB);
			object_length = (int64_t)s;
		}

		/* Insert OID into the omitted list. No need for a pathname. */
		oidset2_insert(&filter_data->omits, &obj->oid, OBJ_BLOB,
			       object_length, NULL);
		return LOFR_MARK_SEEN; /* but not LOFR_SHOW (hard omit) */
	}
}

/*
 * Walk 'revs' with the omit-all-blobs filter installed.
 *
 * show_commit and show_object are forwarded to the traversal worker
 * together with ctx_data.  When print_omitted_object is non-NULL it is
 * invoked afterwards (also with ctx_data) for every blob that was left
 * out.  The set of omitted blobs is released before returning.
 */
void traverse_commit_list_omit_all_blobs(
	struct rev_info *revs,
	show_commit_fn show_commit,
	show_object_fn show_object,
	oidset2_foreach_cb print_omitted_object,
	void *ctx_data)
{
	struct filter_omit_all_blobs_data state;

	/* start with an empty set of omitted blobs */
	memset(&state, 0, sizeof(state));

	traverse_commit_list_worker(revs, show_commit, show_object, ctx_data,
				    filter_omit_all_blobs, &state);

	/* report what was left out, if the caller asked for it */
	if (print_omitted_object != NULL)
		oidset2_foreach(&state.omits, print_omitted_object, ctx_data);

	oidset2_clear(&state.omits);
}
18 changes: 18 additions & 0 deletions list-objects-filter-all.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef LIST_OBJECTS_FILTER_ALL_H
#define LIST_OBJECTS_FILTER_ALL_H

#include "oidset2.h"

/*
 * A filter for list-objects to omit ALL blobs
 * from the traversal.
 *
 * show_commit and show_object receive ctx_data as their callback data.
 * If print_omitted_object is non-NULL, it is called (also with
 * ctx_data) for each omitted blob once the traversal has finished.
 */
void traverse_commit_list_omit_all_blobs(
struct rev_info *revs,
show_commit_fn show_commit,
show_object_fn show_object,
oidset2_foreach_cb print_omitted_object,
void *ctx_data);

#endif /* LIST_OBJECTS_FILTER_ALL_H */

Loading