Skip to content

Commit 2063122

Browse files
committed
fast-export: do automatic reencoding of commit messages only if requested
Automatic re-encoding of commit messages (and dropping of the encoding header) hurts attempts to do reversible history rewrites (e.g. sha1sum <-> sha256sum transitions, some subtree rewrites), and seems inconsistent with the general principle followed elsewhere in fast-export of requiring explicit user requests to modify the output (e.g. --signed-tags=strip, --tag-of-filtered-object=rewrite). Add a --reencode flag that the user can use to specify how commit messages with a commit-specific encoding should be handled ('abort', 'yes', or 'no'), and, like other fast-export flags, default it to 'abort'. Signed-off-by: Elijah Newren <[email protected]>
1 parent 83b3656 commit 2063122

File tree

2 files changed

+66
-6
lines changed

2 files changed

+66
-6
lines changed

builtin/fast-export.c

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ static const char *fast_export_usage[] = {
3333
static int progress;
3434
static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
3535
static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
36+
static enum { REENCODE_ABORT, REENCODE_PLEASE, REENCODE_NEVER } reencode_mode = REENCODE_ABORT;
3637
static int fake_missing_tagger;
3738
static int use_done_feature;
3839
static int no_data;
@@ -77,6 +78,20 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt,
7778
return 0;
7879
}
7980

81+
static int parse_opt_reencode_mode(const struct option *opt,
82+
const char *arg, int unset)
83+
{
84+
if (unset || !strcmp(arg, "abort"))
85+
reencode_mode = REENCODE_ABORT;
86+
else if (!strcmp(arg, "yes"))
87+
reencode_mode = REENCODE_PLEASE;
88+
else if (!strcmp(arg, "no"))
89+
reencode_mode = REENCODE_NEVER;
90+
else
91+
return error("Unknown reencoding mode: %s", arg);
92+
return 0;
93+
}
94+
8095
static struct decoration idnums;
8196
static uint32_t last_idnum;
8297

@@ -633,10 +648,21 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
633648
}
634649

635650
mark_next_object(&commit->object);
636-
if (anonymize)
651+
if (anonymize) {
637652
reencoded = anonymize_commit_message(message);
638-
else if (!is_encoding_utf8(encoding))
639-
reencoded = reencode_string(message, "UTF-8", encoding);
653+
} else if (encoding) {
654+
switch(reencode_mode) {
655+
case REENCODE_PLEASE:
656+
reencoded = reencode_string(message, "UTF-8", encoding);
657+
break;
658+
case REENCODE_NEVER:
659+
break;
660+
case REENCODE_ABORT:
661+
die("Encountered commit-specific encoding %s in commit "
662+
"%s; use --reencode=<mode> to handle it",
663+
encoding, oid_to_hex(&commit->object.oid));
664+
}
665+
}
640666
if (!commit->parents)
641667
printf("reset %s\n", refname);
642668
printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
@@ -1091,6 +1117,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
10911117
OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
10921118
N_("select handling of tags that tag filtered objects"),
10931119
parse_opt_tag_of_filtered_mode),
1120+
OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"),
1121+
N_("select handling of commit messages in an alternate encoding"),
1122+
parse_opt_reencode_mode),
10941123
OPT_STRING(0, "export-marks", &export_filename, N_("file"),
10951124
N_("Dump marks to this file")),
10961125
OPT_STRING(0, "import-marks", &import_filename, N_("file"),

t/t9350-fast-export.sh

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,14 @@ test_expect_success 'fast-export --show-original-ids | git fast-import' '
9494
test $MUSS = $(git rev-parse --verify refs/tags/muss)
9595
'
9696

97-
test_expect_success 'iso-8859-7' '
97+
test_expect_success 'reencoding iso-8859-7' '
9898
9999
test_when_finished "git reset --hard HEAD~1" &&
100100
test_config i18n.commitencoding iso-8859-7 &&
101101
test_tick &&
102102
echo rosten >file &&
103103
git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
104-
git fast-export wer^..wer >iso-8859-7.fi &&
104+
git fast-export --reencode=yes wer^..wer >iso-8859-7.fi &&
105105
sed "s/wer/i18n/" iso-8859-7.fi |
106106
(cd new &&
107107
git fast-import &&
@@ -120,13 +120,44 @@ test_expect_success 'iso-8859-7' '
120120
! grep ^encoding actual)
121121
'
122122

123+
test_expect_success 'aborting on iso-8859-7' '
124+
125+
test_when_finished "git reset --hard HEAD~1" &&
126+
test_config i18n.commitencoding iso-8859-7 &&
127+
echo rosten >file &&
128+
git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
129+
test_must_fail git fast-export --reencode=abort wer^..wer >iso-8859-7.fi
130+
'
131+
132+
test_expect_success 'preserving iso-8859-7' '
133+
134+
test_when_finished "git reset --hard HEAD~1" &&
135+
test_config i18n.commitencoding iso-8859-7 &&
136+
echo rosten >file &&
137+
git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
138+
git fast-export --reencode=no wer^..wer >iso-8859-7.fi &&
139+
sed "s/wer/i18n-no-recoding/" iso-8859-7.fi |
140+
(cd new &&
141+
git fast-import &&
142+
# The commit object, if not re-encoded, is 240 bytes.
143+
# Removing the "encoding iso-8859-7\n" header would drop 20
144+
# bytes. Re-encoding the Pi character from \xF0 in
145+
# iso-8859-7 to \xCF\x80 in utf-8 would add a byte. I would
146+
# grep for the specific bytes, but Windows lamely does not
147+
# allow that, so just search for the expected size.
148+
test 240 -eq "$(git cat-file -s i18n-no-recoding)" &&
149+
# Also make sure the commit has the "encoding" header
150+
git cat-file commit i18n-no-recoding >actual &&
151+
grep ^encoding actual)
152+
'
153+
123154
test_expect_success 'encoding preserved if reencoding fails' '
124155
125156
test_when_finished "git reset --hard HEAD~1" &&
126157
test_config i18n.commitencoding iso-8859-7 &&
127158
echo rosten >file &&
128159
git commit -s -F "$TEST_DIRECTORY/t9350/broken-iso-8859-7-commit-message.txt" file &&
129-
git fast-export wer^..wer >iso-8859-7.fi &&
160+
git fast-export --reencode=yes wer^..wer >iso-8859-7.fi &&
130161
sed "s/wer/i18n-invalid/" iso-8859-7.fi |
131162
(cd new &&
132163
git fast-import &&

0 commit comments

Comments
 (0)