From c01d938c5e6c3496f77e5acdc69efe9f269a0bfa Mon Sep 17 00:00:00 2001 From: George Dunlap Date: Wed, 16 Sep 2020 15:45:43 +0100 Subject: [PATCH] Remove "appendix" information from commit message ...when parsing emails, similar to `git am`. Add a new field, `BodyAppendix` to PatchHeader. Modify `scanMessageBody` to accept a boolean argument saying whether to separate out the appendix or not. Do this by keeping two string builders, and having it switch to the appendix builder when it finds a `---` line. Handling the newlines at the end as expected requires moving things around a bit. First, we were trimming space from the line once to decide whether the line was empty, and then trimming space again if we determined it wasn't empty. This only needs to be done once. Then, do all the trimming (both of whitespace and the prefix) first, before deciding what to do about the line. Request BodyAppendix separately when parsing a mail, but not a commit message. Add some tests to verify that it works as expected. Signed-off-by: George Dunlap --- gitdiff/patch_header.go | 58 ++++++++++++++++++++++++++---------- gitdiff/patch_header_test.go | 53 ++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 16 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index 01148f5..ae50870 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -44,6 +44,11 @@ type PatchHeader struct { // remove prefixes such as `Re: ` and `[PATCH v3 5/17]` from the // Title and place them here. SubjectPrefix string + + // If the preamble looks like an email, and it contains a `---` + // line, that line will be removed and everything after it will be + // placed in BodyAppendix. + BodyAppendix string } // Message returns the commit message for the header. The message consists of @@ -165,14 +170,23 @@ func ParsePatchDate(s string) (time.Time, error) { // formats used by git diff, git log, and git show and the UNIX mailbox format // used by git format-patch. 
// -// If ParsePatchHeader detect that it is handling an email, it will +// If ParsePatchHeader detects that it is handling an email, it will // remove extra content at the beginning of the title line, such as // `[PATCH]` or `Re:` in the same way that `git mailinfo` does. // SubjectPrefix will be set to the value of this removed string. // (`git mailinfo` is the core part of `git am` that pulls information -// out of an individual mail.) Unline `git mailinfo`, -// ParsePatchHeader does not at the moment remove commit states or -// other extraneous matter after a `---` line. +// out of an individual mail.) +// +// Additionally, if ParsePatchHeader detects that it's handling an +// email, it will remove the first `---` line and put anything after it +// into BodyAppendix. +// +// Those wishing the effect of a plain `git am` should use +// `PatchHeader.Title + "\n" + PatchHeader.Body` (or +// `PatchHeader.Message()`). Those wishing to retain the subject +// prefix and appendix material should use `PatchHeader.SubjectPrefix +// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + +// PatchHeader.BodyAppendix`. 
func ParsePatchHeader(s string) (*PatchHeader, error) { r := bufio.NewReader(strings.NewReader(s)) @@ -277,7 +291,8 @@ func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { h.Title = title if title != "" { - body := scanMessageBody(s, indent) + // Don't check for an appendix + body, _ := scanMessageBody(s, indent, false) if s.Err() != nil { return nil, s.Err() } @@ -309,29 +324,40 @@ func scanMessageTitle(s *bufio.Scanner) (title string, indent string) { return b.String(), indent } -func scanMessageBody(s *bufio.Scanner, indent string) string { - var b strings.Builder +func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (string, string) { + // Body and appendix + var body, appendix strings.Builder + c := &body var empty int for i := 0; s.Scan(); i++ { line := s.Text() - if strings.TrimSpace(line) == "" { + + line = strings.TrimRightFunc(line, unicode.IsSpace) + line = strings.TrimPrefix(line, indent) + + if line == "" { empty++ continue } - if b.Len() > 0 { - b.WriteByte('\n') + // If requested, parse out "appendix" information (often added + // by `git format-patch` and removed by `git am`). 
+ if separateAppendix && c == &body && line == "---" { + c = &appendix + continue + } + + if c.Len() > 0 { + c.WriteByte('\n') if empty > 0 { - b.WriteByte('\n') + c.WriteByte('\n') } } empty = 0 - line = strings.TrimRightFunc(line, unicode.IsSpace) - line = strings.TrimPrefix(line, indent) - b.WriteString(line) + c.WriteString(line) } - return b.String() + return body.String(), appendix.String() } func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { @@ -372,7 +398,7 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { h.SubjectPrefix, h.Title = parseSubject(subject) s := bufio.NewScanner(msg.Body) - h.Body = scanMessageBody(s, "") + h.Body, h.BodyAppendix = scanMessageBody(s, "", true) if s.Err() != nil { return nil, s.Err() } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 37f28fd..ca7e053 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -139,6 +139,7 @@ func TestParsePatchHeader(t *testing.T) { expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60)) expectedTitle := "A sample commit to test header parsing" expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line." + expectedBodyAppendix := "CC: Joe Smith " tests := map[string]struct { Input string @@ -221,6 +222,32 @@ CommitDate: Sat Apr 11 15:21:23 2020 -0700 Body: expectedBody, }, }, + "prettyAppendix": { + Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b +Author: Morton Haypenny +AuthorDate: Sat Apr 11 15:21:23 2020 -0700 +Commit: Morton Haypenny +CommitDate: Sat Apr 11 15:21:23 2020 -0700 + + A sample commit to test header parsing + + The medium format shows the body, which + may wrap on to multiple lines. + + Another body line. 
+ --- + CC: Joe Smith +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Committer: expectedIdentity, + CommitterDate: expectedDate, + Title: expectedTitle, + Body: expectedBody + "\n---\n" + expectedBodyAppendix, + }, + }, "mailbox": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny @@ -240,6 +267,28 @@ Another body line. Body: expectedBody, }, }, + "mailboxAppendix": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +--- +CC: Joe Smith +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + BodyAppendix: expectedBodyAppendix, + }, + }, "unwrapTitle": { Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Author: Morton Haypenny @@ -333,6 +382,10 @@ Author: Morton Haypenny if exp.Body != act.Body { t.Errorf("incorrect parsed body:\n expected: %q\n actual: %q", exp.Body, act.Body) } + if exp.BodyAppendix != act.BodyAppendix { + t.Errorf("incorrect parsed body appendix:\n expected: %q\n actual: %q", + exp.BodyAppendix, act.BodyAppendix) + } }) } }