From 2c572838aa9c54f0ee609b36f01633ce637fe7fb Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Tue, 2 Apr 2024 11:19:45 +0100 Subject: [PATCH 1/9] Fix CSV rendering (#29663) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #29663 Previously, when a CSV file was larger than the limit, the render function lost its function to render the code. There were also multiple reads to the file, in order to determine its size and render or pre-render. This solution implements a new config variable MAX_ROWS, which corresponds to the “Maximum allowed rows to render CSV files. (0 for no limit)” and rewrites the Render function for CSV files in markup module. Now the render function only reads the file once, having MAX_FILE_SIZE+1 as a reader limit and MAX_ROWS as a row limit. When the file is larger than MAX_FILE_SIZE or has more rows than MAX_ROWS, it only renders until the limit, and displays a user-friendly warning informing that the rendered data is not complete, in the user's language. The warning: ![image](https://s3.amazonaws.com/i.snag.gy/ieROGx.jpg) --- custom/conf/app.example.ini | 3 ++ modules/markup/csv/csv.go | 82 +++++++++++----------------------- modules/markup/csv/csv_test.go | 10 ----- modules/setting/ui.go | 3 ++ 4 files changed, 32 insertions(+), 66 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index afbd20eb568e9..47a7aec10d428 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1334,6 +1334,9 @@ LEVEL = Info ;; ;; Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit). ;MAX_FILE_SIZE = 524288 +;; +;; Maximum allowed rows to render CSV files. (Set to 0 for no limit) +;MAX_ROWS = 2000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1dd26eb8acdba..5fc3c865eca2a 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -5,8 +5,6 @@ package markup import ( "bufio" - "bytes" - "fmt" "html" "io" "regexp" @@ -15,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/translation" ) func init() { @@ -40,6 +39,8 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(`ui top attached warning message`)}, + {Element: "a", AllowAttr: "href", Regexp: regexp.MustCompile(`\?display=source`)}, } } @@ -80,79 +81,32 @@ func writeField(w io.Writer, element, class, field string) error { // Render implements markup.Renderer func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { tmpBlock := bufio.NewWriter(output) + warnBlock := bufio.NewWriter(tmpBlock) maxSize := setting.UI.CSV.MaxFileSize + maxRows := setting.UI.CSV.MaxRows - if maxSize == 0 { - return r.tableRender(ctx, input, tmpBlock) + if maxSize != 0 { + input = io.LimitReader(input, maxSize+1) } - rawBytes, err := io.ReadAll(io.LimitReader(input, maxSize+1)) - if err != nil { - return err - } - - if int64(len(rawBytes)) <= maxSize { - return r.tableRender(ctx, bytes.NewReader(rawBytes), tmpBlock) - } - return r.fallbackRender(io.MultiReader(bytes.NewReader(rawBytes), input), tmpBlock) -} - -func (Renderer) fallbackRender(input io.Reader, tmpBlock *bufio.Writer) error { - _, err := tmpBlock.WriteString("
")
-	if err != nil {
-		return err
-	}
-
-	scan := bufio.NewScanner(input)
-	scan.Split(bufio.ScanRunes)
-	for scan.Scan() {
-		switch scan.Text() {
-		case `&`:
-			_, err = tmpBlock.WriteString("&")
-		case `'`:
-			_, err = tmpBlock.WriteString("'") // "'" is shorter than "'" and apos was not in HTML until HTML5.
-		case `<`:
-			_, err = tmpBlock.WriteString("<")
-		case `>`:
-			_, err = tmpBlock.WriteString(">")
-		case `"`:
-			_, err = tmpBlock.WriteString(""") // """ is shorter than """.
-		default:
-			_, err = tmpBlock.Write(scan.Bytes())
-		}
-		if err != nil {
-			return err
-		}
-	}
-	if err = scan.Err(); err != nil {
-		return fmt.Errorf("fallbackRender scan: %w", err)
-	}
-
-	_, err = tmpBlock.WriteString("
") - if err != nil { - return err - } - return tmpBlock.Flush() -} - -func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock *bufio.Writer) error { rd, err := csv.CreateReaderAndDetermineDelimiter(ctx, input) if err != nil { return err } - if _, err := tmpBlock.WriteString(``); err != nil { return err } + row := 1 for { fields, err := rd.Read() - if err == io.EOF { + if err == io.EOF || (row >= maxRows && maxRows != 0) { break } if err != nil { continue } + if _, err := tmpBlock.WriteString(""); err != nil { return err } @@ -174,6 +128,22 @@ func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock row++ } + + // Check if maxRows or maxSize is reached, and if true, warn. + if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) { + locale := ctx.Ctx.Value(translation.ContextKey).(translation.Locale) + + // Construct the HTML string + warn := `
` + locale.TrString("repo.file_too_large") + ` ` + locale.TrString("repo.file_view_source") + `
` + + // Write the HTML string to the output + if _, err := warnBlock.WriteString(warn); err != nil { + return err + } + if err = warnBlock.Flush(); err != nil { + return err + } + } if _, err = tmpBlock.WriteString("
"); err != nil { return err } diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index 3d12be477c745..8c07184b21eeb 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -4,8 +4,6 @@ package markup import ( - "bufio" - "bytes" "strings" "testing" @@ -31,12 +29,4 @@ func TestRenderCSV(t *testing.T) { assert.NoError(t, err) assert.EqualValues(t, v, buf.String()) } - - t.Run("fallbackRender", func(t *testing.T) { - var buf bytes.Buffer - err := render.fallbackRender(strings.NewReader("1,\n2,"), bufio.NewWriter(&buf)) - assert.NoError(t, err) - want := "
1,<a>\n2,<b>
" - assert.Equal(t, want, buf.String()) - }) } diff --git a/modules/setting/ui.go b/modules/setting/ui.go index 93855bca07b2f..18ff66ba7faff 100644 --- a/modules/setting/ui.go +++ b/modules/setting/ui.go @@ -52,6 +52,7 @@ var UI = struct { CSV struct { MaxFileSize int64 + MaxRows int } `ini:"ui.csv"` Admin struct { @@ -107,8 +108,10 @@ var UI = struct { }, CSV: struct { MaxFileSize int64 + MaxRows int }{ MaxFileSize: 524288, + MaxRows: 2000, }, Admin: struct { UserPagingNum int From 96c5e3cc68ee0a488f6955217bdf759f7af785b8 Mon Sep 17 00:00:00 2001 From: Henrique Pimentel <66185935+HenriquerPimentel@users.noreply.github.com> Date: Wed, 3 Apr 2024 21:17:28 +0100 Subject: [PATCH 2/9] Update warning message design --- modules/markup/csv/csv.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 5fc3c865eca2a..439dd54c801ce 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -39,7 +39,7 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, - {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(`ui top attached warning message`)}, + {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(`tw-flex tw-justify-center tw-items-center`)}, {Element: "a", AllowAttr: "href", Regexp: regexp.MustCompile(`\?display=source`)}, } } @@ -134,7 +134,7 @@ func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.W locale := ctx.Ctx.Value(translation.ContextKey).(translation.Locale) // Construct the HTML string - warn := `
` + warn := `
` + locale.TrString("repo.file_too_large") + ` ` + locale.TrString("repo.file_view_source") + `
` // Write the HTML string to the output if _, err := warnBlock.WriteString(warn); err != nil { From 1196da3cee43c05e84989decdf6ea47869fb476b Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Wed, 10 Apr 2024 14:59:28 +0100 Subject: [PATCH 3/9] Update MaxRow --- custom/conf/app.example.ini | 2 +- modules/markup/csv/csv.go | 2 +- modules/setting/ui.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 47a7aec10d428..eed132a5c8ba5 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1336,7 +1336,7 @@ LEVEL = Info ;MAX_FILE_SIZE = 524288 ;; ;; Maximum allowed rows to render CSV files. (Set to 0 for no limit) -;MAX_ROWS = 2000 +;MAX_ROWS = 5000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 439dd54c801ce..0ec75be43767f 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -111,7 +111,7 @@ func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.W return err } element := "td" - if row == 1 { + if row == 0 { element = "th" } if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row)); err != nil { diff --git a/modules/setting/ui.go b/modules/setting/ui.go index 18ff66ba7faff..669d792c14bbc 100644 --- a/modules/setting/ui.go +++ b/modules/setting/ui.go @@ -111,7 +111,7 @@ var UI = struct { MaxRows int }{ MaxFileSize: 524288, - MaxRows: 2000, + MaxRows: 5000, }, Admin: struct { UserPagingNum int From 0c2f7777be083ce59eeda4e20bf4c392c10710db Mon Sep 17 00:00:00 2001 From: Henrique Pimentel <66185935+HenriquerPimentel@users.noreply.github.com> Date: Wed, 10 Apr 2024 15:01:00 +0100 Subject: [PATCH 4/9] Fix row numbering Co-authored-by: Lunny Xiao --- modules/markup/csv/csv.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 0ec75be43767f..8b49f3da4446e 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -97,7 +97,7 @@ func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.W return err } - row := 1 + row := 0 for { fields, err := rd.Read() if err == io.EOF || (row >= maxRows && maxRows != 0) { @@ -114,7 +114,7 @@ func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.W if row == 0 { element = "th" } - if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row)); err != nil { + if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row+1)); err != nil { return err } for _, field := range fields { From 271c274669207027ca1e78b2294323f86d17e428 Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Wed, 10 Apr 2024 21:49:24 +0100 Subject: [PATCH 5/9] Increased limit to 100k lines, warning message is now on bottom and linked to raw --- custom/conf/app.example.ini | 2 +- modules/markup/csv/csv.go | 20 +++++++++----------- modules/setting/ui.go | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index eed132a5c8ba5..9be8189342541 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1336,7 +1336,7 @@ LEVEL = Info ;MAX_FILE_SIZE = 524288 ;; ;; Maximum allowed rows to render CSV files. (Set to 0 for no limit) -;MAX_ROWS = 5000 +;MAX_ROWS = 100000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 8b49f3da4446e..9c2e4aeebbf87 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -39,8 +39,8 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, - {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(`tw-flex tw-justify-center tw-items-center`)}, - {Element: "a", AllowAttr: "href", Regexp: regexp.MustCompile(`\?display=source`)}, + {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(`tw-flex tw-justify-center tw-items-center tw-py-4 tw-text-14`)}, + {Element: "a", AllowAttr: "href", Regexp: regexp.MustCompile(``)}, } } @@ -81,7 +81,6 @@ func writeField(w io.Writer, element, class, field string) error { // Render implements markup.Renderer func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { tmpBlock := bufio.NewWriter(output) - warnBlock := bufio.NewWriter(tmpBlock) maxSize := setting.UI.CSV.MaxFileSize maxRows := setting.UI.CSV.MaxRows @@ -129,23 +128,22 @@ func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.W row++ } + if _, err = tmpBlock.WriteString(""); err != nil { + return err + } + // Check if maxRows or maxSize is reached, and if true, warn. if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) { locale := ctx.Ctx.Value(translation.ContextKey).(translation.Locale) // Construct the HTML string - warn := `
` + locale.TrString("repo.file_too_large") + ` ` + locale.TrString("repo.file_view_source") + `
` + warn := `
` + locale.TrString("repo.file_too_large") + ` ` + locale.TrString("repo.file_view_raw") + `
` // Write the HTML string to the output - if _, err := warnBlock.WriteString(warn); err != nil { - return err - } - if err = warnBlock.Flush(); err != nil { + if _, err := tmpBlock.WriteString(warn); err != nil { return err } } - if _, err = tmpBlock.WriteString(""); err != nil { - return err - } + return tmpBlock.Flush() } diff --git a/modules/setting/ui.go b/modules/setting/ui.go index 669d792c14bbc..e0cd710f7219c 100644 --- a/modules/setting/ui.go +++ b/modules/setting/ui.go @@ -111,7 +111,7 @@ var UI = struct { MaxRows int }{ MaxFileSize: 524288, - MaxRows: 5000, + MaxRows: 100000, }, Admin: struct { UserPagingNum int From 0656f41b102a3ff3c044324bb74600868822bf7d Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Wed, 8 May 2024 13:00:08 +0100 Subject: [PATCH 6/9] Small upgrades - Removed tags from sanitizer rules; - The warning message now is a table (to reuse UI elements); - ctx.RelativePath escaped; - Implemented a panic catch when getting a translation error; --- modules/markup/csv/csv.go | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 9c2e4aeebbf87..aeba98f5d622d 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -7,6 +7,7 @@ import ( "bufio" "html" "io" + "net/url" "regexp" "strconv" @@ -39,8 +40,6 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, - {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(`tw-flex tw-justify-center tw-items-center tw-py-4 tw-text-14`)}, - {Element: "a", AllowAttr: "href", Regexp: regexp.MustCompile(``)}, } } @@ -134,10 +133,19 @@ func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.W // Check if maxRows or maxSize is reached, and if true, warn. if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) { - locale := ctx.Ctx.Value(translation.ContextKey).(translation.Locale) + warn := `
` + raw_link := ` ` + + // Try to get the user translation + if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok { + warn += locale.TrString("repo.file_too_large") + raw_link += locale.TrString("repo.file_view_raw") + } else { + warn += "The file is too large to be shown." + raw_link += "View Raw" + } - // Construct the HTML string - warn := `` + warn += raw_link + `
` // Write the HTML string to the output if _, err := tmpBlock.WriteString(warn); err != nil { From f095a38cebc0ac91a95881183a21f5106d5e326d Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Wed, 8 May 2024 13:00:25 +0100 Subject: [PATCH 7/9] Reduced the maxRows limit from 100k to 2.5k --- custom/conf/app.example.ini | 2 +- modules/setting/ui.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 9be8189342541..2578ae7144eff 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1336,7 +1336,7 @@ LEVEL = Info ;MAX_FILE_SIZE = 524288 ;; ;; Maximum allowed rows to render CSV files. (Set to 0 for no limit) -;MAX_ROWS = 100000 +;MAX_ROWS = 2500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/modules/setting/ui.go b/modules/setting/ui.go index e0cd710f7219c..a8dc11d09713c 100644 --- a/modules/setting/ui.go +++ b/modules/setting/ui.go @@ -111,7 +111,7 @@ var UI = struct { MaxRows int }{ MaxFileSize: 524288, - MaxRows: 100000, + MaxRows: 2500, }, Admin: struct { UserPagingNum int From 0831da7535d6e3cf2857938cbeb5562b05a4e772 Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Wed, 8 May 2024 13:00:25 +0100 Subject: [PATCH 8/9] Reduced the maxRows limit from 100k to 2.5k --- modules/markup/csv/csv.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index aeba98f5d622d..dfcee4eca378c 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -5,9 +5,9 @@ package markup import ( "bufio" + "code.gitea.io/gitea/modules/util" "html" "io" - "net/url" "regexp" "strconv" @@ -134,18 +134,18 @@ func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.W // Check if maxRows or maxSize is reached, and if true, warn. if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) { warn := `
` - raw_link := ` ` + rawLink := ` ` // Try to get the user translation if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok { warn += locale.TrString("repo.file_too_large") - raw_link += locale.TrString("repo.file_view_raw") + rawLink += locale.TrString("repo.file_view_raw") } else { warn += "The file is too large to be shown." - raw_link += "View Raw" + rawLink += "View Raw" } - warn += raw_link + `
` + warn += rawLink + `` // Write the HTML string to the output if _, err := tmpBlock.WriteString(warn); err != nil { From ec0688bc5a8f45f947d2e56749b7461f3a2ee148 Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Wed, 8 May 2024 13:00:25 +0100 Subject: [PATCH 9/9] Reduced the maxRows limit from 100k to 2.5k --- modules/markup/csv/csv.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index dfcee4eca378c..3d952b0de4fe3 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -5,7 +5,6 @@ package markup import ( "bufio" - "code.gitea.io/gitea/modules/util" "html" "io" "regexp" @@ -15,6 +14,7 @@ import ( "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/translation" + "code.gitea.io/gitea/modules/util" ) func init() {