From e7f23e5841977e0e5201b8bc05615d8dd7fb23bc Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 3 Mar 2022 13:51:48 +0800 Subject: [PATCH 1/4] Support ignore all santize for external renderer --- modules/markup/csv/csv.go | 5 ++++ modules/markup/external/external.go | 5 ++++ modules/markup/markdown/markdown.go | 5 ++++ modules/markup/orgmode/orgmode.go | 5 ++++ modules/markup/renderer.go | 38 ++++++++++++++++++++--------- modules/setting/markup.go | 2 ++ 6 files changed, 48 insertions(+), 12 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index de32c57a64f1b..abd168c759f35 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -46,6 +46,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { } } +// SanitizeDisabled disabled sanitize if return true +func (Renderer) SanitizeDisabled() bool { + return false +} + func writeField(w io.Writer, element, class, field string) error { if _, err := io.WriteString(w, "<"); err != nil { return err diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 3acb601067df1..0f97d2f8a3249 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -54,6 +54,11 @@ func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return p.MarkupSanitizerRules } +// DisableSanitize disabled sanitize if return true +func (p *Renderer) SanitizeDisabled() bool { + return p.DisableSanitize +} + func envMark(envName string) string { if runtime.GOOS == "windows" { return "%" + envName + "%" diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index b45b9c8b8ae66..62b41270907b6 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -221,6 +221,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } +// SanitizeDisabled disabled sanitize if return true +func (Renderer) 
SanitizeDisabled() bool { + return false +} + // Render implements markup.Renderer func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { return render(ctx, input, output) diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index 8aa5f45ee244e..36a16f035d27d 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -47,6 +47,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } +// SanitizeDisabled disabled sanitize if return true +func (Renderer) SanitizeDisabled() bool { + return false +} + // Render renders orgmode rawbytes to HTML func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { htmlWriter := org.NewHTMLWriter() diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index 0ac0daaea9666..e241ffef2f341 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -81,6 +81,7 @@ type Renderer interface { Extensions() []string NeedPostProcess() bool SanitizerRules() []setting.MarkupSanitizerRule + SanitizeDisabled() bool Render(ctx *RenderContext, input io.Reader, output io.Writer) error } @@ -127,6 +128,12 @@ func RenderString(ctx *RenderContext, content string) (string, error) { return buf.String(), nil } +type nopCloser struct { + io.Writer +} + +func (nopCloser) Close() error { return nil } + func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { var wg sync.WaitGroup var err error @@ -136,18 +143,25 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr _ = pw.Close() }() - pr2, pw2 := io.Pipe() - defer func() { - _ = pr2.Close() - _ = pw2.Close() - }() - - wg.Add(1) - go func() { - err = SanitizeReader(pr2, renderer.Name(), output) - _ = pr2.Close() - wg.Done() - }() + var pr2 io.ReadCloser + var pw2 io.WriteCloser + + if !renderer.SanitizeDisabled() { + pr2, pw2 = io.Pipe() + 
defer func() { + _ = pr2.Close() + _ = pw2.Close() + }() + + wg.Add(1) + go func() { + err = SanitizeReader(pr2, renderer.Name(), output) + _ = pr2.Close() + wg.Done() + }() + } else { + pw2 = nopCloser{output} + } wg.Add(1) go func() { diff --git a/modules/setting/markup.go b/modules/setting/markup.go index 09b86b9b1a406..0da4ea6848a98 100644 --- a/modules/setting/markup.go +++ b/modules/setting/markup.go @@ -29,6 +29,7 @@ type MarkupRenderer struct { IsInputFile bool NeedPostProcess bool MarkupSanitizerRules []MarkupSanitizerRule + DisableSanitize bool } // MarkupSanitizerRule defines the policy for whitelisting attributes on @@ -150,5 +151,6 @@ func newMarkupRenderer(name string, sec *ini.Section) { Command: command, IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), NeedPostProcess: sec.Key("NEED_POSTPROCESS").MustBool(true), + DisableSanitize: sec.Key("DISABLE_SANITIZE").MustBool(false), }) } From 9dec47cace806c67bfff5af4a9373f45b95fc013 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 3 Mar 2022 20:32:18 +0800 Subject: [PATCH 2/4] Update docs --- custom/conf/app.example.ini | 2 ++ .../doc/advanced/config-cheat-sheet.en-us.md | 4 +-- .../doc/advanced/config-cheat-sheet.zh-cn.md | 27 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 42d17567159ee..d12aa2ac66d6d 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2125,6 +2125,8 @@ PATH = ;RENDER_COMMAND = "asciidoc --out-file=- -" ;; Don't pass the file on STDIN, pass the filename as argument instead. 
;IS_INPUT_FILE = false +; don't filter html tags and attributes if true +;DISABLE_SANITIZE = false ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 59b8fc31f0d89..e3b0fbd5050b3 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -1003,13 +1003,13 @@ IS_INPUT_FILE = false command. Multiple extensions needs a comma as splitter. - RENDER\_COMMAND: External command to render all matching extensions. - IS\_INPUT\_FILE: **false** Input is not a standard input but a file param followed `RENDER_COMMAND`. +- DISABLE_SANITIZE: **false** Don't filter html tags and attributes if true. Don't change this to true except you what that means. Two special environment variables are passed to the render command: - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. - -Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. +If `DISABLE_SANITIZE` is false, Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. 
```ini [markup.sanitizer.TeX] diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index 7db7fe705afef..4b6d20d7af865 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -318,6 +318,33 @@ IS_INPUT_FILE = false - FILE_EXTENSIONS: 关联的文档的扩展名,多个扩展名用都好分隔。 - RENDER_COMMAND: 工具的命令行命令及参数。 - IS_INPUT_FILE: 输入方式是最后一个参数为文件路径还是从标准输入读取。 +- DISABLE_SANITIZE: **false** 如果为 true 则不过滤 HTML 标签和属性。除非你知道这意味着什么,否则不要设置为 true。 + +以下两个环境变量将会被传递给渲染命令: + +- `GITEA_PREFIX_SRC`:包含当前的`src`路径的URL前缀,可以被用于链接的前缀。 +- `GITEA_PREFIX_RAW`:包含当前的`raw`路径的URL前缀,可以被用于图片的前缀。 + +如果 `DISABLE_SANITIZE` 为 false,则 Gitea 支持自定义渲染 HTML 的净化策略。以下例子将用 pandoc 支持 KaTeX 输出。 + +```ini +[markup.sanitizer.TeX] +; Pandoc renders TeX segments as <span>s with the "math" class, optionally +; with "inline" or "display" classes depending on context. +ELEMENT = span +ALLOW_ATTR = class +REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ +ALLOW_DATA_URI_IMAGES = true +``` + +- `ELEMENT`: 将要被应用到该策略的 HTML 元素,不能为空。 +- `ALLOW_ATTR`: 将要被应用到该策略的属性,不能为空。 +- `REGEXP`: 正则表达式,用来匹配属性的内容。如果为空,则跟属性内容无关。 +- `ALLOW_DATA_URI_IMAGES`: **false** 允许 data uri 图片 (`<img src="data:image/png;base64,..."/>`)。 + +多个净化规则可以被同时定义,只要section名称最后一位不重复即可。如: `[markup.sanitizer.TeX-2]`。 +为了针对一种渲染类型进行一个特殊的净化策略,必须使用形如 `[markup.sanitizer.asciidoc.rule-1]` 的方式来命名 section。 +如果此规则没有匹配到任何渲染类型,它将会被应用到所有的渲染类型。 ## Time (`time`) From b98e65ad72d638e09c722e6a3ddd41eb45bf98e7 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 4 Mar 2022 08:34:26 +0800 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: silverwind --- custom/conf/app.example.ini | 2 +- docs/content/doc/advanced/config-cheat-sheet.en-us.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index d12aa2ac66d6d..5502137ab62f4 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@
-2125,7 +2125,7 @@ PATH = ;RENDER_COMMAND = "asciidoc --out-file=- -" ;; Don't pass the file on STDIN, pass the filename as argument instead. ;IS_INPUT_FILE = false -; don't filter html tags and attributes if true +; Don't filter html tags and attributes if true ;DISABLE_SANITIZE = false ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index e3b0fbd5050b3..725917389e28a 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -1003,7 +1003,7 @@ IS_INPUT_FILE = false command. Multiple extensions needs a comma as splitter. - RENDER\_COMMAND: External command to render all matching extensions. - IS\_INPUT\_FILE: **false** Input is not a standard input but a file param followed `RENDER_COMMAND`. -- DISABLE_SANITIZE: **false** Don't filter html tags and attributes if true. Don't change this to true except you what that means. +- DISABLE_SANITIZE: **false** Don't filter html tags and attributes if true. Don't change this to true except you know what it means. Two special environment variables are passed to the render command: - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. 
From 1d57ed0aa17fb1323e8088ec62b0cd3af5867425 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 4 Mar 2022 15:36:55 +0800 Subject: [PATCH 4/4] Fix doc --- custom/conf/app.example.ini | 2 +- .../doc/advanced/config-cheat-sheet.en-us.md | 4 ++-- .../doc/advanced/config-cheat-sheet.zh-cn.md | 4 ++-- modules/markup/csv/csv.go | 4 ++-- modules/markup/external/external.go | 6 +++--- modules/markup/markdown/markdown.go | 4 ++-- modules/markup/orgmode/orgmode.go | 4 ++-- modules/markup/renderer.go | 4 ++-- modules/setting/markup.go | 16 ++++++++-------- 9 files changed, 24 insertions(+), 24 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 5502137ab62f4..ad58e6bda36d8 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2126,7 +2126,7 @@ PATH = ;; Don't pass the file on STDIN, pass the filename as argument instead. ;IS_INPUT_FILE = false ; Don't filter html tags and attributes if true -;DISABLE_SANITIZE = false +;DISABLE_SANITIZER = false ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 725917389e28a..70bc2ee8293e1 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -1003,13 +1003,13 @@ IS_INPUT_FILE = false command. Multiple extensions needs a comma as splitter. - RENDER\_COMMAND: External command to render all matching extensions. - IS\_INPUT\_FILE: **false** Input is not a standard input but a file param followed `RENDER_COMMAND`. -- DISABLE_SANITIZE: **false** Don't filter html tags and attributes if true. Don't change this to true except you know what it means. +- DISABLE_SANITIZER: **false** Don't filter html tags and attributes if true. Don't change this to true unless you know what that means.
Two special environment variables are passed to the render command: - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. -If `DISABLE_SANITIZE` is false, Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. +If `DISABLE_SANITIZER` is false, Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. ```ini [markup.sanitizer.TeX] diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index 4b6d20d7af865..600e54a85e54e 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -318,14 +318,14 @@ IS_INPUT_FILE = false - FILE_EXTENSIONS: 关联的文档的扩展名,多个扩展名用都好分隔。 - RENDER_COMMAND: 工具的命令行命令及参数。 - IS_INPUT_FILE: 输入方式是最后一个参数为文件路径还是从标准输入读取。 -- DISABLE_SANITIZE: **false** 如果为 true 则不过滤 HTML 标签和属性。除非你知道这意味着什么,否则不要设置为 true。 +- DISABLE_SANITIZER: **false** 如果为 true 则不过滤 HTML 标签和属性。除非你知道这意味着什么,否则不要设置为 true。 以下两个环境变量将会被传递给渲染命令: - `GITEA_PREFIX_SRC`:包含当前的`src`路径的URL前缀,可以被用于链接的前缀。 - `GITEA_PREFIX_RAW`:包含当前的`raw`路径的URL前缀,可以被用于图片的前缀。 -如果 `DISABLE_SANITIZE` 为 false,则 Gitea 支持自定义渲染 HTML 的净化策略。以下例子将用 pandoc 支持 KaTeX 输出。 +如果 `DISABLE_SANITIZER` 为 false,则 Gitea 支持自定义渲染 HTML 的净化策略。以下例子将用 pandoc 支持 KaTeX 输出。 ```ini [markup.sanitizer.TeX] diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index abd168c759f35..17c3fe6f4f25c 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -46,8 +46,8 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { } } -// SanitizeDisabled disabled sanitize if return true -func (Renderer) SanitizeDisabled() bool { +// SanitizerDisabled disabled sanitize if return 
true +func (Renderer) SanitizerDisabled() bool { return false } diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 0f97d2f8a3249..4fdd4315bc3e4 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -54,9 +54,9 @@ func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return p.MarkupSanitizerRules } -// DisableSanitize disabled sanitize if return true -func (p *Renderer) SanitizeDisabled() bool { - return p.DisableSanitize +// SanitizerDisabled disabled sanitize if return true +func (p *Renderer) SanitizerDisabled() bool { + return p.DisableSanitizer } func envMark(envName string) string { diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index 62b41270907b6..320c2f7f82782 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -221,8 +221,8 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } -// SanitizeDisabled disabled sanitize if return true -func (Renderer) SanitizeDisabled() bool { +// SanitizerDisabled disabled sanitize if return true +func (Renderer) SanitizerDisabled() bool { return false } diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index 36a16f035d27d..2f394b992b22b 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -47,8 +47,8 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } -// SanitizeDisabled disabled sanitize if return true -func (Renderer) SanitizeDisabled() bool { +// SanitizerDisabled disabled sanitize if return true +func (Renderer) SanitizerDisabled() bool { return false } diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index e241ffef2f341..cf8b9bace70ba 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -81,7 +81,7 @@ type Renderer interface 
{ Extensions() []string NeedPostProcess() bool SanitizerRules() []setting.MarkupSanitizerRule - SanitizeDisabled() bool + SanitizerDisabled() bool Render(ctx *RenderContext, input io.Reader, output io.Writer) error } @@ -146,7 +146,7 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr var pr2 io.ReadCloser var pw2 io.WriteCloser - if !renderer.SanitizeDisabled() { + if !renderer.SanitizerDisabled() { pr2, pw2 = io.Pipe() defer func() { _ = pr2.Close() diff --git a/modules/setting/markup.go b/modules/setting/markup.go index 0da4ea6848a98..5fb6af6838333 100644 --- a/modules/setting/markup.go +++ b/modules/setting/markup.go @@ -29,7 +29,7 @@ type MarkupRenderer struct { IsInputFile bool NeedPostProcess bool MarkupSanitizerRules []MarkupSanitizerRule - DisableSanitize bool + DisableSanitizer bool } // MarkupSanitizerRule defines the policy for whitelisting attributes on @@ -145,12 +145,12 @@ func newMarkupRenderer(name string, sec *ini.Section) { } ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{ - Enabled: sec.Key("ENABLED").MustBool(false), - MarkupName: name, - FileExtensions: exts, - Command: command, - IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), - NeedPostProcess: sec.Key("NEED_POSTPROCESS").MustBool(true), - DisableSanitize: sec.Key("DISABLE_SANITIZE").MustBool(false), + Enabled: sec.Key("ENABLED").MustBool(false), + MarkupName: name, + FileExtensions: exts, + Command: command, + IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), + NeedPostProcess: sec.Key("NEED_POSTPROCESS").MustBool(true), + DisableSanitizer: sec.Key("DISABLE_SANITIZER").MustBool(false), }) }