2019-03-16 11:12:44 +08:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2022-11-27 13:20:29 -05:00
// SPDX-License-Identifier: MIT
2019-03-16 11:12:44 +08:00
package setting
import (
"regexp"
"strings"
2026-01-26 10:34:38 +08:00
"sync"
2019-03-16 11:12:44 +08:00
"code.gitea.io/gitea/modules/log"
2025-04-05 11:56:48 +08:00
"code.gitea.io/gitea/modules/util"
2019-03-16 11:12:44 +08:00
)
2021-04-20 06:25:08 +08:00
// ExternalMarkupRenderers represents the external markup renderers
2019-03-16 11:12:44 +08:00
var (
2021-07-24 05:21:51 +01:00
ExternalMarkupRenderers [ ] * MarkupRenderer
ExternalSanitizerRules [ ] MarkupSanitizerRule
MermaidMaxSourceCharacters int
2019-03-16 11:12:44 +08:00
)
2022-06-16 11:33:23 +08:00
// Values accepted for the RENDER_CONTENT_MODE key of an external renderer
// section. Any other value is replaced by RenderContentModeSanitized when the
// renderer is loaded.
const (
	// RenderContentModeSanitized is the default mode.
	RenderContentModeSanitized = "sanitized"

	// RenderContentModeNoSanitizer is also selected by the legacy
	// DISABLE_SANITIZER key when RENDER_CONTENT_MODE is not set.
	RenderContentModeNoSanitizer = "no-sanitizer"

	// RenderContentModeIframe is the third accepted mode.
	RenderContentModeIframe = "iframe"
)
2025-04-05 11:56:48 +08:00
// MarkdownRenderOptions is the set of render switches that loadMarkupFrom
// parses separately for comments, wiki pages and repository files.
type MarkdownRenderOptions struct {
	// NewLineHardBreak is turned on by the "new-line-hard-break" option.
	NewLineHardBreak bool

	// ShortIssuePattern is turned on by the "short-issue-pattern" option.
	// Actually it is a "markup" option because it is used in "post processor"
	ShortIssuePattern bool
}
// MarkdownMathCodeBlockOptions is the parsed form of the math code block
// detection list: loadMarkupFrom turns each recognised list entry into one of
// these flags.
type MarkdownMathCodeBlockOptions struct {
	// ParseInlineDollar is turned on by the "inline-dollar" entry.
	ParseInlineDollar bool

	// ParseInlineParentheses is turned on by the "inline-parentheses" entry.
	ParseInlineParentheses bool

	// ParseBlockDollar is turned on by the "block-dollar" entry.
	ParseBlockDollar bool

	// ParseBlockSquareBrackets is turned on by the "block-square-brackets" entry.
	ParseBlockSquareBrackets bool
}
2023-02-20 00:12:01 +08:00
// Markdown settings
var Markdown = struct {
2025-04-05 11:56:48 +08:00
RenderOptionsComment MarkdownRenderOptions ` ini:"-" `
RenderOptionsWiki MarkdownRenderOptions ` ini:"-" `
RenderOptionsRepoFile MarkdownRenderOptions ` ini:"-" `
CustomURLSchemes [ ] string ` ini:"CUSTOM_URL_SCHEMES" ` // Actually it is a "markup" option because it is used in "post processor"
2026-01-26 10:34:38 +08:00
FileNamePatterns [ ] string ` ini:"-" `
2025-04-05 11:56:48 +08:00
EnableMath bool
MathCodeBlockDetection [ ] string
MathCodeBlockOptions MarkdownMathCodeBlockOptions ` ini:"-" `
2023-02-20 00:12:01 +08:00
} {
2026-01-26 10:34:38 +08:00
EnableMath : true ,
2023-02-20 00:12:01 +08:00
}
2021-04-20 06:25:08 +08:00
// MarkupRenderer defines the external parser configured in ini
type MarkupRenderer struct {
2021-06-23 23:09:51 +02:00
MarkupName string
Command string
2026-01-26 10:34:38 +08:00
FilePatterns [ ] string
2021-06-23 23:09:51 +02:00
IsInputFile bool
NeedPostProcess bool
MarkupSanitizerRules [ ] MarkupSanitizerRule
2022-06-16 11:33:23 +08:00
RenderContentMode string
2025-10-23 16:01:38 +08:00
RenderContentSandbox string
2019-03-16 11:12:44 +08:00
}
2019-12-07 14:49:04 -05:00
// MarkupSanitizerRule defines the policy for whitelisting attributes on
// certain elements.
type MarkupSanitizerRule struct {
	// Element is the value of the ELEMENT key.
	Element string

	// AllowAttr is the value of the ALLOW_ATTR key.
	AllowAttr string

	// Regexp is the REGEXP key after createMarkupSanitizerRule made sure it
	// starts with "^", ends with "$" and compiles. It is empty when REGEXP is
	// not configured.
	Regexp string

	// AllowDataURIImages is the value of the ALLOW_DATA_URI_IMAGES key.
	AllowDataURIImages bool
}
2023-02-20 00:12:01 +08:00
func loadMarkupFrom ( rootCfg ConfigProvider ) {
mustMapSetting ( rootCfg , "markdown" , & Markdown )
2026-01-26 10:34:38 +08:00
markdownFileExtensions := rootCfg . Section ( "markdown" ) . Key ( "FILE_EXTENSIONS" ) . Strings ( "," )
if len ( markdownFileExtensions ) == 0 || len ( markdownFileExtensions ) == 1 && markdownFileExtensions [ 0 ] == "" {
markdownFileExtensions = [ ] string { ".md" , ".markdown" , ".mdown" , ".mkd" , ".livemd" }
}
Markdown . FileNamePatterns = fileExtensionsToPatterns ( "markdown" , markdownFileExtensions )
2025-04-05 11:56:48 +08:00
const none = "none"
const renderOptionShortIssuePattern = "short-issue-pattern"
const renderOptionNewLineHardBreak = "new-line-hard-break"
cfgMarkdown := rootCfg . Section ( "markdown" )
parseMarkdownRenderOptions := func ( key string , defaults [ ] string ) ( ret MarkdownRenderOptions ) {
options := cfgMarkdown . Key ( key ) . Strings ( "," )
options = util . IfEmpty ( options , defaults )
for _ , opt := range options {
switch opt {
case renderOptionShortIssuePattern :
ret . ShortIssuePattern = true
case renderOptionNewLineHardBreak :
ret . NewLineHardBreak = true
case none :
ret = MarkdownRenderOptions { }
case "" :
default :
log . Error ( "Unknown markdown render option in %s: %s" , key , opt )
}
}
return ret
}
Markdown . RenderOptionsComment = parseMarkdownRenderOptions ( "RENDER_OPTIONS_COMMENT" , [ ] string { renderOptionShortIssuePattern , renderOptionNewLineHardBreak } )
Markdown . RenderOptionsWiki = parseMarkdownRenderOptions ( "RENDER_OPTIONS_WIKI" , [ ] string { renderOptionShortIssuePattern } )
Markdown . RenderOptionsRepoFile = parseMarkdownRenderOptions ( "RENDER_OPTIONS_REPO_FILE" , nil )
const mathCodeInlineDollar = "inline-dollar"
const mathCodeInlineParentheses = "inline-parentheses"
const mathCodeBlockDollar = "block-dollar"
const mathCodeBlockSquareBrackets = "block-square-brackets"
Markdown . MathCodeBlockDetection = util . IfEmpty ( Markdown . MathCodeBlockDetection , [ ] string { mathCodeInlineDollar , mathCodeBlockDollar } )
Markdown . MathCodeBlockOptions = MarkdownMathCodeBlockOptions { }
for _ , s := range Markdown . MathCodeBlockDetection {
switch s {
case mathCodeInlineDollar :
Markdown . MathCodeBlockOptions . ParseInlineDollar = true
case mathCodeInlineParentheses :
Markdown . MathCodeBlockOptions . ParseInlineParentheses = true
case mathCodeBlockDollar :
Markdown . MathCodeBlockOptions . ParseBlockDollar = true
case mathCodeBlockSquareBrackets :
Markdown . MathCodeBlockOptions . ParseBlockSquareBrackets = true
case none :
Markdown . MathCodeBlockOptions = MarkdownMathCodeBlockOptions { }
case "" :
default :
log . Error ( "Unknown math code block detection option: %s" , s )
}
}
2023-02-20 00:12:01 +08:00
2025-04-09 15:52:01 +08:00
MermaidMaxSourceCharacters = rootCfg . Section ( "markup" ) . Key ( "MERMAID_MAX_SOURCE_CHARACTERS" ) . MustInt ( 50000 )
2021-06-23 23:09:51 +02:00
ExternalMarkupRenderers = make ( [ ] * MarkupRenderer , 0 , 10 )
2021-06-07 06:50:07 +08:00
ExternalSanitizerRules = make ( [ ] MarkupSanitizerRule , 0 , 10 )
2021-06-23 23:09:51 +02:00
2023-02-20 00:12:01 +08:00
for _ , sec := range rootCfg . Section ( "markup" ) . ChildSections ( ) {
2019-03-16 11:12:44 +08:00
name := strings . TrimPrefix ( sec . Name ( ) , "markup." )
if name == "" {
log . Warn ( "name is empty, markup " + sec . Name ( ) + "ignored" )
continue
}
2020-04-29 07:34:59 -04:00
if name == "sanitizer" || strings . HasPrefix ( name , "sanitizer." ) {
2019-12-07 14:49:04 -05:00
newMarkupSanitizer ( name , sec )
} else {
newMarkupRenderer ( name , sec )
2019-03-16 11:12:44 +08:00
}
2019-12-07 14:49:04 -05:00
}
}
2023-04-25 23:06:39 +08:00
func newMarkupSanitizer ( name string , sec ConfigSection ) {
2021-06-23 23:09:51 +02:00
rule , ok := createMarkupSanitizerRule ( name , sec )
if ok {
2025-06-18 20:37:49 +02:00
if after , found := strings . CutPrefix ( name , "sanitizer." ) ; found {
2025-06-18 03:48:09 +02:00
names := strings . SplitN ( after , "." , 2 )
2021-06-23 23:09:51 +02:00
name = names [ 0 ]
}
for _ , renderer := range ExternalMarkupRenderers {
if name == renderer . MarkupName {
renderer . MarkupSanitizerRules = append ( renderer . MarkupSanitizerRules , rule )
return
}
}
ExternalSanitizerRules = append ( ExternalSanitizerRules , rule )
2019-12-07 14:49:04 -05:00
}
2021-06-23 23:09:51 +02:00
}
2019-12-07 14:49:04 -05:00
2023-04-25 23:06:39 +08:00
func createMarkupSanitizerRule ( name string , sec ConfigSection ) ( MarkupSanitizerRule , bool ) {
2021-06-23 23:09:51 +02:00
var rule MarkupSanitizerRule
ok := false
if sec . HasKey ( "ALLOW_DATA_URI_IMAGES" ) {
rule . AllowDataURIImages = sec . Key ( "ALLOW_DATA_URI_IMAGES" ) . MustBool ( false )
ok = true
2019-12-07 14:49:04 -05:00
}
2021-06-23 23:09:51 +02:00
if sec . HasKey ( "ELEMENT" ) || sec . HasKey ( "ALLOW_ATTR" ) {
rule . Element = sec . Key ( "ELEMENT" ) . Value ( )
rule . AllowAttr = sec . Key ( "ALLOW_ATTR" ) . Value ( )
2019-12-07 14:49:04 -05:00
2021-06-23 23:09:51 +02:00
if rule . Element == "" || rule . AllowAttr == "" {
log . Error ( "Missing required values from markup.%s. Must have ELEMENT and ALLOW_ATTR defined!" , name )
return rule , false
2020-04-29 07:34:59 -04:00
}
2021-06-23 23:09:51 +02:00
regexpStr := sec . Key ( "REGEXP" ) . Value ( )
if regexpStr != "" {
2024-11-18 13:25:42 +08:00
hasPrefix := strings . HasPrefix ( regexpStr , "^" )
hasSuffix := strings . HasSuffix ( regexpStr , "$" )
if ! hasPrefix || ! hasSuffix {
log . Error ( "In markup.%s: REGEXP must start with ^ and end with $ to be strict" , name )
// to avoid breaking existing user configurations and satisfy the strict requirement in addSanitizerRules
if ! hasPrefix {
regexpStr = "^.*" + regexpStr
}
if ! hasSuffix {
regexpStr += ".*$"
}
}
_ , err := regexp . Compile ( regexpStr )
2021-06-23 23:09:51 +02:00
if err != nil {
log . Error ( "In markup.%s: REGEXP (%s) failed to compile: %v" , name , regexpStr , err )
return rule , false
}
2024-11-18 13:25:42 +08:00
rule . Regexp = regexpStr
2021-06-23 23:09:51 +02:00
}
2019-03-16 11:12:44 +08:00
2021-06-23 23:09:51 +02:00
ok = true
2020-04-29 07:34:59 -04:00
}
2019-03-16 11:12:44 +08:00
2021-06-23 23:09:51 +02:00
if ! ok {
log . Error ( "Missing required keys from markup.%s. Must have ELEMENT and ALLOW_ATTR or ALLOW_DATA_URI_IMAGES defined!" , name )
return rule , false
2019-12-07 14:49:04 -05:00
}
2020-04-29 07:34:59 -04:00
2021-06-23 23:09:51 +02:00
return rule , true
2019-12-07 14:49:04 -05:00
}
2026-01-26 10:34:38 +08:00
// extensionReg returns the lazily compiled expression used to validate file
// extensions. An extension is accepted when it consists of one or more
// segments, each being a dot followed by at least one word character or
// hyphen, e.g. ".md" or ".tar.gz".
//
// The pattern must not carry any whitespace around the anchors: a space
// before "^" or after "$" can never be matched and would reject everything.
var extensionReg = sync.OnceValue(func() *regexp.Regexp {
	return regexp.MustCompile(`^(\.[-\w]+)+$`)
})
2019-12-07 14:49:04 -05:00
2026-01-26 10:34:38 +08:00
func fileExtensionsToPatterns ( sectionName string , extensions [ ] string ) [ ] string {
patterns := make ( [ ] string , 0 , len ( extensions ) )
2019-12-07 14:49:04 -05:00
for _ , extension := range extensions {
2026-01-26 10:34:38 +08:00
if ! extensionReg ( ) . MatchString ( extension ) {
log . Warn ( "Config section %s file extension %s is invalid. Extension ignored" , sectionName , extension )
2019-12-07 14:49:04 -05:00
} else {
2026-01-26 10:34:38 +08:00
patterns = append ( patterns , "*" + extension )
2019-12-07 14:49:04 -05:00
}
}
2026-01-26 10:34:38 +08:00
return patterns
}
func newMarkupRenderer ( name string , sec ConfigSection ) {
if ! sec . Key ( "ENABLED" ) . MustBool ( false ) {
return
}
2019-12-07 14:49:04 -05:00
2026-01-26 10:34:38 +08:00
fileNamePatterns := fileExtensionsToPatterns ( name , sec . Key ( "FILE_EXTENSIONS" ) . Strings ( "," ) )
if len ( fileNamePatterns ) == 0 {
log . Warn ( "Config section %s file extension is empty, markup render is ignored" , name )
2019-12-07 14:49:04 -05:00
return
2019-03-16 11:12:44 +08:00
}
2019-12-07 14:49:04 -05:00
command := sec . Key ( "RENDER_COMMAND" ) . MustString ( "" )
if command == "" {
log . Warn ( " RENDER_COMMAND is empty, markup " + name + " ignored" )
return
}
2022-06-16 11:33:23 +08:00
if sec . HasKey ( "DISABLE_SANITIZER" ) {
log . Error ( "Deprecated setting `[markup.*]` `DISABLE_SANITIZER` present. This fallback will be removed in v1.18.0" )
}
renderContentMode := sec . Key ( "RENDER_CONTENT_MODE" ) . MustString ( RenderContentModeSanitized )
if ! sec . HasKey ( "RENDER_CONTENT_MODE" ) && sec . Key ( "DISABLE_SANITIZER" ) . MustBool ( false ) {
renderContentMode = RenderContentModeNoSanitizer // if only the legacy DISABLE_SANITIZER exists, use it
}
if renderContentMode != RenderContentModeSanitized &&
renderContentMode != RenderContentModeNoSanitizer &&
renderContentMode != RenderContentModeIframe {
log . Error ( "invalid RENDER_CONTENT_MODE: %q, default to %q" , renderContentMode , RenderContentModeSanitized )
renderContentMode = RenderContentModeSanitized
}
2025-10-23 16:01:38 +08:00
// ATTENTION! at the moment, only a safe set like "allow-scripts" are allowed for sandbox mode.
2025-12-25 11:33:34 +01:00
// "allow-same-origin" should NEVER be used, it leads to XSS attack: makes the JS in iframe can access parent window's config and send requests with user's credentials.
2025-10-23 16:01:38 +08:00
renderContentSandbox := sec . Key ( "RENDER_CONTENT_SANDBOX" ) . MustString ( "allow-scripts allow-popups" )
if renderContentSandbox == "disabled" {
renderContentSandbox = ""
}
2021-06-23 23:09:51 +02:00
ExternalMarkupRenderers = append ( ExternalMarkupRenderers , & MarkupRenderer {
2026-01-26 10:34:38 +08:00
MarkupName : name ,
FilePatterns : fileNamePatterns ,
Command : command ,
IsInputFile : sec . Key ( "IS_INPUT_FILE" ) . MustBool ( false ) ,
2025-10-23 16:01:38 +08:00
RenderContentMode : renderContentMode ,
RenderContentSandbox : renderContentSandbox ,
2025-10-23 07:41:38 +08:00
// if no sanitizer is needed, no post process is needed
NeedPostProcess : sec . Key ( "NEED_POST_PROCESS" ) . MustBool ( renderContentMode == RenderContentModeSanitized ) ,
2019-12-07 14:49:04 -05:00
} )
2019-03-16 11:12:44 +08:00
}