2022-12-12 20:45:21 +08:00
// Copyright 2022 The Gitea Authors. All rights reserved.
2024-04-17 11:40:35 +02:00
// SPDX-License-Identifier: MIT
2022-12-12 20:45:21 +08:00
package integration
import (
"net/http"
2025-10-23 07:41:38 +08:00
"net/url"
2022-12-12 20:45:21 +08:00
"strings"
"testing"
2025-10-23 07:41:38 +08:00
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
2025-11-14 08:31:11 +08:00
"code.gitea.io/gitea/modules/charset"
2024-11-14 13:02:11 +08:00
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/external"
2026-03-29 12:24:30 +02:00
"code.gitea.io/gitea/modules/public"
2022-12-12 20:45:21 +08:00
"code.gitea.io/gitea/modules/setting"
2025-10-23 07:41:38 +08:00
"code.gitea.io/gitea/modules/test"
2022-12-12 20:45:21 +08:00
"code.gitea.io/gitea/tests"
"github.com/stretchr/testify/assert"
2025-10-23 07:41:38 +08:00
"github.com/stretchr/testify/require"
2022-12-12 20:45:21 +08:00
)
func TestExternalMarkupRenderer ( t * testing . T ) {
defer tests . PrepareTestEnv ( t ) ( )
2023-03-07 18:51:06 +08:00
if ! setting . Database . Type . IsSQLite3 ( ) {
2025-11-14 08:31:11 +08:00
t . Skip ( "only SQLite3 test config supports external markup renderer" )
2022-12-12 20:45:21 +08:00
return
}
2025-11-14 08:31:11 +08:00
const binaryContentPrefix = "any prefix text."
const binaryContent = binaryContentPrefix + "\xfe\xfe\xfe\x00\xff\xff"
detectedEncoding , _ := charset . DetectEncoding ( [ ] byte ( binaryContent ) )
assert . NotEqual ( t , binaryContent , strings . ToValidUTF8 ( binaryContent , "?" ) )
assert . Equal ( t , "ISO-8859-2" , detectedEncoding ) // even if the binary content can be detected as text encoding, it shouldn't affect the raw rendering
2025-10-23 07:41:38 +08:00
onGiteaRun ( t , func ( t * testing . T , _ * url . URL ) {
2025-10-23 16:01:38 +08:00
user2 := unittest . AssertExistsAndLoadBean ( t , & user_model . User { ID : 2 } )
repo1 := unittest . AssertExistsAndLoadBean ( t , & repo_model . Repository { ID : 1 } )
2025-11-14 08:31:11 +08:00
_ , err := createFileInBranch ( user2 , repo1 , createFileInBranchOptions { } , map [ string ] string {
"test.html" : ` <div><any attr="val"><script></script></div> ` ,
"html.no-sanitizer" : ` <script>foo("raw")</script> ` ,
"bin.no-sanitizer" : binaryContent ,
} )
2025-10-23 16:01:38 +08:00
require . NoError ( t , err )
2025-10-23 07:41:38 +08:00
2025-10-23 16:01:38 +08:00
t . Run ( "RenderNoSanitizer" , func ( t * testing . T ) {
2025-11-14 08:31:11 +08:00
req := NewRequest ( t , "GET" , "/user2/repo1/src/branch/master/html.no-sanitizer" )
2025-10-23 07:41:38 +08:00
resp := MakeRequest ( t , req , http . StatusOK )
2025-11-14 08:31:11 +08:00
div := NewHTMLParser ( t , resp . Body ) . Find ( "div.file-view" )
2025-10-23 07:41:38 +08:00
data , err := div . Html ( )
assert . NoError ( t , err )
2025-11-14 08:31:11 +08:00
assert . Equal ( t , ` <script>foo("raw")</script> ` , strings . TrimSpace ( data ) )
req = NewRequest ( t , "GET" , "/user2/repo1/src/branch/master/bin.no-sanitizer" )
resp = MakeRequest ( t , req , http . StatusOK )
div = NewHTMLParser ( t , resp . Body ) . Find ( "div.file-view" )
data , err = div . Html ( )
assert . NoError ( t , err )
assert . Equal ( t , strings . ReplaceAll ( binaryContent , "\x00" , "" ) , strings . TrimSpace ( data ) ) // HTML template engine removes the null bytes
2025-10-23 07:41:38 +08:00
} )
} )
t . Run ( "RenderContentDirectly" , func ( t * testing . T ) {
2025-11-14 08:31:11 +08:00
req := NewRequest ( t , "GET" , "/user2/repo1/src/branch/master/test.html" )
2025-10-23 07:41:38 +08:00
resp := MakeRequest ( t , req , http . StatusOK )
assert . Equal ( t , "text/html; charset=utf-8" , resp . Header ( ) . Get ( "Content-Type" ) )
doc := NewHTMLParser ( t , resp . Body )
div := doc . Find ( "div.file-view" )
data , err := div . Html ( )
assert . NoError ( t , err )
2025-11-14 08:31:11 +08:00
// the content is fully sanitized
assert . Equal ( t , ` <div><script></script></div> ` , strings . TrimSpace ( data ) )
2025-10-23 07:41:38 +08:00
} )
2025-11-14 08:31:11 +08:00
// above tested in-page rendering (no iframe), then we test iframe mode below
2026-01-26 10:34:38 +08:00
r := markup . DetectRendererTypeByFilename ( "any-file.html" ) . ( * external . Renderer )
2025-10-23 07:41:38 +08:00
defer test . MockVariableValue ( & r . RenderContentMode , setting . RenderContentModeIframe ) ( )
2025-11-14 08:31:11 +08:00
assert . True ( t , r . NeedPostProcess ( ) )
2026-01-26 10:34:38 +08:00
r = markup . DetectRendererTypeByFilename ( "any-file.no-sanitizer" ) . ( * external . Renderer )
2025-10-23 16:01:38 +08:00
defer test . MockVariableValue ( & r . RenderContentMode , setting . RenderContentModeIframe ) ( )
2025-11-14 08:31:11 +08:00
assert . False ( t , r . NeedPostProcess ( ) )
2025-10-23 07:41:38 +08:00
t . Run ( "RenderContentInIFrame" , func ( t * testing . T ) {
2025-10-23 16:01:38 +08:00
t . Run ( "DefaultSandbox" , func ( t * testing . T ) {
2025-11-14 08:31:11 +08:00
req := NewRequest ( t , "GET" , "/user2/repo1/src/branch/master/test.html" )
2025-10-23 07:41:38 +08:00
2025-10-23 16:01:38 +08:00
t . Run ( "ParentPage" , func ( t * testing . T ) {
respParent := MakeRequest ( t , req , http . StatusOK )
assert . Equal ( t , "text/html; charset=utf-8" , respParent . Header ( ) . Get ( "Content-Type" ) )
iframe := NewHTMLParser ( t , respParent . Body ) . Find ( "iframe.external-render-iframe" )
assert . Empty ( t , iframe . AttrOr ( "src" , "" ) ) // src should be empty, "data-src" is used instead
// default sandbox on parent page
assert . Equal ( t , "allow-scripts allow-popups" , iframe . AttrOr ( "sandbox" , "" ) )
2025-11-14 08:31:11 +08:00
assert . Equal ( t , "/user2/repo1/render/branch/master/test.html" , iframe . AttrOr ( "data-src" , "" ) )
2025-10-23 16:01:38 +08:00
} )
t . Run ( "SubPage" , func ( t * testing . T ) {
2025-11-14 08:31:11 +08:00
req = NewRequest ( t , "GET" , "/user2/repo1/render/branch/master/test.html" )
2025-10-23 16:01:38 +08:00
respSub := MakeRequest ( t , req , http . StatusOK )
assert . Equal ( t , "text/html; charset=utf-8" , respSub . Header ( ) . Get ( "Content-Type" ) )
// default sandbox in sub page response
assert . Equal ( t , "frame-src 'self'; sandbox allow-scripts allow-popups" , respSub . Header ( ) . Get ( "Content-Security-Policy" ) )
2025-11-14 08:31:11 +08:00
// FIXME: actually here is a bug (legacy design problem), the "PostProcess" will escape "<script>" tag, but it indeed is the sanitizer's job
2026-04-05 21:13:34 +02:00
assert . Equal ( t , ` <script crossorigin src=" ` + public . AssetURI ( "js/external-render-helper.js" ) + ` "></script><link rel="stylesheet" href=" ` + public . AssetURI ( "css/theme-gitea-auto.css" ) + ` "><div><any attr="val"><script></script></any></div> ` , respSub . Body . String ( ) )
2025-10-23 16:01:38 +08:00
} )
} )
t . Run ( "NoSanitizerNoSandbox" , func ( t * testing . T ) {
2025-11-14 08:31:11 +08:00
t . Run ( "BinaryContent" , func ( t * testing . T ) {
req := NewRequest ( t , "GET" , "/user2/repo1/src/branch/master/bin.no-sanitizer" )
respParent := MakeRequest ( t , req , http . StatusOK )
iframe := NewHTMLParser ( t , respParent . Body ) . Find ( "iframe.external-render-iframe" )
assert . Equal ( t , "/user2/repo1/render/branch/master/bin.no-sanitizer" , iframe . AttrOr ( "data-src" , "" ) )
2025-10-23 16:01:38 +08:00
2025-11-14 08:31:11 +08:00
req = NewRequest ( t , "GET" , "/user2/repo1/render/branch/master/bin.no-sanitizer" )
respSub := MakeRequest ( t , req , http . StatusOK )
assert . Equal ( t , binaryContent , respSub . Body . String ( ) ) // raw content should keep the raw bytes (including invalid UTF-8 bytes), and no "external-render-iframe" helpers
// no sandbox (disabled by RENDER_CONTENT_SANDBOX)
assert . Empty ( t , iframe . AttrOr ( "sandbox" , "" ) )
assert . Equal ( t , "frame-src 'self'" , respSub . Header ( ) . Get ( "Content-Security-Policy" ) )
} )
2025-10-23 16:01:38 +08:00
2025-11-14 08:31:11 +08:00
t . Run ( "HTMLContentWithExternalRenderIframeHelper" , func ( t * testing . T ) {
req := NewRequest ( t , "GET" , "/user2/repo1/render/branch/master/html.no-sanitizer" )
respSub := MakeRequest ( t , req , http . StatusOK )
2026-04-05 21:13:34 +02:00
assert . Equal ( t , ` <script crossorigin src=" ` + public . AssetURI ( "js/external-render-helper.js" ) + ` "></script><link rel="stylesheet" href=" ` + public . AssetURI ( "css/theme-gitea-auto.css" ) + ` "><script>foo("raw")</script> ` , respSub . Body . String ( ) )
2025-11-14 08:31:11 +08:00
assert . Equal ( t , "frame-src 'self'" , respSub . Header ( ) . Get ( "Content-Security-Policy" ) )
} )
2025-10-23 16:01:38 +08:00
} )
2025-10-23 07:41:38 +08:00
} )
2022-12-12 20:45:21 +08:00
}