Improve control char rendering and escape button styling (#37094)

Follow-up to #37078.

- Use Unicode Control Pictures](U+2400-U+2421) to render C0 control characters
- Make it work in diff view too
- Replace escape warning emoji with SVG
- Align escape warning button with code lines

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
silverwind
2026-04-06 13:07:33 +02:00
committed by GitHub
parent e47c6135dd
commit 423cdd4d94
37 changed files with 1561 additions and 1794 deletions
+17 -80
View File
@@ -6,8 +6,8 @@ package highlight
import (
"bytes"
gohtml "html"
"html/template"
"slices"
"sync"
"code.gitea.io/gitea/modules/log"
@@ -15,7 +15,7 @@ import (
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/styles"
)
@@ -25,8 +25,6 @@ const sizeLimit = 1024 * 1024
type globalVarsType struct {
highlightMapping map[string]string
githubStyles *chroma.Style
escapeFull []template.HTML
escCtrlCharsMap []template.HTML
}
var (
@@ -42,69 +40,10 @@ func globalVars() *globalVarsType {
globalVarsPtr = &globalVarsType{}
globalVarsPtr.githubStyles = styles.Get("github")
globalVarsPtr.highlightMapping = setting.GetHighlightMapping()
globalVarsPtr.escCtrlCharsMap = make([]template.HTML, 256)
// ASCII Table 0x00 - 0x1F
controlCharNames := []string{
"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
}
// Uncomment this line if you'd debug the layout without creating a special file, then Space (0x20) will also be escaped.
// Don't worry, even if you forget to comment it out and push it to git repo, the CI tests will catch it and fail.
// controlCharNames = append(controlCharNames, "SP")
for i, s := range controlCharNames {
globalVarsPtr.escCtrlCharsMap[i] = template.HTML(`<span class="broken-code-point" data-escaped="` + s + `"><span class="char">` + string(byte(i)) + `</span></span>`)
}
globalVarsPtr.escCtrlCharsMap[0x7f] = template.HTML(`<span class="broken-code-point" data-escaped="DEL"><span class="char">` + string(byte(0x7f)) + `</span></span>`)
globalVarsPtr.escCtrlCharsMap['\t'] = ""
globalVarsPtr.escCtrlCharsMap['\n'] = ""
globalVarsPtr.escCtrlCharsMap['\r'] = ""
globalVarsPtr.escapeFull = slices.Clone(globalVarsPtr.escCtrlCharsMap)
// exactly the same as Golang's html.EscapeString
globalVarsPtr.escapeFull['&'] = "&amp;"
globalVarsPtr.escapeFull['\''] = "&#39;"
globalVarsPtr.escapeFull['<'] = "&lt;"
globalVarsPtr.escapeFull['>'] = "&gt;"
globalVarsPtr.escapeFull['"'] = "&#34;"
}
return globalVarsPtr
}
func escapeByMap(code []byte, escapeMap []template.HTML) template.HTML {
firstEscapePos := -1
for i, c := range code {
if escapeMap[c] != "" {
firstEscapePos = i
break
}
}
if firstEscapePos == -1 {
return template.HTML(util.UnsafeBytesToString(code))
}
buf := make([]byte, firstEscapePos, len(code)*2)
copy(buf[:firstEscapePos], code[:firstEscapePos])
for i := firstEscapePos; i < len(code); i++ {
c := code[i]
if esc := escapeMap[c]; esc != "" {
buf = append(buf, esc...)
} else {
buf = append(buf, c)
}
}
return template.HTML(util.UnsafeBytesToString(buf))
}
func escapeFullString(code string) template.HTML {
return escapeByMap(util.UnsafeStringToBytes(code), globalVars().escapeFull)
}
func escapeControlChars(code []byte) template.HTML {
return escapeByMap(code, globalVars().escCtrlCharsMap)
}
// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
// The '\n' is necessary for copying code from web UI to preserve original code lines
@@ -137,6 +76,10 @@ func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) {
}
}
func htmlEscape(code string) template.HTML {
return template.HTML(gohtml.EscapeString(code))
}
// RenderCodeSlowGuess tries to get a lexer by file name and language first,
// if not found, it will try to guess the lexer by code content, which is slow (more than several hundreds of milliseconds).
func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexer chroma.Lexer, lexerDisplayName string) {
@@ -147,7 +90,7 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
}
if len(code) > sizeLimit {
return escapeFullString(code), nil, ""
return htmlEscape(code), nil, ""
}
lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow
@@ -156,15 +99,15 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
// RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes
func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
formatter := html.New(html.WithClasses(true),
html.WithLineNumbers(false),
html.PreventSurroundingPre(true),
formatter := chromahtml.New(chromahtml.WithClasses(true),
chromahtml.WithLineNumbers(false),
chromahtml.PreventSurroundingPre(true),
)
iterator, err := lexer.Tokenise(nil, code)
if err != nil {
log.Error("Can't tokenize code: %v", err)
return escapeFullString(code)
return htmlEscape(code)
}
htmlBuf := &bytes.Buffer{}
@@ -172,14 +115,9 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
err = formatter.Format(htmlBuf, globalVars().githubStyles, iterator)
if err != nil {
log.Error("Can't format code: %v", err)
return escapeFullString(code)
return htmlEscape(code)
}
// At the moment, we do not escape control chars here (unlike RenderFullFile which escapes control chars).
// The reason is: it is a very rare case that a text file contains control chars.
// This function is usually used by highlight diff and blame, not quite sure whether there will be side effects.
// If there would be new user feedback about this, we can re-consider about various edge cases.
return template.HTML(htmlBuf.String())
return template.HTML(util.UnsafeBytesToString(htmlBuf.Bytes()))
}
// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
@@ -191,10 +129,9 @@ func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, st
lexerName := formatLexerName(lexer.Config().Name)
rendered := RenderCodeByLexer(lexer, util.UnsafeBytesToString(code))
unsafeLines := UnsafeSplitHighlightedLines(rendered)
lines := make([]template.HTML, 0, len(unsafeLines))
for _, lineBytes := range unsafeLines {
line := escapeControlChars(lineBytes)
lines = append(lines, line)
lines := make([]template.HTML, len(unsafeLines))
for idx, lineBytes := range unsafeLines {
lines[idx] = template.HTML(util.UnsafeBytesToString(lineBytes))
}
return lines, lexerName
}
@@ -213,7 +150,7 @@ func renderPlainText(code []byte) []template.HTML {
content = code[pos : pos+nextPos+1]
pos += nextPos + 1
}
lines = append(lines, escapeFullString(util.UnsafeBytesToString(content)))
lines = append(lines, htmlEscape(util.UnsafeBytesToString(content)))
}
return lines
}
-11
View File
@@ -204,14 +204,3 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
assert.Equal(t, "<span>a</span>\n", string(ret[0]))
assert.Equal(t, "<span>b\n</span>", string(ret[1]))
}
func TestEscape(t *testing.T) {
assert.Equal(t, template.HTML("\t\r\n<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&'\"<>"), escapeControlChars([]byte("\t\r\n\x00\x1f&'\"<>")))
assert.Equal(t, template.HTML("<span class=\"broken-code-point\" data-escaped=\"NUL\"><span class=\"char\">\x00</span></span><span class=\"broken-code-point\" data-escaped=\"US\"><span class=\"char\">\x1f</span></span>&amp;&#39;&#34;&lt;&gt;\t\r\n"), escapeFullString("\x00\x1f&'\"<>\t\r\n"))
out, _ := RenderFullFile("a.py", "", []byte("# \x7f<>"))
assert.Equal(t, template.HTML(`<span class="c1"># <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;</span>`), out[0])
out = renderPlainText([]byte("# \x7f<>"))
assert.Equal(t, template.HTML(`# <span class="broken-code-point" data-escaped="DEL"><span class="char">`+string(byte(0x7f))+`</span></span>&lt;&gt;`), out[0])
}