Skip to content

Commit fd5876b

Browse files
committed
Use SIMD to replace C0 control codes in Go code
1 parent 12c1b0c commit fd5876b

File tree

10 files changed

+74
-47
lines changed

10 files changed

+74
-47
lines changed

kittens/diff/collect.go

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@ import (
99
"os"
1010
"path/filepath"
1111
"strings"
12-
"sync"
1312
"unicode/utf8"
1413

15-
"github.com/kovidgoyal/kitty/tools/highlight"
1614
"github.com/kovidgoyal/kitty/tools/utils"
1715
)
1816

@@ -125,18 +123,15 @@ func text_to_lines(text string) []string {
125123
return lines
126124
}
127125

128-
var sanitize = sync.OnceValue(func() func(string) string {
129-
s := highlight.NewSanitizeControlCodes(conf.Replace_tab_by)
130-
return s.Sanitize
131-
})
126+
// sanitize passes text through utils.ReplaceControlCodes, using the
// configured conf.Replace_tab_by as the tab replacement and "\n" as the
// newline replacement.
func sanitize(text string) string {
	return utils.ReplaceControlCodes(text, conf.Replace_tab_by, "\n")
}
132127

133128
func lines_for_path(path string) ([]string, error) {
134129
return lines_cache.GetOrCreate(path, func(path string) ([]string, error) {
135130
ans, err := data_for_path(path)
136131
if err != nil {
137132
return nil, err
138133
}
139-
return text_to_lines(sanitize()(ans)), nil
134+
return text_to_lines(sanitize(ans)), nil
140135
})
141136
}
142137

kittens/diff/highlight.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func (s prefer_light_colors) SyntaxAliases() map[string]string { return c
2626
func (s prefer_light_colors) TextForPath(path string) (string, error) { return data_for_path(path) }
2727

2828
var highlighter = sync.OnceValue(func() highlight.Highlighter {
29-
return highlight.NewHighlighter(sanitize())
29+
return highlight.NewHighlighter(sanitize)
3030
})
3131

3232
func highlight_all(paths []string, light bool) {

kittens/diff/render.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,10 +284,10 @@ func title_lines(left_path, right_path string, columns, margin_size int, ans []*
284284
}
285285
sl := ScreenLine{}
286286
if right_name != "" && right_name != left_name {
287-
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), available_cols)
288-
sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize()(right_name), available_cols)
287+
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), available_cols)
288+
sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize(right_name), available_cols)
289289
} else {
290-
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), columns-margin_size)
290+
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), columns-margin_size)
291291
ll.is_full_width = true
292292
}
293293
l2 := ll
@@ -755,7 +755,7 @@ func rename_lines(path, other_path string, columns, margin_size int, ans []*Logi
755755
ll := LogicalLine{
756756
left_reference: Reference{path: path}, right_reference: Reference{path: other_path},
757757
line_type: CHANGE_LINE, is_change_start: true, is_full_width: true}
758-
for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize()(path_name_map[path]), sanitize()(path_name_map[other_path])), columns-margin_size) {
758+
for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize(path_name_map[path]), sanitize(path_name_map[other_path])), columns-margin_size) {
759759
sl := ScreenLine{}
760760
sl.right.marked_up_text = line
761761
ll.screen_lines = append(ll.screen_lines, &sl)

kittens/diff/ui.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ func (self *Handler) draw_status_line() {
446446
if self.inputting_command {
447447
self.rl.RedrawNonAtomic()
448448
} else if self.statusline_message != "" {
449-
self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize()(self.statusline_message), self.screen_size.columns)))
449+
self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize(self.statusline_message), self.screen_size.columns)))
450450
} else {
451451
num := self.logical_lines.NumScreenLinesTo(self.scroll_pos)
452452
den := self.logical_lines.NumScreenLinesTo(self.max_scroll_pos)

kitty/data-types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#define zero_at_ptr(p) memset((p), 0, sizeof((p)[0]))
4949
#define literal_strlen(x) (sizeof(x)-1)
5050
#define zero_at_ptr_count(p, count) memset((p), 0, (count) * sizeof((p)[0]))
51+
#define C0_EXCEPT_NL_SPACE_TAB_DEL 0x0 ... 0x8: case 0xb ... 0x1f
5152
#define C0_EXCEPT_NL_SPACE_TAB 0x0 ... 0x8: case 0xb ... 0x1f: case 0x7f
5253
void log_error(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
5354
#define fatal(...) { log_error(__VA_ARGS__); exit(EXIT_FAILURE); }

kitty/logging.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,26 @@ log_error(const char *fmt, ...) {
2727
va_end(ar);
2828
if (n < 0) return;
2929
size_t size = 5 * (size_t)n + 8;
30-
RAII_ALLOC(char, arena, calloc(size, sizeof(char)));
30+
RAII_ALLOC(unsigned char, arena, calloc(size, sizeof(char)));
3131
if (!arena) return;
3232
va_start(ar, fmt);
33-
n = vsnprintf(arena, size, fmt, ar);
33+
n = vsnprintf((char*)arena, size, fmt, ar);
3434
va_end(ar);
35-
char *sanbuf = arena + n + 1;
35+
unsigned char *sanbuf = arena + n + 1;
3636

3737
char utf8buf[4];
3838
START_ALLOW_CASE_RANGE
3939
size_t j = 0;
40-
for (char *x = arena; x < arena + n; x++) {
40+
for (unsigned char *x = arena; x < arena + n; x++) {
4141
switch(*x) {
42-
case C0_EXCEPT_NL_SPACE_TAB: {
42+
case C0_EXCEPT_NL_SPACE_TAB_DEL: {
4343
const uint32_t ch = 0x2400 + *x;
4444
const unsigned sz = encode_utf8(ch, utf8buf);
4545
for (unsigned c = 0; c < sz; c++, j++) sanbuf[j] = utf8buf[c];
4646
} break;
47+
case 0x7f:
48+
sanbuf[j++] = 0xe2; sanbuf[j++] = 0x90; sanbuf[j++] = 0xa1; // U+2421
49+
break;
4750
default:
4851
sanbuf[j++] = *x;
4952
break;

tools/highlight/api.go

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ package highlight
33
import (
44
"errors"
55
"fmt"
6-
"strings"
76

87
"github.com/alecthomas/chroma/v2"
8+
"github.com/kovidgoyal/kitty/tools/utils"
99
)
1010

1111
var _ = fmt.Print
@@ -19,38 +19,14 @@ type StyleResolveData interface {
1919
TextForPath(string) (string, error)
2020
}
2121

22-
type SanitizeControlCodes struct {
23-
r *strings.Replacer
24-
}
25-
26-
func (s SanitizeControlCodes) Sanitize(x string) string { return s.r.Replace(x) }
27-
28-
func NewSanitizeControlCodes(replace_tab_by string) *SanitizeControlCodes {
29-
repls := make([]string, 0, 2*(0x1f+2+(0x9f-0x80+1)))
30-
for i := range 0x1f + 1 {
31-
var repl string
32-
switch i {
33-
case '\n', ' ':
34-
repl = string(rune(i))
35-
case '\t':
36-
repl = replace_tab_by
37-
default:
38-
repl = string(rune(0x2400 + i))
39-
}
40-
repls = append(repls, string(rune(i)), repl)
41-
}
42-
return &SanitizeControlCodes{r: strings.NewReplacer(repls...)}
43-
}
44-
4522
type Highlighter interface {
4623
HighlightFile(path string, srd StyleResolveData) (highlighted_string string, err error)
4724
Sanitize(string) string
4825
}
4926

5027
func NewHighlighter(sanitize func(string) string) Highlighter {
5128
if sanitize == nil {
52-
s := NewSanitizeControlCodes(" ")
53-
sanitize = s.Sanitize
29+
sanitize = func(text string) string { return utils.ReplaceControlCodes(text, " ", "\n") }
5430
}
5531
return &highlighter{sanitize: sanitize, tokens_map: make(map[string][]chroma.Token)}
5632
}

tools/simdstring/intrinsics_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ package simdstring
55
import (
66
"bytes"
77
"fmt"
8-
"github.com/kovidgoyal/kitty/tools/utils"
98
"runtime"
109
"strings"
1110
"testing"
@@ -120,13 +119,19 @@ func addressof_data(b []byte) uintptr {
120119
return uintptr(unsafe.Pointer(&b[0]))
121120
}
122121

122+
// memset overwrites every byte of ans with val. A nil or empty slice is left
// untouched.
func memset(ans []byte, val byte) {
	for i := 0; i < len(ans); i++ {
		ans[i] = val
	}
}
127+
123128
func aligned_slice(sz, alignment int) ([]byte, []byte) {
124129
ans := make([]byte, sz+alignment+512)
125130
a := addressof_data(ans)
126131
a &= uintptr(alignment - 1)
127132
extra := uintptr(alignment) - a
128-
utils.Memset(ans, '<')
129-
utils.Memset(ans[extra+uintptr(sz):], '>')
133+
memset(ans, '<')
134+
memset(ans[extra+uintptr(sz):], '>')
130135
return ans[extra : extra+uintptr(sz)], ans
131136
}
132137

tools/utils/misc.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"strings"
1515
"sync"
1616

17+
"github.com/kovidgoyal/kitty/tools/simdstring"
1718
"golang.org/x/exp/constraints"
1819
"golang.org/x/text/language"
1920
)
@@ -423,3 +424,35 @@ var LanguageTag = sync.OnceValue(func() language.Tag {
423424
return tag
424425

425426
})
427+
428+
// Replace control codes by unicode codepoints that describe the codes
429+
// making the text safe to send to a terminal
430+
func ReplaceControlCodes(text, replace_tab_by, replace_newline_by string) string {
431+
buf := strings.Builder{}
432+
for len(text) > 0 {
433+
idx := simdstring.IndexC0String(text)
434+
if idx < 0 {
435+
if buf.Cap() == 0 {
436+
return text
437+
}
438+
buf.WriteString(text)
439+
break
440+
}
441+
if buf.Cap() == 0 {
442+
buf.Grow(2 * len(text))
443+
}
444+
buf.WriteString(text[:idx])
445+
switch text[idx] {
446+
case '\n':
447+
buf.WriteString(replace_newline_by)
448+
case '\t':
449+
buf.WriteString(replace_tab_by)
450+
case 0x7f:
451+
buf.WriteRune(0x2421)
452+
default:
453+
buf.WriteRune(0x2400 + rune(text[idx]))
454+
}
455+
text = text[idx+1:]
456+
}
457+
return buf.String()
458+
}

tools/utils/strings_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,17 @@ func TestStringScanner(t *testing.T) {
3434
}
3535
}
3636
}
37+
38+
func TestReplaceControlCodes(t *testing.T) {
39+
for text, expected := range map[string]string{
40+
"none": "none",
41+
"a\r\x01b\x03\x7f c\n\td": "a\u240d\u2401b\u2403\u2421 cX d",
42+
"\x01": "\u2401",
43+
"\x00\x0b": "\u2400\u240b",
44+
} {
45+
actual := ReplaceControlCodes(text, " ", "X")
46+
if diff := cmp.Diff(expected, actual); diff != "" {
47+
t.Fatalf("Failed for text: %#v\n%s", text, diff)
48+
}
49+
}
50+
}

0 commit comments

Comments
 (0)