Skip to content

Instantly share code, notes, and snippets.

@mflatt
Last active November 19, 2021 15:01
Show Gist options
  • Save mflatt/6ab71f8214c5fd98dae98c8531056fa2 to your computer and use it in GitHub Desktop.
Save mflatt/6ab71f8214c5fd98dae98c8531056fa2 to your computer and use it in GitHub Desktop.
#lang racket/base
(provide text-lines?
(rename-out [empty empty-text-lines])
;; 0 is the position before everything, and the position
;; after a newline is on the subsequent line
text-length ; t -> position at end
insert ; t position str -> t, detecting "\n"
delete ; t start-position end-position -> t
get-text ; t start-position end-position -> string
position->start ; t position -> position of line start
position->line ; t position -> line number
line->start ; t line number -> position
text-line-count) ; t -> one more than line number at end
;; ----------------------------------------
(define (insert t pos str)
(check-in-range 'insert t pos)
(let loop ([t t] [i 0] [pos pos] [accum 0])
(cond
[(= i (string-length str))
(if (zero? accum)
t
(adjust-within-line t pos (substring str (- i accum) i)))]
[(char=? #\newline (string-ref str i))
(define len (add1 accum))
(loop (insert-newline t pos (substring str (- i accum) i))
(add1 i)
(+ pos len)
0)]
[else (loop t (add1 i) pos (add1 accum))])))
(define (delete t pos end)
(check-in-range 'delete t pos)
(check-in-range 'delete t end)
(let delete ([t t] [pos pos] [end end])
(cond
[(= pos end) t]
[else
(define-values (left-len left-count sub-t) (find-line t pos))
(define line-start (+ left-len (node-left-len sub-t)))
(define line-len (node-len sub-t))
(define line-end (+ line-start line-len))
(cond
[(end . < . line-end)
(adjust-within-line t pos (- pos end))]
[(= pos line-start)
(define new-t (delete-line t line-start))
(delete new-t line-start (- end line-len))]
[else
(define keep (substring (node-content sub-t) 0 (- pos line-start)))
(define new-t (delete t line-start end))
(insert new-t line-start keep)])])))
(define (get-text t pos end)
(check-in-range 'get-text t pos)
(check-in-range 'get-text t end)
(define str
(let loop ([pos pos] [end end])
(define-values (left-len left-count sub-t) (find-line t pos))
(define line-start (+ left-len (node-left-len sub-t)))
(define line-len (node-len sub-t))
(define line-end (+ line-start line-len))
(cond
[(<= end line-end)
(define rel-pos (- pos line-start))
(cond
[(= end line-end)
(string-append (substring (node-content sub-t) rel-pos) "\n")]
[else
(define rel-end (- end line-start))
(substring (node-content sub-t) rel-pos rel-end)])]
[else
(define pre-str (loop pos line-end))
(define post-strs (loop line-end end))
(cons pre-str
(if (pair? post-strs)
post-strs
(list post-strs)))])))
(if (string? str)
str
(apply string-append str)))
;; ----------------------------------------
;; a node represents one text line that ends with a newline
(struct node (content ; characters in this line, excluding ending newline
left-len ; characters in left subtree
total-len ; total in both subtrees
left-count ; number of lines in left subtree
total-count ; total number of lines
height ; head of tree (for balancing)
left ; left subtree
right) ; right subtree
#:transparent
#:authentic
#:reflection-name 'lines-of-text)
(define (text-lines? v) (node? v))
(define (node-len n)
(content-len (node-content n)))
(define (content-len content)
(add1 (string-length content)))
(define (text-length n)
(sub1 (node-total-len n)))
(define (text-line-count n)
(node-total-count n))
;; represent an editor with a sentinel newline, but hide
;; its existence to the outside
(define empty (node "" 0 1 0 1 0 #f #f))
(define (check-in-range* who t pos limit what)
(unless (node? t)
(raise-argument-error who "text-lines?" t))
(unless (exact-nonnegative-integer? pos)
(raise-argument-error who "exact-nonnegative-integer?" pos))
(unless (pos . < . limit)
(raise-arguments-error who
(format "~a is out of bounds" what)
what pos
"upper limit" (sub1 limit))))
(define (check-in-range who t pos)
(check-in-range* who t pos (node-total-len t) "position"))
(define (check-in-range-line who t line)
(check-in-range* who t line (add1 (node-total-count t)) "line"))
;; ----------------------------------------
(define (tree-height t)
(cond
[(not t) 0]
[else (node-height t)]))
(define (tree-total-len t)
(cond
[(not t) 0]
[else (node-total-len t)]))
(define (tree-total-count t)
(cond
[(not t) 0]
[else (node-total-count t)]))
;; ----------------------------------------
(define (combine content left right)
(node content
(tree-total-len left)
(+ (content-len content) (tree-total-len left) (tree-total-len right))
(tree-total-count left)
(+ 1 (tree-total-count left) (tree-total-count right))
(+ 1 (max (tree-height left) (tree-height right)))
left
right))
(define (reverse-combine content right left)
(combine content left right))
;; ----------------------------------------
(define (position->start t pos)
(check-in-range 'position->start t pos)
(define-values (left-len left-count sub-t) (find-line t pos))
(+ left-len (node-left-len sub-t)))
;; ----------------------------------------
(define (position->line t pos)
(check-in-range 'position->line t pos)
(define-values (left-len left-count sub-t) (find-line t pos))
(+ left-count (node-left-count sub-t)))
;; ----------------------------------------
(define (find-line t pos)
(cond
[(< pos (node-left-len t))
(find-line (node-left t) pos)]
[else
(define right-left-len (+ (node-len t) (node-left-len t)))
(define new-pos (- pos right-left-len))
(cond
[(new-pos . < . 0) (values 0 0 t)]
[else
(define right-left-count (+ 1 (node-left-count t)))
(define-values (left-len left-count sub-t) (find-line (node-right t) new-pos))
(values (+ left-len right-left-len)
(+ left-count right-left-count)
sub-t)])]))
;; ----------------------------------------
(define (line->start t line)
(check-in-range-line 'line->start t line)
(find-start t line))
(define (find-start t line)
(define here (node-left-count t))
(cond
[(line . < . here)
(find-start (node-left t) line)]
[(line . > . here)
(define pre (+ (node-left-len t) (node-len t)))
(+ (find-start (node-right t) (- line here 1))
pre)]
[else
(node-left-len t)]))
;; ----------------------------------------
(define (adjust-within-line t pos amt) ; amt is string or negative number
(check-in-range 'adjust-within-line t pos)
(unless (or (string? amt)
(and (exact-integer? amt) (negative? amt)))
(raise-argument-error 'adjust-within-line "(or string? (and/c exact-integer? negative?))" amt))
(when (exact-integer? amt)
(define-values (left-len left-count line-t) (find-line t pos))
(when ((+ left-len (node-left-len line-t) (node-len line-t))
. <= .
(+ pos amt))
(raise-arguments-error 'adjust-within-line
"subtracting too much"
"amount" amt)))
(adjust t pos amt))
(define (adjust t pos amt)
(define rel-pos (- pos (node-left-len t)))
(cond
[(rel-pos . < . 0)
(combine (node-content t)
(adjust (node-left t) pos amt)
(node-right t))]
[(rel-pos . >= . (node-len t))
(define new-pos (- rel-pos (node-len t)))
(combine (node-content t)
(node-left t)
(adjust (node-right t) new-pos amt))]
[else
(combine (cond
[(string? amt) (string-append (substring (node-content t) 0 rel-pos)
amt
(substring (node-content t) rel-pos))]
[else (string-append (substring (node-content t) 0 rel-pos)
(substring (node-content t) (- rel-pos amt)))])
(node-left t)
(node-right t))]))
;; ----------------------------------------
;; inserts `len` characters that end with a newline
(define (insert-newline t pos content)
(check-in-range 'insert-newline t pos)
(unless (string? content)
(raise-argument-error 'insert-newline "string?" content))
(define-values (left-len left-count sub-t) (find-line t pos))
(define start (+ left-len (node-left-len sub-t)))
(define delta (- pos start))
(cond
[(zero? delta)
;; insert new line before existing one
(insert-line t pos content)]
[else
;; split node by first shrinking, then insert
(define pre (substring (node-content sub-t) 0 delta))
(insert-line (adjust-within-line t start (- delta))
start
(string-append pre content))]))
;; ----------------------------------------
(define (delete-line t pos)
(check-in-range 'delete-line t pos)
;; sanity check:
(define start (position->start t pos))
(unless (= start pos)
(error 'delete-line "line does not start at position"))
(delete-node t pos))
;; ----------------------------------------
(define (insert-line t pos str)
(cond
[(not t) (combine str #f #f)]
[(<= pos (node-left-len t))
(insert-to t pos str
node-left
node-right
combine
rotate-right)]
[else
(define right-left-len (+ (node-len t) (node-left-len t)))
(when (pos . < . right-left-len)
(error "insert-line cannot insert into the middle"))
(insert-to t (- pos right-left-len) str
node-right
node-left
reverse-combine
rotate-left)]))
;; Like insert, but inserts to a child, where `node-to'
;; determines the side where the child is added,`node-other'
;; is the other side, and `comb' builds the new tree gven the
;; two new children.
(define-syntax-rule (insert-to t new-pos new-content node-to node-other comb rotate)
(begin
;; Insert into the `node-to' child:
(define new-to (insert-line (node-to t) new-pos new-content))
(define new-other (node-other t))
(define new-t (comb (node-content t) new-to new-other))
;; Check for rotation:
(define to-height (tree-height new-to))
(define other-height (tree-height new-other))
(if ((- to-height other-height) . = . 2)
(rotate new-t)
new-t)))
(define (delete-node t pos)
(define key (node-left-len t))
(cond
[(pos . < . key)
(delete-from t pos
node-left
node-right
combine
rotate-left)]
[(not (= pos key))
(delete-from t (- pos key (node-len t))
node-right
node-left
reverse-combine
rotate-right)]
[else
(define l (node-left t))
(define r (node-right t))
(cond
[(not l) r]
[(not r) l]
[else
(delete-here t)])]))
(define-syntax-rule (delete-from t pos node-to node-other comb rotate)
(begin
;; Delete from the `node-to' child:
(define new-to (delete-node (node-to t) pos))
(define new-other (node-other t))
(define new-t (comb (node-content t) new-to new-other))
;; Check for rotation:
(define to-height (tree-height new-to))
(define other-height (tree-height new-other))
(if ((- to-height other-height) . = . -2)
(rotate new-t)
new-t)))
(define-syntax-rule (delete-here t)
(begin
;; Delete by moving from `from` to `other`
(define from (node-left t))
(define new-t
(let loop ([end from] [left-len 0])
(cond
[(node-right end)
=> (lambda (e) (loop e (+ left-len (node-left-len end) (node-len end))))]
[else
(define pos (node-left-len end))
(define new-from (delete-node from (+ pos left-len)))
(combine (node-content end) new-from (node-right t))])))
;; Check for rotation:
(define from-height (tree-height (node-left new-t)))
(define other-height (tree-height (node-right new-t)))
(if ((- from-height other-height) . = . -2)
(rotate-left new-t)
new-t)))
(define-syntax-rule (define-rotate rotate node-to node-other comb)
(begin
;; Helper rotate function:
(define (rotate t)
(define to (node-to t))
(define to-balance (- (tree-height (node-to to))
(tree-height (node-other to))))
(cond
[(to-balance . < . 0)
(double-rotate t)]
[else
(single-rotate t)]))
;; Helper double-rotate function:
(define (double-rotate t)
(define orange (node-to t))
(define yellow (node-other orange))
(define A (node-to orange))
(define B (node-to yellow))
(define C (node-other yellow))
(define D (node-other t))
(single-rotate (comb (node-content t)
(comb (node-content yellow)
(comb (node-content orange)
A
B)
C)
D)))
;; Helper single-rotate function:
(define (single-rotate t)
(define yellow (node-to t))
(comb (node-content yellow)
(node-to yellow)
(comb (node-content t)
(node-other yellow)
(node-other t))))))
(define-rotate rotate-right node-left node-right combine)
(define-rotate rotate-left node-right node-left reverse-combine)
;; ----------------------------------------
(module+ main
(define (do-check av bv form)
(unless (equal? av bv)
(error 'fail "~s: ~v vs. ~v" form av bv)))
(define-syntax-rule (check a b)
(do-check a b '(check a b)))
(define (at desc) (printf "~a\n" desc))
(at "empty")
(check (position->start empty 0) 0)
(check (position->line empty 0) 0)
(check (text-length empty) 0)
(check (text-line-count empty) 1)
(at "insert within only line")
(let* ([t (adjust-within-line empty 0 "xxx")])
(check (get-text t 0 3) "xxx")
(check (text-length t) 3)
(check (text-line-count t) 1)
(check (position->start t 0) 0)
(check (position->line t 0) 0)
(check (line->start t 0) 0)
(check (position->start t 2) 0)
(check (position->line t 2) 0)
(check (position->start t 3) 0)
(check (position->line t 3) 0))
(at "insert line 1")
(let* ([t (insert-newline empty 0 "")])
;; "|", where "|" means newline
(check (get-text t 0 1) "\n")
(check (text-length t) 1)
(check (text-line-count t) 2)
(check (position->start t 0) 0)
(check (position->line t 0) 0)
(check (position->start t 1) 1)
(check (position->line t 1) 1)
(at "insert 3 within line 0")
(let* ([t (adjust-within-line t 0 "xxx")])
;; "xxx|"
(check (get-text t 0 4) "xxx\n")
(check (position->start t 0) 0)
(check (position->line t 0) 0)
(check (position->start t 1) 0)
(check (position->start t 3) 0)
(check (position->line t 3) 0)
(check (position->start t 4) 4)
(check (position->line t 4) 1)
(at "insert 1 within line 1")
(let* ([t (adjust-within-line t 4 "x")])
;; "xxx|x"
(check (get-text t 0 5) "xxx\nx")
(check (position->start t 4) 4)
(check (position->line t 4) 1)
(check (position->start t 5) 4)
(check (position->line t 5) 1)
(at "delete 1 within line 0")
(let* ([t (adjust-within-line t 1 -1)])
;; "xx|x"
(check (get-text t 0 4) "xx\nx")
(check (position->start t 0) 0)
(check (position->line t 0) 0)
(check (position->start t 1) 0)
(check (position->start t 2) 0)
(check (position->line t 2) 0)
(check (position->start t 3) 3)
(check (position->line t 3) 1)
(check (position->start t 4) 3)
(check (position->line t 4) 1)
(at "insert before line 1")
(let* ([t (insert-newline t 3 "yyyy")])
;; "xx|yyyy|x"
(check (get-text t 0 9) "xx\nyyyy\nx")
(check (position->start t 0) 0)
(check (position->line t 0) 0)
(check (position->start t 2) 0)
(check (position->line t 2) 0)
(check (position->start t 3) 3)
(check (position->line t 3) 1)
(check (position->start t 7) 3)
(check (position->line t 7) 1)
(check (position->start t 8) 8)
(check (position->line t 8) 2)
(check (position->start t 9) 8)
(check (position->line t 9) 2)
(void))
(at "insert newline into line 1")
(let* ([t (insert-newline t 1 "yyyy")])
;; "xyyyy|x|x"
(check (get-text t 0 9) "xyyyy\nx\nx")
(check (position->start t 0) 0)
(check (position->line t 0) 0)
(check (position->start t 1) 0)
(check (position->line t 1) 0)
(check (position->start t 6) 6)
(check (position->line t 6) 1)
(at "delete line 0")
(let* ([t (delete-line t 0)])
;; "x|x"
(check (position->start t 0) 0)
(check (position->line t 0) 0)
(check (position->start t 1) 0)
(check (position->line t 1) 0)
(check (position->start t 2) 2)
(check (position->line t 2) 1)
(check (position->start t 3) 2)
(check (position->line t 3) 1)
(void)))))))
(at "three lines")
(let* ([t (insert empty 0 "abc\ndef\nghi")])
(check (get-text t 0 11) "abc\ndef\nghi")
(check (get-text (insert t 0 "xy") 0 13) "xyabc\ndef\nghi")
(check (get-text (insert t 1 "xy") 0 13) "axybc\ndef\nghi")
(check (get-text (insert t 4 "xy") 0 13) "abc\nxydef\nghi")
(check (get-text (insert t 5 "xy") 0 13) "abc\ndxyef\nghi")
(check (get-text (insert t 11 "xy") 0 13) "abc\ndef\nghixy")
(check (get-text (insert t 0 "x\ny") 0 14) "x\nyabc\ndef\nghi")
(check (get-text (insert t 4 "x\ny") 0 14) "abc\nx\nydef\nghi")
(check (get-text (insert t 7 "x\ny") 0 14) "abc\ndefx\ny\nghi")
(check (get-text (delete t 0 1) 0 10) "bc\ndef\nghi")
(check (get-text (delete t 1 2) 0 10) "ac\ndef\nghi")
(check (get-text (delete t 2 3) 0 10) "ab\ndef\nghi")
(check (get-text (delete t 3 4) 0 10) "abcdef\nghi")
(check (get-text (delete t 4 5) 0 10) "abc\nef\nghi")
(check (get-text (delete t 0 4) 0 7) "def\nghi")
(check (get-text (delete t 2 5) 0 8) "abef\nghi")
(check (get-text (delete t 1 10) 0 2) "ai"))
(at "random modify")
(define (random-modify-test W)
(define N 32)
(define M 8)
(define (make-str W) (make-string (sub1 W) #\-))
;; insert lines of length W in a random order
(define t
(for/fold ([t empty]) ([i (in-range N)])
(insert-newline t (* (random (add1 i)) W) (make-str W))))
(define (check-N*W t N str)
(define W (add1 (string-length str)))
(define content (apply string-append
(for/list ([i N])
(string-append str "\n"))))
(for* ([i (in-range N)]
[j (in-range W)])
(check (position->line t (+ (* i W) j)) i))
(check (get-text t 0 (* N W)) content)
(for ([i (in-range (* N W))]
[k (in-range 5)])
(define j (+ i (random (add1 (- (* N W) i)))))
(check (get-text t i j) (substring content i j))))
(check-N*W t N (make-str W))
;; try inserting then deleting at each point within the line
(for ([k (in-range W)])
(define new-str (string-append (substring (make-str W) 0 k)
"!"
(substring (make-str W) k)))
(define t+
(for/fold ([t t]) ([i (in-range (sub1 N) -1 -1)])
(adjust-within-line t (+ (* i W) k) "!")))
(check-N*W t+ N new-str)
(define t-
(for/fold ([t t+]) ([i (in-range 0 N)])
(adjust-within-line t (+ (* i W) k) -1)))
(check-N*W t- N (make-str W)))
;; delete a few random lines
(define t-
(for/fold ([t t]) ([i (in-range (- N M 1) -1 -1)])
(delete-line t (* (random (add1 i)) W))))
(check-N*W t- M (make-str W)))
(for ([i (in-range 100)])
(for ([W (in-range 2 6)])
(random-modify-test W)))
(at "random")
(define (random-create-test)
(define n (add1 (random 4096)))
(define str (make-string n))
(for ([i (in-range n)])
(define ch (random 27))
(string-set! str i (if (zero? ch)
#\newline
(integer->char (+ (sub1 (char->integer #\a)) ch)))))
(define t (insert empty 0 str))
(check (get-text t 0 n) str)
(for ([i 1])
(define start (random n))
(define len (random (- n start)))
(check (get-text (delete t start (+ start len)) 0 (- n len))
(string-append (substring str 0 start)
(substring str (+ start len))))))
(for ([i (in-range 1000)])
(random-create-test))
(void))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment