Created
April 24, 2013 11:07
Old enamel hack
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;;; Enamel - Erik Naggum's Markup Language | |
;;; | |
;;; See http://xach.com/naggum/articles/[email protected] | |
;;; | |
(define-condition malformed-enamel (error) ()) | |
(defun parse-enamel (in &key (case :upcase) (strip-newlines t) (distinguish-attributes nil) (package *package*)) | |
(let ((node '())) | |
(labels ((skip-ws () (peek-char t in nil)) | |
(peek () (peek-char nil in nil)) | |
(consume () | |
(or (read-char in nil nil) | |
(error 'malformed-enamel))) | |
(expect (char) | |
(when (not (eql (consume) char)) | |
(error 'malformed-enamel))) | |
(finish () | |
(return-from parse-enamel | |
(nreverse node))) | |
(correct-case (char) | |
(ecase case | |
(:upcase (char-upcase char)) | |
(:downcase (char-downcase char)) | |
(:preserve char))) | |
(read-name () | |
(let ((s (with-output-to-string (out) | |
(block read-name-loop | |
(loop | |
(case (peek) | |
((#\Space #\| #\< #\> nil) | |
(return-from read-name-loop)) | |
((#\\) | |
(consume) | |
(write-char (consume) out)) | |
(t | |
(write-char (correct-case (consume)) out)))))))) | |
(push (intern s package) node))) | |
(maybe-read-content-separator () | |
(when (eql (peek) #\|) | |
(consume))) | |
(read-node () | |
(push (parse-enamel in | |
:case case | |
:strip-newlines strip-newlines | |
:distinguish-attributes distinguish-attributes | |
:package package) | |
node)) | |
(read-entity () | |
(expect #\[) | |
(let ((s (with-output-to-string (out) | |
(block entity-name-loop | |
(loop | |
(case (peek) | |
((#\]) | |
(consume) | |
(return-from entity-name-loop)) | |
((#\\) | |
(consume) | |
(write-char (consume) out)) | |
((nil) | |
(error 'malformed-enamel)) | |
(t | |
(write-char (correct-case (consume)) out)))))))) | |
(push (intern s package) node))) | |
(read-contents () | |
(let ((string-out (make-string-output-stream))) | |
(labels ((finish-string () | |
(let ((s (get-output-stream-string string-out))) | |
(when (plusp (length s)) | |
(push s node))))) | |
(loop | |
(case (peek) | |
((nil) (error 'malformed-enamel)) | |
((#\<) (finish-string) (read-node)) | |
((#\>) (finish-string) (return-from read-contents)) | |
((#\[) (finish-string) (read-entity)) | |
((#\\) (consume) (write-char (consume) string-out)) | |
((#\Return #\Linefeed #\Newline) | |
(if strip-newlines | |
(skip-ws) | |
(write-char (consume) string-out))) | |
(t (write-char (consume) string-out))))))) | |
(maybe-read-attributes () | |
(loop while (eql (peek) #\<) do (read-node) (skip-ws)))) | |
(skip-ws) | |
(when (null (peek)) (finish)) | |
(expect #\<) | |
(read-name) | |
(skip-ws) | |
(maybe-read-attributes) | |
(skip-ws) | |
(when (maybe-read-content-separator) | |
(when distinguish-attributes | |
(push (intern "" package) node)) | |
(read-contents)) | |
(expect #\>) | |
(finish)))) | |
(defun parse-enamel-from-string (string &key (start 0) end (case :upcase) (strip-newlines t) (distinguish-attributes nil) (package *package*)) | |
(with-input-from-string (in string :start start :end end) | |
(parse-enamel in | |
:case case | |
:strip-newlines strip-newlines | |
:distinguish-attributes distinguish-attributes | |
:package package))) | |
(defun test () | |
(macrolet ((aver (string rep &rest args) | |
`(let ((actual (parse-enamel-from-string ,string ,@args))) | |
(unless (equal actual ',rep) | |
(error "~S parses as ~S, but ~S was expected." ,string actual ',rep))))) | |
(aver "<foo>" (foo)) | |
(aver "<foo <bar|zot>>" (foo (bar "zot"))) | |
(aver "<foo|zot>" (foo "zot")) | |
(aver "<foo <bar|zot> |quux>" (foo (bar "zot") "quux")) | |
(aver "<foo|Hey, [quux]!>" (foo "Hey, " quux "!")) | |
(aver "<foo|AT&T you will>" (foo "AT&T you will")) | |
(aver "<foo|<bar|zot>>" (foo (bar "zot"))) | |
(aver "<foo <bar|zot> <quux|oink>>" (foo (bar "zot") (quux "oink"))) | |
;; The following is an apparent deviation from Naggum's syntax. | |
;; He says that the Enamel should translate to: | |
;; | |
;; (foo (bar "zot") || (bar "zot")) | |
;; | |
;; Presumably, "<foo|<bar|zot>>" would translate to: | |
;; | |
;; (foo (bar "zot")) | |
;; | |
;; This means we need to scan for || to know if the first bar node | |
;; is an attribute or not. [According to Naggum we do not need to | |
;; do this in the fully parsed structure, where all attributes are | |
;; present. This hack is useful only in the context of dealing | |
;; with non-fully parsed structure, however.] If we postulate | |
;; that content is always preceded by ||, we can assume that the | |
;; first bar node is an attribute without scanning forward. | |
(aver "<foo <bar|zot>|<bar|zot>>" (foo (bar || "zot") || (bar || "zot")) | |
:distinguish-attributes t)) | |
'ok) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment