Created
April 14, 2014 11:36
-
-
Save chunpu/10640120 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<script type='x' id='markdown'> | |
title | |
=== | |
> blockquote | |
code | |
code2 | |
### head | |
</script> | |
<script> | |
/*var tokenRules = [ | |
['number', /^\d+/], | |
['space', /^ +/], | |
'+', '-', '*', '/' | |
] | |
var grammarRules = [ | |
['number1', ['number * number', 'number / number'], function(a, op, b) { | |
return op == '*' ? a * b : a / b | |
}], | |
['number2', ['number + number', 'number - number'], function(a, op, b) { | |
return op == '-' ? a - b : +a + +b // fuck str... | |
}], | |
['number', ['number1', 'number2']] | |
] | |
var str = '1 + 2 * 3 + 33 / 3'*/ | |
// markdown! | |
var str = markdown.innerHTML.trim() | |
var tokenRules = [ | |
['hr', /^ *-{3,} *\n/], // hr 或者 h2 | |
['h1', /^ *={3,} *\n/], | |
['newline', /^\n+/], | |
['code', /^( {4}[^\n]+\n*)+/], | |
['head', /^ *#{1,6} */], | |
['blockquote', /^ *> */], | |
['text', /[^\n]+/] // inline will be another parser | |
] | |
var grammarRules = [ | |
['block', ['blockquote block'], function(a, block) { | |
return { | |
blockquote: block | |
} | |
}], | |
['block', ['text']], | |
['block', ['head block'], function(head, block) { | |
var o = {} | |
o['h' + head.trim().length] = block | |
return o | |
}], | |
['block', ['newline block'], function(a, b) { | |
return b | |
}], | |
['block', ['block newline h1'], function(block) { | |
return { | |
h1: block | |
} | |
}], | |
['block', ['code'], function(a) { | |
return { | |
code: a | |
} | |
}], | |
['blocks', ['block block', 'blocks block', 'block blocks'], function(a, b) { | |
if (a.push) {a.push(b); return a} | |
else if (b.push) {b.unshift(a); return b} | |
return [a, b] | |
}] | |
] | |
function lex(str, tokenRules) { | |
var tokens = [] | |
var tmp | |
while (str) { | |
tryMatch() | |
} | |
return tokens | |
function tryMatch() { | |
for (var i = 0, a; a = tokenRules[i]; i++) { | |
if (typeof a == 'string' && str.substr(0, a.length) == a) { | |
// 字符串 | |
tokens.push([a, a]) | |
str = str.substr(a.length) | |
return | |
} | |
else if (a[1] && a[1].exec) { | |
// 正则 | |
if (tmp = a[1].exec(str)) { | |
if (a[0] != 'space') tokens.push([a[0], tmp[0]]) | |
str = str.substr(tmp[0].length) | |
return | |
} | |
} | |
} | |
} | |
} | |
function parser(tokens, grammarRules) { | |
var stack = [] | |
window.s = stack | |
var bnf = grammarRules.map(function(rule) { | |
return rule[1].join(' ') | |
}) | |
//console.log(bnf) | |
while (tokens.length) { | |
stack.push(tokens.shift()) | |
tryMatch() | |
} | |
return stack | |
function tryMatch() { | |
for (var i = 0, a; a = grammarRules[i]; i++) { | |
// a[1] = ['number * number', 'number / number'] | |
for (var j = 0, b; b = a[1][j]; j++) { | |
var len = b.split(' ').length | |
if (stack.length >= len) { | |
// 要足够匹配, 从后面开始匹配, 取出stack后面len个 | |
var tests = stack.slice(stack.length - len) | |
var str = tests.map(function(a) { | |
return a[0] | |
}).join(' ') | |
//console.log(str) | |
if (str == b) { | |
// 匹配成功 | |
// 判断优先级 | |
var lookahead = getLookahead(stack[stack.length - 1][0]) | |
// i是规约的等级 | |
var reduceLevel | |
if (tokens.length && lookahead.length) { | |
// 如果可能移近的话 | |
for (var k = 0, c; c = lookahead[k]; k++) { | |
if (c[0] == tokens[0][0]) { | |
reduceLevel = c[1] | |
} | |
} | |
} | |
//console.log(reduceLevel, i) | |
if (reduceLevel !== undefined && reduceLevel < i) { | |
// 应该移近 | |
stack.push(tokens.shift()) | |
} else { | |
// 规约 | |
var fn = a[2] || function(a) {return a} // 没有就是返回自己 | |
var q = tests.map(function(a) { | |
return a[1] | |
}) | |
var val = fn.apply(null, q) | |
//console.log(q + ' -> ' + val, '\n', str + ' -> ' + a[0], '\n') // 显示结果 | |
var newToken = [a[0], val] | |
stack.splice(stack.length - len, len, newToken) | |
tryMatch() // try again when we reduce | |
} | |
return | |
} | |
} | |
} | |
} | |
} | |
function getLookahead(x) { | |
var reg = new RegExp(x + ' (\\S+)', 'g') | |
var lookahead = [], ret | |
for (var i = 0; i < bnf.length; i++) { | |
while (ret = reg.exec(bnf[i])) { | |
lookahead.push([ret[1], i]) | |
} | |
} | |
return lookahead | |
} | |
} | |
var tokens = lex(str, tokenRules) | |
//console.log(tokens) | |
var ast = parser(tokens, grammarRules) | |
var html = render(ast[0][1]) | |
document.write(html) | |
function render(blocks) { | |
console.log(blocks) | |
var html = '' | |
if (Array.isArray(blocks)) { | |
for (var i = 0; i < blocks.length; i++) { | |
html += render(blocks[i]) | |
} | |
} else { | |
for (var k in blocks) { | |
console.log(k, blocks[k]) | |
html += '<' + k + '>' + blocks[k] + '</' + k + '>\n' | |
} | |
} | |
return html | |
} | |
</script> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment