-- Last active
-- January 7, 2017 06:08
-- -
-- -
-- Save kaeza/611b22c81835894887643c06684e2cac to your computer and use it in GitHub Desktop.
-- This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
-- Learn more about bidirectional Unicode characters
---
-- Table serialization with minification.
--
-- This module contains functions to serialize tables to strings, and
-- deserialize these strings back to a table.
--
-- The serialized string is a representation of the table in Lua code.
-- The module does its best to generate the most compact code possible.
--
-- Tables with consecutive numerical indices starting from 1 ("arrays")
-- are efficiently stored by omitting the key. Numerical indices after
-- the first nil element are output adorned.
--
--     local t = { 42, "Hello!", nil, "blah" }
--     print(serialize(t)) --> {42,"Hello!",[4]="blah"}
--
-- Keys that are considered valid identifiers are output unadorned; other
-- keys (including reserved words) are serialized as `[key]`.
--
--     local t = { a=1, ["b"]=2, c=3 }
--     t["true"] = true
--     -- Note that this is just an example; the order of non-array
--     -- fields is random, so they may not appear as shown here.
--     print(serialize(t)) --> {a=1,b=2,c=3,["true"]=true}
--
-- A key is a valid identifier if and only if all the following are true:
--
-- * It is a string, and is not empty.
-- * It consists of only letters, digits, or the underscore.
-- * It does not begin with a digit.
-- * It is not a reserved word as listed in the "Lexical Conventions"
--   of the Lua manual (sec. 2.1 in 5.1, 3.1 in 5.3).
--
-- Since what Lua considers a "letter" or "digit" depends on the locale,
-- we take a shortcut and only take into account ASCII letters and digits.
--
-- As you have seen above, the serialization algorithm only inserts a
-- comma if needed, and it doesn't add any spaces (except in actual
-- strings). The serialized data does not contain the `return` statement,
-- so this must be added if needed. The `deserialize` function provided by
-- this module takes care of adding the return statement if needed.
--
-- Please note that not all tables can be serialized:
--
-- * For keys, only strings, numbers, and booleans are supported. For
--   values, tables are supported in addition to the types for keys.
--   An unsupported type will raise an error.
-- * Tables containing circular references may cause a stack overflow
--   error or lock up the interpreter.
-- * Tables referenced more than once in the tree will be serialized
--   separately each time, and will result in references to different
--   tables on deserialization.
--
-- Also note that this module does not handle persistence; that must
-- be handled by the caller.
--
-- @module minser
-- @author kaeza <https://github.com/kaeza>
local M = { }

-- Descriptive metadata about the module (name, release date, license).
M._NAME = "minser"
M._VERSION = "2016.12.27"
M._LICENSE = "Unlicense <https://unlicense.org>"
-- Localize everything. | |
local error, tostring, type, assert, loadstring, setfenv, load = | |
error, tostring, type, assert, loadstring, setfenv, load | |
local tconcat, strformat, strfind, strmatch = | |
table.concat, string.format, string.find, string.match | |
-- Set of the reserved words of the Lua language, keyed by the word itself
-- (`reserved[word] == true`). The word list is taken from section 3.1
-- "Lexical Conventions" in the manual for Lua 5.3.
local reserved = { }
for _, word in ipairs({
    "and", "break", "do", "else", "elseif", "end", "false", "for",
    "function", "goto", "if", "in", "local", "nil", "not", "or",
    "repeat", "return", "then", "true", "until", "while",
}) do
    reserved[word] = true
end
-- Tell whether the string `k` may be written as a bare field name in a
-- table constructor: non-empty, made only of ASCII letters, digits and
-- underscores, not starting with a digit, and not a reserved word.
-- Returns a boolean.
local function isvalidkey(k)
    if reserved[k] then
        return false
    end
    -- One anchored pattern covers the "non-empty", "no leading digit" and
    -- "identifier characters only" rules at once.
    return strfind(k, "^[A-Za-z_][A-Za-z0-9_]*$") ~= nil
end
-- Return the shortest Lua source text that reads back as the number `n`.
local function reprnum(n)
    -- NaN and the infinities have no literal form; spell them as constant
    -- expressions, which need no globals to evaluate.
    if n ~= n then
        return "0/0"
    elseif n == 1/0 then
        return "1/0"
    elseif n == -1/0 then
        return "-1/0"
    end
    local s = tostring(n)
    if tonumber(s) ~= n then
        -- `tostring` rounded the value away. 17 significant digits are
        -- enough to reproduce a double exactly (assumes Lua numbers are
        -- IEEE 754 doubles, as in the default build).
        s = strformat("%.17g", n)
    end
    -- Drop a redundant integer part: "0.5" -> ".5", "-0.5" -> "-.5".
    local sign, frac = strmatch(s, "^(%-?)0(%..*)")
    if sign then
        return sign..frac
    end
    return s
end

-- Return the representation of a key, as it must appear to the left of
-- the `=` in a table constructor. Raises an error for key types other
-- than string, number and boolean.
local function reprkey(k)
    local t = type(k)
    if t == "string" then
        -- Identifier-like strings are written bare, anything else quoted.
        return isvalidkey(k) and k or strformat("[%q]", k)
    elseif t == "number" then
        return "["..reprnum(k).."]"
    elseif t == "boolean" then
        return "["..tostring(k).."]"
    end
    error("unsupported key type: "..t)
end

-- Forward declaration: `reprval` and `serializetable` call each other.
local serializetable

-- Return the representation of a value. `path` is the set of tables
-- currently being serialized; it is handed on to nested tables.
-- Raises an error for unsupported value types.
local function reprval(v, path)
    local t = type(v)
    if t == "string" then
        return strformat("%q", v)
    elseif t == "number" then
        return reprnum(v)
    elseif t == "boolean" or t == "nil" then
        return tostring(v)
    elseif t == "table" then
        return serializetable(v, path)
    end
    error("unsupported value type: "..t)
end

-- Serialize table `t`. `path` holds every table on the way from the root
-- down to `t`, so meeting one of them again means the data is circular.
function serializetable(t, path)
    if path[t] then
        error("cannot serialize a circular reference")
    end
    path[t] = true
    local out, n = { }, 0
    -- Serialize the array part (keys 1, 2, ... up to the first nil)
    -- without keys. Afterwards `cutoff` is the first index NOT written.
    local cutoff = 1
    while true do
        local v = t[cutoff]
        if v == nil then break end
        n = n + 1
        out[n] = reprval(v, path)
        cutoff = cutoff + 1
    end
    for k, v in pairs(t) do
        -- Only serialize non-numbers, or numbers not part of the "array".
        if type(k) ~= "number" or k < 1 or k >= cutoff or k % 1 ~= 0 then
            n = n + 1
            out[n] = reprkey(k).."="..reprval(v, path)
        end
    end
    -- Leaving this table: it may legitimately appear again elsewhere in
    -- the tree, as long as it is not its own ancestor.
    path[t] = nil
    return "{"..tconcat(out, ",").."}"
end

---
-- Serializes a table to a string.
--
-- Numbers are written so that they read back as the same value. Tables
-- that contain themselves, directly or indirectly, raise an error; a
-- table that merely appears several times is serialized once per
-- appearance.
--
-- @function serialize
-- @tparam table t Table to serialize. See notes in the module
--  description for possible issues.
-- @return A string.
local function serialize(t)
    assert(type(t) == "table", "argument #1 must be a table")
    return serializetable(t, { })
end
---
-- Deserialize a string into a table.
--
-- **IMPORTANT NOTE: Don't ever pass strings received from untrusted
-- sources to this function! It loads the data as Lua code in order to
-- deserialize it, and while it has some measures in place to thwart some
-- attacks, it can't be guaranteed it will prevent all kinds of code
-- injection!**
--
-- If some error occurs parsing the Lua code, or the value to be returned
-- is not a table, this function raises an error.
--
-- @function deserialize
-- @tparam string s String to deserialize.
-- @return The deserialized table.
local function deserialize(s)
    assert(type(s) == "string", "argument #1 must be a string")
    -- Precompiled chunks start with ESC; never hand those to the loader.
    if s:sub(1, 1) == "\27" then
        error("refused to load bytecode")
    end
    -- The chunk runs with this table as its environment instead of the
    -- real globals.
    -- NOTE(review): this is not a full sandbox; e.g. string values can
    -- still reach the `string` library through their metatable.
    local env = { }
    env._ENV, env._G = env, env
    if not strmatch(s, "^%s*return%s*%{") then
        -- The space keeps data such as `nil` from fusing with the keyword
        -- into an identifier (`returnnil`).
        s = "return "..s
    end
    local f
    if setfenv then -- Lua 5.1
        f = assert(loadstring(s))
        setfenv(f, env)
    else
        -- "t" makes the loader itself refuse binary chunks; `assert`
        -- turns a parse failure into an error carrying the message.
        f = assert(load(s, s, "t", env))
    end
    local t = f()
    assert(type(t) == "table", "invalid data")
    return t
end
-- Public interface of the module.
M.serialize = serialize
M.deserialize = deserialize

return M
-- Sign up for free
-- to join this conversation on GitHub.
-- Already have an account?
-- Sign in to comment