|
|
@@ -0,0 +1,264 @@ |
|
|
window._gzip = (function() |
|
|
{ |
|
|
var _matchCount = function(s, word) |
|
|
{ |
|
|
var n = 0, wordLen = word.length, p = -wordLen; |
|
|
|
|
|
while((p = s.indexOf(word, p + wordLen)) > -1) |
|
|
++n; |
|
|
|
|
|
return n; |
|
|
}; |
|
|
|
|
|
var _matchCountInDict = function(dict, word) |
|
|
{ |
|
|
var n = 0, i = dict.length; |
|
|
|
|
|
while(i--) |
|
|
{ |
|
|
if(dict[i].word.indexOf(word) > -1) |
|
|
++n; |
|
|
} |
|
|
|
|
|
return n; |
|
|
}; |
|
|
|
|
|
|
|
|
var _escapeForCString = function(s) |
|
|
{ |
|
|
return s.replace(/\\/g, '\\\\'). |
|
|
replace(/\'/g, '\\\''). |
|
|
//replace(/\"/g, '\\\"'). |
|
|
replace(/\r/g, '\\r'). |
|
|
replace(/\0/g, '\\x00'). |
|
|
replace(/\n/g, '\\n'); |
|
|
}; |
|
|
var reEscapeableRE = /[\[\]\(\)\{\}\?\!\:\|\+\.\*\'\"]/g; |
|
|
var _escapeForRegExp = function(s) |
|
|
{ |
|
|
return s.replace(reEscapeableRE, '\\$&'); |
|
|
}; |
|
|
var _escapeForRegExpReplace = function(s) |
|
|
{ |
|
|
return s.replace(/\$/g, '\\$$'); |
|
|
}; |
|
|
|
|
|
var _removeEscapebleChars = function(a) |
|
|
{ |
|
|
delete a['^']; |
|
|
delete a['\"']; |
|
|
delete a['\'']; |
|
|
delete a['\\']; |
|
|
delete a['$']; |
|
|
delete a['[']; |
|
|
delete a[']']; |
|
|
delete a['(']; |
|
|
delete a[')']; |
|
|
delete a['{']; |
|
|
delete a['}']; |
|
|
delete a['?']; |
|
|
delete a['!']; |
|
|
delete a[':']; |
|
|
delete a['|']; |
|
|
delete a['+']; |
|
|
delete a['`']; |
|
|
delete a['.']; |
|
|
delete a['*']; |
|
|
}; |
|
|
|
|
|
var _dumpDict = function(dict) |
|
|
{ |
|
|
var t = ''; |
|
|
var d, i = -1; while((d = dict[++i])) |
|
|
t += d.word + '--'; |
|
|
|
|
|
return t; |
|
|
}; |
|
|
|
|
|
var unpackCode = 'for(var s=\'__CODE__\',d=\'__DICT__\'.split("`"),i=0;v=d[i++];)s=s.replace(RegExp(v[0],"g"),v.slice(1));console.log(s)'; |
|
|
|
|
|
/** |
|
|
@fn pack and wrap to <unpackCode> code <s> using gzip like algo (dictonaty based) |
|
|
@param allowNonAcsiiChars {Boolean} {Default = true} true if you allow use 32-256 chars range else 32-127. |
|
|
@param minProfit {Number} {Default = 0} threshold in bytes for each replace pair |
|
|
@return packed and wrapped to <unpackCode> code |
|
|
*/ |
|
|
var _gzip = function(s, allowNonAcsiiChars, minProfit) |
|
|
{ |
|
|
if(allowNonAcsiiChars == null) |
|
|
allowNonAcsiiChars = true; |
|
|
if(minProfit == null) |
|
|
minProfit = 1; |
|
|
|
|
|
var availableCharMap = {}; |
|
|
|
|
|
var i = 31, e = (allowNonAcsiiChars) ? 256 : 127; |
|
|
|
|
|
// gen all available char map |
|
|
while(++i < e) |
|
|
availableCharMap[String.fromCharCode(i)] = 0; |
|
|
|
|
|
// and remove some chars which require escaping (in RegExp too) |
|
|
_removeEscapebleChars(availableCharMap); |
|
|
|
|
|
// reduce availableCharMap - remove chars which used in <s> |
|
|
i = -1, e = s.length; while(++i < e) |
|
|
{ |
|
|
delete availableCharMap[s.charAt(i)]; |
|
|
} |
|
|
|
|
|
// convert <allowNonAcsiiChars> to <availableChars> |
|
|
var availableChars = [], j = 0; |
|
|
|
|
|
for(var i in availableCharMap) |
|
|
{ |
|
|
if(availableCharMap.hasOwnProperty(i)) |
|
|
availableChars[j++] = i; |
|
|
} |
|
|
|
|
|
//console.log(availableChars.join('> <')); |
|
|
//console.log(availableChars); |
|
|
|
|
|
var sLen = s.length; |
|
|
var dict = [], dictLen = 0; |
|
|
|
|
|
// build <dict> |
|
|
|
|
|
i = -1, e = sLen - 2; while(++i < e) |
|
|
{ |
|
|
var c = s.charAt(i); |
|
|
var j = i + 1; |
|
|
|
|
|
do |
|
|
{ |
|
|
while(++j < e && s.charAt(j) != c) |
|
|
; |
|
|
|
|
|
if(j < e && s.charAt(i + 1) == s.charAt(++j)) |
|
|
{ |
|
|
var k = i + 1; |
|
|
|
|
|
do |
|
|
{ |
|
|
var word = s.slice(i, k + 1); |
|
|
|
|
|
//if(!reEscapeableRE.test(word)) |
|
|
dict[dictLen++] = {word: word, matchCount: _matchCount(s, word)}; |
|
|
} |
|
|
while(++k < sLen && ++j < sLen && s.charAt(k) == s.charAt(j)); |
|
|
} |
|
|
} |
|
|
while(j < e); |
|
|
} |
|
|
|
|
|
// sort <dict> by word |
|
|
dict.sort(function(a, b){ return 2*(a.word < b.word) - 1; }); |
|
|
|
|
|
//console.log(_dumpDict(dict)); |
|
|
|
|
|
// remove duplicating words |
|
|
var d, i = -1, word, j = 0; while((d = dict[++i])) |
|
|
{ |
|
|
if(d.word != word) |
|
|
{ |
|
|
dict[j++] = d; |
|
|
word = d.word; |
|
|
} |
|
|
} |
|
|
|
|
|
dict.length = dictLen = j; |
|
|
|
|
|
// word will be replaced using RegExp - prepare escaped vestion |
|
|
var d, i = -1; while((d = dict[++i])) |
|
|
d.escapedWord = _escapeForRegExp(d.word); |
|
|
|
|
|
// calc profit of each word in <dict> |
|
|
var d, i = -1; while((d = dict[++i])) |
|
|
{ |
|
|
var escapedWord = _escapeForRegExpReplace(d.word); |
|
|
|
|
|
d.profit = d.matchCount*(escapedWord.length - 1) - escapedWord.length - 1 - 1; |
|
|
} |
|
|
|
|
|
// remove words with profit < <minProfit> |
|
|
var d, i = -1, j = -1; while((d = dict[++i])) |
|
|
{ |
|
|
if(d.profit > minProfit) |
|
|
dict[++j] = d; |
|
|
} |
|
|
|
|
|
dict.length = dictLen = j + 1; |
|
|
|
|
|
//console.log(_dumpDict(dict)); |
|
|
|
|
|
// and sort <dict> from last to top by profit and word length |
|
|
dict.sort(function(a, b){ return (b.profit - a.profit) || (b.word.length - a.word.length); }); |
|
|
|
|
|
//console.log(_dumpDict(dict)); |
|
|
|
|
|
// assign replaceChar, replace words to replaceChars and remove unused words |
|
|
var t = s; |
|
|
var d, i = -1, j = 0; while((d = dict[++i])) |
|
|
{ |
|
|
if(t.indexOf(d.word) > -1) |
|
|
{ |
|
|
d.replaceChar = availableChars[j]; |
|
|
t = t.replace(new RegExp(d.escapedWord, 'g'), d.replaceChar); |
|
|
dict[j] = d; |
|
|
j++; |
|
|
} |
|
|
} |
|
|
|
|
|
dict.length = dictLen = j; |
|
|
|
|
|
console.log('t = ', t); |
|
|
|
|
|
// compress <dict> |
|
|
// calc match one word in other words |
|
|
var d, i = -1; while((d = dict[++i])) |
|
|
d.matchCountInDict = _matchCountInDict(dict, d.word); |
|
|
|
|
|
// sort <dict> by matchCountInDict from max to min |
|
|
dict.sort(function(a, b){ return a.matchCountInDict - b.matchCountInDict; }); |
|
|
|
|
|
// redice <dict> |
|
|
var d, i = -1; while((d = dict[++i])) |
|
|
{ |
|
|
var word = d.word; |
|
|
|
|
|
var j = i, d2; while((d2 = dict[++i])) |
|
|
word = word.replace(new RegExp(d2.escapedWord, 'g'), d2.replaceChar); |
|
|
|
|
|
d.word = word; |
|
|
} |
|
|
|
|
|
//console.log(_dumpDict(dict)); |
|
|
console.log(dict); |
|
|
//return; |
|
|
|
|
|
// final |
|
|
|
|
|
// serialize <dict> |
|
|
var dictString = ''; |
|
|
var d, i = -1; while((d = dict[++i])) |
|
|
dictString += d.replaceChar + _escapeForRegExpReplace(d.word) + '`'; |
|
|
|
|
|
dictString = dictString.slice(0, -1); |
|
|
|
|
|
console.log(dictString); |
|
|
|
|
|
// and create unpack code |
|
|
var code = unpackCode. |
|
|
replace('__DICT__', _escapeForCString(dictString)). |
|
|
replace('__CODE__', _escapeForCString(t)) |
|
|
; |
|
|
|
|
|
console.log('code.length = ', code.length); |
|
|
console.log('s.length = ', s.length); |
|
|
console.log('profit = ', (s.length/code.length*100).toFixed(2), '%'); |
|
|
|
|
|
return code; |
|
|
}; |
|
|
|
|
|
return _gzip; |
|
|
})(); |