Last active
December 16, 2015 12:49
-
-
Save cukabeka/5437512 to your computer and use it in GitHub Desktop.
Converts HTML to Textile. As seen in http://3v1n0.tuxfamily.org/scripts/detextile/HTML-to-Textile.php?source=show
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// -------------------------------------------------------------
// The following class converts HTML back into Textile markup
// ("detextile"); the conversion is still in development.
// By Tim Koschützki
// Based on code from www.aquarionics.com
// -------------------------------------------------------------
class html2textile {

    /**
     * Converts an HTML fragment into Textile markup.
     *
     * Pipeline: lower-case tag names, turn <br> into newlines, collapse blank
     * lines, rewrite <img> and the supported paired tags through processTag(),
     * convert typographic glyphs, convert list markup, drop the implicit "p. "
     * prefix, decode numeric entities and finally strip any remaining tags
     * (except <pre>).
     *
     * @param string $text HTML to convert
     * @return string Textile markup
     */
    public function detextile($text) {
        $text = (string) $text;

        // Lower-case tag names. A callback is used instead of the "/e"
        // modifier, which no longer exists in PHP 7+ (there preg_replace()
        // returns NULL and the whole conversion would produce nothing).
        // Because a callback does not escape its matches, the stripslashes()
        // call that used to undo that escaping is gone as well, so genuine
        // backslashes in the input are preserved.
        $text = preg_replace_callback('/(<\/?)(\w+)([^>]*>)/', array($this, 'lowercaseTagName'), $text);
        $text = preg_replace("/<br[ \/]*>\s*/", "\n", $text);
        $text = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $text);

        $oktags = array('p','ol','ul','li','i','b','em','strong','span','a','h[1-6]',
            'table','tr','td','u','del','sup','sub','blockquote','center','code');

        // The \b after the tag name keeps e.g. "b" from matching <blockquote>
        // or "u" from matching <ul>. An optional trailing "/" of a
        // self-closing <img .../> is kept out of the attribute string.
        $text = preg_replace_callback('/\t*<(img)\b\s*([^>]*?)\s*\/?>/si',
            array($this, 'processTag'), $text);
        foreach ($oktags as $tag) {
            $text = preg_replace_callback('/\t*<(' . $tag . ')\b\s*([^>]*?)>(.*?)<\/\1>/si',
                array($this, 'processTag'), $text);
        }

        $text = $this->detextile_process_glyphs($text);
        $text = $this->detextile_process_lists($text);
        $text = preg_replace('/^\t* *p\. /m', '', $text);

        // sci() emits "/#" and "#\" as stand-ins for "<" and ">" so that the
        // alignment markers survive strip_tags(); swap them back afterwards.
        $text = strip_tags($this->decode_high($text), '<pre>');
        return str_replace(array("#\\", "/#"), array(">", "<"), $text);
    }

    /**
     * preg_replace_callback() helper: lower-cases the tag name of one tag.
     *
     * @param array $m 1 = "<" or "</", 2 = tag name, 3 = rest of the tag
     * @return string
     */
    private function lowercaseTagName($m) {
        return $m[1] . strtolower($m[2]) . $m[3];
    }

    /**
     * Replaces typographic glyphs with the plain ASCII sequences used when
     * typing Textile. Each glyph is accepted both as a literal UTF-8
     * character and as its decimal numeric entity.
     *
     * @param string $text
     * @return string
     */
    public function detextile_process_glyphs($text) {
        $glyphs = array(
            '’' => '\'',   '&#8217;' => '\'',   # single closing
            '‘' => '\'',   '&#8216;' => '\'',   # single opening
            '”' => '"',    '&#8221;' => '"',    # double closing
            '“' => '"',    '&#8220;' => '"',    # double opening
            '—' => '--',   '&#8212;' => '--',   # em dash
            '–' => ' - ',  '&#8211;' => ' - ',  # en dash
            '×' => 'x',    '&#215;'  => 'x',    # dimension sign
            '™' => '(TM)', '&#8482;' => '(TM)', # trademark
            '®' => '(R)',  '&#174;'  => '(R)',  # registered
            '©' => '(C)',  '&#169;'  => '(C)',  # copyright
            '…' => '...',  '&#8230;' => '...'   # ellipsis
        );
        return str_replace(array_keys($glyphs), array_values($glyphs), (string) $text);
    }

    /**
     * Converts <ol>/<ul>/<li> markup into Textile list markers.
     *
     * The list tags themselves are removed; every <li> inside a list becomes
     * the marker for the current nesting ("# ", "* ", "## ", "#* ", ...).
     * Closing </li> tags are left in place (detextile() strips them later).
     *
     * @param string $text
     * @return string
     */
    public function detextile_process_lists($text) {
        $stack = array(); // one '#' or '*' per currently open list
        $out   = array();

        $parts = preg_split('/(<[^>]*>)/', (string) $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        foreach ($parts as $part) {
            if (preg_match('/^<(ol|ul)\b/i', $part, $m)) {
                $stack[] = (strtolower($m[1]) === 'ol') ? '#' : '*';
                $part = '';
            } elseif (preg_match('/^<\/(?:ol|ul)\b/i', $part)) {
                array_pop($stack);
                $part = '';
            } elseif (count($stack) > 0 && preg_match('/^<li\b[^>]*>$/i', $part)) {
                $part = implode('', $stack) . ' ';
            }
            $out[] = $part;
        }
        return implode('', $out);
    }

    /**
     * preg_replace_callback() handler that turns one matched element into
     * its Textile form.
     *
     * @param array $matches 0 = whole match, 1 = tag name, 2 = attribute
     *                       string, 3 = inner content (absent for <img>)
     * @return string Textile for the element, or the unchanged match when the
     *                tag has no Textile equivalent here
     */
    public function processTag($matches) {
        $all     = $matches[0];
        $tag     = strtolower($matches[1]);
        $atts    = isset($matches[2]) ? $matches[2] : '';
        // <img> matches carry no content group.
        $content = isset($matches[3]) ? $matches[3] : '';

        $a = $this->splat($atts);

        $phr = array(
            'em'     => '_',
            'i'      => '__',
            'b'      => '**',
            'strong' => '*',
            'cite'   => '??',
            'del'    => '-',
            'ins'    => '+',
            'u'      => '+',
            'sup'    => '^',
            'sub'    => '~',
            'span'   => '%',
            'code'   => '@'
        );
        $blk = array('p','h1','h2','h3','h4','h5','h6');

        if (isset($phr[$tag])) {
            return $phr[$tag] . $this->sci($a) . $content . $phr[$tag];
        }
        if ($tag === 'blockquote') {
            return 'bq.' . $this->sci($a) . ' ' . $content;
        }
        if ($tag === 'center') {
            return 'p=.' . $this->sci($a) . ' ' . $content;
        }
        if (in_array($tag, $blk, true)) {
            return $tag . $this->sci($a) . '. ' . $content;
        }
        if ($tag === 'a') {
            $t = $this->filterAtts($a, array('href', 'title'));
            if (!isset($t['href'])) {
                // An anchor without a target is not a link: keep its text only.
                return $content;
            }
            $out = '"' . $content;
            if (isset($t['title'])) {
                // Parentheses delimit the title in Textile, so swap them out.
                $out .= ' (' . str_replace(array('(', ')'), array('[', ']'), $t['title']) . ')';
            }
            return $out . '":' . $t['href'];
        }
        if ($tag === 'img') {
            $t = $this->filterAtts($a, array('src', 'alt'));
            if (!isset($t['src'])) {
                // Nothing to reference; the tag would be stripped anyway.
                return '';
            }
            $out = '!' . $t['src'];
            if (isset($t['alt'])) {
                $out .= '(' . str_replace(array('(', ')'), array('[', ']'), $t['alt']) . ')';
            }
            return $out . '!';
        }
        return $all;
    }

    // -------------------------------------------------------------
    /**
     * Picks the wanted, non-empty attributes out of a splat() result.
     *
     * @param array $atts attribute records as returned by splat()
     * @param array $ok   attribute names to keep
     * @return array map of attribute name => value (empty when none match)
     */
    public function filterAtts($atts, $ok)
    {
        $out = array();
        foreach ($atts as $a) {
            if (in_array($a['name'], $ok, true) && $a['att'] !== '') {
                $out[$a['name']] = $a['att'];
            }
        }
        return $out;
    }

    // -------------------------------------------------------------
    /**
     * Builds the Textile attribute modifiers (class, id, style, cite and
     * alignment) for a splat() result.
     *
     * Alignment is emitted with the placeholders "/#" and "#\", which
     * detextile() turns into "<" and ">" after tags have been stripped.
     *
     * NOTE(review): the "(=class)", "[=id]" and "{=style}" forms are kept
     * exactly as they were; they do not look like standard Textile
     * ("(class)", "(#id)", "{style}") - confirm the intended dialect.
     *
     * @param array $a attribute records as returned by splat()
     * @return string
     */
    public function sci($a)
    {
        $out = '';
        foreach ($a as $t) {
            switch ($t['name']) {
                case 'class':
                    $out .= '(=' . $t['att'] . ')';
                    break;
                case 'id':
                    $out .= '[=' . $t['att'] . ']';
                    break;
                case 'style':
                    $out .= '{=' . $t['att'] . '}';
                    break;
                case 'cite':
                    $out .= ':' . $t['att'];
                    break;
                case 'align':
                    switch (strtolower($t['att'])) {
                        case 'left':
                            $out .= '/#';
                            break;
                        case 'right':
                            $out .= '#\\';
                            break;
                        case 'center':
                            $out .= '=';
                            break;
                        case 'justify':
                            $out .= '/##\\';
                            break;
                    }
                    break;
            }
        }
        return $out;
    }

    // -------------------------------------------------------------
    /**
     * Parses an HTML attribute string into a list of records.
     *
     * Each record is array('name' => lower-cased attribute name,
     * 'whole' => the attribute as name=value text, 'att' => the value
     * without quotes). A valueless attribute gets its own name as value.
     * Anything that cannot be parsed is skipped up to the next whitespace.
     *
     * @param string $attr raw attribute text of a tag
     * @return array list of attribute records
     */
    public function splat($attr) // returns attributes as an array
    {
        $arr  = array();
        $atnm = '';
        $mode = 0; // 0 = expecting a name, 1 = expecting "=", 2 = expecting a value
        $attr = (string) $attr;

        while (strlen($attr) != 0) {
            $ok = 0;
            switch ($mode) {
                case 0: // name
                    if (preg_match('/^([a-z]+)/i', $attr, $match)) {
                        // Attribute names are case-insensitive in HTML.
                        $atnm = strtolower($match[1]);
                        $ok = $mode = 1;
                        $attr = (string) substr($attr, strlen($match[0]));
                    }
                    break;

                case 1: // =
                    if (preg_match('/^\s*=\s*/', $attr, $match)) {
                        $ok = 1;
                        $mode = 2;
                        $attr = (string) substr($attr, strlen($match[0]));
                        break;
                    }
                    if (preg_match('/^\s+/', $attr, $match)) {
                        // Valueless attribute followed by another one.
                        $ok = 1;
                        $mode = 0;
                        $arr[] = array('name' => $atnm, 'whole' => $atnm, 'att' => $atnm);
                        $attr = (string) substr($attr, strlen($match[0]));
                    }
                    break;

                case 2: // value
                    if (preg_match('/^("[^"]*")(\s+|$)/', $attr, $match)) {
                        $arr[] = array('name' => $atnm, 'whole' => $atnm . '=' . $match[1],
                            'att' => str_replace('"', '', $match[1]));
                        $ok = 1;
                        $mode = 0;
                        $attr = (string) substr($attr, strlen($match[0]));
                        break;
                    }
                    if (preg_match("/^('[^']*')(\s+|$)/", $attr, $match)) {
                        $arr[] = array('name' => $atnm, 'whole' => $atnm . '=' . $match[1],
                            'att' => str_replace("'", '', $match[1]));
                        $ok = 1;
                        $mode = 0;
                        $attr = (string) substr($attr, strlen($match[0]));
                        break;
                    }
                    // Unquoted value: anything up to whitespace, so that
                    // values such as href=http://example.com/ are kept.
                    if (preg_match('/^([^\s"\'=<>`]+)(\s+|$)/', $attr, $match)) {
                        $arr[] = array('name' => $atnm, 'whole' => $atnm . '="' . $match[1] . '"',
                            'att' => $match[1]);
                        $ok = 1;
                        $mode = 0;
                        $attr = (string) substr($attr, strlen($match[0]));
                    }
                    break;
            }
            if ($ok == 0) {
                // Unparseable: drop everything up to and including the next
                // run of whitespace and start over with a fresh name.
                $attr = preg_replace('/^\S*\s*/', '', $attr);
                $mode = 0;
            }
        }
        if ($mode == 1) {
            // The string ended right after a valueless attribute name.
            $arr[] = array('name' => $atnm, 'whole' => $atnm . '="' . $atnm . '"', 'att' => $atnm);
        }
        return $arr;
    }

    // -------------------------------------------------------------
    /**
     * Returns the conversion map used by decode_high().
     *
     * The map is a flat list of 4-tuples in the layout expected by
     * mb_decode_numericentity(): first code point, last code point, offset,
     * mask. Every range uses offset 0 and mask 0xffff.
     *
     * @return array
     */
    public function cmap() {
        $f = 0xffff;
        $cmap = array(
            160, 255, 0, $f,
            402, 402, 0, $f,
            913, 929, 0, $f,
            931, 937, 0, $f,
            945, 969, 0, $f,
            977, 978, 0, $f,
            982, 982, 0, $f,
            8226, 8226, 0, $f,
            8230, 8230, 0, $f,
            8242, 8243, 0, $f,
            8254, 8254, 0, $f,
            8260, 8260, 0, $f,
            8465, 8465, 0, $f,
            8472, 8472, 0, $f,
            8476, 8476, 0, $f,
            8482, 8482, 0, $f,
            8501, 8501, 0, $f,
            8592, 8596, 0, $f,
            8629, 8629, 0, $f,
            8656, 8660, 0, $f,
            8704, 8704, 0, $f,
            8706, 8707, 0, $f,
            8709, 8709, 0, $f,
            8711, 8713, 0, $f,
            8715, 8715, 0, $f,
            8719, 8719, 0, $f,
            8721, 8722, 0, $f,
            8727, 8727, 0, $f,
            8730, 8730, 0, $f,
            8733, 8734, 0, $f,
            8736, 8736, 0, $f,
            8743, 8747, 0, $f,
            8756, 8756, 0, $f,
            8764, 8764, 0, $f,
            8773, 8773, 0, $f,
            8776, 8776, 0, $f,
            8800, 8801, 0, $f,
            8804, 8805, 0, $f,
            8834, 8836, 0, $f,
            8838, 8839, 0, $f,
            8853, 8853, 0, $f,
            8855, 8855, 0, $f,
            8869, 8869, 0, $f,
            8901, 8901, 0, $f,
            8968, 8971, 0, $f,
            9001, 9002, 0, $f,
            9674, 9674, 0, $f,
            9824, 9824, 0, $f,
            9827, 9827, 0, $f,
            9829, 9830, 0, $f,
            338, 339, 0, $f,
            352, 353, 0, $f,
            376, 376, 0, $f,
            710, 710, 0, $f,
            732, 732, 0, $f,
            8194, 8195, 0, $f,
            8201, 8201, 0, $f,
            8204, 8207, 0, $f,
            8211, 8212, 0, $f,
            8216, 8218, 0, $f,
            8218, 8218, 0, $f,
            8220, 8222, 0, $f,
            8224, 8225, 0, $f,
            8240, 8240, 0, $f,
            8249, 8250, 0, $f,
            8364, 8364, 0, $f
        );
        return $cmap;
    }

    // -------------------------------------------------------------
    /**
     * Decodes decimal numeric entities (&#NNN;) whose code point lies in one
     * of the cmap() ranges into UTF-8 characters.
     *
     * @param string $text
     * @return string
     */
    public function decode_high($text) {
        $text = (string) $text;
        if (function_exists('mb_decode_numericentity')) {
            return mb_decode_numericentity($text, $this->cmap(), 'UTF-8');
        }
        // mbstring is an optional extension; without it, decode the same
        // ranges by hand instead of failing with an undefined function.
        return preg_replace_callback('/&#(\d+);/', array($this, 'decodeMappedEntity'), $text);
    }

    /**
     * Fallback for decode_high(): decodes one decimal entity when its code
     * point falls inside a cmap() range, otherwise returns it untouched.
     *
     * @param array $match 0 = whole entity, 1 = decimal code point
     * @return string
     */
    private function decodeMappedEntity($match) {
        $code = (int) $match[1];
        $cmap = $this->cmap();
        for ($i = 0, $n = count($cmap); $i + 3 < $n; $i += 4) {
            if ($code >= $cmap[$i] && $code <= $cmap[$i + 1]) {
                return html_entity_decode('&#' . $code . ';', ENT_QUOTES, 'UTF-8');
            }
        }
        return $match[0];
    }
}
?>
<html>
<head>
<title>Html to Textile Converter - Html2Textile</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<style type="text/css">
<!--
body,td,th {
    font-family: Verdana, Arial, Helvetica, sans-serif;
    font-size: 12px;
}
.Stile4 {font-size: 9px}
-->
</style>
</head>
<body>
<h2>Html to Textile Converter<br />
<small><small><small>Based on
<a href="http://tinyurl.com/3dfybj">TextileHelper engine</a>
</small></small></small>
</h2>
<?php
// Page controller.
//   no "source" parameter -> show the conversion form (and the result of a
//                            posted conversion, if any)
//   source=show           -> show this script's own source code
//
// $html and $textile are initialised up front so the template below never
// reads an undefined variable on the first (GET) request.
$html    = '';
$textile = '';
$source  = isset($_REQUEST['source']) ? $_REQUEST['source'] : null;
// Escaped once here because it is echoed into several HTML attributes.
$self    = htmlspecialchars($_SERVER['SCRIPT_NAME'], ENT_QUOTES, 'UTF-8');

if ($source === null)
{
    if (isset($_POST['html']) && is_string($_POST['html']))
    {
        $html = $_POST['html'];
        // Only undo magic quotes where they can exist (PHP < 5.4); an
        // unconditional stripslashes() would eat real backslashes.
        if (PHP_VERSION_ID < 50400 && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc())
        {
            $html = stripslashes($html);
        }
        $html2textile = new html2textile;
        $textile = $html2textile->detextile($html);
    }
?>
<form action="<?php echo $self ?>" method="post">
<textarea name="html"
    style="padding: 5px; width: 450px; height: 250px;"
    cols="1" rows="1"
><?php echo htmlspecialchars($html, ENT_QUOTES, 'UTF-8') ?></textarea>
<br />
<input name="html2textile"
    value="HTML to Textile<?php if ($textile) echo ' ↓' ?>"
    class="button"
    style="margin: 1em; padding: 3px; font-size: 10px;"
    type="submit" />
<br />
</form>
<?php
    if ($textile)
    {
?>
<textarea name="textile"
    style="padding: 5px; width: 450px; height: 250px;"
    cols="1" rows="1" readonly="readonly"
><?php echo htmlspecialchars($textile, ENT_QUOTES, 'UTF-8') ?></textarea>
<?php
    }
?>
<?php
}
elseif ($source === "show")
{
    // Deliberately publishes the source of this script.
?>
<table cellspacing="0" width="90%" border="1" align="center" bordercolor="#000033" bgcolor="#E6F2FF">
<tr>
<td width="90%" align="left">
<?php show_source($_SERVER['SCRIPT_FILENAME']) ?>
</td>
</tr>
</table>
<?php
}
?>
<p align="center">
<small>© 2007 Treviño - PHP Script for converting HTML code to Textile<br>
My part is released under GPL License, freely use, copy and redistribute it.<br>
<?php
if ($source === null)
    echo '<a href="' . $self . '?source=show"><i>Source code</i></a>';
elseif ($source === "show")
    echo '<a href="' . $self . '"><i>Hide source code</i></a>';
?>
</small>
</p>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment