Created
October 17, 2015 23:16
-
-
Save cemeyer/d31af5bbe9490c6a275c to your computer and use it in GitHub Desktop.
Convert the raw output of man(1) to HTML-ish
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Extracted from the 2-clause BSD licensed man.cgi:
#   https://www.freebsd.org/cgi/man.cgi/source
#
# Usage example:
#   $ man 3 tree | path/to/man2html.pl > tree.3.html
# Encode text of unknown origin (raw man(1) output) for use as HTML.
#
# HTML metacharacters (&, <, >) are replaced by character references and
# ^H (backspace, \010) overstrike sequences are interpreted a la nroff:
#   - runs of "_^Hx"  (underscore overstruck by a non-underscore) -> <i>..</i>
#   - runs of "x^Hy"  (any other overstruck pair)                 -> <b>..</b>
# Any overstrike left over afterwards is reduced to its second character.
#
# Takes the text as its single argument and returns the encoded copy.
sub encode_data {
    my ($text) = @_;

    # An overstrike whose second character is an HTML metacharacter is
    # dropped up front (keeping the first character), so that the escaping
    # below cannot split a "x^Hy" pair in the middle.
    $text =~ s/\010[><&]//g;

    # Escape the HTML metacharacters; "&" must go first so the entities
    # produced for "<" and ">" are not escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    # Underlined text.  $1 is copied before the inner substitution because
    # that substitution would otherwise clobber the capture variables.
    $text =~ s/((?:_\010[^_])+)/(my $run = $1) =~ s,.\010,,g; "<i>$run<\/i>"/ge;

    # Bold text (a character overstruck by another character).
    $text =~ s/((?:.\010.)+)/(my $run = $1) =~ s,.\010,,g; "<b>$run<\/b>"/ge;

    # Clean up whatever overstrikes are left.
    $text =~ s/.\010//g;

    return $text;
}
# Percent-encode a string for use inside a URL query component.
#
# Escaped as %xx (lower-case hex):
#   - control characters \000-\037
#   - the characters RFC 1738 lists as possibly reserved:  ; / ? : @ & =
#   - the escape character itself:  %
#   - quoting and markup characters:  ' " ` < >
#   - DEL and every byte above it (\177-\377)
#   - the space character, which is then rewritten as "+"
# Everything else, including a literal "+", passes through unchanged.
#
# Takes the string as its single argument and returns the encoded copy.
sub encode_url {
    my ($url) = @_;

    # The control range runs to \037 (decimal 31) so that ESC..US are
    # escaped together with the other control characters.
    $url =~ s/([\000-\037;\/?:\@&=%'"`<>\177-\377 ])/sprintf('%%%02x', ord($1))/eg;

    # Form-style encoding: a space is written as "+".
    $url =~ s/%20/+/g;

    return $url;
}
# Turn a matched "name(section)" cross reference into a man.cgi hyperlink.
#
# $matched is the reference exactly as it appeared in the page.  It may
# contain whitespace and <i>/<b> markup; both are stripped to build the
# query, while the visible link text is $matched unchanged.
#
# Reads the package globals $BASE (prefix of the link target) and $manpath.
# NOTE(review): neither is assigned anywhere in this file; when unset they
# interpolate as empty strings, giving a relative "?query=..." link.
#
# Returns the <a href="...">...</a> markup as a string.
sub mlnk {
    my ($matched) = @_;

    # Reduce the reference to a bare "name(section)".
    ( my $ref = $matched ) =~ s/\s+//g;
    $ref =~ s/<\/?[IB]>//ig;

    # Both are undef if the text is not of the form "name(section)".
    my ( $page, $section ) = $ref =~ m/^([^\(]*)\((.*)\)/;

    my $q_page    = encode_url($page);
    my $q_section = encode_url($section);
    my $q_manpath = encode_url($manpath);

    # Parameter separators are written as &amp; because the URL is emitted
    # inside an HTML attribute value.
    return qq{<a href="$BASE?query=$q_page}
      . qq{&amp;sektion=$q_section&amp;apropos=0&amp;manpath=$q_manpath">$matched</a>};
}
# Main filter: read man(1) output from STDIN (or the files named on the
# command line) and print it as a <pre>-formatted HTML page.

my $prev_blank = 1;    # true while the previous line was blank; starts true
                       # so that leading blank lines are dropped as well
my @sect;              # [ anchor, title ] for every section heading seen

print "<html>\n<body>\n";
print "<pre>\n";

while (<>) {

    # Squeeze every run of blank lines down to a single blank line.
    if (/^\s+$/) {
        next if $prev_blank;
        $prev_blank = 1;
    }
    else {
        $prev_blank = 0;
    }

    $_ = encode_data($_);

    # Link "name(section)" cross references.  Only indented lines are
    # considered; lines starting in column 0 are headers.
    if (/^\s/) {
        s{((<[IB]>)?[\w\_\.\-]+\s*(</[IB]>)?\s*\(([1-9ln][a-zA-Z]*)\))}{mlnk($1)}ige;
    }

    # Detect e-mail addresses.  $enable_mailto_links is a package global
    # that is not set in this file, so this is off unless set elsewhere.
    if ( $enable_mailto_links && /\@/ ) {
        s/([a-z0-9_\-\.]+\@[a-z0-9\-\.]+\.[a-z]+)/<a href="mailto:$1">$1<\/a>/gi;
    }

    # Detect ftp, http and https URLs.  The match stops at whitespace, at a
    # closing parenthesis, at a double quote (which would otherwise end the
    # href attribute early) and at an escaped angle bracket, so that a URL
    # written as <http://...> does not swallow the trailing "&gt;".
    if (m{tps?://}i) {
        s{((?:ftp|https?)://(?:(?!&[lg]t;)[^\s<>\)"])+)}{<a href="$1">$1</a>}gi;
    }

    # A line consisting only of bold text is a section heading: give it a
    # named anchor and remember it for the index printed after the page.
    if (/^(<b>.*?<\/b>)+\n?$/) {
        ( my $title = $1 ) =~ s/<\/?b>//g;
        ( my $anchor = encode_url($title) ) =~ s/\+/_/g;
        $_ = qq{<a name="$anchor" href="#end"><b>$title</b></a>\n};
        push @sect, [ $anchor, $title ];
    }

    print;
}

print "</pre>\n";

# Target of the "#end" links on the section headings, followed by an index
# of all sections so those links lead somewhere useful.
print qq{<a name="end"></a>\n};
if (@sect) {
    print "<hr>\n";
    print join( " |\n", map { qq{<a href="#$_->[0]">$_->[1]</a>} } @sect ), "\n";
}

print "</body>\n</html>\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment