Skip to content

Instantly share code, notes, and snippets.

@dcrosta
Created November 4, 2011 17:50

Revisions

  1. dcrosta revised this gist Jun 15, 2012. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion htmlabbrev.py
    Original file line number Diff line number Diff line change
    @@ -52,7 +52,7 @@ def handle_entityref(self, name):
    self.emit('&%s;' % name)

    def handle_charref(self, name):
    -return self.handle_entityref(name)
    +return self.handle_entityref('#%s' % name)

    def close(self):
    return ''.join(self.out)
  2. dcrosta revised this gist Dec 2, 2011. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions htmlabbrev.py
    Original file line number Diff line number Diff line change
    @@ -14,10 +14,10 @@ def __init__(self, maxlength, *args, **kwargs):
    self.out = []

    def emit(self, thing, count=False):
    +if count:
    +self.length += len(thing)
    if self.length < self.maxlength:
    self.out.append(thing)
    -if count:
    -self.length += len(thing)
    elif not self.done:
    # trim trailing whitespace
    self.out[-1] = self.out[-1].rstrip()
  3. dcrosta created this gist Nov 4, 2011.
    58 changes: 58 additions & 0 deletions htmlabbrev.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,58 @@
    import re
    from HTMLParser import HTMLParser

    # NOTE: despite its name, this pattern matches runs of *word* characters.
    # Splitting on it (the group is capturing) yields alternating non-word and
    # word chunks, which lets the text be emitted and cut off word by word.
    whitespace = re.compile(r'(\w+)')


    class HTMLAbbrev(HTMLParser):
        """Re-emit parsed HTML, cutting it off after ``maxlength`` text chars.

        Markup is copied to the output without being counted; only text passed
        to ``handle_data`` counts toward the budget.  As soon as the running
        text length reaches ``maxlength`` the output stops growing and every
        tag still open at that point is closed.

        Usage: create an instance, call ``feed(html)`` and then ``close()``,
        which returns the abbreviated markup as a string.
        """

        # Elements that never take an end tag.  They must not be tracked on
        # the open-tag stack, otherwise the next real end tag mismatches.
        VOID_ELEMENTS = frozenset([
            'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
            'link', 'meta', 'param', 'source', 'track', 'wbr',
        ])

        def __init__(self, maxlength, *args, **kwargs):
            """Set up an empty output buffer.

            ``maxlength`` is the text budget; any further positional or
            keyword arguments are forwarded to ``HTMLParser.__init__``.
            """
            HTMLParser.__init__(self, *args, **kwargs)
            self.stack = []             # names of currently open tags
            self.maxlength = maxlength
            self.length = 0             # text characters counted so far
            self.done = False           # True once the output was cut off
            self.out = []               # output fragments, joined in close()

        def emit(self, thing, count=False):
            """Append ``thing`` to the output while the budget allows it.

            With ``count=True`` the length of ``thing`` is charged against
            the budget *before* the check, so the chunk that would reach
            ``maxlength`` is not written.  The first time the budget is
            exhausted the open tags are closed; later calls are ignored.
            """
            if count:
                self.length += len(thing)
            if self.length < self.maxlength:
                self.out.append(thing)
            elif not self.done:
                # trim trailing whitespace (nothing to trim if the budget
                # ran out before anything was emitted)
                if self.out:
                    self.out[-1] = self.out[-1].rstrip()

                # close out tags on the stack
                for tag in reversed(self.stack):
                    self.out.append('</%s>' % tag)
                self.done = True

        def _format_attrs(self, attrs):
            """Render ``(name, value)`` pairs as text to follow a tag name.

            Each attribute is preceded by a space.  Values are escaped so the
            emitted markup stays well formed, and an attribute whose value is
            ``None`` is written as a bare name.
            """
            parts = []
            for name, value in attrs:
                if value is None:
                    parts.append(' %s' % name)
                    continue
                # '&' must be replaced first so later entities stay intact.
                value = value.replace('&', '&amp;').replace('<', '&lt;')
                value = value.replace('>', '&gt;').replace('"', '&quot;')
                parts.append(' %s="%s"' % (name, value))
            return ''.join(parts)

        def handle_starttag(self, tag, attrs):
            """Emit a start tag and remember it as open (unless it is void)."""
            # Emit before pushing: if this very call exhausts the budget, the
            # close-out loop must not close a tag that was never written.
            self.emit('<%s%s>' % (tag, self._format_attrs(attrs)))
            if tag not in self.VOID_ELEMENTS:
                self.stack.append(tag)

        def handle_endtag(self, tag):
            """Emit an end tag and pop it from the open-tag stack.

            End tags of void elements are dropped.  Raises ``Exception`` when
            ``tag`` is not the innermost open tag.
            """
            if tag in self.VOID_ELEMENTS:
                return
            if not self.stack or tag != self.stack[-1]:
                raise Exception(
                    'end tag %r does not match stack: %r' % (tag, self.stack))
            self.emit('</%s>' % tag)
            del self.stack[-1]

        def handle_startendtag(self, tag, attrs):
            """Emit a self-closing tag; it is never pushed on the stack."""
            self.emit('<%s%s/>' % (tag, self._format_attrs(attrs)))

        def handle_data(self, data):
            """Emit text chunk by chunk, charging each chunk to the budget."""
            for word in whitespace.split(data):
                self.emit(word, count=True)

        def handle_entityref(self, name):
            """Re-emit a named entity reference; it is not counted as text."""
            self.emit('&%s;' % name)

        def handle_charref(self, name):
            """Re-emit a numeric character reference, keeping its ``#``."""
            return self.handle_entityref('#%s' % name)

        def close(self):
            """Finish parsing and return the abbreviated markup as a string."""
            # Let the base class process whatever input is still buffered.
            HTMLParser.close(self)
            return ''.join(self.out)