"""Convert HTML to Textile syntax using BeautifulSoup.""" __author__ = 'Roberto De Almeida ' from BeautifulSoup import BeautifulSoup def _body(el): """Process body tag.""" for i in el.contents: for j in walk_tree(i): yield j def _block(sig): """Process block tag.""" def block(el): # Get attributes as dict. attrs = {} for k,v in el.attrs: attrs[k] = v # Block signature. yield sig # Store attributes. attrs_ = [] if 'class' in attrs: attrs_.append('%s' % (attrs['class'])) if 'id' in attrs: attrs_.append('#%s' % attrs['id']) if attrs_: yield '(%s)' % ''.join(attrs_) # We can safely always output extended blocks, because we always # use the p. signature for unamed blocks. yield '.. ' for i in el.contents: for j in walk_tree(i): yield j yield '\n\n' return block def _qtag(tag): """Process quick tag.""" def qtag(el): yield tag for i in el.contents: for j in walk_tree(i): yield j yield tag return qtag def _image(el): """Process image tag.""" # Get attributes as dict. attrs = {} for k,v in el.attrs: attrs[k] = v yield '!' yield attrs['src'] if 'alt' in attrs: yield '(%s)' % attrs['alt'] yield '!' def _anchor(el): """Process anchor tag.""" # Get attributes as dict. attrs = {} for k,v in el.attrs: attrs[k] = v # Check for image inside anchor. if getattr(el.contents[0], 'name', None) == 'img': for i in el.contents: for j in walk_tree(i): yield j else: yield '"' for i in el.contents: for j in walk_tree(i): yield j yield '"' if 'href' in attrs: yield ':%s ' % attrs['href'] def _html(el): """Return unrecognized tags as is.""" yield '%s\n\n' % el def walk_tree(el): tags = {'p' : _block('p'), 'blockquote': _block('bq'), 'pre' : _block('pre'), 'strong' : _qtag('*'), 'em' : _qtag('_'), 'b' : _qtag('**'), 'i' : _qtag('__'), 'big' : _qtag('++'), 'small' : _qtag('--'), 'del' : _qtag('-'), 'ins' : _qtag('+'), 'sup' : _qtag('^'), 'sub' : _qtag('~'), 'span' : _qtag('%'), 'code' : _qtag('@'), 'body' : _body, 'img' : _image, 'a' : _anchor, } if getattr(el, 'name', None): # Tag. f = tags.get(el.name, _html) for i in f(el): yield i # Blocks must always be followed by another block, so if we # add the default block signature if it's not the case. if f.__name__ == 'block': next = el.nextSibling # Look for the first named sibling. while not getattr(next, 'name', None) and next: next = next.nextSibling f = tags.get(next.name, None) if f and f.__name__ != 'block': yield 'p. ' else: # String. yield el def detextile(input): """Convert HTML input to Textile syntax.""" soup = BeautifulSoup(input) # Check for body in soup. body = soup('body') if body: soup = body[0] return walk_tree(soup) if __name__ == '__main__': input = """ Page title
Test

This is paragraph one.

This is paragraph two.

Test
    Some code

    Some more code
    
alternative description Example.com """ textile_ = ''.join(detextile(input)) print textile_ import textile print textile.textile(textile_)