AviDuda · November 1, 2023 12:00 · Mar 18, 2016 · May 27, 2013 · May 27, 2013
diff --git a/blogger-to-wordpress.py → blogger-to-disqus.py b/blogger-to-wordpress.py → blogger-to-disqus.py
@@ -1,27 +1,32 @@
-# Blogger's backup file to WordPress' WXR converter.
+# -*- coding: utf-8 -*-
+
+# Blogger's backup file to WordPress' WXR converter for Disqus.
+#
+# Original script: https://gist.github.com/fajran/5659455
 #
 # Only tested with posts and comments, and NOT with pages.
-# May not be efficient for huge blogs since the script keep
+# May not be efficient for huge blogs since the script keeps
 # all content in the memory during conversion.
 #
 # Released as public domain.
 #
-# Please note that I converted the labels in Blogspot
-# as tags in WordPress. I also hardcoded two categories for the
-# WordPress posts. Adjust these first to suit your need.
+# Required library: dateutil - https://labix.org/python-dateutil
+#
+# Usage: python blogger-to-disqus.py blogger.xml > output.xml
 
 import sys
 from datetime import datetime
 from xml.dom.minidom import parse, parseString
 from xml.dom import Node
 import cgi
+from HTMLParser import HTMLParser
 
 import dateutil.parser
 
 inp = sys.argv[1]
 
 def d(*msg):
-    print >>sys.stderr, ' '.join(map(str, msg))
+    print >> sys.stderr, ' '.join(map(str, msg))
 
 class Blog(object):
     class Author(object):
@@ -40,6 +45,7 @@ class Entry(object):
         published = None
         updated = None
         author = None
+        related = None
 
     class Post(Entry):
         draft = False
@@ -113,6 +119,11 @@ def parse_author(self, author):
 
             if name == 'name':
                 data.name = self.get_text(child)
+                if len(data.name) < 3:
+                    data.name = 'Anonymous'
+                else:
+                    # automatically generated email has a max of 75 characters including @wordpress.disqus.net, so use a sane value
+                    data.name = data.name[:42]
             elif name == 'uri':
                 data.uri = self.get_text(child)
             elif name == 'email':
@@ -138,8 +149,8 @@ def parse_entries(self, feed):
         total = len(self.blog.posts)
         d('total posts:', total)
         d('total comments:', len(self.comments))
-        for i, post in enumerate(self.blog.posts):
-            d('(%d / %d) -> %d: %s' % (i+1, total, len(post.comments), post.title))
+        # for i, post in enumerate(self.blog.posts):
+            # d('(%d / %d) -> %d: %s' % (i+1, total, len(post.comments), post.title))
 
     def assign_comments(self):
         i = 0
@@ -151,7 +162,7 @@ def assign_comments(self):
             post = self.post_ids[entry_id]
             post.comments.append(comment)
 
-            d('%s. comment: %s -> %s' % (i+1, id(comment), id(post)))
+            # d('%s. comment: %s -> %s' % (i+1, id(comment), id(post)))
             i += 1
 
     def parse_category(self, category):
@@ -200,6 +211,7 @@ def parse_entry_common(self, entry, target):
                 target.title_type = child.attributes['type'].nodeValue
             elif name == 'content':
                 target.content = self.get_text(child)
+                target.content = target.content.ljust(3, '.') # Disqus requires 3 characters
                 target.content_type = child.attributes['type'].nodeValue
             elif name == 'author':
                 target.author = self.parse_author(child)
@@ -241,6 +253,10 @@ def parse_comment(self, entry):
                 ref = child.attributes['ref'].nodeValue
                 comment.post_entry_id = ref
 
+            if name == 'link' and child.attributes['rel'].nodeValue == 'related':
+                related = child.attributes['href'].nodeValue
+                comment.related = related[related.rfind('/') + 1 :]
+
         return comment
 
 class WXRWriter(object):
@@ -262,15 +278,12 @@ def get_header(self):
         res = []
         res.append('<?xml version="1.0" encoding="UTF-8" ?>')
         res.append('<rss version="2.0"')
-        res.append('     xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"')
         res.append('     xmlns:content="http://purl.org/rss/1.0/modules/content/"')
-        res.append('     xmlns:wfw="http://wellformedweb.org/CommentAPI/"')
+        res.append('     xmlns:dsq="http://www.disqus.com/"')
         res.append('     xmlns:dc="http://purl.org/dc/elements/1.1/"')
-        res.append('     xmlns:wp="http://wordpress.org/export/1.2/">')
+        res.append('     xmlns:wp="http://wordpress.org/export/1.0/">')
 
         res.append('<channel>')
-        res.append('<title>%s</title>' % self.blog.title)
-        res.append('<wp:wxr_version>1.2</wp:wxr_version>')
 
         return res
 
@@ -295,9 +308,19 @@ def get_date_wp(self, ts):
         return ts.strftime("%Y-%m-%d %H:%M:%S")
 
     def escape(self, s):
-        return s
         return cgi.escape(s).encode('ascii', 'xmlcharrefreplace')
 
+    def unescape(self, s):
+        parser = HTMLParser()
+        return parser.unescape(s)
+
+    def get_comment_id(self, comment):
+        self.comment_id += 1
+
+        comment_id_blogger = comment.entry_id[comment.entry_id.rfind('-') + 1 :]
+
+        self.post_comment_ids[comment_id_blogger] = self.comment_id
+
     def get_comment(self, comment):
         status = 1
 
@@ -307,31 +330,25 @@ def get_comment(self, comment):
 
         res.append('  <wp:comment>')
         res.append('    <wp:comment_id>%s</wp:comment_id>' % self.comment_id)
-        if comment.author.name:
-            res.append('    <wp:comment_author>%s</wp:comment_author>' % comment.author.name)
-        if comment.author.email:
-            res.append('    <wp:comment_author_email>%s</wp:comment_author_email>' % comment.author.email)
+
+        if not comment.author.name:
+            comment.author.name = 'Anonymous'
+        res.append('    <wp:comment_author><![CDATA[%s]]></wp:comment_author>' % comment.author.name)
+        # Blogger has the noreply@blogger.com email by default. Uncomment if you need it.
+        # if comment.author.email:
+            # res.append('    <wp:comment_author_email>%s</wp:comment_author_email>' % comment.author.email)
         if comment.author.uri:
-            res.append('    <wp:comment_author_url>%s</wp:comment_author_url>' % comment.author.uri)
+            res.append('    <wp:comment_author_url><![CDATA[%s]]></wp:comment_author_url>' % comment.author.uri)
         res.append('    <wp:comment_author_IP>%s</wp:comment_author_IP>' % '')
-        res.append('    <wp:comment_date>%s</wp:comment_date>' % self.get_date_wp(comment.published))
         res.append('    <wp:comment_date_gmt>%s</wp:comment_date_gmt>' % self.get_date_wp(comment.published))
-        res.append('    <wp:comment_content><![CDATA[%s]]></wp:comment_content>' % self.escape(comment.content))
+        res.append('    <wp:comment_content><![CDATA[%s]]></wp:comment_content>' % self.unescape(comment.content))
         res.append('    <wp:comment_approved>%s</wp:comment_approved>' % status)
-        res.append('    <wp:commentmeta>')
-        res.append('      <wp:meta_key>blogger_id</wp:meta_key>')
-        res.append('      <wp:meta_value>%s</wp:meta_value>' % comment.entry_id)
-        res.append('    </wp:commentmeta>')
-        if comment.permalink:
-            res.append('    <wp:commentmeta>')
-            res.append('      <wp:meta_key>blogger_permalink</wp:meta_key>')
-            res.append('      <wp:meta_value>%s</wp:meta_value>' % comment.permalink)
-            res.append('    </wp:commentmeta>')
-        if comment.url:
-            res.append('    <wp:commentmeta>')
-            res.append('      <wp:meta_key>blogger_url</wp:meta_key>')
-            res.append('      <wp:meta_value>%s</wp:meta_value>' % comment.url)
-            res.append('    </wp:commentmeta>')
+        if comment.related:
+            if comment.related in self.post_comment_ids:
+                res.append('    <wp:comment_parent>%s</wp:comment_parent>' % self.post_comment_ids[comment.related])
+            else:
+                d('could not find related comment %s for comment entry %s (comment_id %s)' % (comment.related, comment.entry_id, self.comment_id))
+
         res.append('  </wp:comment>')
 
         return res
@@ -354,44 +371,19 @@ def get_post(self, post):
         self.post_id += 1
 
         res.append('<item>')
-        res.append('  <title>%s</title>' % post.title)
-        res.append('  <pubDate>%s</pubDate>' % self.get_date(post.published))
-        res.append('  <dc:creator>%s</dc:creator>' % post.author.name)
-        res.append('  <guid isPermaLink="true">%s</guid>' % post.permalink)
-        res.append('  <description></description/>')
+        res.append('  <title><![CDATA[%s]]></title>' % self.escape(post.title))
+        res.append('  <link>%s</link>' % post.url)
         res.append('  <content:encoded><![CDATA[%s]]></content:encoded>' % self.escape(post.content))
-        res.append('  <excerpt:encoded><![CDATA[%s]]></excerpt:encoded>' % self.escape(post.content))
-        res.append('  <wp:post_id>%s</wp:post_id>' % self.post_id)
-        res.append('  <wp:post_date>%s</wp:post_date>' % self.get_date_wp(post.published))
         res.append('  <wp:post_date_gmt>%s</wp:post_date_gmt>' % self.get_date_wp(post.published))
         res.append('  <wp:comment_status>%s</wp:comment_status>' % self.comment_status)
-        res.append('  <wp:ping_status>closed</wp:ping_status>')
-        if slug:
-            res.append('  <wp:post_name>%s</wp:post_name>' % slug)
-        res.append('  <wp:status>%s</wp:status>' % status)
-        res.append('  <wp:post_parent>0</wp:post_parent>')
-        res.append('  <wp:menu_order>0</wp:menu_order>')
-        res.append('  <wp:post_type>post</wp:post_type>')
-        res.append('  <wp:post_password></wp:post_password>')
-        res.append('  <wp:is_sticky>0</wp:is_sticky>')
-        res.append('  <category domain="category" nicename="id"><![CDATA[Bahasa Indonesia]]></category>')
-        res.append('  <category domain="category" nicename="hacking"><![CDATA[Hacking]]></category>')
-        for label in post.labels:
-            res.append('  <category domain="post_tag" nicename="%s"><![CDATA[%s]]></category>' % (label, label))
-        res.append('  <wp:postmeta>')
-        res.append('    <wp:meta_key>blogger_id</wp:meta_key>')
-        res.append('    <wp:meta_value>%s</wp:meta_value>' % post.entry_id)
-        res.append('  </wp:postmeta>')
-        if post.permalink:
-            res.append('  <wp:postmeta>')
-            res.append('    <wp:meta_key>blogger_permalink</wp:meta_key>')
-            res.append('    <wp:meta_value>%s</wp:meta_value>' % post.permalink)
-            res.append('  </wp:postmeta>')
-        if post.url:
-            res.append('  <wp:postmeta>')
-            res.append('    <wp:meta_key>blogger_url</wp:meta_key>')
-            res.append('    <wp:meta_value>%s</wp:meta_value>' % post.url)
-            res.append('  </wp:postmeta>')
+
+        self.post_comment_ids = {}
+        old_comment_id = self.comment_id
+
+        for comment in post.comments:
+            self.get_comment_id(comment)
+
+        self.comment_id = old_comment_id
 
         for comment in post.comments:
             res += self.get_comment(comment)
@@ -406,7 +398,3 @@ def get_post(self, post):
 xml = writer.write()
 
 print xml
-
-# f = open(out, 'w')
-# f.write(xml)
-# f.close()
diff --git a/blogger-to-wordpress.py b/blogger-to-wordpress.py
@@ -1,3 +1,15 @@
+# Blogger's backup file to WordPress' WXR converter.
+#
+# Only tested with posts and comments, and NOT with pages.
+# May not be efficient for huge blogs since the script keep
+# all content in the memory during conversion.
+#
+# Released as public domain.
+#
+# Please note that I converted the labels in Blogspot
+# as tags in WordPress. I also hardcoded two categories for the
+# WordPress posts. Adjust these first to suit your need.
+
 import sys
 from datetime import datetime
 from xml.dom.minidom import parse, parseString

diff --git a/blogger-to-wordpress.py b/blogger-to-wordpress.py
@@ -0,0 +1,400 @@
+import sys
+from datetime import datetime
+from xml.dom.minidom import parse, parseString
+from xml.dom import Node
+import cgi
+
+import dateutil.parser
+
+inp = sys.argv[1]
+
+def d(*msg):
+    print >>sys.stderr, ' '.join(map(str, msg))
+
+class Blog(object):
+    class Author(object):
+        name = None
+        email = None
+        uri = None
+
+    class Entry(object):
+        entry_id = None
+        url = None
+        permalink = None
+        title = None
+        title_type = None
+        content = None
+        content_type = None
+        published = None
+        updated = None
+        author = None
+
+    class Post(Entry):
+        draft = False
+
+        def __init__(self):
+            self.labels = []
+            self.comments = []
+
+    class Comment(Entry):
+        pass
+
+    author = Author()
+    posts = []
+
+class BlogParser(object):
+    def __init__(self, atom_file):
+        self.atom_file = atom_file
+
+    def parse(self):
+        self.blog = Blog()
+        dom = parse(open(self.atom_file))
+
+        feed = None
+        for child in dom.childNodes:
+            if child.nodeName == 'feed':
+                feed = child
+                break
+
+        if feed is not None:
+            self.parse_metadata(feed)
+            self.parse_entries(feed)
+
+        return self.blog
+
+    def get_text(self, el):
+        if el.nodeType == Node.TEXT_NODE:
+            return el.nodeValue
+
+        value = []
+
+        if el.nodeType == Node.ELEMENT_NODE:
+            for child in el.childNodes:
+                value.append(self.get_text(child))
+
+        return ''.join(value)
+
+    def parse_date(self, txt):
+        return dateutil.parser.parse(txt)
+
+    def parse_metadata(self, feed):
+        for child in feed.childNodes:
+            name = child.nodeName.split(':')[-1]
+
+            if name == 'entry':
+                break
+
+            if name == 'id':
+                self.blog.blog_id = self.get_text(child)
+            elif name == 'updated':
+                self.blog.updated = self.parse_date(self.get_text(child))
+            elif name == 'title':
+                self.blog.title = self.get_text(child)
+            elif name == 'author':
+                self.blog.author = self.parse_author(child)
+
+    def parse_author(self, author):
+        data = Blog.Author()
+
+        for child in author.childNodes:
+            name = child.nodeName.split(':')[-1]
+
+            if name == 'name':
+                data.name = self.get_text(child)
+            elif name == 'uri':
+                data.uri = self.get_text(child)
+            elif name == 'email':
+                data.email = self.get_text(child)
+
+        return data
+
+    def parse_entries(self, feed):
+        self.posts = []
+        self.comments = []
+        self.post_ids = {}
+        self.comment_ids = {}
+
+        for child in feed.childNodes:
+            if child.nodeName != 'entry':
+                continue
+            self.parse_entry(child)
+
+        self.assign_comments()
+
+        self.blog.posts = self.posts
+
+        total = len(self.blog.posts)
+        d('total posts:', total)
+        d('total comments:', len(self.comments))
+        for i, post in enumerate(self.blog.posts):
+            d('(%d / %d) -> %d: %s' % (i+1, total, len(post.comments), post.title))
+
+    def assign_comments(self):
+        i = 0
+        for comment in self.comments:
+            entry_id = comment.post_entry_id
+            if entry_id not in self.post_ids:
+                continue
+
+            post = self.post_ids[entry_id]
+            post.comments.append(comment)
+
+            d('%s. comment: %s -> %s' % (i+1, id(comment), id(post)))
+            i += 1
+
+    def parse_category(self, category):
+        scheme = category.attributes['scheme'].nodeValue
+        term = category.attributes['term'].nodeValue
+        return scheme, term
+
+    def get_kind(self, entry):
+        for child in entry.childNodes:
+            if child.nodeName == 'category':
+                scheme, term = self.parse_category(child)
+                if scheme == 'http://schemas.google.com/g/2005#kind':
+                    return term
+
+    def parse_entry(self, entry):
+        kind = self.get_kind(entry)
+        if kind == 'http://schemas.google.com/blogger/2008/kind#post':
+            post = self.parse_post(entry)
+            self.posts.append(post)
+            self.post_ids[post.entry_id] = post
+        elif kind == 'http://schemas.google.com/blogger/2008/kind#comment':
+            comment = self.parse_comment(entry)
+            self.comments.append(comment)
+            self.comment_ids[comment.entry_id] = comment
+
+    def get_draft(self, control):
+        for child in control.childNodes:
+            name = child.nodeName.split(':')[-1]
+            if name == 'draft':
+                return self.get_text(child) == 'yes'
+        return False
+
+    def parse_entry_common(self, entry, target):
+        for child in entry.childNodes:
+            name = child.nodeName.split(':')[-1]
+            ns = child.namespaceURI
+
+            if name == 'id':
+                target.entry_id = self.get_text(child)
+            elif name == 'published':
+                target.published = self.parse_date(self.get_text(child))
+            elif name == 'updated':
+                target.updated = self.parse_date(self.get_text(child))
+            elif name == 'title':
+                target.title = self.get_text(child)
+                target.title_type = child.attributes['type'].nodeValue
+            elif name == 'content':
+                target.content = self.get_text(child)
+                target.content_type = child.attributes['type'].nodeValue
+            elif name == 'author':
+                target.author = self.parse_author(child)
+            elif name == 'link':
+                rel = child.attributes['rel'].nodeValue
+                href = child.attributes['href'].nodeValue
+
+                if rel == 'self':
+                    target.permalink = href
+                elif rel == 'alternate':
+                    target.url = href
+
+    def parse_post(self, entry):
+        post = Blog.Post()
+        self.parse_entry_common(entry, post)
+
+        for child in entry.childNodes:
+            name = child.nodeName.split(':')[-1]
+            ns = child.namespaceURI
+
+            if name == 'category':
+                scheme, term = self.parse_category(child)
+                if scheme == 'http://www.blogger.com/atom/ns#':
+                    post.labels.append(term)
+            elif ns == 'http://purl.org/atom/app#' and name == 'control':
+                post.draft = self.get_draft(child)
+
+        return post
+
+    def parse_comment(self, entry):
+        comment = Blog.Comment()
+        self.parse_entry_common(entry, comment)
+
+        for child in entry.childNodes:
+            name = child.nodeName.split(':')[-1]
+            ns = child.namespaceURI
+
+            if ns == 'http://purl.org/syndication/thread/1.0' and name == 'in-reply-to':
+                ref = child.attributes['ref'].nodeValue
+                comment.post_entry_id = ref
+
+        return comment
+
+class WXRWriter(object):
+    comment_status = 'open'
+
+    def __init__(self, blog):
+        self.blog = blog
+
+    def write(self):
+        self.post_id = 0
+        self.comment_id = 0
+
+        doc = self.get_header() + self.get_entries() + self.get_footer()
+        doc = [line.strip() for line in doc]
+        doc = '\n'.join(doc)
+        return unicode(doc).encode('utf-8')
+
+    def get_header(self):
+        res = []
+        res.append('<?xml version="1.0" encoding="UTF-8" ?>')
+        res.append('<rss version="2.0"')
+        res.append('     xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"')
+        res.append('     xmlns:content="http://purl.org/rss/1.0/modules/content/"')
+        res.append('     xmlns:wfw="http://wellformedweb.org/CommentAPI/"')
+        res.append('     xmlns:dc="http://purl.org/dc/elements/1.1/"')
+        res.append('     xmlns:wp="http://wordpress.org/export/1.2/">')
+
+        res.append('<channel>')
+        res.append('<title>%s</title>' % self.blog.title)
+        res.append('<wp:wxr_version>1.2</wp:wxr_version>')
+
+        return res
+
+    def get_footer(self):
+        res = []
+        res.append('</channel>')
+        res.append('</rss>')
+        return res
+
+    def get_entries(self):
+        res = []
+
+        for post in self.blog.posts:
+            res += self.get_post(post)
+
+        return res
+
+    def get_date(self, ts):
+        return ts.strftime("%a, %d %b %Y %H:%M:%S +0000")
+
+    def get_date_wp(self, ts):
+        return ts.strftime("%Y-%m-%d %H:%M:%S")
+
+    def escape(self, s):
+        return s
+        return cgi.escape(s).encode('ascii', 'xmlcharrefreplace')
+
+    def get_comment(self, comment):
+        status = 1
+
+        res = []
+
+        self.comment_id += 1
+
+        res.append('  <wp:comment>')
+        res.append('    <wp:comment_id>%s</wp:comment_id>' % self.comment_id)
+        if comment.author.name:
+            res.append('    <wp:comment_author>%s</wp:comment_author>' % comment.author.name)
+        if comment.author.email:
+            res.append('    <wp:comment_author_email>%s</wp:comment_author_email>' % comment.author.email)
+        if comment.author.uri:
+            res.append('    <wp:comment_author_url>%s</wp:comment_author_url>' % comment.author.uri)
+        res.append('    <wp:comment_author_IP>%s</wp:comment_author_IP>' % '')
+        res.append('    <wp:comment_date>%s</wp:comment_date>' % self.get_date_wp(comment.published))
+        res.append('    <wp:comment_date_gmt>%s</wp:comment_date_gmt>' % self.get_date_wp(comment.published))
+        res.append('    <wp:comment_content><![CDATA[%s]]></wp:comment_content>' % self.escape(comment.content))
+        res.append('    <wp:comment_approved>%s</wp:comment_approved>' % status)
+        res.append('    <wp:commentmeta>')
+        res.append('      <wp:meta_key>blogger_id</wp:meta_key>')
+        res.append('      <wp:meta_value>%s</wp:meta_value>' % comment.entry_id)
+        res.append('    </wp:commentmeta>')
+        if comment.permalink:
+            res.append('    <wp:commentmeta>')
+            res.append('      <wp:meta_key>blogger_permalink</wp:meta_key>')
+            res.append('      <wp:meta_value>%s</wp:meta_value>' % comment.permalink)
+            res.append('    </wp:commentmeta>')
+        if comment.url:
+            res.append('    <wp:commentmeta>')
+            res.append('      <wp:meta_key>blogger_url</wp:meta_key>')
+            res.append('      <wp:meta_value>%s</wp:meta_value>' % comment.url)
+            res.append('    </wp:commentmeta>')
+        res.append('  </wp:comment>')
+
+        return res
+
+    def get_post(self, post):
+        if post.content.strip() == '':
+            return []
+
+        res = []
+
+        slug = None
+        if post.url is not None:
+            slug = post.url.split('/')[-1]
+            slug = slug[:-5]
+
+        status = 'publish'
+        if post.draft:
+            status = 'draft'
+
+        self.post_id += 1
+
+        res.append('<item>')
+        res.append('  <title>%s</title>' % post.title)
+        res.append('  <pubDate>%s</pubDate>' % self.get_date(post.published))
+        res.append('  <dc:creator>%s</dc:creator>' % post.author.name)
+        res.append('  <guid isPermaLink="true">%s</guid>' % post.permalink)
+        res.append('  <description></description/>')
+        res.append('  <content:encoded><![CDATA[%s]]></content:encoded>' % self.escape(post.content))
+        res.append('  <excerpt:encoded><![CDATA[%s]]></excerpt:encoded>' % self.escape(post.content))
+        res.append('  <wp:post_id>%s</wp:post_id>' % self.post_id)
+        res.append('  <wp:post_date>%s</wp:post_date>' % self.get_date_wp(post.published))
+        res.append('  <wp:post_date_gmt>%s</wp:post_date_gmt>' % self.get_date_wp(post.published))
+        res.append('  <wp:comment_status>%s</wp:comment_status>' % self.comment_status)
+        res.append('  <wp:ping_status>closed</wp:ping_status>')
+        if slug:
+            res.append('  <wp:post_name>%s</wp:post_name>' % slug)
+        res.append('  <wp:status>%s</wp:status>' % status)
+        res.append('  <wp:post_parent>0</wp:post_parent>')
+        res.append('  <wp:menu_order>0</wp:menu_order>')
+        res.append('  <wp:post_type>post</wp:post_type>')
+        res.append('  <wp:post_password></wp:post_password>')
+        res.append('  <wp:is_sticky>0</wp:is_sticky>')
+        res.append('  <category domain="category" nicename="id"><![CDATA[Bahasa Indonesia]]></category>')
+        res.append('  <category domain="category" nicename="hacking"><![CDATA[Hacking]]></category>')
+        for label in post.labels:
+            res.append('  <category domain="post_tag" nicename="%s"><![CDATA[%s]]></category>' % (label, label))
+        res.append('  <wp:postmeta>')
+        res.append('    <wp:meta_key>blogger_id</wp:meta_key>')
+        res.append('    <wp:meta_value>%s</wp:meta_value>' % post.entry_id)
+        res.append('  </wp:postmeta>')
+        if post.permalink:
+            res.append('  <wp:postmeta>')
+            res.append('    <wp:meta_key>blogger_permalink</wp:meta_key>')
+            res.append('    <wp:meta_value>%s</wp:meta_value>' % post.permalink)
+            res.append('  </wp:postmeta>')
+        if post.url:
+            res.append('  <wp:postmeta>')
+            res.append('    <wp:meta_key>blogger_url</wp:meta_key>')
+            res.append('    <wp:meta_value>%s</wp:meta_value>' % post.url)
+            res.append('  </wp:postmeta>')
+
+        for comment in post.comments:
+            res += self.get_comment(comment)
+
+        res.append('</item>')
+        return res
+
+p = BlogParser(inp)
+blog = p.parse()
+
+writer = WXRWriter(blog)
+xml = writer.write()
+
+print xml
+
+# f = open(out, 'w')
+# f.write(xml)
+# f.close()