Skip to content

Instantly share code, notes, and snippets.

@jaanus
Created July 30, 2018 19:03
Show Gist options
  • Save jaanus/3f5fc340c0f8a92f3ecc06dabcf38970 to your computer and use it in GitHub Desktop.
Save jaanus/3f5fc340c0f8a92f3ecc06dabcf38970 to your computer and use it in GitHub Desktop.
Upload Instapaper bookmarks to Pinboard. For input, uses the CSV file as exported from Instapaper, or received from their support.
#!/usr/bin/env python3
"""Upload Instapaper bookmarks to Pinboard. For input, uses the CSV file as
exported from Instapaper, or received from their support.
"""
import sys
import os
import argparse
import csv
# https://github.com/lionheart/pinboard.py
import pinboard
def main(arguments):
    """Upload the bookmarks from an Instapaper CSV export to Pinboard.

    The Pinboard API token is read from the PINBOARD_TOKEN environment
    variable. The first CSV line is treated as a header and skipped. Each
    remaining row is read as ``url, title, selection, folder``; missing
    trailing columns are treated as empty and extra columns are ignored.
    Rows in the "Unread" folder are posted untagged with the "to read" flag;
    all other rows are tagged with their folder name.

    Args:
        arguments: Command-line arguments, without the program name.

    Returns:
        1 if PINBOARD_TOKEN is missing or empty; None once every row has
        been submitted.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-i', '--infile',
        help="Input file in CSV format as exported from Instapaper",
        required=True, type=argparse.FileType('r'))
    args = parser.parse_args(arguments)

    # argparse has already opened the file; close it on every exit path.
    with args.infile as infile:
        api_token = os.environ.get("PINBOARD_TOKEN")
        if not api_token:
            print("You must specify Pinboard API token in PINBOARD_TOKEN environment variable.",
                  file=sys.stderr)
            return 1

        pb = pinboard.Pinboard(api_token)

        reader = csv.reader(infile, delimiter=',', quotechar="\"")
        next(reader, None)  # skip 1st line (header); tolerate an empty file

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to upload.
                continue

            # Pad/truncate to exactly four fields so exports with fewer or
            # additional columns do not break the unpacking.
            url, title, selection, folder = (row + [""] * 4)[:4]
            print(folder, " – ", title)

            # NOTE: a blank title is passed through unchanged; the original
            # author preferred to fix such rows in the CSV by hand.
            # Multi-word folder names are also passed through as a single
            # tag; the original author noted Pinboard splits them into
            # several tags and fixed those by hand.
            if folder == "Unread":
                toread = True
                tags = []
            else:
                toread = False
                tags = [folder] if folder else []

            pb.posts.add(url=url, description=title, extended=selection,
                         tags=tags, toread=toread)
# Run the uploader only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
@Zettt
Copy link

Zettt commented Nov 12, 2024

Just in case someone is reading this in 2024, I have used this script as the basis to get my Instapaper links to Pinboard. But I had to make some changes because the exported CSV is now in a different format, which made the script crash.

I've also added a new feature where it uses GPT to summarize the webpage. I found this helpful to use when the Instapaper link itself doesn't have a title or a description. It's commented out, but please don't hesitate to use it.

Furthermore, I would rather not set up a .env file for a small script that I run only once, so the API keys are hard-coded. I hope this is understandable.

#!/usr/bin/env python3

"""Upload Instapaper bookmarks to Pinboard. For input, uses the CSV file as
exported from Instapaper, or received from their support.
"""

# Imports
import sys
import argparse
import csv

# https://github.com/lionheart/pinboard.py
import pinboard

import os

import requests
from bs4 import BeautifulSoup
from openai import OpenAI

# Credentials are taken from the environment when available, so real keys do
# not have to be saved in this file. The placeholder literals remain as the
# fallback for anyone who prefers to edit them in place for a one-off run.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "sk-proj-apikey"))

pinboard_api_token = os.environ.get("PINBOARD_TOKEN", "your:pinboard:token")


# Function to fetch webpage content and summarize it
def summarize_webpage(url, timeout=10, max_chars=8000):
    """Fetch *url* and return a short model-generated summary of its text.

    This is best-effort: any failure (network error, HTTP error status,
    API error, ...) is reported with ``print`` and an empty string is
    returned, so callers can fall back to a default value.

    Args:
        url: Address of the page to summarize.
        timeout: Seconds to wait for the web server before giving up.
        max_chars: Maximum number of characters of page text included in
            the prompt.

    Returns:
        The stripped summary text, or "" if anything went wrong.
    """
    try:
        # Without a timeout a single unresponsive host stalls the whole run.
        page = requests.get(url, timeout=timeout)
        # Do not summarize an error page as if it were the article.
        page.raise_for_status()

        soup = BeautifulSoup(page.content, 'html.parser')
        # Extract text content from the webpage, bounded so that very long
        # pages do not produce an oversized prompt.
        text = ' '.join(soup.stripped_strings)[:max_chars]

        # Summarize
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"Summarize the following content in 50 characters:\n\n{text}"},
            ],
            max_tokens=50,  # Adjust this if needed to fit the summary length
        )

        # The message content may be None; treat that as "no summary".
        return (completion.choices[0].message.content or "").strip()

    except Exception as e:
        # Deliberately broad: summarization is optional and must never abort
        # the import.
        print(f"Error summarizing webpage: {e}")
        return ""


def main(arguments):
    """Upload the bookmarks from an Instapaper CSV export to Pinboard.

    The first CSV line is treated as a header and skipped. Only the first
    three columns of each row (url, title, selection) are used; missing
    columns are treated as empty. Every bookmark is posted with the
    "instapaper" tag and the "to read" flag, which makes the imported links
    easy to find on Pinboard.

    Args:
        arguments: Command-line arguments, without the program name.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-i', '--infile',
        help="Input file in CSV format as exported from Instapaper",
        required=True, type=argparse.FileType('r'))

    args = parser.parse_args(arguments)

    pb = pinboard.Pinboard(pinboard_api_token)

    # Limit title and description to a reasonable number of characters.
    # When these are too long, the API call will fail.
    max_title_length = 250
    max_description_length = 250

    # Links such as "instapaper-private://..." cause trouble, so any URL
    # containing one of these patterns (case-insensitive) is skipped.
    ignored_patterns = ("instapaper-private",)

    # argparse has already opened the file; close it when done.
    with args.infile as infile:
        reader = csv.reader(infile, delimiter=',', quotechar="\"")
        next(reader, None)  # skip 1st line (header); tolerate an empty file

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to upload.
                continue
            print(row)

            # Pad to three fields so short rows do not break the unpacking.
            url, title, selection = (row + [""] * 3)[:3]

            url_lower = url.lower()
            if any(pattern.lower() in url_lower for pattern in ignored_patterns):
                continue
            if not url:
                print("Skipping row without a URL.", file=sys.stderr)
                continue

            # Use the selection from Instapaper as the description, if there
            # is one. Alternatively, summarize the webpage instead:
            # description = selection if selection else summarize_webpage(url)
            description = (selection or "")[:max_description_length]

            # A blank title makes the Pinboard API call fail, so fall back to
            # a fixed one. Alternatively, summarize the webpage for a title:
            # title = title if title else summarize_webpage(url) or "untitled"
            title = (title or "untitled")[:max_title_length]

            pb.posts.add(url=url, description=title, extended=description,
                         tags=["instapaper"], toread=True)


# Script entry point: hand the command-line arguments (minus the program
# name) to main() and exit with whatever it returns.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    raise SystemExit(main(cli_arguments))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment