Created
March 30, 2013 07:42
-
-
Save nicolasH/5275769 to your computer and use it in GitHub Desktop.
FlickrSnippets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get the sharing / embed snippets from
# the non-mobile Flickr page of an image
# (if sharing is not restricted on that image).
# A couple of lines come from get_links.py:
# https://pythonadventures.wordpress.com/2011/03/10/extract-all-links-from-a-web-page/
#
# Copyright Nicolas Hoibian
# License: Creative Commons BY-NC-SA
#
import sys | |
import urllib2 | |
import re | |
from bs4 import BeautifulSoup | |
import clipboard | |
# Has embedding snippets | |
# http://m.flickr.com/#/photos/nico_h/8436767741/ | |
# | |
# Doesn't have embedding snippets | |
# http://m.flickr.com/#/photos/larrygerbrandt/8597623035/in/photostream/ | |
# Horizontal rule printed between output sections:
# the dash run twice, plus one extra dash.
sep = 2 * "--------------------" + '-'

# URL prefixes of the mobile site (hash-routed) and of the regular site.
_mobile = 'http://m.flickr.com/#'
_normal = 'http://flickr.com'
# Pattern for a width attribute followed by a height attribute, both
# double-quoted integers.  Raw string so that \d and \s reach the regex
# engine untouched instead of being treated as string escapes.
r = r'width="(\d+)"\s*height="(\d+)"'


def extract_size(text):
    """Return the ``(width, height)`` pair found in *text*.

    The values are the digit strings captured from the first
    ``width="..." height="..."`` occurrence.  When *text* contains no
    such pair, ``(None, None)`` is returned so that callers unpacking
    two values keep working instead of failing on a missing match.
    """
    match = re.search(r, text)
    if match is None:
        return (None, None)
    return match.groups()
# One could ignore the first 5 snippets,
# as they are very small: width < 320.
# The first one is 500 wide, but it is repeated.
def get_snippets(url, to_reject=5):
    """Download *url* and collect the image embed snippets it contains.

    Every ``<textarea>`` of the page that holds exactly one child whose
    text contains ``<img src=`` (at an index greater than zero) counts as
    a snippet.  The first *to_reject* snippets are skipped; each remaining
    one is returned as a ``(snippet, width, height)`` tuple, with the size
    obtained from ``extract_size``.
    """
    page = BeautifulSoup(urllib2.urlopen(url).read())
    kept = []
    seen = 0
    for area in page.findAll('textarea'):
        children = area.contents
        if len(children) != 1:
            # Empty or multi-node textareas never hold a snippet.
            continue
        snippet = children[0]
        # Strictly greater than zero: text that *starts* with the img tag
        # is not counted, exactly as before.
        if not (snippet.find('<img src=') > 0):
            continue
        seen += 1
        if seen <= to_reject:
            continue
        w, h = extract_size(snippet)
        kept.append((snippet, w, h))
    return kept
# Scheme/host spellings of a Flickr address that the two plain prefix
# constants do not cover: https, a "www." or "m." host, and the optional
# "/#" used by the mobile site.
_flickr_prefix = re.compile(r'^https?://(?:www\.|m\.)?flickr\.com(?:/#)?',
                            re.IGNORECASE)


def pre_process_url(url):
    """Turn a Flickr photo URL into the canonical non-mobile page URL.

    The site prefix is removed, the path is cut down to its first three
    segments (e.g. ``/photos/<user>/<id>``) and the regular-site prefix
    plus a trailing slash are put back around it.

    Besides the exact mobile and regular ``http://`` prefixes, ``https``
    addresses and ``www.`` / ``m.`` hosts are accepted as well, and
    surrounding whitespace (common in clipboard content) is ignored.
    """
    url = url.strip()
    # Exact prefixes first, so previously supported inputs behave the same.
    url = url.replace(_mobile, '').replace(_normal, '')
    # Then any remaining scheme/host variant still at the front.
    url = _flickr_prefix.sub('', url)
    # A leading '/' yields an empty first item, so four items == three segments.
    segments = url.split('/')[:4]
    return _normal + '/'.join(segments) + '/'
if __name__ == '__main__':
    # Read a Flickr URL from the clipboard, normalise it and print every
    # embed snippet found on the page together with its pixel size.
    print(sep + "\n" + sep)
    url = clipboard.get()
    print(url)
    url = pre_process_url(url)
    print(url)
    # Only the very first snippet is skipped here.
    snippets = get_snippets(url, 1)
    if not snippets:
        print(sep)
        print("!! No embedding snippet found")
        print("!! might be disallowed by photographer.")
    for markup, width, height in snippets:
        print(sep)
        print("# %s x %s" % (width, height))
        print(markup)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment