tumblr.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. import requests
  2. import simplejson as json
  3. import time
  4. import urllib
  5. import re
  6. from bs4 import BeautifulSoup
  7. import urlparse
  8. import oauth2
  # Tumblr OAuth 1.0a handshake endpoints.  HTTPS is used so that tokens,
  # signatures and the API key are never sent in clear text.
  # NOTE: TumblrOauthClient defines methods named ``authorize_url`` and
  # ``access_token_url``; inside those methods the bare names still resolve to
  # these module-level strings, so do not rename one without the other.
  request_token_url = 'https://www.tumblr.com/oauth/request_token'
  authorize_url = 'https://www.tumblr.com/oauth/authorize'
  access_token_url = 'https://www.tumblr.com/oauth/access_token'

  # Tumblr v2 REST endpoints.
  user_uri = "https://api.tumblr.com/v2/user/info"
  # Prefix only: callers append "<blog-name>.tumblr.com/<resource>".
  blog_uri = "https://api.tumblr.com/v2/blog/"
  class TumblrOauthClient(object):
      """OAuth 1.0a client for a small part of the Tumblr v2 API.

      Typical flow: call ``authorize_url()`` and send the user to the returned
      link, then call ``access_token_url(verifier)`` with the verifier Tumblr
      hands back.  After that ``getUserInfo()`` can make signed requests.
      ``getBlogInfo``, ``getTaggedInfo`` and ``getTaggedBlog`` only need the
      consumer key (sent as ``api_key``).
      """

      # Class-level defaults; instances overwrite them during the handshake.
      token = None               # verified oauth2.Token, set by access_token_url()
      oauth_verifier = None      # verifier passed to access_token_url()
      oauth_token = None         # request token, set by authorize_url()
      oauth_token_secret = None  # request token secret, set by authorize_url()
      accessed = False           # True once the access-token exchange succeeded

      _TAGGED_URI = "https://api.tumblr.com/v2/tagged"
      _TAGGED_LIMIT = 20
      # Matches (from the start of the string) any text that contains the
      # letters s, t, a, r, b, u, c, k in that order, each repeated one or more
      # times -- e.g. "starbucks" or "sstaarbuckk".  Compiled once instead of
      # once per tag.
      _STARBUCKS_RE = re.compile(
          r"(.*)(s*)s(t*)t(a*)a(r*)r(b*)b(u*)u(c*)c(k*)k(.*)")

      def __init__(self, consumer_key, consumer_secret):
          """Store the application credentials and build the oauth2 consumer."""
          self.consumer_key = consumer_key
          self.consumer_secret = consumer_secret
          self.consumer = oauth2.Consumer(consumer_key, consumer_secret)

      @staticmethod
      def _check_status(status):
          """Raise ``Exception`` unless ``status`` (int or str) is 200."""
          if int(status) != 200:
              raise Exception("Invalid response %s." % status)

      def authorize_url(self):
          """Return the link the user must visit to authorize this application.

          A request token is fetched (and stored on the instance) only when
          none has been obtained yet, so repeated calls return a link for the
          same token without another network round trip.

          Raises:
              Exception: if the request-token endpoint does not answer 200.
          """
          if not self.oauth_token:
              client = oauth2.Client(self.consumer)
              resp, content = client.request(request_token_url, "GET")
              self._check_status(resp['status'])
              request_token = dict(urlparse.parse_qsl(content))
              self.oauth_token = request_token['oauth_token']
              self.oauth_token_secret = request_token['oauth_token_secret']
          # The bare name ``authorize_url`` is the module-level endpoint string,
          # not this method (method names live on the class, not in scope here).
          return (authorize_url + "?oauth_token=" + self.oauth_token
                  + "&redirect_uri=http%3A%2F%2Flocalhost%3A8000/hackathon/tumblr")

      def access_token_url(self, oauth_verifier=''):
          """Exchange the stored request token for an access token.

          Args:
              oauth_verifier: verifier string returned by Tumblr after the user
                  authorized the application.

          On success ``self.token`` holds the verified token and
          ``self.accessed`` becomes True.

          Raises:
              Exception: if the access-token endpoint does not answer 200.
          """
          self.oauth_verifier = oauth_verifier
          request_token = oauth2.Token(self.oauth_token, self.oauth_token_secret)
          request_token.set_verifier(oauth_verifier)
          client = oauth2.Client(self.consumer, request_token)
          # Bare name = module-level endpoint string (see authorize_url()).
          resp, content = client.request(access_token_url, "POST")
          self._check_status(resp['status'])
          access_token = dict(urlparse.parse_qsl(content))
          self.token = oauth2.Token(access_token['oauth_token'],
                                    access_token['oauth_token_secret'])
          # Flag success only after the exchange really worked.
          self.accessed = True

      def getUserInfo(self):
          """Return ``(user_info, total_blogs)`` for the authorized user.

          ``user_info`` is the ``response['user']`` object of the API reply and
          ``total_blogs`` is ``len(user_info['blogs'])``.

          Raises:
              Exception: if the endpoint does not answer 200.
          """
          client = oauth2.Client(self.consumer, self.token)
          resp, content = client.request(user_uri, "POST")
          self._check_status(resp['status'])
          user_info = json.loads(content)['response']['user']
          return user_info, len(user_info['blogs'])

      def getBlogInfo(self, user):
          """Return the blog description of ``<user>.tumblr.com``.

          The ``updated`` field of the returned dict is replaced by a local-time
          string formatted as ``YYYY-MM-DD HH:MM:SS``.

          Raises:
              Exception: if the endpoint does not answer 200.
          """
          req = requests.get(blog_uri + user + ".tumblr.com/info",
                             params={"api_key": self.consumer_key})
          self._check_status(req.status_code)
          blog = json.loads(req.content)['response']['blog']
          blog['updated'] = time.strftime('%Y-%m-%d %H:%M:%S',
                                          time.localtime(blog['updated']))
          return blog

      def _fetch_tagged(self, tag):
          """Return the list of posts the ``/tagged`` endpoint gives for ``tag``.

          The query string is built by ``requests`` so that tags containing
          spaces, ``&`` or ``#`` are escaped correctly.  Anything other than a
          list in ``response`` (e.g. an error reply) yields an empty list.
          """
          query = [("tag", tag),
                   ("api_key", self.consumer_key),
                   ("limit", self._TAGGED_LIMIT)]
          req = requests.get(self._TAGGED_URI, params=query)
          posts = json.loads(req.content).get('response')
          return posts if isinstance(posts, list) else []

      @classmethod
      def _filter_tags(cls, posts):
          """Collect the tags of ``posts``, dropping those _STARBUCKS_RE matches."""
          kept = []
          for post in posts:
              for label in post.get("tags") or []:
                  if not cls._STARBUCKS_RE.match(label.lower()):
                      kept.append(label)
          return kept

      def getTaggedInfo(self, tag):
          """Return the tags used by posts tagged ``tag``.

          Tags that look like "starbucks" (see ``_STARBUCKS_RE``) are left out.
          Duplicates are kept, in the order the posts were returned.
          """
          return self._filter_tags(self._fetch_tagged(tag))

      def getTaggedBlog(self, tag):
          """Return the plain text of posts tagged ``tag``.

          For every post the non-empty ``body`` (text posts) and ``caption``
          (e.g. image posts) fields are stripped of HTML and appended, body
          first.
          """
          texts = []
          for post in self._fetch_tagged(tag):
              for field in ("body", "caption"):
                  markup = post.get(field)
                  if markup:
                      # Explicit parser: same result whatever parsers are
                      # installed, and no "no parser specified" warning.
                      soup = BeautifulSoup(markup, "html.parser")
                      texts.append(soup.get_text())
          return texts