tumblr.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. # pylint: disable=invalid-name
  2. # pylint: disable=too-many-instance-attributes
  3. '''
  4. tumblr.py contains methods for authenticating
  5. the user via tumblr OAuth as well as methods
  6. for getting data from Tumblr API.
  7. '''
  8. import requests
  9. import simplejson as json
  10. import time
  11. import re
  12. from bs4 import BeautifulSoup
  13. import urlparse
  14. import oauth2
  15. request_token_url = 'http://www.tumblr.com/oauth/request_token'
  16. authorize_url = 'http://www.tumblr.com/oauth/authorize'
  17. access_token_url = 'http://www.tumblr.com/oauth/access_token'
  18. user_uri = "http://api.tumblr.com/v2/user/info"
  19. blog_uri = "http://api.tumblr.com/v2/blog/"
  20. class TumblrOauthClient(object):
  21. '''
  22. Class responsible for authenticating the user
  23. via Tumblr OAuth2.
  24. '''
  25. token = None
  26. oauth_verifier = None
  27. oauth_token = None
  28. oauth_token_secret = None
  29. accessed = False
  30. def __init__(self, consumer_key, consumer_secret):
  31. self.consumer_key = consumer_key
  32. self.consumer_secret = consumer_secret
  33. self.consumer = oauth2.Consumer(consumer_key, consumer_secret)
  34. def authorize_url(self):
  35. '''
  36. Redirects user to authorize use.
  37. '''
  38. client = oauth2.Client(self.consumer)
  39. resp, content = client.request(request_token_url, "GET")
  40. #parse content
  41. if not self.oauth_token:
  42. request_token = dict(urlparse.parse_qsl(content))
  43. self.oauth_token = request_token['oauth_token']
  44. self.oauth_token_secret = request_token['oauth_token_secret']
  45. link = authorize_url + "?oauth_token=" + self.oauth_token + \
  46. "&redirect_uri=http%3A%2F%2Flocalhost%3A8000/hackathon/tumblr"
  47. return link
  48. def access_token_url(self, oauth_verifier=''):
  49. '''
  50. Returns an access token to the user.
  51. '''
  52. self.accessed = True
  53. token = oauth2.Token(self.oauth_token, self.oauth_token_secret)
  54. self.oauth_verifier = oauth_verifier
  55. print self.oauth_verifier
  56. token.set_verifier(self.oauth_verifier)
  57. client = oauth2.Client(self.consumer, token)
  58. resp, content = client.request(access_token_url, "POST")
  59. access_token = dict(urlparse.parse_qsl(content))
  60. #set verified token
  61. self.token = oauth2.Token(access_token['oauth_token'], access_token['oauth_token_secret'])
  62. def getUserInfo(self):
  63. ''' Returns users information. '''
  64. client = oauth2.Client(self.consumer, self.token)
  65. #print client
  66. resp, content = client.request(user_uri, "POST")
  67. if int(resp['status']) != 200:
  68. raise Exception("Invalid response %s." % resp['status'])
  69. #return content in json format
  70. jsonlist = json.loads(content)
  71. response = jsonlist['response']
  72. user_info = response['user']
  73. total_blogs = len(user_info['blogs'])
  74. #print user_info
  75. return user_info, total_blogs
  76. def getBlogInfo(self, user):
  77. ''' Returns blogger's blog information '''
  78. blog_info = blog_uri + user + ".tumblr.com/info?api_key="+self.consumer_key
  79. req = requests.get(blog_info)
  80. jsonlist = json.loads(req.content)
  81. response = jsonlist['response']
  82. blog = response['blog']
  83. blog['updated'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(blog['updated']))
  84. return blog
  85. def getTaggedInfo(self, tag):
  86. ''' Return tags related to blog with certain tag. '''
  87. tagged_uri = "http://api.tumblr.com/v2/tagged?tag=" + tag + "&api_key=" + \
  88. self.consumer_key + "&limit=20"
  89. req = requests.get(tagged_uri)
  90. jsonlist = json.loads(req.content)
  91. tags = []
  92. body = jsonlist['response']
  93. for blog in body:
  94. for data in blog:
  95. if data == "tags":
  96. #print blog[data]
  97. for i in blog[data]:
  98. m = re.match("(.*)(s*)s(t*)t(a*)a(r*)r(b*)b(u*)u(c*)c(k*)k(.*)", i.lower())
  99. if not m:
  100. tags.append(i)
  101. return tags
  102. def getTaggedBlog(self, tag):
  103. ''' Return the tagged blogs's captions or post.'''
  104. tagged_uri = "http://api.tumblr.com/v2/tagged?tag=" + tag + "&api_key=" + \
  105. self.consumer_key + "&limit=20"
  106. req = requests.get(tagged_uri)
  107. jsonlist = json.loads(req.content)
  108. body = jsonlist['response']
  109. tagtext = []
  110. for blog in body:
  111. #print "####"
  112. for data in blog:
  113. #post
  114. if data == "body":
  115. if blog[data]:
  116. #print blog[data]
  117. soup = BeautifulSoup(blog[data])
  118. text = soup.get_text()
  119. tagtext.append(text)
  120. #an image
  121. if data == "caption":
  122. if blog[data]:
  123. #print blog[data]
  124. soup = BeautifulSoup(blog[data])
  125. text = soup.get_text()
  126. tagtext.append(text)
  127. return tagtext