tumblr.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. # pylint: disable=invalid-name
  2. # pylint: disable=too-many-instance-attributes
'''
tumblr.py contains methods for authenticating the user via Tumblr OAuth,
as well as methods for getting data from the Tumblr API.
'''
  8. import requests
  9. import simplejson as json
  10. import time
  11. import re
  12. from bs4 import BeautifulSoup
  13. import urlparse
  14. import oauth2
  15. request_token_url = 'http://www.tumblr.com/oauth/request_token'
  16. authorize_url = 'http://www.tumblr.com/oauth/authorize'
  17. access_token_url = 'http://www.tumblr.com/oauth/access_token'
  18. user_uri = "http://api.tumblr.com/v2/user/info"
  19. blog_uri = "http://api.tumblr.com/v2/blog/"
  20. class TumblrOauthClient(object):
  21. '''
  22. Class responsible for authenticating the user
  23. via Tumblr OAuth2.
  24. '''
  25. token = None
  26. oauth_verifier = None
  27. oauth_token = None
  28. oauth_token_secret = None
  29. is_authorized = False
  30. access_token = None
  31. def __init__(self, consumer_key, consumer_secret):
  32. self.consumer_key = consumer_key
  33. self.consumer_secret = consumer_secret
  34. self.consumer = oauth2.Consumer(consumer_key, consumer_secret)
  35. def authorize_url(self):
  36. '''
  37. Redirects user to authorize use.
  38. '''
  39. client = oauth2.Client(self.consumer)
  40. resp, content = client.request(request_token_url, "GET")
  41. #parse content
  42. if not self.oauth_token:
  43. request_token = dict(urlparse.parse_qsl(content))
  44. self.oauth_token = request_token['oauth_token']
  45. self.oauth_token_secret = request_token['oauth_token_secret']
  46. link = authorize_url + "?oauth_token=" + self.oauth_token + \
  47. "&redirect_uri=http://127.0.0.1:8000/hackathon/"
  48. return link
  49. def access_token_url(self, oauth_verifier=''):
  50. '''
  51. Returns an access token to the user.
  52. '''
  53. self.is_authorized = True
  54. token = oauth2.Token(self.oauth_token, self.oauth_token_secret)
  55. self.oauth_verifier = oauth_verifier
  56. print self.oauth_verifier
  57. token.set_verifier(self.oauth_verifier)
  58. client = oauth2.Client(self.consumer, token)
  59. resp, content = client.request(access_token_url, "POST")
  60. self.access_token = dict(urlparse.parse_qsl(content))
  61. #set verified token
  62. self.token = oauth2.Token(self.access_token['oauth_token'], self.access_token['oauth_token_secret'])
  63. def getUserInfo(self):
  64. ''' Returns users information. '''
  65. client = oauth2.Client(self.consumer, self.token)
  66. #print client
  67. resp, content = client.request(user_uri, "POST")
  68. if int(resp['status']) != 200:
  69. raise Exception("Invalid response %s." % resp['status'])
  70. #return content in json format
  71. jsonlist = json.loads(content)
  72. response = jsonlist['response']
  73. self.username = str(response['user']['name'])
  74. user_info = response['user']
  75. total_blogs = len(user_info['blogs'])
  76. #print user_info
  77. return user_info, total_blogs
  78. def getBlogInfo(self, user):
  79. ''' Returns blogger's blog information '''
  80. blog_info = blog_uri + user + ".tumblr.com/info?api_key="+self.consumer_key
  81. req = requests.get(blog_info)
  82. jsonlist = json.loads(req.content)
  83. response = jsonlist['response']
  84. blog = response['blog']
  85. blog['updated'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(blog['updated']))
  86. return blog
  87. def getTaggedInfo(self, tag):
  88. ''' Return tags related to blog with certain tag. '''
  89. tagged_uri = "http://api.tumblr.com/v2/tagged?tag=" + tag + "&api_key=" + \
  90. self.consumer_key + "&limit=20"
  91. req = requests.get(tagged_uri)
  92. jsonlist = json.loads(req.content)
  93. tags = []
  94. body = jsonlist['response']
  95. for blog in body:
  96. for data in blog:
  97. if data == "tags":
  98. #print blog[data]
  99. for i in blog[data]:
  100. m = re.match("(.*)(s*)s(t*)t(a*)a(r*)r(b*)b(u*)u(c*)c(k*)k(.*)", i.lower())
  101. if not m:
  102. tags.append(i)
  103. return tags
  104. def getTaggedBlog(self, tag):
  105. ''' Return the tagged blogs's captions or post.'''
  106. tagged_uri = "http://api.tumblr.com/v2/tagged?tag=" + tag + "&api_key=" + \
  107. self.consumer_key + "&limit=20"
  108. req = requests.get(tagged_uri)
  109. jsonlist = json.loads(req.content)
  110. body = jsonlist['response']
  111. tagtext = []
  112. for blog in body:
  113. #print "####"
  114. for data in blog:
  115. #post
  116. if data == "body":
  117. if blog[data]:
  118. #print blog[data]
  119. soup = BeautifulSoup(blog[data])
  120. text = soup.get_text()
  121. tagtext.append(text)
  122. #an image
  123. if data == "caption":
  124. if blog[data]:
  125. #print blog[data]
  126. soup = BeautifulSoup(blog[data])
  127. text = soup.get_text()
  128. tagtext.append(text)
  129. return tagtext