'''
scraper.py contains various functions for scraping
HTML content and packaging it into dictionaries.
'''
import itertools

import requests
from bs4 import BeautifulSoup
from bs4 import NavigableString
  8. def steamDiscounts():
  9. req = requests.get('http://store.steampowered.com/search/?specials=1#sort_by=_ASC&sort_order=ASC&specials=1&page=1')
  10. content = req.text
  11. soup = BeautifulSoup(content)
  12. allData = {id: {} for id in range(0, 25)}
  13. # Get all divs of a specific class
  14. releaseDate = soup.findAll('div', {'class': 'col search_released'})
  15. # Get all release dates
  16. releaseDates = []
  17. id = 0
  18. for date in releaseDate:
  19. allData[id]['releaseDates'] = date.text
  20. releaseDates.append(date.text)
  21. id += 1
  22. #print releaseDates
  23. id = 0
  24. gameName = soup.findAll('div', {'class': 'col search_name ellipsis'})
  25. # Get all game names
  26. gameNames = []
  27. for name in gameName:
  28. span = name.findAll('span', {'class': 'title'})
  29. for tag in span:
  30. allData[id]['name'] = tag.text
  31. gameNames.append(tag.text)
  32. id += 1
  33. # print gameNames
  34. discount = soup.findAll('div', {'class': 'col search_discount'})
  35. id = 0
  36. # Get all game discounts
  37. gameDiscounts = []
  38. for discountedGame in discount:
  39. span = discountedGame.findAll('span')
  40. for tag in span:
  41. allData[id]['discount'] = tag.text
  42. gameDiscounts.append(tag.text)
  43. id += 1
  44. # print gameDiscounts
  45. price = soup.findAll('div', {'class': 'col search_price discounted'})
  46. id = 0
  47. prices = []
  48. # Get all discounted prices
  49. for value in price:
  50. br = value.findAll('br')
  51. for tag in br:
  52. allData[id]['price'] = tag.text.strip('\t')
  53. prices.append(tag.text.strip('\t'))
  54. id += 1
  55. # Cleanup data
  56. newData = []
  57. for datum in allData:
  58. newData.append(allData[datum])
  59. # print prices
  60. return newData