# scraper.py
import itertools

import requests
from bs4 import BeautifulSoup
  4. def steamDiscounts():
  5. req = requests.get('http://store.steampowered.com/search/?specials=1#sort_by=_ASC&sort_order=ASC&specials=1&page=1')
  6. content = req.text
  7. soup = BeautifulSoup(content)
  8. allData = {id: {} for id in range(0, 25)}
  9. # Get all divs of a specific class
  10. releaseDate = soup.findAll('div', {'class': 'col search_released'})
  11. # Get all release dates
  12. releaseDates = []
  13. id = 0
  14. for date in releaseDate:
  15. allData[id]['releaseDates'] = date.text
  16. releaseDates.append(date.text)
  17. id += 1
  18. #print releaseDates
  19. id = 0
  20. gameName = soup.findAll('div', {'class': 'col search_name ellipsis'})
  21. # Get all game names
  22. gameNames = []
  23. for name in gameName:
  24. span = name.findAll('span', {'class': 'title'})
  25. for tag in span:
  26. allData[id]['name'] = tag.text
  27. gameNames.append(tag.text)
  28. id += 1
  29. # print gameNames
  30. discount = soup.findAll('div', {'class': 'col search_discount'})
  31. id = 0
  32. # Get all game discounts
  33. gameDiscounts = []
  34. for discountedGame in discount:
  35. span = discountedGame.findAll('span')
  36. for tag in span:
  37. allData[id]['discount'] = tag.text
  38. gameDiscounts.append(tag.text)
  39. id += 1
  40. # print gameDiscounts
  41. price = soup.findAll('div', {'class': 'col search_price discounted'})
  42. id = 0
  43. prices = []
  44. # Get all discounted prices
  45. for value in price:
  46. br = value.findAll('br')
  47. for tag in br:
  48. allData[id]['price'] = tag.text.strip('\t')
  49. prices.append(tag.text.strip('\t'))
  50. id += 1
  51. # Cleanup data
  52. newData = []
  53. for datum in allData:
  54. newData.append(allData[datum])
  55. # print prices
  56. return newData