scraper.py 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import itertools
  4. req = requests.get('http://store.steampowered.com/search/?specials=1#sort_by=_ASC&sort_order=ASC&specials=1&page=1')
  5. content = req.text
  6. soup = BeautifulSoup(content)
  7. allData = {id: {} for id in range(0, 25)}
  8. # Get all divs of a specific class
  9. releaseDate = soup.findAll('div', {'class': 'col search_released'})
  10. # Get all release dates
  11. releaseDates = []
  12. id = 0
  13. for date in releaseDate:
  14. allData[id]['releaseDates'] = date.text
  15. releaseDates.append(date.text)
  16. id += 1
  17. #print releaseDates
  18. id = 0
  19. gameName = soup.findAll('div', {'class': 'col search_name ellipsis'})
  20. # Get all game names
  21. gameNames = []
  22. for name in gameName:
  23. span = name.findAll('span', {'class': 'title'})
  24. for tag in span:
  25. allData[id]['name'] = tag.text
  26. gameNames.append(tag.text)
  27. id += 1
  28. # print gameNames
  29. discount = soup.findAll('div', {'class': 'col search_discount'})
  30. id = 0
  31. # Get all game discounts
  32. gameDiscounts = []
  33. for discountedGame in discount:
  34. span = discountedGame.findAll('span')
  35. for tag in span:
  36. allData[id]['discount'] = tag.text
  37. gameDiscounts.append(tag.text)
  38. id += 1
  39. # print gameDiscounts
  40. price = soup.findAll('div', {'class': 'col search_price discounted'})
  41. id = 0
  42. prices = []
  43. # Get all discounted prices
  44. for value in price:
  45. br = value.findAll('br')
  46. for tag in br:
  47. allData[id]['price'] = tag.text.strip('\t')
  48. prices.append(tag.text.strip('\t'))
  49. id += 1
  50. # print prices
  51. print allData