# scraper.py

import itertools

import requests
from bs4 import BeautifulSoup
  4. req = requests.get('http://store.steampowered.com/search/?specials=1#sort_by=_ASC&sort_order=ASC&specials=1&page=1')
  5. content = req.text
  6. soup = BeautifulSoup(content)
  7. allData = {id: {} for id in range(0, 25)}
  8. # Get all divs of a specific class
  9. releaseDate = soup.findAll('div', {'class': 'col search_released'})
  10. # Get all release dates
  11. releaseDates = []
  12. id = 0
  13. for date in releaseDate:
  14. allData[id]['releaseDates'] = date.text
  15. releaseDates.append(date.text)
  16. id += 1
  17. #print releaseDates
  18. id = 0
  19. gameName = soup.findAll('div', {'class': 'col search_name ellipsis'})
  20. # Get all game names
  21. gameNames = []
  22. for name in gameName:
  23. span = name.findAll('span', {'class': 'title'})
  24. for tag in span:
  25. allData[id]['name'] = tag.text
  26. gameNames.append(tag.text)
  27. id += 1
  28. # print gameNames
  29. discount = soup.findAll('div', {'class': 'col search_discount'})
  30. id = 0
  31. # Get all game discounts
  32. gameDiscounts = []
  33. for discountedGame in discount:
  34. span = discountedGame.findAll('span')
  35. for tag in span:
  36. allData[id]['discount'] = tag.text
  37. gameDiscounts.append(tag.text)
  38. id += 1
  39. # print gameDiscounts
  40. price = soup.findAll('div', {'class': 'col search_price discounted'})
  41. id = 0
  42. prices = []
  43. # Get all discounted prices
  44. for value in price:
  45. br = value.findAll('br')
  46. for tag in br:
  47. allData[id]['price'] = tag.text.strip('\t')
  48. prices.append(tag.text.strip('\t'))
  49. id += 1
  50. # print prices
  51. # Print all the data
  52. for i in allData:
  53. print allData[i]