张延森 4 anni fa
parent
commit
85085cc9ef

BIN
crawl/__pycache__/__init__.cpython-38.pyc Vedi File


BIN
crawl/__pycache__/settings.cpython-38.pyc Vedi File


BIN
crawl/comm/__pycache__/__init__.cpython-38.pyc Vedi File


BIN
crawl/comm/__pycache__/basketball.cpython-38.pyc Vedi File


BIN
crawl/comm/__pycache__/football.cpython-38.pyc Vedi File


BIN
crawl/comm/__pycache__/lottery.cpython-38.pyc Vedi File


BIN
crawl/comm/__pycache__/mydb.cpython-38.pyc Vedi File


+ 1
- 1
crawl/run.py Vedi File

@@ -7,4 +7,4 @@ print(dirpath)
7 7
 # 添加环境变量
8 8
 sys.path.append(dirpath)
9 9
 # 启动爬虫,第三个参数为爬虫name
10
-execute(['scrapy','crawl','football-result'])
10
+execute(['scrapy','crawl','football'])

BIN
crawl/spiders/__pycache__/__init__.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/basketball_match.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/basketball_price.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/basketball_result.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/football.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/football_match.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/football_price.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/football_result.cpython-38.pyc Vedi File


BIN
crawl/spiders/__pycache__/lottery.cpython-38.pyc Vedi File


+ 24
- 9
crawl/spiders/football.py Vedi File

@@ -1,10 +1,14 @@
1 1
 import scrapy
2 2
 import re
3
+import logging
3 4
 from crawl.comm.football import FTMatch, FTPrice, FTResult, WDLOdds, WDLSpreadOdds, ScoreResult, PointsResult, DoubleResult
4 5
 
5 6
 class FootballSpider(scrapy.Spider):
6 7
   name = 'football'
7 8
 
9
+  # 所有比赛
10
+  _matchesMap = {}
11
+
8 12
   def start_requests(self):
9 13
     # 受注赛事
10 14
     url = 'https://info.sporttery.cn/football/match_list.php'
@@ -33,9 +37,12 @@ class FootballSpider(scrapy.Spider):
33 37
       score = self.parsePassWay(tdNodeList[8].css('div'))
34 38
       points = self.parsePassWay(tdNodeList[9].css('div'))
35 39
       double = self.parsePassWay(tdNodeList[10].css('div'))
40
+      
41
+      matchLink = tdNodeList[2].css('a').attrib.get('href')
42
+      matchId = re.sub(r'^.*m=', '', matchLink)
36 43
 
37 44
       # 入库
38
-      self._currentMatch = FTMatch(
45
+      match = FTMatch(
39 46
         matchWeek,
40 47
         matchTime,
41 48
         leagueName,
@@ -48,12 +55,13 @@ class FootballSpider(scrapy.Spider):
48 55
         points,
49 56
         double
50 57
       )
51
-      self._currentMatch.persist()
58
+      match.persist()
59
+
60
+      # 加入到临时字典里
61
+      self._matchesMap[matchId] = match
52 62
 
53 63
       # 请求比赛赔率
54 64
       if saleStatus == '已开售':
55
-        matchLink = tdNodeList[2].css('a').attrib.get('href')
56
-        matchId = re.sub(r'^.*m=', '', matchLink)
57 65
         priceAPI = 'https://i.sporttery.cn/api/fb_match_info/get_pool_rs?mid=' + matchId
58 66
         yield scrapy.Request(priceAPI, self.parsePrice, 'GET')
59 67
 
@@ -80,16 +88,20 @@ class FootballSpider(scrapy.Spider):
80 88
       return 'wait'
81 89
 
82 90
   def parsePrice(self, response):
91
+    logging.info("采集数据源 ---> %s" % response.url)
83 92
     data = response.json()
84 93
     res = data.get('result').get('odds_list')
85 94
 
95
+    matchId = re.sub(r'^.*get_pool_rs\?mid=', '', response.url)
96
+    match = self._matchesMap[matchId]
97
+
86 98
     ftprice = FTPrice(
87 99
       '', # 不需要 matchId
88
-      self._currentMatch.matchWeek,
89
-      self._currentMatch.matchTime,
90
-      self._currentMatch.league,
91
-      self._currentMatch.homeTeam,
92
-      self._currentMatch.awayTeam
100
+      match.matchWeek,
101
+      match.matchTime,
102
+      match.league,
103
+      match.homeTeam,
104
+      match.awayTeam
93 105
     )
94 106
 
95 107
     try:
@@ -178,5 +190,8 @@ class FootballSpider(scrapy.Spider):
178 190
     finally:
179 191
       pass
180 192
 
193
+    #
194
+    logging.info("采集到数据 --> %s" % ftprice.toString())
195
+
181 196
     # 入库
182 197
     ftprice.persist()