张延森 4 years ago
parent
commit
85085cc9ef

BIN
crawl/__pycache__/__init__.cpython-38.pyc View File


BIN
crawl/__pycache__/settings.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/__init__.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/basketball.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/football.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/lottery.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/mydb.cpython-38.pyc View File


+ 1
- 1
crawl/run.py View File

7
 # 添加环境变量
7
 # 添加环境变量
8
 sys.path.append(dirpath)
8
 sys.path.append(dirpath)
9
 # 启动爬虫,第三个参数为爬虫name
9
 # 启动爬虫,第三个参数为爬虫name
10
-execute(['scrapy','crawl','football-result'])
10
+execute(['scrapy','crawl','football'])

BIN
crawl/spiders/__pycache__/__init__.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/basketball_match.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/basketball_price.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/basketball_result.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football_match.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football_price.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football_result.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/lottery.cpython-38.pyc View File


+ 24
- 9
crawl/spiders/football.py View File

1
 import scrapy
1
 import scrapy
2
 import re
2
 import re
3
+import logging
3
 from crawl.comm.football import FTMatch, FTPrice, FTResult, WDLOdds, WDLSpreadOdds, ScoreResult, PointsResult, DoubleResult
4
 from crawl.comm.football import FTMatch, FTPrice, FTResult, WDLOdds, WDLSpreadOdds, ScoreResult, PointsResult, DoubleResult
4
 
5
 
5
 class FootballSpider(scrapy.Spider):
6
 class FootballSpider(scrapy.Spider):
6
   name = 'football'
7
   name = 'football'
7
 
8
 
9
+  # 所有比赛
10
+  _matchesMap = {}
11
+
8
   def start_requests(self):
12
   def start_requests(self):
9
     # 受注赛事
13
     # 受注赛事
10
     url = 'https://info.sporttery.cn/football/match_list.php'
14
     url = 'https://info.sporttery.cn/football/match_list.php'
33
       score = self.parsePassWay(tdNodeList[8].css('div'))
37
       score = self.parsePassWay(tdNodeList[8].css('div'))
34
       points = self.parsePassWay(tdNodeList[9].css('div'))
38
       points = self.parsePassWay(tdNodeList[9].css('div'))
35
       double = self.parsePassWay(tdNodeList[10].css('div'))
39
       double = self.parsePassWay(tdNodeList[10].css('div'))
40
+      
41
+      matchLink = tdNodeList[2].css('a').attrib.get('href')
42
+      matchId = re.sub(r'^.*m=', '', matchLink)
36
 
43
 
37
       # 入库
44
       # 入库
38
-      self._currentMatch = FTMatch(
45
+      match = FTMatch(
39
         matchWeek,
46
         matchWeek,
40
         matchTime,
47
         matchTime,
41
         leagueName,
48
         leagueName,
48
         points,
55
         points,
49
         double
56
         double
50
       )
57
       )
51
-      self._currentMatch.persist()
58
+      match.persist()
59
+
60
+      # 加入到临时字典里
61
+      self._matchesMap[matchId] = match
52
 
62
 
53
       # 请求比赛赔率
63
       # 请求比赛赔率
54
       if saleStatus == '已开售':
64
       if saleStatus == '已开售':
55
-        matchLink = tdNodeList[2].css('a').attrib.get('href')
56
-        matchId = re.sub(r'^.*m=', '', matchLink)
57
         priceAPI = 'https://i.sporttery.cn/api/fb_match_info/get_pool_rs?mid=' + matchId
65
         priceAPI = 'https://i.sporttery.cn/api/fb_match_info/get_pool_rs?mid=' + matchId
58
         yield scrapy.Request(priceAPI, self.parsePrice, 'GET')
66
         yield scrapy.Request(priceAPI, self.parsePrice, 'GET')
59
 
67
 
80
       return 'wait'
88
       return 'wait'
81
 
89
 
82
   def parsePrice(self, response):
90
   def parsePrice(self, response):
91
+    logging.info("采集数据源 ---> %s" % response.url)
83
     data = response.json()
92
     data = response.json()
84
     res = data.get('result').get('odds_list')
93
     res = data.get('result').get('odds_list')
85
 
94
 
95
+    matchId = re.sub(r'^.*get_pool_rs\?mid=', '', response.url)
96
+    match = self._matchesMap[matchId]
97
+
86
     ftprice = FTPrice(
98
     ftprice = FTPrice(
87
       '', # 不需要 matchId
99
       '', # 不需要 matchId
88
-      self._currentMatch.matchWeek,
89
-      self._currentMatch.matchTime,
90
-      self._currentMatch.league,
91
-      self._currentMatch.homeTeam,
92
-      self._currentMatch.awayTeam
100
+      match.matchWeek,
101
+      match.matchTime,
102
+      match.league,
103
+      match.homeTeam,
104
+      match.awayTeam
93
     )
105
     )
94
 
106
 
95
     try:
107
     try:
178
     finally:
190
     finally:
179
       pass
191
       pass
180
 
192
 
193
+    #
194
+    logging.info("采集到数据 --> %s" % ftprice.toString())
195
+
181
     # 入库
196
     # 入库
182
     ftprice.persist()
197
     ftprice.persist()