Your Name 4 years ago
parent
commit
18ebea7bdb

BIN
crawl/__pycache__/__init__.cpython-38.pyc View File


BIN
crawl/__pycache__/settings.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/__init__.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/basketball.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/football.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/lottery.cpython-38.pyc View File


BIN
crawl/comm/__pycache__/mydb.cpython-38.pyc View File


+ 15
- 10
crawl/comm/football.py View File

491
     whole = None,
491
     whole = None,
492
 
492
 
493
     # 状态
493
     # 状态
494
-    status = None
494
+    status = None,
495
+
496
+    # 玩法结果
497
+    playRes = None,
495
   ):
498
   ):
496
     self.matchTime = matchTime
499
     self.matchTime = matchTime
497
     self.matchWeek = matchWeek
500
     self.matchWeek = matchWeek
502
     self.half = half
505
     self.half = half
503
     self.whole = whole
506
     self.whole = whole
504
     self.status = status
507
     self.status = status
508
+    self.playRes = playRes
505
 
509
 
506
   def toString(self):
510
   def toString(self):
507
     return ';'.join((
511
     return ';'.join((
508
-      self.matchTime if self.matchTime is not None else '',
509
-      self.matchWeek if self.matchWeek is not None else '',
510
-      self.league if self.league is not None else '',
511
-      self.homeTeam if self.homeTeam is not None else '',
512
-      self.awayTeam if self.awayTeam is not None else '',
513
-      self.single if self.single is not None else '',
514
-      self.half if self.half is not None else '',
515
-      self.whole if self.whole is not None else '',
516
-      self.status if self.status is not None else ''
512
+      self.matchTime,
513
+      self.matchWeek,
514
+      self.league,
515
+      self.homeTeam,
516
+      self.awayTeam,
517
+      self.single,
518
+      self.half,
519
+      self.whole,
520
+      self.status,
521
+      self.playRes,
517
     ))
522
     ))
518
 
523
 
519
   def persist(self):
524
   def persist(self):

BIN
crawl/spiders/__pycache__/__init__.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/basketball.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/basketball_match.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/basketball_price.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/basketball_result.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football_match.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football_price.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/football_result.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/lottery.cpython-38.pyc View File


BIN
crawl/spiders/__pycache__/util.cpython-38.pyc View File


+ 5
- 4
crawl/spiders/basketball_result.py View File

61
         ''
61
         ''
62
       )
62
       )
63
 
63
 
64
-      matchLink = getNoneStr(tdNodeList[12].css('a').attrib.get('href'))
65
-      matchId = re.sub(r'^.*pool_result.php\?id=', '', matchLink)
64
+      matchLink = getNoneStr(tdNodeList[3].css('a').attrib.get('href'))
65
+      matchId = re.sub(r'^.*bk_match_info.php\?m=', '', matchLink)
66
 
66
 
67
       # 加入到临时字典里
67
       # 加入到临时字典里
68
       self._matchesMap[matchId] = bstResult
68
       self._matchesMap[matchId] = bstResult
69
 
69
 
70
       if status == '已完成':
70
       if status == '已完成':
71
-        yield scrapy.Request('https:' + matchLink, self.parsePrice, 'GET')
71
+        link = tdNodeList[12].css('a').attrib.get('href')
72
+        yield scrapy.Request('https:' + link, self.parsePrice, 'GET')
72
       else:
73
       else:
73
         logging.info("采集到数据 --> %s" % bstResult.toString())
74
         logging.info("采集到数据 --> %s" % bstResult.toString())
74
         bstResult.persist()
75
         bstResult.persist()
105
 
106
 
106
     tableList = response.css('.kj-table')
107
     tableList = response.css('.kj-table')
107
     if tableList is None or len(tableList) < 4:
108
     if tableList is None or len(tableList) < 4:
108
-      logging.error("抓取赔率结果失败")
109
+      logging.error("抓取玩法结果失败")
109
       return
110
       return
110
 
111
 
111
     playRes = []
112
     playRes = []

+ 56
- 12
crawl/spiders/football_result.py View File

2
 import re
2
 import re
3
 import logging
3
 import logging
4
 from crawl.comm.football import FTResult
4
 from crawl.comm.football import FTResult
5
+from crawl.spiders.util import getNoneStr
5
 
6
 
6
 class FootballSpider(scrapy.Spider):
7
 class FootballSpider(scrapy.Spider):
7
   name = "football-result"
8
   name = "football-result"
8
 
9
 
10
+  # 所有比赛
11
+  _matchesMap = {}
12
+
9
   def start_requests(self):
13
   def start_requests(self):
10
     url = 'https://info.sporttery.cn/football/match_result.php'
14
     url = 'https://info.sporttery.cn/football/match_result.php'
11
     yield scrapy.Request(url, self.parseResult, 'GET')
15
     yield scrapy.Request(url, self.parseResult, 'GET')
20
       if tdNodeList is None or len(tdNodeList) < 10:
24
       if tdNodeList is None or len(tdNodeList) < 10:
21
         continue
25
         continue
22
 
26
 
23
-      matchTime = tdNodeList[0].css('::text').get()
24
-      matchWeek = tdNodeList[1].css('::text').get()
25
-      league = tdNodeList[2].css('::text').get()
26
-      leagueFullName = tdNodeList[2].attrib.get('title')  # 联赛全称
27
-      leagueName = '|'.join((league if league is not None else '', leagueFullName if leagueFullName is not None else ''))
27
+      matchTime = getNoneStr(tdNodeList[0].css('::text').get())
28
+      matchWeek = getNoneStr(tdNodeList[1].css('::text').get())
29
+      league = getNoneStr(tdNodeList[2].css('::text').get())
30
+      leagueFullName = getNoneStr(tdNodeList[2].attrib.get('title'))  # 联赛全称
31
+      leagueName = '|'.join((league, leagueFullName))
28
       homeTeam = self.trimBrackets(tdNodeList[3].css('.zhu::text').get()) # 主队
32
       homeTeam = self.trimBrackets(tdNodeList[3].css('.zhu::text').get()) # 主队
29
       awayTeam = self.trimBrackets(tdNodeList[3].css('.ke::text').get()) # 客队
33
       awayTeam = self.trimBrackets(tdNodeList[3].css('.ke::text').get()) # 客队
30
-      half = tdNodeList[4].css('span::text').get()
31
-      whole = tdNodeList[5].css('span::text').get()
32
-      status = tdNodeList[9].css('::text').get()
34
+      half = getNoneStr(tdNodeList[4].css('span::text').get())
35
+      whole = getNoneStr(tdNodeList[5].css('span::text').get())
36
+      status = getNoneStr(tdNodeList[9].css('::text').get())
33
 
37
 
34
       ftResult = FTResult(
38
       ftResult = FTResult(
35
         matchTime,
39
         matchTime,
40
         '', # 不需要单固
44
         '', # 不需要单固
41
         half,
45
         half,
42
         whole,
46
         whole,
43
-        status
47
+        status,
48
+        ''
44
       )
49
       )
45
-      
46
-      logging.info("采集到数据 --> %s" % ftResult.toString())
47
 
50
 
48
-      ftResult.persist()
51
+      matchLink = getNoneStr(tdNodeList[3].css('a').attrib.get('href'))
52
+      matchId = re.sub(r'^.*fb_match_info.php\?m=', '', matchLink)
53
+
54
+      # 加入到临时字典里
55
+      self._matchesMap[matchId] = ftResult
56
+
57
+
58
+      if status == '已完成':
59
+        link = "https://i.sporttery.cn/api/fb_match_info/get_pool_rs?mid=" + matchId
60
+        yield scrapy.Request(link, self.parsePrice, 'GET')
61
+      else:
62
+        logging.info("采集到数据 --> %s" % ftResult.toString())
63
+        ftResult.persist()
49
 
64
 
50
 
65
 
51
     # 是否存在下一页
66
     # 是否存在下一页
74
     if str is None: return ''
89
     if str is None: return ''
75
 
90
 
76
     return re.sub(r'\(.*\)', '', str)
91
     return re.sub(r'\(.*\)', '', str)
92
+
93
+  def parsePrice(self, response):
94
+    logging.info("采集数据源 ---> %s" % response.url)
95
+    
96
+    data = response.json()
97
+    res = data.get('result').get('pool_rs')
98
+
99
+    playRes = []
100
+
101
+    # 胜平负
102
+    playRes.append(res['had']['prs_name'])
103
+    # 让球胜平负
104
+    playRes.append(res['hhad']['prs_name'])
105
+    # 比分
106
+    playRes.append(res['crs']['prs_name'])
107
+    # 总进球
108
+    playRes.append(res['ttg']['prs_name'])
109
+    # 半全场
110
+    playRes.append(res['hafu']['prs_name'])
111
+
112
+    matchId = re.sub(r'^.*get_pool_rs\?mid=', '', response.url)
113
+    ftResult = self._matchesMap[matchId]
114
+    ftResult.playRes = '|'.join(playRes)
115
+
116
+    #
117
+    logging.info("采集到数据 --> %s" % ftResult.toString())
118
+
119
+    # 入库
120
+    ftResult.persist()