4 年前 · 18ebea7bdb
--- a/crawl/__pycache__/__init__.cpython-38.pyc
+++ b/crawl/__pycache__/__init__.cpython-38.pyc
--- a/crawl/__pycache__/settings.cpython-38.pyc
+++ b/crawl/__pycache__/settings.cpython-38.pyc
--- a/crawl/comm/__pycache__/__init__.cpython-38.pyc
+++ b/crawl/comm/__pycache__/__init__.cpython-38.pyc
--- a/crawl/comm/__pycache__/basketball.cpython-38.pyc
+++ b/crawl/comm/__pycache__/basketball.cpython-38.pyc
--- a/crawl/comm/__pycache__/football.cpython-38.pyc
+++ b/crawl/comm/__pycache__/football.cpython-38.pyc
--- a/crawl/comm/__pycache__/lottery.cpython-38.pyc
+++ b/crawl/comm/__pycache__/lottery.cpython-38.pyc
--- a/crawl/comm/__pycache__/mydb.cpython-38.pyc
+++ b/crawl/comm/__pycache__/mydb.cpython-38.pyc
--- a/crawl/comm/football.py
+++ b/crawl/comm/football.py
@@ -491,7 +491,10 @@ class FTResult:
 
				     whole = None,
			
 
				 
			
 
				     # 状态
			
 
				-    status = None
			
 
				+    status = None,
			
 
				+
			
 
				+    # 玩法结果
			
 
				+    playRes = None,
			
 
				   ):
			
 
				     self.matchTime = matchTime
			
 
				     self.matchWeek = matchWeek
			
@@ -502,18 +505,20 @@ class FTResult:
 
				     self.half = half
			
 
				     self.whole = whole
			
 
				     self.status = status
			
 
				+    self.playRes = playRes
			
 
				 
			
 
				   def toString(self):
			
 
				     return ';'.join((
			
 
				-      self.matchTime if self.matchTime is not None else '',
			
 
				-      self.matchWeek if self.matchWeek is not None else '',
			
 
				-      self.league if self.league is not None else '',
			
 
				-      self.homeTeam if self.homeTeam is not None else '',
			
 
				-      self.awayTeam if self.awayTeam is not None else '',
			
 
				-      self.single if self.single is not None else '',
			
 
				-      self.half if self.half is not None else '',
			
 
				-      self.whole if self.whole is not None else '',
			
 
				-      self.status if self.status is not None else ''
			
 
				+      self.matchTime,
			
 
				+      self.matchWeek,
			
 
				+      self.league,
			
 
				+      self.homeTeam,
			
 
				+      self.awayTeam,
			
 
				+      self.single,
			
 
				+      self.half,
			
 
				+      self.whole,
			
 
				+      self.status,
			
 
				+      self.playRes,
			
 
				     ))
			
 
				 
			
 
				   def persist(self):
			
--- a/crawl/spiders/__pycache__/__init__.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/__init__.cpython-38.pyc
--- a/crawl/spiders/__pycache__/basketball.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/basketball.cpython-38.pyc
--- a/crawl/spiders/__pycache__/basketball_match.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/basketball_match.cpython-38.pyc
--- a/crawl/spiders/__pycache__/basketball_price.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/basketball_price.cpython-38.pyc
--- a/crawl/spiders/__pycache__/basketball_result.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/basketball_result.cpython-38.pyc
--- a/crawl/spiders/__pycache__/football.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/football.cpython-38.pyc
--- a/crawl/spiders/__pycache__/football_match.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/football_match.cpython-38.pyc
--- a/crawl/spiders/__pycache__/football_price.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/football_price.cpython-38.pyc
--- a/crawl/spiders/__pycache__/football_result.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/football_result.cpython-38.pyc
--- a/crawl/spiders/__pycache__/lottery.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/lottery.cpython-38.pyc
--- a/crawl/spiders/__pycache__/util.cpython-38.pyc
+++ b/crawl/spiders/__pycache__/util.cpython-38.pyc
--- a/crawl/spiders/basketball_result.py
+++ b/crawl/spiders/basketball_result.py
@@ -61,14 +61,15 @@ class BasketballSpider(scrapy.Spider):
 
				         ''

			
 
				       )

			
 
				 

			
 
				-      matchLink = getNoneStr(tdNodeList[12].css('a').attrib.get('href'))

			
 
				-      matchId = re.sub(r'^.*pool_result.php\?id=', '', matchLink)

			
 
				+      matchLink = getNoneStr(tdNodeList[3].css('a').attrib.get('href'))

			
 
				+      matchId = re.sub(r'^.*bk_match_info.php\?m=', '', matchLink)

			
 
				 

			
 
				       # 加入到临时字典里

			
 
				       self._matchesMap[matchId] = bstResult

			
 
				 

			
 
				       if status == '已完成':

			
 
				-        yield scrapy.Request('https:' + matchLink, self.parsePrice, 'GET')

			
 
				+        link = tdNodeList[12].css('a').attrib.get('href')

			
 
				+        yield scrapy.Request('https:' + link, self.parsePrice, 'GET')

			
 
				       else:

			
 
				         logging.info("采集到数据 --> %s" % bstResult.toString())

			
 
				         bstResult.persist()

			
@@ -105,7 +106,7 @@ class BasketballSpider(scrapy.Spider):
 
				 

			
 
				     tableList = response.css('.kj-table')

			
 
				     if tableList is None or len(tableList) < 4:

			
 
				-      logging.error("抓取赔率结果失败")

			
 
				+      logging.error("抓取玩法结果失败")

			
 
				       return

			
 
				 

			
 
				     playRes = []

			
--- a/crawl/spiders/football_result.py
+++ b/crawl/spiders/football_result.py
@@ -2,10 +2,14 @@ import scrapy
 
				 import re
			
 
				 import logging
			
 
				 from crawl.comm.football import FTResult
			
 
				+from crawl.spiders.util import getNoneStr
			
 
				 
			
 
				 class FootballSpider(scrapy.Spider):
			
 
				   name = "football-result"
			
 
				 
			
 
				+  # 所有比赛
			
 
				+  _matchesMap = {}
			
 
				+
			
 
				   def start_requests(self):
			
 
				     url = 'https://info.sporttery.cn/football/match_result.php'
			
 
				     yield scrapy.Request(url, self.parseResult, 'GET')
			
@@ -20,16 +24,16 @@ class FootballSpider(scrapy.Spider):
 
				       if tdNodeList is None or len(tdNodeList) < 10:
			
 
				         continue
			
 
				 
			
 
				-      matchTime = tdNodeList[0].css('::text').get()
			
 
				-      matchWeek = tdNodeList[1].css('::text').get()
			
 
				-      league = tdNodeList[2].css('::text').get()
			
 
				-      leagueFullName = tdNodeList[2].attrib.get('title')  # 联赛全称
			
 
				-      leagueName = '|'.join((league if league is not None else '', leagueFullName if leagueFullName is not None else ''))
			
 
				+      matchTime = getNoneStr(tdNodeList[0].css('::text').get())
			
 
				+      matchWeek = getNoneStr(tdNodeList[1].css('::text').get())
			
 
				+      league = getNoneStr(tdNodeList[2].css('::text').get())
			
 
				+      leagueFullName = getNoneStr(tdNodeList[2].attrib.get('title'))  # 联赛全称
			
 
				+      leagueName = '|'.join((league, leagueFullName))
			
 
				       homeTeam = self.trimBrackets(tdNodeList[3].css('.zhu::text').get()) # 主队
			
 
				       awayTeam = self.trimBrackets(tdNodeList[3].css('.ke::text').get()) # 客队
			
 
				-      half = tdNodeList[4].css('span::text').get()
			
 
				-      whole = tdNodeList[5].css('span::text').get()
			
 
				-      status = tdNodeList[9].css('::text').get()
			
 
				+      half = getNoneStr(tdNodeList[4].css('span::text').get())
			
 
				+      whole = getNoneStr(tdNodeList[5].css('span::text').get())
			
 
				+      status = getNoneStr(tdNodeList[9].css('::text').get())
			
 
				 
			
 
				       ftResult = FTResult(
			
 
				         matchTime,
			
@@ -40,12 +44,23 @@ class FootballSpider(scrapy.Spider):
 
				         '', # 不需要单固
			
 
				         half,
			
 
				         whole,
			
 
				-        status
			
 
				+        status,
			
 
				+        ''
			
 
				       )
			
 
				-      
			
 
				-      logging.info("采集到数据 --> %s" % ftResult.toString())
			
 
				 
			
 
				-      ftResult.persist()
			
 
				+      matchLink = getNoneStr(tdNodeList[3].css('a').attrib.get('href'))
			
 
				+      matchId = re.sub(r'^.*fb_match_info.php\?m=', '', matchLink)
			
 
				+
			
 
				+      # 加入到临时字典里
			
 
				+      self._matchesMap[matchId] = ftResult
			
 
				+
			
 
				+
			
 
				+      if status == '已完成':
			
 
				+        link = "https://i.sporttery.cn/api/fb_match_info/get_pool_rs?mid=" + matchId
			
 
				+        yield scrapy.Request(link, self.parsePrice, 'GET')
			
 
				+      else:
			
 
				+        logging.info("采集到数据 --> %s" % ftResult.toString())
			
 
				+        ftResult.persist()
			
 
				 
			
 
				 
			
 
				     # 是否存在下一页
			
@@ -74,3 +89,32 @@ class FootballSpider(scrapy.Spider):
 
				     if str is None: return ''
			
 
				 
			
 
				     return re.sub(r'\(.*\)', '', str)
			
 
				+
			
 
				+  def parsePrice(self, response):
			
 
				+    logging.info("采集数据源 ---> %s" % response.url)
			
 
				+    
			
 
				+    data = response.json()
			
 
				+    res = data.get('result').get('pool_rs')
			
 
				+
			
 
				+    playRes = []
			
 
				+
			
 
				+    # 胜平负
			
 
				+    playRes.append(res['had']['prs_name'])
			
 
				+    # 让球胜平负
			
 
				+    playRes.append(res['hhad']['prs_name'])
			
 
				+    # 比分
			
 
				+    playRes.append(res['crs']['prs_name'])
			
 
				+    # 总进球
			
 
				+    playRes.append(res['ttg']['prs_name'])
			
 
				+    # 半全场
			
 
				+    playRes.append(res['hafu']['prs_name'])
			
 
				+
			
 
				+    matchId = re.sub(r'^.*get_pool_rs\?mid=', '', response.url)
			
 
				+    ftResult = self._matchesMap[matchId]
			
 
				+    ftResult.playRes = '|'.join(playRes)
			
 
				+
			
 
				+    #
			
 
				+    logging.info("采集到数据 --> %s" % ftResult.toString())
			
 
				+
			
 
				+    # 入库
			
 
				+    ftResult.persist()