张延森 committed 4 years ago
Commit 686e36a12a
24 changed files, with 372 additions and 59 deletions
  1. BIN      crawl/__pycache__/__init__.cpython-38.pyc
  2. BIN      crawl/__pycache__/settings.cpython-38.pyc
  3. BIN      crawl/comm/__pycache__/__init__.cpython-38.pyc
  4. BIN      crawl/comm/__pycache__/basketball.cpython-38.pyc
  5. BIN      crawl/comm/__pycache__/football.cpython-38.pyc
  6. BIN      crawl/comm/__pycache__/lottery.cpython-38.pyc
  7. BIN      crawl/comm/__pycache__/mydb.cpython-38.pyc
  8. +7 -2    crawl/comm/basketball.py
  9. BIN      crawl/spiders/__pycache__/__init__.cpython-38.pyc
  10. BIN     crawl/spiders/__pycache__/basketball.cpython-38.pyc
  11. BIN     crawl/spiders/__pycache__/basketball_match.cpython-38.pyc
  12. BIN     crawl/spiders/__pycache__/basketball_price.cpython-38.pyc
  13. BIN     crawl/spiders/__pycache__/basketball_result.cpython-38.pyc
  14. BIN     crawl/spiders/__pycache__/football.cpython-38.pyc
  15. BIN     crawl/spiders/__pycache__/football_match.cpython-38.pyc
  16. BIN     crawl/spiders/__pycache__/football_price.cpython-38.pyc
  17. BIN     crawl/spiders/__pycache__/football_result.cpython-38.pyc
  18. BIN     crawl/spiders/__pycache__/lottery.cpython-38.pyc
  19. BIN     crawl/spiders/__pycache__/util.cpython-38.pyc
  20. +158 -0 crawl/spiders/basketball.py
  21. +139 -56 crawl/spiders/basketball_result.py
  22. +61 -0  crawl/spiders/basketball_result.py.bak
  23. +1 -1   crawl/spiders/football_result.py
  24. +6 -0   crawl/spiders/util.py

BIN  crawl/__pycache__/__init__.cpython-38.pyc

BIN  crawl/__pycache__/settings.cpython-38.pyc

BIN  crawl/comm/__pycache__/__init__.cpython-38.pyc

BIN  crawl/comm/__pycache__/basketball.cpython-38.pyc

BIN  crawl/comm/__pycache__/football.cpython-38.pyc

BIN  crawl/comm/__pycache__/lottery.cpython-38.pyc

BIN  crawl/comm/__pycache__/mydb.cpython-38.pyc

+7 -2  crawl/comm/basketball.py

@@ -286,7 +286,10 @@ class BSTResult:
                score,

                # status
-               status
+               status,
+
+               # play-type result
+               playRes = None
                ):
    self.matchTime = matchTime
    self.matchWeek = matchWeek
@@ -296,6 +299,7 @@ class BSTResult:
    self.single = single
    self.score = score
    self.status = status
+    self.playRes = playRes

  def toString(self):
    return ';'.join((
@@ -306,7 +310,8 @@ class BSTResult:
      self.awayTeam,
      self.single,
      self.score,
-      self.status
+      self.status,
+      self.playRes
    ))

  def persist(self):
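
The new playRes parameter defaults to None, but toString() above feeds it straight into ';'.join(...), which only accepts strings. A minimal sketch of that constraint (a reduced stand-in for BSTResult, keeping just the fields involved):

# Sketch: ';'.join() raises TypeError on None, so callers must pass a
# string for playRes rather than rely on the new default.
class Result:
  def __init__(self, status, playRes=None):
    self.status = status
    self.playRes = playRes

  def toString(self):
    return ';'.join((self.status, self.playRes))

print(Result('已完成', '胜|大分').toString())  # '已完成;胜|大分'
Result('已完成').toString()  # TypeError: sequence item 1: expected str instance, NoneType found

This is presumably why the reworked basketball_result.py below passes '' for the new field instead of leaving the default in place.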

BIN  crawl/spiders/__pycache__/__init__.cpython-38.pyc

BIN  crawl/spiders/__pycache__/basketball.cpython-38.pyc

BIN  crawl/spiders/__pycache__/basketball_match.cpython-38.pyc

BIN  crawl/spiders/__pycache__/basketball_price.cpython-38.pyc

BIN  crawl/spiders/__pycache__/basketball_result.cpython-38.pyc

BIN  crawl/spiders/__pycache__/football.cpython-38.pyc

BIN  crawl/spiders/__pycache__/football_match.cpython-38.pyc

BIN  crawl/spiders/__pycache__/football_price.cpython-38.pyc

BIN  crawl/spiders/__pycache__/football_result.cpython-38.pyc

BIN  crawl/spiders/__pycache__/lottery.cpython-38.pyc

BIN  crawl/spiders/__pycache__/util.cpython-38.pyc

+158 -0  crawl/spiders/basketball.py

@@ -0,0 +1,158 @@
+import scrapy
+import re
+import logging
+from crawl.comm.basketball import BSTMatch, BSTPrice, WLOdds, WLSpreadOdds, ScoreResult, PointsResult
+from crawl.spiders.util import getNoneStr
+
+
+class BasketballSpider(scrapy.Spider):
+  name = 'basketball'
+
+  # All matches
+  _matchesMap = {}
+
+  def start_requests(self):
+    # Matches accepting bets
+    url = 'https://info.sporttery.cn/basketball/match_list.php'
+    yield scrapy.Request(url, self.parseMatch, 'GET')
+
+  def parseMatch(self, response):
+    cssOfMatches = '.all-wrap > .match_list .m-tab tr'
+
+    # All matches
+    matches = response.css(cssOfMatches)
+    # Current match
+    for node in matches:
+      tdNodeList = node.css('td')
+
+      matchWeek = getNoneStr(tdNodeList[0].css('::text').get())  # match number
+      league = getNoneStr(tdNodeList[1].css('::text').get())  # league abbreviation
+      leagueFullName = getNoneStr(tdNodeList[1].attrib.get('title'))  # full league name
+      leagueName = '|'.join((league, leagueFullName))
+      homeTeam = getNoneStr(tdNodeList[2].css('.zhu::text').get())  # home team
+      awayTeam = getNoneStr(tdNodeList[2].css('.ke::text').get())  # away team
+      matchTime = getNoneStr(tdNodeList[3].css('::text').get())  # match time
+      saleStatus = getNoneStr(tdNodeList[5].css('::text').get())  # sale status
+      wdl = self.parsePassWay(tdNodeList[6].css('div'))
+      wdls = self.parsePassWay(tdNodeList[7].css('div'))
+      score = self.parsePassWay(tdNodeList[8].css('div'))
+      points = self.parsePassWay(tdNodeList[9].css('div'))
+
+      matchLink = tdNodeList[2].css('a').attrib.get('href')
+      matchId = re.sub(r'^.*m=', '', matchLink)
+
+      # Save to database
+      match = BSTMatch(
+        matchWeek,
+        matchTime,
+        leagueName,
+        homeTeam,
+        awayTeam,
+        saleStatus,
+        wdl,
+        wdls,
+        score,
+        points
+      )
+      match.persist()
+
+      # Add to the temporary dict
+      self._matchesMap[matchId] = match
+
+      # Request the match odds
+      if saleStatus == '已开售':  # "on sale"
+        priceAPI = 'https://info.sporttery.cn/basketball/pool_result.php?id=' + matchId
+        yield scrapy.Request(priceAPI, self.parsePrice, 'GET')
+
+  def parsePassWay(self, div):
+    # Not yet on sale
+    if div is None:
+      return 'wait'
+
+    # Address
+    cls = div.attrib.get('class')
+    if cls is None:
+      return 'wait'
+
+    # Both single and parlay betting on sale
+    if "u-dan" in cls:
+      return 'pass&single'
+
+    # Only parlay betting on sale
+    elif "u-cir" in cls:
+      return 'pass'
+
+    # This play type not on sale
+    else:
+      return 'wait'
+
+  def parsePrice(self, response):
+    logging.info("Collecting from data source ---> %s" % response.url)
+
+    tableList = response.css('.kj-table')
+    if tableList is None or len(tableList) < 4:
+      logging.error("Failed to scrape odds results")
+      return
+
+    wl = None
+    wls = None
+    score = None
+    points = None
+
+    for i, tab in enumerate(tableList):
+      # Take the latest record
+      tr = tab.css("tr")[-1]
+      tdList = tr.css('td')
+
+      try:
+        if i == 0:
+          # Win/lose
+          wl = WLOdds(getNoneStr(tdList[2].css("::text").get()), getNoneStr(tdList[1].css("::text").get()))
+        elif i == 1:
+          # Handicap win/lose
+          wls = WLSpreadOdds(getNoneStr(tdList[3].css("::text").get()), getNoneStr(tdList[1].css("::text").get()), getNoneStr(tdList[2].css("::text").get()))
+        elif i == 2:
+          # Over/under
+          score = ScoreResult(getNoneStr(tdList[1].css("::text").get()), getNoneStr(tdList[3].css("::text").get()), getNoneStr(tdList[2].css("::text").get()))
+        elif i == 3:
+          # Winning margin
+          points = PointsResult(
+            getNoneStr(tdList[7].css("::text").get()),
+            getNoneStr(tdList[8].css("::text").get()),
+            getNoneStr(tdList[9].css("::text").get()),
+            getNoneStr(tdList[10].css("::text").get()),
+            getNoneStr(tdList[11].css("::text").get()),
+            getNoneStr(tdList[12].css("::text").get()),
+            getNoneStr(tdList[1].css("::text").get()),
+            getNoneStr(tdList[2].css("::text").get()),
+            getNoneStr(tdList[3].css("::text").get()),
+            getNoneStr(tdList[4].css("::text").get()),
+            getNoneStr(tdList[5].css("::text").get()),
+            getNoneStr(tdList[6].css("::text").get())
+          )
+        else:
+          continue
+      except:
+        continue
+
+    matchId = re.sub(r'^.*pool_result.php\?id=', '', response.url)
+    match = self._matchesMap[matchId]
+
+    bstprice = BSTPrice(
+      matchId,
+      match.matchWeek,
+      match.matchTime,
+      match.league,
+      match.homeTeam,
+      match.awayTeam,
+      wl,
+      wls,
+      score,
+      points
+    )
+
+    #
+    logging.info("Collected data --> %s" % bstprice.toString())
+
+    # Save to database
+    bstprice.persist()
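
For reference, a spider like this is normally launched with the Scrapy CLI (scrapy crawl basketball) from the project root. A minimal programmatic runner sketch, not part of this commit, assuming the settings module is the crawl/settings.py whose compiled .pyc appears above:

# Hypothetical runner: assumes scrapy is installed and the project
# settings resolve to crawl.settings.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from crawl.spiders.basketball import BasketballSpider

process = CrawlerProcess(get_project_settings())
process.crawl(BasketballSpider)
process.start()  # blocks until the crawl finishes

Note that in calls like scrapy.Request(url, self.parseMatch, 'GET'), the 'GET' rides along as the third positional argument, method, so it matches the default anyway.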

+139 -56  crawl/spiders/basketball_result.py

@@ -1,56 +1,139 @@
-import scrapy
-import time
-from crawl.comm.basketball import BSTResult
-
-class BasketballSpider(scrapy.Spider):
-  name = "basketball-result"
-
-  def start_requests(self):
-    # Prize results
-    today = time.strftime("%Y-%m-%d")
-    url = "https://www.lottery.gov.cn/basketball/result_99.jspx?startDate="+today+"&endDate="+today+"&f_league_id=0&f_league_name=%E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B&single=off"
-    yield scrapy.Request(url, self.parseResult)
-
-  def parseResult(self, response):
-    cssMain = ".xxsj table tr"
-
-    # Get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:-1]:  # ignore the header row and the last row
-      prop = node.css("td")
-      if len(prop) < 7:
-        continue
-
-      matchTime = prop[0].css('::text').get()
-      matchWeek = prop[1].css('::text').get()
-      league = prop[2].css('::text').get()
-      team = prop[3].css('a::text').getall()
-      if team is None or len(team) == 0:
-        team = prop[3].css('::text').get().split('VS')
-      homeTeam = team[0].strip()
-      awayTeam = team[1].strip()
-      single = self.isSingle(prop[3].attrib.get('class'))
-      tmp = prop[4].css('::text').get()
-      score = tmp.strip() if tmp is not None else ""
-      tmp = prop[5].css('::text').get()
-      status = tmp.strip() if tmp is not None else ""
-
-      BSTResult(
-        matchTime,
-        matchWeek,
-        league,
-        homeTeam,
-        awayTeam,
-        single,
-        score,
-        status
-      ).persist()
-
-  def isSingle(self, eleCls):
-    if eleCls is None:
-      return '0'
-
-    if 'dan' in eleCls:
-      return '1'
-    else:
-      return '0'
+import scrapy
+import re
+import logging
+from crawl.comm.basketball import BSTResult
+from crawl.spiders.util import getNoneStr
+
+
+class BasketballSpider(scrapy.Spider):
+  name = "basketball-result"
+
+  # All matches
+  _matchesMap = {}
+
+  def start_requests(self):
+    url = 'https://info.sporttery.cn/basketball/match_result.php'
+    yield scrapy.Request(url, self.parseResult, 'GET')
+
+  def parseResult(self, response):
+    # Process the match results
+    cssOfMatch = '.all-wrap > .match_list .m-tab tr'
+    matches = response.css(cssOfMatch)
+    if matches is None: return
+    for matchNode in matches:
+      tdNodeList = matchNode.css('td')
+      if tdNodeList is None or len(tdNodeList) < 10:
+        continue
+
+      matchTime = getNoneStr(tdNodeList[0].css('::text').get())
+      matchWeek = getNoneStr(tdNodeList[1].css('::text').get())
+      league = getNoneStr(tdNodeList[2].css('::text').get())
+      leagueFullName = getNoneStr(tdNodeList[2].attrib.get('title'))  # full league name
+      leagueName = '|'.join((league, leagueFullName))
+      homeTeam = self.trimBrackets(getNoneStr(tdNodeList[3].css('.zhu::text').get()))  # home team
+      awayTeam = self.trimBrackets(getNoneStr(tdNodeList[3].css('.ke::text').get()))  # away team
+
+      half = ''
+      halfSpan = tdNodeList[4].css('span')
+      if halfSpan is not None and len(halfSpan) > 0:
+        part1 = getNoneStr(tdNodeList[4].css("span::text")[0].get())
+        part2 = getNoneStr(tdNodeList[4].css("span::text")[1].get())
+        part3 = getNoneStr(tdNodeList[5].css("span::text")[0].get())
+        part4 = getNoneStr(tdNodeList[5].css("span::text")[1].get())
+        part5 = getNoneStr(tdNodeList[6].css('::text').get())
+        half = '|'.join((part1, part2, part3, part4, part5))
+
+      whole = getNoneStr(tdNodeList[7].css('span::text').get())
+      status = getNoneStr(tdNodeList[11].css('::text').get())
+
+      if whole == '无效场次':  # "void match"
+        status = '无效场次'
+
+      bstResult = BSTResult(
+        matchTime,
+        matchWeek,
+        leagueName,
+        homeTeam,
+        awayTeam,
+        half,
+        whole,
+        status,
+        ''
+      )
+
+      matchLink = getNoneStr(tdNodeList[12].css('a').attrib.get('href'))
+      matchId = re.sub(r'^.*pool_result.php\?id=', '', matchLink)
+
+      # Add to the temporary dict
+      self._matchesMap[matchId] = bstResult
+
+      if status == '已完成':  # "finished"
+        yield scrapy.Request('https:' + matchLink, self.parsePrice, 'GET')
+      else:
+        logging.info("Collected data --> %s" % bstResult.toString())
+        bstResult.persist()
+
+    # Is there a next page?
+    pgNodes = response.css('.m-page .u-pg2')
+    if pgNodes is None or len(pgNodes) < 1: return
+
+    # Total number of pages
+    total = pgNodes[-1].css('a::text').get()
+    if total is None or total == '':
+      total = 0
+
+    # Current page number
+    current = re.sub(r'^.*match_result.php[?page=]*', '', response.url)
+    if current is None or current == '':
+      current = 1
+
+    if int(current) >= int(total): return
+
+    # Next page
+    nextPG = int(current) + 1
+    url = 'https://info.sporttery.cn/football/match_result.php?page=' + str(nextPG)
+    yield scrapy.Request(url, self.parseResult, 'GET')
+
+
+  def trimBrackets(self, str):
+    if str is None: return ''
+
+    return re.sub(r'\(.*\)', '', str)
+
+  def parsePrice(self, response):
+    logging.info("Collecting from data source ---> %s" % response.url)
+
+    tableList = response.css('.kj-table')
+    if tableList is None or len(tableList) < 4:
+      logging.error("Failed to scrape odds results")
+      return
+
+    playRes = []
+
+    for i, tab in enumerate(tableList):
+
+      try:
+        if i == 0 or i == 3:
+          # Win/lose or winning margin
+          tr = tab.css("tr")[0]
+          tdList = tr.css('td')
+          playRes.append(getNoneStr(tdList[1].css("span::text").get()).replace(' ', ''))
+        elif i == 1 or i == 2:
+          # Handicap win/lose or over/under
+          tr = tab.css("tr")[-1]
+          tdList = tr.css('td')
+          playRes.append(getNoneStr(tdList[-1].css("span::text").get()).replace(' ', ''))
+        else:
+          continue
+      except:
+        continue
+
+    matchId = re.sub(r'^.*pool_result.php\?id=', '', response.url)
+    bstResult = self._matchesMap[matchId]
+    bstResult.playRes = '|'.join(playRes)
+
+    #
+    logging.info("Collected data --> %s" % bstResult.toString())
+
+    # Save to database
+    bstResult.persist()
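
The pagination step above extracts the current page with re.sub(r'^.*match_result.php[?page=]*', '', response.url). The bracketed part is a character class, not the literal text ?page=: it strips any trailing run of the characters ?, p, a, g, e, and =, which is what lets a single pattern reduce both the bare listing URL and a ?page=N URL to just the page number. A quick sketch of that behavior:

import re

# Character-class trick from parseResult: '[?page=]*' consumes an
# optional '?page=' suffix character by character.
pattern = r'^.*match_result.php[?page=]*'

bare  = 'https://info.sporttery.cn/basketball/match_result.php'
paged = 'https://info.sporttery.cn/basketball/match_result.php?page=7'

print(repr(re.sub(pattern, '', bare)))   # ''  -> falls back to page 1
print(repr(re.sub(pattern, '', paged)))  # '7'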

+61 -0  crawl/spiders/basketball_result.py.bak

@@ -0,0 +1,61 @@
+import scrapy
+import time
+from crawl.comm.basketball import BSTResult
+
+class BasketballSpider(scrapy.Spider):
+  name = "basketball-result"
+
+  def start_requests(self):
+    # Prize results
+    today = time.strftime("%Y-%m-%d")
+    url = "https://www.lottery.gov.cn/basketball/result_99.jspx?startDate="+today+"&endDate="+today+"&f_league_id=0&f_league_name=%E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B&single=off"
+    yield scrapy.Request(url, self.parseResult)
+
+  def parseResult(self, response):
+    cssMain = ".xxsj table tr"
+
+    # Get all matches
+    matches = response.css(cssMain)
+    for node in matches[1:-1]:  # ignore the header row and the last row
+      prop = node.css("td")
+      if len(prop) < 7:
+        continue
+
+      matchTime = prop[0].css('::text').get()
+      matchWeek = prop[1].css('::text').get()
+      league = prop[2].css('::text').get()
+      team = prop[3].css('a::text').getall()
+      if team is None or len(team) == 0:
+        team = prop[3].css('::text').get().split('VS')
+      homeTeam = team[1].strip()
+      awayTeam = team[0].strip()
+      single = self.isSingle(prop[3].attrib.get('class'))
+      tmp = prop[4].css('::text').get()
+      score = tmp.strip() if tmp is not None else ""
+      tmp = prop[5].css('::text').get()
+      status = tmp.strip() if tmp is not None else ""
+
+      bstResult = BSTResult(
+        matchTime,
+        matchWeek,
+        league,
+        homeTeam,
+        awayTeam,
+        single,
+        score,
+        status
+      )
+      bstResult.persist()
+
+      # if status == '已完成':
+      #   resURI = prop[6].css('a').attrib.get('href')
+      #   yield scrapy.Request('https://www.lottery.gov.cn' + resURI, self.parseResDetail, 'GET')
+
+  def isSingle(self, eleCls):
+    if eleCls is None:
+      return '0'
+
+    if 'dan' in eleCls:
+      return '1'
+    else:
+      return '0'

+1 -1  crawl/spiders/football_result.py

@@ -67,7 +67,7 @@ class FootballSpider(scrapy.Spider):
     # Next page
     nextPG = int(current) + 1
     url = 'https://info.sporttery.cn/football/match_result.php?page=' + str(nextPG)
-    return scrapy.Request(url, self.parseResult, 'GET')
+    yield scrapy.Request(url, self.parseResult, 'GET')


  def trimBrackets(self, str):
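
This one-line fix matters because parseResult yields its other requests, which makes it a generator function: inside a generator, return scrapy.Request(...) ends iteration and the returned value is discarded by anything that simply iterates the callback, as Scrapy does, so the next-page request was never scheduled. A minimal sketch of the difference, using plain strings as stand-ins for requests:

# Stand-in demo: Scrapy iterates a callback's results, so a 'return'
# inside a generator silently drops its value while a 'yield' delivers it.
def with_return():
  yield 'item'
  return 'next-page'        # discarded by plain iteration

def with_yield():
  yield 'item'
  yield 'next-page'         # delivered like any other output

print(list(with_return()))  # ['item']
print(list(with_yield()))   # ['item', 'next-page']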

+6 -0  crawl/spiders/util.py

@@ -0,0 +1,6 @@
+
+def getNoneStr(s):
+  if s is None:
+    return ''
+  else:
+    return s.strip()
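
getNoneStr is the small null-safe helper the new spiders lean on: Scrapy's .get() returns None when a node is missing, and this keeps every downstream strip, join, and comparison working on plain strings. For example:

from crawl.spiders.util import getNoneStr

print(getNoneStr('  已完成  '))  # '已完成'  (whitespace stripped)
print(getNoneStr(None))          # ''  (instead of AttributeError on .strip())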