张延森 4 years ago
Commit 4d4cd4bdff

basketball_job.sh (+2, -4)

@@ -7,8 +7,6 @@ source /etc/profile
 cd /opt/niucai/crawl
 
 # Matches
-scrapy crawl basketball >> logs/basketball.log
-# Odds
-# scrapy crawl basketball-price >> logs/basketball-price.log
+/usr/local/bin/scrapy crawl basketball
 # Results
-scrapy crawl basketball-result >> logs/basketball-result.log
+/usr/local/bin/scrapy crawl basketball-result
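(Note: the job scripts now invoke scrapy by absolute path, likely because cron runs with a minimal PATH that may not include /usr/local/bin, and the >> logs/*.log redirections are dropped since log output is now routed through Scrapy's own LOG_FILE settings, added below.)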

crawl/settings.py (+9, -0)

@@ -94,3 +94,12 @@ DATABASE = {
   'user': 'niucai',
   'password': '1qaz#EDC'
 }
+
+# CRITICAL 50
+# ERROR 40
+# WARNING 30
+# INFO 20
+# DEBUG 10
+# NOTSET 0
+LOG_LEVEL = 10
+LOG_FILE = 'logs/main.log'
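The numeric levels annotated above are the standard constants from Python's logging module, so LOG_LEVEL = 10 selects DEBUG-level output. A minimal sketch confirming the mapping:

import logging

# The numeric values noted in settings.py are the stdlib logging constants,
# so LOG_LEVEL = 10 is equivalent to LOG_LEVEL = logging.DEBUG.
assert logging.CRITICAL == 50
assert logging.ERROR == 40
assert logging.WARNING == 30
assert logging.INFO == 20
assert logging.DEBUG == 10
assert logging.NOTSET == 0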

crawl/spiders/basketball.py (+4, -0)

@@ -7,6 +7,10 @@ from crawl.spiders.util import getNoneStr
 
 class BasketballSpider(scrapy.Spider):
   name = 'basketball'
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/basketball-match.log'
+  }
 
   # all matches
   _matchesMap = {}
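These per-spider custom_settings override the project-wide LOG_LEVEL and LOG_FILE from crawl/settings.py, so each spider writes its own log at INFO level while the project default stays at DEBUG. A minimal sketch of the pattern (the spider name and log path here are placeholders); note that each spider module also needs import logging for logging.INFO, which the lottery.py hunk below adds explicitly:

import logging
import scrapy

class ExampleSpider(scrapy.Spider):
    # Hypothetical spider illustrating the override pattern: values in
    # custom_settings take precedence over the project settings.py,
    # giving this spider its own log level and log file.
    name = 'example'
    custom_settings = {
        'LOG_LEVEL': logging.INFO,       # INFO (20) instead of the global DEBUG (10)
        'LOG_FILE': 'logs/example.log',  # placeholder path
    }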

crawl/spiders/basketball_match.py (+0, -69)

@@ -1,69 +0,0 @@
-import scrapy
-import time
-from crawl.comm.basketball import BSTMatch
-
-class BasketballSpider(scrapy.Spider):
-  name = "basketball-match"
-
-  def start_requests(self):
-    # matches open for betting
-    url = "https://www.lottery.gov.cn/basketball/match_list.jspx"
-    yield scrapy.Request(url, self.parseMatch)
-  
-  def parseMatch(self, response):
-    cssMain = ".xxsj table tr"
-    cssDetail = "td"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:]:  # skip the header row
-      prop = node.css(cssDetail)
-
-      # rows with fewer than 2 <td> cells are invalid
-      if len(prop) < 2:
-        continue
-
-      matchWeek = prop[0].css('::text').get()
-      league = prop[1].css('::text').get()
-      team = prop[2].css('::text').get().split('VS')
-      homeTeam = team[1].strip()
-      awayTeam = team[0].strip()
-      matchTime = prop[3].css('::text').get()
-      status = prop[5].css('::text').get()
-      wl = self.parsePassWay(prop[6].css('img'))
-      wls = self.parsePassWay(prop[7].css('img'))
-      score = self.parsePassWay(prop[8].css('img'))
-      points = self.parsePassWay(prop[9].css('img'))
-
-      BSTMatch(
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        status,
-        wl,
-        wls,
-        score,
-        points
-      ).persist()
-  
-  def parsePassWay(self, img):
-    # not yet on sale
-    if img is None:
-      return 'wait'
-
-    # image URL
-    src = img.attrib['src']
-
-    # both single and parlay betting on sale
-    if "ball2_11.png" in src:
-      return 'pass&single'
-
-    # only parlay betting on sale
-    elif "ball2_1.png" in src:
-      return 'pass'
-
-    # this play not on sale
-    else:
-      return 'no'

crawl/spiders/basketball_price.py (+0, -72)

@@ -1,72 +0,0 @@
-import scrapy
-import time
-from crawl.comm.basketball import BSTPrice, WLOdds, WLSpreadOdds, ScoreResult, PointsResult
-
-class BasketballSpider(scrapy.Spider):
-  name = "basketball-price"
-
-  def start_requests(self):
-    # odds
-    url = "https://www.lottery.gov.cn/basketball/counter.jspx"
-    yield scrapy.Request(url, self.parsePrice)
-  
-  def parsePrice(self, response):
-    cssMain = "#content .article .articleCon .section"
-    cssDetail = ".saishi"
-    cssOther = ".saishiCon table td"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches:
-
-      # match ID
-      matchId = node.attrib['match_id']
-      matchTime = node.attrib['match_time']
-      matchWeek = node.attrib['match_week']
-      league = node.attrib['league_val']
-
-      # other related attributes
-      details = node.css(cssDetail).css("td")
-      homeTeam = details[4].css('::text').get()
-      awayTeam = details[3].css('::text').get()
-
-      match = BSTPrice(
-        matchId,
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        WLOdds(
-          details[6].css('strong::text').get(),
-          details[5].css('strong::text').get()
-        ),
-        WLSpreadOdds(
-          details[9].css('strong::text').get(),
-          details[7].css('strong::text').get(),
-          details[8].css('strong font::text').get()
-        ),
-        ScoreResult(
-          details[10].css('strong::text').get(),
-          details[12].css('strong::text').get(),
-          details[11].css('strong::text').get()
-        )
-      )
-
-      # points margin
-      pointsResult = PointsResult()
-      datas1 = []
-      lst = node.css(cssOther)
-      for it1 in lst[9:15]:
-        datas1 += [it1.css('strong::text').get()]
-
-      datas2 = []
-      for it2 in lst[16:22]:
-        datas2 += [it2.css('strong::text').get()]
-
-      pointsResult.datas(datas2 + datas1)
-      match.pointsResult = pointsResult
-
-      # persist to database
-      match.persist()
-  

crawl/spiders/basketball_result.py (+4, -0)

@@ -7,6 +7,10 @@ from crawl.spiders.util import getNoneStr
 
 class BasketballSpider(scrapy.Spider):
   name = "basketball-result"
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/basketball-result.log'
+  }
 
   # all matches
   _matchesMap = {}

crawl/spiders/basketball_result.py.bak (+0, -61)

@@ -1,61 +0,0 @@
-import scrapy
-import time
-from crawl.comm.basketball import BSTResult
-
-class BasketballSpider(scrapy.Spider):
-  name = "basketball-result"
-
-  def start_requests(self):
-    # draw results
-    today = time.strftime("%Y-%m-%d")
-    url = "https://www.lottery.gov.cn/basketball/result_99.jspx?startDate="+today+"&endDate="+today+"&f_league_id=0&f_league_name=%E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B&single=off"
-    yield scrapy.Request(url, self.parseResult)
-
-  def parseResult(self, response):
-    cssMain = ".xxsj table tr"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:-1]:  # skip the header row and the trailing row
-      prop = node.css("td")
-      if len(prop) < 7:
-        continue
-
-      matchTime = prop[0].css('::text').get()
-      matchWeek = prop[1].css('::text').get()
-      league = prop[2].css('::text').get()
-      team = prop[3].css('a::text').getall()
-      if team is None or len(team) == 0:
-        team = prop[3].css('::text').get().split('VS')
-      homeTeam = team[1].strip()
-      awayTeam = team[0].strip()
-      single = self.isSingle(prop[3].attrib.get('class'))
-      tmp = prop[4].css('::text').get()
-      score = tmp.strip() if tmp is not None else ""
-      tmp = prop[5].css('::text').get()
-      status = tmp.strip() if tmp is not None else ""
-
-      bstResult = BSTResult(
-        matchTime,
-        matchWeek,
-        league,
-        homeTeam,
-        awayTeam,
-        single,
-        score,
-        status
-      )
-      bstResult.persist()
-
-      # if status == '已完成':
-      #   resURI = prop[6].css('a').attrib.get('href')
-      #   yield scrapy.Request('https://www.lottery.gov.cn' + resURI, self.parseResDetail, 'GET')
-
-  def isSingle(self, eleCls):
-    if eleCls is None:
-      return '0'
-    
-    if 'dan' in eleCls:
-      return '1'
-    else:
-      return '0'

crawl/spiders/football.py (+4, -0)

@@ -6,6 +6,10 @@ from crawl.spiders.util import getNoneStr
 
 class FootballSpider(scrapy.Spider):
   name = 'football'
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/football-match.log'
+  }
 
   # all matches
   _matchesMap = {}

crawl/spiders/football_match.py (+0, -68)

@@ -1,68 +0,0 @@
-import scrapy
-import time
-from crawl.comm.football import FTMatch
-
-class FootballSpider(scrapy.Spider):
-  name = "football-match"
-
-  def start_requests(self):
-    # matches open for betting
-    url = "https://www.lottery.gov.cn/football/match_list.jspx"
-    yield scrapy.Request(url, self.parseMatch)
-  
-  def parseMatch(self, response):
-    cssMain = ".xxsj table table tr"
-    cssDetail = "td"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:]:  # skip the header row
-      prop = node.css(cssDetail)
-
-      # rows with fewer than 2 <td> cells are invalid
-      if len(prop) < 2:
-        continue
-
-      matchWeek = prop[0].css('::text').get()
-      league = prop[1].css('::text').get()
-      homeTeam = prop[2].css('.zhu::text').get()
-      awayTeam = prop[2].css('.ke::text').get()
-      matchTime = prop[3].css('::text').get()
-      wdl = self.parsePassWay(prop[6].css('img'))
-      wdls = self.parsePassWay(prop[7].css('img'))
-      score = self.parsePassWay(prop[8].css('img'))
-      points = self.parsePassWay(prop[9].css('img'))
-      double = self.parsePassWay(prop[10].css('img'))
-
-      FTMatch(
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        wdl,
-        wdls,
-        score,
-        points,
-        double
-      ).persist()
-  
-  def parsePassWay(self, img):
-    # not yet on sale
-    if img is None:
-      return 'wait'
-
-    # image URL
-    src = img.attrib['src']
-
-    # both single and parlay betting on sale
-    if "ball11.png" in src:
-      return 'pass&single'
-
-    # only parlay betting on sale
-    elif "ball1.png" in src:
-      return 'pass'
-
-    # this play not on sale
-    else:
-      return 'no'

crawl/spiders/football_price.py (+0, -96)

@@ -1,96 +0,0 @@
-import scrapy
-import time
-from crawl.comm.football import FTPrice, WDLOdds, WDLSpreadOdds, ScoreResult, PointsResult, DoubleResult
-
-class FootballSpider(scrapy.Spider):
-  name = "football-price"
-
-  def start_requests(self):
-    # odds
-    url = "https://www.lottery.gov.cn/football/counter.jspx"
-    yield scrapy.Request(url, self.parsePrice)
-  
-  def parsePrice(self, response):
-    cssMain = "#content .article .articleCon .section"
-    cssDetail = ".saishi"
-    cssOther = ".saishiCon"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches:
-
-      # match ID
-      matchId = node.attrib['match_id']
-      matchTime = node.attrib['match_time']
-
-      # other related attributes
-      details = node.css(cssDetail).css("td")
-      matchWeek = details[0].css('::text').get()
-      league = details[1].css('::text').get()
-      homeTeam = details[3].css('::text').get()
-      awayTeam = details[4].css('::text').get()
-
-      match = FTPrice(
-        matchId,
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        WDLOdds(
-          details[5].css('::text').get(),
-          details[6].css('::text').get(),
-          details[7].css('::text').get()
-        ),
-        WDLSpreadOdds(
-          details[9].css('::text').get(),
-          details[10].css('::text').get(),
-          details[11].css('::text').get(),
-          details[8].css('::text').get()
-        )
-      )
-
-      # exact score, total points and half/full-time results are in separate tables
-      otherOdds = node.css(cssOther)
-      for index, otherNode in enumerate(otherOdds):
-
-        # odds for exact score
-        if index == 0:
-          scoreResult = ScoreResult()
-          datas = []
-          lst = otherNode.css('td')
-
-          # the table's 3 <tr> rows
-          for it in lst[1:14] + lst[15:20] + lst[21:34]:
-            datas += [it.css('strong::text').get()]
-
-          scoreResult.datas(datas)
-          match.scoreResult = scoreResult
-
-        # odds for total points
-        elif index == 1:
-          pointsResult = PointsResult()
-          datas = []
-          lst = otherNode.css('td')
-
-          for it in lst[2:10]:
-            datas += [it.css('strong::text').get()]
-
-          pointsResult.datas(datas)
-          match.pointsResult = pointsResult
-
-        # odds for half/full-time result
-        else:
-          doubleResult = DoubleResult()
-          datas = []
-          lst = otherNode.css('td')
-
-          for it in lst[2:11]:
-            datas += [it.css('strong::text').get()]
-
-          doubleResult.datas(datas)
-          match.doubleResult = doubleResult
-
-      # persist to database
-      match.persist()
-  

crawl/spiders/football_result.py (+5, -0)

@@ -6,6 +6,11 @@ from crawl.spiders.util import getNoneStr
 
 class FootballSpider(scrapy.Spider):
   name = "football-result"
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/football-result.log'
+  }
+
 
   # all matches
   _matchesMap = {}

crawl/spiders/lottery.py (+5, -0)

@@ -1,10 +1,15 @@
 import scrapy
+import logging
 
 from crawl.comm.lottery import LotteryResult, LotteryResultDetail
 
 
 class LotterySpider(scrapy.Spider):
   name = "lottery"
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/lottery.log'
+  }
 
   def start_requests(self):
     # Super Lotto (大乐透)

football_job.sh (+2, -4)

@@ -7,8 +7,6 @@ source /etc/profile
 cd /opt/niucai/crawl
 
 # Matches
-scrapy crawl football >> logs/football.log
-# Odds
-# scrapy crawl football-price >> logs/football-price.log
+/usr/local/bin/scrapy crawl football
 # Results
-scrapy crawl football-result >> logs/football-result.log
+/usr/local/bin/scrapy crawl football-result

lottery_job.sh (+1, -1)

@@ -6,4 +6,4 @@ source /etc/profile
 
 cd /opt/niucai/crawl
 
-scrapy crawl lottery >> logs/lottery.log
+/usr/local/bin/scrapy crawl lottery