Your Name · 4 years ago
Commit 20ac2ee2bc

Binary  crawl/comm/__pycache__/football.cpython-38.pyc


+66 -2  crawl/comm/football.py

@@ -1,5 +1,6 @@
 from .mydb import MyDB
 
+# Match
 class FTMatch:
   def __init__(self,
     # Match week code
@@ -437,7 +438,6 @@ class DoubleResult:
     self.ld = datas[7]
     self.ll = datas[8]
 
-
   def toString(self):
     return '|'.join((
       self.ww,
@@ -449,4 +449,68 @@ class DoubleResult:
       self.lw,
       self.ld,
       self.ll
-    ))
+    ))
+
+# Result
+class FTResult:
+  def __init__(self,
+    # Match time
+    matchTime,
+
+    # Match week
+    matchWeek,
+
+    # League
+    league,
+
+    # Home team
+    homeTeam,
+
+    # Away team
+    awayTeam,
+
+    # Whether the match is open to single (non-parlay) betting
+    single,
+
+    # Half-time score
+    half,
+
+    # Full-time score
+    whole,
+
+    # Status
+    status
+  ):
+    self.matchTime = matchTime
+    self.matchWeek = matchWeek
+    self.league = league
+    self.homeTeam = homeTeam
+    self.awayTeam = awayTeam
+    self.single = single
+    self.half = half
+    self.whole = whole
+    self.status = status
+
+  def toString(self):
+    return ';'.join((
+      self.matchTime,
+      self.matchWeek,
+      self.league,
+      self.homeTeam,
+      self.awayTeam,
+      self.single,
+      self.half,
+      self.whole,
+      self.status
+    ))
+
+  def persist(self):
+    cursor = MyDB.getCursor()
+    if cursor is None:
+      return
+
+    sql = "insert into ta_crawl_football(data_type, content) values('result', %s)"
+    cursor.execute(sql, self.toString())
+    MyDB.commit()
+    cursor.close()
+
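For reference, a minimal sketch (not part of the commit) of how the new FTResult class serializes a record. The field values below are invented purely for illustration; MyDB is only touched by persist(), so toString() can be exercised on its own:

from crawl.comm.football import FTResult

result = FTResult('2021-04-10 20:00', '周六001', 'Premier League',
                  'Arsenal', 'Chelsea', '1', '1:0', '2:1', 'Played')
print(result.toString())
# -> 2021-04-10 20:00;周六001;Premier League;Arsenal;Chelsea;1;1:0;2:1;Played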

+1 -1  crawl/run.py

@@ -7,4 +7,4 @@ print(dirpath)
 # Add the directory to the module search path
 sys.path.append(dirpath)
 # Launch the crawler; the third argument is the spider name
-execute(['scrapy','crawl','football'])
+execute(['scrapy','crawl','football-price'])
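Note that scrapy.cmdline.execute hands control to Scrapy and exits the process when the command finishes, so run.py launches exactly one spider per invocation; this change simply switches it from the old football spider to the renamed football-price spider. If all three spiders ever need to run from one script, a CrawlerProcess-based variant is one option; the sketch below is untested and assumes the project settings (SPIDER_MODULES) let the spiders be looked up by name:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl('football-match')    # spiders resolved by their name attribute
process.crawl('football-price')
process.crawl('football-result')
process.start()                    # blocks until all scheduled crawls finish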

Binary  crawl/spiders/__pycache__/football_match.cpython-38.pyc
Binary  crawl/spiders/__pycache__/football_price.cpython-38.pyc
Binary  crawl/spiders/__pycache__/football_result.cpython-38.pyc
Binary  crawl/spiders/__pycache__/football_spider.cpython-38.pyc


+68 -0  crawl/spiders/football_match.py

@@ -0,0 +1,68 @@
+import scrapy
+import time
+from crawl.comm.football import FTMatch
+
+class FootballSpider(scrapy.Spider):
+  name = "football-match"
+
+  def start_requests(self):
+    # Matches open for betting
+    url = "https://www.lottery.gov.cn/football/match_list.jspx"
+    yield scrapy.Request(url, self.parseMatch)
+
+  def parseMatch(self, response):
+    cssMain = ".xxsj table table tr"
+    cssDetail = "td"
+
+    # Get all matches
+    matches = response.css(cssMain)
+    for node in matches[1:]:  # skip the header row
+      prop = node.css(cssDetail)
+
+      # Rows with fewer than 2 td cells are invalid
+      if len(prop) < 2:
+        continue
+
+      matchWeek = prop[0].css('::text').get()
+      league = prop[1].css('::text').get()
+      homeTeam = prop[2].css('.zhu::text').get()
+      awayTeam = prop[2].css('.ke::text').get()
+      matchTime = prop[3].css('::text').get()
+      wdl = self.parsePassWay(prop[6].css('img'))
+      wdls = self.parsePassWay(prop[7].css('img'))
+      score = self.parsePassWay(prop[8].css('img'))
+      points = self.parsePassWay(prop[9].css('img'))
+      double = self.parsePassWay(prop[10].css('img'))
+
+      FTMatch(
+        matchWeek,
+        matchTime,
+        league,
+        homeTeam,
+        awayTeam,
+        wdl,
+        wdls,
+        score,
+        points,
+        double
+      ).persist()
+
+  def parsePassWay(self, img):
+    # Not yet on sale
+    if img is None:
+      return 'wait'
+
+    # Image URL
+    src = img.attrib['src']
+
+    # Single and parlay betting both on sale
+    if "ball11.png" in src:
+      return 'pass&single'
+
+    # Parlay betting only
+    elif "ball1.png" in src:
+      return 'pass'
+
+    # This play type is not on sale
+    else:
+      return 'no'
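One caveat for parsePassWay: prop[6].css('img') returns a Scrapy SelectorList, which is never None, so the 'wait' branch above is effectively unreachable, and for a row with no image img.attrib is an empty dict and img.attrib['src'] raises KeyError. A defensive variant (a sketch only, not what was committed) would test for emptiness instead:

  def parsePassWay(self, img):
    # css('img') yields a SelectorList, so check for emptiness rather than None
    if not img:
      return 'wait'                  # not yet on sale / no icon present
    src = img.attrib.get('src', '')  # .attrib proxies the first matched element
    if "ball11.png" in src:
      return 'pass&single'           # single and parlay betting both on sale
    elif "ball1.png" in src:
      return 'pass'                  # parlay betting only
    else:
      return 'no'                    # this play type is not on sale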

crawl/spiders/football_spider.py → crawl/spiders/football_price.py

@@ -1,80 +1,16 @@
 import scrapy
-from crawl.comm.football import *
+import time
+from crawl.comm.football import FTPrice, WDLOdds, WDLSpreadOdds, ScoreResult, PointsResult, DoubleResult
 
 class FootballSpider(scrapy.Spider):
-  name = "football"
+  name = "football-price"
 
   def start_requests(self):
-    # Matches open for betting
-    url = "https://www.lottery.gov.cn/football/match_list.jspx"
-    yield scrapy.Request(url, self.parseMatch)
-
     # Odds
     url = "https://www.lottery.gov.cn/football/counter.jspx"
-    yield scrapy.Request(url, self.parseCurrent)
-
-    # Results
-
-
-  def parseMatch(self, response):
-    cssMain = ".xxsj table table tr"
-    cssDetail = "td"
-
-    # Get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:]:  # skip the header row
-      prop = node.css(cssDetail)
-
-      # Rows with fewer than 2 td cells are invalid
-      if len(prop) < 2:
-        continue
-
-      matchWeek = prop[0].css('::text').get()
-      league = prop[1].css('::text').get()
-      homeTeam = prop[2].css('.zhu::text').get()
-      awayTeam = prop[2].css('.ke::text').get()
-      matchTime = prop[3].css('::text').get()
-      wdl = self.parsePassWay(prop[6].css('img'))
-      wdls = self.parsePassWay(prop[7].css('img'))
-      score = self.parsePassWay(prop[8].css('img'))
-      points = self.parsePassWay(prop[9].css('img'))
-      double = self.parsePassWay(prop[10].css('img'))
-
-      FTMatch(
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        wdl,
-        wdls,
-        score,
-        points,
-        double
-      ).persist()
+    yield scrapy.Request(url, self.parsePrice)
 
-  def parsePassWay(self, img):
-    # Not yet on sale
-    if img is None:
-      return 'wait'
-
-    # Image URL
-    src = img.attrib['src']
-
-    # Single and parlay betting both on sale
-    if "ball11.png" in src:
-      return 'pass&single'
-
-    # Parlay betting only
-    elif "ball1.png" in src:
-      return 'pass'
-
-    # This play type is not on sale
-    else:
-      return 'no'
-
-
-  def parseCurrent(self, response):
+  def parsePrice(self, response):
     cssMain = "#content .article .articleCon .section"
     cssDetail = ".saishi"
     cssOther = ".saishiCon"
@@ -157,3 +93,4 @@ class FootballSpider(scrapy.Spider):
 
       # Insert into the database
       match.persist()
+

+55 -0  crawl/spiders/football_result.py

@@ -0,0 +1,55 @@
+import scrapy
+import time
+from crawl.comm.football import FTResult
+
+class FootballSpider(scrapy.Spider):
+  name = "football-result"
+
+  def start_requests(self):
+    # Result draws
+    today = time.strftime("%Y-%m-%d")
+    url = "https://www.lottery.gov.cn/football/result_99.jspx?startDate="+today+"&endDate="+today+"&f_league_id=0&f_league_name=%E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B&single=off"
+    yield scrapy.Request(url, self.parseResult)
+
+  def parseResult(self, response):
+    cssMain = ".xxsj table table tr"
+    cssDetail = "td"
+
+    # Get all matches
+    matches = response.css(cssMain)
+    for node in matches[1:-3]:  # skip the header row and the last three rows
+      prop = node.css(cssDetail)
+
+      matchTime = prop[0].css('::text').get()
+      matchWeek = prop[1].css('::text').get()
+      league = prop[2].css('::text').get()
+      homeTeam = prop[3].css('.zhu::text').get()
+      awayTeam = prop[3].css('.ke::text').get()
+      single = self.isSingle(prop[3].attrib.get('class'))
+      tmp = prop[4].css('::text').get()
+      half = tmp.strip() if tmp is not None else ""
+      tmp = prop[5].css('::text').get()
+      whole = tmp.strip() if tmp is not None else ""
+      tmp = prop[6].css('::text').get()
+      status = tmp.strip() if tmp is not None else ""
+
+      FTResult(
+        matchTime,
+        matchWeek,
+        league,
+        homeTeam,
+        awayTeam,
+        single,
+        half,
+        whole,
+        status
+      ).persist()
+
+  def isSingle(self, eleCls):
+    if eleCls is None:
+      return '0'
+
+    if 'dan' in eleCls:
+      return '1'
+    else:
+      return '0'
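The long query string in start_requests above is just today's date filled into both startDate and endDate, with f_league_name carrying the percent-encoded form of 全部联赛 ("all leagues"). An equivalent construction with the standard library, shown only as a readability sketch (parameter names are taken verbatim from the URL in this commit):

import time
from urllib.parse import urlencode

today = time.strftime("%Y-%m-%d")
params = {
    'startDate': today,
    'endDate': today,
    'f_league_id': 0,
    'f_league_name': '全部联赛',  # urlencode percent-encodes this to %E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B
    'single': 'off',
}
url = "https://www.lottery.gov.cn/football/result_99.jspx?" + urlencode(params)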