Your Name 4 years ago
Commit
27119280ea

+ 15
- 0
.vscode/launch.json

@@ -0,0 +1,15 @@
+{
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Python: Current File",
+      "type": "python",
+      "request": "launch",
+      "program": "${workspaceRoot}/crawl/run.py",
+      "console": "integratedTerminal"
+    }
+  ]
+}

+ 0
- 0
crawl/__init__.py


Binary
crawl/__pycache__/__init__.cpython-38.pyc


Binary
crawl/__pycache__/settings.cpython-38.pyc


+ 0
- 0
crawl/comm/__init__.py


Binary
crawl/comm/__pycache__/__init__.cpython-38.pyc


Binary
crawl/comm/__pycache__/football.cpython-38.pyc


Binary
crawl/comm/__pycache__/football_match.cpython-38.pyc


Binary
crawl/comm/__pycache__/mydb.cpython-38.pyc


+ 452
- 0
crawl/comm/football.py

@@ -0,0 +1,452 @@
+from .mydb import MyDB
+
+class FTMatch:
+  def __init__(self,
+    # match week code
+    matchWeek,
+
+    # match time
+    matchTime,
+
+    # league
+    league,
+
+    # home team
+    homeTeam,
+
+    # away team
+    awayTeam,
+
+    # win/draw/lose
+    wdl,
+
+    # win/draw/lose with handicap
+    wdls,
+
+    # correct score
+    score,
+
+    # total goals
+    points,
+
+    # half-time/full-time
+    double
+  ):
+    self.matchWeek = matchWeek
+    self.matchTime = matchTime
+    self.league = league
+    self.homeTeam = homeTeam
+    self.awayTeam = awayTeam
+    self.wdl = wdl
+    self.wdls = wdls
+    self.score = score
+    self.points = points
+    self.double = double
+
+  def toString(self):
+    return ";".join((
+      self.matchWeek,
+      self.matchTime,
+      self.league,
+      self.homeTeam,
+      self.awayTeam,
+      self.wdl,
+      self.wdls,
+      self.score,
+      self.points,
+      self.double
+    ))
+
+  def persist(self):
+    cursor = MyDB.getCursor()
+    if cursor is None:
+      return
+
+    sql = "insert into ta_crawl_football(data_type, content) values('match', %s)"
+    cursor.execute(sql, self.toString())
+    MyDB.commit()
+    cursor.close()
+
+# odds
+class FTPrice:
+  def __init__(self,
+    # match ID
+    matchId,
+
+    # match week code
+    matchWeek,
+
+    # match time
+    matchTime,
+
+    # league
+    league,
+
+    # home team
+    homeTeam,
+
+    # away team
+    awayTeam,
+
+    # win/draw/lose
+    wdlOdds = None,
+
+    # win/draw/lose with handicap - [w, d, l, s]
+    wdlsOdds = None,
+
+    # correct score - [w(13) + d(4) + l(13)]
+    scoreResult = None,
+
+    # total goals - [0, 1, 2, 3, 4, 5, 6, >6]
+    pointsResult = None,
+
+    # half-time/full-time - [w/w, w/d, w/l, d/w, d/d, d/l, l/w, l/d, l/l]
+    doubleResult = None
+  ):
+    self.matchId = matchId
+    self.matchWeek = matchWeek
+    self.matchTime = matchTime
+    self.league = league
+    self.homeTeam = homeTeam
+    self.awayTeam = awayTeam
+    self.wdlOdds = wdlOdds
+    self.wdlsOdds = wdlsOdds
+    self.scoreResult = scoreResult
+    self.pointsResult = pointsResult
+    self.doubleResult = doubleResult
+
+  def toString(self):
+    return ";".join((
+      self.matchId,
+      self.matchWeek,
+      self.matchTime,
+      self.league,
+      self.homeTeam,
+      self.awayTeam,
+      self.wdlOdds.toString(),
+      self.wdlsOdds.toString(),
+      self.scoreResult.toString(),
+      self.pointsResult.toString(),
+      self.doubleResult.toString()
+    ))
+
+  def persist(self):
+    cursor = MyDB.getCursor()
+    if cursor is None:
+      return
+
+    sql = "insert into ta_crawl_football(data_type, content) values('price', %s)"
+    cursor.execute(sql, self.toString())
+    MyDB.commit()
+    cursor.close()
+
+# win/draw/lose
+class WDLOdds:
+  def __init__(self,
+    # win
+    win = None,
+
+    # draw
+    dead = None,
+
+    # lose
+    lose = None
+  ):
+    self.win = win
+    self.dead = dead
+    self.lose = lose
+
+  def datas(self, datas = []):
+    self.win = datas[0]
+    self.dead = datas[1]
+    self.lose = datas[2]
+
+  def toString(self):
+    return '|'.join((self.win, self.dead, self.lose))
+
+# win/draw/lose with handicap
+class WDLSpreadOdds:
+  def __init__(self,
+    # win
+    win = None,
+
+    # draw
+    dead = None,
+
+    # lose
+    lose = None,
+
+    # handicap (goal spread)
+    spread = None
+  ):
+    self.win = win
+    self.dead = dead
+    self.lose = lose
+    self.spread = spread
+
+  def datas(self, datas = []):
+    self.win = datas[0]
+    self.dead = datas[1]
+    self.lose = datas[2]
+    self.spread = datas[3]
+
+  def toString(self):
+    return '|'.join((self.win, self.dead, self.lose, self.spread))
+
+# correct score
+class ScoreResult:
+  def __init__(self,
+    w10 = None, # win 1-0
+    w20 = None, # win 2-0
+    w21 = None, # win 2-1
+    w30 = None, # win 3-0
+    w31 = None, # win 3-1
+    w32 = None, # win 3-2
+    w40 = None, # win 4-0
+    w41 = None, # win 4-1
+    w42 = None, # win 4-2
+    w50 = None, # win 5-0
+    w51 = None, # win 5-1
+    w52 = None, # win 5-2
+    w99 = None, # win, any other score
+    d00 = None, # draw 0-0
+    d11 = None, # draw 1-1
+    d22 = None, # draw 2-2
+    d33 = None, # draw 3-3
+    d99 = None, # draw, any other score
+    l01 = None, # lose 0-1
+    l02 = None, # lose 0-2
+    l12 = None, # lose 1-2
+    l03 = None, # lose 0-3
+    l13 = None, # lose 1-3
+    l23 = None, # lose 2-3
+    l04 = None, # lose 0-4
+    l14 = None, # lose 1-4
+    l24 = None, # lose 2-4
+    l05 = None, # lose 0-5
+    l15 = None, # lose 1-5
+    l25 = None, # lose 2-5
+    l99 = None # lose, any other score
+  ):
+    self.w10 = w10
+    self.w20 = w20
+    self.w21 = w21
+    self.w30 = w30
+    self.w31 = w31
+    self.w32 = w32
+    self.w40 = w40
+    self.w41 = w41
+    self.w42 = w42
+    self.w50 = w50
+    self.w51 = w51
+    self.w52 = w52
+    self.w99 = w99
+    self.d00 = d00
+    self.d11 = d11
+    self.d22 = d22
+    self.d33 = d33
+    self.d99 = d99
+    self.l01 = l01
+    self.l02 = l02
+    self.l12 = l12
+    self.l03 = l03
+    self.l13 = l13
+    self.l23 = l23
+    self.l04 = l04
+    self.l14 = l14
+    self.l24 = l24
+    self.l05 = l05
+    self.l15 = l15
+    self.l25 = l25
+    self.l99 = l99
+
+  def datas(self, datas = []):
+    self.w10 = datas[0]
+    self.w20 = datas[1]
+    self.w21 = datas[2]
+    self.w30 = datas[3]
+    self.w31 = datas[4]
+    self.w32 = datas[5]
+    self.w40 = datas[6]
+    self.w41 = datas[7]
+    self.w42 = datas[8]
+    self.w50 = datas[9]
+    self.w51 = datas[10]
+    self.w52 = datas[11]
+    self.w99 = datas[12]
+    self.d00 = datas[13]
+    self.d11 = datas[14]
+    self.d22 = datas[15]
+    self.d33 = datas[16]
+    self.d99 = datas[17]
+    self.l01 = datas[18]
+    self.l02 = datas[19]
+    self.l12 = datas[20]
+    self.l03 = datas[21]
+    self.l13 = datas[22]
+    self.l23 = datas[23]
+    self.l04 = datas[24]
+    self.l14 = datas[25]
+    self.l24 = datas[26]
+    self.l05 = datas[27]
+    self.l15 = datas[28]
+    self.l25 = datas[29]
+    self.l99 = datas[30]
+
+  def toString(self):
+    return '|'.join((
+      self.w10,
+      self.w20,
+      self.w21,
+      self.w30,
+      self.w31,
+      self.w32,
+      self.w40,
+      self.w41,
+      self.w42,
+      self.w50,
+      self.w51,
+      self.w52,
+      self.w99,
+      self.d00,
+      self.d11,
+      self.d22,
+      self.d33,
+      self.d99,
+      self.l01,
+      self.l02,
+      self.l12,
+      self.l03,
+      self.l13,
+      self.l23,
+      self.l04,
+      self.l14,
+      self.l24,
+      self.l05,
+      self.l15,
+      self.l25,
+      self.l99
+    ))
+
+# total goals
+class PointsResult:
+  def __init__(self,
+    # 0 goals in total
+    p0 = None,
+
+    # 1 goal in total
+    p1 = None,
+
+    # 2 goals in total
+    p2 = None,
+
+    # 3 goals in total
+    p3 = None,
+
+    # 4 goals in total
+    p4 = None,
+
+    # 5 goals in total
+    p5 = None,
+
+    # 6 goals in total
+    p6 = None,
+
+    # more than 6 goals in total
+    p99 = None
+  ):
+    self.p0 = p0
+    self.p1 = p1
+    self.p2 = p2
+    self.p3 = p3
+    self.p4 = p4
+    self.p5 = p5
+    self.p6 = p6
+    self.p99 = p99
+
+  def datas(self, datas = []):
+    self.p0 = datas[0]
+    self.p1 = datas[1]
+    self.p2 = datas[2]
+    self.p3 = datas[3]
+    self.p4 = datas[4]
+    self.p5 = datas[5]
+    self.p6 = datas[6]
+    self.p99 = datas[7]
+
+  def toString(self):
+    return '|'.join((
+      self.p0,
+      self.p1,
+      self.p2,
+      self.p3,
+      self.p4,
+      self.p5,
+      self.p6,
+      self.p99
+    ))
+
+# half-time/full-time
+class DoubleResult:
+  def __init__(self,
+    # win/win
+    ww = None,
+
+    # win/draw
+    wd = None,
+
+    # win/lose
+    wl = None,
+
+    # draw/win
+    dw = None,
+
+    # draw/draw
+    dd = None,
+
+    # draw/lose
+    dl = None,
+
+    # lose/win
+    lw = None,
+
+    # lose/draw
+    ld = None,
+
+    # lose/lose
+    ll = None
+  ):
+    self.ww = ww
+    self.wd = wd
+    self.wl = wl
+    self.dw = dw
+    self.dd = dd
+    self.dl = dl
+    self.lw = lw
+    self.ld = ld
+    self.ll = ll
+
+  def datas(self, datas = []):
+    self.ww = datas[0]
+    self.wd = datas[1]
+    self.wl = datas[2]
+    self.dw = datas[3]
+    self.dd = datas[4]
+    self.dl = datas[5]
+    self.lw = datas[6]
+    self.ld = datas[7]
+    self.ll = datas[8]
+
+
+  def toString(self):
+    return '|'.join((
+      self.ww,
+      self.wd,
+      self.wl,
+      self.dw,
+      self.dd,
+      self.dl,
+      self.lw,
+      self.ld,
+      self.ll
+    ))
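
Note (reviewer sketch, not part of this commit): how the value objects above serialize. The odds figures and team names below are made-up placeholders; the 'pass&single'/'pass'/'wait' strings are the values produced by parsePassWay in crawl/spiders/football_spider.py further down.

odds = WDLOdds('2.10', '3.25', '3.40')   # placeholder odds strings
print(odds.toString())                   # -> 2.10|3.25|3.40

match = FTMatch('Sat001', '2021-04-10 19:30', 'Some League', 'Home FC', 'Away FC',
                'pass&single', 'pass', 'pass', 'pass', 'wait')
print(match.toString())                  # semicolon-joined row
match.persist()                          # inserts ('match', <row>) into ta_crawl_football when MyDB is connected; otherwise a no-op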

+ 38
- 0
crawl/comm/mydb.py

@@ -0,0 +1,38 @@
+import pymysql
+import scrapy
+
+class MyDB:
+  __connected = False
+  __conn = None
+
+  @classmethod
+  def from_crawler(cls, crawler):
+    cls.connectDB(crawler.settings['DATABASE'])
+    return cls()
+
+  @classmethod
+  def connectDB(cls, dbSetting):
+    cls.__conn = pymysql.connect(
+      host = dbSetting['host'],
+      port = dbSetting['port'],
+      user = dbSetting['user'],
+      password = dbSetting['password'],
+      db = dbSetting['name'],
+      charset = 'utf8'
+    )
+    cls.__connected = True
+
+  @classmethod
+  def getCursor(cls):
+    if cls.__connected:
+      return cls.__conn.cursor()
+
+  @classmethod
+  def commit(cls):
+    if cls.__connected:
+      return cls.__conn.commit()
+
+  def __del__(self):
+    if self.__class__.__connected:
+      self.__class__.__connected = False
+      self.__class__.__conn.close()
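
Note (reviewer sketch, not part of this commit): minimal standalone use of MyDB, assuming a reachable MySQL server; the host and credentials below are placeholders. Inside Scrapy the same connection is opened automatically, since settings.py registers crawl.comm.mydb.MyDB under EXTENSIONS and from_crawler() passes the DATABASE settings to connectDB().

from crawl.comm.mydb import MyDB

MyDB.connectDB({
  'host': '127.0.0.1',    # placeholder
  'port': 3306,
  'user': 'someuser',     # placeholder
  'password': 'secret',   # placeholder
  'name': 'niucai'
})
cursor = MyDB.getCursor()
if cursor is not None:
  cursor.execute("select 1")
  print(cursor.fetchone())  # (1,)
  MyDB.commit()
  cursor.close()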

+ 12
- 0
crawl/items.py

@@ -0,0 +1,12 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class CrawlItem(scrapy.Item):
+    # define the fields for your item here like:
+    # name = scrapy.Field()
+    pass

+ 103
- 0
crawl/middlewares.py

@@ -0,0 +1,103 @@
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy import signals
+
+# useful for handling different item types with a single interface
+from itemadapter import is_item, ItemAdapter
+
+
+class CrawlSpiderMiddleware:
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the spider middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_spider_input(self, response, spider):
+        # Called for each response that goes through the spider
+        # middleware and into the spider.
+
+        # Should return None or raise an exception.
+        return None
+
+    def process_spider_output(self, response, result, spider):
+        # Called with the results returned from the Spider, after
+        # it has processed the response.
+
+        # Must return an iterable of Request, or item objects.
+        for i in result:
+            yield i
+
+    def process_spider_exception(self, response, exception, spider):
+        # Called when a spider or process_spider_input() method
+        # (from other spider middleware) raises an exception.
+
+        # Should return either None or an iterable of Request or item objects.
+        pass
+
+    def process_start_requests(self, start_requests, spider):
+        # Called with the start requests of the spider, and works
+        # similarly to the process_spider_output() method, except
+        # that it doesn’t have a response associated.
+
+        # Must return only requests (not items).
+        for r in start_requests:
+            yield r
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+
+class CrawlDownloaderMiddleware:
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the downloader middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_request(self, request, spider):
+        # Called for each request that goes through the downloader
+        # middleware.
+
+        # Must either:
+        # - return None: continue processing this request
+        # - or return a Response object
+        # - or return a Request object
+        # - or raise IgnoreRequest: process_exception() methods of
+        #   installed downloader middleware will be called
+        return None
+
+    def process_response(self, request, response, spider):
+        # Called with the response returned from the downloader.
+
+        # Must either:
+        # - return a Response object
+        # - return a Request object
+        # - or raise IgnoreRequest
+        return response
+
+    def process_exception(self, request, exception, spider):
+        # Called when a download handler or a process_request()
+        # (from other downloader middleware) raises an exception.
+
+        # Must either:
+        # - return None: continue processing this exception
+        # - return a Response object: stops process_exception() chain
+        # - return a Request object: stops process_exception() chain
+        pass
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)

+ 13
- 0
crawl/pipelines.py

@@ -0,0 +1,13 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+# useful for handling different item types with a single interface
+from itemadapter import ItemAdapter
+
+
+class CrawlPipeline:
+    def process_item(self, item, spider):
+        return item

+ 10
- 0
crawl/run.py

@@ -0,0 +1,10 @@
+from scrapy.cmdline import execute
+import sys
+import os
+# directory of this script
+dirpath = os.path.dirname(os.path.abspath(__file__))
+print(dirpath)
+# add it to the module search path
+sys.path.append(dirpath)
+# start the crawler; the third argument is the spider name
+execute(['scrapy','crawl','football'])
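
Note (reviewer sketch, not part of this commit): execute() runs the scrapy CLI entry point in-process with argv ['scrapy', 'crawl', 'football'], i.e. the same effect as running scrapy crawl football from the project root. An equivalent programmatic start, shown only as a sketch, would use Scrapy's CrawlerProcess:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from crawl.spiders.football_spider import FootballSpider

process = CrawlerProcess(get_project_settings())  # loads the project settings (crawl/settings.py)
process.crawl(FootballSpider)
process.start()                                   # blocks until the crawl finishes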

+ 96
- 0
crawl/settings.py

@@ -0,0 +1,96 @@
+# Scrapy settings for crawl project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     https://docs.scrapy.org/en/latest/topics/settings.html
+#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'crawl'
+
+SPIDER_MODULES = ['crawl.spiders']
+NEWSPIDER_MODULE = 'crawl.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'crawl (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = True
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+#    'crawl.middlewares.CrawlSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#DOWNLOADER_MIDDLEWARES = {
+#    'crawl.middlewares.CrawlDownloaderMiddleware': 543,
+#}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+EXTENSIONS = {
+    'crawl.comm.mydb.MyDB': 100,
+}
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+#ITEM_PIPELINES = {
+#    'crawl.pipelines.CrawlPipeline': 300,
+#}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+
+DATABASE = {
+  'host': 'rm-uf6z3z6jq11x653d77o.mysql.rds.aliyuncs.com',
+  'port': 3306,
+  'name': 'niucai',
+  'user': 'niucai',
+  'password': '1qaz#EDC'
+}

+ 4
- 0
crawl/spiders/__init__.py

@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.

Binary
crawl/spiders/__pycache__/__init__.cpython-38.pyc


Binary
crawl/spiders/__pycache__/football_spider.cpython-38.pyc


+ 159
- 0
crawl/spiders/football_spider.py

@@ -0,0 +1,159 @@
+import scrapy
+from crawl.comm.football import *
+
+class FootballSpider(scrapy.Spider):
+  name = "football"
+
+  def start_requests(self):
+    # matches currently open for betting
+    url = "https://www.lottery.gov.cn/football/match_list.jspx"
+    yield scrapy.Request(url, self.parseMatch)
+
+    # odds
+    url = "https://www.lottery.gov.cn/football/counter.jspx"
+    yield scrapy.Request(url, self.parseCurrent)
+
+    # draw results
+
+
+  def parseMatch(self, response):
+    cssMain = ".xxsj table table tr"
+    cssDetail = "td"
+
+    # fetch all matches
+    matches = response.css(cssMain)
+    for node in matches[1:]:  # skip the header row
+      prop = node.css(cssDetail)
+
+      # rows with fewer than 2 td cells are invalid
+      if len(prop) < 2:
+        continue
+
+      matchWeek = prop[0].css('::text').get()
+      league = prop[1].css('::text').get()
+      homeTeam = prop[2].css('.zhu::text').get()
+      awayTeam = prop[2].css('.ke::text').get()
+      matchTime = prop[3].css('::text').get()
+      wdl = self.parsePassWay(prop[6].css('img'))
+      wdls = self.parsePassWay(prop[7].css('img'))
+      score = self.parsePassWay(prop[8].css('img'))
+      points = self.parsePassWay(prop[9].css('img'))
+      double = self.parsePassWay(prop[10].css('img'))
+
+      FTMatch(
+        matchWeek,
+        matchTime,
+        league,
+        homeTeam,
+        awayTeam,
+        wdl,
+        wdls,
+        score,
+        points,
+        double
+      ).persist()
+
+  def parsePassWay(self, img):
+    # not on sale yet: css('img') yields an empty selection, not None
+    if not img:
+      return 'wait'
+
+    # image URL
+    src = img.attrib['src']
+
+    # single and parlay betting both open
+    if "ball11.png" in src:
+      return 'pass&single'
+
+    # only parlay betting open
+    elif "ball1.png" in src:
+      return 'pass'
+
+    # this play type is not on sale
+    else:
+      return 'no'
+
+
+  def parseCurrent(self, response):
+    cssMain = "#content .article .articleCon .section"
+    cssDetail = ".saishi"
+    cssOther = ".saishiCon"
+
+    # fetch all matches
+    matches = response.css(cssMain)
+    for node in matches:
+
+      # match ID
+      matchId = node.attrib['match_id']
+      matchTime = node.attrib['match_time']
+
+      # other related fields
+      details = node.css(cssDetail).css("td")
+      matchWeek = details[0].css('::text').get()
+      league = details[1].css('::text').get()
+      homeTeam = details[3].css('::text').get()
+      awayTeam = details[4].css('::text').get()
+
+      match = FTPrice(
+        matchId,
+        matchWeek,
+        matchTime,
+        league,
+        homeTeam,
+        awayTeam,
+        WDLOdds(
+          details[5].css('::text').get(),
+          details[6].css('::text').get(),
+          details[7].css('::text').get()
+        ),
+        WDLSpreadOdds(
+          details[9].css('::text').get(),
+          details[10].css('::text').get(),
+          details[11].css('::text').get(),
+          details[8].css('::text').get()
+        )
+      )
+
+      # correct score, total goals and half/full time sit in separate tables
+      otherOdds = node.css(cssOther)
+      for index, otherNode in enumerate(otherOdds):
+
+        # odds for correct score
+        if index == 0:
+          scoreResult = ScoreResult()
+          datas = []
+          lst = otherNode.css('td')
+
+          # the three tr rows of the table
+          for it in lst[1:14] + lst[15:20] + lst[21:34]:
+            datas += [it.css('strong::text').get()]
+
+          scoreResult.datas(datas)
+          match.scoreResult = scoreResult
+
+        # odds for total goals
+        elif index == 1:
+          pointsResult = PointsResult()
+          datas = []
+          lst = otherNode.css('td')
+
+          for it in lst[2:10]:
+            datas += [it.css('strong::text').get()]
+
+          pointsResult.datas(datas)
+          match.pointsResult = pointsResult
+
+        # odds for half-time/full-time
+        else:
+          doubleResult = DoubleResult()
+          datas = []
+          lst = otherNode.css('td')
+
+          for it in lst[2:11]:
+            datas += [it.css('strong::text').get()]
+
+          doubleResult.datas(datas)
+          match.doubleResult = doubleResult
+
+      # save to database
+      match.persist()
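
Note (reviewer sketch, not part of this commit): what parsePassWay returns for the two ball icons it checks. The HTML snippets are hypothetical markup, not taken from lottery.gov.cn.

from scrapy import Selector
from crawl.spiders.football_spider import FootballSpider

spider = FootballSpider()
td = Selector(text='<td><img src="/images/ball11.png"></td>')   # hypothetical markup
print(spider.parsePassWay(td.css('img')))        # -> 'pass&single'

empty = Selector(text='<td></td>')               # no icon rendered yet
print(spider.parsePassWay(empty.css('img')))     # -> 'wait' (empty selection)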

+ 11
- 0
scrapy.cfg

@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = crawl.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = crawl