张延森 4 years ago
Commit 4d4cd4bdff

basketball_job.sh (+2, -4)

@@ -7,8 +7,6 @@ source /etc/profile
 cd /opt/niucai/crawl
 
 # Matches
-scrapy crawl basketball >> logs/basketball.log
-# Odds
-# scrapy crawl basketball-price >> logs/basketball-price.log
+/usr/local/bin/scrapy crawl basketball
 # Results
-scrapy crawl basketball-result >> logs/basketball-result.log
+/usr/local/bin/scrapy crawl basketball-result
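(Note: the job scripts now invoke scrapy by absolute path, likely because cron runs with a minimal PATH that may not include /usr/local/bin, and the >> logs/*.log redirections are dropped since log output is now routed through Scrapy's own LOG_FILE settings, added below.)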

crawl/settings.py (+9, -0)

@@ -94,3 +94,12 @@ DATABASE = {
   'user': 'niucai',
   'password': '1qaz#EDC'
 }
+
+# CRITICAL 50
+# ERROR 40
+# WARNING 30
+# INFO 20
+# DEBUG 10
+# NOTSET 0
+LOG_LEVEL = 10
+LOG_FILE = 'logs/main.log'
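The numeric levels annotated above are the standard constants from Python's logging module, so LOG_LEVEL = 10 selects DEBUG-level output. A minimal sketch confirming the mapping:

import logging

# The numeric values noted in settings.py are the stdlib logging constants,
# so LOG_LEVEL = 10 is equivalent to LOG_LEVEL = logging.DEBUG.
assert logging.CRITICAL == 50
assert logging.ERROR == 40
assert logging.WARNING == 30
assert logging.INFO == 20
assert logging.DEBUG == 10
assert logging.NOTSET == 0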

crawl/spiders/basketball.py (+4, -0)

@@ -7,6 +7,10 @@ from crawl.spiders.util import getNoneStr
 
 class BasketballSpider(scrapy.Spider):
   name = 'basketball'
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/basketball-match.log'
+  }
 
   # all matches
   _matchesMap = {}
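These per-spider custom_settings override the project-wide LOG_LEVEL and LOG_FILE from crawl/settings.py, so each spider writes its own log at INFO level while the project default stays at DEBUG. A minimal sketch of the pattern (the spider name and log path here are placeholders); note that each spider module also needs import logging for logging.INFO, which the lottery.py hunk below adds explicitly:

import logging
import scrapy

class ExampleSpider(scrapy.Spider):
    # Hypothetical spider illustrating the override pattern: values in
    # custom_settings take precedence over the project settings.py,
    # giving this spider its own log level and log file.
    name = 'example'
    custom_settings = {
        'LOG_LEVEL': logging.INFO,       # INFO (20) instead of the global DEBUG (10)
        'LOG_FILE': 'logs/example.log',  # placeholder path
    }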

crawl/spiders/basketball_match.py (+0, -69)

@@ -1,69 +0,0 @@
-import scrapy
-import time
-from crawl.comm.basketball import BSTMatch
-
-class BasketballSpider(scrapy.Spider):
-  name = "basketball-match"
-
-  def start_requests(self):
-    # matches open for betting
-    url = "https://www.lottery.gov.cn/basketball/match_list.jspx"
-    yield scrapy.Request(url, self.parseMatch)
-  
-  def parseMatch(self, response):
-    cssMain = ".xxsj table tr"
-    cssDetail = "td"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:]:  # skip the header row
-      prop = node.css(cssDetail)
-
-      # rows with fewer than 2 <td> cells are invalid
-      if len(prop) < 2:
-        continue
-
-      matchWeek = prop[0].css('::text').get()
-      league = prop[1].css('::text').get()
-      team = prop[2].css('::text').get().split('VS')
-      homeTeam = team[1].strip()
-      awayTeam = team[0].strip()
-      matchTime = prop[3].css('::text').get()
-      status = prop[5].css('::text').get()
-      wl = self.parsePassWay(prop[6].css('img'))
-      wls = self.parsePassWay(prop[7].css('img'))
-      score = self.parsePassWay(prop[8].css('img'))
-      points = self.parsePassWay(prop[9].css('img'))
-
-      BSTMatch(
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        status,
-        wl,
-        wls,
-        score,
-        points
-      ).persist()
-  
-  def parsePassWay(self, img):
-    # not yet on sale
-    if img is None:
-      return 'wait'
-
-    # image URL
-    src = img.attrib['src']
-
-    # both single and parlay betting on sale
-    if "ball2_11.png" in src:
-      return 'pass&single'
-
-    # only parlay betting on sale
-    elif "ball2_1.png" in src:
-      return 'pass'
-
-    # this play not on sale
-    else:
-      return 'no'

crawl/spiders/basketball_price.py (+0, -72)

@@ -1,72 +0,0 @@
-import scrapy
-import time
-from crawl.comm.basketball import BSTPrice, WLOdds, WLSpreadOdds, ScoreResult, PointsResult
-
-class BasketballSpider(scrapy.Spider):
-  name = "basketball-price"
-
-  def start_requests(self):
-    # odds
-    url = "https://www.lottery.gov.cn/basketball/counter.jspx"
-    yield scrapy.Request(url, self.parsePrice)
-  
-  def parsePrice(self, response):
-    cssMain = "#content .article .articleCon .section"
-    cssDetail = ".saishi"
-    cssOther = ".saishiCon table td"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches:
-
-      # match ID
-      matchId = node.attrib['match_id']
-      matchTime = node.attrib['match_time']
-      matchWeek = node.attrib['match_week']
-      league = node.attrib['league_val']
-
-      # other related attributes
-      details = node.css(cssDetail).css("td")
-      homeTeam = details[4].css('::text').get()
-      awayTeam = details[3].css('::text').get()
-
-      match = BSTPrice(
-        matchId,
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        WLOdds(
-          details[6].css('strong::text').get(),
-          details[5].css('strong::text').get()
-        ),
-        WLSpreadOdds(
-          details[9].css('strong::text').get(),
-          details[7].css('strong::text').get(),
-          details[8].css('strong font::text').get()
-        ),
-        ScoreResult(
-          details[10].css('strong::text').get(),
-          details[12].css('strong::text').get(),
-          details[11].css('strong::text').get()
-        )
-      )
-
-      # points margin
-      pointsResult = PointsResult()
-      datas1 = []
-      lst = node.css(cssOther)
-      for it1 in lst[9:15]:
-        datas1 += [it1.css('strong::text').get()]
-
-      datas2 = []
-      for it2 in lst[16:22]:
-        datas2 += [it2.css('strong::text').get()]
-
-      pointsResult.datas(datas2 + datas1)
-      match.pointsResult = pointsResult
-
-      # persist to database
-      match.persist()
-  

crawl/spiders/basketball_result.py (+4, -0)

@@ -7,6 +7,10 @@ from crawl.spiders.util import getNoneStr
 
 class BasketballSpider(scrapy.Spider):
   name = "basketball-result"
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/basketball-result.log'
+  }
 
   # all matches
   _matchesMap = {}

crawl/spiders/basketball_result.py.bak (+0, -61)

@@ -1,61 +0,0 @@
-import scrapy
-import time
-from crawl.comm.basketball import BSTResult
-
-class BasketballSpider(scrapy.Spider):
-  name = "basketball-result"
-
-  def start_requests(self):
-    # draw results
-    today = time.strftime("%Y-%m-%d")
-    url = "https://www.lottery.gov.cn/basketball/result_99.jspx?startDate="+today+"&endDate="+today+"&f_league_id=0&f_league_name=%E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B&single=off"
-    yield scrapy.Request(url, self.parseResult)
-
-  def parseResult(self, response):
-    cssMain = ".xxsj table tr"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:-1]:  # skip the header row and the trailing row
-      prop = node.css("td")
-      if len(prop) < 7:
-        continue
-
-      matchTime = prop[0].css('::text').get()
-      matchWeek = prop[1].css('::text').get()
-      league = prop[2].css('::text').get()
-      team = prop[3].css('a::text').getall()
-      if team is None or len(team) == 0:
-        team = prop[3].css('::text').get().split('VS')
-      homeTeam = team[1].strip()
-      awayTeam = team[0].strip()
-      single = self.isSingle(prop[3].attrib.get('class'))
-      tmp = prop[4].css('::text').get()
-      score = tmp.strip() if tmp is not None else ""
-      tmp = prop[5].css('::text').get()
-      status = tmp.strip() if tmp is not None else ""
-
-      bstResult = BSTResult(
-        matchTime,
-        matchWeek,
-        league,
-        homeTeam,
-        awayTeam,
-        single,
-        score,
-        status
-      )
-      bstResult.persist()
-
-      # if status == '已完成':
-      #   resURI = prop[6].css('a').attrib.get('href')
-      #   yield scrapy.Request('https://www.lottery.gov.cn' + resURI, self.parseResDetail, 'GET')
-
-  def isSingle(self, eleCls):
-    if eleCls is None:
-      return '0'
-    
-    if 'dan' in eleCls:
-      return '1'
-    else:
-      return '0'

crawl/spiders/football.py (+4, -0)

@@ -6,6 +6,10 @@ from crawl.spiders.util import getNoneStr
 
 class FootballSpider(scrapy.Spider):
   name = 'football'
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/football-match.log'
+  }
 
   # all matches
   _matchesMap = {}

crawl/spiders/football_match.py (+0, -68)

@@ -1,68 +0,0 @@
-import scrapy
-import time
-from crawl.comm.football import FTMatch
-
-class FootballSpider(scrapy.Spider):
-  name = "football-match"
-
-  def start_requests(self):
-    # matches open for betting
-    url = "https://www.lottery.gov.cn/football/match_list.jspx"
-    yield scrapy.Request(url, self.parseMatch)
-  
-  def parseMatch(self, response):
-    cssMain = ".xxsj table table tr"
-    cssDetail = "td"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:]:  # skip the header row
-      prop = node.css(cssDetail)
-
-      # rows with fewer than 2 <td> cells are invalid
-      if len(prop) < 2:
-        continue
-
-      matchWeek = prop[0].css('::text').get()
-      league = prop[1].css('::text').get()
-      homeTeam = prop[2].css('.zhu::text').get()
-      awayTeam = prop[2].css('.ke::text').get()
-      matchTime = prop[3].css('::text').get()
-      wdl = self.parsePassWay(prop[6].css('img'))
-      wdls = self.parsePassWay(prop[7].css('img'))
-      score = self.parsePassWay(prop[8].css('img'))
-      points = self.parsePassWay(prop[9].css('img'))
-      double = self.parsePassWay(prop[10].css('img'))
-
-      FTMatch(
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        wdl,
-        wdls,
-        score,
-        points,
-        double
-      ).persist()
-  
-  def parsePassWay(self, img):
-    # not yet on sale
-    if img is None:
-      return 'wait'
-
-    # image URL
-    src = img.attrib['src']
-
-    # both single and parlay betting on sale
-    if "ball11.png" in src:
-      return 'pass&single'
-
-    # only parlay betting on sale
-    elif "ball1.png" in src:
-      return 'pass'
-
-    # this play not on sale
-    else:
-      return 'no'

crawl/spiders/football_price.py (+0, -96)

@@ -1,96 +0,0 @@
-import scrapy
-import time
-from crawl.comm.football import FTPrice, WDLOdds, WDLSpreadOdds, ScoreResult, PointsResult, DoubleResult
-
-class FootballSpider(scrapy.Spider):
-  name = "football-price"
-
-  def start_requests(self):
-    # odds
-    url = "https://www.lottery.gov.cn/football/counter.jspx"
-    yield scrapy.Request(url, self.parsePrice)
-  
-  def parsePrice(self, response):
-    cssMain = "#content .article .articleCon .section"
-    cssDetail = ".saishi"
-    cssOther = ".saishiCon"
-
-    # get all matches
-    matches = response.css(cssMain)
-    for node in matches:
-
-      # match ID
-      matchId = node.attrib['match_id']
-      matchTime = node.attrib['match_time']
-
-      # other related attributes
-      details = node.css(cssDetail).css("td")
-      matchWeek = details[0].css('::text').get()
-      league = details[1].css('::text').get()
-      homeTeam = details[3].css('::text').get()
-      awayTeam = details[4].css('::text').get()
-
-      match = FTPrice(
-        matchId,
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        WDLOdds(
-          details[5].css('::text').get(),
-          details[6].css('::text').get(),
-          details[7].css('::text').get()
-        ),
-        WDLSpreadOdds(
-          details[9].css('::text').get(),
-          details[10].css('::text').get(),
-          details[11].css('::text').get(),
-          details[8].css('::text').get()
-        )
-      )
-
-      # exact score, total points and half/full-time results are in separate tables
-      otherOdds = node.css(cssOther)
-      for index, otherNode in enumerate(otherOdds):
-
-        # odds for exact score
-        if index == 0:
-          scoreResult = ScoreResult()
-          datas = []
-          lst = otherNode.css('td')
-
-          # the table's 3 <tr> rows
-          for it in lst[1:14] + lst[15:20] + lst[21:34]:
-            datas += [it.css('strong::text').get()]
-
-          scoreResult.datas(datas)
-          match.scoreResult = scoreResult
-
-        # odds for total points
-        elif index == 1:
-          pointsResult = PointsResult()
-          datas = []
-          lst = otherNode.css('td')
-
-          for it in lst[2:10]:
-            datas += [it.css('strong::text').get()]
-
-          pointsResult.datas(datas)
-          match.pointsResult = pointsResult
-
-        # odds for half/full-time result
-        else:
-          doubleResult = DoubleResult()
-          datas = []
-          lst = otherNode.css('td')
-
-          for it in lst[2:11]:
-            datas += [it.css('strong::text').get()]
-
-          doubleResult.datas(datas)
-          match.doubleResult = doubleResult
-
-      # persist to database
-      match.persist()
-  

crawl/spiders/football_result.py (+5, -0)

@@ -6,6 +6,11 @@ from crawl.spiders.util import getNoneStr
 
 class FootballSpider(scrapy.Spider):
   name = "football-result"
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/football-result.log'
+  }
+
 
   # all matches
   _matchesMap = {}

crawl/spiders/lottery.py (+5, -0)

@@ -1,10 +1,15 @@
 import scrapy
+import logging
 
 from crawl.comm.lottery import LotteryResult, LotteryResultDetail
 
 
 class LotterySpider(scrapy.Spider):
   name = "lottery"
+  custom_settings = {
+    'LOG_LEVEL': logging.INFO,
+    'LOG_FILE': 'logs/lottery.log'
+  }
 
   def start_requests(self):
     # Super Lotto (大乐透)

football_job.sh (+2, -4)

@@ -7,8 +7,6 @@ source /etc/profile
 cd /opt/niucai/crawl
 
 # Matches
-scrapy crawl football >> logs/football.log
-# Odds
-# scrapy crawl football-price >> logs/football-price.log
+/usr/local/bin/scrapy crawl football
 # Results
-scrapy crawl football-result >> logs/football-result.log
+/usr/local/bin/scrapy crawl football-result

lottery_job.sh (+1, -1)

@@ -6,4 +6,4 @@ source /etc/profile
 
 cd /opt/niucai/crawl
 
-scrapy crawl lottery >> logs/lottery.log
+/usr/local/bin/scrapy crawl lottery