Your Name · 4 years ago
Commit 20ac2ee2bc

Binary  crawl/comm/__pycache__/football.cpython-38.pyc


+66 -2  crawl/comm/football.py

@@ -1,5 +1,6 @@
 from .mydb import MyDB
 
+# Match
 class FTMatch:
   def __init__(self,
     # Match week code
@@ -437,7 +438,6 @@ class DoubleResult:
     self.ld = datas[7]
     self.ll = datas[8]
 
-
   def toString(self):
     return '|'.join((
       self.ww,
@@ -449,4 +449,68 @@ class DoubleResult:
       self.lw,
       self.ld,
       self.ll
-    ))
+    ))
+
+# Result
+class FTResult:
+  def __init__(self,
+    # Match time
+    matchTime,
+
+    # Match week
+    matchWeek,
+
+    # League
+    league,
+
+    # Home team
+    homeTeam,
+
+    # Away team
+    awayTeam,
+
+    # Whether the match is open to single (non-parlay) betting
+    single,
+
+    # Half-time score
+    half,
+
+    # Full-time score
+    whole,
+
+    # Status
+    status
+  ):
+    self.matchTime = matchTime
+    self.matchWeek = matchWeek
+    self.league = league
+    self.homeTeam = homeTeam
+    self.awayTeam = awayTeam
+    self.single = single
+    self.half = half
+    self.whole = whole
+    self.status = status
+
+  def toString(self):
+    return ';'.join((
+      self.matchTime,
+      self.matchWeek,
+      self.league,
+      self.homeTeam,
+      self.awayTeam,
+      self.single,
+      self.half,
+      self.whole,
+      self.status
+    ))
+
+  def persist(self):
+    cursor = MyDB.getCursor()
+    if cursor is None:
+      return
+
+    sql = "insert into ta_crawl_football(data_type, content) values('result', %s)"
+    cursor.execute(sql, self.toString())
+    MyDB.commit()
+    cursor.close()
+
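For reference, a minimal sketch (not part of the commit) of how the new FTResult class serializes a record. The field values below are invented purely for illustration; MyDB is only touched by persist(), so toString() can be exercised on its own:

from crawl.comm.football import FTResult

result = FTResult('2021-04-10 20:00', '周六001', 'Premier League',
                  'Arsenal', 'Chelsea', '1', '1:0', '2:1', 'Played')
print(result.toString())
# -> 2021-04-10 20:00;周六001;Premier League;Arsenal;Chelsea;1;1:0;2:1;Played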

+1 -1  crawl/run.py

@@ -7,4 +7,4 @@ print(dirpath)
 # Add the directory to the module search path
 sys.path.append(dirpath)
 # Launch the crawler; the third argument is the spider name
-execute(['scrapy','crawl','football'])
+execute(['scrapy','crawl','football-price'])
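Note that scrapy.cmdline.execute hands control to Scrapy and exits the process when the command finishes, so run.py launches exactly one spider per invocation; this change simply switches it from the old football spider to the renamed football-price spider. If all three spiders ever need to run from one script, a CrawlerProcess-based variant is one option; the sketch below is untested and assumes the project settings (SPIDER_MODULES) let the spiders be looked up by name:

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl('football-match')    # spiders resolved by their name attribute
process.crawl('football-price')
process.crawl('football-result')
process.start()                    # blocks until all scheduled crawls finish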

Binary  crawl/spiders/__pycache__/football_match.cpython-38.pyc
Binary  crawl/spiders/__pycache__/football_price.cpython-38.pyc
Binary  crawl/spiders/__pycache__/football_result.cpython-38.pyc
Binary  crawl/spiders/__pycache__/football_spider.cpython-38.pyc


+68 -0  crawl/spiders/football_match.py

@@ -0,0 +1,68 @@
+import scrapy
+import time
+from crawl.comm.football import FTMatch
+
+class FootballSpider(scrapy.Spider):
+  name = "football-match"
+
+  def start_requests(self):
+    # Matches open for betting
+    url = "https://www.lottery.gov.cn/football/match_list.jspx"
+    yield scrapy.Request(url, self.parseMatch)
+
+  def parseMatch(self, response):
+    cssMain = ".xxsj table table tr"
+    cssDetail = "td"
+
+    # Get all matches
+    matches = response.css(cssMain)
+    for node in matches[1:]:  # skip the header row
+      prop = node.css(cssDetail)
+
+      # Rows with fewer than 2 td cells are invalid
+      if len(prop) < 2:
+        continue
+
+      matchWeek = prop[0].css('::text').get()
+      league = prop[1].css('::text').get()
+      homeTeam = prop[2].css('.zhu::text').get()
+      awayTeam = prop[2].css('.ke::text').get()
+      matchTime = prop[3].css('::text').get()
+      wdl = self.parsePassWay(prop[6].css('img'))
+      wdls = self.parsePassWay(prop[7].css('img'))
+      score = self.parsePassWay(prop[8].css('img'))
+      points = self.parsePassWay(prop[9].css('img'))
+      double = self.parsePassWay(prop[10].css('img'))
+
+      FTMatch(
+        matchWeek,
+        matchTime,
+        league,
+        homeTeam,
+        awayTeam,
+        wdl,
+        wdls,
+        score,
+        points,
+        double
+      ).persist()
+
+  def parsePassWay(self, img):
+    # Not yet on sale
+    if img is None:
+      return 'wait'
+
+    # Image URL
+    src = img.attrib['src']
+
+    # Single and parlay betting both on sale
+    if "ball11.png" in src:
+      return 'pass&single'
+
+    # Parlay betting only
+    elif "ball1.png" in src:
+      return 'pass'
+
+    # This play type is not on sale
+    else:
+      return 'no'
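One caveat for parsePassWay: prop[6].css('img') returns a Scrapy SelectorList, which is never None, so the 'wait' branch above is effectively unreachable, and for a row with no image img.attrib is an empty dict and img.attrib['src'] raises KeyError. A defensive variant (a sketch only, not what was committed) would test for emptiness instead:

  def parsePassWay(self, img):
    # css('img') yields a SelectorList, so check for emptiness rather than None
    if not img:
      return 'wait'                  # not yet on sale / no icon present
    src = img.attrib.get('src', '')  # .attrib proxies the first matched element
    if "ball11.png" in src:
      return 'pass&single'           # single and parlay betting both on sale
    elif "ball1.png" in src:
      return 'pass'                  # parlay betting only
    else:
      return 'no'                    # this play type is not on sale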

crawl/spiders/football_spider.py → crawl/spiders/football_price.py

@@ -1,80 +1,16 @@
 import scrapy
-from crawl.comm.football import *
+import time
+from crawl.comm.football import FTPrice, WDLOdds, WDLSpreadOdds, ScoreResult, PointsResult, DoubleResult
 
 class FootballSpider(scrapy.Spider):
-  name = "football"
+  name = "football-price"
 
   def start_requests(self):
-    # Matches open for betting
-    url = "https://www.lottery.gov.cn/football/match_list.jspx"
-    yield scrapy.Request(url, self.parseMatch)
-
     # Odds
     url = "https://www.lottery.gov.cn/football/counter.jspx"
-    yield scrapy.Request(url, self.parseCurrent)
-
-    # Results
-
-
-  def parseMatch(self, response):
-    cssMain = ".xxsj table table tr"
-    cssDetail = "td"
-
-    # Get all matches
-    matches = response.css(cssMain)
-    for node in matches[1:]:  # skip the header row
-      prop = node.css(cssDetail)
-
-      # Rows with fewer than 2 td cells are invalid
-      if len(prop) < 2:
-        continue
-
-      matchWeek = prop[0].css('::text').get()
-      league = prop[1].css('::text').get()
-      homeTeam = prop[2].css('.zhu::text').get()
-      awayTeam = prop[2].css('.ke::text').get()
-      matchTime = prop[3].css('::text').get()
-      wdl = self.parsePassWay(prop[6].css('img'))
-      wdls = self.parsePassWay(prop[7].css('img'))
-      score = self.parsePassWay(prop[8].css('img'))
-      points = self.parsePassWay(prop[9].css('img'))
-      double = self.parsePassWay(prop[10].css('img'))
-
-      FTMatch(
-        matchWeek,
-        matchTime,
-        league,
-        homeTeam,
-        awayTeam,
-        wdl,
-        wdls,
-        score,
-        points,
-        double
-      ).persist()
+    yield scrapy.Request(url, self.parsePrice)
 
-  def parsePassWay(self, img):
-    # Not yet on sale
-    if img is None:
-      return 'wait'
-
-    # Image URL
-    src = img.attrib['src']
-
-    # Single and parlay betting both on sale
-    if "ball11.png" in src:
-      return 'pass&single'
-
-    # Parlay betting only
-    elif "ball1.png" in src:
-      return 'pass'
-
-    # This play type is not on sale
-    else:
-      return 'no'
-
-
-  def parseCurrent(self, response):
+  def parsePrice(self, response):
     cssMain = "#content .article .articleCon .section"
     cssDetail = ".saishi"
     cssOther = ".saishiCon"
@@ -157,3 +93,4 @@ class FootballSpider(scrapy.Spider):
 
       # Insert into the database
       match.persist()
+

+55 -0  crawl/spiders/football_result.py

@@ -0,0 +1,55 @@
+import scrapy
+import time
+from crawl.comm.football import FTResult
+
+class FootballSpider(scrapy.Spider):
+  name = "football-result"
+
+  def start_requests(self):
+    # Result draws
+    today = time.strftime("%Y-%m-%d")
+    url = "https://www.lottery.gov.cn/football/result_99.jspx?startDate="+today+"&endDate="+today+"&f_league_id=0&f_league_name=%E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B&single=off"
+    yield scrapy.Request(url, self.parseResult)
+
+  def parseResult(self, response):
+    cssMain = ".xxsj table table tr"
+    cssDetail = "td"
+
+    # Get all matches
+    matches = response.css(cssMain)
+    for node in matches[1:-3]:  # skip the header row and the last three rows
+      prop = node.css(cssDetail)
+
+      matchTime = prop[0].css('::text').get()
+      matchWeek = prop[1].css('::text').get()
+      league = prop[2].css('::text').get()
+      homeTeam = prop[3].css('.zhu::text').get()
+      awayTeam = prop[3].css('.ke::text').get()
+      single = self.isSingle(prop[3].attrib.get('class'))
+      tmp = prop[4].css('::text').get()
+      half = tmp.strip() if tmp is not None else ""
+      tmp = prop[5].css('::text').get()
+      whole = tmp.strip() if tmp is not None else ""
+      tmp = prop[6].css('::text').get()
+      status = tmp.strip() if tmp is not None else ""
+
+      FTResult(
+        matchTime,
+        matchWeek,
+        league,
+        homeTeam,
+        awayTeam,
+        single,
+        half,
+        whole,
+        status
+      ).persist()
+
+  def isSingle(self, eleCls):
+    if eleCls is None:
+      return '0'
+
+    if 'dan' in eleCls:
+      return '1'
+    else:
+      return '0'
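The long query string in start_requests above is just today's date filled into both startDate and endDate, with f_league_name carrying the percent-encoded form of 全部联赛 ("all leagues"). An equivalent construction with the standard library, shown only as a readability sketch (parameter names are taken verbatim from the URL in this commit):

import time
from urllib.parse import urlencode

today = time.strftime("%Y-%m-%d")
params = {
    'startDate': today,
    'endDate': today,
    'f_league_id': 0,
    'f_league_name': '全部联赛',  # urlencode percent-encodes this to %E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B
    'single': 'off',
}
url = "https://www.lottery.gov.cn/football/result_99.jspx?" + urlencode(params)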