张延森 пре 4 година
родитељ
комит
6590b5f210

+ 3
- 0
.idea/.gitignore Прегледај датотеку

@@ -0,0 +1,3 @@
1
+# Default ignored files
2
+/shelf/
3
+/workspace.xml

+ 5
- 0
.idea/codeStyles/codeStyleConfig.xml Прегледај датотеку

@@ -0,0 +1,5 @@
1
+<component name="ProjectCodeStyleConfiguration">
2
+  <state>
3
+    <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
4
+  </state>
5
+</component>

+ 11
- 0
.idea/crawl.iml Прегледај датотеку

@@ -0,0 +1,11 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<module type="PYTHON_MODULE" version="4">
3
+  <component name="NewModuleRootManager">
4
+    <content url="file://$MODULE_DIR$" />
5
+    <orderEntry type="inheritedJdk" />
6
+    <orderEntry type="sourceFolder" forTests="false" />
7
+  </component>
8
+  <component name="TestRunnerService">
9
+    <option name="PROJECT_TEST_RUNNER" value="Twisted Trial" />
10
+  </component>
11
+</module>

+ 7
- 0
.idea/inspectionProfiles/profiles_settings.xml Прегледај датотеку

@@ -0,0 +1,7 @@
1
+<component name="InspectionProjectProfileManager">
2
+  <settings>
3
+    <option name="PROJECT_PROFILE" value="Default" />
4
+    <option name="USE_PROJECT_PROFILE" value="false" />
5
+    <version value="1.0" />
6
+  </settings>
7
+</component>

+ 4
- 0
.idea/misc.xml Прегледај датотеку

@@ -0,0 +1,4 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<project version="4">
3
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
4
+</project>

+ 8
- 0
.idea/modules.xml Прегледај датотеку

@@ -0,0 +1,8 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<project version="4">
3
+  <component name="ProjectModuleManager">
4
+    <modules>
5
+      <module fileurl="file://$PROJECT_DIR$/.idea/crawl.iml" filepath="$PROJECT_DIR$/.idea/crawl.iml" />
6
+    </modules>
7
+  </component>
8
+</project>

+ 6
- 0
.idea/vcs.xml Прегледај датотеку

@@ -0,0 +1,6 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<project version="4">
3
+  <component name="VcsDirectoryMappings">
4
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+  </component>
6
+</project>

+ 2
- 1
.vscode/launch.json Прегледај датотеку

@@ -9,7 +9,8 @@
9 9
       "type": "python",
10 10
       "request": "launch",
11 11
       "program": "${workspaceRoot}/crawl/run.py",
12
-      "console": "integratedTerminal"
12
+      "console": "integratedTerminal",
13
+      "pythonPath": "d:/Application/python-3.8.5/python.exe"
13 14
     }
14 15
   ]
15 16
 }

+ 3
- 0
.vscode/settings.json Прегледај датотеку

@@ -0,0 +1,3 @@
1
+{
2
+  "python.pythonPath": "d:/Application/python-3.8.5/python.exe"
3
+}

BIN
crawl/__pycache__/__init__.cpython-38.pyc Прегледај датотеку


BIN
crawl/__pycache__/settings.cpython-38.pyc Прегледај датотеку


BIN
crawl/comm/__pycache__/__init__.cpython-38.pyc Прегледај датотеку


BIN
crawl/comm/__pycache__/basketball.cpython-38.pyc Прегледај датотеку


BIN
crawl/comm/__pycache__/football.cpython-38.pyc Прегледај датотеку


BIN
crawl/comm/__pycache__/mydb.cpython-38.pyc Прегледај датотеку


+ 320
- 0
crawl/comm/basketball.py Прегледај датотеку

@@ -0,0 +1,320 @@
1
+from .mydb import MyDB
2
+
3
# Match
class BSTMatch:
  """One basketball match row, serialized and stored as a single text record.

  The constructor stores every argument unchanged. ``toString`` joins the
  fields with ``;`` and ``persist`` writes that string to ``ta_crawl_ball``
  with ``data_type='match'``.
  """

  def __init__(self,
               # Match-week code
               matchWeek,

               # Match time
               matchTime,

               # League
               league,

               # Home team
               homeTeam,

               # Away team
               awayTeam,

               # On-sale status
               status,

               # Win/lose play
               wl,

               # Win/lose play with handicap
               wls,

               # Over/under (total score) play
               score,

               # Winning-margin play (presumably; original comment reads "剩分差")
               points
               ):
    self.matchWeek = matchWeek
    self.matchTime = matchTime
    self.league = league
    self.homeTeam = homeTeam
    self.awayTeam = awayTeam
    self.status = status
    self.wl = wl
    self.wls = wls
    self.score = score
    self.points = points

  def toString(self):
    """Return the match as one ``;``-separated record.

    ``None`` fields are written as empty strings and other non-string
    values go through ``str``, so a page cell that yielded no text cannot
    make ``str.join`` raise ``TypeError``.
    """
    fields = (
      self.matchWeek,
      self.matchTime,
      self.league,
      self.homeTeam,
      self.awayTeam,
      self.status,
      self.wl,
      self.wls,
      self.score,
      self.points,
    )
    return ";".join("" if value is None else str(value) for value in fields)

  def persist(self):
    """Insert the serialized match into ``ta_crawl_ball`` and commit.

    Returns without doing anything when ``MyDB.getCursor()`` gives ``None``.
    Exceptions from ``execute``/``commit`` propagate to the caller.
    """
    cursor = MyDB.getCursor()
    if cursor is None:
      return

    sql = "insert into ta_crawl_ball(ball_type, data_type, content) values('basketball', 'match', %s)"
    try:
      cursor.execute(sql, self.toString())
      MyDB.commit()
    finally:
      # Close the cursor even if the insert or the commit fails.
      cursor.close()
71
+
72
+# 赔率
73
+class BSTPrice:
74
+  def __init__(self,
75
+               # 比赛ID
76
+               matchId,
77
+
78
+               # 比赛周编码
79
+               matchWeek,
80
+
81
+               # 比赛时间
82
+               matchTime,
83
+
84
+               # 联赛
85
+               league,
86
+
87
+               # 主队
88
+               homeTeam,
89
+
90
+               # 客队
91
+               awayTeam,
92
+
93
+               # 胜负
94
+               wlOdds=None,
95
+
96
+               # 胜负(让) - [w, l, s]
97
+               wlsOdds=None,
98
+
99
+               # 大小分 - [大, 小, 总分]
100
+               scoreResult=None,
101
+
102
+               # 剩分差 - [主胜6 + 客胜 6]
103
+               pointsResult=None
104
+               ):
105
+    self.matchId = matchId
106
+    self.matchWeek = matchWeek
107
+    self.matchTime = matchTime
108
+    self.league = league
109
+    self.homeTeam = homeTeam
110
+    self.awayTeam = awayTeam
111
+    self.wlOdds = wlOdds
112
+    self.wlsOdds = wlsOdds
113
+    self.scoreResult = scoreResult
114
+    self.pointsResult = pointsResult
115
+
116
+  def toString(self):
117
+    return ";".join((
118
+      self.matchId,
119
+      self.matchWeek,
120
+      self.matchTime,
121
+      self.league,
122
+      self.homeTeam,
123
+      self.awayTeam,
124
+      self.wlOdds.toString(),
125
+      self.wlsOdds.toString(),
126
+      self.scoreResult.toString(),
127
+      self.pointsResult.toString()
128
+    ))
129
+
130
+  def persist(self):
131
+    cursor = MyDB.getCursor()
132
+    if cursor is None:
133
+      return
134
+
135
+    sql = "insert into ta_crawl_ball(ball_type, data_type, content) values('basketball', 'price', %s)"
136
+    cursor.execute(sql, self.toString())
137
+    MyDB.commit()
138
+    cursor.close()
139
+
140
+
141
# Win/lose odds
class WLOdds:
  """Pair of odds values for the win/lose play."""

  def __init__(self,
               # Win
               win=None,

               # Lose
               lose=None
               ):
    self.win = win
    self.lose = lose

  def datas(self, datas=()):
    """Set ``win`` and ``lose`` from the first two items of ``datas``.

    Both items are read before anything is assigned, so the object is left
    untouched when ``datas`` is too short. The default is an immutable
    empty tuple rather than a shared list.

    Raises:
      IndexError: ``datas`` has fewer than two items.
    """
    win, lose = datas[0], datas[1]
    self.win = win
    self.lose = lose

  def toString(self):
    """Return ``win|lose``; a ``None`` value becomes an empty string."""
    values = (self.win, self.lose)
    return '|'.join("" if value is None else str(value) for value in values)
159
+
160
+
161
# Win/lose odds with handicap
class WLSpreadOdds:
  """Win/lose odds together with the handicap (spread) they apply to."""

  def __init__(self,
               # Win
               win=None,

               # Lose
               lose=None,

               # Handicap
               spread=None
               ):
    self.win = win
    self.lose = lose
    self.spread = spread

  def datas(self, datas=()):
    """Set ``win``, ``lose`` and ``spread`` from the first three items.

    All items are read before anything is assigned, so the object is left
    untouched when ``datas`` is too short. The default is an immutable
    empty tuple rather than a shared list.

    Raises:
      IndexError: ``datas`` has fewer than three items.
    """
    win, lose, spread = datas[0], datas[1], datas[2]
    self.win = win
    self.lose = lose
    self.spread = spread

  def toString(self):
    """Return ``win|lose|spread``; ``None`` values become empty strings."""
    values = (self.win, self.lose, self.spread)
    return '|'.join("" if value is None else str(value) for value in values)
184
+
185
+
186
# Over/under (total score)
class ScoreResult:
  """Odds for the over/under play plus the preset total they refer to."""

  def __init__(self,
               big = None, # Over
               small = None, # Under
               total = None # Preset total score
               ):
    self.big = big
    self.small = small
    self.total = total

  def toString(self):
    """Return ``big|small|total``; ``None`` values become empty strings."""
    values = (self.big, self.small, self.total)
    return '|'.join("" if value is None else str(value) for value in values)
203
+
204
# Winning margin
class PointsResult:
  """Twelve odds values: six "win by" margins followed by six "lose by"."""

  # Attribute names in the order used by ``datas`` and ``toString``.
  _FIELDS = (
    "w15", "w610", "w1115", "w1620", "w2125", "w99",
    "l15", "l610", "l1115", "l1620", "l2125", "l99",
  )

  def __init__(self,
               w15=None,  # win by 1-5
               w610=None,  # win by 6-10
               w1115=None,  # win by 11-15
               w1620=None,  # win by 16-20
               w2125=None,  # win by 21-25
               w99=None,  # win by 26+
               l15=None,  # lose by 1-5
               l610=None,  # lose by 6-10
               l1115=None,  # lose by 11-15
               l1620=None,  # lose by 16-20
               l2125=None,  # lose by 21-25
               l99=None  # lose by 26+
               ):
    self.w15 = w15
    self.w610 = w610
    self.w1115 = w1115
    self.w1620 = w1620
    self.w2125 = w2125
    self.w99 = w99
    self.l15 = l15
    self.l610 = l610
    self.l1115 = l1115
    self.l1620 = l1620
    self.l2125 = l2125
    self.l99 = l99

  def datas(self, datas=()):
    """Fill all twelve fields from ``datas[0]`` .. ``datas[11]``.

    Every item is read before any attribute is assigned, so the object is
    left untouched when ``datas`` is too short. Items past index 11 are
    ignored. The default is an immutable empty tuple rather than a shared
    list.

    Raises:
      IndexError: ``datas`` has fewer than twelve items.
    """
    values = [datas[index] for index in range(len(self._FIELDS))]
    for fieldName, value in zip(self._FIELDS, values):
      setattr(self, fieldName, value)

  def toString(self):
    """Return the twelve values joined by ``|``; ``None`` becomes empty."""
    values = (getattr(self, fieldName) for fieldName in self._FIELDS)
    return '|'.join("" if value is None else str(value) for value in values)
262
+
263
+
264
# Result
class BSTResult:
  """Result row for one basketball match, stored as a single text record.

  The constructor stores every argument unchanged. ``toString`` joins the
  fields with ``;`` and ``persist`` writes that string to ``ta_crawl_ball``
  with ``data_type='result'``.
  """

  def __init__(self,
               # Match time
               matchTime,

               # Match week
               matchWeek,

               # League
               league,

               # Home team
               homeTeam,

               # Away team
               awayTeam,

               # Whether this is a single-game fixed-odds match
               single,

               # Score
               score,

               # Status
               status
               ):
    self.matchTime = matchTime
    self.matchWeek = matchWeek
    self.league = league
    self.homeTeam = homeTeam
    self.awayTeam = awayTeam
    self.single = single
    self.score = score
    self.status = status

  def toString(self):
    """Return the result as one ``;``-separated record.

    ``None`` fields are written as empty strings and other non-string
    values go through ``str``, so a page cell that yielded no text cannot
    make ``str.join`` raise ``TypeError``.
    """
    fields = (
      self.matchTime,
      self.matchWeek,
      self.league,
      self.homeTeam,
      self.awayTeam,
      self.single,
      self.score,
      self.status,
    )
    return ';'.join("" if value is None else str(value) for value in fields)

  def persist(self):
    """Insert the serialized result into ``ta_crawl_ball`` and commit.

    Returns without doing anything when ``MyDB.getCursor()`` gives ``None``.
    Exceptions from ``execute``/``commit`` propagate to the caller.
    """
    cursor = MyDB.getCursor()
    if cursor is None:
      return

    sql = "insert into ta_crawl_ball(ball_type, data_type, content) values('basketball', 'result', %s)"
    try:
      cursor.execute(sql, self.toString())
      MyDB.commit()
    finally:
      # Close the cursor even if the insert or the commit fails.
      cursor.close()

+ 3
- 3
crawl/comm/football.py Прегледај датотеку

@@ -63,7 +63,7 @@ class FTMatch:
63 63
     if cursor is None:
64 64
       return
65 65
     
66
-    sql = "insert into ta_crawl_football(data_type, content) values('match', %s)"
66
+    sql = "insert into ta_crawl_ball(ball_type, data_type, content) values('football', 'match', %s)"
67 67
     cursor.execute(sql, self.toString())
68 68
     MyDB.commit()
69 69
     cursor.close()
@@ -136,7 +136,7 @@ class FTPrice:
136 136
     if cursor is None:
137 137
       return
138 138
     
139
-    sql = "insert into ta_crawl_football(data_type, content) values('price', %s)"
139
+    sql = "insert into ta_crawl_ball(ball_type, data_type, content) values('football', 'price', %s)"
140 140
     cursor.execute(sql, self.toString())
141 141
     MyDB.commit()
142 142
     cursor.close()
@@ -509,7 +509,7 @@ class FTResult:
509 509
     if cursor is None:
510 510
       return
511 511
     
512
-    sql = "insert into ta_crawl_football(data_type, content) values('result', %s)"
512
+    sql = "insert into ta_crawl_ball(ball_type, data_type, content) values('football', 'result', %s)"
513 513
     cursor.execute(sql, self.toString())
514 514
     MyDB.commit()
515 515
     cursor.close()

BIN
crawl/spiders/__pycache__/__init__.cpython-38.pyc Прегледај датотеку


BIN
crawl/spiders/__pycache__/basketball_match.cpython-38.pyc Прегледај датотеку


BIN
crawl/spiders/__pycache__/basketball_price.cpython-38.pyc Прегледај датотеку


BIN
crawl/spiders/__pycache__/basketball_result.cpython-38.pyc Прегледај датотеку


BIN
crawl/spiders/__pycache__/football_match.cpython-38.pyc Прегледај датотеку


BIN
crawl/spiders/__pycache__/football_price.cpython-38.pyc Прегледај датотеку


BIN
crawl/spiders/__pycache__/football_result.cpython-38.pyc Прегледај датотеку


+ 69
- 0
crawl/spiders/basketball_match.py Прегледај датотеку

@@ -0,0 +1,69 @@
1
+import scrapy
2
+import time
3
+from crawl.comm.basketball import BSTMatch
4
+
5
class BasketballSpider(scrapy.Spider):
  """Scrape the basketball match list and persist one ``BSTMatch`` per row."""
  name = "basketball-match"

  # parseMatch reads cells 0..9 of every row, so shorter rows are unusable.
  _MIN_CELLS = 10

  def start_requests(self):
    """Yield the single request for the match-list page."""
    # Matches accepting bets
    url = "https://www.lottery.gov.cn/basketball/match_list.jspx"
    yield scrapy.Request(url, self.parseMatch)

  def parseMatch(self, response):
    """Parse every table row of the match list and persist it.

    Rows that are too short, or whose team cell cannot be split into two
    names on ``VS``, are skipped so that one malformed row does not abort
    the remaining rows.
    """
    cssMain = ".xxsj table tr"
    cssDetail = "td"

    # All matches
    matches = response.css(cssMain)
    for node in matches[1:]:  # skip the header row
      prop = node.css(cssDetail)

      # Rows without all the cells read below are not match rows.
      if len(prop) < self._MIN_CELLS:
        continue

      teamText = prop[2].css('::text').get() or ''
      team = teamText.split('VS')
      if len(team) < 2:
        self.logger.warning("Skipping row with unparsable teams: %r", teamText)
        continue

      BSTMatch(
        prop[0].css('::text').get(),   # matchWeek
        prop[3].css('::text').get(),   # matchTime
        prop[1].css('::text').get(),   # league
        team[0].strip(),               # homeTeam
        team[1].strip(),               # awayTeam
        prop[5].css('::text').get(),   # status
        self.parsePassWay(prop[6].css('img')),  # wl
        self.parsePassWay(prop[7].css('img')),  # wls
        self.parsePassWay(prop[8].css('img')),  # score
        self.parsePassWay(prop[9].css('img'))   # points
      ).persist()

  def parsePassWay(self, img):
    """Map a cell's ``<img>`` selection to a sale-mode code.

    Returns ``'wait'`` when there is no image, ``'pass&single'`` or
    ``'pass'`` depending on the image file name, and ``'no'`` otherwise.
    """
    # Not yet on sale. ``.css('img')`` returns an empty (falsy) selector
    # list rather than None when the cell has no image, so test truthiness.
    if not img:
      return 'wait'

    # Image address; a missing src attribute falls through to 'no'.
    src = img.attrib.get('src', '')

    # Both single and parlay modes are on sale
    if "ball2_11.png" in src:
      return 'pass&single'

    # Only parlay mode is on sale
    if "ball2_1.png" in src:
      return 'pass'

    # This play is not on sale
    return 'no'

+ 72
- 0
crawl/spiders/basketball_price.py Прегледај датотеку

@@ -0,0 +1,72 @@
1
+import scrapy
2
+import time
3
+from crawl.comm.basketball import BSTPrice, WLOdds, WLSpreadOdds, ScoreResult, PointsResult
4
+
5
class BasketballSpider(scrapy.Spider):
  """Scrape the basketball odds page and persist one ``BSTPrice`` per match."""
  name = "basketball-price"

  # Attributes read from every match section element.
  _REQUIRED_ATTRS = ('match_id', 'match_time', 'match_week', 'league_val')

  # parsePrice reads cells 3..12 of the ".saishi" table.
  _MIN_DETAIL_CELLS = 13

  # Number of winning-margin odds on each side.
  _MARGIN_COUNT = 6

  def start_requests(self):
    """Yield the single request for the odds page."""
    # Odds
    url = "https://www.lottery.gov.cn/basketball/counter.jspx"
    yield scrapy.Request(url, self.parsePrice)

  def parsePrice(self, response):
    """Parse every match section and persist its odds.

    Sections lacking a required attribute or enough cells are skipped with
    a warning instead of aborting the remaining sections. Odds texts that
    are absent from the page are stored as empty strings.
    """
    cssMain = "#content .article .articleCon .section"
    cssDetail = ".saishi"
    cssOther = ".saishiCon table td"

    def text(cell, query='strong::text'):
      # Empty string rather than None when the cell has no such node.
      return cell.css(query).get(default='')

    # All matches
    for node in response.css(cssMain):
      attrs = node.attrib
      details = node.css(cssDetail).css("td")

      missing = [key for key in self._REQUIRED_ATTRS if key not in attrs]
      if missing or len(details) < self._MIN_DETAIL_CELLS:
        self.logger.warning(
          "Skipping malformed match section (missing attrs=%s, cells=%d)",
          missing, len(details))
        continue

      # Winning-margin odds: cells 16..21 fill the w* fields and cells
      # 9..14 the l* fields, because PointsResult.datas takes wins first.
      lst = node.css(cssOther)
      winValues = [text(cell) for cell in lst[16:22]]
      loseValues = [text(cell) for cell in lst[9:15]]
      if len(winValues) != self._MARGIN_COUNT or len(loseValues) != self._MARGIN_COUNT:
        self.logger.warning(
          "Match %s: winning-margin odds incomplete, storing blanks",
          attrs['match_id'])
        winValues = [''] * self._MARGIN_COUNT
        loseValues = [''] * self._MARGIN_COUNT

      pointsResult = PointsResult()
      pointsResult.datas(winValues + loseValues)

      match = BSTPrice(
        attrs['match_id'],
        attrs['match_week'],
        attrs['match_time'],
        attrs['league_val'],
        text(details[3], '::text'),  # homeTeam
        text(details[4], '::text'),  # awayTeam
        WLOdds(
          text(details[6]),
          text(details[5])
        ),
        WLSpreadOdds(
          text(details[9]),
          text(details[7]),
          text(details[8], 'strong font::text')
        ),
        ScoreResult(
          text(details[10]),
          text(details[12]),
          text(details[11])
        ),
        pointsResult
      )

      # Store
      match.persist()
72
+  

+ 56
- 0
crawl/spiders/basketball_result.py Прегледај датотеку

@@ -0,0 +1,56 @@
1
+import scrapy
2
+import time
3
+from crawl.comm.basketball import BSTResult
4
+
5
class BasketballSpider(scrapy.Spider):
  """Scrape today's basketball results and persist one ``BSTResult`` per row."""
  name = "basketball-result"

  def start_requests(self):
    """Yield the request for the result page restricted to today's date."""
    # Draw results; the date comes from the local clock via time.strftime.
    today = time.strftime("%Y-%m-%d")
    url = (
      "https://www.lottery.gov.cn/basketball/result_99.jspx"
      f"?startDate={today}&endDate={today}"
      "&f_league_id=0&f_league_name=%E5%85%A8%E9%83%A8%E8%81%94%E8%B5%9B&single=off"
    )
    yield scrapy.Request(url, self.parseResult)

  def parseResult(self, response):
    """Parse every result row and persist it.

    Rows with fewer than seven cells, or without two team names, are
    skipped so that one malformed row does not abort the remaining rows.
    Cells without text are stored as empty strings.
    """
    cssMain = ".xxsj table tr"

    def text(cell):
      # Empty string rather than None when the cell has no text node.
      return cell.css('::text').get(default='')

    # All matches
    matches = response.css(cssMain)
    for node in matches[1:-1]:  # skip the header row and the last row
      prop = node.css("td")
      if len(prop) < 7:
        continue

      # Team names are normally two links; otherwise split the text on "VS".
      team = prop[3].css('a::text').getall()
      if not team:
        team = text(prop[3]).split('VS')
      if len(team) < 2:
        self.logger.warning("Skipping result row with unparsable teams: %r", team)
        continue

      BSTResult(
        text(prop[0]),                              # matchTime
        text(prop[1]),                              # matchWeek
        text(prop[2]),                              # league
        team[0].strip(),                            # homeTeam
        team[1].strip(),                            # awayTeam
        self.isSingle(prop[3].attrib.get('class')), # single
        text(prop[4]).strip(),                      # score
        text(prop[5]).strip()                       # status
      ).persist()

  def isSingle(self, eleCls):
    """Return ``'1'`` when the class string contains ``dan``, else ``'0'``."""
    return '1' if eleCls and 'dan' in eleCls else '0'

+ 7
- 0
main.py Прегледај датотеку

@@ -0,0 +1,7 @@
1
+from scrapy.cmdline import execute
2
+import sys
3
+import os
4
+
5
# Add the directory containing this file to the module search path.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

# Spider to run: the first command-line argument when given, otherwise the
# previous hard-coded default, so the source no longer has to be edited to
# run a different spider (e.g. ``python main.py basketball-price``).
spiderName = sys.argv[1] if len(sys.argv) > 1 else 'basketball-result'
execute(['scrapy', 'crawl', spiderName])