|
@@ -10,9 +10,6 @@ class FootballSpider(scrapy.Spider):
|
10
|
10
|
url = 'https://info.sporttery.cn/football/match_list.php'
|
11
|
11
|
yield scrapy.Request(url, self.parseMatch, 'GET')
|
12
|
12
|
|
13
|
|
- # 赛果开奖
|
14
|
|
- yield self.startPareseResult()
|
15
|
|
-
|
16
|
13
|
def parseMatch(self, response):
|
17
|
14
|
cssOfMatches = '.all-wrap > .match_list .m-tab tr'
|
18
|
15
|
|
|
@@ -183,57 +180,3 @@ class FootballSpider(scrapy.Spider):
|
183
|
180
|
|
184
|
181
|
# 入库
|
185
|
182
|
ftprice.persist()
|
186
|
|
-
|
187
|
|
- def startPareseResult(self):
|
188
|
|
- url = 'https://info.sporttery.cn/football/match_result.php'
|
189
|
|
- self._pages = -1
|
190
|
|
- yield scrapy.Request(url, self.parseResult, 'GET')
|
191
|
|
-
|
192
|
|
- if self._pages > 0:
|
193
|
|
- for i in range (2, self._pages):
|
194
|
|
- url = 'https://info.sporttery.cn/football/match_result.php?page=' + str(i)
|
195
|
|
- yield scrapy.Request(url, self.parseResult, 'GET')
|
196
|
|
-
|
197
|
|
- def parseResult(self, response):
|
198
|
|
- # 先处理页码
|
199
|
|
- if self._pages == -1:
|
200
|
|
- pgNodes = response.css('.m-page .u-pg2')
|
201
|
|
- if pgNodes is None or len(pgNodes) == 0:
|
202
|
|
- self._pages = 0
|
203
|
|
- else:
|
204
|
|
- self._pages = len(pgNodes) + 1
|
205
|
|
-
|
206
|
|
- # 处理比赛结果
|
207
|
|
- cssOfMatch = '.all-wrap > .match_list .m-tab tr'
|
208
|
|
- matches = response.css(cssOfMatch)
|
209
|
|
- if matches is None: return
|
210
|
|
- for matchNode in matches:
|
211
|
|
- tdNodeList = matchNode.css('td')
|
212
|
|
-
|
213
|
|
- matchTime = tdNodeList[0].css('::text').get()
|
214
|
|
- matchWeek = tdNodeList[1].css('::text').get()
|
215
|
|
- league = tdNodeList[2].css('::text').get()
|
216
|
|
- leagueFullName = tdNodeList[2].attrib.get('title') # 联赛全称
|
217
|
|
- leagueName = '|'.join((league if league is not None else '', leagueFullName if leagueFullName is not None else ''))
|
218
|
|
- homeTeam = self.trimBrackets(tdNodeList[3].css('.zhu::text').get()) # 主队
|
219
|
|
- awayTeam = self.trimBrackets(tdNodeList[3].css('.ke::text').get()) # 客队
|
220
|
|
- half = tdNodeList[4].css('span::text').get()
|
221
|
|
- whole = tdNodeList[5].css('span::text').get()
|
222
|
|
- status = tdNodeList[9].css('span::text').get()
|
223
|
|
-
|
224
|
|
- FTResult(
|
225
|
|
- matchTime,
|
226
|
|
- matchWeek,
|
227
|
|
- leagueName,
|
228
|
|
- homeTeam,
|
229
|
|
- awayTeam,
|
230
|
|
- '', # 不需要单固
|
231
|
|
- half,
|
232
|
|
- whole,
|
233
|
|
- status
|
234
|
|
- ).persist()
|
235
|
|
-
|
236
|
|
- def trimBrackets(self, str):
|
237
|
|
- if str is None: return ''
|
238
|
|
-
|
239
|
|
- return re.sub(r'\(.*\)', '', str)
|