|
@@ -2,10 +2,14 @@ import scrapy
|
2
|
2
|
import re
|
3
|
3
|
import logging
|
4
|
4
|
from crawl.comm.football import FTResult
|
|
5
|
+from crawl.spiders.util import getNoneStr
|
5
|
6
|
|
6
|
7
|
class FootballSpider(scrapy.Spider):
|
7
|
8
|
name = "football-result"
|
8
|
9
|
|
|
10
|
+ # 所有比赛
|
|
11
|
+ _matchesMap = {}
|
|
12
|
+
|
9
|
13
|
def start_requests(self):
    """Yield the initial GET request for the match-result listing page."""
    # Entry page of the crawl; its response is handed to parseResult.
    resultPageUrl = 'https://info.sporttery.cn/football/match_result.php'
    yield scrapy.Request(
        url=resultPageUrl,
        callback=self.parseResult,
        method='GET',
    )
|
|
@@ -20,16 +24,16 @@ class FootballSpider(scrapy.Spider):
|
20
|
24
|
if tdNodeList is None or len(tdNodeList) < 10:
|
21
|
25
|
continue
|
22
|
26
|
|
23
|
|
- matchTime = tdNodeList[0].css('::text').get()
|
24
|
|
- matchWeek = tdNodeList[1].css('::text').get()
|
25
|
|
- league = tdNodeList[2].css('::text').get()
|
26
|
|
- leagueFullName = tdNodeList[2].attrib.get('title') # 联赛全称
|
27
|
|
- leagueName = '|'.join((league if league is not None else '', leagueFullName if leagueFullName is not None else ''))
|
|
27
|
+ matchTime = getNoneStr(tdNodeList[0].css('::text').get())
|
|
28
|
+ matchWeek = getNoneStr(tdNodeList[1].css('::text').get())
|
|
29
|
+ league = getNoneStr(tdNodeList[2].css('::text').get())
|
|
30
|
+ leagueFullName = getNoneStr(tdNodeList[2].attrib.get('title')) # 联赛全称
|
|
31
|
+ leagueName = '|'.join((league, leagueFullName))
|
28
|
32
|
homeTeam = self.trimBrackets(tdNodeList[3].css('.zhu::text').get()) # 主队
|
29
|
33
|
awayTeam = self.trimBrackets(tdNodeList[3].css('.ke::text').get()) # 客队
|
30
|
|
- half = tdNodeList[4].css('span::text').get()
|
31
|
|
- whole = tdNodeList[5].css('span::text').get()
|
32
|
|
- status = tdNodeList[9].css('::text').get()
|
|
34
|
+ half = getNoneStr(tdNodeList[4].css('span::text').get())
|
|
35
|
+ whole = getNoneStr(tdNodeList[5].css('span::text').get())
|
|
36
|
+ status = getNoneStr(tdNodeList[9].css('::text').get())
|
33
|
37
|
|
34
|
38
|
ftResult = FTResult(
|
35
|
39
|
matchTime,
|
|
@@ -40,12 +44,23 @@ class FootballSpider(scrapy.Spider):
|
40
|
44
|
'', # 不需要单固
|
41
|
45
|
half,
|
42
|
46
|
whole,
|
43
|
|
- status
|
|
47
|
+ status,
|
|
48
|
+ ''
|
44
|
49
|
)
|
45
|
|
-
|
46
|
|
- logging.info("采集到数据 --> %s" % ftResult.toString())
|
47
|
50
|
|
48
|
|
- ftResult.persist()
|
|
51
|
+ matchLink = getNoneStr(tdNodeList[3].css('a').attrib.get('href'))
|
|
52
|
+ matchId = re.sub(r'^.*fb_match_info.php\?m=', '', matchLink)
|
|
53
|
+
|
|
54
|
+ # 加入到临时字典里
|
|
55
|
+ self._matchesMap[matchId] = ftResult
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+ if status == '已完成':
|
|
59
|
+ link = "https://i.sporttery.cn/api/fb_match_info/get_pool_rs?mid=" + matchId
|
|
60
|
+ yield scrapy.Request(link, self.parsePrice, 'GET')
|
|
61
|
+ else:
|
|
62
|
+ logging.info("采集到数据 --> %s" % ftResult.toString())
|
|
63
|
+ ftResult.persist()
|
49
|
64
|
|
50
|
65
|
|
51
|
66
|
# 是否存在下一页
|
|
@@ -74,3 +89,32 @@ class FootballSpider(scrapy.Spider):
|
74
|
89
|
if str is None: return ''
|
75
|
90
|
|
76
|
91
|
return re.sub(r'\(.*\)', '', str)
|
|
92
|
+
|
|
93
|
def parsePrice(self, response):
    """Attach the settled pool results to a match and persist it.

    Callback for a ``get_pool_rs?mid=<matchId>`` request. The match id is
    recovered from the response URL and used to look up the pending
    result object in ``self._matchesMap``. The ``prs_name`` of each pool
    is then joined with ``|`` into ``playRes`` in this fixed order:
    win/draw/lose (``had``), handicap win/draw/lose (``hhad``), correct
    score (``crs``), total goals (``ttg``), half/full time (``hafu``).

    A pool that is missing from the payload contributes an empty string,
    so the positions inside ``playRes`` stay stable and the match is
    still persisted instead of being lost to a ``KeyError``.

    Args:
        response: Response object exposing ``url`` and ``json()``.

    Returns:
        None. The matched result object is updated, logged and persisted.
        If no pending match is registered for the id, a warning is logged
        and nothing is persisted.
    """
    from urllib.parse import parse_qs, urlparse

    logging.info("采集数据源 ---> %s", response.url)

    # Read `mid` from the parsed query string so extra query parameters
    # cannot leak into the id; fall back to the original regex when the
    # parameter cannot be found that way.
    midValues = parse_qs(urlparse(response.url).query).get('mid')
    if midValues:
        matchId = midValues[0]
    else:
        matchId = re.sub(r'^.*get_pool_rs\?mid=', '', response.url)

    ftResult = self._matchesMap.get(matchId)
    if ftResult is None:
        logging.warning("no pending match registered for id %s (%s)",
                        matchId, response.url)
        return

    # Tolerate a missing / null `result` or `pool_rs` node.
    data = response.json()
    result = data.get('result') if isinstance(data, dict) else None
    pools = result.get('pool_rs') if isinstance(result, dict) else None
    if not isinstance(pools, dict):
        pools = {}

    playRes = []
    # had: win/draw/lose, hhad: handicap win/draw/lose, crs: correct score,
    # ttg: total goals, hafu: half/full time.
    for poolKey in ('had', 'hhad', 'crs', 'ttg', 'hafu'):
        pool = pools.get(poolKey)
        prsName = pool.get('prs_name') if isinstance(pool, dict) else None
        playRes.append('' if prsName is None else str(prsName))

    ftResult.playRes = '|'.join(playRes)

    logging.info("采集到数据 --> %s", ftResult.toString())

    # Store the completed record.
    ftResult.persist()
|