张延森 4 gadus atpakaļ
vecāks
revīzija
679048eb92

Binārs
crawl/__pycache__/settings.cpython-38.pyc Parādīt failu


Binārs
crawl/comm/__pycache__/lottery.cpython-38.pyc Parādīt failu


+ 9
- 7
crawl/comm/lottery.py Parādīt failu

@@ -28,19 +28,21 @@ class LotteryResult:
28 28
     cursor = MyDB.getCursor()
29 29
     if cursor is None:
30 30
       return
31
+      
32
+    detailStr = ""
33
+    if self.details is not None:
34
+      detailStr = ";".join(map(lambda x: x.toString(), self.details))
31 35
 
32 36
     query = "select count(*) from ta_crawl_lottery_result where lottery_type = %s and issue_no = %s and status = 1"
33 37
     cursor.execute(query,[self.lotteryType, self.issueNo])
34 38
     res = cursor.fetchone()
35 39
     if res[0] > 0:
36
-      return
37
-
38
-    sql = "insert into ta_crawl_lottery_result(lottery_type, issue_no, opening_date, result, detail) values(%s, %s, %s, %s, %s)"
39
-    detailStr = ""
40
-    if self.details is not None:
41
-      detailStr = ";".join(map(lambda x: x.toString(), self.details))
40
+      sql = "update ta_crawl_lottery_result set opening_date = %s , result = %s , detail = %s where lottery_type = %s and issue_no = %s"
41
+      cursor.execute(sql, (self.openingDate, self.result, detailStr, self.lotteryType, self.issueNo))
42
+    else:
43
+      sql = "insert into ta_crawl_lottery_result(lottery_type, issue_no, opening_date, result, detail) values(%s, %s, %s, %s, %s)"
44
+      cursor.execute(sql, (self.lotteryType, self.issueNo, self.openingDate, self.result, detailStr))
42 45
 
43
-    cursor.execute(sql, (self.lotteryType, self.issueNo, self.openingDate, self.result, detailStr))
44 46
     MyDB.commit()
45 47
     cursor.close()
46 48
 

+ 1
- 1
crawl/run.py Parādīt failu

@@ -7,4 +7,4 @@ print(dirpath)
7 7
 # 添加环境变量
8 8
 sys.path.append(dirpath)
9 9
 # 启动爬虫,第三个参数为爬虫name
10
-execute(['scrapy','crawl','basketball'])
10
+execute(['scrapy','crawl','lottery'])

+ 1
- 1
crawl/settings.py Parādīt failu

@@ -90,7 +90,7 @@ EXTENSIONS = {
90 90
 DATABASE = {
91 91
   'host': 'rm-8vb8r44l60dc5ik05ao.mysql.zhangbei.rds.aliyuncs.com',
92 92
   'port': 3306,
93
-  'name': 'niucai',
93
+  'name': 'niucai2',
94 94
   'user': 'niucai',
95 95
   'password': 'sseTv!lSWgQFTZR3'
96 96
 }

Binārs
crawl/spiders/__pycache__/basketball.cpython-38.pyc Parādīt failu


Binārs
crawl/spiders/__pycache__/basketball_result.cpython-38.pyc Parādīt failu


Binārs
crawl/spiders/__pycache__/football.cpython-38.pyc Parādīt failu


Binārs
crawl/spiders/__pycache__/football_result.cpython-38.pyc Parādīt failu


Binārs
crawl/spiders/__pycache__/lottery.cpython-38.pyc Parādīt failu


+ 406
- 0
logs/lottery.log Parādīt failu

@@ -0,0 +1,406 @@
1
+2020-11-17 09:36:51 [scrapy.extensions.telnet] INFO: Telnet Password: bf575cc8f5dc5fb5
2
+2020-11-17 09:36:51 [scrapy.middleware] INFO: Enabled extensions:
3
+['scrapy.extensions.corestats.CoreStats',
4
+ 'scrapy.extensions.telnet.TelnetConsole',
5
+ 'scrapy.extensions.logstats.LogStats',
6
+ 'crawl.comm.mydb.MyDB']
7
+2020-11-17 09:36:52 [scrapy.middleware] INFO: Enabled downloader middlewares:
8
+['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
9
+ 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
10
+ 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
11
+ 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
12
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
13
+ 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
14
+ 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
15
+ 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
16
+ 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
17
+ 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
18
+ 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
19
+ 'scrapy.downloadermiddlewares.stats.DownloaderStats']
20
+2020-11-17 09:36:52 [scrapy.middleware] INFO: Enabled spider middlewares:
21
+['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
22
+ 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
23
+ 'scrapy.spidermiddlewares.referer.RefererMiddleware',
24
+ 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
25
+ 'scrapy.spidermiddlewares.depth.DepthMiddleware']
26
+2020-11-17 09:36:52 [scrapy.middleware] INFO: Enabled item pipelines:
27
+[]
28
+2020-11-17 09:36:52 [scrapy.core.engine] INFO: Spider opened
29
+2020-11-17 09:36:52 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
30
+2020-11-17 09:36:52 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
31
+2020-11-17 09:36:52 [scrapy.robotstxt] WARNING: Failure while parsing robots.txt. File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.
32
+Traceback (most recent call last):
33
+  File "D:\Application\python-3.8.5\Lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
34
+    result = g.send(result)
35
+StopIteration: <404 https://api.xinti.com/robots.txt>
36
+
37
+During handling of the above exception, another exception occurred:
38
+
39
+Traceback (most recent call last):
40
+  File "D:\Application\python-3.8.5\Lib\site-packages\scrapy\robotstxt.py", line 16, in decode_robotstxt
41
+    robotstxt_body = robotstxt_body.decode('utf-8')
42
+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 248: invalid continuation byte
43
+2020-11-17 09:37:00 [scrapy.core.engine] INFO: Closing spider (finished)
44
+2020-11-17 09:37:00 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
45
+{'downloader/request_bytes': 1739,
46
+ 'downloader/request_count': 6,
47
+ 'downloader/request_method_count/GET': 5,
48
+ 'downloader/request_method_count/POST': 1,
49
+ 'downloader/response_bytes': 10910,
50
+ 'downloader/response_count': 6,
51
+ 'downloader/response_status_count/200': 4,
52
+ 'downloader/response_status_count/404': 2,
53
+ 'elapsed_time_seconds': 8.002777,
54
+ 'finish_reason': 'finished',
55
+ 'finish_time': datetime.datetime(2020, 11, 17, 1, 37, 0, 186544),
56
+ 'log_count/INFO': 10,
57
+ 'log_count/WARNING': 1,
58
+ 'response_received_count': 6,
59
+ 'robotstxt/request_count': 2,
60
+ 'robotstxt/response_count': 2,
61
+ 'robotstxt/response_status_count/404': 2,
62
+ 'scheduler/dequeued': 4,
63
+ 'scheduler/dequeued/memory': 4,
64
+ 'scheduler/enqueued': 4,
65
+ 'scheduler/enqueued/memory': 4,
66
+ 'start_time': datetime.datetime(2020, 11, 17, 1, 36, 52, 183767)}
67
+2020-11-17 09:37:00 [scrapy.core.engine] INFO: Spider closed (finished)
68
+2020-11-17 09:38:07 [scrapy.extensions.telnet] INFO: Telnet Password: 295aef036b8b8ad9
69
+2020-11-17 09:38:08 [scrapy.middleware] INFO: Enabled extensions:
70
+['scrapy.extensions.corestats.CoreStats',
71
+ 'scrapy.extensions.telnet.TelnetConsole',
72
+ 'scrapy.extensions.logstats.LogStats',
73
+ 'crawl.comm.mydb.MyDB']
74
+2020-11-17 09:38:08 [scrapy.middleware] INFO: Enabled downloader middlewares:
75
+['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
76
+ 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
77
+ 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
78
+ 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
79
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
80
+ 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
81
+ 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
82
+ 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
83
+ 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
84
+ 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
85
+ 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
86
+ 'scrapy.downloadermiddlewares.stats.DownloaderStats']
87
+2020-11-17 09:38:08 [scrapy.middleware] INFO: Enabled spider middlewares:
88
+['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
89
+ 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
90
+ 'scrapy.spidermiddlewares.referer.RefererMiddleware',
91
+ 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
92
+ 'scrapy.spidermiddlewares.depth.DepthMiddleware']
93
+2020-11-17 09:38:08 [scrapy.middleware] INFO: Enabled item pipelines:
94
+[]
95
+2020-11-17 09:38:08 [scrapy.core.engine] INFO: Spider opened
96
+2020-11-17 09:38:08 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
97
+2020-11-17 09:38:08 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
98
+2020-11-17 09:38:08 [scrapy.robotstxt] WARNING: Failure while parsing robots.txt. File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.
99
+Traceback (most recent call last):
100
+  File "D:\Application\python-3.8.5\Lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
101
+    result = g.send(result)
102
+StopIteration: <404 https://api.xinti.com/robots.txt>
103
+
104
+During handling of the above exception, another exception occurred:
105
+
106
+Traceback (most recent call last):
107
+  File "D:\Application\python-3.8.5\Lib\site-packages\scrapy\robotstxt.py", line 16, in decode_robotstxt
108
+    robotstxt_body = robotstxt_body.decode('utf-8')
109
+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 248: invalid continuation byte
110
+2020-11-17 09:40:17 [scrapy.extensions.logstats] INFO: Crawled 6 pages (at 6 pages/min), scraped 0 items (at 0 items/min)
111
+2020-11-17 09:40:17 [scrapy.core.engine] INFO: Closing spider (finished)
112
+2020-11-17 09:40:17 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
113
+{'downloader/request_bytes': 1739,
114
+ 'downloader/request_count': 6,
115
+ 'downloader/request_method_count/GET': 5,
116
+ 'downloader/request_method_count/POST': 1,
117
+ 'downloader/response_bytes': 10848,
118
+ 'downloader/response_count': 6,
119
+ 'downloader/response_status_count/200': 4,
120
+ 'downloader/response_status_count/404': 2,
121
+ 'elapsed_time_seconds': 129.262888,
122
+ 'finish_reason': 'finished',
123
+ 'finish_time': datetime.datetime(2020, 11, 17, 1, 40, 17, 488510),
124
+ 'log_count/INFO': 11,
125
+ 'log_count/WARNING': 1,
126
+ 'response_received_count': 6,
127
+ 'robotstxt/request_count': 2,
128
+ 'robotstxt/response_count': 2,
129
+ 'robotstxt/response_status_count/404': 2,
130
+ 'scheduler/dequeued': 4,
131
+ 'scheduler/dequeued/memory': 4,
132
+ 'scheduler/enqueued': 4,
133
+ 'scheduler/enqueued/memory': 4,
134
+ 'start_time': datetime.datetime(2020, 11, 17, 1, 38, 8, 225622)}
135
+2020-11-17 09:40:17 [scrapy.core.engine] INFO: Spider closed (finished)
136
+2020-11-17 09:43:18 [scrapy.extensions.telnet] INFO: Telnet Password: ce022468cacb8212
137
+2020-11-17 09:43:19 [scrapy.middleware] INFO: Enabled extensions:
138
+['scrapy.extensions.corestats.CoreStats',
139
+ 'scrapy.extensions.telnet.TelnetConsole',
140
+ 'scrapy.extensions.logstats.LogStats',
141
+ 'crawl.comm.mydb.MyDB']
142
+2020-11-17 09:43:19 [scrapy.middleware] INFO: Enabled downloader middlewares:
143
+['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
144
+ 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
145
+ 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
146
+ 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
147
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
148
+ 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
149
+ 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
150
+ 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
151
+ 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
152
+ 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
153
+ 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
154
+ 'scrapy.downloadermiddlewares.stats.DownloaderStats']
155
+2020-11-17 09:43:19 [scrapy.middleware] INFO: Enabled spider middlewares:
156
+['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
157
+ 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
158
+ 'scrapy.spidermiddlewares.referer.RefererMiddleware',
159
+ 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
160
+ 'scrapy.spidermiddlewares.depth.DepthMiddleware']
161
+2020-11-17 09:43:19 [scrapy.middleware] INFO: Enabled item pipelines:
162
+[]
163
+2020-11-17 09:43:19 [scrapy.core.engine] INFO: Spider opened
164
+2020-11-17 09:43:19 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
165
+2020-11-17 09:43:19 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
166
+2020-11-17 09:43:19 [scrapy.robotstxt] WARNING: Failure while parsing robots.txt. File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.
167
+Traceback (most recent call last):
168
+  File "D:\Application\python-3.8.5\Lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
169
+    result = g.send(result)
170
+StopIteration: <404 https://api.xinti.com/robots.txt>
171
+
172
+During handling of the above exception, another exception occurred:
173
+
174
+Traceback (most recent call last):
175
+  File "D:\Application\python-3.8.5\Lib\site-packages\scrapy\robotstxt.py", line 16, in decode_robotstxt
176
+    robotstxt_body = robotstxt_body.decode('utf-8')
177
+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 248: invalid continuation byte
178
+2020-11-17 09:44:27 [scrapy.extensions.logstats] INFO: Crawled 6 pages (at 6 pages/min), scraped 0 items (at 0 items/min)
179
+2020-11-17 09:44:28 [scrapy.core.engine] INFO: Closing spider (finished)
180
+2020-11-17 09:44:28 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
181
+{'downloader/request_bytes': 1739,
182
+ 'downloader/request_count': 6,
183
+ 'downloader/request_method_count/GET': 5,
184
+ 'downloader/request_method_count/POST': 1,
185
+ 'downloader/response_bytes': 10881,
186
+ 'downloader/response_count': 6,
187
+ 'downloader/response_status_count/200': 4,
188
+ 'downloader/response_status_count/404': 2,
189
+ 'elapsed_time_seconds': 68.571785,
190
+ 'finish_reason': 'finished',
191
+ 'finish_time': datetime.datetime(2020, 11, 17, 1, 44, 28, 6481),
192
+ 'log_count/INFO': 11,
193
+ 'log_count/WARNING': 1,
194
+ 'response_received_count': 6,
195
+ 'robotstxt/request_count': 2,
196
+ 'robotstxt/response_count': 2,
197
+ 'robotstxt/response_status_count/404': 2,
198
+ 'scheduler/dequeued': 4,
199
+ 'scheduler/dequeued/memory': 4,
200
+ 'scheduler/enqueued': 4,
201
+ 'scheduler/enqueued/memory': 4,
202
+ 'start_time': datetime.datetime(2020, 11, 17, 1, 43, 19, 434696)}
203
+2020-11-17 09:44:28 [scrapy.core.engine] INFO: Spider closed (finished)
204
+2020-11-17 09:53:03 [scrapy.extensions.telnet] INFO: Telnet Password: 3dde26547467246c
205
+2020-11-17 09:53:03 [scrapy.middleware] INFO: Enabled extensions:
206
+['scrapy.extensions.corestats.CoreStats',
207
+ 'scrapy.extensions.telnet.TelnetConsole',
208
+ 'scrapy.extensions.logstats.LogStats',
209
+ 'crawl.comm.mydb.MyDB']
210
+2020-11-17 09:53:03 [scrapy.middleware] INFO: Enabled downloader middlewares:
211
+['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
212
+ 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
213
+ 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
214
+ 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
215
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
216
+ 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
217
+ 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
218
+ 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
219
+ 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
220
+ 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
221
+ 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
222
+ 'scrapy.downloadermiddlewares.stats.DownloaderStats']
223
+2020-11-17 09:53:03 [scrapy.middleware] INFO: Enabled spider middlewares:
224
+['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
225
+ 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
226
+ 'scrapy.spidermiddlewares.referer.RefererMiddleware',
227
+ 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
228
+ 'scrapy.spidermiddlewares.depth.DepthMiddleware']
229
+2020-11-17 09:53:03 [scrapy.middleware] INFO: Enabled item pipelines:
230
+[]
231
+2020-11-17 09:53:03 [scrapy.core.engine] INFO: Spider opened
232
+2020-11-17 09:53:03 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
233
+2020-11-17 09:53:03 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
234
+2020-11-17 09:53:04 [scrapy.robotstxt] WARNING: Failure while parsing robots.txt. File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.
235
+Traceback (most recent call last):
236
+  File "D:\Application\python-3.8.5\Lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
237
+    result = g.send(result)
238
+StopIteration: <404 https://api.xinti.com/robots.txt>
239
+
240
+During handling of the above exception, another exception occurred:
241
+
242
+Traceback (most recent call last):
243
+  File "D:\Application\python-3.8.5\Lib\site-packages\scrapy\robotstxt.py", line 16, in decode_robotstxt
244
+    robotstxt_body = robotstxt_body.decode('utf-8')
245
+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 248: invalid continuation byte
246
+2020-11-17 09:53:28 [scrapy.core.engine] INFO: Closing spider (finished)
247
+2020-11-17 09:53:28 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
248
+{'downloader/request_bytes': 1739,
249
+ 'downloader/request_count': 6,
250
+ 'downloader/request_method_count/GET': 5,
251
+ 'downloader/request_method_count/POST': 1,
252
+ 'downloader/response_bytes': 10851,
253
+ 'downloader/response_count': 6,
254
+ 'downloader/response_status_count/200': 4,
255
+ 'downloader/response_status_count/404': 2,
256
+ 'elapsed_time_seconds': 24.955046,
257
+ 'finish_reason': 'finished',
258
+ 'finish_time': datetime.datetime(2020, 11, 17, 1, 53, 28, 832941),
259
+ 'log_count/INFO': 10,
260
+ 'log_count/WARNING': 1,
261
+ 'response_received_count': 6,
262
+ 'robotstxt/request_count': 2,
263
+ 'robotstxt/response_count': 2,
264
+ 'robotstxt/response_status_count/404': 2,
265
+ 'scheduler/dequeued': 4,
266
+ 'scheduler/dequeued/memory': 4,
267
+ 'scheduler/enqueued': 4,
268
+ 'scheduler/enqueued/memory': 4,
269
+ 'start_time': datetime.datetime(2020, 11, 17, 1, 53, 3, 877895)}
270
+2020-11-17 09:53:28 [scrapy.core.engine] INFO: Spider closed (finished)
271
+2020-11-17 09:53:51 [scrapy.extensions.telnet] INFO: Telnet Password: 366dfc97a237cf7c
272
+2020-11-17 09:53:51 [scrapy.middleware] INFO: Enabled extensions:
273
+['scrapy.extensions.corestats.CoreStats',
274
+ 'scrapy.extensions.telnet.TelnetConsole',
275
+ 'scrapy.extensions.logstats.LogStats',
276
+ 'crawl.comm.mydb.MyDB']
277
+2020-11-17 09:53:51 [scrapy.middleware] INFO: Enabled downloader middlewares:
278
+['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
279
+ 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
280
+ 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
281
+ 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
282
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
283
+ 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
284
+ 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
285
+ 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
286
+ 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
287
+ 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
288
+ 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
289
+ 'scrapy.downloadermiddlewares.stats.DownloaderStats']
290
+2020-11-17 09:53:51 [scrapy.middleware] INFO: Enabled spider middlewares:
291
+['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
292
+ 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
293
+ 'scrapy.spidermiddlewares.referer.RefererMiddleware',
294
+ 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
295
+ 'scrapy.spidermiddlewares.depth.DepthMiddleware']
296
+2020-11-17 09:53:51 [scrapy.middleware] INFO: Enabled item pipelines:
297
+[]
298
+2020-11-17 09:53:51 [scrapy.core.engine] INFO: Spider opened
299
+2020-11-17 09:53:52 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
300
+2020-11-17 09:53:52 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
301
+2020-11-17 09:53:52 [scrapy.robotstxt] WARNING: Failure while parsing robots.txt. File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.
302
+Traceback (most recent call last):
303
+  File "D:\Application\python-3.8.5\Lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
304
+    result = g.send(result)
305
+StopIteration: <404 https://api.xinti.com/robots.txt>
306
+
307
+During handling of the above exception, another exception occurred:
308
+
309
+Traceback (most recent call last):
310
+  File "D:\Application\python-3.8.5\Lib\site-packages\scrapy\robotstxt.py", line 16, in decode_robotstxt
311
+    robotstxt_body = robotstxt_body.decode('utf-8')
312
+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 248: invalid continuation byte
313
+2020-11-17 09:54:53 [scrapy.extensions.logstats] INFO: Crawled 6 pages (at 6 pages/min), scraped 0 items (at 0 items/min)
314
+2020-11-17 09:54:56 [scrapy.core.engine] INFO: Closing spider (finished)
315
+2020-11-17 09:54:56 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
316
+{'downloader/request_bytes': 1739,
317
+ 'downloader/request_count': 6,
318
+ 'downloader/request_method_count/GET': 5,
319
+ 'downloader/request_method_count/POST': 1,
320
+ 'downloader/response_bytes': 10843,
321
+ 'downloader/response_count': 6,
322
+ 'downloader/response_status_count/200': 4,
323
+ 'downloader/response_status_count/404': 2,
324
+ 'elapsed_time_seconds': 64.547145,
325
+ 'finish_reason': 'finished',
326
+ 'finish_time': datetime.datetime(2020, 11, 17, 1, 54, 56, 548924),
327
+ 'log_count/INFO': 11,
328
+ 'log_count/WARNING': 1,
329
+ 'response_received_count': 6,
330
+ 'robotstxt/request_count': 2,
331
+ 'robotstxt/response_count': 2,
332
+ 'robotstxt/response_status_count/404': 2,
333
+ 'scheduler/dequeued': 4,
334
+ 'scheduler/dequeued/memory': 4,
335
+ 'scheduler/enqueued': 4,
336
+ 'scheduler/enqueued/memory': 4,
337
+ 'start_time': datetime.datetime(2020, 11, 17, 1, 53, 52, 1779)}
338
+2020-11-17 09:54:56 [scrapy.core.engine] INFO: Spider closed (finished)
339
+2020-11-17 10:50:15 [scrapy.extensions.telnet] INFO: Telnet Password: 2b6b900fab93768f
340
+2020-11-17 10:50:15 [scrapy.middleware] INFO: Enabled extensions:
341
+['scrapy.extensions.corestats.CoreStats',
342
+ 'scrapy.extensions.telnet.TelnetConsole',
343
+ 'scrapy.extensions.logstats.LogStats',
344
+ 'crawl.comm.mydb.MyDB']
345
+2020-11-17 10:50:15 [scrapy.middleware] INFO: Enabled downloader middlewares:
346
+['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
347
+ 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
348
+ 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
349
+ 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
350
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
351
+ 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
352
+ 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
353
+ 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
354
+ 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
355
+ 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
356
+ 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
357
+ 'scrapy.downloadermiddlewares.stats.DownloaderStats']
358
+2020-11-17 10:50:15 [scrapy.middleware] INFO: Enabled spider middlewares:
359
+['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
360
+ 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
361
+ 'scrapy.spidermiddlewares.referer.RefererMiddleware',
362
+ 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
363
+ 'scrapy.spidermiddlewares.depth.DepthMiddleware']
364
+2020-11-17 10:50:15 [scrapy.middleware] INFO: Enabled item pipelines:
365
+[]
366
+2020-11-17 10:50:15 [scrapy.core.engine] INFO: Spider opened
367
+2020-11-17 10:50:15 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
368
+2020-11-17 10:50:15 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
369
+2020-11-17 10:50:16 [scrapy.robotstxt] WARNING: Failure while parsing robots.txt. File either contains garbage or is in an encoding other than UTF-8, treating it as an empty file.
370
+Traceback (most recent call last):
371
+  File "D:\Application\python-3.8.5\Lib\site-packages\twisted\internet\defer.py", line 1418, in _inlineCallbacks
372
+    result = g.send(result)
373
+StopIteration: <404 https://api.xinti.com/robots.txt>
374
+
375
+During handling of the above exception, another exception occurred:
376
+
377
+Traceback (most recent call last):
378
+  File "D:\Application\python-3.8.5\Lib\site-packages\scrapy\robotstxt.py", line 16, in decode_robotstxt
379
+    robotstxt_body = robotstxt_body.decode('utf-8')
380
+UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 248: invalid continuation byte
381
+2020-11-17 10:51:39 [scrapy.extensions.logstats] INFO: Crawled 6 pages (at 6 pages/min), scraped 0 items (at 0 items/min)
382
+2020-11-17 10:51:39 [scrapy.core.engine] INFO: Closing spider (finished)
383
+2020-11-17 10:51:39 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
384
+{'downloader/request_bytes': 1739,
385
+ 'downloader/request_count': 6,
386
+ 'downloader/request_method_count/GET': 5,
387
+ 'downloader/request_method_count/POST': 1,
388
+ 'downloader/response_bytes': 10860,
389
+ 'downloader/response_count': 6,
390
+ 'downloader/response_status_count/200': 4,
391
+ 'downloader/response_status_count/404': 2,
392
+ 'elapsed_time_seconds': 84.03754,
393
+ 'finish_reason': 'finished',
394
+ 'finish_time': datetime.datetime(2020, 11, 17, 2, 51, 39, 782671),
395
+ 'log_count/INFO': 11,
396
+ 'log_count/WARNING': 1,
397
+ 'response_received_count': 6,
398
+ 'robotstxt/request_count': 2,
399
+ 'robotstxt/response_count': 2,
400
+ 'robotstxt/response_status_count/404': 2,
401
+ 'scheduler/dequeued': 4,
402
+ 'scheduler/dequeued/memory': 4,
403
+ 'scheduler/enqueued': 4,
404
+ 'scheduler/enqueued/memory': 4,
405
+ 'start_time': datetime.datetime(2020, 11, 17, 2, 50, 15, 745131)}
406
+2020-11-17 10:51:39 [scrapy.core.engine] INFO: Spider closed (finished)

+ 60
- 0
logs/main.log Parādīt failu

@@ -0,0 +1,60 @@
1
+2020-11-17 09:36:51 [scrapy.utils.log] INFO: Scrapy 2.3.0 started (bot: crawl)
2
+2020-11-17 09:36:51 [scrapy.utils.log] INFO: Versions: lxml 4.5.2.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 3.0, Platform Windows-10-10.0.18362-SP0
3
+2020-11-17 09:36:51 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
4
+2020-11-17 09:36:51 [scrapy.crawler] INFO: Overridden settings:
5
+{'BOT_NAME': 'crawl',
6
+ 'LOG_FILE': 'logs/lottery.log',
7
+ 'LOG_LEVEL': 20,
8
+ 'NEWSPIDER_MODULE': 'crawl.spiders',
9
+ 'ROBOTSTXT_OBEY': True,
10
+ 'SPIDER_MODULES': ['crawl.spiders']}
11
+2020-11-17 09:38:07 [scrapy.utils.log] INFO: Scrapy 2.3.0 started (bot: crawl)
12
+2020-11-17 09:38:07 [scrapy.utils.log] INFO: Versions: lxml 4.5.2.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 3.0, Platform Windows-10-10.0.18362-SP0
13
+2020-11-17 09:38:07 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
14
+2020-11-17 09:38:07 [scrapy.crawler] INFO: Overridden settings:
15
+{'BOT_NAME': 'crawl',
16
+ 'LOG_FILE': 'logs/lottery.log',
17
+ 'LOG_LEVEL': 20,
18
+ 'NEWSPIDER_MODULE': 'crawl.spiders',
19
+ 'ROBOTSTXT_OBEY': True,
20
+ 'SPIDER_MODULES': ['crawl.spiders']}
21
+2020-11-17 09:43:18 [scrapy.utils.log] INFO: Scrapy 2.3.0 started (bot: crawl)
22
+2020-11-17 09:43:18 [scrapy.utils.log] INFO: Versions: lxml 4.5.2.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 3.0, Platform Windows-10-10.0.18362-SP0
23
+2020-11-17 09:43:18 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
24
+2020-11-17 09:43:18 [scrapy.crawler] INFO: Overridden settings:
25
+{'BOT_NAME': 'crawl',
26
+ 'LOG_FILE': 'logs/lottery.log',
27
+ 'LOG_LEVEL': 20,
28
+ 'NEWSPIDER_MODULE': 'crawl.spiders',
29
+ 'ROBOTSTXT_OBEY': True,
30
+ 'SPIDER_MODULES': ['crawl.spiders']}
31
+2020-11-17 09:53:03 [scrapy.utils.log] INFO: Scrapy 2.3.0 started (bot: crawl)
32
+2020-11-17 09:53:03 [scrapy.utils.log] INFO: Versions: lxml 4.5.2.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 3.0, Platform Windows-10-10.0.18362-SP0
33
+2020-11-17 09:53:03 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
34
+2020-11-17 09:53:03 [scrapy.crawler] INFO: Overridden settings:
35
+{'BOT_NAME': 'crawl',
36
+ 'LOG_FILE': 'logs/lottery.log',
37
+ 'LOG_LEVEL': 20,
38
+ 'NEWSPIDER_MODULE': 'crawl.spiders',
39
+ 'ROBOTSTXT_OBEY': True,
40
+ 'SPIDER_MODULES': ['crawl.spiders']}
41
+2020-11-17 09:53:51 [scrapy.utils.log] INFO: Scrapy 2.3.0 started (bot: crawl)
42
+2020-11-17 09:53:51 [scrapy.utils.log] INFO: Versions: lxml 4.5.2.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 3.0, Platform Windows-10-10.0.18362-SP0
43
+2020-11-17 09:53:51 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
44
+2020-11-17 09:53:51 [scrapy.crawler] INFO: Overridden settings:
45
+{'BOT_NAME': 'crawl',
46
+ 'LOG_FILE': 'logs/lottery.log',
47
+ 'LOG_LEVEL': 20,
48
+ 'NEWSPIDER_MODULE': 'crawl.spiders',
49
+ 'ROBOTSTXT_OBEY': True,
50
+ 'SPIDER_MODULES': ['crawl.spiders']}
51
+2020-11-17 10:50:15 [scrapy.utils.log] INFO: Scrapy 2.3.0 started (bot: crawl)
52
+2020-11-17 10:50:15 [scrapy.utils.log] INFO: Versions: lxml 4.5.2.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g  21 Apr 2020), cryptography 3.0, Platform Windows-10-10.0.18362-SP0
53
+2020-11-17 10:50:15 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
54
+2020-11-17 10:50:15 [scrapy.crawler] INFO: Overridden settings:
55
+{'BOT_NAME': 'crawl',
56
+ 'LOG_FILE': 'logs/lottery.log',
57
+ 'LOG_LEVEL': 20,
58
+ 'NEWSPIDER_MODULE': 'crawl.spiders',
59
+ 'ROBOTSTXT_OBEY': True,
60
+ 'SPIDER_MODULES': ['crawl.spiders']}