{"id":20959,"date":"2023-06-03T21:57:36","date_gmt":"2023-06-03T12:57:36","guid":{"rendered":"http:\/\/www.code-magagine.com\/?p=20959"},"modified":"2023-06-07T21:10:19","modified_gmt":"2023-06-07T12:10:19","slug":"%e3%80%90python%e3%80%91scrapy%e3%81%ae%e3%82%b3%e3%83%9e%e3%83%b3%e3%83%89","status":"publish","type":"post","link":"http:\/\/www.code-magagine.com\/?p=20959","title":{"rendered":"\u3010Python\u3011Scrapy\u306e\u30b3\u30de\u30f3\u30c9\u3001\u958b\u767a\u306e\u6d41\u308c"},"content":{"rendered":"<h2>\u57fa\u672c\u60c5\u5831\u306e\u78ba\u8a8d<\/h2>\n<pre class=\"lang:default decode:true \">scrapy<\/pre>\n<p>\u30d0\u30fc\u30b8\u30e7\u30f3\u3001\u30a2\u30af\u30c6\u30a3\u30d6\u306a\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3001\u5229\u7528\u3067\u304d\u308b\u30b3\u30de\u30f3\u30c9\u306e\u60c5\u5831\u3092\u8868\u793a\u3067\u304d\u307e\u3059\u3002<\/p>\n<h2>scrapy\u30b3\u30de\u30f3\u30c9\u4e00\u89a7<\/h2>\n<table>\n<tbody>\n<tr>\n<th>\u30b3\u30de\u30f3\u30c9<\/th>\n<th>\u5185\u5bb9<\/th>\n<\/tr>\n<tr>\n<td>bench<\/td>\n<td>\u7c21\u5358\u306a\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u30c6\u30b9\u30c8\u3092\u5b9f\u884c<\/td>\n<\/tr>\n<tr>\n<td>startproject<\/td>\n<td>\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3092\u4f5c\u6210\u3002Scrapy\u306f\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3068\u3044\u3046\u5358\u4f4d\u3067\u4e00\u3064\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3092\u69cb\u6210\u3057\u307e\u3059\u3002\u305d\u306e\u4e2d\u306bSpider\u3068\u3044\u3046Web\u30b5\u30a4\u30c8\u304b\u3089\u60c5\u5831\u3092\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3059\u308b\u305f\u3081\u306eclass\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002\u4e00\u3064\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306e\u4e2d\u306b\u306f\u8907\u6570spider\u3092\u4f5c\u6210\u53ef\u80fd\u3067\u3059\u3002<\/td>\n<\/tr>\n<tr>\n<td>genspider<\/td>\n<td>\u3042\u3089\u304b\u3058\u3081\u7528\u610f\u3055\u308c\u305f\u30c6\u30f3\u30d7\u30ec\u30fc\u30c8\u3092\u3082\u3068\u306b\u65b0\u3057\u3044\u30b9\u30d1\u30a4\u30c0\u30fc\u30
92\u4f5c\u6210<\/td>\n<\/tr>\n<tr>\n<td>runspider<\/td>\n<td>\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3092\u4f5c\u6210\u305b\u305a\u306b\u3001\u30b9\u30d1\u30a4\u30c0\u30fc\u3092\u5b9f\u884c\u3059\u308b\u3002\u901a\u5e38\u306f\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3092\u4f5c\u6210\u3057\u3066\u4f5c\u308b\u306e\u3067\u6ec5\u591a\u306b\u5229\u7528\u3057\u306a\u3044\u3067\u3059\u3002\u4f55\u304b\u8a66\u3057\u305f\u3044\u4f7f\u3044\u6368\u3066\u306e\u30b9\u30d1\u30a4\u30c0\u30fc\u3092\u4f5c\u308b\u5834\u5408\u306a\u3069\u306f\u6d3b\u7528\u3067\u304d\u308b\u304b\u3082\u3057\u308c\u307e\u305b\u3093\u3002<\/td>\n<\/tr>\n<tr>\n<td>crawl<\/td>\n<td>\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u5185\u3067\u30b9\u30d1\u30a4\u30c0\u30fc\u3092\u5b9f\u884c\u3059\u308b\u3002<\/td>\n<\/tr>\n<tr>\n<td>shell<\/td>\n<td>\u5bfe\u8a71\u578b\u30b7\u30a7\u30eb\u3092\u8d77\u52d5\u3059\u308b\u3002XPath\u3084CSS\u3067\u30c7\u30fc\u30bf\u306e\u53d6\u5f97\u306a\u3069\u3092\u8a66\u3059\u3053\u3068\u304c\u53ef\u80fd\u3002<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h3>\u57fa\u672c\u7684\u306a\u30b3\u30de\u30f3\u30c9\u5229\u7528\u306e\u6d41\u308c<\/h3>\n<h4>\u30c7\u30fc\u30bf\u306e\u53d6\u5f97\u65b9\u6cd5\u306e\u691c\u8a0e<\/h4>\n<p>XPath\u3084CSS\u30bb\u30ec\u30af\u30bf\u306a\u3069\u3092\u4f7f\u3044\u307e\u3059\u3002\u30c7\u30fc\u30bf\u306e\u53d6\u5f97\u65b9\u6cd5\u306a\u3069\u306fshell\u306b\u3088\u3063\u3066\u9069\u5b9c\u78ba\u8a8d\u3057\u3066\u78ba\u8a8d\u304c\u7d42\u308f\u3063\u305f\u3089spider\u306b\u53cd\u6620\u3059\u308b\u307f\u305f\u3044\u306a\u5b9f\u88c5\u30a4\u30e1\u30fc\u30b8\u3067\u3059\u3002\u306a\u304a\u3001\u6163\u308c\u3066\u304d\u305f\u3089shell\u3067\u78ba\u8a8d\u3057\u306a\u304f\u3066\u3082\u52d5\u304d\u304c\u4e88\u6e2c\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308b\u306e\u3067shell\u3092\u4f7f\u3046\u306e\u306f\u6700\u521d\u306e\u3046\u3061\u3060\u3051\u304b\u3082\u3057\u308c\u307e\u305b\u3093\u3002<\/p>\n<h3>\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306e\u4f5c\u6210<\/h3>\n<p>startproject\u3067\
u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3092\u4f5c\u6210\u3057\u3001genspider\u3067\u30b9\u30d1\u30a4\u30c0\u30fc\u3092\u4f5c\u6210\u3057\u3001crawl\u30b3\u30de\u30f3\u30c9\u3067\u30b9\u30d1\u30a4\u30c0\u30fc\u3092\u5b9f\u884c\u3059\u308b\u3068\u3044\u3046\u3088\u3046\u306a\u6d41\u308c\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">scrapy startproject \u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u540d[\u6307\u5b9a\u306a\u3057\u306e\u5834\u5408\u306f\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u540d]<\/pre>\n<h4>\u81ea\u52d5\u7684\u306b\u4f5c\u6210\u3055\u308c\u308b\u30d5\u30a1\u30a4\u30eb\u4e00\u89a7<\/h4>\n<table>\n<tbody>\n<tr>\n<th>\u30d5\u30a1\u30a4\u30eb\/\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u540d<\/th>\n<th>\u8aac\u660e<\/th>\n<\/tr>\n<tr>\n<td>scrapy.cfg<\/td>\n<td>Spider\u306e\u4f5c\u6210\u3084\u8a2d\u5b9a\u306b\u5fc5\u8981\u306a\u30d5\u30a1\u30a4\u30eb<\/td>\n<\/tr>\n<tr>\n<td>\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u540d\u30d5\u30a9\u30eb\u30c0<\/td>\n<td><\/td>\n<\/tr>\n<tr>\n<td>\u2514spiders<\/td>\n<td>spiders\u3092\u4f5c\u6210\u3057\u3066\u3044\u304f\u30c7\u30a3\u30ec\u30af\u30c8\u30ea<\/td>\n<\/tr>\n<tr>\n<td>\u3000\u2514__init__.py<\/td>\n<td><\/td>\n<\/tr>\n<tr>\n<td>\u2514__init__.py<\/td>\n<td><\/td>\n<\/tr>\n<tr>\n<td>\u2514items.py<\/td>\n<td>\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3067\u53d6\u5f97\u3057\u305f\u30c7\u30fc\u30bf\u3092\u683c\u7d0d\u3059\u308b\u5165\u308c\u7269\u30af\u30e9\u30b9<\/td>\n<\/tr>\n<tr>\n<td>\u2514middlewares.py<\/td>\n<td>\n<div>\n<div>\n<div>XXXSpiderMiddleware(<span style=\"font-family: inherit; font-size: inherit;\">\u30ea\u30af\u30a8\u30b9\u30c8\u3068\u30ec\u30b9\u30dd\u30f3\u30b9\u306b\u95a2\u3059\u308b\u8ffd\u52a0\u51e6\u7406\u3092\u5b9f\u88c5\u3057\u305f\u3044\u5834\u5408\u306b\u8a18\u8ff0\u3059\u308b)\u3001xxx<\/span><span style=\"font-family: inherit; font-size: 
inherit;\">DownloaderMiddleware(Web\u30b5\u30a4\u30c8\u304b\u3089\u30da\u30fc\u30b8\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u51e6\u7406\u3092\u62e1\u5f35\u3057\u305f\u3044\u5834\u5408\u306b\u8a18\u8ff0\u3059\u308b)<\/span><\/div>\n<\/div>\n<\/div>\n<\/td>\n<\/tr>\n<tr>\n<td>\u2514pipelines.py<\/td>\n<td>Web\u30b5\u30a4\u30c8\u304b\u3089\u53d6\u5f97\u3057\u305f\u30c7\u30fc\u30bf\u306e\u30af\u30ec\u30f3\u30b8\u30f3\u30b0\u3001DB\u306a\u3069\u306e\u66f4\u65b0\u51e6\u7406\u3092\u8a18\u8ff0\u3057\u307e\u3059\u3002<\/td>\n<\/tr>\n<tr>\n<td>\u2514settings.py<\/td>\n<td>\u30d1\u30e9\u30e1\u30fc\u30bf\u3067\u5404\u7a2e\u8a2d\u5b9a\u3092\u884c\u3046\u8a2d\u5b9a\u30d5\u30a1\u30a4\u30eb\u3067\u3059\u3002<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h3>bench<\/h3>\n<p>\u30de\u30b7\u30f3\u30b9\u30da\u30c3\u30af\u306b\u4f9d\u5b58\u3057\u307e\u3059\u304c\u3001\u5b9f\u884c\u3059\u308b\u3068\u30ea\u30af\u30a8\u30b9\u30c8\u3084\u30ec\u30b9\u30dd\u30f3\u30b9\u306e\u4ef6\u6570\u304c\u8868\u793a\u3055\u308c\u307e\u3059\u3002\u9ad8\u3044\u30de\u30b7\u30f3\u30b9\u30da\u30c3\u30af\u3092\u6301\u3064PC\u3067\u3042\u308c\u3070\u9ad8\u3044\u6570\u3092\u5b9f\u884c\u3067\u304d\u307e\u3059\u3002<\/p>\n<ul>\n<li>downloader\/request_count<\/li>\n<li>downloader\/response_count<\/li>\n<\/ul>\n<h3>shell<\/h3>\n<p>\u5229\u7528\u3059\u308b\u5834\u5408\u306f\u4e8b\u524d\u306bipython\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u304c\u5fc5\u8981\u306b\u306a\u308a\u307e\u3059\u3002\u307e\u3060\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u3066\u3044\u306a\u3044\u5834\u5408\u306fconda install ipython\u3067\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u3066\u304a\u304d\u307e\u3057\u3087\u3046\u3002<\/p>\n<pre class=\"lang:default decode:true\">scrapy shell 
URL(https:\/\/xxx.com)<\/pre>\n<p>\u6307\u5b9a\u3057\u305fURL\u306e\u30c7\u30fc\u30bf\u3092\u53d6\u5f97\u3057\u305f\u72b6\u614b\u3067\u30b7\u30a7\u30eb\u3092\u8d77\u52d5\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4ee5\u4e0b\u306e\u72b6\u614b\u306b\u306a\u308c\u3070shell\u306e\u8d77\u52d5\u306b\u6210\u529f\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">[s]   shelp()           Shell help (print this help)\r\n[s]   view(response)    View response in a browser\r\nIn [1]:<\/pre>\n<p>\u8a66\u3057\u306b\u4ee5\u4e0b\u306e\u3088\u3046\u306bfetch\u30b3\u30de\u30f3\u30c9\u3092\u5b9f\u884c\u3059\u308c\u3070\u6307\u5b9a\u3057\u305fURL\u304b\u3089\u60c5\u5831\u3092\u53d6\u5f97\u3067\u304d\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">fetch('https:\/\/xxx.com\/')<\/pre>\n<h4>Xpath\u3092\u5b9f\u884c<\/h4>\n<pre class=\"lang:default decode:true\">aaa = response.xpath('\/\/h2\/a')<\/pre>\n<p>\u5909\u6570\u540d\u3092\u5165\u529b\u3059\u308c\u3070\u3001\u3053\u3093\u306a\u611f\u3058\u3067\u53d6\u5f97\u7d50\u679c\u3092\u51fa\u529b\u3067\u304d\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true \">In [6]: aaa\r\nOut[6]:\r\n[&lt;Selector query='\/\/h2\/a' data='&lt;a href=\"https:\/\/qiita.com\/cha84rakan...'&gt;,\r\n &lt;Selector query='\/\/h2\/a' data='&lt;a href=\"https:\/\/qiita.com\/takoraisut...'&gt;,\r\n &lt;Selector query='\/\/h2\/a' data='&lt;a href=\"https:\/\/qiita.com\/WebEngrChi...'&gt;,]<\/pre>\n<h5>get()<\/h5>\n<pre class=\"lang:default decode:true\">\u5909\u6570 = response.xpath('Xpath').get()<\/pre>\n<h5>getall()<\/h5>\n<pre class=\"lang:default decode:true\">\u5909\u6570 = 
response.xpath('Xpath').getall()<\/pre>\n<p>\u53d6\u5f97\u3067\u304d\u305fHTML\u8981\u7d20\u304c\u8907\u6570\u3067\u3082\u307e\u3068\u3081\u3066\u30c6\u30ad\u30b9\u30c8\u3092\u53d6\u5f97\u3067\u304d\u307e\u3059\u3002<\/p>\n<p>HTML\u306edata\uff08\u30c6\u30ad\u30b9\u30c8\uff09\u3092\u53d6\u5f97\u3067\u304d\u307e\u3059\u3002<\/p>\n<h4>CSS\u30bb\u30ec\u30af\u30bf\u3092\u5b9f\u884c<\/h4>\n<pre class=\"lang:default decode:true \">bbb = response.css('h2 &gt; a')<\/pre>\n<p>\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u53d6\u5f97\u3067\u304d\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">In [12]: bbb\r\nOut[12]:\r\n[&lt;Selector query='descendant-or-self::h2\/a' data='&lt;a href=\"https:\/\/qiita.com\/cha84rakan...'&gt;,\r\n &lt;Selector query='descendant-or-self::h2\/a' data='&lt;a href=\"https:\/\/qiita.com\/takoraisut...'&gt;,]<\/pre>\n<p>XPath\u3068\u540c\u69d8\u306bget\u3001getall\u3067\u30c6\u30ad\u30b9\u30c8\u306a\u3069\u3092\u53d6\u5f97\u3059\u308b\u3053\u3068\u3082\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<h4>\u30b7\u30a7\u30eb\u304b\u3089\u629c\u3051\u308b<\/h4>\n<pre class=\"lang:default decode:true \">exit<\/pre>\n<h2>\u4e8b\u524d\u8a2d\u5b9a<\/h2>\n<p>project\u30d5\u30a9\u30eb\u30c0\u306e\u4e2d\u306esettings.py\u3092\u3044\u3058\u3063\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n<h3>\u51fa\u529b\u30d5\u30a1\u30a4\u30eb\u306e\u6587\u5b57\u30b3\u30fc\u30c9\u306e\u8a2d\u5b9a<\/h3>\n<p>\u6587\u5b57\u30b3\u30fc\u30c9\u306e\u8a2d\u5b9a\u3092\u3057\u3066\u304a\u304d\u307e\u3057\u3087\u3046\u3002UTF-8\u306b\u3057\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">FEED_EXPORT_ENCODING = 
'utf-8'<\/pre>\n<p>\u6307\u5b9a\u3057\u306a\u3044\u3068\u51fa\u529b\u30d5\u30a1\u30a4\u30eb\u304c\u6587\u5b57\u5316\u3051\u3059\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<h3>\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u9593\u9694\u306e\u8a2d\u5b9a<\/h3>\n<p>\u4ee5\u4e0b\u306e\u884c\u306e\u30b3\u30e1\u30f3\u30c8\u3092\u5916\u3057\u307e\u3059\u3002\u3042\u308b\u30da\u30fc\u30b8\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u304b\u3089\u6b21\u306e\u30da\u30fc\u30b8\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u9593\u9694\u306e\u6307\u5b9a\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<p>\u8aa4\u3063\u3066\u76f8\u624b\u306e\u30b5\u30fc\u30d0\u30fc\u306b\u8ca0\u8377\u3092\u304b\u3051\u3059\u304e\u306a\u3044\u3088\u3046\u306b\u5fc5\u305a\u8a2d\u5b9a\u3057\u3066\u304a\u304f\u3088\u3046\u306b\u3057\u307e\u3057\u3087\u3046\u3002<\/p>\n<pre class=\"lang:default decode:true\">DOWNLOAD_DELAY = 3<\/pre>\n<p>\u5358\u4f4d\u306f\u79d2\u6570\u306b\u306a\u308a\u307e\u3059\u3002\u306a\u306e\u3067\u4e0a\u8a18\u3067\u3042\u308c\u3070\u300c3\u79d2\u9593\u9694\u3067\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u300d\u3068\u3044\u3046\u6307\u5b9a\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n<div>\n<h3>ROBOTSTXT_OBEY = 
True<\/h3>\n<div>robots.txt\uff08\u30af\u30ed\u30fc\u30e9\u30fc\u3078\u306e\u30a2\u30af\u30bb\u30b9\u8a31\u53ef\u306a\u3069\u3092\u6307\u793a\u3059\u308b\u30d5\u30a1\u30a4\u30eb\uff09\u304c\u3042\u308b\u5834\u5408\u306f\u305d\u308c\u306b\u5f93\u3046\u304b\u3069\u3046\u304b\u306e\u8a2d\u5b9a\u306b\u306a\u308a\u307e\u3059\u3002true\u306a\u3089\u5f93\u3044\u307e\u3059\u3002<\/div>\n<\/div>\n<h3>\u65e5\u672c\u8a9e\u8a2d\u5b9a<\/h3>\n<p>\u53d6\u5f97\u3059\u308b\u60c5\u5831\u306e\u8a00\u8a9e\u8a2d\u5b9a\u3067\u3059\u3002HTTP\u306e\u30ea\u30af\u30a8\u30b9\u30c8\u30d8\u30c3\u30c0\u306b\u306a\u308a\u307e\u3059\u3002\u65e5\u672c\u8a9e\u306b\u306a\u308b\u3088\u3046\u306b\u8a2d\u5b9a\u3057\u3066\u304a\u304d\u307e\u3057\u3087\u3046\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u3060\u3068\u300cDEFAULT_REQUEST_HEADERS\u300d\u306f\u30b3\u30e1\u30f3\u30c8\u30a2\u30a6\u30c8\u3055\u308c\u3066\u3044\u308b\u306e\u3067\u89e3\u9664\u3057\u305f\u4e0a\u3067\u4ee5\u4e0b\u306e\u8a2d\u5b9a\u3092\u3057\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">DEFAULT_REQUEST_HEADERS = {\r\n   \"Accept-Language\": \"ja\",\r\n}<\/pre>\n<div>\n<h3>HTTPCACHE_ENABLED = 
True<\/h3>\n<div>\u4e00\u5ea6\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u305fWeb\u30da\u30fc\u30b8\u3092\u30ed\u30fc\u30ab\u30eb\u306b\u30ad\u30e3\u30c3\u30b7\u30e5\u3057\u3066\u304f\u308c\u307e\u3059\u3002\u69cb\u6210\u304c\u96e3\u3057\u3044\u30b5\u30a4\u30c8\u3068\u304b\u3060\u3068\u4f55\u5ea6\u3082\u5b9f\u884c\u3057\u3066\u4f55\u5ea6\u3082\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u3053\u3068\u306b\u306a\u3063\u3066\u3057\u307e\u3046\u306e\u3067\u3001\u30ad\u30e3\u30c3\u30b7\u30e5\u3057\u3066\u304a\u304f\u3068\u958b\u767a\u304c\u65e9\u304f\u306a\u308a\u307e\u3059\u3002<\/div>\n<div>\u305f\u3060\u3001\u30c7\u30e1\u30ea\u30c3\u30c8\u3068\u3057\u3066\u306f\u5e38\u306b\u540c\u3058\u30da\u30fc\u30b8\u3092\u53c2\u7167\u3057\u7d9a\u3051\u308b\u3053\u3068\u306b\u306a\u308b\u306e\u3067\u3082\u3057\u304b\u3057\u305f\u3089\u30da\u30fc\u30b8\u306e\u5185\u5bb9\u304c\u5909\u308f\u3063\u3066\u3044\u308b\u53ef\u80fd\u6027\u3082\u3042\u308b\u3053\u3068\u3067\u3059\u3002<\/div>\n<div>\u305d\u306e\u305f\u3081\u30ad\u30e3\u30c3\u30b7\u30e5\u306b\u306f\u6709\u52b9\u671f\u9650\u3092\u8a2d\u5b9a\u3057\u3066\u304a\u304f\u3068\u826f\u3044\u3067\u3057\u3087\u3046\u3002<\/div>\n<div>\n<h4>HTTPCACHE_EXPIRATION_SECS<\/h4>\n<div>\u30ad\u30e3\u30c3\u30b7\u30e5\u306e\u6709\u52b9\u671f\u9650\u3092\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002Spider\u3092\u518d\u5b9f\u884c\u3057\u305f\u969b\u306b\u4ee5\u4e0b\u306e\u79d2\u6570\u7d4c\u904e\u3057\u3066\u3044\u305f\u5834\u5408\u306f\u518d\u5ea6Web\u30da\u30fc\u30b8\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u304f\u308c\u307e\u3059\u3002<\/div>\n<div>\n<pre class=\"lang:default decode:true\">HTTPCACHE_EXPIRATION_SECS = 86400 # 
1\u65e5\u306b\u3059\u308b\u5834\u5408<\/pre>\n<p>\u5358\u4f4d\u306f\u79d2\u3067\u6307\u5b9a\u3057\u307e\u3059\u3002<\/p>\n<\/div>\n<\/div>\n<\/div>\n<h4>\u30ad\u30e3\u30c3\u30b7\u30e5\u306e\u4fdd\u5b58\u5834\u6240<\/h4>\n<p>\u3061\u306a\u307f\u306b\u3001\u4ee5\u4e0b\u306e\u5834\u6240\u306b\u30ad\u30e3\u30c3\u30b7\u30e5\u306f\u4fdd\u5b58\u3055\u308c\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">projects\/\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u540d\/.scrapy\/httpcache<\/pre>\n<p>\u3082\u3057\u5909\u66f4\u3057\u305f\u3044\u5834\u5408\u306fHTTPCACHE_DIR\u3068\u3044\u3046\u8a2d\u5b9a\u5024\u3092\u3044\u3058\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<div>\n<h3>CONCURRENT_REQUESTS<\/h3>\n<p>Scrapy\u304c\u540c\u6642\u306b\u51e6\u7406\u3059\u308b\u30ea\u30af\u30a8\u30b9\u30c8\u306e\u6570\u3092\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306f16\u306b\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<div>\n<h3>DEPTH_PRIORITY = 1<\/h3>\n<p>\u968e\u5c64\u306b\u3088\u308b\u512a\u5148\u5ea6\u3092\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u30021\u3092\u6307\u5b9a\u3059\u308c\u3070\u4f4e\u3044\u968e\u5c64\u306e\u30ea\u30af\u30a8\u30b9\u30c8\u304b\u3089\u9806\u756a\u306b\u51e6\u7406\u3055\u308c\u307e\u3059\u3002<\/p>\n<h3>FIFO\u306b\u3059\u308b<\/h3>\n<p>Scrapy\u3067\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u3067\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u51e6\u7406\u3059\u308b\u65b9\u5f0f\u304cLIFO\u3067\u3059\u304c\u3001\u4ee5\u4e0b\u306e\u8a2d\u5b9a\u3092\u3059\u308c\u3070FIFO\u306b\u5909\u66f4\u3067\u304d\u307e\u3059\u3002<\/p>\n<div>\n<div><\/div>\n<div>\n<div>\n<div>\n<pre class=\"lang:default decode:true \">SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleFifoDiskQueue'\r\nSCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.FifoMemoryQueue'\r\n<\/pre>\n<p>&nbsp;<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n","protected":false},"excerpt":{"rendered":"\u57fa\u672c\u60c5\u5831\u306e\u78ba\u8a8d scrapy 
\u30d0\u30fc\u30b8\u30e7\u30f3\u3001\u30a2\u30af\u30c6\u30a3\u30d6\u306a\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3001\u5229\u7528\u3067\u304d\u308b\u30b3\u30de\u30f3\u30c9\u306e\u60c5\u5831\u3092\u8868\u793a\u3067\u304d\u307e\u3059\u3002 scrapy\u30b3\u30de\u30f3\u30c9\u4e00\u89a7 \u30b3\u30de\u30f3\u30c9 \u5185\u5bb9 bench \u7c21\u5358\u306a\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u30c6\u30b9\u30c8\u3092\u5b9f\u884c startproject [&hellip;]","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[47],"tags":[],"_links":{"self":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/20959"}],"collection":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=20959"}],"version-history":[{"count":23,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/20959\/revisions"}],"predecessor-version":[{"id":21102,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/20959\/revisions\/21102"}],"wp:attachment":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=20959"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=20959"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=20959"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}