{"id":21033,"date":"2023-06-04T21:16:43","date_gmt":"2023-06-04T12:16:43","guid":{"rendered":"http:\/\/www.code-magagine.com\/?p=21033"},"modified":"2023-06-06T08:02:35","modified_gmt":"2023-06-05T23:02:35","slug":"%e3%80%90python%e3%80%91scrapy%e3%81%a7%e8%a4%87%e6%95%b0%e3%83%9a%e3%83%bc%e3%82%b8%e3%82%92%e6%93%8d%e3%82%8b%e3%80%82","status":"publish","type":"post","link":"http:\/\/www.code-magagine.com\/?p=21033","title":{"rendered":"\u3010Python\u3011Scrapy\u3067\u8907\u6570\u30da\u30fc\u30b8\u3092\u64cd\u308b\u3002"},"content":{"rendered":"<h2>Scrapy\u3067\u306e\u30ea\u30f3\u30af\u306e\u305f\u3069\u308a\u65b9<\/h2>\n<p>\u8907\u6570\u306e\u65b9\u6cd5\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n<h3>scrapy.Request(URL,\u30b3\u30fc\u30eb\u30d0\u30c3\u30af\u30e1\u30bd\u30c3\u30c9)<\/h3>\n<ul>\n<li>URL\u306f\u7d76\u5bfeURL(https:\/\/xxx.com)\u306e\u307f<\/li>\n<li>\u30ec\u30b9\u30dd\u30f3\u30b9\u306f\u30b3\u30fc\u30eb\u30d0\u30c3\u30af\u30e1\u30bd\u30c3\u30c9\u3067\u53d7\u3051\u53d6\u308a\u307e\u3059\u3002<\/li>\n<\/ul>\n<h3>response.follow(URL\/Selector,\u30b3\u30fc\u30eb\u30d0\u30c3\u30af\u30e1\u30bd\u30c3\u30c9)<\/h3>\n<ul>\n<li>\u76f8\u5bfeURL\u306b\u3082\u5bfe\u5fdc<\/li>\n<li>a\u8981\u7d20\u306eSelector\u304b\u3089\u81ea\u52d5\u7684\u306bhref\u3092\u53d6\u5f97\u3057\u3066\u304f\u308c\u308b\u3002<\/li>\n<li>\u30ec\u30b9\u30dd\u30f3\u30b9\u306f\u30b3\u30fc\u30eb\u30d0\u30c3\u30af\u30e1\u30bd\u30c3\u30c9\u3067\u53d7\u3051\u53d6\u308a\u307e\u3059\u3002<\/li>\n<\/ul>\n<h2>\u4e00\u89a7\u753b\u9762\u306e\u30da\u30fc\u30b8\u30f3\u30b0\u3092\u8fbf\u308b\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9<\/h2>\n<pre class=\"lang:default decode:true\">class SampleSpider(scrapy.Spider):\r\n    name = \"spider\u540d\"\r\n    allowed_domains = [\"www.xxx.com\"]\r\n    start_urls = [\"https:\/\/www.xxx.com\/1\"]\r\n\r\n    def parse(self, response):\r\n\u3000\u3000\u3000\u3000\u3000\u3000\u3000\u3000\u3000\u3000\u3000   # \u7279\u5b9a\u30da\u30fc\u30b8\u3067\u306e\u51e6\u7406\r\n       
 next_page = response.xpath('\u6b21\u306e\u30da\u30fc\u30b8\u3078\u306e\u30ea\u30f3\u30af\u30dc\u30bf\u30f3\u306eXPath')\r\n        if next_page:\r\n            yield response.follow(url=next_page[0],callback=self.parse)<\/pre>\n<h3>url=next_page<\/h3>\n<p>\u6b21\u306e\u30da\u30fc\u30b8\u306e\u30ea\u30f3\u30af\u306eSelector\u3092\u6e21\u3057\u307e\u3059\u3002<\/p>\n<h3>callback=self.parse<\/h3>\n<p>\u5358\u4e00\u30da\u30fc\u30b8\u3067\u60c5\u5831\u3092\u53d6\u5f97\u3059\u308b\u51e6\u7406\u3092\u8a18\u8ff0\u3057\u307e\u3059\u3002\u305d\u3046\u3059\u308b\u3053\u3068\u3067\u3042\u308b\u30da\u30fc\u30b8\u3067\u300c\u6b21\u3078\u300d\u30ea\u30f3\u30af\u304c\u3042\u308b\u304b\u304e\u308a\u305a\u3063\u3068\u30eb\u30fc\u30d7\u3057\u3066\u51e6\u7406\u3092\u3057\u7d9a\u3051\u3066\u304f\u308c\u307e\u3059\u3002<\/p>\n<h2>\u4e00\u89a7\u753b\u9762\u304b\u3089\u8a73\u7d30\u753b\u9762\u3092\u8fbf\u3063\u3066\u60c5\u5831\u3092\u53d6\u5f97\u3059\u308b\u30b5\u30f3\u30d7\u30eb<\/h2>\n<pre class=\"lang:default decode:true\">class SampleSpider(scrapy.Spider):\r\n    name = \"spider\u540d\"\r\n    allowed_domains = [\"www.xxx.com\"]\r\n    start_urls = [\"https:\/\/www.xxx.com\/1\"]\r\n\r\n    def parse(self, response):\r\n        # \u4e00\u89a7\u753b\u9762\u306eitem\u4e00\u89a7\r\n        items = response.xpath(\"\u4e00\u89a7\u60c5\u5831\u306expath\")\r\n\r\n        for item in items:\r\n            yield response.follow(url=item.xpath(\"\u8a73\u7d30\u753b\u9762\u3078\u306eURL\u306expath\").get(),callback=self.parse_item)\r\n\r\n    def parse_item(self, response):\r\n         item_info = response.xpath(\"\u8a73\u7d30\u753b\u9762\u306e\u5404\u60c5\u5831\u306expath\")\r\n\r\n         yield {\r\n           \u60c5\u58311: item_info.xpath(\".\u60c5\u58311\u306eXpath\").get(),\r\n           \u60c5\u58312: item_info.xpath(\".\u60c5\u58312\u306eXpath\").get()\r\n         
}<\/pre>\n<p>\u30dd\u30a4\u30f3\u30c8\u3068\u3057\u3066\u306f\u3001parse_item\u3068\u8a73\u7d30\u753b\u9762\u306e\u60c5\u5831\u3092\u53d6\u5f97\u3059\u308b\u305f\u3081\u306e\u30e1\u30bd\u30c3\u30c9\u3092\u8ffd\u52a0\u3057\u3066\u547c\u3073\u51fa\u3057\u307e\u3059\u3002<\/p>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"Scrapy\u3067\u306e\u30ea\u30f3\u30af\u306e\u305f\u3069\u308a\u65b9 \u8907\u6570\u306e\u65b9\u6cd5\u304c\u3042\u308a\u307e\u3059\u3002 scrapy.Request(URL,\u30b3\u30fc\u30eb\u30d0\u30c3\u30af\u30e1\u30bd\u30c3\u30c9) URL\u306f\u7d76\u5bfeURL(https:\/\/xxx.com)\u306e\u307f \u30ec\u30b9\u30dd\u30f3\u30b9\u306f\u30b3\u30fc\u30eb\u30d0\u30c3\u30af\u30e1\u30bd\u30c3\u30c9\u3067\u53d7\u3051\u53d6\u308a [&hellip;]","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[47],"tags":[],"_links":{"self":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/21033"}],"collection":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=21033"}],"version-history":[{"count":7,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/21033\/revisions"}],"predecessor-version":[{"id":21071,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/21033\/revisions\/21071"}],"wp:attachment":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=21033"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&po
st=21033"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=21033"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}