{"id":20973,"date":"2023-06-04T08:38:52","date_gmt":"2023-06-03T23:38:52","guid":{"rendered":"http:\/\/www.code-magagine.com\/?p=20973"},"modified":"2023-06-05T23:23:02","modified_gmt":"2023-06-05T14:23:02","slug":"xpath%e3%81%ae%e5%9f%ba%e6%9c%ac","status":"publish","type":"post","link":"http:\/\/www.code-magagine.com\/?p=20973","title":{"rendered":"\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306b\u304a\u3051\u308bXPath\u306e\u57fa\u672c"},"content":{"rendered":"<h2>XPath\u3068\u306f\uff1f<\/h2>\n<p>XML\u3084HTML\u306a\u3069\u304b\u3089\u6b32\u3057\u3044\u90e8\u5206\u3092\u6307\u5b9a\u3057\u3066\u53d6\u5f97\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u7c21\u6613\u8a00\u8a9e\u3067\u3059\u3002XSLT\u306a\u3069\u3067XML\u64cd\u4f5c\u306b\u5229\u7528\u3055\u308c\u308b\u306e\u306f\u3082\u3061\u308d\u3093\u3001\u73fe\u5728\u306f\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u7528\u9014\u3067\u3082\u5e45\u5e83\u304f\u5229\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<h2>XPath\u306e\u30e1\u30ea\u30c3\u30c8<\/h2>\n<p>CSS\u30bb\u30ec\u30af\u30bf\u3067\u53d6\u5f97\u3059\u308b\u5834\u5408\u306b\u6bd4\u3079\u3066XPath\u306e\u65b9\u304c\u82e5\u5e72\u8981\u7d20\u306e\u6307\u5b9a\u304c\u67d4\u8edf\u306b\u3067\u304d\u307e\u3059\u3002<\/p>\n<ul>\n<li>\u968e\u5c64\u69cb\u9020\u3092\u8fbf\u3063\u3066\u8981\u7d20\u3092\u6307\u5b9a\u3059\u308b\u3002<\/li>\n<li>\u6b63\u898f\u8868\u73fe\u3092\u4f7f\u3063\u3066\u8981\u7d20\u3092\u6307\u5b9a\u3059\u308b\u3002<\/li>\n<\/ul>\n<h2>XPath\u306e\u30c7\u30e1\u30ea\u30c3\u30c8<\/h2>\n<ul>\n<li>Scrapy\u3067\u306f\u4f7f\u3048\u307e\u3059\u304c\u3001BeautifulSoup\u3067\u306f\u4f7f\u3048\u306a\u3044\u3002<\/li>\n<\/ul>\n<h2>XPath 
Playground<\/h2>\n<p>\u624b\u8efd\u306bXPath\u3092\u8a66\u305b\u307e\u3059\u3002<\/p>\n<p>https:\/\/scrapinghub.github.io\/xpath-playground\/<\/p>\n<h2>\u57fa\u672c<\/h2>\n<h3>\u30ed\u30b1\u30fc\u30b7\u30e7\u30f3\u30d1\u30b9<\/h3>\n<p>\u8981\u7d20\u307e\u3067\u306e\u968e\u5c64\u306e\u9053\u306e\u308a\u3092\u793a\u3057\u305f\u3082\u306e\u3067\u3059\u3002<\/p>\n<p>\u4f8b\u3048\u3070\u3001h2\u306a\u3069\u306e\u5404\u8981\u7d20\u306f\u30ce\u30fc\u30c9\u30c6\u30b9\u30c8\u3068\u547c\u3073\u307e\u3059\u3002\u30b9\u30e9\u30c3\u30b7\u30e5\u3067\u533a\u5207\u308b\u3068\u968e\u5c64\u3092\u8fbf\u308b\u3053\u3068\u304c\u53ef\u80fd\u3067\u3059\u3002\u3053\u308c\u3092\u30ed\u30b1\u30fc\u30b7\u30e7\u30f3\u30d1\u30b9\u3068\u547c\u3073\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\u30ce\u30fc\u30c9\u30c6\u30b9\u30c8\/\u30ce\u30fc\u30c9\u30c6\u30b9\u30c8\/\u30ce\u30fc\u30c9\u30c6\u30b9\u30c8<\/pre>\n<h3>\u6700\u521d\u304b\u3089\u53d6\u5f97<\/h3>\n<pre class=\"lang:default decode:true\">\/html\/body\/h2<\/pre>\n<h3>\u9014\u4e2d\u304b\u3089\u53d6\u5f97<\/h3>\n<p>\u30b9\u30e9\u30c3\u30b7\u30e52\u3064\u3067\u3042\u308c\u3070\u3001\u76f4\u63a5\u8981\u7d20\u306b\u30a2\u30af\u30bb\u30b9\u3067\u304d\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\/h2<\/pre>\n<h2>\u8ef8<\/h2>\n<p>\u8ef8\u3068\u306f\u3001\u89aa\u3001\u5b50\u3001\u5b50\u5b6b\u3001\u5148\u7956\u306a\u3069\u69d8\u3005\u306a\u8981\u7d20\u3092\u6307\u5b9a\u3067\u304d\u308b\u30ed\u30b1\u30fc\u30b7\u30e7\u30f3\u30d1\u30b9\u306e\u6307\u5b9a\u6cd5\u3067\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\u8ef8::\u30ce\u30fc\u30c9\u30c6\u30b9\u30c8\/\u8ef8::\u30ce\u30fc\u30c9\u30c6\u30b9\u30c8<\/pre>\n<table>\n<tbody>\n<tr>\n<th>\u8ef8<\/th>\n<th>\u8aac\u660e<\/th>\n<\/tr>\n<tr>\n<td>parent<\/td>\n<td>\u89aa\u8981\u7d20\uff08\u5b50\u5b6b\u8981\u7d20\u3082\u542b\u3081\u3066\uff09\u53d6\u5f97\u3057\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default
decode:true\">\/\/a[@id='link1']\/parent::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>ancestor<\/td>\n<td>\u5148\u7956\u8981\u7d20\uff08\u5b50\u5b6b\u8981\u7d20\u3082\u542b\u3081\u3066\uff09\u5168\u3066\u53d6\u5f97\u3057\u307e\u3059\u3002\uff08\u53d6\u5f97\u7d50\u679c\u306f\u8907\u6570\u306b\u306a\u308a\u307e\u3059\u3002\uff09<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[@id='link1']\/ancestor::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>ancestor-or-self<\/td>\n<td>\u81ea\u5206\u81ea\u8eab\u3082\u542b\u3081\u305f\u5148\u7956\u8981\u7d20\u3092\u5168\u3066\u53d6\u5f97\u3057\u307e\u3059\u3002\uff08ancestor\u3068\u9055\u3044\u3001\u53d6\u5f97\u3059\u308b\u8907\u6570\u306e\u7d50\u679c\u306e\u4e2d\u306b\u81ea\u5206\u81ea\u8eab\u306enode\u3082\u8ffd\u52a0\u3055\u308c\u308b\u30a4\u30e1\u30fc\u30b8\u3067\u3059\u3002\uff09<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[@id='link1']\/ancestor-or-self::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>preceding<\/td>\n<td>\u5148\u7956\u3092\u9664\u304f\u5168\u3066\u306e\u524d\u306e\u8981\u7d20\u3002\uff08\u4f8b\u3048\u3070HTML\u3067\u8a00\u3048\u3070\u3001\u5168\u3066\u306e\u8981\u7d20\u306e\u5148\u7956\u3067\u3042\u308bhtml\u30bf\u30b0\u306f\u7d76\u5bfe\u306b\u53d6\u5f97\u3055\u308c\u308b\u3053\u3068\u306f\u306a\u3044\u3067\u3059\u3002\uff09<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[@id='link1']\/preceding::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>preceding-sibling<\/td>\n<td>\u524d\u306b\u3042\u308b\u5168\u3066\u306e\u5144\u5f1f\u8981\u7d20(\u3064\u307e\u308a\u3001\u540c\u3058\u968e\u5c64\u306e\u524d\u306e\u8981\u7d20)<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[@id='link2']\/preceding-sibling::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>child<\/td>\n<td>\u5b50\u8981\u7d20<\/p>\n<pre class=\"lang:default decode:true\">\/\/p[@class='recent books']\/child::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>following<\/td>\n<td>\u5f8c\u308d\u306e\u5168\u3066\u306e\u8981\u7d20<\/p>\n<pre class=\"lang:default
decode:true\">\/\/a[@id='link1']\/following::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>following-sibling<\/td>\n<td>\u5f8c\u308d\u306b\u3042\u308b\u5144\u5f1f\u8981\u7d20<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[@id='link1']\/following-sibling::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>descendant<\/td>\n<td>\u5f8c\u308d\u306e\u5b50\u5b6b\u8981\u7d20<\/p>\n<pre class=\"lang:default decode:true\">\/\/p[@class='title']\/descendant::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>descendant-or-self<\/td>\n<td>\u81ea\u5206\u81ea\u8eab\u3092\u542b\u3080\u5f8c\u308d\u306e\u5b50\u5b6b\u8981\u7d20<\/p>\n<pre class=\"lang:default decode:true\">\/\/p[@class='title']\/descendant-or-self::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>self<\/td>\n<td>\u81ea\u5206\u81ea\u8eab\u306e\u8981\u7d20<\/p>\n<pre class=\"lang:default decode:true\">\/\/p[@class='title']\/self::node()<\/pre>\n<\/td>\n<\/tr>\n<tr>\n<td>attribute<\/td>\n<td>\u81ea\u5206\u81ea\u8eab\u306e\u5c5e\u6027\u306e\u5024\u3092\u51fa\u529b<\/p>\n<pre class=\"lang:default decode:true \">\/\/p[@class='title']\/attribute::node()<\/pre>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>\u306a\u304a\u3001\u7956\u5148\u3067\u3042\u308c\u3070\u7956\u7236\u306f\u3082\u3061\u308d\u3093\u3067\u3059\u304c\u3001\u89aa\u3082\u542b\u307f\u307e\u3059\u3057\u3001\u5b50\u5b6b\u3067\u3042\u308c\u3070\u5b6b\u306f\u3082\u3061\u308d\u3093\u305d\u3046\u3067\u3059\u304c\u3001\u5b50\u3082\u542b\u307f\u307e\u3059\u3002<\/p>\n<h2>\u5c5e\u6027\u3092\u6307\u5b9a\u3057\u3066\u8981\u7d20\u3092\u7279\u5b9a\u3059\u308b\u3002<\/h2>\n<pre class=\"lang:default decode:true\">\u8981\u7d20[@\u5c5e\u6027=\u5c5e\u6027\u5024]\r\n<\/pre>\n<p>\u4ee5\u4e0b\u306eHTML\u304c\u3042\u308b\u3068\u3059\u308b\u3002<\/p>\n<pre class=\"lang:default decode:true\">&lt;a class=\"item is-active\" href=\"\/\"&gt;\u30c6\u30b9\u30c8&lt;\/a&gt;<\/pre>\n<p>\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u6307\u5b9a\u3059\u308b\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[@class=\"item
is-active\"]<\/pre>\n<h3>\u30c6\u30ad\u30b9\u30c8\u3092\u53d6\u5f97\u3059\u308b\u3002<\/h3>\n<pre class=\"lang:default decode:true\">\/\/a[@class=\"item is-active\"]\/text()\r\n\r\n\u30c6\u30b9\u30c8<\/pre>\n<h2>\u5c5e\u6027\u306e\u5024\u3092\u76f4\u63a5\u53d6\u5f97<\/h2>\n<pre class=\"lang:default decode:true\">\u8981\u7d20\/@\u5c5e\u6027<\/pre>\n<p>\u4e0a\u8a18\u306e\u3088\u3046\u306b\u6307\u5b9a\u3059\u308c\u3070\u5c5e\u6027\u306e\u5024\u3092\u76f4\u63a5\u53d6\u5f97\u3067\u304d\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true \">\/\/h3\/a\/@href<\/pre>\n<p>\u3053\u3046\u3059\u308c\u3070h3\u306e\u4e0b\u306b\u3042\u308ba\u30bf\u30b0\u306ehref\u5c5e\u6027\u306e\u5024\u3092\u53d6\u5f97\u3067\u304d\u307e\u3059\u3002<\/p>\n<h2>\u8981\u7d20\u3092\u7d5e\u308a\u8fbc\u3080<\/h2>\n<h3>\u542b\u3080<\/h3>\n<p>contains\u3068\u3044\u3046\u8ff0\u8a9e\u3092\u5229\u7528\u3057\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001href\u5c5e\u6027\uff08URL\uff09\u306e\u4e2d\u306b\u300cA07TN4D3HG\u300d\u3068\u3044\u3046\u30b3\u30fc\u30c9\u304c\u542b\u307e\u308c\u3066\u3044\u308b\u8981\u7d20\u3060\u3051\u3092\u62bd\u51fa\u3057\u305f\u3044\u5834\u5408\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u6307\u5b9a\u3057\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[contains(@href,\"A07TN4D3HG\")]<\/pre>\n<h4>\u30c6\u30ad\u30b9\u30c8\u691c\u7d22<\/h4>\n<pre class=\"lang:default decode:true \">\/\/a[contains(text(),\"Python\")]<\/pre>\n<p>\u306a\u304a\u3001\u30c6\u30ad\u30b9\u30c8\u691c\u7d22\u306f\u5927\u6587\u5b57\u5c0f\u6587\u5b57\u3092\u533a\u5225\u3059\u308b\u306e\u3067\u6ce8\u610f\u304c\u5fc5\u8981\u3067\u3059\u3002<\/p>\n<h3>\u542b\u307e\u306a\u3044<\/h3>\n<pre class=\"lang:default 
decode:true\">\/\/a[not(contains(@href,\"A07TN4D3HG\"))]<\/pre>\n<p>not\u3068\u3044\u3046\u8ff0\u8a9e\u3067\u56f2\u3048\u3070\u9006\u306b\u542b\u307e\u306a\u3044\u8981\u7d20\u3092\u691c\u7d22\u3067\u304d\u307e\u3059\u3002<\/p>\n<h3>\u8907\u6570\u6761\u4ef6\u3067\u306e\u7d5e\u308a\u8fbc\u307f<\/h3>\n<p>\u4ee5\u4e0b\u306f\u3001href\u306bA07TN4D3HG\u3092\u542b\u307f\u304b\u3064id\u306blink2\u3092\u542b\u3080\u5834\u5408\u3092\u793a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\/a[contains(@href,\"A07TN4D3HG\") and contains(@id,\"link2\")]<\/pre>\n<h3>\u524d\u65b9\u4e00\u81f4\u691c\u7d22<\/h3>\n<pre class=\"lang:default decode:true\">\/\/a[starts-with(@href,\"http:\/\/www.amazon.co.jp\/dp\/B07SRLR\")]<\/pre>\n<h3>\u5f8c\u65b9\u4e00\u81f4\u691c\u7d22(XPath2.0\u4ee5\u964d\u306e\u307f\uff09<\/h3>\n<pre class=\"lang:default decode:true\">\/\/a[ends-with(@href,\"4M\")]<\/pre>\n<p>XPath2.0\u4ee5\u964d\u3057\u304b\u5bfe\u5fdc\u3057\u3066\u3044\u306a\u3044\u306e\u3067\u5bfe\u5fdc\u3057\u3066\u3044\u306a\u3044\u30d6\u30e9\u30a6\u30b6\u3082\u591a\u3044\u306e\u3067\u3042\u304f\u307e\u3067\u53c2\u8003\u307e\u3067\u3067\u3059\u3002<\/p>\n<h2>list\u8981\u7d20\u306e\u53d6\u5f97\u65b9\u6cd5<\/h2>\n<h3>\u756a\u53f7\u3092\u6307\u5b9a\u3057\u3066\u53d6\u5f97<\/h3>\n<p>\u524d\u304b\u3089\u4f55\u756a\u76ee\u306e\u5024\u3092\u53d6\u5f97\u3059\u308b\u304b\u6307\u5b9a\u3057\u3066\u3044\u308b\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\/li[position() =2]<\/pre>\n<p>\u3082\u3057\u304f\u306f\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u3082\u66f8\u3051\u307e\u3059\u3002\u3053\u306e\u65b9\u304c\u7c21\u6f54\u306a\u306e\u3067\u671b\u307e\u3057\u3044\u3067\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true \">\/\/li[2]<\/pre>\n<h4>\u6700\u5f8c\u306e\u8981\u7d20\u3092\u53d6\u5f97<\/h4>\n<pre class=\"lang:default decode:true \">\/\/li[position() 
=last()]<\/pre>\n<h4>\u4e09\u756a\u76ee\u4ee5\u964d\u306e\u8981\u7d20\u3092\u53d6\u5f97<\/h4>\n<p>\u5927\u306a\u308a\u8a18\u53f7\u3082\u4f7f\u3048\u307e\u3059\u3002<\/p>\n<pre class=\"lang:default decode:true\">\/\/li[position() &gt;2]<\/pre>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"XPath\u3068\u306f\uff1f XML\u3084HTML\u306a\u3069\u304b\u3089\u6b32\u3057\u3044\u90e8\u5206\u3092\u6307\u5b9a\u3057\u3066\u53d6\u5f97\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u7c21\u6613\u8a00\u8a9e\u3067\u3059\u3002XSLT\u306a\u3069\u3067XML\u64cd\u4f5c\u306b\u5229\u7528\u3055\u308c\u308b\u306e\u306f\u3082\u3061\u308d\u3093\u3001\u73fe\u5728\u306f\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u7528\u9014\u3067\u3082\u5e45\u5e83\u304f\u5229\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002 XPath\u306e\u30e1\u30ea\u30c3\u30c8 [&hellip;]","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[47],"tags":[],"_links":{"self":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/20973"}],"collection":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=20973"}],"version-history":[{"count":26,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/20973\/revisions"}],"predecessor-version":[{"id":21070,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=\/wp\/v2\/posts\/20973\/revisions\/21070"}],"wp:attachment":[{"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=20973"}],"wp:term":[{"taxonomy":"category","em
beddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=20973"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.code-magagine.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=20973"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}