I want to crawl a Taobao search page. It is a page generated by js rendering, so I choose to crawl it with phantomjs. But a problem occurred during the fetching process. When there are more than two search results, no data can be obtained. But the screenshot shows that the data is loading normally. I don’t know if it’s Taobao that imposes restrictions or there’s something wrong with my code. Please help me solve it.
var url = "https://ai.m.taobao.com/search.html?q=%E5%90%8D%E4%BA%BA%E7%91%9E%E8%A3%B32017%E5%A4%8F%E8%A3%85%E6%96%B0%E6%AC%BE%E5%A5%B3%E8%A3%85%E8%A3%99%E5%AD%90%E5%8F%8A%E8%86%9D%E9%95%BF%E6%AC%BE%E4%BF%AE%E8%BA%AB%E5%81%87%E4%B8%A4%E4%BB%B6%E5%8D%B0%E8%8A%B1%E9%9B%AA%E7%BA%BA%E8%BF%9E%E8%A1%A3%E8%A3%99&spm=a311n.7676424.1000.1&prepvid=201_10.184.72.34_159920160034_1496243285841&pid=mm_33231688_7050284_23466709&env&unid&hidebar&isout&clk1&smart&rb"; var page =require('webpage').create(); var fs = require('fs'); page.settings = { javascriptEnabled: true, loadImages: false, webSecurityEnabled: false, userAgent: 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36 LBBROWSER' }; page.open(url,function (status) { if (status !== 'success') { console.log('Unable to access thewebsite'); } else { window.setTimeout(function () { page.render('example1.png'); var val = page.evaluate(function(){ var data = document.querySelectorAll('p.item-thumbnail a.item-intro'); if ( data instanceof Array ) { return 1; console.log(1) for (var i = 0, len = data.length; i < len; i++) { console.log(data[i].innerHTML); } } else if ( data instanceof Object ) { var description = ""; for (var i in data) { description += i + " = " + data[i] + "\n"; } return description; // console.log(nextSibling(data).innerHTML); } else { return data; } }); console.log('The register address:' + val); phantom.exit(); }, 1000); }; });
?Black question mark