爬取加速樂處理的網站

image.png
- 用postman直接訪問導致521且返回加密js
- 運用java中的ScriptEngineManager腳本引擎處理拿到cookie
代碼如下:
CloseableHttpClient client = HttpClients.createDefault();
HttpGet get = new HttpGet(url);
// Send browser-like headers so the request resembles an ordinary browser visit.
// The Accept value must be the media-range list only (no "Accept " prefix inside the value).
get.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
get.setHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");
get.setHeader("Accept-Encoding", "gzip, deflate");
get.setHeader("Accept-Language", "zh-cn,zh;q=0.5");
get.setHeader("Connection", "keep-alive");
get.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2");
CloseableHttpResponse response = client.execute(get);
// A 521 status carries the obfuscated JavaScript challenge instead of the real page.
if (response.getStatusLine().getStatusCode() == 521) {
    String resHtml;
    try {
        resHtml = EntityUtils.toString(response.getEntity());
    } finally {
        // Close the challenge response before the variable is reused for the retry.
        response.close();
    }
    // Evaluate the returned script to obtain the __jsl_clearance cookie.
    String jsl_clearance = getJslClearance(resHtml);
    get.setHeader("Cookie", jsl_clearance);
    // Repeat the same request, now carrying the clearance cookie.
    response = client.execute(get);
}
// Read the page that was actually requested and parse it.
HttpEntity entity = response.getEntity();
String res = EntityUtils.toString(entity, "utf-8");
Document doc = Jsoup.parse(res);
/**
 * Derives the {@code __jsl_clearance} cookie from the obfuscated JavaScript challenge page.
 *
 * <p>The {@code <script>} tags are removed and the outer {@code eval(...)} call is replaced by
 * its argument, so evaluating the script yields the decoded script text instead of running it.
 * The cookie value is then built from a literal prefix found in that text plus the result of
 * evaluating a function expression cut out of it.
 *
 * <p>NOTE(review): this evaluates script text received from a remote server inside the JVM's
 * script engine; only use it against hosts you are prepared to trust with that.
 *
 * @param body the body of the challenge response
 * @return the cookie in the form {@code __jsl_clearance=<value>;}, or an empty string when the
 *         cookie cannot be derived (no script engine, unexpected script layout, script error)
 */
private static String getJslClearance(String body) {
    String jsl_clearance = "";
    if (body == null) {
        return jsl_clearance;
    }

    // getEngineByName returns null when the running JDK ships no JavaScript engine.
    ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
    if (engine == null) {
        logger.error("No JavaScript script engine available; cannot compute __jsl_clearance");
        return jsl_clearance;
    }

    // Strip the markup and unwrap the outer eval so the decoded script comes back as a value.
    String js = body.trim().replace("<script>", "")
            .replace("</script>", "")
            .replace("eval(y.replace(/\\b\\w+\\b/g, function(y){return x[f(y,z)-1]||(\"_\"+y)}))",
                    "y.replace(/\\b\\w+\\b/g, function(y){return x[f(y,z)-1]||(\"_\"+y)})");
    try {
        Object decoded = engine.eval(js);
        if (!(decoded instanceof String)) {
            logger.error("Challenge script did not evaluate to a string; cannot compute __jsl_clearance");
            return jsl_clearance;
        }
        String result = (String) decoded;

        int cookieIdx = result.indexOf("__jsl_clearance=");
        int prefixEndIdx = result.indexOf("|'+(function(){var");
        int exprIdx = result.indexOf("|'+(function(){");
        int expiresIdx = result.indexOf("+';Expires=");
        // The fixed offsets below (16, 23, 16, 4) assume the exact layout of the decoded
        // script — TODO confirm against a current sample. Bail out instead of letting
        // substring throw when the markers are missing or too close together.
        if (cookieIdx < 0 || prefixEndIdx < 0 || exprIdx < 0 || expiresIdx < 0
                || cookieIdx + 16 > prefixEndIdx + 1
                || exprIdx > expiresIdx - 23) {
            logger.error("Decoded challenge script has an unexpected layout; cannot compute __jsl_clearance");
            return jsl_clearance;
        }

        // Literal first part of the cookie value, up to and including the '|' separator.
        String jsl_pre = result.substring(cookieIdx + 16, prefixEndIdx + 1);
        // Function expression producing the second part; "window" is replaced by a string
        // literal because the script engine provides no browser window object.
        String bac = (result.substring(exprIdx, expiresIdx - 23)
                + result.substring(expiresIdx - 16, expiresIdx - 4))
                .replace("|'+(function(){", "")
                .replace("window", "'Chrome'");

        Object suffix = engine.eval(bac);
        if (!(suffix instanceof String)) {
            logger.error("Cookie suffix expression did not evaluate to a string; cannot compute __jsl_clearance");
            return jsl_clearance;
        }
        String jsl_bac = (String) suffix;

        jsl_clearance = "__jsl_clearance=" + jsl_pre + jsl_bac + ";";
        logger.debug(jsl_clearance);
    } catch (ScriptException e) {
        logger.error("Failed to evaluate the challenge script", e);
    }
    return jsl_clearance;
}
隨後訪問成功。