package com.tanzhou.spiders;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Crawler test program: downloads one page of Baidu search results and stores the
 * markup of the individual result entries in a local HTML file.
 *
 * <p>Author: Administrator. Created: 2018-04-30.
 */
public class Main {

    /** Number of consecutive element ids that are looked up on one result page. */
    private static final int RESULTS_PER_PAGE = 10;

    /** File the collected markup is written to; this exact path is returned to the caller. */
    private static final String OUTPUT_PATH = "D:/workspace/Spiders/WebContent/jsp/css.html";

    /**
     * Fetches one Baidu result page for {@code word}, appends the markup of the result
     * elements found on it to the list obtained from {@code Ha.getList()}, and rewrites
     * the output file with the page's {@code <head>} followed by every entry of that list.
     *
     * @param word search term; must not be {@code null}
     * @param x    value sent as Baidu's {@code pn} parameter; elements with the ids
     *             {@code x + 1} to {@code x + 10} are collected (ids {@code 1} to
     *             {@code x + 10} when {@code x} is not positive)
     * @return the path of the file that was written
     * @throws IOException              if the page cannot be fetched or the file cannot be written
     * @throws IllegalArgumentException if {@code word} is {@code null}
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static String processPage(String word, int x) throws IOException {
        if (word == null) {
            throw new IllegalArgumentException("word must not be null");
        }

        // Ha is declared outside this file, so the element type of its list is not visible
        // here; the raw type is kept on purpose.
        List list = Ha.getList();

        // Hand the query over as request data instead of concatenating it into the URL:
        // jsoup then URL-encodes the values, so spaces, '&', '#' and non-ASCII search
        // terms no longer corrupt the query string.
        Document doc = Jsoup.connect("https://www.baidu.com/s")
                .data("wd", word)
                .data("pn", String.valueOf(x))
                .get();
        Elements head = doc.select("head");

        // NOTE(review): presumably each result container carries its 1-based rank as its
        // element id - confirm against the live markup.
        int firstId = x > 0 ? x + 1 : 1;
        int lastId = x + RESULTS_PER_PAGE;
        for (int id = firstId; id <= lastId; id++) {
            Element result = doc.getElementById(String.valueOf(id));
            // A page may contain fewer results than requested; skip the missing ids
            // instead of failing with a NullPointerException.
            if (result != null) {
                list.add(result.toString());
            }
        }

        // FileOutputStream creates the file when it is missing, so no separate
        // createNewFile() call is needed. try-with-resources closes (and thereby flushes)
        // the writer even when a write fails.
        File target = new File(OUTPUT_PATH);
        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(target), "UTF-8"))) {
            writer.write(head.toString());
            for (Object fragment : list) {
                writer.write(String.valueOf(fragment));
            }
        }
        return OUTPUT_PATH;
    }

    /**
     * Manual smoke test: crawls the second result page ({@code pn=10}) for the term "haha".
     *
     * @param args unused
     * @throws IOException if fetching the page or writing the file fails
     */
    public static void main(String[] args) throws IOException {
        processPage("haha", 10);
    }
}
package com.tanzhou.spiders;
import java.io.IOException;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
/**
 * Servlet mapped to {@code /SpiderServlet}. A POST request triggers a crawl through
 * {@link Main#processPage(String, int)} and is then forwarded to the generated page.
 */
@WebServlet(urlPatterns= "/SpiderServlet")
public class SpiderServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    /** Step between the offsets passed to {@code Main.processPage} for consecutive pages. */
    private static final int RESULTS_PER_PAGE = 10;

    /**
     * Upper bound on pages crawled per request. {@code num} comes from the client, so
     * without a cap a single request could trigger an arbitrary number of outgoing fetches.
     */
    private static final int MAX_PAGES = 10;

    /**
     * Reads the request parameters {@code word} (search term) and {@code num} (number of
     * results wanted), crawls {@code num / 10} pages (at least one, at most
     * {@link #MAX_PAGES}) and forwards to the part of the returned file path that starts
     * at {@code "jsp"}.
     *
     * <p>Responds with 400 when {@code word} is missing or blank, or when {@code num} is
     * missing or not an integer.
     *
     * @throws ServletException if the path returned by the crawler contains no
     *                          {@code "jsp"} segment, or if the forward fails
     * @throws IOException      if crawling, writing the page or forwarding fails
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        request.setCharacterEncoding("utf-8");

        String word = request.getParameter("word");
        if (word == null || word.trim().isEmpty()) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameter 'word' is required");
            return;
        }

        int requested;
        try {
            // parseInt(null) also throws NumberFormatException, so a missing 'num' lands here too.
            requested = Integer.parseInt(request.getParameter("num"));
        } catch (NumberFormatException e) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameter 'num' must be an integer");
            return;
        }

        // The parsed value used to be overwritten with a hard-coded 10; it is honoured now.
        // At least one page is always crawled so that a path is available for the forward.
        int pages = Math.min(MAX_PAGES, Math.max(1, requested / RESULTS_PER_PAGE));

        String outputPath = null;
        for (int page = 0; page < pages; page++) {
            outputPath = Main.processPage(word, page * RESULTS_PER_PAGE);
        }

        // The forward target is the file path from its "jsp" segment onwards.
        int jspIndex = outputPath == null ? -1 : outputPath.indexOf("jsp");
        if (jspIndex < 0) {
            throw new ServletException("Crawler output path contains no 'jsp' segment: " + outputPath);
        }
        String target = "/" + outputPath.substring(jspIndex);
        log("Forwarding to " + target);
        request.getRequestDispatcher(target).forward(request, response);
    }

    /**
     * GET requests are accepted but do nothing: the body is empty, so no crawl is started
     * and nothing is written to the response here. Only POST triggers a crawl.
     */
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
    }
}
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。