# Not as fast as using requests, but easier to write.
# -*- coding: utf-8 -*-
#使用selenium的webdriver的方法
import csv
import os
import time
import re
import threading
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
## Lock shared by the worker threads (a threading.Lock, i.e. a thread lock, not a process lock).
## write_in() holds it while touching the shared record stacks and the CSV writers.
lock=threading.Lock()
############ Count the rows of a CSV file (currently not called by the script) ############
def raw_num(filepath):
    """Return the number of rows in the CSV file at *filepath*.

    Rows are counted as parsed by ``csv.reader``, so a quoted field that
    spans several physical lines still counts as a single row. Bytes that
    cannot be decoded are dropped instead of raising.

    Args:
        filepath: Path of the CSV file to count.

    Returns:
        The number of parsed rows; 0 for an empty file.
    """
    # Honour the caller's path: the previous body ignored the argument and
    # always opened "2017.csv".
    with open(filepath, "r", errors="ignore", newline="") as cs:
        return sum(1 for _ in csv.reader(cs))
############ Simulated login / score query ############
def post_get(html, driver, name, password):
    """Submit the query form at *html* and return the second result cell's text.

    The page is loaded in *driver*, *password* is typed into the element
    with id ``"id"`` and *name* into the element with id ``"name"``, and the
    element with id ``"btn"`` is clicked. The result is then read from the
    elements carrying the CSS class ``"liright"``.

    Args:
        html: URL of the query page.
        driver: Selenium WebDriver (or compatible object) used for the query.
        name: Value typed into the ``"name"`` field.
        password: Value typed into the ``"id"`` field (the rest of the
            script prints this value as the admission-ticket number).

    Returns:
        The text of the second ``"liright"`` element, or ``""`` when the
        page shows fewer than two such elements.
    """
    driver.get(html)

    # find_element(By.<kind>, value) works on Selenium 3 and 4 alike; the
    # find_element_by_* shortcuts used previously were removed in Selenium 4.3.
    ticket_box = driver.find_element(By.ID, "id")
    ticket_box.clear()
    ticket_box.send_keys(password)

    name_box = driver.find_element(By.ID, "name")
    name_box.clear()
    name_box.send_keys(name)

    driver.find_element(By.ID, "btn").click()

    # The plural form returns a list (empty when nothing matches), so a page
    # without results yields "" instead of raising.
    cells = driver.find_elements(By.CLASS_NAME, "liright")
    return cells[1].text if len(cells) > 1 else ""
#################### Load the records from the CSV file ####################
def read_out(filepath="2017.csv"):
    """Push every record of the CSV file onto the shared ``name``/``password`` stacks.

    For each row, column 6 is appended to the module-level ``name`` list and
    column 5 to the module-level ``password`` list, so the two lists stay
    index-aligned. Bytes that cannot be decoded are dropped instead of
    raising.

    Args:
        filepath: CSV file to read; defaults to the file the script has
            always used.
    """
    with open(filepath, "r", errors="ignore", newline="") as cs:
        for row in csv.reader(cs):
            # Blank or truncated lines have no columns 5/6; skip them rather
            # than abort the whole load with an IndexError.
            if len(row) < 7:
                continue
            name.append(row[6])
            password.append(row[5])
######## Worker: crawl the queued records and write the results to CSV ########
def write_in(driver, i, max_attempts=6):
    """Pop records, look up their grade, and write the outcome until none remain.

    Records are taken from the module-level ``name``/``password`` stacks.
    Each one is looked up with ``post_get``; a success is written to
    ``writer`` as ``[name, ticket, grade]``. A record whose lookup raises
    ``max_attempts`` times in a row is written to ``error`` as
    ``[name, ticket]`` instead. Every access to the shared stacks and CSV
    writers happens while holding the module-level ``lock``.

    Args:
        driver: WebDriver owned by this worker. It is always quit on exit,
            including when the loop is aborted by an exception.
        i: Worker index, used only in the progress messages.
        max_attempts: Number of lookups to try per record before giving up
            on it (the script has always given up after 6 failures).
    """
    html = "http://cet.99sushe.com"
    try:
        while True:
            # Check and pop under a single lock hold so another worker cannot
            # empty the stacks between the emptiness test and the pop.
            with lock:
                if not password:
                    break
                n = name.pop()
                p = password.pop()

            # Retry this very record here instead of pushing it back: the
            # attempt count then belongs to the record, and a record can no
            # longer end up both re-queued and logged as an error.
            grade = None
            fetched = False
            for _ in range(max_attempts):
                try:
                    grade = post_get(html, driver, n, p)
                except Exception as e:  # best effort: any lookup failure is retried
                    print(e)
                else:
                    fetched = True
                    break

            if not fetched:
                print("\n/******write in error.csv")
                print("第%d個線程:" % i)
                print("準(zhǔn)考證號: " + p + " 姓名: " + n + "******/\n")
                with lock:
                    error.writerow([n, p])
                continue

            print("第%d個線程:" % i)
            print("準(zhǔn)考證號: " + p + " 姓名: " + n)
            print(grade)
            try:
                # "with" releases the lock even when writerow raises; the old
                # acquire()/release() pair left it held and then deadlocked.
                with lock:
                    writer.writerow([n, p, grade])
            except (csv.Error, OSError, ValueError) as e:
                # Re-queuing would crawl the record again and could loop
                # forever, so park it in error.csv for a later run instead.
                print(">>>>>>>error")
                print(e)
                with lock:
                    error.writerow([n, p])
    finally:
        driver.quit()
########### Main script ####################
# time.clock() was removed in Python 3.8 and never measured elapsed wall time
# portably, so take an explicit start timestamp and report the difference.
start_time = time.perf_counter()
## Shared stacks of pending records (index-aligned)
threads = []
password = []
name = []
## Load the records onto the stacks
read_out()
##############
grade = 0
### Number of worker threads
thread_num = 3
### CSV output files and writers ####
html = "http://cet.99sushe.com"
# Without newline="" the csv module leaves an extra blank line after each row.
# The with-statement closes both files even if a worker setup step raises.
with open("error.csv", "w", newline="", errors="ignore") as error_file, \
        open("result.csv", "w", newline="", errors="ignore") as csv_file:
    writer = csv.writer(csv_file)
    error = csv.writer(error_file)
    # writerow() takes a sequence of fields; passing the bare string wrote
    # one cell per character, so wrap the placeholder text in a list.
    writer.writerow(["讓我們先空一行!"])
    error.writerow(["讓我們先空一行!"])
    ########### Worker threads (threads, not processes) ###############
    for i in range(thread_num):
        print("第%d個線程入棧:" % i)
        driver = webdriver.Firefox()
        driver.implicitly_wait(10)
        # args must be a tuple: args=(driver) without a second element or a
        # trailing comma is not one, which broke an earlier version.
        t = threading.Thread(target=write_in, args=(driver, i))
        threads.append(t)
    # Start all workers, then wait for every one of them to finish.
    for i, t in enumerate(threads):
        print("start第%d個線程" % i)
        t.start()
    for t in threads:
        t.join()
#####################################
print("一共花了:")
print(time.perf_counter() - start_time)