
簡述
- 在簡書-爬蟲數據分析學習交流 群里有個哥們 ??I ??(。。他的微信昵稱直接粘貼過來就是這樣的。。)展示了他的極驗驗證碼破解效果,很流暢,引起了我的興趣。
- 極驗驗證碼的主要問題在于對人的行為特征的分析,它會在你拖動滑塊時以數十毫秒間隔記錄你的鼠標移動數據發送到服務器,并使用各種算法(如深度學習)判斷你的軌跡特征到底是人還是機器。
- 我突然想到pid模型說不定能模擬人的行為特征,就想試試。
- 實現(xiàn)思路上大家都差不多,簡單的方案就是selenium操縱瀏覽器,由原始圖和凹陷圖對比得出要移動的距離,然后控制鼠標以某種方案移動即可。(圖片的獲取有兩種方案,一是向服務器請求圖片片段,然后拼接起來,我選擇了另一種懶辦法,在三種情況下截圖對比)
- 難一些的思路就是抓包分析,直接請求服務器,發送鼠標軌跡數據,網上有大神直接抓包分析發送xpos數據的樣例,詳見參考。
- ??I ??哥們說他的正確率在90%以上,感謝他的熱心交流,雖然沒有透露他的具體方案 :) (當然,這種東西不好說的)
- 我的成功率不高,要是高了就不太方便寫博客了。成功率在40、50%吧,所以我放心地貼出來,僅供學習探索參考,而且極驗的3.0也在推進,感覺樣式還不錯。
- 我將我搜集到的有用資源都列在參考里了,網上分享的經驗和代碼不少,上手還是比較快的,演示視頻中的代碼也放上了,在github-geetest里直接附上了chromedriver可能更方便點,有興趣的朋友可以試著玩玩,調下參數。還試了些其他的,比較亂,就沒放上了。
- 我覺得這個對于搞機器學習的朋友會比較有吸引力,畢竟手里有個錘子,看啥都是釘子。現在這么好的陪練出來了,一攻一防,不過注意分寸吧。。。在代碼中搜索到
get_offsets,用你自己的思路復寫它,返回一個可迭代對象表示鼠標每次平移間隔即可。192行(即拖動循環中 time.sleep 那一行)的間隔時間也可相應修改。
效果
- 渣畫質動態圖
連續嘗試視頻(題外話:視頻簡單處理參考 FFmpeg實用命令:音頻、視頻格式轉換和其它操作)
參考
視頻中代碼
# -*- coding: utf-8 -*-
import os
import time
from selenium import webdriver
from io import BytesIO
from PIL import Image
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# chdir('.') leaves the working directory where it already is.
os.chdir('.')
# Start Chrome through the chromedriver binary in the working directory and
# open the sign-in page that shows the captcha.
driver = webdriver.Chrome('./chromedriver.exe')
driver.get("http://www.itdecent.cn/sign_in")
# NOTE(review): indentation was lost in this listing. Presumably everything
# from here down to driver.refresh() is the body of this loop (one captcha
# attempt per iteration) -- confirm against the original script.
while 1:
time.sleep(0.2)
# Set the browser window size
width = 1280
height = 800
driver.set_window_size(width, height)
def get_captcha_image(filename):
    """Save a screenshot of the captcha widget to ``filename``.

    Takes a full-page screenshot through the module-level ``driver``, crops
    it to the bounding box of the element with class ``gt_box`` and writes
    the crop to ``filename``.

    Args:
        filename: Path the cropped image is saved to.

    Raises:
        Exception: If the element reports a left edge of 0 (presumably the
            widget has not been laid out yet -- confirm).
    """
    page = Image.open(BytesIO(driver.get_screenshot_as_png()))
    # find_element(By..., ...) replaces the find_element_by_* helpers, which
    # are deprecated in Selenium 4 and removed in later releases.
    captcha_el = driver.find_element(By.CLASS_NAME, "gt_box")
    location = captcha_el.location
    size = captcha_el.size
    left = location['x']
    top = location['y']
    # (left, upper, right, lower) in page coordinates.
    box = (left, top, left + size['width'], top + size['height'])
    print(box)
    if left == 0:
        raise Exception('=======')
    page.crop(box).save(filename)
    print(u'截圖成功')
time.sleep(1)
# Wait up to 8 seconds for the captcha box to be present in the DOM.
WebDriverWait(driver, 8).until(
    EC.presence_of_element_located((By.CLASS_NAME, "gt_box")))
# find_element(By..., ...) replaces the deprecated find_element_by_* helpers.
knob = driver.find_element(By.CLASS_NAME, "gt_slider_knob")
action = ActionChains(driver)
# Move the pointer onto the slider knob without pressing yet.
action.move_to_element_with_offset(knob, 21, 21).perform()
time.sleep(1)
# "f" screenshot: captcha while the pointer hovers over the knob.
f_file = 'f-%s.png' % time.strftime("%Y%m%d-%H%M%S")
get_captcha_image(f_file)
# Press and hold the mouse button at the current pointer position.
ActionChains(driver).click_and_hold().perform()
time.sleep(0.5)
# "s" screenshot: captcha while the button is held down.
s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S")
get_captcha_image(s_file)
# --------------------------------------------------------------
import matplotlib.pylab as plt
from PIL import Image, ImageFilter
from PIL import ImageChops
# Pixel-wise difference between the two screenshots, to see where they differ
image_f = Image.open(f_file)
image_s = Image.open(s_file)
diff = ImageChops.difference(image_f, image_s)
# ---------------------- show images (debug) ----------------------------
# The triple-quoted string below is disabled debug code; as a bare string
# expression it is evaluated and discarded.
'''
# if diff.getbbox() is not None:
diff.save('x.png')
# plt.imshow(plt.imread('x.png'))
# plt.show()
fig, axs = plt.subplots(nrows=1, ncols=3)
for im, ax in zip(["f.png", "s.png", "x.png"], axs):
image = plt.imread(im)
ax.imshow(image)
plt.show()
diff_image = Image.open('x.png')
'''
# ------------------------- debug --------------------------------
# first_left is overwritten by find_offset() with the left edge of the first
# difference region; it is read again by the fallback path further down.
global first_left
first_left = 0
def find_offset(diff_image, offset_=62):
    """Estimate the slider travel distance from a difference image.

    The difference image is thresholded (grey level > 52) into a 1-bit mask,
    which is also saved as ``x-<timestamp>.png``. The left edge of the whole
    mask's bounding box is stored in the global ``first_left``. The mask is
    then cropped to start ``offset_`` pixels in, and the left edge of the
    bounding box found there gives the second region.

    Args:
        diff_image: Image-like object supporting ``convert``/``point``.
        offset_: Number of columns skipped on the left before looking for
            the second region.

    Returns:
        The distance in pixels between the two left edges (minus a 2 px
        correction), or ``-1`` when no usable second region is found: the
        mask is empty, nothing lies right of ``offset_``, or the second
        region starts within 4 px of the crop edge.
    """
    global first_left
    mask = diff_image.convert("L").point(lambda i: i > 52, mode='1')
    mask.save('x-%s.png' % time.strftime("%Y%m%d-%H%M%S"))

    # getbbox() gives (left, upper, right, lower), or None for an empty mask.
    whole_box = mask.getbbox()
    if whole_box is None:
        return -1
    first_left = whole_box[0]

    right_box = mask.crop((offset_, 0, mask.width, mask.height)).getbbox()
    # None: nothing differs right of offset_. <= 4: the region touches the
    # crop edge, so it is not a separate second region.
    if right_box is None or right_box[0] <= 4:
        return -1
    return right_box[0] + offset_ - whole_box[0] - 2
# diff = diff_image.load()
# http://stackoverflow.com/questions/9038160/break-two-for-loops
# for x in range(61, width):
# for y in range(height):
# if all(i > 40 for i in diff[x, y]):
# return x - 6
offset = find_offset(diff)
# NOTE(review): indentation was lost in the listing; the extent of this `if`
# body is reconstructed from the control flow (the knob must be re-grabbed
# only after it was released here) -- confirm against the original script.
if offset < 0:
    # Drag the slider 160 px to the right, hold it there and take a screenshot
    ActionChains(driver).move_by_offset(160, 0).perform()
    time.sleep(0.5)
    s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S")
    get_captcha_image(s_file)
    # Release the slider
    ActionChains(driver).release().perform()
    image_s = Image.open(s_file)
    diff = ImageChops.difference(image_f, image_s)
    d = diff.convert("L").point(lambda i: i > 60, mode='1')
    # Distance from the left edge recorded by find_offset() to the left edge
    # of the new difference mask.
    offset = d.getbbox()[0] - first_left
    time.sleep(2.5)
    # Grab the knob again so the drag loop below starts with the button held.
    ActionChains(driver).move_to_element_with_offset(
        knob, 21, 21).click_and_hold().perform()
    time.sleep(0.5)
print(offset)
def get_offsets(setpointX):
    """Yield integer horizontal steps that add up to ``setpointX``.

    Each step is the truncated velocity of a simple PID-style controller
    (gains ``kp``, ``ki``, ``kd``) driving the position ``x`` towards
    ``setpointX``. Steps are always whole pixels: never move by a fractional
    number of pixels.

    Args:
        setpointX: Total distance to cover, in pixels.

    Yields:
        int: The next step, at least 1. For an integer ``setpointX`` the
        steps sum to exactly ``setpointX``.
    """
    kp = 3.0
    ki = 0.0001
    kd = 80.0
    # The overall gain depends only on the target distance, so choose it once.
    # It is derived from the parameter rather than the global `offset`.
    if setpointX < 100:
        K = 0.007
    elif setpointX < 180:
        K = 0.006
    else:
        K = 0.005

    x = 0
    vx = 0
    prevErrorX = 0
    integralX = 0
    while x < setpointX:
        errorX = setpointX - x
        integralX += errorX
        derivativeX = errorX - prevErrorX
        prevErrorX = errorX
        ax = K * (kp * errorX + ki * integralX + kd * derivativeX)
        vx += ax
        # Never step past the target.
        if x + vx > setpointX:
            vx = setpointX - x
        vx = int(vx)
        if vx < 1:
            # Velocity died out: take a small random step, still clamped to
            # the remaining distance so it cannot overshoot.
            vx = min(random.randint(1, 3), max(1, int(errorX)))
        yield vx
        print('vvvvv - ', vx)
        x += vx
def get_offsets_back(goal):
    """Yield random steps of 10-50 px that add up to ``goal``.

    The last step is shortened so the running total lands exactly on
    ``goal``; nothing is yielded when ``goal`` is not positive.
    """
    travelled = 0
    while travelled < goal:
        step = random.randint(10, 50)
        remaining = goal - travelled
        if step > remaining:
            step = remaining
        yield step
        travelled += step
import random
# Replay the generated steps as small mouse moves while the button is held.
for o in get_offsets(offset):
    # Add up to one pixel of vertical jitter to each move.
    y = random.randint(-1, 1)
    ActionChains(driver).move_by_offset(o, y).perform()
    # Pause 20-40 ms between moves; dividing by 100.0 keeps this a float
    # division on Python 2 as well (plain / 100 would truncate to 0 there).
    time.sleep(random.randint(2, 4) / 100.0)
# Drop the slider, give the page time to show the result, then reload.
ActionChains(driver).release().perform()
time.sleep(3)
driver.refresh()
其它


