環境搭建
基于linux/mac、Python
1.安裝Python圖像庫
pip install Pillow
2.安裝Python庫Pytesseract
pip install pytesseract
3.安裝Python庫tesseract
pip install tesseract
4.安裝leptonica
./configure
make
make install
5.安裝tesseract-ocr
./autogen.sh
CPPFLAGS="-I/usr/local/include" LDFLAGS="-L/usr/local/lib" ./configure
make
make install
安裝完成後,再下載tessdata語言資料,並將其放置於tesseract的tessdata目錄。
Python腳本:
#!/usr/bin/env python
#-*- coding:utf-8 -*-
# author:wdl
# time:2017-03-08 pm
import os
import sys
import time
import requests
from PIL import Image
import pytesseract
import subprocess
# URL of the captcha image that the script downloads and runs OCR on.
code_url = "https://www.jiguang.cn/captcha/login/"
def identification_code(url, threshold=140, timeout=10):
    """Download a captcha image and return the text Tesseract reads from it.

    The image is saved to ``captcha.jpg`` in the current working directory,
    converted to grayscale, binarized with a fixed threshold and then handed
    to ``pytesseract.image_to_string``.

    Args:
        url: Address of the captcha image to fetch.
        threshold: Gray level (0-255) used for binarization; levels below it
            map to 0 and all other levels map to 1.
        timeout: Seconds to wait for the HTTP server before giving up.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the binarized
        image.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    # Fetch the captcha. Without a timeout the call could block forever, and
    # without the status check an error page would be saved as an "image".
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open("captcha.jpg", "wb") as image_file:
        image_file.write(response.content)

    # 256-entry lookup table for Image.point: one output value per gray level.
    lookup = [0 if level < threshold else 1 for level in range(256)]

    # Open the saved file, convert it to grayscale ("L") and binarize it.
    # The context manager releases the file handle once the pixel data has
    # been converted into a new, independent image.
    with Image.open("captcha.jpg") as captcha:
        binary_image = captcha.convert("L").point(lookup, "1")

    # NOTE(review): the config string is kept exactly as in the original.
    # Newer Tesseract releases may require "--psm" instead of "-psm";
    # confirm against the installed version.
    return pytesseract.image_to_string(binary_image, lang="eng", config="-psm 7")
# Fetch the captcha at code_url, run OCR on it and print the recognized text.
print(identification_code(code_url))