YOLOv2是Joseph Redmon提出的針對YOLO算法不足的改進版本,作者使用了一系列的方法對原來的YOLO多目標檢測框架進行了改進,在保持原有速度的優勢之下,精度上得以提升,此外作者提出了一種目標分類與檢測的聯合訓練方法,通過這種方法YOLO9000可以同時在COCO和ImageNet數據集中進行訓練,訓練后的模型可以實現多達9000種物體的實時檢測。
Paper:https://arxiv.org/abs/1612.08242
Github:https://github.com/pjreddie/darknet
Website:https://pjreddie.com/darknet/yolo
作者為YOLO算法設計了獨有的深度學習框架darknet,因此沒有提供Python的接口。在實驗中,我找到了兩種在Python 3中使用YOLOv2網絡的方法。
第一種:為darknet添加Python接口
Github:https://github.com/SidHard/py-yolo2
該項目使用了原始的darknet網絡,需要使用cmake重新編譯源碼,因此在Linux上使用更為方便一些。
首先從git上下載該項目
git clone https://github.com/SidHard/py-yolo2.git
執行cmake生成項目
cmake .. && make
最后執行yolo.py測試項目,相應的網絡結構.cfg文件保存在cfg文件夾中,權值.weights文件放在根目錄下,這些可以從darknet的官方網站上下載使用。
第二種:使用keras
Github:https://github.com/allanzelener/YAD2K
該項目使用了keras與tensorflow-gpu,因此可以在任何使用該框架的環境下運行,我在自己的程序中使用的是該種方法。
首先下載源文件并且配置環境,可以使用anaconda環境或者在全局安裝。
git clone https://github.com/allanzelener/yad2k.git
cd yad2k
# [Option 1] To replicate the conda environment:
conda env create -f environment.yml
source activate yad2k
# [Option 2] Install everything globally.
pip install numpy
pip install tensorflow-gpu # CPU-only: conda install -c conda-forge tensorflow
pip install keras # Possibly older release: conda install keras
快速開始
- 從Darknet官方下載model:official YOLO website.
wget http://pjreddie.com/media/files/yolo.weights
- 將 Darknet YOLO_v2 model轉換為Keras model.
./yad2k.py cfg/yolo.cfg yolo.weights model_data/yolo.h5
- 測試圖片位于images/文件夾.
./test_yolo.py model_data/yolo.h5
最后執行test_yolo就可以執行網絡,在images/out/文件夾里可以看到執行效果。




為了方便模型用于測試視頻與圖片,我對demo做了修改,相比原來的測試代碼,能夠直接移植到項目中去,對象化的程序也更易于修改,代碼如下:
#! /usr/bin/env python
"""Run a YOLO_v2 style detection model on test images."""
import cv2
import os
import time
import numpy as np
from keras import backend as K
from keras.models import load_model
from yad2k.models.keras_yolo import yolo_eval, yolo_head
class YOLO(object):
    """Run a YAD2K-converted YOLO_v2 Keras model on OpenCV images.

    The constructor loads the class names, the anchors and the Keras model,
    then builds the output tensors that ``detect_image`` evaluates for every
    frame.

    Args:
        model_path: Path to the converted Keras ``.h5`` model.
        anchors_path: Text file whose first line holds the comma-separated
            anchor values.
        classes_path: Text file with one class name per line.
        score: Score threshold forwarded to ``yolo_eval``.
        iou: IoU threshold forwarded to ``yolo_eval``.
    """

    def __init__(self, model_path='model_data/yolo.h5',
                 anchors_path='model_data/yolo_anchors.txt',
                 classes_path='model_data/coco_classes.txt',
                 score=0.3, iou=0.5):
        self.model_path = model_path
        self.anchors_path = anchors_path
        self.classes_path = classes_path
        self.score = score
        self.iou = iou
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        """Return the class names, one per line of ``self.classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    def _get_anchors(self):
        """Return the anchors as an ``(N, 2)`` float array.

        Only the first line of ``self.anchors_path`` is read; it must hold
        comma-separated numbers.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape(-1, 2)

    def generate(self):
        """Load the model and build the filtered box/score/class tensors.

        Returns:
            The ``(boxes, scores, classes)`` tensors produced by ``yolo_eval``.

        Raises:
            AssertionError: If the model file is not a ``.h5`` file, or the
                channel count of the last layer does not equal
                ``num_anchors * (num_classes + 5)``.
        """
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
        self.yolo_model = load_model(model_path)

        # Verify model, anchors, and classes are compatible
        num_classes = len(self.class_names)
        num_anchors = len(self.anchors)
        # TODO: Assumes dim ordering is channel last
        model_output_channels = self.yolo_model.layers[-1].output_shape[-1]
        assert model_output_channels == num_anchors * (num_classes + 5), \
            'Mismatch between model and given anchor and class sizes'
        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Check if model is fully convolutional, assuming channel last order.
        self.model_image_size = self.yolo_model.layers[0].input_shape[1:3]
        self.is_fixed_size = self.model_image_size != (None, None)

        # Generate output tensor targets for filtered bounding boxes.
        # TODO: Wrap these backend operations with Keras layers.
        yolo_outputs = yolo_head(self.yolo_model.output, self.anchors, len(self.class_names))
        # Fed at run time with the [height, width] of the original image.
        self.input_image_shape = K.placeholder(shape=(2, ))
        boxes, scores, classes = yolo_eval(
            yolo_outputs, self.input_image_shape,
            score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    @staticmethod
    def _clip_box(box, height, width):
        """Round a ``(top, left, bottom, right)`` box and clip it to the image.

        Returns plain Python ints so that OpenCV drawing calls accept the
        coordinates regardless of how a given build treats NumPy scalars.
        """
        top, left, bottom, right = box
        top = max(0, int(np.floor(top + 0.5)))
        left = max(0, int(np.floor(left + 0.5)))
        bottom = min(int(height), int(np.floor(bottom + 0.5)))
        right = min(int(width), int(np.floor(right + 0.5)))
        return top, left, bottom, right

    def detect_image(self, image):
        """Detect objects in ``image`` and draw the results onto it.

        Args:
            image: A 3-dimensional (height, width, channels) array as
                returned by ``cv2.imread`` / ``cv2.VideoCapture.read``,
                i.e. in BGR channel order.

        Returns:
            The same ``image`` array, annotated in place with one rectangle
            and one text label per detection.
        """
        start = time.time()
        height, width, _ = image.shape

        # OpenCV delivers BGR pixels; the reference YAD2K demo feeds the
        # network RGB images (loaded through PIL), so swap the channels for
        # inference only. Drawing below still happens on the BGR original.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.is_fixed_size:  # TODO: When resizing we can use minibatch input.
            # model_image_size is (height, width); cv2.resize wants (width, height).
            resized_image = cv2.resize(
                rgb_image, tuple(reversed(self.model_image_size)),
                interpolation=cv2.INTER_CUBIC)
            image_data = np.array(resized_image, dtype='float32')
        else:
            image_data = np.array(rgb_image, dtype='float32')

        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [height, width],
                K.learning_phase(): 0
            })
        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            score = out_scores[i]
            label = '{} {:.2f}'.format(predicted_class, score)

            top, left, bottom, right = self._clip_box(out_boxes[i], height, width)
            print(label, (left, top), (right, bottom))

            cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
            cv2.putText(image, label, (left, top - 4), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (0, 0, 255), 1, cv2.LINE_AA)

        end = time.time()
        print(end - start)
        return image

    def close_session(self):
        """Close the backend session obtained in ``__init__``."""
        self.sess.close()
def detect_vedio(video, yolo):
    """Run detection on every frame of a video and show the result live.

    Frames are read until the source is exhausted or the user presses Esc
    (key code 27). The capture device, the display windows and the detector
    session are released even if detection raises.

    Args:
        video: Anything accepted by ``cv2.VideoCapture`` (file path or
            device index).
        yolo: Detector exposing ``detect_image(frame)`` and
            ``close_session()``.
    """
    camera = cv2.VideoCapture(video)
    try:
        cv2.namedWindow("detection", cv2.WINDOW_NORMAL)
        while True:
            res, frame = camera.read()
            if not res:
                # End of stream, or the source could not be opened/read.
                break
            image = yolo.detect_image(frame)
            cv2.imshow("detection", image)
            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        camera.release()
        yolo.close_session()
        cv2.destroyAllWindows()
def detect_img(img, yolo):
    """Run detection on a single image file and display it until Esc is pressed.

    Args:
        img: Path of the image to load with ``cv2.imread``.
        yolo: Detector exposing ``detect_image(image)`` and
            ``close_session()``.

    Raises:
        IOError: If the image cannot be read. The detector session is left
            open in that case so the caller can retry with another path.
    """
    image = cv2.imread(img)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: {}'.format(img))
    try:
        r_image = yolo.detect_image(image)
        cv2.namedWindow("detection")
        while True:
            cv2.imshow("detection", r_image)
            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        yolo.close_session()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal and avoid
    # invalid-escape-sequence warnings; the runtime values are unchanged.
    img = r'E:\Documents\Downloads\YAD2K-master\YAD2K-master\images\horses.jpg'
    video = r'E:\Documents\Documents\python\Traffic\data\person.avi'

    detect_img(img, YOLO())
    # detect_img closes the backend session when it finishes, so the video
    # run needs its own detector. clear_session() drops the closed session
    # cached by Keras so that the next YOLO() starts from a fresh one.
    K.clear_session()
    detect_vedio(video, YOLO())