【轉載】caffe python layer

本文鏈接:https://blog.csdn.net/thesby/article/details/51264439

caffe的大多數層是由C++寫成的,借助于C++的高效性,網絡可以快速訓練。但是我們有時候需要自己寫點輸入層以應對各種不同的數據輸入,比如你因為需要在圖像中取塊而不想寫成LMDB,這時候可以考慮使用python直接寫一個層。而且輸入層不需要GPU加速,所以寫起來也比較容易。

python層怎么用

先看一個網上的例子吧(來自 http://chrischoy.github.io/research/caffe-python-layer/ )

layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer
  loss_weight: 1
}

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

這里的type就只有Python一種,然后top,bottom和常見的層是一樣的,module就是你的python module名字,一般就是文件名,然后layer就是定義的類的名字。

python層怎么寫

這里就以 Fully Convolutional Networks for Semantic Segmentation 論文中公布的代碼作為示例,解釋python層該怎么寫。

import caffe

import numpy as np

from PIL import Image

import random

class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for PASCAL VOC semantic segmentation.

        example

        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")

        Raises:
            Exception: if the layer is not wired with exactly two tops and
                zero bottoms.
        """
        # config
        # NOTE(review): param_str is executed as a Python expression, so the
        # prototxt must come from a trusted source.
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                                                            self.split)
        # read inside a context manager so the file handle is closed promptly
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and size both tops to fit it."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then advance to the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            # wrap around at the end of the index list
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: a data layer has nothing to back-propagate."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:

        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.

        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label

class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")

        Raises:
            Exception: if the layer is not wired with exactly two tops and
                zero bottoms.
        """
        # config
        # NOTE(review): param_str is executed as a Python expression, so the
        # prototxt must come from a trusted source.
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f = '{}/{}.txt'.format(self.sbdd_dir, self.split)
        # read inside a context manager so the file handle is closed promptly
        with open(split_f, 'r') as f:
            self.indices = f.read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)

    def reshape(self, bottom, top):
        """Load the current image/label pair and size both tops to fit it."""
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)

    def forward(self, bottom, top):
        """Copy the loaded pair into the tops, then advance to the next index."""
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            # wrap around at the end of the index list
            if self.idx == len(self.indices):
                self.idx = 0

    def backward(self, top, propagate_down, bottom):
        """No-op: a data layer has nothing to back-propagate."""
        pass

    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:

        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.

        The leading singleton dimension is required by the loss.
        """
        # imported here so scipy is only required when SBDD labels are read
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

每個類都是層,類的名字就是layer參數的名字。這兩個都是數據輸入層,由于需要一個data,一個label,所以有兩個top,沒有bottom。

類直接繼承的是caffe.Layer,然后必須重寫setup(),reshape(),forward(),backward()函數,其他的函數可以自己定義,沒有限制。

setup()是類啟動時該做的事情,比如層所需數據的初始化。

reshape()就是取數據然后把它規范化為四維的矩陣。每次取數據都會調用此函數。

forward()就是網絡的前向運行,這里就是把取到的數據往前傳遞,因為沒有其他運算。

backward()就是網絡的反饋,data層是沒有反饋的,所以這里就直接pass。

PS

這里就把一些資料整合起來,以供參考吧。

1、caffe官網現在開始有了點pycaffe的資料,但是鑒于caffe經常更新,不知道什么時候就把它刪除,所以摘錄到此。

文件: pyloss.py

import caffe

import numpy as np

class EuclideanLossLayer(caffe.Layer):
    """
    Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
    to demonstrate the class interface for developing layers in Python.
    """

    def setup(self, bottom, top):
        """Validate that exactly two bottoms are connected."""
        # check input pair
        if len(bottom) != 2:
            raise Exception("Need two inputs to compute distance.")

    def reshape(self, bottom, top):
        """Check that both inputs have the same count and allocate buffers."""
        # check input dimensions match
        if bottom[0].count != bottom[1].count:
            raise Exception("Inputs must have the same dimension.")
        # difference is shape of inputs
        self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
        # loss output is scalar
        top[0].reshape(1)

    def forward(self, bottom, top):
        """Store the element-wise difference and write sum(diff^2) / num / 2."""
        self.diff[...] = bottom[0].data - bottom[1].data
        top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.

    def backward(self, top, propagate_down, bottom):
        """Write +diff/num to the first bottom and -diff/num to the second."""
        for i in range(2):
            if not propagate_down[i]:
                continue
            # the loss is computed from (bottom[0] - bottom[1]), hence the
            # opposite signs for the two inputs
            sign = 1 if i == 0 else -1
            bottom[i].diff[...] = sign * self.diff / bottom[i].num

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

下面這個就是如何使用這個層了:

linreg.prototxt

name: 'LinearRegressionExample'
# define a simple network for linear regression on dummy data
# that computes the loss by a PythonLayer.
layer {
  type: 'DummyData'
  name: 'x'
  top: 'x'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
layer {
  type: 'DummyData'
  name: 'y'
  top: 'y'
  dummy_data_param {
    shape: { dim: 10 dim: 3 dim: 2 }
    data_filler: { type: 'gaussian' }
  }
}
# include InnerProduct layers for parameters
# so the net will need backward
layer {
  type: 'InnerProduct'
  name: 'ipx'
  top: 'ipx'
  bottom: 'x'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'InnerProduct'
  name: 'ipy'
  top: 'ipy'
  bottom: 'y'
  inner_product_param {
    num_output: 10
    weight_filler { type: 'xavier' }
  }
}
layer {
  type: 'Python'
  name: 'loss'
  top: 'loss'
  bottom: 'ipx'
  bottom: 'ipy'
  python_param {
    # the module name -- usually the filename -- that needs to be in $PYTHONPATH
    module: 'pyloss'
    # the layer name -- the class name in the module
    layer: 'EuclideanLossLayer'
  }
  # set loss weight so Caffe knows this is a loss layer.
  # since PythonLayer inherits directly from Layer, this isn't automatically
  # known to Caffe
  loss_weight: 1
}

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

pascal_multilabel_datalayers.py

# imports

import json

import time

import pickle

import scipy.misc

import skimage.io

import caffe

import numpy as np

import os.path as osp

from xml.dom import minidom

from random import shuffle

from threading import Thread

from PIL import Image

from tools import SimpleTransformer

class PascalMultilabelDataLayerSync(caffe.Layer):
    """
    This is a simple synchronous datalayer for training a multilabel model on
    PASCAL.
    """

    def setup(self, bottom, top):
        """
        Parse the layer parameters, create the batch loader and shape the tops.

        The parameters come from param_str and must contain 'split',
        'batch_size', 'pascal_root' and 'im_shape' (see check_params).
        """
        self.top_names = ['data', 'label']

        # === Read input parameters ===

        # params is a python dictionary with layer parameters.
        # NOTE(review): param_str is executed as a Python expression, so the
        # prototxt must come from a trusted source.
        params = eval(self.param_str)

        # Check the parameters for validity.
        check_params(params)

        # store input as class variables
        self.batch_size = params['batch_size']

        # Create a batch loader to load the images.
        self.batch_loader = BatchLoader(params, None)

        # === reshape tops ===
        # since we use a fixed input image size, we can shape the data layer
        # once. Else, we'd have to do it in the reshape call.
        top[0].reshape(
            self.batch_size, 3, params['im_shape'][0], params['im_shape'][1])
        # Note the 20 channels (because PASCAL has 20 classes.)
        top[1].reshape(self.batch_size, 20)

        print_info("PascalMultilabelDataLayerSync", params)

    def forward(self, bottom, top):
        """
        Load data.
        """
        for itt in range(self.batch_size):
            # Use the batch loader to load the next image.
            im, multilabel = self.batch_loader.load_next_image()

            # Add directly to the caffe data layer
            top[0].data[itt, ...] = im
            top[1].data[itt, ...] = multilabel

    def reshape(self, bottom, top):
        """
        There is no need to reshape the data, since the input is of fixed size
        (rows and columns)
        """
        pass

    def backward(self, top, propagate_down, bottom):
        """
        This layer does not back propagate
        """
        pass

class BatchLoader(object):
    """
    This class abstracts away the loading of images.

    Images can either be loaded singly, or in a batch. The latter is used for
    the asynchronous data layer to preload batches while other processing is
    performed.
    """

    def __init__(self, params, result):
        """
        Read the image index list for the requested split.

        Args:
            params: dict providing 'batch_size', 'pascal_root', 'im_shape'
                and 'split'.
            result: stored on the instance unchanged; this class does not
                use it itself.
        """
        self.result = result
        self.batch_size = params['batch_size']
        self.pascal_root = params['pascal_root']
        self.im_shape = params['im_shape']
        # get list of image indexes.
        list_file = params['split'] + '.txt'
        # read inside a context manager so the file handle is closed promptly
        with open(osp.join(self.pascal_root, 'ImageSets/Main',
                           list_file)) as f:
            self.indexlist = [line.rstrip('\n') for line in f]
        self._cur = 0  # current image
        # this class does some simple data-manipulations
        self.transformer = SimpleTransformer()

        # single-argument call form prints the same on Python 2 and Python 3
        print("BatchLoader initialized with {} images".format(
            len(self.indexlist)))

    def load_next_image(self):
        """
        Load the next image in a batch.

        Returns:
            A pair (image, multilabel): the image after
            SimpleTransformer.preprocess, and a float32 vector of length 20
            with 1 at every class listed in the image's annotation.
        """
        # Did we finish an epoch?
        if self._cur == len(self.indexlist):
            self._cur = 0
            shuffle(self.indexlist)

        # Load an image
        index = self.indexlist[self._cur]  # Get the image index
        image_file_name = index + '.jpg'
        pil_im = Image.open(
            osp.join(self.pascal_root, 'JPEGImages', image_file_name))
        # scipy.misc.imresize was removed from SciPy; it was a thin wrapper
        # around PIL's bilinear resize. im_shape is (rows, cols) while PIL
        # expects (width, height), hence the swap.
        pil_im = pil_im.resize(
            (self.im_shape[1], self.im_shape[0]), Image.BILINEAR)
        im = np.asarray(pil_im)

        # do a simple horizontal flip as data augmentation
        flip = np.random.choice(2)*2-1  # either -1 (flip) or +1 (keep)
        im = im[:, ::flip, :]

        # Load and prepare ground truth
        multilabel = np.zeros(20).astype(np.float32)
        anns = load_pascal_annotation(index, self.pascal_root)
        for label in anns['gt_classes']:
            # in the multilabel problem we don't care how MANY instances
            # there are of each class. Only if they are present.
            # The "-1" is b/c we are not interested in the background
            # class.
            multilabel[label - 1] = 1

        self._cur += 1
        return self.transformer.preprocess(im), multilabel

def load_pascal_annotation(index, pascal_root):
    """
    This code is borrowed from Ross Girshick's FAST-RCNN code
    (https://github.com/rbgirshick/fast-rcnn).
    It parses the PASCAL .xml metadata files.
    See publication for further details: (http://arxiv.org/abs/1504.08083).

    Thanks Ross!

    Args:
        index: annotation file name without the '.xml' extension.
        pascal_root: directory containing the 'Annotations' folder.

    Returns:
        dict with keys 'boxes' (num_objs x 4 uint16, 0-based
        xmin/ymin/xmax/ymax), 'gt_classes' (num_objs int32 class indices),
        'gt_overlaps' (num_objs x 21 sparse CSR matrix with 1.0 at each
        object's class), 'flipped' (always False) and 'index'.

    Raises:
        KeyError: if an object name is not one of the known classes.
    """
    # scipy.sparse is a submodule that a plain `import scipy` (or
    # `import scipy.misc`) does not reliably load, so import it explicitly.
    import scipy.sparse

    classes = ('__background__',  # always index 0
               'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor')
    num_classes = len(classes)
    # range() instead of xrange() so this runs on both Python 2 and 3
    class_to_ind = dict(zip(classes, range(num_classes)))

    filename = osp.join(pascal_root, 'Annotations', index + '.xml')

    def get_data_from_tag(node, tag):
        # text content of the first <tag> element found below node
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    with open(filename) as f:
        data = minidom.parseString(f.read())

    objs = data.getElementsByTagName('object')
    num_objs = len(objs)

    boxes = np.zeros((num_objs, 4), dtype=np.uint16)
    gt_classes = np.zeros((num_objs), dtype=np.int32)
    overlaps = np.zeros((num_objs, num_classes), dtype=np.float32)

    # Load object bounding boxes into a data frame.
    for ix, obj in enumerate(objs):
        # Make pixel indexes 0-based
        x1 = float(get_data_from_tag(obj, 'xmin')) - 1
        y1 = float(get_data_from_tag(obj, 'ymin')) - 1
        x2 = float(get_data_from_tag(obj, 'xmax')) - 1
        y2 = float(get_data_from_tag(obj, 'ymax')) - 1
        cls = class_to_ind[
            str(get_data_from_tag(obj, "name")).lower().strip()]
        boxes[ix, :] = [x1, y1, x2, y2]
        gt_classes[ix] = cls
        overlaps[ix, cls] = 1.0

    overlaps = scipy.sparse.csr_matrix(overlaps)

    return {'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'index': index}

def check_params(params):
    """
    A utility function to check the parameters for the data layers.

    Args:
        params: dict of layer parameters.

    Raises:
        AssertionError: if 'split', 'batch_size', 'pascal_root' or
            'im_shape' is missing from params.
    """
    assert 'split' in params, \
        'Params must include split (train, val, or test).'

    required = ['batch_size', 'pascal_root', 'im_shape']
    for r in required:
        assert r in params, 'Params must include {}'.format(r)

def print_info(name, params):
    """
    Output some info regarding the class.

    Args:
        name: label printed at the start of the message.
        params: dict providing 'split', 'batch_size' and 'im_shape'.
    """
    # single-argument call form prints the same on Python 2 and Python 3
    print("{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
        name,
        params['split'],
        params['batch_size'],
        params['im_shape']))

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

caffenet.py

from __future__ import print_function

from caffe import layers as L, params as P, to_proto

from caffe.proto import caffe_pb2

# helper function for common structures

def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1):
    """
    Create a Convolution layer followed by an in-place ReLU.

    Args:
        bottom: input to the convolution.
        ks: passed as kernel_size.
        nout: passed as num_output.
        stride, pad, group: forwarded to L.Convolution unchanged.

    Returns:
        A pair (conv, relu).
    """
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, group=group)
    return conv, L.ReLU(conv, in_place=True)

def fc_relu(bottom, nout):
    """
    Create an InnerProduct layer with nout outputs followed by an in-place ReLU.

    Returns:
        A pair (fc, relu).
    """
    fc = L.InnerProduct(bottom, num_output=nout)
    return fc, L.ReLU(fc, in_place=True)

def max_pool(bottom, ks, stride=1):
    """Create a MAX Pooling layer with kernel size ks and the given stride."""
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)

def caffenet(lmdb, batch_size=256, include_acc=False):
    """
    Build the CaffeNet layer graph on top of an LMDB data source.

    Args:
        lmdb: passed as the `source` of the Data layer.
        batch_size: batch size of the Data layer.
        include_acc: when true, an Accuracy layer on fc8 is added to the
            exported net alongside the loss.

    Returns:
        The result of to_proto() for the loss (and the accuracy, if requested).
    """
    data, label = L.Data(
        source=lmdb, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
        transform_param=dict(crop_size=227, mean_value=[104, 117, 123],
                             mirror=True))

    # the net itself
    conv1, relu1 = conv_relu(data, 11, 96, stride=4)
    pool1 = max_pool(relu1, 3, stride=2)
    norm1 = L.LRN(pool1, local_size=5, alpha=1e-4, beta=0.75)
    conv2, relu2 = conv_relu(norm1, 5, 256, pad=2, group=2)
    pool2 = max_pool(relu2, 3, stride=2)
    norm2 = L.LRN(pool2, local_size=5, alpha=1e-4, beta=0.75)
    conv3, relu3 = conv_relu(norm2, 3, 384, pad=1)
    conv4, relu4 = conv_relu(relu3, 3, 384, pad=1, group=2)
    conv5, relu5 = conv_relu(relu4, 3, 256, pad=1, group=2)
    pool5 = max_pool(relu5, 3, stride=2)
    fc6, relu6 = fc_relu(pool5, 4096)
    drop6 = L.Dropout(relu6, in_place=True)
    fc7, relu7 = fc_relu(drop6, 4096)
    drop7 = L.Dropout(relu7, in_place=True)
    fc8 = L.InnerProduct(drop7, num_output=1000)
    loss = L.SoftmaxWithLoss(fc8, label)

    if include_acc:
        acc = L.Accuracy(fc8, label)
        return to_proto(loss, acc)
    else:
        return to_proto(loss)

def make_net():
    """Write train.prototxt and test.prototxt into the current directory."""
    with open('train.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-train-lmdb'), file=f)

    # the test net uses a smaller batch and additionally reports accuracy
    with open('test.prototxt', 'w') as f:
        print(caffenet('/path/to/caffe-val-lmdb', batch_size=50,
                       include_acc=True), file=f)

# generate the prototxt files only when run as a script, not on import
if __name__ == '__main__':
    make_net()

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

tools.py

import numpy as np

class SimpleTransformer:
    """
    SimpleTransformer is a simple class for preprocessing and deprocessing
    images for caffe.
    """

    def __init__(self, mean=(128, 128, 128)):
        """
        Args:
            mean: per-channel values subtracted in preprocess(); they are
                applied after the channel reversal.
        """
        self.mean = np.array(mean, dtype=np.float32)
        self.scale = 1.0

    def set_mean(self, mean):
        """
        Set the mean to subtract for centering the data.
        """
        # stored as a float32 array, the same way __init__ stores it
        self.mean = np.array(mean, dtype=np.float32)

    def set_scale(self, scale):
        """
        Set the data scaling.
        """
        self.scale = scale

    def preprocess(self, im):
        """
        preprocess() emulate the pre-processing occurring in the vgg16 caffe
        prototxt.

        Takes a height x width x channel image and returns a new float32
        channel x height x width array; the input is never modified.
        """
        # np.array always copies, so the in-place arithmetic below cannot
        # write through to the caller's array, even for float32 input
        im = np.array(im, dtype=np.float32)
        im = im[:, :, ::-1]  # change to BGR
        im -= self.mean
        im *= self.scale
        im = im.transpose((2, 0, 1))
        return im

    def deprocess(self, im):
        """
        inverse of preprocess()

        Takes a channel x height x width array and returns a new uint8
        height x width x channel image; the input is never modified.
        """
        im = np.asarray(im)
        # work on a floating-point copy: transpose() returns a view, so the
        # in-place arithmetic below would otherwise modify the caller's data
        im = np.array(im, dtype=np.result_type(im.dtype, np.float32))
        im = im.transpose(1, 2, 0)
        im /= self.scale
        im += self.mean
        im = im[:, :, ::-1]  # change to RGB
        return np.uint8(im)

class CaffeSolver:
    """
    Caffesolver is a class for creating a solver.prototxt file. It sets default
    values and can export a solver parameter file.
    Note that all parameters are stored as strings. Strings variables are
    stored as strings in strings.
    """

    def __init__(self, testnet_prototxt_path="testnet.prototxt",
                 trainnet_prototxt_path="trainnet.prototxt", debug=False):
        """
        Args:
            testnet_prototxt_path: value written (quoted) as test_net.
            trainnet_prototxt_path: value written (quoted) as train_net.
            debug: when true, override max_iter, test_iter, test_interval
                and display with small values.
        """
        self.sp = {}

        # critical:
        self.sp['base_lr'] = '0.001'
        self.sp['momentum'] = '0.9'

        # speed:
        self.sp['test_iter'] = '100'
        self.sp['test_interval'] = '250'

        # looks:
        self.sp['display'] = '25'
        self.sp['snapshot'] = '2500'
        self.sp['snapshot_prefix'] = '"snapshot"'  # string within a string!

        # learning rate policy
        self.sp['lr_policy'] = '"fixed"'

        # important, but rare:
        self.sp['gamma'] = '0.1'
        self.sp['weight_decay'] = '0.0005'
        self.sp['train_net'] = '"' + trainnet_prototxt_path + '"'
        self.sp['test_net'] = '"' + testnet_prototxt_path + '"'

        # pretty much never change these.
        self.sp['max_iter'] = '100000'
        self.sp['test_initialization'] = 'false'
        self.sp['average_loss'] = '25'  # this has to do with the display.
        self.sp['iter_size'] = '1'  # this is for accumulating gradients

        if (debug):
            self.sp['max_iter'] = '12'
            self.sp['test_iter'] = '1'
            self.sp['test_interval'] = '4'
            self.sp['display'] = '1'

    def add_from_file(self, filepath):
        """
        Reads a caffe solver prototxt file and updates the Caffesolver
        instance parameters.

        Blank lines, comment lines (first non-blank character '#') and lines
        without a ':' are skipped. Only the first ':' separates key from
        value, so values that themselves contain ':' are kept whole.
        """
        with open(filepath, 'r') as f:
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                key, sep, value = stripped.partition(':')
                if not sep:
                    # not a "key: value" line
                    continue
                self.sp[key.strip()] = value.strip()

    def write(self, filepath):
        """
        Export solver parameters to INPUT "filepath". Sorted alphabetically.

        Raises:
            TypeError: if any parameter value is not a string. The check runs
                before the file is opened, so a failure leaves any existing
                file untouched.
        """
        items = sorted(self.sp.items())
        for key, value in items:
            if not isinstance(value, str):
                raise TypeError('All solver parameters must be strings')
        # the context manager flushes and closes the file
        with open(filepath, 'w') as f:
            for key, value in items:
                f.write('%s: %s\n' % (key, value))

---------------------

本文來自 thesby 的CSDN 博客 ,全文地址請點擊:https://blog.csdn.net/thesby/article/details/51264439?utm_source=copy

©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容