Last active
October 24, 2018 10:38
-
-
Save SnowMasaya/39b3b10709fd02b93884012fa15536b0 to your computer and use it in GitHub Desktop.
PyTorchで始める物体検出:Yolo 9000 Better, Faster, Stronger ref: https://qiita.com/GushiSnow/items/470512e5c04fcdfe7c59
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Conv2d_BatchNorm(nn.Module):
    """Bias-free 2-D convolution followed by batch norm and an optional LeakyReLU(0.1).

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels (also the batch-norm width).
        kernel_size: convolution kernel size.
        stride: convolution stride.
        relu: when true, a LeakyReLU with slope 0.1 is applied after batch norm.
        same_padding: when true, pad by ``int((kernel_size - 1) / 2)``;
            otherwise no padding is used.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, relu=True, same_padding=False):
        super(Conv2d_BatchNorm, self).__init__()
        if same_padding:
            padding = int((kernel_size - 1) / 2)
        else:
            padding = 0
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                              padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_channels, momentum=0.01)
        if relu:
            self.relu = nn.LeakyReLU(0.1, inplace=True)
        else:
            self.relu = None

    def forward(self, x):
        """Apply conv -> batch norm, then the activation if one was configured."""
        normalised = self.bn(self.conv(x))
        if self.relu is None:
            return normalised
        return self.relu(normalised)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Network input size, stored as (width, height).
# The builtin ``int`` is used instead of the ``np.int`` alias, which NumPy
# deprecated in 1.20 and removed in 1.24; the resulting dtype is the same.
inp_size = np.array([416, 416], dtype=int)  # w, h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Width and height are divided by the stride (truncated to int); the channel
# count is multiplied by stride squared.
out_w = int(w / stride)
out_h = int(h / stride)
out_c = c * (stride * stride)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load pretrained weights from the file named by cfg.pretrained_model,
# passing num_conv=18 to load_from_npz.
net.load_from_npz(cfg.pretrained_model, num_conv=18)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One group of (num_classes + 5) output channels per anchor.
# NOTE(review): the 5 extra values are presumably 4 box terms plus 1
# IoU/objectness score - confirm against the prediction reshape.
out_channels = cfg.num_anchors * (cfg.num_classes + 5)
# Final layer built with kernel size 1, stride 1 and relu=False.
self.conv5 = net_utils.Conv2d(c4, out_channels, 1, 1, relu=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Pr(\text{Norfolk terrier}) = Pr(\text{Norfolk terrier} \mid \text{terrier}) \cdot Pr(\text{terrier} \mid \text{hunting dog}) \cdots
Pr(\text{mammal} \mid \text{animal}) \cdot Pr(\text{animal} \mid \text{physical object})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Softmax each slice output[5:wordnet_index], then concatenate the results
# along dimension 0.
# NOTE(review): every slice starts at index 5, so the slices overlap; if
# output_index holds per-group end offsets this may be unintended - confirm.
softmax_word_tree = [
    F.softmax(output[5:wordnet_index])
    for wordnet_index in output_index
]
output_tensor = torch.cat(tuple(softmax_word_tree), 0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Net(nn.Module):
    """CNN classifier with two convolutional paths chosen by input height.

    Inputs whose height is at most ``over_size`` (28) go through ``conv1`` and
    ``conv2`` and are flattened to 320 features. Taller inputs go through
    ``conv1``, ``conv1_2`` and ``conv2_2`` and are flattened to 360 features.
    Both paths finish with dropout and the shared ``fc2`` layer (10 outputs).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Layers used for inputs with height <= over_size.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        # Extra layers used only for inputs with height > over_size.
        self.conv1_2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_2 = nn.Conv2d(20, 40, kernel_size=5)
        self.fc1_2 = nn.Linear(360, 50)
        # Height threshold that selects the deeper path.
        self.over_size = 28

    def forward(self, x):
        """Return log-softmax scores of shape (N, 10).

        Args:
            x: 4-D tensor; ``conv1`` expects a single input channel. Only the
                height (dimension 2) is compared against ``over_size``.
        """
        _, _, h, _ = x.size()
        # Both paths begin with the same conv1 -> max-pool -> ReLU stage.
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        if h > self.over_size:
            x = F.relu(F.max_pool2d(self.conv1_2(x), 2))
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2_2(x)), 2))
            x = x.view(-1, 360)
            x = F.relu(self.fc1_2(x))
        else:
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 320)
            x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # x is 2-D here, so dim=1 is what the implicit default resolved to;
        # stating it explicitly avoids the "implicit dimension" deprecation.
        return F.log_softmax(x, dim=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the training dataset from cfg: split name, data directory and batch
# size, with yolo_utils.preprocess_train as the image processor, processes=2,
# shuffling enabled and cfg.inp_size as dst_size.
imdb = VOCDataset(cfg.imdb_train, cfg.DATA_DIR, cfg.train_batch_size,
                  yolo_utils.preprocess_train, processes=2, shuffle=True, dst_size=cfg.inp_size)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def next_batch(self):
    """Collect the next ``self.batch_size`` processed samples into one batch.

    Samples are pulled from ``self.gen``. When that iterator is missing
    (first call) or exhausted, a new one is created with ``self.pool.imap``
    over all image indexes (shuffled if ``self._shuffle``), ``self._epoch``
    is incremented and collection continues, so a batch may span two epochs.

    Returns:
        dict with keys ``'images'``, ``'gt_boxes'``, ``'gt_classes'``,
        ``'dontcare'`` and ``'origin_im'``. ``'images'`` is converted with
        ``np.asarray``; the other values are plain lists.
    """
    batch = {'images': [], 'gt_boxes': [], 'gt_classes': [], 'dontcare': [], 'origin_im': []}
    i = 0
    while i < self.batch_size:
        try:
            # The builtin next() works on Python 2 and 3; the ``.next()``
            # method exists only on Python 2 iterators.
            images, gt_boxes, classes, dontcare, origin_im = next(self.gen)
        except (StopIteration, AttributeError):
            # No iterator yet, or the epoch is exhausted: start a new epoch.
            # ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
            indexes = np.arange(len(self.image_names), dtype=int)
            if self._shuffle:
                np.random.shuffle(indexes)
            self.gen = self.pool.imap(
                self._im_processor,
                ([self.image_names[idx], self.get_annotation(idx), self.dst_size] for idx in indexes),
                chunksize=self.batch_size)
            self._epoch += 1
            print('epoch {} start...'.format(self._epoch))
            continue
        batch['images'].append(images)
        batch['gt_boxes'].append(gt_boxes)
        batch['gt_classes'].append(classes)
        batch['dontcare'].append(dontcare)
        batch['origin_im'].append(origin_im)
        i += 1
    batch['images'] = np.asarray(batch['images'])
    return batch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Instantiate the Darknet19 network.
net = Darknet19()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _make_layers(in_channels, net_cfg): | |
layers = [] | |
if len(net_cfg) > 0 and isinstance(net_cfg[0], list): | |
for sub_cfg in net_cfg: | |
layer, in_channels = _make_layers(in_channels, sub_cfg) | |
layers.append(layer) | |
else: | |
for item in net_cfg: | |
if item == 'M': | |
layers.append(nn.MaxPool2d(kernel_size=2, stride=2)) | |
else: | |
out_channels, ksize = item | |
layers.append(net_utils.Conv2d_BatchNorm(in_channels, out_channels, ksize, same_padding=True)) | |
# layers.append(net_utils.Conv2d(in_channels, out_channels, ksize, same_padding=True)) | |
in_channels = out_channels | |
return nn.Sequential(*layers), in_channels |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Anchor box priors, one (first, second) size pair per anchor.
# The builtin ``float`` replaces the ``np.float`` alias, which NumPy deprecated
# in 1.20 and removed in 1.24; the dtype is still float64.
anchors = np.asarray(
    [(1.08, 1.19), (3.42, 4.41), (6.63, 11.38), (9.42, 5.11), (16.62, 10.52)],
    dtype=float,
)
num_anchors = len(anchors)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load pretrained weights from the file named by cfg.pretrained_model,
# passing num_conv=18 to load_from_npz.
net.load_from_npz(cfg.pretrained_model, num_conv=18)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_from_npz(self, fname, num_conv=None):
    """Copy pretrained weights from an ``.npz`` archive into this module.

    State-dict entries are consumed in groups of five; group ``i`` is read
    from archive keys of the form ``'{i}-convolutional/<name>:0'``, where
    ``<name>`` comes from the ``dest_src`` mapping below. Kernels are permuted
    with ``(3, 2, 0, 1)`` before being copied in place.

    Args:
        fname: path or file object accepted by ``np.load``.
        num_conv: when not None, only the first ``num_conv`` groups are loaded.

    Raises:
        KeyError: if a state-dict entry has no mapping in ``dest_src`` or the
            archive lacks the expected key.
    """
    # Map from state-dict suffix to the parameter name used in the archive.
    dest_src = {'conv.weight': 'kernel', 'conv.bias': 'biases',
                'bn.weight': 'gamma', 'bn.bias': 'biases',
                'bn.running_mean': 'moving_mean', 'bn.running_var': 'moving_variance'}
    # state_dict() returns every parameter and buffer keyed by dotted name:
    # http://pytorch.org/docs/master/nn.html?highlight=state_dict#torch.nn.Module.state_dict
    own_dict = self.state_dict()
    # Newer PyTorch BatchNorm layers also register a ``num_batches_tracked``
    # buffer. It has no counterpart in the archive and would shift the
    # five-entry grouping, so it is left out. On versions without that buffer
    # this filter changes nothing.
    keys = [key for key in own_dict.keys() if not key.endswith('num_batches_tracked')]
    # The context manager closes the archive once loading is done.
    with np.load(fname) as params:
        # Walk the keys five at a time: one group per conv + batch-norm layer.
        for i, start in enumerate(range(0, len(keys), 5)):
            if num_conv is not None and i >= num_conv:
                break
            for key in keys[start:start + 5]:
                list_key = key.split('.')
                ptype = dest_src['{}.{}'.format(list_key[-2], list_key[-1])]
                src_key = '{}-convolutional/{}:0'.format(i, ptype)
                print((src_key, own_dict[key].size(), params[src_key].shape))
                param = torch.from_numpy(params[src_key])
                # Only kernels need their axes reordered to match the
                # destination tensor's layout.
                if ptype == 'kernel':
                    param = param.permute(3, 2, 0, 1)
                own_dict[key].copy_(param)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
('0-convolutional/kernel:0', torch.Size([32, 3, 3, 3]), (3, 3, 3, 32)) | |
('0-convolutional/gamma:0', torch.Size([32]), (32,)) | |
('0-convolutional/biases:0', torch.Size([32]), (32,)) | |
('0-convolutional/moving_mean:0', torch.Size([32]), (32,)) | |
('0-convolutional/moving_variance:0', torch.Size([32]), (32,)) | |
('1-convolutional/kernel:0', torch.Size([64, 32, 3, 3]), (3, 3, 32, 64)) | |
('1-convolutional/gamma:0', torch.Size([64]), (64,)) | |
('1-convolutional/biases:0', torch.Size([64]), (64,)) | |
('1-convolutional/moving_mean:0', torch.Size([64]), (64,)) | |
('1-convolutional/moving_variance:0', torch.Size([64]), (64,)) | |
: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the network on the image batch; ground-truth boxes, classes and the
# dontcare data are passed in alongside it.
net(im_data, gt_boxes, gt_classes, dontcare)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The builtin ``float`` replaces the ``np.float`` alias (removed in NumPy
# 1.24); both give float64, so the arrays passed on are unchanged.
anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
# Add a leading axis of size 1 before calling yolo_to_bbox.
bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
bbox_np = yolo_to_bbox(
    np.ascontiguousarray(bbox_pred_np, dtype=float),
    anchors,
    H, W)
bbox_np = bbox_np[0]  # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert every prediction (batch b, grid cell ind, anchor a) into corner
# coordinates written to bbox_out.
for b in range(bsize):
    for row in range(H):
        for col in range(W):
            # Flattened row-major index of the grid cell.
            ind = row * W + col
            for a in range(num_anchors):
                # Box centre: predicted offset plus the cell position,
                # divided by the grid size.
                cx = (bbox_pred[b, ind, a, 0] + col) / W
                cy = (bbox_pred[b, ind, a, 1] + row) / H
                # Box width/height scaled by the anchor and divided by the
                # grid size. The 0.5 factor makes these half-extents measured
                # from the centre.
                bw = bbox_pred[b, ind, a, 2] * anchors[a][0] / W * 0.5
                bh = bbox_pred[b, ind, a, 3] * anchors[a][1] / H * 0.5
                # Store the corners as (x_min, y_min, x_max, y_max).
                bbox_out[b, ind, a, 0] = cx - bw
                bbox_out[b, ind, a, 1] = cy - bh
                bbox_out[b, ind, a, 2] = cx + bw
                bbox_out[b, ind, a, 3] = cy + bh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flatten the predicted boxes to rows of 4 coordinates.
bbox_np_b = np.reshape(bbox_np, [-1, 4])
# ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
ious = bbox_ious(
    np.ascontiguousarray(bbox_np_b, dtype=float),
    np.ascontiguousarray(gt_boxes_b, dtype=float)
)
# Best IoU of each predicted box over axis 1, reshaped to match _iou_mask.
best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
# Use one boolean mask for both the selection and the assignment. The
# original selected with ``<`` but assigned with ``<=``, so a best IoU exactly
# equal to the threshold produced arrays of different lengths.
low_iou = best_ious <= cfg.iou_thresh
iou_penalty = 0 - iou_pred_np[low_iou]
_iou_mask[low_iou] = cfg.noobject_scale * iou_penalty
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fill intersec[n, k] with the IoU of boxes[n] and query_boxes[k]; entries
# whose boxes do not overlap are left untouched.
for k in range(K):
    # query_boxes are the ground-truth boxes; compute the area of box k
    # (the +1 treats coordinates as inclusive).
    qbox_area = (
        (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
        (query_boxes[k, 3] - query_boxes[k, 1] + 1)
    )
    for n in range(N):
        # Width of the overlap between the ground-truth and predicted box.
        iw = (
            min(boxes[n, 2], query_boxes[k, 2]) -
            max(boxes[n, 0], query_boxes[k, 0]) + 1
        )
        if iw > 0:
            # Height of the overlap between the two boxes.
            ih = (
                min(boxes[n, 3], query_boxes[k, 3]) -
                max(boxes[n, 1], query_boxes[k, 1]) + 1
            )
            if ih > 0:
                # The overlap has positive width and height: compute the
                # predicted box's area.
                box_area = (
                    (boxes[n, 2] - boxes[n, 0] + 1) *
                    (boxes[n, 3] - boxes[n, 1] + 1)
                )
                # Area of the intersection.
                inter_area = iw * ih
                # Intersection over union.
                intersec[n, k] = inter_area / (qbox_area + box_area - inter_area)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Size of one output grid cell, in input pixels.
cell_w = float(inp_size[0]) / W
cell_h = float(inp_size[1]) / H
# Ground-truth box centres expressed in grid-cell units.
cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
# Flattened row-major index of the cell that contains each centre.
cell_inds = np.floor(cy) * W + np.floor(cx)
# The builtins ``int`` / ``float`` replace the ``np.int`` / ``np.float``
# aliases removed in NumPy 1.24; the resulting dtypes are unchanged.
cell_inds = cell_inds.astype(int)
target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
target_boxes[:, 0] = cx - np.floor(cx)  # cx
target_boxes[:, 1] = cy - np.floor(cy)  # cy
target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
gt_boxes_resize = np.copy(gt_boxes_b)
# Rescale the ground-truth boxes from input size to output size.
gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
# IoU between every anchor box and every rescaled ground-truth box.
# ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
anchor_ious = anchor_intersections(
    anchors,
    np.ascontiguousarray(gt_boxes_resize, dtype=float)
)
# For each ground-truth box, pick the anchor with the highest IoU.
anchor_inds = np.argmax(anchor_ious, axis=0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
hw = 4
# Zero-initialised array of shape (hw, num_anchors, 4).
# ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
_boxes = np.zeros([hw, num_anchors, 4], dtype=float)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fill intersec[n, k] with the IoU of anchor n and ground-truth box k,
# comparing sizes only (box positions are not used).
for n in range(N):
    anchor_area = anchors[n, 0] * anchors[n, 1]
    for k in range(K):
        # Width of the ground-truth box.
        boxw = (query_boxes[k, 2] - query_boxes[k, 0] + 1)
        # Height of the ground-truth box.
        boxh = (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        # Overlapping width.
        iw = min(anchors[n, 0], boxw)
        # Overlapping height.
        ih = min(anchors[n, 1], boxh)
        # Overlapping area.
        inter_area = iw * ih
        # Intersection over union.
        intersec[n, k] = inter_area / (anchor_area + boxw * boxh - inter_area)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For each ground-truth box, write its targets and loss weights into the grid
# cell that contains it, at the anchor selected for that box.
for i, cell_ind in enumerate(cell_inds):
    # Skip boxes whose cell index falls outside the prediction grid.
    if cell_ind >= hw or cell_ind < 0:
        # print() with a single argument behaves the same on Python 2 and 3;
        # the original ``print cell_ind`` statement is a syntax error on 3.
        print(cell_ind)
        continue
    # Anchor chosen for this ground-truth box.
    a = anchor_inds[i]
    iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]  # 0 ~ 1, should be close to 1
    # Loss weight for the IoU term: shrinks as the predicted IoU nears 1.
    _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)
    # IoU target: the IoU of this cell/anchor prediction with box i.
    _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]
    # Loss weight for the box coordinate term.
    _box_mask[cell_ind, a, :] = cfg.coord_scale
    # Express the target width/height relative to the anchor size.
    # Note this modifies target_boxes in place.
    target_boxes[i, 2:4] /= anchors[a]
    # Box regression target for this cell/anchor.
    _boxes[cell_ind, a, :] = target_boxes[i]
    # Loss weight for the classification term.
    _class_mask[cell_ind, a, :] = cfg.class_scale
    # One-hot class target.
    _classes[cell_ind, a, gt_classes[i]] = 1.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summed (not averaged) squared error, shared by the three loss terms; each
# term is masked on both sides and divided by num_boxes.
# NOTE(review): ``size_average`` is deprecated in newer PyTorch releases in
# favour of ``reduction='sum'`` - switch once the minimum version allows it.
sum_squared_error = nn.MSELoss(size_average=False)

masked_box_pred = bbox_pred * box_mask
masked_box_target = _boxes * box_mask
self.bbox_loss = sum_squared_error(masked_box_pred, masked_box_target) / num_boxes

masked_iou_pred = iou_pred * iou_mask
masked_iou_target = _ious * iou_mask
self.iou_loss = sum_squared_error(masked_iou_pred, masked_iou_target) / num_boxes

# Broadcast the class mask to the shape of the class predictions.
class_mask = class_mask.expand_as(prob_pred)
masked_cls_pred = prob_pred * class_mask
masked_cls_target = _classes * class_mask
self.cls_loss = sum_squared_error(masked_cls_pred, masked_cls_target) / num_boxes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take the loss stored on the network and back-propagate it.
loss = net.loss
loss.backward()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Start from the configured initial learning rate and create an SGD optimizer
# over all network parameters with the configured momentum and weight decay.
lr = cfg.init_learning_rate
optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=cfg.momentum, weight_decay=cfg.weight_decay)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clear the stored gradients, then apply one optimizer update.
# NOTE(review): as excerpted, step() directly follows zero_grad(); presumably
# the backward pass runs between them in the full training loop - confirm.
optimizer.zero_grad()
optimizer.step()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Every cfg.log_interval steps, send the current losses and learning rate to
# the tensorboard experiment.
if use_tensorboard and step % cfg.log_interval == 0:
    scalars = (
        ('loss_train', train_loss),
        ('loss_bbox', bbox_loss),
        ('loss_iou', iou_loss),
        ('loss_cls', cls_loss),
        ('learning_rate', lr),
    )
    for tag, value in scalars:
        exp.add_scalar_value(tag, value, step=step)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# When the dataset's current epoch is listed in cfg.lr_decay_epochs, multiply
# the learning rate by cfg.lr_decay and build a new SGD optimizer with it.
# NOTE(review): creating a fresh optimizer presumably drops any accumulated
# momentum state - confirm that is intended.
if imdb.epoch in cfg.lr_decay_epochs:
    lr *= cfg.lr_decay
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=cfg.momentum, weight_decay=cfg.weight_decay)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the training dataset from cfg: split name, data directory and batch
# size, with yolo_utils.preprocess_train as the image processor, processes=2,
# shuffling enabled and cfg.inp_size as dst_size.
imdb = VOCDataset(cfg.imdb_train, cfg.DATA_DIR, cfg.train_batch_size,
                  yolo_utils.preprocess_train, processes=2, shuffle=True, dst_size=cfg.inp_size)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Apply imcv2_affine_trans to the image; it returns the transformed image
# together with the parameters of the transform.
im, trans_param = imcv2_affine_trans(im)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
array([ | |
# この部分は各画像の座標、縦軸がアンカーボックスの数、横軸がオフセットの座標 | |
[[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.]], | |
[[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.]], | |
[[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.]], | |
[[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.], | |
[ 0., 0., 0., 0.]]]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
h, w, c = im.shape
# Random scale factor drawn from [1.0, 1.1).
scale = np.random.uniform() / 10. + 1.
# Upper bounds for the offsets: the extra width/height gained by scaling.
max_offx = (scale - 1.) * w
max_offy = (scale - 1.) * h
# Random offsets below those bounds, truncated to integers.
offx = int(np.random.uniform() * max_offx)
offy = int(np.random.uniform() * max_offy)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Map self._im_processor over [image name, annotation, dst_size] triples for
# the given indexes using the pool's imap, with one batch per chunk.
self.gen = self.pool.imap(self._im_processor,
                          ([self.image_names[i], self.get_annotation(i), self.dst_size] for i in indexes),
                          chunksize=self.batch_size)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
p_w,\ p_h \text{ はアンカーボックスのサイズ} \\
b_x = \sigma(t_x) + c_x \\
b_y = \sigma(t_y) + c_y \\
b_w = p_w e^{t_w} \\
b_h = p_h e^{t_h} \\
Pr(\text{object}) \cdot IOU(b, \text{object}) = \sigma(t_o)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Squash the first two values of each prediction with a sigmoid and
# exponentiate the next two.
# torch.sigmoid replaces F.sigmoid, which is deprecated in newer PyTorch
# releases; the result is identical.
xy_pred = torch.sigmoid(conv5_reshaped[:, :, :, 0:2])
wh_pred = torch.exp(conv5_reshaped[:, :, :, 2:4])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Layer configuration, one sub-list per group of layers.
# As consumed by _make_layers: 'M' is a 2x2 max-pool with stride 2 and a
# tuple is (out_channels, kernel_size) for one convolution + batch-norm layer.
net_cfgs = [
    # conv1s
    [(32, 3)],
    ['M', (64, 3)],
    ['M', (128, 3), (64, 1), (128, 3)],
    ['M', (256, 3), (128, 1), (256, 3)],
    ['M', (512, 3), (256, 1), (512, 3), (256, 1), (512, 3)],
    # conv2
    ['M', (1024, 3), (512, 1), (1024, 3), (512, 1), (1024, 3)],
    # ------------
    # conv3
    [(1024, 3), (1024, 3)],
    # conv4
    [(1024, 3)]
]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build conv4 from net_cfgs[7]; its input channel count is
# c1 * stride * stride + c3.
# NOTE(review): presumably this matches the channel count of the reorganised
# conv1s output concatenated with conv3 - confirm.
self.conv4, c4 = _make_layers((c1*(stride*stride) + c3), net_cfgs[7])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pass conv1s through self.reorg, then concatenate the result with conv3
# along dimension 1.
conv1s_reorg = self.reorg(conv1s)
cat_1_3 = torch.cat([conv1s_reorg, conv3], 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment