diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1efc7b9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +models/*.caffemodel diff --git a/README.md b/README.md index b4254ff..3893fbf 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,12 @@ +# Class Activation Mapping for Python +I have ported demo.m and generate_bbox.m to Python so that the scripts can be used without Matlab. To run them in Python, just run +``` +python py_demo.py +``` +and +``` +python py_generate_bbox.py +``` # Sample code for the Class Activation Mapping We propose a simple technique to expose the implicit attention of Convolutional Neural Networks on the image. It highlights the most informative image regions relevant to the predicted class. You could get attention-based model instantly by tweaking your own CNN a little bit more. The paper is published at [CVPR'16](http://arxiv.org/pdf/1512.04150.pdf). diff --git a/models/download.sh b/models/download.sh index 6c06005..2d8bffc 100644 --- a/models/download.sh +++ b/models/download.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash cd $(dirname $0) -curl -O http://cnnlocalization.csail.mit.edu/demoCAM/models/imagenet_googleletCAM_train_iter_120000.caffemodel +curl -O http://cnnlocalization.csail.mit.edu/demoCAM/models/imagenet_googlenetCAM_train_iter_120000.caffemodel diff --git a/py_demo.py b/py_demo.py new file mode 100644 index 0000000..82a191a --- /dev/null +++ b/py_demo.py @@ -0,0 +1,97 @@ +import numpy as np +import sys +import os +try: + caffe_root = os.environ['CAFFE_ROOT'] + '/' +except KeyError: + raise KeyError("Define CAFFE_ROOT in ~/.bashrc") + +sys.path.insert(1, caffe_root+'python/') +import caffe +import cv2 +from py_returnCAMmap import py_returnCAMmap +from py_map2jpg import py_map2jpg +import scipy.io + +def im2double(im): + return cv2.normalize(im.astype('float'), None, 0.0, 1.0, cv2.NORM_MINMAX) + +## Be aware that since Matlab is 1-indexed and column-major, +## the usual 4 
## Blob layout note: Matlab is 1-indexed and column-major, so there the usual
## 4 blob dimensions are [width, height, channels, num].
## In Python (row-major numpy arrays) the same blob is indexed as
## [num, channels, height, width].

# --- Model selection -------------------------------------------------------
model = 'googlenet'
if model == 'alexnet':
    net_weights = 'models/alexnetplusCAM_imagenet.caffemodel'
    net_model = 'models/deploy_alexnetplusCAM_imagenet.prototxt'
    out_layer = 'fc9'
    last_conv = 'conv7'
    crop_size = 227
elif model == 'googlenet':
    net_weights = 'models/imagenet_googlenetCAM_train_iter_120000.caffemodel'
    net_model = 'models/deploy_googlenetCAM.prototxt'
    out_layer = 'CAM_fc'
    crop_size = 224
    last_conv = 'CAM_conv'
else:
    # ValueError is still an Exception, so existing handlers keep working.
    raise ValueError('This model is not defined')

categories = scipy.io.loadmat('categories1000.mat')

# --- Load the CAM model ----------------------------------------------------
net = caffe.Net(net_model, net_weights, caffe.TEST)

transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))  # HxWxC image -> CxHxW blob
transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1))
# No set_channel_swap here: the image is read with cv2.imread, which already
# returns BGR, the channel order the reference model uses.

# First parameter blob of the output layer, used as the per-class weights.
# shape: [1000, N] N-> depends on the network
weights_LR = net.params[out_layer][0].data

# --- Load and crop the input image -----------------------------------------
image_path = 'img2.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise IOError('Could not read image: %s' % image_path)
image = cv2.resize(image, (256, 256))

# Take center crop (same bounds as truncating center -/+ crop_size / 2).
img_h, img_w = image.shape[:2]
top = (img_h - crop_size) // 2
left = (img_w - crop_size) // 2
input_ = image[top:top + crop_size, left:left + crop_size, :]

# --- Extract conv features -------------------------------------------------
net.blobs['data'].reshape(1, 3, crop_size, crop_size)  # run only one image
net.blobs['data'].data[0, ...] = transformer.preprocess('data', input_)
out = net.forward()
scores = out['prob']
activation_lastconv = net.blobs[last_conv].data

## Class Activation Mapping

topNum = 5  # generate heatmap for top X prediction results
scoresMean = np.mean(scores, axis=0)
IDX_category = np.argsort(scoresMean)[::-1]  # [::-1] to sort in descending order

curCAMmapAll = py_returnCAMmap(activation_lastconv, weights_LR[IDX_category[:topNum], :])

# The dimmed background is the same for every class, so compute it once.
background = im2double(image) * 0.2
overlay_size = (image.shape[1], image.shape[0])  # cv2.resize expects (width, height)

for j in range(topNum):
    # NOTE(review): the map is computed from the center crop but stretched to
    # the full image size here, so the overlay is slightly scaled relative to
    # the crop -- confirm this approximation is intended.
    curHeatMap = im2double(cv2.resize(curCAMmapAll[:, :, j], overlay_size))

    curHeatMap = py_map2jpg(curHeatMap, None, 'jet')
    curHeatMap = background + im2double(curHeatMap) * 0.7

    # One window per class, titled with the class name; a key press advances.
    cv2.imshow(categories['categories'][IDX_category[j]][0][0], curHeatMap)
    cv2.waitKey(0)
bbox_threshold = [20, 100, 110]  # parameters for the bbox generator
curParaThreshold = ' '.join(str(t) for t in bbox_threshold) + ' '
curHeatMapFile = 'bboxgenerator/heatmap_6.jpg'
curImgFile = 'bboxgenerator/sample_6.jpg'
curBBoxFile = 'bboxgenerator/heatmap_6.txt'

# Run the external bbox generator: heatmap file, thresholds, output file.
status = os.system("bboxgenerator/./dt_box " + curHeatMapFile + ' ' + curParaThreshold + ' ' + curBBoxFile)
if status != 0:
    # Only warn: a previously generated curBBoxFile may still be readable.
    print('Warning: dt_box exited with status %s' % status)

# Gather every integer in the file, across all lines (keeping only the last
# line would silently drop boxes when the output spans several lines).
with open(curBBoxFile) as f:
    items = [int(tok) for line in f for tok in line.split()]

if len(items) % 4 != 0:
    raise ValueError('%s: expected a multiple of 4 integers, got %d'
                     % (curBBoxFile, len(items)))

# Each group of four integers is one box: a corner (first two values) plus
# offsets to the opposite corner (last two; presumably width and height).
boxes = np.array(items, dtype=int).reshape(-1, 4)
x0, y0 = boxes[:, 0], boxes[:, 1]
x1, y1 = x0 + boxes[:, 2], y0 + boxes[:, 3]

# Normalise to [min_x, min_y, max_x, max_y], one row per box.
boxData_formulate = np.column_stack([
    np.minimum(x0, x1),
    np.minimum(y0, y1),
    np.maximum(x0, x1),
    np.maximum(y0, y1),
])

curHeatMap = cv2.imread(curHeatMapFile)
curImg = cv2.imread(curImgFile)
# cv2.imread signals failure by returning None instead of raising.
if curHeatMap is None:
    raise IOError('Could not read image: %s' % curHeatMapFile)
if curImg is None:
    raise IOError('Could not read image: %s' % curImgFile)

# Colour the heatmap and blend it over the dimmed image.
curHeatMap = im2double(curHeatMap)
curHeatMap = py_map2jpg(curHeatMap, None, 'jet')
curHeatMap = im2double(curImg) * 0.2 + im2double(curHeatMap) * 0.7

for box in boxData_formulate:  # for each bbox
    print(box[:2])
    print(box[2:])
    # Plain Python ints: some OpenCV builds reject numpy integer scalars
    # inside point tuples.
    pt1 = tuple(int(v) for v in box[:2])
    pt2 = tuple(int(v) for v in box[2:])
    cv2.rectangle(curHeatMap, pt1, pt2, (255, 0, 0), 3)
    # Show the image after each box is drawn; a key press advances.
    cv2.imshow('bbox', curHeatMap)
    cv2.waitKey(0)
def py_returnCAMmap(activation, weights_LR):
    """Compute class activation maps for a single image.

    Every map is the weighted sum of the feature maps in ``activation``,
    using one row of ``weights_LR`` as the per-feature weights.

    Args:
        activation: array of shape ``[1, n_feat, d0, d1]`` holding the
            feature maps of one image.
        weights_LR: array of shape ``[n_top, n_feat]``; one weight row per
            requested map.

    Returns:
        A float64 array of shape ``[d0, d1, n_top]`` where ``out[:, :, t]``
        is the map obtained with ``weights_LR[t]``.

    Raises:
        NotImplementedError: if ``activation`` does not contain exactly one
            image (e.g. 10-crop over-sampling), which is not supported.
    """
    activation = np.asarray(activation)
    weights_LR = np.asarray(weights_LR)

    if activation.shape[0] != 1:
        # Still an Exception subclass, so existing handlers keep working.
        raise NotImplementedError('Not implemented')

    n_feat, w, h = activation[0].shape
    n_top = weights_LR.shape[0]

    # Flatten the spatial dimensions so all maps come from one matrix product:
    # [n_top, n_feat] x [n_feat, w*h] -> [n_top, w*h].
    act_vec = np.reshape(activation[0], [n_feat, w * h])
    heatmaps = np.dot(weights_LR, act_vec)

    # Writing into a zeros array keeps the float64 output dtype regardless of
    # the input dtype; the transpose moves the map index to the last axis.
    out = np.zeros([w, h, n_top])
    out[...] = np.reshape(heatmaps, [n_top, w, h]).transpose(1, 2, 0)
    return out