#This program uses multi-scale template matching to find an object in a video stream.
#The object is the "template", which is an image file (JPG, PNG, etc.).
#The video stream comes from the Raspberry Pi camera module.
#This program works on the Raspberry Pi 2, Jessie, OpenCV 3.0.0.
import cv2
import picamera
import picamera.array
import numpy as np #for template matching
import imutils #for template matching, "image processing convenience functions" THIS IS A FUNCITON CALL!
#Main script: grab frames from the Pi camera, search each frame for the template
#at several scales, and display the frame with a box around the best match.
#The two `with` statements close the camera and the capture buffer on exit.
with picamera.PiCamera() as camera:
    with picamera.array.PiRGBArray(camera) as stream:
        camera.resolution = (1944, 1944) #I made it a square, you can make it whatever you want

        ### TEMPLATE PREPARATION
        template_path = 'glasses_mod.png' #I used a photo of sunglasses, cropped down to just the sunglasses and nothing else
        template = cv2.imread(template_path)
        if template is None:
            #cv2.imread returns None (it does not raise) when the file is missing or unreadable
            raise IOError("could not read template image: " + template_path)
        (template_height, template_width) = template.shape[:2]
        template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY) #gray it
        template = cv2.GaussianBlur(template, (7, 7), 0) #blur it
        template = cv2.Canny(template, 50, 150) #edge it
        cv2.imshow("template", template) #show it (not necessary, but I like to see what I'm working with)

        #scales to scan: ending value (20%), starting value (100%), number of slices in between (20)
        #the list is the same for every frame, so it is built once and walked from largest to smallest
        scales = np.linspace(0.2, 1.0, 20)[::-1]

        while True:
            camera.capture(stream, 'bgr', use_video_port=True)
            image_color = stream.array #take an image from the video stream
            roi_image = image_color[255:765, 510:1020] #select a region of interest
            image_gray = cv2.cvtColor(roi_image, cv2.COLOR_BGR2GRAY) #change the image to grayscale
            (h, w) = image_gray.shape[:2]
            center = (w/2, h/2)
            M = cv2.getRotationMatrix2D(center, 90, 1.0)
            image_gray_rotated = cv2.warpAffine(image_gray, M, (w, h)) #my camera is sideways so I have to rotate it (not required if your camera is upright)
            im_gblurred = cv2.GaussianBlur(image_gray_rotated, (7, 7), 0) #blur the image

            ### MULTI-SCALE TEMPLATE MATCHING
            ms_image = im_gblurred #this line is not required, but I was playing around with different multiscale image transformations
            found = None #best match so far as (maxVal, maxLoc, r); stays None if no scale could be searched

            #scan each scale of the image
            for scale in scales:
                #resize the image according to the scale and keep track of the ratio of the resizing
                resized = imutils.resize(ms_image, width = int(ms_image.shape[1] * scale))
                r = ms_image.shape[1] / float(resized.shape[1])

                #if the resized image is smaller than the template then break from the loop
                #(the scales only get smaller from here on, so no later scale can fit either)
                if resized.shape[0] < template_height or resized.shape[1] < template_width:
                    break

                #detect the edges in the resized grayscale image and apply template matching to find the template in the image
                edged = cv2.Canny(resized, 50, 150) #must use the SAME parameters here as you did for the template ABOVE for best results

                #input image: must be 8-bit or 32-bit floating point
                #template image: must not be larger than the image to search, and same data type
                #method: parameter specifying the comparison method (SQDIFF, SQDIFFNORM, CCORR, CCORRNORM, CCOEFF, CCOEFFNORM)
                #mask: mask of searched template, must be same data type and size as template; it is not set by default
                #result: map of comparison results, must be single-channel 32-bit float
                result = cv2.matchTemplate(edged, template, cv2.TM_CCOEFF) #this is where the magic happens!

                #The cv2.minMaxLoc function takes the correlation result and returns a 4-tuple
                #that includes the minimum correlation value, the maximum correlation value,
                #the (x, y)-coordinate of the minimum value, and the (x, y)-coordinate of the
                #maximum value, respectively. We are only interested in the maximum value and
                #(x, y)-coordinate so we keep the maximums and discard the minimums.
                (_, maxVal, _, maxLoc) = cv2.minMaxLoc(result)

                #if we found a new maximum correlation value, then update the bookkeeping variable
                if found is None or maxVal > found[0]:
                    found = (maxVal, maxLoc, r)

            #found is still None when even the full-size image was smaller than the template;
            #in that case there is nothing to draw and the frame is shown unmarked
            if found is not None:
                #unpack the bookkeeping variable and compute the (x, y) coordinates of the bounding box based on the resized ratio
                (_, maxLoc, r) = found
                (startX, startY) = (int(maxLoc[0] * r), int(maxLoc[1] * r))
                (endX, endY) = (int((maxLoc[0] + template_width) * r), int((maxLoc[1] + template_height) * r))

                #draw a box around the detected result
                #NOTE(review): ms_image is single-channel here, so presumably only the first
                #component (0) of the colour tuple is used and the box is drawn black - confirm
                cv2.rectangle(ms_image, (startX, startY), (endX, endY), (0, 0, 255), 2)

            cv2.imshow("Image", ms_image)
            stream.truncate(0) #Must use this to eliminate the error: "Incorrect buffer length"

            if cv2.waitKey(1) & 0xFF == ord('q'): #press q (with an image window focused) to stop the program from running
                break

# When everything is done, close the display windows. The camera and the stream
# have already been released by the `with` statements, so no explicit release call is needed.
cv2.destroyAllWindows()
### This code below should be in a separate file called imutils.py
# Import the necessary packages
import numpy as np
import cv2
def translate(image, x, y):
    """Return a copy of *image* shifted by the offsets *x* and *y*.

    The shift is applied with cv2.warpAffine using the 2x3 matrix
    [[1, 0, x], [0, 1, y]]; the output keeps the input's width and height.
    """
    # Affine matrix that encodes a pure translation
    shift_matrix = np.float32([[1, 0, x], [0, 1, y]])
    # warpAffine expects the output size as (width, height)
    out_size = (image.shape[1], image.shape[0])
    return cv2.warpAffine(image, shift_matrix, out_size)
def rotate(image, angle, center = None, scale = 1.0):
    """Return *image* rotated by *angle* about *center*, scaled by *scale*.

    When *center* is None the rotation is taken about (w / 2, h / 2), where
    w and h are the image's width and height. The output keeps the input's
    width and height.
    """
    height, width = image.shape[:2]
    # Fall back to the middle of the image when no pivot was supplied
    pivot = (width / 2, height / 2) if center is None else center
    # Build the rotation matrix and apply it
    rotation = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, rotation, (width, height))
def resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize *image* to a given width or height, preserving aspect ratio.

    If both *width* and *height* are None the image is returned unchanged.
    If *width* is given it determines the output size (any *height* passed
    alongside it is ignored); otherwise *height* does. The other dimension
    is scaled by the same ratio and truncated to an int. *inter* is passed
    to cv2.resize as the interpolation method.
    """
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand back the original image object
    if width is None and height is None:
        return image

    if width is not None:
        # Width drives the scale; derive the matching height
        factor = width / float(src_w)
        target = (width, int(src_h * factor))
    else:
        # Only the height was supplied; derive the matching width
        factor = height / float(src_h)
        target = (int(src_w * factor), height)

    # cv2.resize takes the target size as (width, height)
    return cv2.resize(image, target, interpolation = inter)