I'm developing an android app to recognize text in particular plate, as in photo here:
I have to recognize the texts in white (e.g. near to "Mod."). I'm using Google ML Kit's text recognition APIs, but it fails. So, I'm using OpenCV to edit image but I don't know how to emphasize the (white) texts so OCR recognize it. I tried more stuff, like contrast, brightness, gamma correction, adaptive thresholding, but the cases vary a lot depending on how the photo is taken. Do you have any ideas?
Thank u very much.
I coded this example in Python (since OpenCV's SIFT in Android is paid) but you can still use this to understand how to solve it.
First I created this image as a template:
Step 1: Load images
""" 1. Load images """
# load image of plate
src_path = "nRHzD.jpg"
src = cv2.imread(src_path)
# load template of plate (to be looked for)
src_template_path = "nRHzD_template.jpg"
src_template = cv2.imread(src_template_path)
Step 2: Find the template using SIFT and perspective transformation
# convert images to gray scale
src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
src_template_gray = cv2.cvtColor(src_template, cv2.COLOR_BGR2GRAY)
# use SIFT to find template
n_matches_min = 10
template_found, homography = find_template(src_gray, src_template_gray, n_matches_min)
warp = transform_perspective_and_crop(homography, src, src_gray, src_template)
warp_gray = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp_hsv = cv2.cvtColor(warp, cv2.COLOR_BGR2HSV)
template_hsv = cv2.cvtColor(src_template, cv2.COLOR_BGR2HSV)
Step 3: Find regions of interest (using the green parts of the template image)
green_hsv_lower_bound = [50, 250, 250]
green_hsv_upper_bound = [60, 255, 255]
mask_rois, mask_rois_img = crop_img_in_hsv_range(warp, template_hsv, green_hsv_lower_bound, green_hsv_upper_bound)
roi_list = separate_rois(mask_rois, warp_gray)
# sort the rois by distance to top right corner -> x (value[1]) + y (value[2])
roi_list = sorted(roi_list, key=lambda values: values[1]+values[2])
Step 4: Apply a Canny Edge detection to the rois (regions of interest)
for i, roi in enumerate(roi_list):
roi_img, roi_x_offset, roi_y_offset = roi
print("#roi:{} x:{} y:{}".format(i, roi_x_offset, roi_y_offset))
roi_img_blur_threshold = cv2.Canny(roi_img, 40, 200)
cv2.imshow("ROI image", roi_img_blur_threshold)
cv2.waitKey()
There are many ways for you to detect the digits, one of the easiest approaches is to run a K-Means Clustering on each of the contours.
Full code:
""" This code shows a way of getting the digit's edges in a pre-defined position (in green) """
import cv2
import numpy as np
def find_template(src_gray, src_template_gray, n_matches_min):
# Initiate SIFT detector
sift = cv2.xfeatures2d.SIFT_create()
""" find grid using SIFT """
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(src_template_gray, None)
kp2, des2 = sift.detectAndCompute(src_gray, None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)
# store all the good matches as per Lowe's ratio test.
good = []
for m,n in matches:
if m.distance < 0.7*n.distance:
good.append(m)
if len(good) > n_matches_min:
src_pts = np.float32([kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
matchesMask = mask.ravel().tolist()
h_template, w_template = src_template_gray.shape
pts = np.float32([[0, 0], [0, h_template - 1], [w_template - 1, h_template - 1], [w_template - 1,0]]).reshape(-1,1,2)
homography = cv2.perspectiveTransform(pts, M)
else:
print "Not enough matches are found - %d/%d" % (len(good), n_matches_min)
matchesMask = None
# show matches
draw_params = dict(matchColor = (0, 255, 0), # draw matches in green color
singlePointColor = None,
matchesMask = matchesMask, # draw only inliers
flags = 2)
if matchesMask:
src_gray_copy = src_gray.copy()
sift_matches = cv2.polylines(src_gray_copy, [np.int32(homography)], True, 255, 2, cv2.LINE_AA)
sift_matches = cv2.drawMatches(src_template_gray, kp1, src_gray_copy, kp2, good, None, **draw_params)
return sift_matches, homography
def transform_perspective_and_crop(homography, src, src_gray, src_template_gray):
""" get mask and contour of template """
mask_img_template = np.zeros(src_gray.shape, dtype=np.uint8)
mask_img_template = cv2.polylines(mask_img_template, [np.int32(homography)], True, 255, 1, cv2.LINE_AA)
_ret, contours, hierarchy = cv2.findContours(mask_img_template, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
template_contour = None
# approximate the contour
c = contours[0]
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
# if our approximated contour has four points, then
# we can assume that we have found our template
warp = None
if len(approx) == 4:
template_contour = approx
cv2.drawContours(mask_img_template, [template_contour] , -1, (255,0,0), -1)
""" Transform perspective """
# now that we have our template contour, we need to determine
# the top-left, top-right, bottom-right, and bottom-left
# points so that we can later warp the image -- we'll start
# by reshaping our contour to be our finals and initializing
# our output rectangle in top-left, top-right, bottom-right,
# and bottom-left order
pts = template_contour.reshape(4, 2)
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point has the smallest sum whereas the
# bottom-right has the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# compute the difference between the points -- the top-right
# will have the minumum difference and the bottom-left will
# have the maximum difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# now that we have our rectangle of points, let's compute
# the width of our new image
(tl, tr, br, bl) = rect
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
# ...and now for the height of our new image
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
# take the maximum of the width and height values to reach
# our final dimensions
maxWidth = max(int(widthA), int(widthB))
maxHeight = max(int(heightA), int(heightB))
# construct our destination points which will be used to
# map the screen to a top-down, "birds eye" view
homography = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# calculate the perspective transform matrix and warp
# the perspective to grab the screen
M = cv2.getPerspectiveTransform(rect, homography)
warp = cv2.warpPerspective(src, M, (maxWidth, maxHeight))
# resize warp
h_template, w_template, _n_channels = src_template_gray.shape
warp = cv2.resize(warp, (w_template, h_template), interpolation=cv2.INTER_AREA)
return warp
def crop_img_in_hsv_range(img, hsv, lower_bound, upper_bound):
mask = cv2.inRange(hsv, np.array(lower_bound), np.array(upper_bound))
# do an MORPH_OPEN (erosion followed by dilation) to remove isolated pixels
kernel = np.ones((5,5), np.uint8)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
# Bitwise-AND mask and original image
res = cv2.bitwise_and(img, img, mask=mask)
return mask, res
def separate_rois(column_mask, img_gray):
# go through each of the boxes
# https://stackoverflow.com/questions/41592039/contouring-a-binary-mask-with-opencv-python
border = cv2.copyMakeBorder(column_mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
_, contours, hierarchy = cv2.findContours(border, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE, offset=(-1, -1))
cell_list = []
for contour in contours:
cell_mask = np.zeros_like(img_gray) # Create mask where white is what we want, black otherwise
cv2.drawContours(cell_mask, [contour], -1, 255, -1) # Draw filled contour in mask
# turn that mask into a rectangle
(x,y,w,h) = cv2.boundingRect(contour)
#print("x:{} y:{} w:{} h:{}".format(x, y, w, h))
cv2.rectangle(cell_mask, (x, y), (x+w, y+h), 255, -1)
# copy the img_gray using that mask
img_tmp_region = cv2.bitwise_and(img_gray, img_gray, mask= cell_mask)
# Now crop
(y, x) = np.where(cell_mask == 255)
(top_y, top_x) = (np.min(y), np.min(x))
(bottom_y, bottom_x) = (np.max(y), np.max(x))
img_tmp_region = img_tmp_region[top_y:bottom_y+1, top_x:bottom_x+1]
cell_list.append([img_tmp_region, top_x, top_y])
return cell_list
""" 1. Load images """
# load image of plate
src_path = "nRHzD.jpg"
src = cv2.imread(src_path)
# load template of plate (to be looked for)
src_template_path = "nRHzD_template.jpg"
src_template = cv2.imread(src_template_path)
""" 2. Find the plate (using the template image) and crop it into a rectangle """
# convert images to gray scale
src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
src_template_gray = cv2.cvtColor(src_template, cv2.COLOR_BGR2GRAY)
# use SIFT to find template
n_matches_min = 10
template_found, homography = find_template(src_gray, src_template_gray, n_matches_min)
warp = transform_perspective_and_crop(homography, src, src_gray, src_template)
warp_gray = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp_hsv = cv2.cvtColor(warp, cv2.COLOR_BGR2HSV)
template_hsv = cv2.cvtColor(src_template, cv2.COLOR_BGR2HSV)
""" 3. Find regions of interest (using the green parts of the template image) """
green_hsv_lower_bound = [50, 250, 250]
green_hsv_upper_bound = [60, 255, 255]
mask_rois, mask_rois_img = crop_img_in_hsv_range(warp, template_hsv, green_hsv_lower_bound, green_hsv_upper_bound)
roi_list = separate_rois(mask_rois, warp_gray)
# sort the rois by distance to top right corner -> x (value[1]) + y (value[2])
roi_list = sorted(roi_list, key=lambda values: values[1]+values[2])
""" 4. Apply a Canny Edge detection to the rois (regions of interest) """
for i, roi in enumerate(roi_list):
roi_img, roi_x_offset, roi_y_offset = roi
print("#roi:{} x:{} y:{}".format(i, roi_x_offset, roi_y_offset))
roi_img_blur_threshold = cv2.Canny(roi_img, 40, 200)
cv2.imshow("ROI image", roi_img_blur_threshold)
cv2.waitKey()
I am trying to use K-means JavaCV implementation, but I have the following error:
OpenCV Error: Assertion failed (!centers.empty()) in cvKMeans2, file src\matrix.cpp, line 4233
My source code is:
IplImage src = cvLoadImage(fileName, CV_LOAD_IMAGE_COLOR);
int cluster_count = 3;
int attempts = 10;
CvTermCriteria termCriteria = new CvTermCriteria(TermCriteria.EPS + TermCriteria.MAX_ITER, 10, 1.0);
cvReshape(src, src.asCvMat(), 1, src.height() * src.width());
IplImage samples = cvCreateImage(cvGetSize(src), src.depth(), 1);
cvConvertImage(src, samples, CV_32F);
IplImage labels = cvCreateImage(new CvSize(samples.height()), 1, CV_8U);
IplImage centers = cvCreateImage(new CvSize(cluster_count), 1, CV_32F);
cvKMeans2(samples, cluster_count, labels, termCriteria, 1, new long[attempts], KMEANS_RANDOM_CENTERS, centers, new double[attempts]);
I'm beginner on JavaCV and would to know what I doing wrong in this code?
I'm new to opencv's svm's. I tried a sample classifier but it only returns 0 as the predicted label. I even used the value 5 for training as well as the prediction.
I've been changing the values for about a hundred times but i just don't get what's wrong. I'm using OpenCV 3.0 with Java. Here's my code:
Mat labels = new Mat(new Size(1,4),CvType.CV_32SC1);
labels.put(0, 0, 1);
labels.put(1, 0, 1);
labels.put(2, 0, 1);
labels.put(3, 0, 0);
Mat data = new Mat(new Size(1,4),CvType.CV_32FC1);
data.put(0, 0, 5);
data.put(1, 0, 2);
data.put(2, 0, 3);
data.put(3, 0, 8);
Mat testSamples = new Mat(new Size(1,1),CvType.CV_32FC1);
testSamples.put(0,0,5);
SVM svm = SVM.create();
TermCriteria criteria = new TermCriteria(TermCriteria.EPS + TermCriteria.MAX_ITER,100,0.1);
svm.setKernel(SVM.LINEAR);
svm.setType(SVM.C_SVC);
svm.setGamma(0.5);
svm.setNu(0.5);
svm.setC(1);
svm.setTermCriteria(criteria);
//data is N x 64 trained data Mat , labels is N x 1 label Mat with integer values;
svm.train(data, Ml.ROW_SAMPLE, labels);
Mat results = new Mat();
int predictedClass = (int) svm.predict(testSamples, results, 0);
Even if i change the lables to 1 and 2, I still get 0.0 as a response. So something has to be absolutely wrong... I just don't know what to do. Please help! :)
I had a similar problem in C++. I'm not too sure if it's the same in Java but in C++ the predictions were saved in the results Matrix instead of returned as a float.
Been trying to get inpainting to work on Android,
int height = (int) viewMat.size().height;
int width = (int) viewMat.size().width;
Mat maskMat = new Mat();
maskMat.create(viewMat.size(), CvType.CV_8U);
maskMat.setTo(bColor);
Point r1 = new Point(width/2-width/10, height/2-height/10);
Point r2 = new Point(width/2+width/10, height/2+height/10);
Scalar color = new Scalar(1);
Core.rectangle(maskMat, r1, r2, color, Core.FILLED);
outMat.create(viewMat.size(), CvType.CV_8UC3);
viewMat.convertTo(outMat, CvType.CV_8UC3);
Photo.inpaint(outMat, maskMat, outMat, 1, Photo.INPAINT_TELEA);
Was greeted with,
Caused by: CvException [org.opencv.core.CvException: /home/reports/ci/slave_desktop/50-SDK/opencv/modules/photo/src/inpaint.cpp:744:
error: (-210) Only 8-bit 1-channel and 3-channel input/output images are supported in function void cvInpaint(const CvArr*, const CvArr*, CvArr*, double, int)
in logcat.
Been trying for hours creating Mats in various ways but to no valid.
CV_8U = 8 bit per channel, 1 channel. Right?
CV_8UC3 = 8 bit per channel, 3 channels. Right?
So what am I missing? I'm totally stumped.
...
Point r2 = new Point(width/2+width/10, height/2+height/10);
Scalar color = new Scalar(1);
Core.rectangle(maskMat, r1, r2, color, Core.FILLED);
Imgproc.cvtColor(viewMat, outMat, Imgproc.COLOR_RGBA2RGB);
Photo.inpaint(outMat, maskMat, outMat, 1, Photo.INPAINT_TELEA);
...
Turned out it was simply a matter of getting rid of the alpha channel via color conversion.
Image Inpaint Using OpenCv Android Studio
ImgMat = Mat()
Maskmat = Mat()
destmat=Mat()
Utils.bitmapToMat(BitmapImage, ImgMat)
Utils.bitmapToMat(BitmapMask, Maskmat)
Imgproc.cvtColor(ImgMat, ImgMat, Imgproc.COLOR_RGB2XYZ)
Imgproc.cvtColor(Maskmat, Maskmat, Imgproc.COLOR_BGR2GRAY)
Photo.inpaint(ImgMat,Maskmat,destmat, 15.0, INPAINT_NS)
InpaintBitmap= Bitmap.createBitmap(BitmapImage.getWidth(),
BitmapImage.getHeight(),
Bitmap.Config.ARGB_8888)
Imgproc.cvtColor(destmat, destmat, Imgproc.COLOR_XYZ2RGB);
Utils.matToBitmap(destmat, InpaintBitmap)
Description:
InpaintBitmap is an Inpaint color Bitmap
src image is a 3 channel image but mask bitmap is a 1 channel image
As described in post title, I'm looking for a way to detect motion/movement on the input stream from CCTV camera (IP/WiFi). Anyone know best way how I can connect to IP video stream and monitor for motion?
this is the opencv code in python, java is simiar, you need use opencv for the image operation
import cv2, time, pandas
from datetime import datetime
first_frame = None
status_list = [None,None]
times = []
df=pandas.DataFrame(columns=["Start","End"])
video = cv2.VideoCapture('rtsp://admin:Paxton10#10.199.27.128:554')
while True:
check, frame = video.read()
status = 0
gray = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray,(21,21),0)
if first_frame is None:
first_frame=gray
continue
delta_frame=cv2.absdiff(first_frame,gray)
thresh_frame=cv2.threshold(delta_frame, 30, 255, cv2.THRESH_BINARY)[1]
thresh_frame=cv2.dilate(thresh_frame, None, iterations=2)
(cnts,_)=cv2.findContours(thresh_frame.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for contour in cnts:
if cv2.contourArea(contour) < 200000:
continue
status=1
(x, y, w, h)=cv2.boundingRect(contour)
cv2.rectangle(frame, (x, y), (x+w, y+h), (0,255,0), 3)
status_list.append(status)
status_list=status_list[-2:]
if status_list[-1]==1 and status_list[-2]==0:
times.append(datetime.now())
if status_list[-1]==0 and status_list[-2]==1:
times.append(datetime.now())
#cv2.imshow("Gray Frame",gray)
#cv2.imshow("Delta Frame",delta_frame)
imS = cv2.resize(thresh_frame, (640, 480))
cv2.imshow("Threshold Frame",imS)
imS = cv2.resize(frame, (640, 480))
cv2.imshow("Color Frame",imS)
#cv2.imshow("Color Frame",frame)
key=cv2.waitKey(1)
if key == ord('q'):
if status == 1:
times.append(datetime.now())
break
print(status_list)
print(times)
for i in range(0, len(times), 2):
df = df.append({"Start": times[i],"End": times[i+1]}, ignore_index=True)
df.to_csv("Times.csv")
video.release()
cv2.destroyAllWindows