I'm developing an android app to recognize text in particular plate, as in photo here:
I have to recognize the texts in white (e.g. near to "Mod."). I'm using Google ML Kit's text recognition APIs, but it fails. So, I'm using OpenCV to edit image but I don't know how to emphasize the (white) texts so OCR recognize it. I tried more stuff, like contrast, brightness, gamma correction, adaptive thresholding, but the cases vary a lot depending on how the photo is taken. Do you have any ideas?
Thank u very much.
I coded this example in Python (since OpenCV's SIFT in Android is paid) but you can still use this to understand how to solve it.
First I created this image as a template:
Step 1: Load images
""" 1. Load images """
# load image of plate
src_path = "nRHzD.jpg"
src = cv2.imread(src_path)
# load template of plate (to be looked for)
src_template_path = "nRHzD_template.jpg"
src_template = cv2.imread(src_template_path)
Step 2: Find the template using SIFT and perspective transformation
# convert images to gray scale
src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
src_template_gray = cv2.cvtColor(src_template, cv2.COLOR_BGR2GRAY)
# use SIFT to find template
n_matches_min = 10
template_found, homography = find_template(src_gray, src_template_gray, n_matches_min)
warp = transform_perspective_and_crop(homography, src, src_gray, src_template)
warp_gray = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp_hsv = cv2.cvtColor(warp, cv2.COLOR_BGR2HSV)
template_hsv = cv2.cvtColor(src_template, cv2.COLOR_BGR2HSV)
Step 3: Find regions of interest (using the green parts of the template image)
green_hsv_lower_bound = [50, 250, 250]
green_hsv_upper_bound = [60, 255, 255]
mask_rois, mask_rois_img = crop_img_in_hsv_range(warp, template_hsv, green_hsv_lower_bound, green_hsv_upper_bound)
roi_list = separate_rois(mask_rois, warp_gray)
# sort the rois by distance to top right corner -> x (value[1]) + y (value[2])
roi_list = sorted(roi_list, key=lambda values: values[1]+values[2])
Step 4: Apply a Canny Edge detection to the rois (regions of interest)
for i, roi in enumerate(roi_list):
roi_img, roi_x_offset, roi_y_offset = roi
print("#roi:{} x:{} y:{}".format(i, roi_x_offset, roi_y_offset))
roi_img_blur_threshold = cv2.Canny(roi_img, 40, 200)
cv2.imshow("ROI image", roi_img_blur_threshold)
cv2.waitKey()
There are many ways for you to detect the digits, one of the easiest approaches is to run a K-Means Clustering on each of the contours.
Full code:
""" This code shows a way of getting the digit's edges in a pre-defined position (in green) """
import cv2
import numpy as np
def find_template(src_gray, src_template_gray, n_matches_min):
# Initiate SIFT detector
sift = cv2.xfeatures2d.SIFT_create()
""" find grid using SIFT """
# find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(src_template_gray, None)
kp2, des2 = sift.detectAndCompute(src_gray, None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks = 50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)
# store all the good matches as per Lowe's ratio test.
good = []
for m,n in matches:
if m.distance < 0.7*n.distance:
good.append(m)
if len(good) > n_matches_min:
src_pts = np.float32([kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
matchesMask = mask.ravel().tolist()
h_template, w_template = src_template_gray.shape
pts = np.float32([[0, 0], [0, h_template - 1], [w_template - 1, h_template - 1], [w_template - 1,0]]).reshape(-1,1,2)
homography = cv2.perspectiveTransform(pts, M)
else:
print "Not enough matches are found - %d/%d" % (len(good), n_matches_min)
matchesMask = None
# show matches
draw_params = dict(matchColor = (0, 255, 0), # draw matches in green color
singlePointColor = None,
matchesMask = matchesMask, # draw only inliers
flags = 2)
if matchesMask:
src_gray_copy = src_gray.copy()
sift_matches = cv2.polylines(src_gray_copy, [np.int32(homography)], True, 255, 2, cv2.LINE_AA)
sift_matches = cv2.drawMatches(src_template_gray, kp1, src_gray_copy, kp2, good, None, **draw_params)
return sift_matches, homography
def transform_perspective_and_crop(homography, src, src_gray, src_template_gray):
""" get mask and contour of template """
mask_img_template = np.zeros(src_gray.shape, dtype=np.uint8)
mask_img_template = cv2.polylines(mask_img_template, [np.int32(homography)], True, 255, 1, cv2.LINE_AA)
_ret, contours, hierarchy = cv2.findContours(mask_img_template, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
template_contour = None
# approximate the contour
c = contours[0]
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
# if our approximated contour has four points, then
# we can assume that we have found our template
warp = None
if len(approx) == 4:
template_contour = approx
cv2.drawContours(mask_img_template, [template_contour] , -1, (255,0,0), -1)
""" Transform perspective """
# now that we have our template contour, we need to determine
# the top-left, top-right, bottom-right, and bottom-left
# points so that we can later warp the image -- we'll start
# by reshaping our contour to be our finals and initializing
# our output rectangle in top-left, top-right, bottom-right,
# and bottom-left order
pts = template_contour.reshape(4, 2)
rect = np.zeros((4, 2), dtype = "float32")
# the top-left point has the smallest sum whereas the
# bottom-right has the largest sum
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# compute the difference between the points -- the top-right
# will have the minumum difference and the bottom-left will
# have the maximum difference
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
# now that we have our rectangle of points, let's compute
# the width of our new image
(tl, tr, br, bl) = rect
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
# ...and now for the height of our new image
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
# take the maximum of the width and height values to reach
# our final dimensions
maxWidth = max(int(widthA), int(widthB))
maxHeight = max(int(heightA), int(heightB))
# construct our destination points which will be used to
# map the screen to a top-down, "birds eye" view
homography = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# calculate the perspective transform matrix and warp
# the perspective to grab the screen
M = cv2.getPerspectiveTransform(rect, homography)
warp = cv2.warpPerspective(src, M, (maxWidth, maxHeight))
# resize warp
h_template, w_template, _n_channels = src_template_gray.shape
warp = cv2.resize(warp, (w_template, h_template), interpolation=cv2.INTER_AREA)
return warp
def crop_img_in_hsv_range(img, hsv, lower_bound, upper_bound):
mask = cv2.inRange(hsv, np.array(lower_bound), np.array(upper_bound))
# do an MORPH_OPEN (erosion followed by dilation) to remove isolated pixels
kernel = np.ones((5,5), np.uint8)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
# Bitwise-AND mask and original image
res = cv2.bitwise_and(img, img, mask=mask)
return mask, res
def separate_rois(column_mask, img_gray):
# go through each of the boxes
# https://stackoverflow.com/questions/41592039/contouring-a-binary-mask-with-opencv-python
border = cv2.copyMakeBorder(column_mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
_, contours, hierarchy = cv2.findContours(border, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE, offset=(-1, -1))
cell_list = []
for contour in contours:
cell_mask = np.zeros_like(img_gray) # Create mask where white is what we want, black otherwise
cv2.drawContours(cell_mask, [contour], -1, 255, -1) # Draw filled contour in mask
# turn that mask into a rectangle
(x,y,w,h) = cv2.boundingRect(contour)
#print("x:{} y:{} w:{} h:{}".format(x, y, w, h))
cv2.rectangle(cell_mask, (x, y), (x+w, y+h), 255, -1)
# copy the img_gray using that mask
img_tmp_region = cv2.bitwise_and(img_gray, img_gray, mask= cell_mask)
# Now crop
(y, x) = np.where(cell_mask == 255)
(top_y, top_x) = (np.min(y), np.min(x))
(bottom_y, bottom_x) = (np.max(y), np.max(x))
img_tmp_region = img_tmp_region[top_y:bottom_y+1, top_x:bottom_x+1]
cell_list.append([img_tmp_region, top_x, top_y])
return cell_list
""" 1. Load images """
# load image of plate
src_path = "nRHzD.jpg"
src = cv2.imread(src_path)
# load template of plate (to be looked for)
src_template_path = "nRHzD_template.jpg"
src_template = cv2.imread(src_template_path)
""" 2. Find the plate (using the template image) and crop it into a rectangle """
# convert images to gray scale
src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
src_template_gray = cv2.cvtColor(src_template, cv2.COLOR_BGR2GRAY)
# use SIFT to find template
n_matches_min = 10
template_found, homography = find_template(src_gray, src_template_gray, n_matches_min)
warp = transform_perspective_and_crop(homography, src, src_gray, src_template)
warp_gray = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp_hsv = cv2.cvtColor(warp, cv2.COLOR_BGR2HSV)
template_hsv = cv2.cvtColor(src_template, cv2.COLOR_BGR2HSV)
""" 3. Find regions of interest (using the green parts of the template image) """
green_hsv_lower_bound = [50, 250, 250]
green_hsv_upper_bound = [60, 255, 255]
mask_rois, mask_rois_img = crop_img_in_hsv_range(warp, template_hsv, green_hsv_lower_bound, green_hsv_upper_bound)
roi_list = separate_rois(mask_rois, warp_gray)
# sort the rois by distance to top right corner -> x (value[1]) + y (value[2])
roi_list = sorted(roi_list, key=lambda values: values[1]+values[2])
""" 4. Apply a Canny Edge detection to the rois (regions of interest) """
for i, roi in enumerate(roi_list):
roi_img, roi_x_offset, roi_y_offset = roi
print("#roi:{} x:{} y:{}".format(i, roi_x_offset, roi_y_offset))
roi_img_blur_threshold = cv2.Canny(roi_img, 40, 200)
cv2.imshow("ROI image", roi_img_blur_threshold)
cv2.waitKey()
I currently have a ipynb file (ipython notebook) that contains a linear regression code / model(im not entirely sure if it's a model) that I've created earlier.
How do i implement this model in an android application such that if I were to input a value of 'x' in a text box, it'll output in a textview the predicted value of 'y'. Function: Y = mx + b.
I've tried looking at different tutorials, but they were mostly not "step-by-step" guides, which made it really hard to understand, I'm a beginner at coding.
Here's my code for the model:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
rng = np.random
from numpy import genfromtxt
from sklearn.datasets import load_boston
# Parameters
learning_rate = 0.0001
training_epochs = 1000
display_step = 50
n_samples = 222
X = tf.placeholder("float") # create symbolic variables
Y = tf.placeholder("float")
filename_queue = tf.train.string_input_producer(["C://Users//Shiina//battdata.csv"],shuffle=False)
reader = tf.TextLineReader() # skip_header_lines=1 if csv has headers
key, value = reader.read(filename_queue)
# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[1.], [1.]]
height, soc= tf.decode_csv(
value, record_defaults=record_defaults)
features = tf.stack([height])
# Set model weights
W = tf.Variable(rng.randn(), name="weight")
b = tf.Variable(rng.randn(), name="bias")
# Construct a linear model
pred_soc = tf.add(tf.multiply(height, W), b) # XW + b <- y = mx + b where W is gradient, b is intercept
# Mean squared error
cost = tf.reduce_sum(tf.pow(pred_soc-soc, 2))/(2*n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
# Start populating the filename queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
sess.run(init)
# Fit all training data
for epoch in range(training_epochs):
_, cost_value = sess.run([optimizer,cost])
#Display logs per epoch step
if (epoch+1) % display_step == 0:
c = sess.run(cost)
print( "Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \
"W=", sess.run(W), "b=", sess.run(b))
print("Optimization Finished!")
training_cost = sess.run(cost)
print ("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
#Plot data after completing training
train_X = []
train_Y = []
for i in range(n_samples): #Your input data size to loop through once
X, Y = sess.run([height, pred_soc]) # Call pred, to get the prediction with the updated weights
train_X.append(X)
train_Y.append(Y)
#Graphic display
plt.plot(train_X, train_Y, 'ro', label='Original data')
plt.ylabel("SoC")
plt.xlabel("Height")
plt.axis([0, 1, 0, 100])
plt.plot(train_X, train_Y, linewidth=2.0)
plt.legend()
plt.show()
coord.request_stop()
coord.join(threads)
I am using template matching on openCV java to identify if a subimage exist in larger image. I want to get coordinates of match only if exact subimage is available in larger image. I am using this code but getting a lot of false positive. Attached is the subimage and larger image. Subimage is not present in larger image but i am getting match at (873,715) larger image subimage
public void run(String inFile, String templateFile, String outFile,
int match_method) {
System.out.println("Running Template Matching");
Mat img = Imgcodecs.imread(inFile);
Mat templ = Imgcodecs.imread(templateFile);
// / Create the result matrix
int result_cols = img.cols() - templ.cols() + 1;
int result_rows = img.rows() - templ.rows() + 1;
Mat result = new Mat(result_rows, result_cols, CvType.CV_32FC1);
// / Do the Matching and Normalize
Imgproc.matchTemplate(img, templ, result, match_method);
// Core.normalize(result, result, 0, 1, Core.NORM_MINMAX, -1, new
// Mat());
Imgproc.threshold(result, result, 0.1, 1.0, Imgproc.THRESH_TOZERO);
// / Localizing the best match with minMaxLoc
MinMaxLocResult mmr = Core.minMaxLoc(result);
Point matchLoc;
if (match_method == Imgproc.TM_SQDIFF
|| match_method == Imgproc.TM_SQDIFF_NORMED) {
matchLoc = mmr.minLoc;
} else {
matchLoc = mmr.maxLoc;
}
double threashhold = 1.0;
if (mmr.maxVal > threashhold) {
System.out.println(matchLoc.x+" "+matchLoc.y);
Imgproc.rectangle(img, matchLoc, new Point(matchLoc.x + templ.cols(),
matchLoc.y + templ.rows()), new Scalar(0, 255, 0));
}
// Save the visualized detection.
Imgcodecs.imwrite(outFile, img);
}
Here is an answer of the same question: Determine if an image exists within a larger image, and if so, find it, using Python
You will have to convert the python code to Java
I am not familiar with OpenCV in Java but OpenCV C++.
I don't think following code is necessary.
Imgproc.threshold(result, result, 0.1, 1.0, Imgproc.THRESH_TOZERO);
The min/max values of 'Mat result' will be between -1 and 1 if you uses normalized option. Therefore, your following code will not work because your threshold is 1.0 if you use normalized option.
if (mmr.maxVal > threshold)
Also, if you use CV_TM_SQDIFF, above code should be
if (mmr.minVal < threshold)
with proper threshold.
How about drawing minMaxLoc before comparing minVal/maxVal with threshold? to see it gives correct result? because match at (873,715) is ridiculous.
I am trying to use K-means JavaCV implementation, but I have the following error:
OpenCV Error: Assertion failed (!centers.empty()) in cvKMeans2, file src\matrix.cpp, line 4233
My source code is:
IplImage src = cvLoadImage(fileName, CV_LOAD_IMAGE_COLOR);
int cluster_count = 3;
int attempts = 10;
CvTermCriteria termCriteria = new CvTermCriteria(TermCriteria.EPS + TermCriteria.MAX_ITER, 10, 1.0);
cvReshape(src, src.asCvMat(), 1, src.height() * src.width());
IplImage samples = cvCreateImage(cvGetSize(src), src.depth(), 1);
cvConvertImage(src, samples, CV_32F);
IplImage labels = cvCreateImage(new CvSize(samples.height()), 1, CV_8U);
IplImage centers = cvCreateImage(new CvSize(cluster_count), 1, CV_32F);
cvKMeans2(samples, cluster_count, labels, termCriteria, 1, new long[attempts], KMEANS_RANDOM_CENTERS, centers, new double[attempts]);
I'm beginner on JavaCV and would to know what I doing wrong in this code?
I'm new to opencv's svm's. I tried a sample classifier but it only returns 0 as the predicted label. I even used the value 5 for training as well as the prediction.
I've been changing the values for about a hundred times but i just don't get what's wrong. I'm using OpenCV 3.0 with Java. Here's my code:
Mat labels = new Mat(new Size(1,4),CvType.CV_32SC1);
labels.put(0, 0, 1);
labels.put(1, 0, 1);
labels.put(2, 0, 1);
labels.put(3, 0, 0);
Mat data = new Mat(new Size(1,4),CvType.CV_32FC1);
data.put(0, 0, 5);
data.put(1, 0, 2);
data.put(2, 0, 3);
data.put(3, 0, 8);
Mat testSamples = new Mat(new Size(1,1),CvType.CV_32FC1);
testSamples.put(0,0,5);
SVM svm = SVM.create();
TermCriteria criteria = new TermCriteria(TermCriteria.EPS + TermCriteria.MAX_ITER,100,0.1);
svm.setKernel(SVM.LINEAR);
svm.setType(SVM.C_SVC);
svm.setGamma(0.5);
svm.setNu(0.5);
svm.setC(1);
svm.setTermCriteria(criteria);
//data is N x 64 trained data Mat , labels is N x 1 label Mat with integer values;
svm.train(data, Ml.ROW_SAMPLE, labels);
Mat results = new Mat();
int predictedClass = (int) svm.predict(testSamples, results, 0);
Even if i change the lables to 1 and 2, I still get 0.0 as a response. So something has to be absolutely wrong... I just don't know what to do. Please help! :)
I had a similar problem in C++. I'm not too sure if it's the same in Java but in C++ the predictions were saved in the results Matrix instead of returned as a float.