I'm developing an Android app to recognize the text on a particular kind of plate, as in the photo here:
I have to recognize the white text (e.g. next to "Mod."). I'm using Google ML Kit's text recognition API, but it fails. So I'm using OpenCV to preprocess the image, but I don't know how to emphasize the white text so that the OCR recognizes it. I have tried several things, such as contrast, brightness, gamma correction and adaptive thresholding, but the results vary a lot depending on how the photo is taken. Do you have any ideas?
Thank you very much.
I wrote this example in Python (since OpenCV's SIFT on Android is not free), but you can still use it to understand how to solve the problem.
First I created this image as a template:
Step 1: Load images
""" 1. Load images """
# Paths of the plate photo and of the hand-made template that is searched for in it.
src_path, src_template_path = "nRHzD.jpg", "nRHzD_template.jpg"
# Read both files as BGR images, in the same order as the paths above.
src, src_template = (cv2.imread(path) for path in (src_path, src_template_path))
Step 2: Find the template using SIFT and perspective transformation
# Grayscale versions of the photo and the template, used for the SIFT search.
src_gray, src_template_gray = (
    cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (src, src_template)
)

# Locate the template in the photo; find_template only estimates a homography
# when it gets more than n_matches_min good SIFT matches.
n_matches_min = 10
template_found, homography = find_template(src_gray, src_template_gray, n_matches_min)

# Rectify the located plate; the result is resized to the template's size.
warp = transform_perspective_and_crop(homography, src, src_gray, src_template)

# Colour-space variants of the rectified plate and of the template.
warp_gray = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp_hsv = cv2.cvtColor(warp, cv2.COLOR_BGR2HSV)
template_hsv = cv2.cvtColor(src_template, cv2.COLOR_BGR2HSV)
Step 3: Find regions of interest (using the green parts of the template image)
# HSV range that selects the green marker areas painted on the template.
green_hsv_lower_bound, green_hsv_upper_bound = [50, 250, 250], [60, 255, 255]

# The mask is computed on the template's HSV image and applied to the warped photo.
mask_rois, mask_rois_img = crop_img_in_hsv_range(
    warp, template_hsv, green_hsv_lower_bound, green_hsv_upper_bound
)

# One [image, x, y] entry per green area, cut out of the grayscale warp.
roi_list = separate_rois(mask_rois, warp_gray)

# Order the ROIs by x + y, i.e. by their distance from the top-left image origin.
roi_list.sort(key=lambda entry: entry[1] + entry[2])
Step 4: Apply a Canny Edge detection to the rois (regions of interest)
# Show the Canny edge map of every region of interest, one at a time.
for i, (roi_img, roi_x_offset, roi_y_offset) in enumerate(roi_list):
    print("#roi:{} x:{} y:{}".format(i, roi_x_offset, roi_y_offset))
    # Hysteresis thresholds: 40 (lower) and 200 (upper).
    roi_img_blur_threshold = cv2.Canny(roi_img, 40, 200)
    cv2.imshow("ROI image", roi_img_blur_threshold)
    # Wait for a key press before moving on to the next ROI.
    cv2.waitKey()
There are many ways to detect the digits from here; one of the easiest approaches is to run K-Means clustering on each of the contours.
Full code:
""" This code shows a way of getting the digit's edges in a pre-defined position (in green) """
import cv2
import numpy as np
def find_template(src_gray, src_template_gray, n_matches_min):
    """Locate the template inside the source image using SIFT features.

    Args:
        src_gray: Grayscale image that is searched.
        src_template_gray: Grayscale template image to look for.
        n_matches_min: A homography is only estimated when MORE than this
            many matches survive Lowe's ratio test.

    Returns:
        A tuple ``(sift_matches, homography)``:

        * ``sift_matches`` is the visualisation produced by
          ``cv2.drawMatches`` (template next to the source image, inlier
          matches drawn in green, template outline drawn on the source).
        * ``homography`` holds the four template corners projected into
          ``src_gray`` (the output of ``cv2.perspectiveTransform``, shape
          ``(4, 1, 2)``), not the 3x3 matrix itself.

        Both values are ``None`` when the template could not be located.
    """
    # SIFT is part of the main module in recent OpenCV releases; older
    # contrib builds only expose it under cv2.xfeatures2d.
    if hasattr(cv2, "SIFT_create"):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    # Keypoints and descriptors of the template (1) and of the photo (2).
    kp1, des1 = sift.detectAndCompute(src_template_gray, None)
    kp2, des2 = sift.detectAndCompute(src_gray, None)

    good = []
    # Descriptors are None for feature-less images, and k=2 matching needs at
    # least two candidates on the searched side.
    if des1 is not None and des2 is not None and len(des2) >= 2:
        FLANN_INDEX_KDTREE = 0
        index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(des1, des2, k=2)

        # Keep only the good matches as per Lowe's ratio test.
        for pair in matches:
            if len(pair) < 2:
                # The matcher may return fewer than k neighbours.
                continue
            m, n = pair
            if m.distance < 0.7 * n.distance:
                good.append(m)

    if len(good) <= n_matches_min:
        print("Not enough matches are found - %d/%d" % (len(good), n_matches_min))
        return None, None

    src_pts = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if M is None or mask is None:
        # RANSAC could not fit a consistent transform to the matches.
        print("Homography could not be estimated from %d matches" % len(good))
        return None, None
    matches_mask = mask.ravel().tolist()

    # Project the template's corner points into the source image.
    h_template, w_template = src_template_gray.shape[:2]
    corners = np.float32([
        [0, 0],
        [0, h_template - 1],
        [w_template - 1, h_template - 1],
        [w_template - 1, 0],
    ]).reshape(-1, 1, 2)
    homography = cv2.perspectiveTransform(corners, M)

    # Visualisation: outline of the found template plus the inlier matches.
    src_gray_copy = src_gray.copy()
    cv2.polylines(src_gray_copy, [np.int32(homography)], True, 255, 2, cv2.LINE_AA)
    sift_matches = cv2.drawMatches(
        src_template_gray, kp1, src_gray_copy, kp2, good, None,
        matchColor=(0, 255, 0),    # draw matches in green
        singlePointColor=None,
        matchesMask=matches_mask,  # draw only inliers
        flags=2,
    )
    return sift_matches, homography
def transform_perspective_and_crop(homography, src, src_gray, src_template_gray):
    """Warp the located template region of ``src`` into a top-down view.

    Args:
        homography: The four template corners in ``src`` coordinates, as
            returned by ``find_template`` (anything ``np.int32`` can turn
            into a polygon for ``cv2.polylines``), or ``None``.
        src: Image the warped result is taken from.
        src_gray: Image whose ``shape`` defines the size of the working
            mask (expected to be single-channel).
        src_template_gray: Template image; only its height and width are
            used, so both grayscale and colour templates are accepted.

    Returns:
        The rectified region resized to the template's width and height, or
        ``None`` when no four-cornered outline could be recovered.
    """
    if homography is None:
        return None

    # Draw the projected template outline and read it back as a contour.
    mask_img_template = np.zeros(src_gray.shape, dtype=np.uint8)
    mask_img_template = cv2.polylines(
        mask_img_template, [np.int32(homography)], True, 255, 1, cv2.LINE_AA
    )
    # findContours returns (image, contours, hierarchy) in OpenCV 3.x and
    # (contours, hierarchy) otherwise; the contours are always second to last.
    contours = cv2.findContours(
        mask_img_template, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(contours) == 0:
        return None

    # Approximate the contour; four points means we have found our template.
    c = contours[0]
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) != 4:
        return None

    # Order the corners as top-left, top-right, bottom-right, bottom-left.
    pts = approx.reshape(4, 2)
    rect = np.zeros((4, 2), dtype="float32")
    # The top-left point has the smallest x + y, the bottom-right the largest.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    # The top-right point has the smallest y - x, the bottom-left the largest.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # Size of the rectified image: the longer of each pair of opposite edges.
    (tl, tr, br, bl) = rect
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))
    maxHeight = max(int(heightA), int(heightB))
    if maxWidth < 1 or maxHeight < 1:
        # Degenerate quadrilateral: there is nothing to warp.
        return None

    # Destination corners of the top-down view, in the same order as `rect`.
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # Compute the perspective transform matrix and warp the source image.
    M = cv2.getPerspectiveTransform(rect, dst)
    warp = cv2.warpPerspective(src, M, (maxWidth, maxHeight))

    # Resize so the result has exactly the template's dimensions.
    h_template, w_template = src_template_gray.shape[:2]
    warp = cv2.resize(warp, (w_template, h_template), interpolation=cv2.INTER_AREA)
    return warp
def crop_img_in_hsv_range(img, hsv, lower_bound, upper_bound):
    """Mask ``img`` with the pixels of ``hsv`` lying inside an HSV range.

    Args:
        img: Image the mask is applied to.
        hsv: HSV image the range test is run on.
        lower_bound: Inclusive lower HSV bound (sequence of three values).
        upper_bound: Inclusive upper HSV bound (sequence of three values).

    Returns:
        ``(mask, res)``: the cleaned binary mask and ``img`` with everything
        outside the mask set to zero.
    """
    lo = np.array(lower_bound)
    hi = np.array(upper_bound)
    in_range = cv2.inRange(hsv, lo, hi)

    # Opening (erosion followed by dilation) with a 5x5 square removes
    # isolated pixels from the mask.
    square_5x5 = np.ones((5, 5), np.uint8)
    cleaned = cv2.morphologyEx(in_range, cv2.MORPH_OPEN, square_5x5)

    # Keep only the image pixels selected by the cleaned mask.
    masked_img = cv2.bitwise_and(img, img, mask=cleaned)
    return cleaned, masked_img
def separate_rois(column_mask, img_gray):
    """Cut one rectangular region out of ``img_gray`` per blob in the mask.

    Args:
        column_mask: Binary mask; every external contour in it yields one
            region of interest.
        img_gray: Image the regions are cropped from (assumed to have the
            same height and width as ``column_mask``).

    Returns:
        A list of ``[roi_image, top_x, top_y]`` entries, where ``top_x`` and
        ``top_y`` are the coordinates of the region's top-left corner.
    """
    # Pad the mask by one pixel so blobs touching the image edge are traced;
    # the (-1, -1) offset maps the contour coordinates back to the unpadded
    # mask.
    # https://stackoverflow.com/questions/41592039/contouring-a-binary-mask-with-opencv-python
    border = cv2.copyMakeBorder(column_mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
    # findContours returns (image, contours, hierarchy) in OpenCV 3.x and
    # (contours, hierarchy) otherwise; the contours are always second to last.
    contours = cv2.findContours(
        border, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE, offset=(-1, -1)
    )[-2]

    cell_list = []
    for contour in contours:
        (x, y, w, h) = cv2.boundingRect(contour)
        # The previous implementation filled a rectangle from (x, y) to
        # (x + w, y + h) inclusive in a full-size mask and cropped to it.
        # Slicing yields the same pixels (one extra row/column, clipped at
        # the image edge) without allocating a mask per contour.
        roi = img_gray[y:y + h + 1, x:x + w + 1].copy()
        cell_list.append([roi, x, y])
    return cell_list
""" 1. Load images """
# load image of plate
src_path = "nRHzD.jpg"
src = cv2.imread(src_path)
# cv2.imread returns None instead of raising when a file cannot be read
if src is None:
    raise FileNotFoundError("Could not read image: {}".format(src_path))
# load template of plate (to be looked for)
src_template_path = "nRHzD_template.jpg"
src_template = cv2.imread(src_template_path)
if src_template is None:
    raise FileNotFoundError("Could not read image: {}".format(src_template_path))

# --- 2. Find the plate (using the template image) and crop it into a rectangle ---
# convert images to gray scale
src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
src_template_gray = cv2.cvtColor(src_template, cv2.COLOR_BGR2GRAY)
# use SIFT to find template
n_matches_min = 10
template_found, homography = find_template(src_gray, src_template_gray, n_matches_min)
if homography is None:
    raise RuntimeError("Template was not found in: {}".format(src_path))
warp = transform_perspective_and_crop(homography, src, src_gray, src_template)
if warp is None:
    raise RuntimeError("Could not rectify the plate found in: {}".format(src_path))
warp_gray = cv2.cvtColor(warp, cv2.COLOR_BGR2GRAY)
warp_hsv = cv2.cvtColor(warp, cv2.COLOR_BGR2HSV)
template_hsv = cv2.cvtColor(src_template, cv2.COLOR_BGR2HSV)

# --- 3. Find regions of interest (using the green parts of the template image) ---
green_hsv_lower_bound = [50, 250, 250]
green_hsv_upper_bound = [60, 255, 255]
# the mask comes from the template's HSV image and is applied to the warped photo
mask_rois, mask_rois_img = crop_img_in_hsv_range(warp, template_hsv, green_hsv_lower_bound, green_hsv_upper_bound)
roi_list = separate_rois(mask_rois, warp_gray)
# sort the rois by distance to the top-left corner -> x (values[1]) + y (values[2])
roi_list = sorted(roi_list, key=lambda values: values[1] + values[2])

# --- 4. Apply a Canny Edge detection to the rois (regions of interest) ---
for i, roi in enumerate(roi_list):
    roi_img, roi_x_offset, roi_y_offset = roi
    print("#roi:{} x:{} y:{}".format(i, roi_x_offset, roi_y_offset))
    roi_img_blur_threshold = cv2.Canny(roi_img, 40, 200)
    cv2.imshow("ROI image", roi_img_blur_threshold)
    # wait for a key press before showing the next ROI
    cv2.waitKey()
# close the preview window once every ROI has been shown
cv2.destroyAllWindows()
I'm trying to import some (Matlab-generated) GeoTIFF files into WorldWind but have had no luck whatsoever. Any useful hints would be greatly appreciated. The GeoTIFF files display fine in ArcGIS (which allows me to create a .tfw file when I export), but WorldWind gives me the following message:
SEVERE: Cannot read raster: C:\Users\Matthias\Desktop\geotiff\fldextent_02-
Jan-1977(1)renderedno0.tif : gov.nasa.worldwind.formats.tiff.GeotiffImageReader.read(): unable
to decipher image organization
Jul 09, 2013 6:54:33 PM gov.nasa.worldwind.data.CachedDataRaster drawOnTo
SEVERE: C:\Users\Matthias\Desktop\geotiff\fldextent_02-Jan-1977(1)renderedno0.tif : Cannot read
raster: C:\Users\Matthias\Desktop\geotiff\fldextent_02-Jan-1977(1)renderedno0.tif :
gov.nasa.worldwind.formats.tiff.GeotiffImageReader.read(): unable to decipher image organization
gov.nasa.worldwind.exception.WWRuntimeException: Cannot read raster: C:\Users\Matthias\Desktop
\geotiff\fldextent_02-Jan-1977(1)renderedno0.tif :
gov.nasa.worldwind.formats.tiff.GeotiffImageReader.read(): unable to decipher image organization
at gov.nasa.worldwind.data.CachedDataRaster.getDataRasters(CachedDataRaster.java:255)
at gov.nasa.worldwind.data.CachedDataRaster.drawOnTo(CachedDataRaster.java:290)
at gov.nasa.worldwind.data.TiledRasterProducer.drawDataSources(TiledRasterProducer.java:576)
[...]
I have also looked at the attributes of the GeoTIFF file in FWTools which gives me:
C:\Users\Matthias\Desktop\geotiff>gdalinfo fldextent_02-Jan-1977(1)renderedno0.tif
Driver: GTiff/GeoTIFF
Files: fldextent_02-Jan-1977(1)renderedno0.tif
fldextent_02-Jan-1977(1)renderedno0.tfw
Size is 7200, 7200
Coordinate System is:
GEOGCS["WGS 84",
DATUM["WGS_1984",
SPHEROID["WGS 84",6378137,298.257223563,
AUTHORITY["EPSG","7030"]],
AUTHORITY["EPSG","6326"]],
PRIMEM["Greenwich",0],
UNIT["degree",0.0174532925199433],
AUTHORITY["EPSG","4326"]]
Origin = (99.000000000000000,7.000000000000000)
Pixel Size = (0.000833333333333,-0.000833333333333)
Metadata:
AREA_OR_POINT=Area
Image Structure Metadata:
INTERLEAVE=BAND
Corner Coordinates:
Upper Left ( 99.0000000, 7.0000000) ( 99d 0'0.00"E, 7d 0'0.00"N)
Lower Left ( 99.0000000, 1.0000000) ( 99d 0'0.00"E, 1d 0'0.00"N)
Upper Right ( 105.0000000, 7.0000000) (105d 0'0.00"E, 7d 0'0.00"N)
Lower Right ( 105.0000000, 1.0000000) (105d 0'0.00"E, 1d 0'0.00"N)
Center ( 102.0000000, 4.0000000) (102d 0'0.00"E, 4d 0'0.00"N)
Band 1 Block=128x128 Type=Byte, ColorInterp=Gray
NoData Value=0
The .tfw file reads:
0.0008333333
0.0000000000
0.0000000000
-0.0008333333
99.0004166667
6.9995833333
I have finally found the issue:
The important thing is to create a CLEAN GeoTIFF file in Matlab (RGB plus an alpha layer for transparency). Here is some Matlab guidance; the resulting GeoTIFF can be imported directly into WorldWind:
%%% read intensity values Z (2D matrix) - with values of 0 and above
%%% (we want 0 to be completely transparent in the final geotiff) -
%%% together with spatialref.GeoRasterReference ss
[Z, ss] = geotiffread('./flddph_1976-01-01.tif');
info_3 = geotiffinfo('./flddph_1976-01-01.tif');
%%% generate indexed image with 0 to 255 (255 equals max. intensity)
%%% NOTE: gray2ind defaults to 64 levels, so the 256 levels that the
%%% colormap below expects have to be requested explicitly
indexedimage = gray2ind(Z, 256);
indexedimage = double(indexedimage);
%%% normalize so that everything between 0 and 1
normalizedimg = (indexedimage) / 255;
%%% scaling data and applying colormap
%%% (scale by 255, not 256: 256 would shift the upper indices by one and
%%% saturate the top value when converting to uint8)
imgscaled = uint8(255*normalizedimg); % scale data back to indices 0..255
cmp = makeColorMap([1 1 0],[1 0.75 0],[1 0 0],256);
% 256 element colormap yellow - orange - red
% (download appropriate function MAKECOLORMAP)
imgrgb = ind2rgb(imgscaled,cmp);
%%% check plot
% subplot(2,1,1)
% imagesc(indexedimage)
% title('indexed image')
% subplot(2,1,2)
% image(img)
% title('rgb image')
%%% generating alpha layer for transparency
%%% (255 for non-transparent, 0 for transparent)
alpha3 = Z;
alpha3(alpha3>0)=255;
alpha3 = uint8(alpha3);
%%% ind2rgb returns doubles in [0,1]; concatenating them with the uint8
%%% alpha layer would cast the colours to uint8 values of 0 or 1, so
%%% convert the RGB planes to the 0..255 range first
imgrgb8 = uint8(round(255*imgrgb));
out2 = cat(3,imgrgb8,alpha3);
geotiffwrite('test_rgbhope_flddph.tif',out2,ss);
As described in the post title, I'm looking for a way to detect motion/movement in the input stream from a CCTV camera (IP/WiFi). Does anyone know the best way to connect to an IP video stream and monitor it for motion?
This is the OpenCV code in Python; the Java version is similar. You need to use OpenCV for the image operations.
import cv2, time, pandas
from datetime import datetime
# Contours with a smaller area (in pixels) than this are ignored as noise.
# NOTE(review): 200000 is very large for typical frame sizes - confirm it
# suits the camera resolution.
MIN_CONTOUR_AREA = 200000

# Reference frame that every later frame is compared against.
first_frame = None
# Motion status (0/1) of the two most recent compared frames.
status_list = [None, None]
# Alternating start / end timestamps of the detected motion intervals.
times = []

# NOTE(review): the credentials are hard-coded, and the URL has '#' where an
# RTSP URL normally has '@' between the credentials and the host - confirm.
video = cv2.VideoCapture('rtsp://admin:Paxton10#10.199.27.128:554')
if not video.isOpened():
    raise RuntimeError("Could not open the video stream")

try:
    while True:
        check, frame = video.read()
        if not check or frame is None:
            # The stream ended or the connection dropped.
            break
        status = 0
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (21, 21), 0)
        if first_frame is None:
            first_frame = gray
            continue

        # Pixels that differ from the reference frame by more than 30.
        delta_frame = cv2.absdiff(first_frame, gray)
        thresh_frame = cv2.threshold(delta_frame, 30, 255, cv2.THRESH_BINARY)[1]
        thresh_frame = cv2.dilate(thresh_frame, None, iterations=2)

        # findContours returns (image, contours, hierarchy) in OpenCV 3.x and
        # (contours, hierarchy) otherwise; the contours are second to last.
        cnts = cv2.findContours(
            thresh_frame.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )[-2]
        for contour in cnts:
            if cv2.contourArea(contour) < MIN_CONTOUR_AREA:
                continue
            status = 1
            (x, y, w, h) = cv2.boundingRect(contour)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)

        status_list.append(status)
        status_list = status_list[-2:]
        previous, current = status_list
        # Motion started; this also covers motion that is present in the very
        # first compared frame, when `previous` is still None.
        if current == 1 and previous != 1:
            times.append(datetime.now())
        # Motion stopped.
        if current == 0 and previous == 1:
            times.append(datetime.now())

        imS = cv2.resize(thresh_frame, (640, 480))
        cv2.imshow("Threshold Frame", imS)
        imS = cv2.resize(frame, (640, 480))
        cv2.imshow("Color Frame", imS)

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
finally:
    # Always free the capture and close the windows, even after an error.
    video.release()
    cv2.destroyAllWindows()

# An odd number of timestamps means motion was still ongoing when the loop
# ended (key press or stream loss); close that last interval now.
if len(times) % 2 == 1:
    times.append(datetime.now())

print(status_list)
print(times)

# Even positions hold interval starts, odd positions the matching ends.
df = pandas.DataFrame(
    {"Start": times[0::2], "End": times[1::2]}, columns=["Start", "End"]
)
df.to_csv("Times.csv")