"""Source code for Katna.image_filters.text_detector.

.. module:: Katna.image_filters.text_detector
    :platform: OS X
    :synopsis: This module is the implementation of the text detector filter.
"""

import os
import cv2
import numpy as np
import time
import requests
import random
from imutils.object_detection import non_max_suppression
from Katna.image_filters.filter import Filter
import Katna.config as config

class TextDetector(Filter):
    """TextDetector Class: Class for implementation of text detector filter,
    inherit from Filter class.

    Typical use: construct the filter, call :meth:`set_image` once per image
    to run text detection, then call :meth:`get_filter_result` for every
    candidate crop of that image.
    """

    def __init__(self, weight=1.0):
        """Constructor for this class does following tasks: if not already
        downloaded, it first downloads the text detector dnn weights file from
        the public URL and saves it in the USER_HOME/.katna directory, or the
        /tmp/.katna directory. After this the initializer loads the network
        and initializes the internal parameters read from
        ``config.TextDetector`` (min_confidence, merge_threshold, layerNames,
        frozen_weights, cache_subdir).

        :param weight: weight of the filter, forwarded to the Filter base
            class, defaults to 1.0
        :type weight: float, optional
        :raises FileNotFoundError: if the weights file cannot be prepared,
            downloaded or loaded (the underlying error is chained as the cause)
        """
        super().__init__(weight)
        self.min_confidence = config.TextDetector.min_confidence
        self.merge_threshold = config.TextDetector.merge_threshold
        self.layerNames = config.TextDetector.layerNames
        self.frozen_weights = config.TextDetector.frozen_weights
        self.cache_subdir = config.TextDetector.cache_subdir

        # Defined up front so get_filter_result() can be called safely even
        # when set_image() has not been called yet (or was given None).
        self.image = None
        self.text_rects = None

        try:
            self.network_folder_path = os.path.join(
                os.path.expanduser("~"), ".katna"
            )
            # NOTE: os.access() is also False when the directory does not
            # exist yet, so the /tmp fallback is taken in that case too.
            if not os.access(self.network_folder_path, os.W_OK):
                self.network_folder_path = os.path.join("/tmp", ".katna")

            self.datadir = os.path.join(self.network_folder_path, self.cache_subdir)
            os.makedirs(self.datadir, exist_ok=True)

            self.network_file_path = os.path.join(self.datadir, self.frozen_weights)
            if not os.path.exists(self.network_file_path):
                self.download_data()

            self.net = cv2.dnn.readNet(self.network_file_path)
        except Exception as err:
            raise FileNotFoundError(
                self.frozen_weights
                + " seems to be missing. Download the file and specify the"
                " full path while initializing TextDetector class"
            ) from err

    def download_data(self):
        """Public function for downloading the network weight from the URL
        link, to be used for text detection functionality.

        Troubleshooting tip: If you get FileNotFound error during text
        detector initialization, initialize the text detector and call this
        function directly to download the model file from public URL link.

        The file is streamed to a temporary ``.part`` file next to the target
        and only moved into place once the download completed, so a failed or
        interrupted download never leaves a truncated weights file behind.

        :raises requests.exceptions.RequestException: if the request fails or
            the server answers with an HTTP error status
        :raises OSError: if the file cannot be written
        """
        link = config.TextDetector.model_download_link
        target_path = os.path.join(self.datadir, self.frozen_weights)
        partial_path = target_path + ".part"

        print("Downloading model file...")
        try:
            with requests.get(link, stream=True) as response:
                # Without this check an HTTP error page would be written to
                # disk and later be mistaken for the weights file.
                response.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)
            os.replace(partial_path, target_path)
        finally:
            # Only still present when the download did not complete.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print("Model file downloaded.")

    def __decode_predictions(self, scores, geometry):
        """Internal function for getting bounding box and confidence values
        from the text detector dnn network output (scores, geometry).

        The function takes the number of rows and columns from the scores
        volume, then builds the list of bounding box rectangles and the list
        of corresponding confidence scores for every cell whose score is not
        below ``self.min_confidence``.

        :param scores: score volume returned by the network, indexed as
            ``scores[0, 0, row, col]``
        :type scores: numpy array
        :param geometry: geometry volume returned by the network, indexed as
            ``geometry[0, channel, row, col]`` with channels 0-3 holding the
            box distances and channel 4 the rotation angle
        :type geometry: numpy array
        :return: bounding boxes as (startX, startY, endX, endY) tuples and
            their confidences
        :rtype: tuple(list, list)
        """
        (numRows, numCols) = scores.shape[2:4]
        rects = []
        confidences = []

        for y in range(numRows):
            # scores (probabilities) followed by the geometrical data used to
            # derive potential bounding box coordinates that surround text
            scoresData = scores[0, 0, y]
            xData0 = geometry[0, 0, y]
            xData1 = geometry[0, 1, y]
            xData2 = geometry[0, 2, y]
            xData3 = geometry[0, 3, y]
            anglesData = geometry[0, 4, y]

            for x in range(numCols):
                # ignore cells without sufficient probability
                if scoresData[x] < self.min_confidence:
                    continue

                # the resulting feature maps are 4x smaller than the input
                (offsetX, offsetY) = (x * 4.0, y * 4.0)

                # rotation angle of the prediction
                angle = anglesData[x]
                cos = np.cos(angle)
                sin = np.sin(angle)

                # width and height of the bounding box
                h = xData0[x] + xData2[x]
                w = xData1[x] + xData3[x]

                # starting and ending (x, y)-coordinates of the bounding box
                endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
                endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
                startX = int(endX - w)
                startY = int(endY - h)

                rects.append((startX, startY, endX, endY))
                confidences.append(scoresData[x])

        return (rects, confidences)

    def __merge_boxes(self, rects):
        """Internal function merging text boxes that lie on roughly the same
        horizontal line into one enclosing box.

        Boxes are sorted by their top (y1) coordinate; consecutive boxes whose
        y1 differs by at most half of the median box height are put in the
        same group, and each group is replaced by its enclosing rectangle.

        :param rects: text boxes as [x1, y1, x2, y2]
        :type rects: list
        :return: merged boxes as [x_min, y_min, x_max, y_max], one per group;
            empty list when ``rects`` is empty
        :rtype: list
        """
        if len(rects) == 0:
            return []

        def grouper(iterable, interval=2):
            # Yield runs of consecutive items whose y1 is within ``interval``
            # of the previous item's y1.
            prev = None
            group = []
            for item in iterable:
                if prev is None or abs(item[1] - prev[1]) <= interval:
                    group.append(item)
                else:
                    yield group
                    group = [item]
                prev = item
            if group:
                yield group

        heights = sorted(bbox[3] - bbox[1] for bbox in rects)
        # half of the median height is the vertical tolerance for grouping
        median_height = heights[len(heights) // 2] / 2

        # sort the bounding boxes on y1 (y of the left-top coordinate)
        bboxes_list = sorted(rects, key=lambda k: k[1])

        rects_used = []
        for group in grouper(bboxes_list, median_height):
            x_min = min(box[0] for box in group)
            y_min = min(box[1] for box in group)
            x_max = max(box[2] for box in group)
            y_max = max(box[3] for box in group)
            rects_used.append([x_min, y_min, x_max, y_max])

        return rects_used

    def __detect_text(self):
        """Internal function to detect text bounding boxes from the image
        stored in ``self.image``.

        The image is fed to the network at 320x320, the predictions are
        decoded, weak overlapping boxes are removed with non-maxima
        suppression, the boxes are scaled back to the original image size and
        finally merged per text line. ``self.image`` is not modified.

        :return: list of bounding boxes [x1, y1, x2, y2], one per detected
            text field; empty when nothing was detected
        :rtype: list
        """
        net_size = 320
        (H, W) = self.image.shape[:2]
        rW = W / net_size
        rH = H / net_size

        # construct a blob from the image (blobFromImage resizes it to the
        # requested size) and perform a forward pass of the model to obtain
        # the two output layer sets
        blob = cv2.dnn.blobFromImage(
            self.image,
            1.0,
            (net_size, net_size),
            (123.68, 116.78, 103.94),
            swapRB=True,
            crop=False,
        )
        self.net.setInput(blob)
        (scores, geometry) = self.net.forward(self.layerNames)

        rects, confidences = self.__decode_predictions(scores, geometry)

        # apply non-maxima suppression to suppress weak, overlapping
        # bounding boxes
        boxes = non_max_suppression(np.array(rects), probs=confidences)

        # scale the bounding box coordinates back to the original image size
        text_rects = [
            [int(startX * rW), int(startY * rH), int(endX * rW), int(endY * rH)]
            for (startX, startY, endX, endY) in boxes
        ]
        text_rects = sorted(text_rects, key=lambda item: item[0])

        if len(text_rects) > 0:
            return self.__merge_boxes(text_rects)
        return text_rects

    def set_image(self, image):
        """Public set_image function, this will detect all text boxes in the
        input image and save them as the internal list ``text_rects`` to be
        used in get_filter_result.

        When ``image`` is None nothing is changed and None is returned.

        :param image: input image from which crops are taken
        :type image: numpy array(opencv)
        """
        if image is None:
            return None
        self.image = image
        self.text_rects = self.__detect_text()

    def get_filter_result(self, crop):
        """Main public function of TextDetector filter class.

        Returns True (filter will not discard the input crop) when no text was
        detected in the image, or when every detected text bounding box lies
        completely inside the crop rectangle. Returns False (signal for
        discarding the input crop) as soon as one detected text box is not
        fully contained in the crop.

        :param crop: input crop rectangle to test, exposing ``x``, ``y``,
            ``w`` and ``h``
        :type crop: crop_rect
        :return: True if the crop should be kept, False if it should be
            discarded
        :rtype: bool
        """
        # rect: xs,ys,xe,ye
        # crop: x,y,w,h
        if self.text_rects is None or len(self.text_rects) == 0:
            return True

        crop_x_end = crop.x + crop.w
        crop_y_end = crop.y + crop.h

        # Every text box has to be checked, not only the first one.
        return all(
            rect[0] >= crop.x
            and rect[2] <= crop_x_end
            and rect[1] >= crop.y
            and rect[3] <= crop_y_end
            for rect in self.text_rects
        )