# assignment.py

import argparse
from pathlib import Path
from typing import List
from typing import Optional

import cv2
import numpy as np


def foreground_extraction(
    img: np.ndarray,
    mask: Optional[np.ndarray] = None,
    rect: Optional[tuple] = None,
    flag: Optional[int] = cv2.GC_INIT_WITH_RECT,
) -> tuple[np.ndarray, np.ndarray]:
    """Extract the foreground of an image with the GrabCut algorithm.

    For more information on the algorithm and its arguments, see the OpenCV
    documentation of ``cv2.grabCut``.

    Args:
        img (np.ndarray): The image whose foreground is extracted.
        mask (Optional[np.ndarray]): An optional user-defined mask for the
            image. When omitted, an all-zero ``uint8`` mask with the image's
            height and width is created.
        rect (Optional[tuple]): An optional rectangular area which most
            probably contains the foreground. Only defaulted when ``flag`` is
            ``cv2.GC_INIT_WITH_RECT``; the default is ``(1, 1, w - 1, h - 1)``.
        flag (Optional[int]): The initialisation mode handed to
            ``cv2.grabCut``.

    Returns:
        A ``(mask, foreground)`` tuple. ``mask`` is a ``uint8`` array holding
        0 for (probable) background pixels and 1 everywhere else;
        ``foreground`` is ``img`` multiplied by that mask, i.e. with the
        background pixels set to zero.

    """
    h, w = img.shape[:2]
    if flag == cv2.GC_INIT_WITH_RECT and (rect is None or len(rect) == 0):
        rect = (1, 1, w - 1, h - 1)
    # An explicit None test is required here: ``mask or ...`` evaluates the
    # truth value of a NumPy array, which raises ValueError for any array with
    # more than one element.
    if mask is None:
        mask = np.zeros((h, w), dtype=np.uint8)
    # Model buffers used internally by GrabCut.
    bg_model: np.ndarray = np.zeros((1, 65), dtype=np.float64)
    fg_model: np.ndarray = np.zeros((1, 65), dtype=np.float64)
    iterations = 11
    mask, _, _ = cv2.grabCut(img, mask, rect, bg_model, fg_model, iterations, flag)
    # Collapse the four GrabCut labels into a binary background/foreground mask.
    is_background = (mask == cv2.GC_PR_BGD) | (mask == cv2.GC_BGD)
    mask2: np.ndarray = np.where(is_background, 0, 1).astype(np.uint8)
    # Broadcast the 2-D mask over the channel axis to blank out the background.
    extracted_img: np.ndarray = img * mask2[:, :, np.newaxis]
    return mask2, extracted_img


def order_points(pts: np.ndarray) -> np.ndarray:
    """Order corner points clockwise, starting from the top-left corner.

    Points produced by most OpenCV functions come in no particular order.
    The points are first sorted by their y coordinate; the two topmost points
    are then arranged left-to-right and the next two right-to-left.

    Args:
        pts (np.ndarray): Points to be ordered.

    Returns:
        Ordered points.

    """

    def x_of(point: List[int]) -> int:
        return point[0]

    def y_of(point: List[int]) -> int:
        return point[1]

    # Top-to-bottom ordering first (image y grows downwards).
    by_row: List[List[int]] = sorted(np.squeeze(pts).tolist(), key=y_of)
    upper = sorted(by_row[:2], key=x_of)
    lower = sorted(by_row[2:4], key=x_of, reverse=True)
    # Anything past the first four points keeps its row-sorted position.
    return np.array(upper + lower + by_row[4:])


def choose_points(img: np.ndarray) -> np.ndarray:
    """Let the user pick four corner points with the mouse.

    Shows a copy of the image in a window and records every left click until
    four points have been collected. Each recorded click is marked with a
    filled circle on the displayed copy; ``img`` itself is not drawn on.

    Args:
        img (np.ndarray): The image whose corner points are to be chosen.

    Returns:
        An array holding the four clicked points, each stored as ``[[x, y]]``.

    """
    window = "Select Four Points"
    needed = 4
    imgc = img.copy()
    clicks: List[List[List[int]]] = []

    def click(event: int, x: int, y: int, flags: int, params: None):
        """Collects all mouse event, stores click"""
        # Ignore clicks beyond the fourth so the result always has exactly
        # four points, even if several clicks arrive between two polls.
        if event == cv2.EVENT_LBUTTONDOWN and len(clicks) < needed:
            clicks.append([[x, y]])
            cv2.circle(imgc, (x, y), 8, (89, 0, 255), -1)

    cv2.imshow(window, imgc)
    cv2.setMouseCallback(window, click)
    # Loop on "fewer than four" rather than "exactly four": an equality test
    # would spin forever if the count ever jumped past four.
    while len(clicks) < needed:
        # Redraw so the markers added by the callback become visible.
        cv2.imshow(window, imgc)
        cv2.waitKey(10)
    cv2.destroyAllWindows()
    return np.array(clicks)


def edges_and_corners(
    img: np.ndarray, mask: np.ndarray, override: Optional[bool] = False
) -> tuple[np.ndarray, np.ndarray]:
    """Finds a suitable quadrilateral which approximates the edges of image.

    Tries to approximate a quadrilateral automatically, but reverts to user
    interaction when that fails. Can be overridden manually, too.

    Args:
        img (np.ndarray): The image whose edges and corners are to be found.
        mask (np.ndarray): The binary (0/1) foreground mask of the image.
        override (Optional[bool]): Whether to skip automatic detection and ask
            the user to select the corners.

    Returns:
        A copy of the image with edges and corners drawn, and the four ordered
        corners as a ``float32`` array.

    Raises:
        ValueError: If no contour can be found in the mask.

    """
    imgc = img.copy()
    # The morphological gradient of the mask leaves only its boundary.
    grad = cv2.morphologyEx(mask * 255, cv2.MORPH_GRADIENT, np.ones((5, 5), np.uint8))
    contours, _ = cv2.findContours(grad, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        raise ValueError("No contour found in the foreground mask.")
    largest_contour = max(contours, key=cv2.contourArea)
    cv2.drawContours(imgc, [largest_contour], 0, (66, 185, 245), 2)

    approx = []
    if not override:
        perimeter = cv2.arcLength(largest_contour, True)
        # Loosen the approximation tolerance step by step until the polygon
        # has collapsed to four vertices (or fewer, which is a failure).
        for eps in np.linspace(0.01, 0.1, 100):
            approx = cv2.approxPolyDP(largest_contour, eps * perimeter, True)
            if len(approx) <= 4:
                break
        # Covers both failure modes: the polygon dropped below four vertices,
        # or it never got down to four within the tolerance range.
        if len(approx) != 4:
            print("Failed to automatically select 4 corners. Please do it manually.")
            override = True
    if override:
        approx = choose_points(imgc)

    # order_points rebuilds the array from Python ints, so its integer dtype
    # is the platform default; OpenCV's drawing functions are given 32-bit
    # integer points explicitly to stay independent of that default.
    approx = order_points(np.array(approx)).astype(np.int32)
    cv2.drawContours(imgc, [approx[:, np.newaxis, :]], 0, (204, 255, 0), 3)
    for center in approx:
        # Plain Python ints avoid passing NumPy scalar types to cv2.circle.
        cv2.circle(imgc, (int(center[0]), int(center[1])), 8, (89, 0, 255), -1)
    return imgc, approx.astype(np.float32)


def L2_dist(p1: np.ndarray, p2: np.ndarray) -> float:
    """Compute the Euclidean distance between two 2-D points.

    Only the first two components of each point are used.

    Args:
        p1 (np.ndarray): First point.
        p2 (np.ndarray): Second point.

    Returns:
        Euclidean distance (the L2 norm of the difference) of the two points.

    """
    delta_x = p2[0] - p1[0]
    delta_y = p2[1] - p1[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)


def homo_transform(img: np.ndarray, pts: np.ndarray) -> np.ndarray:
    """Homography Transformed Foreground, for straightening image.

    Calculates a homography between the foreground's four corner points and
    an estimated rectangular area for the image. The four-point
    ``cv2.getPerspectiveTransform`` is used instead of the more powerful
    ``cv2.findHomography`` because (i) it mirrors the four-point homography
    derivation, and (ii) four-point choose-and-transform is the default in
    most scanning apps, e.g. Adobe Scan or Microsoft Scan.

    The aspect-ratio estimation is, with slight modifications, inspired by the
    paper "Whiteboard scanning and image enhancement" by Zhang et al. from
    Microsoft, because a plain side-length ratio did not give good results.

    Args:
        img (np.ndarray): Image which is to be homography transformed.
        pts (np.ndarray): Four corner points ordered top-left, top-right,
            bottom-right, bottom-left; the input points for the homography
            transformation.

    Returns:
        Homography transformed image.

    Raises:
        ValueError: If no finite, positive aspect ratio can be derived from
            the corner points (e.g. three of them are collinear).

    """
    # Could have taken max, but mean gave better results.
    W = (L2_dist(pts[0], pts[1]) + L2_dist(pts[2], pts[3])) / 2
    H = (L2_dist(pts[0], pts[3]) + L2_dist(pts[2], pts[1])) / 2

    # Homogeneous corners: m1 top-left, m2 top-right, m3 bottom-left,
    # m4 bottom-right (note that pts[2] and pts[3] swap places here).
    m1 = np.array([*pts[0], 1], dtype=np.float64)
    m2 = np.array([*pts[1], 1], dtype=np.float64)
    m3 = np.array([*pts[3], 1], dtype=np.float64)
    m4 = np.array([*pts[2], 1], dtype=np.float64)
    k2 = (np.cross(m1, m4) @ m3) / (np.cross(m2, m4) @ m3)
    k3 = (np.cross(m1, m4) @ m2) / (np.cross(m3, m4) @ m2)
    n2 = k2 * m2 - m1
    n3 = k3 * m3 - m1

    # Simple estimate from the first two components only. It is the final
    # answer whenever the focal length below cannot be estimated.
    ar = np.sqrt((n2[0] * n2[0] + n2[1] * n2[1]) / (n3[0] * n3[0] + n3[1] * n3[1]))

    # n2[2] == k2 - 1 and n3[2] == k3 - 1, so the focal-length formula divides
    # by (k2 - 1) * (k3 - 1). A tolerance (instead of an exact float
    # comparison with 1) keeps near-singular cases out of that division.
    if not (np.isclose(k2, 1.0) or np.isclose(k3, 1.0)):
        u0 = img.shape[1] / 2.0
        v0 = img.shape[0] / 2.0
        f_squared = (1.0 / (n2[2] * n3[2])) * (
            -n2[0] * n3[0]
            + (n2[0] * n3[2] + n2[2] * n3[0]) * u0
            - n2[1] * n3[1]
            + (n2[1] * n3[2] + n2[2] * n3[1]) * v0
        ) - (u0 * u0 + v0 * v0)
        # A non-positive or non-finite value has no real square root; taking
        # it anyway would turn the aspect ratio into NaN, so the simple
        # estimate computed above is kept in that case.
        if np.isfinite(f_squared) and f_squared > 0:
            f = np.sqrt(f_squared)
            A = np.array([[f, 0, u0], [0, f, v0], [0, 0, 1]], dtype=np.float64)
            ATi = np.linalg.inv(A.T)
            Ai = np.linalg.inv(A)
            ar = np.sqrt((n2.T @ ATi @ Ai @ n2) / (n3.T @ ATi @ Ai @ n3))

    if not np.isfinite(ar) or ar <= 0:
        raise ValueError("Cannot estimate an aspect ratio from the given corner points.")

    # Keep the smaller of the two candidate sizes that honour the ratio.
    if ar * H < W:
        W = int(W)
        H = int(W / ar)
    else:
        H = int(H)
        W = int(H * ar)
    dst = np.array(
        [
            [0, 0],
            [W - 1, 0],
            [W - 1, H - 1],
            [0, H - 1],
        ],
        dtype=np.float32,
    )
    # Both point sets are handed over as float32; a no-op when pts already is.
    src = np.asarray(pts, dtype=np.float32)
    M = cv2.getPerspectiveTransform(src, dst)
    return cv2.warpPerspective(img, M, (W, H))


def affine_rectification(img: np.ndarray, pts: np.ndarray) -> np.ndarray:
    """Performs affine rectification of an image.

    Performs affine rectification of the image and scales it isotropically.
    No other rotation or translation is done, and no metric rectification is
    performed. The method followed is that of the book "Multiple View Geometry
    in Computer Vision" by Hartley and Zisserman.

    Args:
        img (np.ndarray): The image whose affine rectification is done.
        pts (np.ndarray): The ordered corner points (top-left, top-right,
            bottom-right, bottom-left) used to find parallel lines.

    Returns:
        Affine rectified image, with the same width and height as ``img``.

    """
    # Corner points in homogeneous coordinates.
    pt: List[np.ndarray] = [np.array([*p, 1], dtype=np.float64) for p in pts[:4]]

    # Pairs of lines that are parallel in the real scene: top/bottom edges
    # and left/right edges. The cross product of two points is their line.
    lines: List[List[np.ndarray]] = [
        [np.cross(pt[0], pt[1]), np.cross(pt[3], pt[2])],
        [np.cross(pt[0], pt[3]), np.cross(pt[1], pt[2])],
    ]

    # Vanishing points: intersection of each pair of lines. A zero last
    # component means the point lies at infinity, so it is left unscaled.
    vpts = [np.cross(lines[0][0], lines[0][1]), np.cross(lines[1][0], lines[1][1])]
    if vpts[0][-1] != 0:
        vpts[0] /= vpts[0][-1]
    if vpts[1][-1] != 0:
        vpts[1] /= vpts[1][-1]

    # Vanishing line through both vanishing points; it becomes the last row
    # of the rectifying projective transformation.
    vline: np.ndarray = np.cross(vpts[0], vpts[1])
    M: np.ndarray = np.array(
        [[1, 0, 0], [0, 1, 0], vline / vline[-1]], dtype=np.float64
    )

    # Only height and width are needed; slicing also accepts images without
    # a channel axis, where a three-way unpack of ``shape`` would fail.
    h, w = img.shape[:2]
    # Images of the three corners other than the origin under M.
    Mlim = np.array(
        [
            np.dot(M, np.array([0.0, h, 1.0])),
            np.dot(M, np.array([w, h, 1.0])),
            np.dot(M, np.array([w, 0.0, 1.0])),
        ]
    )
    if 0 not in Mlim[:, -1]:
        # Isotropic scale derived from the largest transformed corner
        # coordinate relative to the output width and height.
        k = max(
            np.amax(Mlim[:, 0] / Mlim[:, -1]) / w, np.amax(Mlim[:, 1] / Mlim[:, -1]) / h
        )
        MScale = np.array([[1.0 / k, 0, 0], [0, 1.0 / k, 0], [0, 0, 1]])
        Mnew = MScale @ M
    else:
        Mnew = M

    return cv2.warpPerspective(img, Mnew, (w, h))


if __name__ == "__main__":
    # Command line interface: input image, optional GrabCut hint (mask or
    # rectangle), slide-show interval and manual corner selection.
    parser = argparse.ArgumentParser(
        description="Foreground Extraction and Image Transformations."
    )
    parser.add_argument(
        "--image",
        type=str,
        default="images/PataChitraPuri_1.jpg",
        help="path to a valid image file",
    )
    parser.add_argument(
        "--mask",
        type=str,
        default="",
        help="path to a valid image file which acts as a mask. NOTE:Provide either a mask or a rectangular area, not both",
    )
    parser.add_argument(
        "--rect",
        nargs=4,
        default=[0, 0, 0, 0],
        type=int,
        metavar=(
            "( X COORDINATE OF TOP-LEFT POINT, ",
            "Y COORDINATE OF TOP-LEFT POINT, ",
            "WIDTH OF IMAGE, ",
            "HEIGHT OF IMAGE)",
        ),
        help="specify a rectangular area containing the foreground. NOTE:Provide either a mask or a rectangular area, not both",
    )
    parser.add_argument(
        "--interval",
        type=float,
        default=4,
        help="time interval in seconds between each image of slide show",
    )
    parser.add_argument(
        "--manual",
        action="store_true",
        help="flag to override automatic detection of corners, so that the user can choose the four corners themselves",
    )
    args = parser.parse_args()

    # (title, image) pairs shown in the slide show at the end.
    all_images = []
    if not Path(args.image).is_file():
        raise FileNotFoundError(args.image)
    image = cv2.imread(args.image)
    if image is None:
        raise Exception("Not a valid image format")
    all_images.append(("Original Image", image))

    if args.mask != "":
        if not Path(args.mask).is_file():
            raise FileNotFoundError(args.mask)
        # Read the mask as a single-channel image: the mask is handed to
        # GrabCut, which works on a one-channel mask, whereas the default
        # imread mode returns three channels.
        # NOTE(review): the mask file is assumed to already contain GrabCut
        # label values - confirm against the expected mask format.
        mask = cv2.imread(args.mask, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise Exception("Not a valid image format")
        # Compare height and width only; the image also has a channel axis.
        if mask.shape[:2] != image.shape[:2]:
            raise Exception("Mask and Image dimensions doesn't match")
        mask2, ex_img = foreground_extraction(
            image, mask=mask, flag=cv2.GC_INIT_WITH_MASK
        )
    elif args.rect != [0, 0, 0, 0] and args.rect != [
        0,
        0,
        image.shape[1],
        image.shape[0],
    ]:
        # A rectangle that is neither the placeholder default nor the whole
        # image is forwarded as the GrabCut region hint.
        mask2, ex_img = foreground_extraction(image, rect=tuple(args.rect))
    else:
        mask2, ex_img = foreground_extraction(image)

    # Processing pipeline: extraction -> corners -> homography -> affine.
    all_images.append(("Extracted Foreground", ex_img.copy()))
    ok, pts = edges_and_corners(ex_img, mask2, args.manual)
    all_images.append(("Corners and Edges", ok.copy()))
    htr = homo_transform(ex_img, pts)
    all_images.append(("Homography Transform", htr.copy()))
    afr = affine_rectification(ex_img, pts)
    all_images.append(("Affine Transform", afr.copy()))

    # Slide show in a single reused window; pressing "q" stops it early.
    for title, img in all_images:
        cv2.imshow("placeholder", img)
        cv2.setWindowTitle("placeholder", title)
        key = cv2.waitKey(int(args.interval * 1000)) & 0xFF
        if key == ord("q"):
            break
    cv2.destroyAllWindows()