123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- from torchvision import models
- from PIL import Image
- import matplotlib.pyplot as plt
- import torch
- import numpy as np
- import cv2
- # Apply the transformations needed
- import torchvision.transforms as T
- # Define the helper function
def decode_segmap(image, source, nc=21):
    """Cut the segmented foreground of ``source`` out onto a white background.

    The label map ``image`` is colourised with the 21-entry palette below.
    Every pixel that receives a non-black colour is treated as foreground, and
    the picture read from ``source`` is alpha-blended over white using a
    Gaussian-softened version of that foreground mask.

    Args:
        image: 2-D array of per-pixel class indices (0 = background).
        source: Path of the image file to composite; it is resized to the
            height and width of ``image``.
        nc: Number of leading palette entries to honour. Labels outside
            ``range(nc)`` (or beyond the palette) are treated as background.

    Returns:
        Float array of shape ``(H, W, 3)`` in RGB channel order, scaled to
        the range ``[0, 1]`` for display.

    Raises:
        OSError: If ``source`` cannot be read by ``cv2.imread``.
    """
    label_colors = np.array([
        (0, 0, 0),  # 0=background
        # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
        (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
        # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
        (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
        # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
        (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
        # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
        (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128),
    ], dtype=np.uint8)

    image = np.asarray(image)
    height, width = image.shape[:2]

    # Colourise the label map. Clamping to the palette length keeps an
    # over-large ``nc`` from indexing past the end of ``label_colors``.
    rgb = np.zeros((height, width, 3), dtype=np.uint8)
    for label in range(min(nc, len(label_colors))):
        rgb[image == label] = label_colors[label]

    # Load the foreground picture; imread signals failure by returning None.
    foreground = cv2.imread(source)
    if foreground is None:
        raise OSError("could not read image file: {!r}".format(source))

    # OpenCV loads BGR; convert to RGB and match the label map's size.
    foreground = cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB)
    foreground = cv2.resize(foreground, (width, height)).astype(float)

    # Build ONE binary mask shared by all three channels. Thresholding the
    # colour map channel by channel would give classes whose palette colour
    # has a zero component (e.g. aeroplane = (128, 0, 0)) a different alpha
    # per channel and tint the cut-out.
    mask = np.where(rgb.any(axis=2), 255, 0).astype(np.uint8)

    # Soften the mask edges, then normalise the matte to [0, 1].
    alpha = cv2.GaussianBlur(mask, (7, 7), 0).astype(float) / 255
    alpha = alpha[:, :, np.newaxis]  # broadcast over the colour channels

    # Blend the foreground over a pure white (255) background.
    blended = alpha * foreground + (1.0 - alpha) * 255.0

    # Return a normalised image suitable for plt.imshow.
    return blended / 255
def segment(net, path, show_orig=True, dev='cuda'):
    """Segment the image at ``path`` with ``net`` and display the cut-out.

    The image is loaded, normalised with the mean/std constants below, run
    through ``net`` on device ``dev``, and the per-pixel arg-max of the
    ``'out'`` logits is handed to ``decode_segmap`` to composite the
    foreground on a white background. The result is shown with matplotlib.

    Args:
        net: Segmentation model whose forward pass returns a mapping with an
            ``'out'`` entry of per-class scores.
        path: Path of the input image file.
        show_orig: When true, display the input image before the result.
        dev: Device on which to run the model and the input tensor.

    Returns:
        None. The function only displays figures.
    """
    # Force three channels: RGBA, palette or greyscale files would otherwise
    # fail in the 3-channel Normalize below. The ``with`` block closes the
    # underlying file once the converted copy has been made.
    with Image.open(path) as opened:
        img = opened.convert('RGB')

    if show_orig:
        plt.imshow(img)
        plt.axis('off')
        plt.show()

    # Per the original note, removing the Resize step gives better inference
    # results.
    trf = T.Compose([
        T.Resize(450),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])
    inp = trf(img).unsqueeze(0).to(dev)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = net.to(dev)(inp)['out']

    # Drop only the batch dimension, then take the best class for each pixel.
    om = torch.argmax(out.squeeze(0), dim=0).cpu().numpy()

    rgb = decode_segmap(om, path)
    plt.imshow(rgb)
    plt.axis('off')
    plt.show()
# Build the pretrained DeepLabV3 / ResNet-101 segmentation network and put it
# in evaluation mode.
dlab = models.segmentation.deeplabv3_resnet101(pretrained=1)
dlab.eval()

# Run background removal on the sample image without showing the original.
segment(net=dlab, path='./images/bgremoval/red-car.png', show_orig=False)