# Background-Change.py
# Replace the background of an image using DeepLabV3 semantic segmentation.

  1. from torchvision import models
  2. from PIL import Image
  3. import matplotlib.pyplot as plt
  4. import torch
  5. import numpy as np
  6. import cv2
  7. # Apply the transformations needed
  8. import torchvision.transforms as T
  9. # Define the helper function
  10. def decode_segmap(image, source, bgimg, nc=21):
  11. label_colors = np.array([(0, 0, 0), # 0=background
  12. # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
  13. (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128),
  14. # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
  15. (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0),
  16. # 11=dining table, 12=dog, 13=horse, 14=motorbike, 15=person
  17. (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
  18. # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
  19. (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128)])
  20. r = np.zeros_like(image).astype(np.uint8)
  21. g = np.zeros_like(image).astype(np.uint8)
  22. b = np.zeros_like(image).astype(np.uint8)
  23. for l in range(0, nc):
  24. idx = image == l
  25. r[idx] = label_colors[l, 0]
  26. g[idx] = label_colors[l, 1]
  27. b[idx] = label_colors[l, 2]
  28. rgb = np.stack([r, g, b], axis=2)
  29. # Load the foreground input image
  30. foreground = cv2.imread(source)
  31. # Load the background input image
  32. background = cv2.imread(bgimg)
  33. # Change the color of foreground image to RGB
  34. # and resize images to match shape of R-band in RGB output map
  35. foreground = cv2.cvtColor(foreground, cv2.COLOR_BGR2RGB)
  36. background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
  37. foreground = cv2.resize(foreground,(r.shape[1],r.shape[0]))
  38. background = cv2.resize(background,(r.shape[1],r.shape[0]))
  39. # Convert uint8 to float
  40. foreground = foreground.astype(float)
  41. background = background.astype(float)
  42. # Create a binary mask of the RGB output map using the threshold value 0
  43. th, alpha = cv2.threshold(np.array(rgb),0,255, cv2.THRESH_BINARY)
  44. # Apply a slight blur to the mask to soften edges
  45. alpha = cv2.GaussianBlur(alpha, (7,7),0)
  46. # Normalize the alpha mask to keep intensity between 0 and 1
  47. alpha = alpha.astype(float)/255
  48. # Multiply the foreground with the alpha matte
  49. foreground = cv2.multiply(alpha, foreground)
  50. # Multiply the background with ( 1 - alpha )
  51. background = cv2.multiply(1.0 - alpha, background)
  52. # Add the masked foreground and background
  53. outImage = cv2.add(foreground, background)
  54. # Return a normalized output image for display
  55. return outImage/255
  56. def segment(net, path, bgimagepath, show_orig=True, dev='cuda'):
  57. img = Image.open(path)
  58. if show_orig: plt.imshow(img); plt.axis('off'); plt.show()
  59. # Comment the Resize and CenterCrop for better inference results
  60. trf = T.Compose([T.Resize(400),
  61. #T.CenterCrop(224),
  62. T.ToTensor(),
  63. T.Normalize(mean = [0.485, 0.456, 0.406],
  64. std = [0.229, 0.224, 0.225])])
  65. inp = trf(img).unsqueeze(0).to(dev)
  66. out = net.to(dev)(inp)['out']
  67. om = torch.argmax(out.squeeze(), dim=0).detach().cpu().numpy()
  68. rgb = decode_segmap(om, path, bgimagepath)
  69. plt.imshow(rgb); plt.axis('off'); plt.show()
  70. dlab = models.segmentation.deeplabv3_resnet101(pretrained=1).eval()
  71. segment(dlab, './images/change/girl-with-hat.png','./images/change/background-building.png', show_orig=False)
  72. segment(dlab, './images/change/girl.png','./images/change/forest.png', show_orig=False)