transforms.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. import random
  2. import torch
  3. from torchvision.transforms import functional as F
  4. def _flip_coco_person_keypoints(kps, width):
  5. flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
  6. flipped_data = kps[:, flip_inds]
  7. flipped_data[..., 0] = width - flipped_data[..., 0]
  8. # Maintain COCO convention that if visibility == 0, then x, y = 0
  9. inds = flipped_data[..., 2] == 0
  10. flipped_data[inds] = 0
  11. return flipped_data
  12. class Compose(object):
  13. def __init__(self, transforms):
  14. self.transforms = transforms
  15. def __call__(self, image, target):
  16. for t in self.transforms:
  17. image, target = t(image, target)
  18. return image, target
  19. class RandomHorizontalFlip(object):
  20. def __init__(self, prob):
  21. self.prob = prob
  22. def __call__(self, image, target):
  23. if random.random() < self.prob:
  24. height, width = image.shape[-2:]
  25. image = image.flip(-1)
  26. bbox = target["boxes"]
  27. bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
  28. target["boxes"] = bbox
  29. if "masks" in target:
  30. target["masks"] = target["masks"].flip(-1)
  31. if "keypoints" in target:
  32. keypoints = target["keypoints"]
  33. keypoints = _flip_coco_person_keypoints(keypoints, width)
  34. target["keypoints"] = keypoints
  35. return image, target
  36. class ToTensor(object):
  37. def __call__(self, image, target):
  38. image = F.to_tensor(image)
  39. return image, target