# video_2_slides.py
import argparse
import os
import sys
import time

import cv2

from frame_differencing import capture_slides_frame_diff
from post_process import remove_duplicates
from utils import resize_image_frame, create_output_directory, convert_slides_to_pdf
  9. # -------------- Initializations ---------------------
  10. FRAME_BUFFER_HISTORY = 15 # Length of the frame buffer history to model background.
  11. DEC_THRESH = 0.75 # Threshold value, above which it is marked foreground, else background.
  12. DIST_THRESH = 100 # Threshold on the squared distance between the pixel and the sample to decide whether a pixel is close to that sample.
  13. MIN_PERCENT = 0.15 # %age threshold to check if there is motion across subsequent frames
  14. MAX_PERCENT = 0.01 # %age threshold to determine if the motion across frames has stopped.
  15. # ----------------------------------------------------
  16. def capture_slides_bg_modeling(video_path, output_dir_path, type_bgsub, history, threshold, MIN_PERCENT_THRESH, MAX_PERCENT_THRESH):
  17. print(f"Using {type_bgsub} for Background Modeling...")
  18. print('---'*10)
  19. if type_bgsub == 'GMG':
  20. bg_sub = cv2.bgsegm.createBackgroundSubtractorGMG(initializationFrames=history, decisionThreshold=threshold)
  21. elif type_bgsub == 'KNN':
  22. bg_sub = cv2.createBackgroundSubtractorKNN(history=history, dist2Threshold=threshold, detectShadows=False)
  23. capture_frame = False
  24. screenshots_count = 0
  25. # Capture video frames.
  26. cap = cv2.VideoCapture(video_path)
  27. if not cap.isOpened():
  28. print('Unable to open video file: ', video_path)
  29. sys.exit()
  30. start = time.time()
  31. # Loop over subsequent frames.
  32. while cap.isOpened():
  33. ret, frame = cap.read()
  34. if not ret:
  35. break
  36. # Create a copy of the original frame.
  37. orig_frame = frame.copy()
  38. # Resize the frame keeping aspect ratio.
  39. frame = resize_image_frame(frame, resize_width=640)
  40. # Apply each frame through the background subtractor.
  41. fg_mask = bg_sub.apply(frame)
  42. # Compute the percentage of the Foreground mask."
  43. p_non_zero = (cv2.countNonZero(fg_mask) / (1.0 * fg_mask.size)) * 100
  44. # %age of non-zero pixels < MAX_PERCENT_THRESH, implies motion has stopped.
  45. # Therefore, capture the frame.
  46. if p_non_zero < MAX_PERCENT_THRESH and not capture_frame:
  47. capture_frame = True
  48. screenshots_count += 1
  49. png_filename = f"{screenshots_count:03}.png"
  50. out_file_path = os.path.join(output_dir_path, png_filename)
  51. print(f"Saving file at: {out_file_path}")
  52. cv2.imwrite(out_file_path, orig_frame)
  53. # p_non_zero >= MIN_PERCENT_THRESH, indicates motion/animations.
  54. # Hence wait till the motion across subsequent frames has settled down.
  55. elif capture_frame and p_non_zero >= MIN_PERCENT_THRESH:
  56. capture_frame = False
  57. end_time = time.time()
  58. print('***'*10,'\n')
  59. print("Statistics:")
  60. print('---'*10)
  61. print(f'Total Time taken: {round(end_time-start, 3)} secs')
  62. print(f'Total Screenshots captured: {screenshots_count}')
  63. print('---'*10,'\n')
  64. # Release Video Capture object.
  65. cap.release()
  66. if __name__ == "__main__":
  67. parser = argparse.ArgumentParser(description="This script is used to convert video frames into slide PDFs.")
  68. parser.add_argument("-v", "--video_file_path", help="Path to the video file", type=str)
  69. parser.add_argument("-o", "--out_dir", default = 'output_results', help="Path to the output directory", type=str)
  70. parser.add_argument("--type", help = "type of background subtraction to be used", default = 'GMG',
  71. choices=['Frame_Diff', 'GMG', 'KNN'], type=str)
  72. parser.add_argument("--no_post_process", action="store_true", default=False, help="flag to apply post processing or not")
  73. parser.add_argument("--convert_to_pdf", action="store_true", default=False, help="flag to convert the entire image set to pdf or not")
  74. args = parser.parse_args()
  75. video_path = args.video_file_path
  76. output_dir_path = args.out_dir
  77. type_bg_sub = args.type
  78. output_dir_path = create_output_directory(video_path, output_dir_path, type_bg_sub)
  79. if type_bg_sub.lower() == 'frame_diff':
  80. capture_slides_frame_diff(video_path, output_dir_path)
  81. else:
  82. if type_bg_sub.lower() == 'gmg':
  83. thresh = DEC_THRESH
  84. elif type_bg_sub.lower() == 'knn':
  85. thresh = DIST_THRESH
  86. capture_slides_bg_modeling(video_path, output_dir_path, type_bgsub=type_bg_sub,
  87. history=FRAME_BUFFER_HISTORY, threshold=thresh,
  88. MIN_PERCENT_THRESH=MIN_PERCENT, MAX_PERCENT_THRESH=MAX_PERCENT)
  89. # Perform post-processing using difference hashing technique to remove duplicate slides.
  90. if not args.no_post_process:
  91. remove_duplicates(output_dir_path)
  92. if args.convert_to_pdf:
  93. convert_slides_to_pdf(video_path, output_dir_path)