视频处理优化建议
Tạo vào: 6 tháng 11, 2024
import cv2
import numpy as np
import time
import pygame
from OpenGL.GL import *
from OpenGL.GLUT import *
from OpenGL.GLU import *
from pygame.locals import *
from tqdm import tqdm
def init_gl(width, height):
    """Create a pygame window with a 2D orthographic OpenGL context.

    Args:
        width: Window/viewport width in pixels.
        height: Window/viewport height in pixels.
    """
    pygame.display.set_mode((width, height), DOUBLEBUF | OPENGL)
    glViewport(0, 0, width, height)
    glMatrixMode(GL_PROJECTION)
    glLoadIdentity()
    # Map pixel coordinates 1:1 onto the viewport (origin at bottom-left).
    gluOrtho2D(0, width, 0, height)
    glMatrixMode(GL_MODELVIEW)
    # Standard alpha blending so transparent overlays composite correctly.
    glEnable(GL_BLEND)
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
def load_video_frames(video_path):
    """Read every frame of a video file into memory.

    NOTE: this holds the whole clip in RAM; prefer streaming reads for
    long videos (a 1080p clip can easily exceed several GB).

    Args:
        video_path: Path to the input video file.

    Returns:
        List of BGR frames (numpy arrays); empty if the file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or decode error) — stop reading.
            break
        frames.append(frame)
    cap.release()
    return frames
def render_frame(frame, x, y, width, height, alpha):
    """Upload one BGR frame as an OpenGL texture and draw it as a quad.

    Args:
        frame: BGR image (numpy array) to draw.
        x, y: Bottom-left corner of the quad in window pixel coordinates.
        width, height: Size to draw the frame at, in pixels.
        alpha: Global opacity in [0.0, 1.0] applied to the whole frame.
    """
    # Convert BGR to RGBA so the frame carries a per-pixel alpha channel.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
    frame = cv2.resize(frame, (width, height))
    # Scale the alpha channel by the requested global opacity.
    frame[:, :, 3] = (frame[:, :, 3] * alpha).astype(np.uint8)
    # Create a per-call texture.
    # NOTE(review): GL_TEXTURE_2D is never glEnable()d in this file — confirm
    # texturing is enabled elsewhere, otherwise the quad draws untextured.
    texture_id = glGenTextures(1)
    glBindTexture(GL_TEXTURE_2D, texture_id)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA,
                 GL_UNSIGNED_BYTE, frame)
    # Draw the textured quad at (x, y).
    glLoadIdentity()
    glTranslatef(x, y, 0)
    glBegin(GL_QUADS)
    glTexCoord2f(0, 0)
    glVertex2f(0, 0)
    glTexCoord2f(1, 0)
    glVertex2f(width, 0)
    glTexCoord2f(1, 1)
    glVertex2f(width, height)
    glTexCoord2f(0, 1)
    glVertex2f(0, height)
    glEnd()
    # Delete the texture immediately so we don't leak one per frame.
    glDeleteTextures([texture_id])
def process_video(background_video_path, overlay_video_path, output_video_path,
                  x, y, width, height, alpha, frame_rate=30):
    """Composite an overlay video onto a background video via OpenGL.

    Renders each frame pair with OpenGL/pygame, captures the window through a
    temporary PNG, and writes the result with OpenCV. The per-frame disk
    round-trip (temp_frame.png) is the dominant cost — this is the slow path.

    Args:
        background_video_path: Path of the background video.
        overlay_video_path: Path of the overlay video.
        output_video_path: Path of the output video.
        x, y: Overlay position (pixels, OpenGL bottom-left origin).
        width, height: Overlay size in pixels.
        alpha: Overlay opacity in [0.0, 1.0].
        frame_rate: Output frame rate (default 30).
    """
    # Load background and overlay frames (entire videos held in memory).
    background_frames = load_video_frames(background_video_path)
    overlay_frames = load_video_frames(overlay_video_path)

    # Output video settings; output matches the background frame size.
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    output_video = cv2.VideoWriter(
        output_video_path, fourcc, frame_rate,
        (background_frames[0].shape[1], background_frames[0].shape[0]))

    # Initialize pygame and OpenGL at the background resolution.
    pygame.init()
    init_gl(background_frames[0].shape[1], background_frames[0].shape[0])

    start_time = time.time()
    # Process frame pairs up to the shorter of the two videos.
    for i in tqdm(range(min(len(background_frames), len(overlay_frames))),
                  desc="Processing video frames", unit="frame"):
        glClear(GL_COLOR_BUFFER_BIT)
        # Background: full frame, fully opaque.
        render_frame(background_frames[i], 0, 0,
                     background_frames[i].shape[1],
                     background_frames[i].shape[0], 1.0)
        # Overlay at (x, y) with the requested opacity.
        render_frame(overlay_frames[i], x, y, width, height, alpha)

        # Flip, then capture the window via a temporary image file
        # (slow: one PNG write + read per frame).
        pygame.display.flip()
        pygame.image.save(pygame.display.get_surface(), "temp_frame.png")
        temp_frame = cv2.imread("temp_frame.png")
        output_video.write(temp_frame)

    end_time = time.time()
    total_time = end_time - start_time
    print(f"视频处理完成,耗时: {total_time:.2f}秒")

    output_video.release()
    pygame.quit()
# NOTE: the pasted transcript had `if name == "main":` — markdown stripped the
# dunder underscores; restored to the correct script-entry guard.
if __name__ == "__main__":
    # Input video paths.
    background_video_path = '/mnt/lgs/Open-Sora/moviepys/input/291514.mp4'  # background video
    overlay_video_path = '/mnt/lgs/Open-Sora/moviepys/input/17095_1727158011_raw.mp4'  # overlay video
    output_video_path = '/mnt/lgs/Open-Sora/gstreamers/output/output_video.mp4'  # output video

    # Overlay opacity and placement.
    alpha = 0.5                # opacity (0.0 - 1.0)
    x, y = 100, 100            # overlay position
    width, height = 640, 360   # overlay size

    process_video(background_video_path, overlay_video_path, output_video_path,
                  x, y, width, height, alpha)
opengl_venvroot@ecs-60954587:/mnt/lgs/Open-Sora# /mnt/lgs/Open-Sora/opengl_venv/bin/python /mnt/lgs/Open-Sora/gstreamers/video_overlay.py
pygame 2.6.0 (SDL 2.28.4, Python 3.10.6)
Hello from the pygame community. https://www.pygame.org/contribute.html
OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
ALSA lib confmisc.c:767:(parse_card) cannot find card '0'
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_card_driver returned error: No such file or directory
ALSA lib confmisc.c:392:(snd_func_concat) error evaluating strings
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory
ALSA lib confmisc.c:1246:(snd_func_refer) error evaluating name
ALSA lib conf.c:4732:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory
ALSA lib conf.c:5220:(snd_config_expand) Evaluate error: No such file or directory
ALSA lib pcm.c:2642:(snd_pcm_open_noupdate) Unknown PCM default
Processing video frames: 100%|███████████████████████████████████████████████████████████████████████████████| 566/566 [01:07<00:00, 8.41frame/s]
视频处理完成,耗时: 67.33秒
太慢了, 8.41frame/s
You're experiencing slow performance because the current approach involves rendering each frame with OpenGL and Pygame, saving it as a temporary PNG image, and then reading it back with OpenCV to write to the output video. This method introduces significant overhead due to disk I/O operations and the computational cost of rendering each frame individually.
To achieve faster processing, you can streamline your workflow by leveraging OpenCV's efficient frame manipulation capabilities directly, eliminating the need for OpenGL and Pygame entirely. OpenCV is highly optimized for video processing tasks and can handle frame blending much more efficiently.
Here's how you can optimize your script:
Use OpenCV for Frame Overlaying: OpenCV can handle alpha blending natively, which allows you to overlay one video onto another efficiently without the need for external rendering libraries.
Process Frames Sequentially: Instead of loading all frames into memory at once, process each frame sequentially. This reduces memory usage and speeds up processing.
Avoid Disk I/O for Temporary Frames: By performing all operations in memory, you eliminate the need for saving and reading temporary image files, which significantly reduces processing time.
Optimize Video Writing: Ensure that you're using an efficient codec and that the video writer is properly configured for your output format.
Below is the optimized script using OpenCV only:
pythonimport cv2 import numpy as np from tqdm import tqdm def process_video(background_video_path, overlay_video_path, output_video_path, x, y, width, height, alpha, frame_rate=30): # Open background and overlay videos background_cap = cv2.VideoCapture(background_video_path) overlay_cap = cv2.VideoCapture(overlay_video_path) if not background_cap.isOpened(): print(f"Error opening background video: {background_video_path}") return if not overlay_cap.isOpened(): print(f"Error opening overlay video: {overlay_video_path}") return # Get background video properties bg_width = int(background_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) bg_height = int(background_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) bg_fps = background_cap.get(cv2.CAP_PROP_FPS) bg_total_frames = int(background_cap.get(cv2.CAP_PROP_FRAME_COUNT)) # Get overlay video properties ov_fps = overlay_cap.get(cv2.CAP_PROP_FPS) ov_total_frames = int(overlay_cap.get(cv2.CAP_PROP_FRAME_COUNT)) # Set frame rate to min of both videos if not specified if frame_rate is None: frame_rate = min(bg_fps, ov_fps) # Initialize VideoWriter fourcc = cv2.VideoWriter_fourcc(*'mp4v') # You can choose other codecs like 'XVID' if needed output_video = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (bg_width, bg_height)) # Calculate total frames to process based on background video total_frames = bg_total_frames # Optional: Adjust overlay video frame rate to match background if ov_fps != frame_rate: overlay_frame_step = int(round(ov_fps / frame_rate)) else: overlay_frame_step = 1 # Process frames with tqdm(total=total_frames, desc="Processing video frames", unit="frame") as pbar: for i in range(total_frames): ret_bg, frame_bg = background_cap.read() if not ret_bg: break # End of background video # Determine corresponding overlay frame overlay_frame_idx = i * overlay_frame_step overlay_cap.set(cv2.CAP_PROP_POS_FRAMES, overlay_frame_idx) ret_overlay, frame_overlay = overlay_cap.read() if not ret_overlay: # Optionally loop the overlay video or 
use the last frame overlay_cap.set(cv2.CAP_PROP_POS_FRAMES, 0) ret_overlay, frame_overlay = overlay_cap.read() if not ret_overlay: print("Error reading overlay video.") break # Resize overlay frame frame_overlay_resized = cv2.resize(frame_overlay, (width, height)) # Ensure the overlay fits within the background frame if y + height > bg_height or x + width > bg_width: print("Overlay frame exceeds background frame dimensions.") break # Create ROI on background frame roi = frame_bg[y:y+height, x:x+width] # Convert overlay to float and normalize alpha overlay = frame_overlay_resized.astype(float) background = roi.astype(float) # Blend the overlay with the background ROI blended = cv2.addWeighted(overlay, alpha, background, 1 - alpha, 0) # Replace the ROI on the background frame with the blended result frame_bg[y:y+height, x:x+width] = blended.astype(np.uint8) # Write the frame to the output video output_video.write(frame_bg) pbar.update(1) # Release resources background_cap.release() overlay_cap.release() output_video.release() print("视频处理完成。") if __name__ == "__main__": # 输入视频路径 background_video_path = '/mnt/lgs/Open-Sora/moviepys/input/291514.mp4' # 背景视频 overlay_video_path = '/mnt/lgs/Open-Sora/moviepys/input/17095_1727158011_raw.mp4' # 叠加视频 output_video_path = '/mnt/lgs/Open-Sora/gstreamers/output/output_video.mp4' # 输出视频 # 透明度和位置调整 alpha = 0.5 # 透明度 (0.0 - 1.0) x, y = 100, 100 # 叠加视频位置 width, height = 640, 360 # 叠加视频大小 process_video(background_video_path, overlay_video_path, output_video_path, x, y, width, height, alpha)
Eliminated Pygame and OpenGL:
Frame-by-Frame Processing with OpenCV:
Direct Frame Blending:
cv2.addWeighted
function efficiently blends the overlay frame with the background frame using the specified alpha value.
Avoiding Disk I/O for Temporary Frames:
Handling Different Frame Rates:
Progress Tracking with tqdm:
Preallocate VideoWriter:
VideoWriter
is correctly initialized with the right codec and frame size to prevent runtime adjustments that can slow down processing.Avoid Repeated Resizing:
Looping Overlay Video:
GPU Acceleration:
Multithreading or Multiprocessing:
multiprocessing
or multithreading
modules to parallelize frame processing, taking full advantage of multi-core CPUs.
With these optimizations, the processing speed should increase significantly. Here's an approximate expectation based on the improvements:
By leveraging OpenCV's efficient frame processing capabilities and eliminating unnecessary rendering steps, you can achieve a substantial increase in processing speed. This approach not only simplifies the workflow but also makes the script more maintainable and scalable for larger video processing tasks.
If you still require specific OpenGL features for more complex rendering, consider integrating GPU-based processing libraries that can handle both rendering and frame manipulation more efficiently. However, for straightforward overlay tasks with transparency, OpenCV alone should suffice and provide the best performance.
import cv2
import numpy as np
from tqdm import tqdm
def process_video(background_video_path, overlay_video_path, output_video_path,
                  x, y, width, height, alpha, frame_rate=30):
    """Overlay one video onto another with cv2.addWeighted (CPU-only path).

    Streams both videos frame by frame (no full in-memory load, no temp
    files), blends the overlay into a ROI of the background, and writes
    the result. Loops the overlay video if it is shorter.

    Args:
        background_video_path: Path of the background video.
        overlay_video_path: Path of the overlay video.
        output_video_path: Path of the output video.
        x, y: Top-left corner of the overlay ROI (OpenCV image coordinates).
        width, height: Overlay size in pixels.
        alpha: Overlay opacity in [0.0, 1.0].
        frame_rate: Output frame rate; None means min of the two inputs.
    """
    # Open background and overlay videos.
    background_cap = cv2.VideoCapture(background_video_path)
    overlay_cap = cv2.VideoCapture(overlay_video_path)

    if not background_cap.isOpened():
        print(f"Error opening background video: {background_video_path}")
        return
    if not overlay_cap.isOpened():
        print(f"Error opening overlay video: {overlay_video_path}")
        return

    # Background video properties.
    bg_width = int(background_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    bg_height = int(background_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    bg_fps = background_cap.get(cv2.CAP_PROP_FPS)
    bg_total_frames = int(background_cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Overlay video properties.
    ov_fps = overlay_cap.get(cv2.CAP_PROP_FPS)
    ov_total_frames = int(overlay_cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Fall back to the slower of the two frame rates if none was given.
    if frame_rate is None:
        frame_rate = min(bg_fps, ov_fps)

    # Initialize VideoWriter (mp4v; swap for 'XVID'/'H264' if needed).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_video = cv2.VideoWriter(output_video_path, fourcc, frame_rate,
                                   (bg_width, bg_height))

    # Total frames to process follows the background video.
    total_frames = bg_total_frames

    # Step through the overlay faster/slower to roughly match frame rates.
    if ov_fps != frame_rate:
        overlay_frame_step = int(round(ov_fps / frame_rate))
    else:
        overlay_frame_step = 1

    with tqdm(total=total_frames, desc="Processing video frames", unit="frame") as pbar:
        for i in range(total_frames):
            ret_bg, frame_bg = background_cap.read()
            if not ret_bg:
                break  # End of background video.

            # Seek to the corresponding overlay frame.
            # NOTE: per-frame CAP_PROP_POS_FRAMES seeks are expensive;
            # sequential reads are much faster when rates match.
            overlay_frame_idx = i * overlay_frame_step
            overlay_cap.set(cv2.CAP_PROP_POS_FRAMES, overlay_frame_idx)
            ret_overlay, frame_overlay = overlay_cap.read()
            if not ret_overlay:
                # Overlay exhausted: loop it from the start.
                overlay_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                ret_overlay, frame_overlay = overlay_cap.read()
                if not ret_overlay:
                    print("Error reading overlay video.")
                    break

            # Resize overlay frame to the requested size.
            frame_overlay_resized = cv2.resize(frame_overlay, (width, height))

            # Ensure the overlay fits inside the background frame.
            if y + height > bg_height or x + width > bg_width:
                print("Overlay frame exceeds background frame dimensions.")
                break

            # Blend the overlay into the ROI of the background.
            roi = frame_bg[y:y+height, x:x+width]
            overlay = frame_overlay_resized.astype(float)
            background = roi.astype(float)
            blended = cv2.addWeighted(overlay, alpha, background, 1 - alpha, 0)
            frame_bg[y:y+height, x:x+width] = blended.astype(np.uint8)

            output_video.write(frame_bg)
            pbar.update(1)

    # Release resources.
    background_cap.release()
    overlay_cap.release()
    output_video.release()
    print("视频处理完成。")
# NOTE: the pasted transcript had `if name == "main":` — markdown stripped the
# dunder underscores; restored to the correct script-entry guard.
if __name__ == "__main__":
    # Input video paths.
    background_video_path = '/mnt/lgs/Open-Sora/moviepys/input/291514.mp4'  # background video
    overlay_video_path = '/mnt/lgs/Open-Sora/moviepys/input/17095_1727158011_raw.mp4'  # overlay video
    output_video_path = '/mnt/lgs/Open-Sora/gstreamers/output/output_video.mp4'  # output video

    # Overlay opacity and placement.
    alpha = 0.5                # opacity (0.0 - 1.0)
    x, y = 100, 100            # overlay position
    width, height = 640, 360   # overlay size

    process_video(background_video_path, overlay_video_path, output_video_path,
                  x, y, width, height, alpha)
opengl_venvroot@ecs-60954587:/mnt/lgs/Open-Sora# /mnt/lgs/Open-Sora/opengl_venv/bin/python /mnt/lgs/Open-Sora/gstreamers/video_overlay.py
Processing video frames: 100%|████████████████████████████████████████████████████████████████████████████████| 1366/1366 [03:09<00:00, 7.20frame/s]
视频处理完成。
太慢了,提提速,中文回答,给出修改后的全部代码,我有gpu呀
您可以通过利用GPU加速来提高视频处理速度。以下是使用OpenCV的CUDA模块和CuPy库来加速处理的完整代码。在这个版本中,我们将计算密集型的操作移到了GPU上,从而提高了处理速度。
注意:要运行以下代码,您需要:
pip install cupy-cuda11x
,根据您的CUDA版本选择合适的CuPy版本)。pythonimport cv2 import numpy as np import cupy as cp from tqdm import tqdm def process_video(background_video_path, overlay_video_path, output_video_path, x, y, width, height, alpha, frame_rate=None): # 打开背景和叠加视频 background_cap = cv2.VideoCapture(background_video_path) overlay_cap = cv2.VideoCapture(overlay_video_path) if not background_cap.isOpened(): print(f"无法打开背景视频:{background_video_path}") return if not overlay_cap.isOpened(): print(f"无法打开叠加视频:{overlay_video_path}") return # 获取背景视频属性 bg_width = int(background_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) bg_height = int(background_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) bg_fps = background_cap.get(cv2.CAP_PROP_FPS) bg_total_frames = int(background_cap.get(cv2.CAP_PROP_FRAME_COUNT)) # 获取叠加视频属性 ov_fps = overlay_cap.get(cv2.CAP_PROP_FPS) ov_total_frames = int(overlay_cap.get(cv2.CAP_PROP_FRAME_COUNT)) # 如果未指定帧率,则使用背景视频的帧率 if frame_rate is None: frame_rate = bg_fps # 初始化VideoWriter fourcc = cv2.VideoWriter_fourcc(*'mp4v') output_video = cv2.VideoWriter(output_video_path, fourcc, frame_rate, (bg_width, bg_height)) # 计算要处理的总帧数 total_frames = min(bg_total_frames, ov_total_frames) # 调整叠加视频帧率以匹配背景视频 ov_frame_interval = ov_fps / frame_rate if ov_fps != frame_rate else 1 # 预先加载并调整叠加视频帧到GPU内存 overlay_frames_gpu = [] print("加载并预处理叠加视频帧...") for i in tqdm(range(ov_total_frames), desc="加载叠加视频", unit="frame"): ret_overlay, frame_overlay = overlay_cap.read() if not ret_overlay: break frame_overlay_resized = cv2.resize(frame_overlay, (width, height)) # 将帧转换为GPU数组 frame_overlay_gpu = cp.asarray(frame_overlay_resized, dtype=cp.float32) overlay_frames_gpu.append(frame_overlay_gpu) # 重置叠加视频指针 overlay_cap.release() print("开始处理视频帧...") # 处理帧 with tqdm(total=total_frames, desc="处理视频帧", unit="frame") as pbar: for i in range(total_frames): ret_bg, frame_bg = background_cap.read() if not ret_bg: break # 背景视频结束 # 将背景帧上传到GPU frame_bg_gpu = cp.asarray(frame_bg, dtype=cp.float32) # 获取对应的叠加帧 overlay_idx = int(i * ov_fps / bg_fps) if 
overlay_idx >= len(overlay_frames_gpu): overlay_idx = overlay_idx % len(overlay_frames_gpu) # 如果叠加视频较短,则循环播放 frame_overlay_gpu = overlay_frames_gpu[overlay_idx] # 检查叠加帧是否在背景帧范围内 if y + height > bg_height or x + width > bg_width: print("叠加帧超出了背景帧的范围。") break # 创建背景帧的ROI roi_gpu = frame_bg_gpu[y:y+height, x:x+width] # 进行加权叠加 blended_gpu = cp.addWeighted(frame_overlay_gpu, alpha, roi_gpu, 1 - alpha, 0) # 将结果复制回背景帧的ROI frame_bg_gpu[y:y+height, x:x+width] = blended_gpu # 将处理后的帧从GPU复制回CPU内存 frame_result = cp.asnumpy(frame_bg_gpu).astype(np.uint8) # 写入输出视频 output_video.write(frame_result) pbar.update(1) # 释放资源 background_cap.release() output_video.release() print("视频处理完成。") if __name__ == "__main__": # 输入视频路径 background_video_path = '/mnt/lgs/Open-Sora/moviepys/input/291514.mp4' # 背景视频 overlay_video_path = '/mnt/lgs/Open-Sora/moviepys/input/17095_1727158011_raw.mp4' # 叠加视频 output_video_path = '/mnt/lgs/Open-Sora/gstreamers/output/output_video.mp4' # 输出视频 # 透明度和位置调整 alpha = 0.5 # 透明度 (0.0 - 1.0) x, y = 100, 100 # 叠加视频位置 width, height = 640, 360 # 叠加视频大小 process_video(background_video_path, overlay_video_path, output_video_path, x, y, width, height, alpha)
使用CuPy加速计算:
cp.asarray()
将NumPy数组转换为CuPy数组。cp.addWeighted()
在GPU上执行加权叠加。预加载叠加视频帧:
循环播放叠加视频:
注意CUDA和CuPy的版本兼容性:
cupy-cuda11x
。安装支持CUDA的OpenCV:
安装CuPy:
根据您的CUDA版本安装对应的CuPy版本。例如,对于CUDA 11.1:
bashpip install cupy-cuda111
检查CUDA环境:
避免预加载所有叠加帧:
批量处理帧:
使用更高效的视频编解码器:
VideoWriter
时,尝试使用更高效的编码器,如H.264('H264'
),不过需要确保您的OpenCV支持该编码器。调整线程数:
GPU内存限制:
环境配置复杂性:
通过利用GPU加速,特别是使用CuPy库在GPU上执行数组运算,可以显著提高视频处理速度。请确保按照上述步骤正确设置您的环境,并根据您的硬件配置进行相应的调整。
希望这能帮助您提高视频处理速度!