some warning cleanup

This commit is contained in:
Lily Tsuru 2024-10-14 21:58:02 -04:00
parent 2ecf9af167
commit 3c2d4cee6f
2 changed files with 54 additions and 49 deletions

View file

@ -1,8 +1,11 @@
use anyhow::Context; use anyhow::Context;
use cudarc::{driver::{ use cudarc::{
sys::{CUdeviceptr, CUmemorytype}, driver::{
CudaDevice, CudaSlice, DevicePtr, LaunchAsync, sys::{CUdeviceptr, CUmemorytype},
}, nvrtc::CompileOptions}; CudaDevice, CudaSlice, DevicePtr, LaunchAsync,
},
nvrtc::CompileOptions,
};
use letsplay_gpu::egl_helpers::DeviceContext; use letsplay_gpu::egl_helpers::DeviceContext;
use std::{ use std::{
sync::{Arc, Mutex}, sync::{Arc, Mutex},
@ -281,7 +284,10 @@ fn encoder_thread_hwframe_main(
cuda_device.load_ptx(ptx, "module", &["flip_opengl"])?; cuda_device.load_ptx(ptx, "module", &["flip_opengl"])?;
let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default(); let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default();
// setup basic src stuff
// setup the things that won't change about the cuda memcpy
// src
memcpy.srcXInBytes = 0; memcpy.srcXInBytes = 0;
memcpy.srcY = 0; memcpy.srcY = 0;
memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY; memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
@ -291,12 +297,14 @@ fn encoder_thread_hwframe_main(
memcpy.dstY = 0; memcpy.dstY = 0;
memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE; memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
// Temporary buffer used for opengl flip on the GPU // Temporary buffer used for opengl flip on the GPU. We copy to this buffer,
// then copy the flipped version (using the launched support kernel) to the CUDA device memory ffmpeg
// allocated.
let mut temp_buffer: CudaSlice<u32> = cuda_device.alloc_zeros::<u32>(48).expect("over"); let mut temp_buffer: CudaSlice<u32> = cuda_device.alloc_zeros::<u32>(48).expect("over");
loop { loop {
match rx.try_recv() { match rx.blocking_recv() {
Ok(msg) => match msg { Some(msg) => match msg {
EncodeThreadInput::Init { size } => { EncodeThreadInput::Init { size } => {
frame_number = 0; frame_number = 0;
@ -318,9 +326,13 @@ fn encoder_thread_hwframe_main(
} }
EncodeThreadInput::SendFrame => { EncodeThreadInput::SendFrame => {
// copy gl frame *ON THE GPU* // benchmarking
//use std::time::Instant;
//let start = Instant::now();
// copy gl frame *ON THE GPU* to ffmpeg frame
{ {
let mut gl_ctx = gl_context.lock().expect("you dumb fuck"); let gl_ctx = gl_context.lock().expect("you dumb fuck");
let mut gl_resource = let mut gl_resource =
cuda_resource.lock().expect("couldnt lock GL resource!"); cuda_resource.lock().expect("couldnt lock GL resource!");
@ -351,11 +363,16 @@ fn encoder_thread_hwframe_main(
} }
} }
// copy to the temporary buffer and synchronize
unsafe { unsafe {
cudarc::driver::sys::lib() cudarc::driver::sys::lib()
.cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut()) .cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut())
.result() .result()
.expect("cuMemcpy2D fail epic"); .expect("cuMemcpy2D fail epic");
cudarc::driver::sys::lib()
.cuStreamSynchronize(std::ptr::null_mut())
.result()?;
} }
// launch kernel to flip the opengl framebuffer right-side up // launch kernel to flip the opengl framebuffer right-side up
@ -369,9 +386,9 @@ fn encoder_thread_hwframe_main(
shared_mem_bytes: 0, shared_mem_bytes: 0,
}; };
let flip_opengl = cuda_device let flip_opengl = cuda_device.get_func("module", "flip_opengl").expect(
.get_func("module", "flip_opengl") "for some reason we couldn't get the support kenrel function",
.expect("dumb fucker"); );
unsafe { unsafe {
let frame_ptr = frame.as_mut_ptr(); let frame_ptr = frame.as_mut_ptr();
@ -381,23 +398,24 @@ fn encoder_thread_hwframe_main(
(width * height) as usize * 4usize, (width * height) as usize * 4usize,
); );
flip_opengl flip_opengl.launch(
.launch( launch_config,
launch_config, (&mut temp_buffer, &mut slice, width, height),
(&mut temp_buffer, &mut slice, width, height), )?;
)
.expect("I hate you");
// leak so it doesn't free the memory
// (the device pointer we convert into a slice is owned by ffmpeg, so we shouldn't be the ones
// trying to free it!)
let _ = slice.leak();
// Synchronize for the final time
cudarc::driver::sys::lib() cudarc::driver::sys::lib()
.cuStreamSynchronize(std::ptr::null_mut()) .cuStreamSynchronize(std::ptr::null_mut())
.result() .result()?;
.expect("you banned");
// leak so it doesnt free the memory like a dumbass
let _ = slice.leak();
} }
} }
// FIXME: ideally this would work on-drop but it doesn't.
mapped.unmap().expect("fuck you asshole"); mapped.unmap().expect("fuck you asshole");
gl_ctx.release(); gl_ctx.release();
} }
@ -416,16 +434,17 @@ fn encoder_thread_hwframe_main(
if force_keyframe { if force_keyframe {
force_keyframe = false; force_keyframe = false;
} }
//tracing::info!("encoding frame {frame_number} took {:2?}", start.elapsed());
} }
}, },
Err(TryRecvError::Disconnected) => break, None => break,
Err(TryRecvError::Empty) => {
std::thread::sleep(Duration::from_millis(1));
}
} }
} }
//std::thread::sleep(Duration::from_millis(1));
Ok(()) Ok(())
} }

View file

@ -4,7 +4,7 @@ use anyhow::Context;
use cudarc::driver::CudaDevice; use cudarc::driver::CudaDevice;
use ffmpeg::error::EAGAIN; use ffmpeg::error::EAGAIN;
use ffmpeg::{codec as lavc, packet}; // lavc use ffmpeg::codec as lavc; // lavc
use crate::types::Size; use crate::types::Size;
@ -129,7 +129,7 @@ impl H264Encoder {
max_framerate: u32, max_framerate: u32,
bitrate: usize, bitrate: usize,
) -> anyhow::Result<Self> { ) -> anyhow::Result<Self> {
let (mut encoder, mut video_encoder_context) = let (encoder, mut video_encoder_context) =
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate) create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
.with_context(|| "while trying to create encoder")?; .with_context(|| "while trying to create encoder")?;
@ -186,7 +186,7 @@ impl H264Encoder {
.build() .build()
.with_context(|| "while trying to create CUDA frame context")?; .with_context(|| "while trying to create CUDA frame context")?;
let (mut encoder, mut video_encoder_context) = let (encoder, mut video_encoder_context) =
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate) create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
.with_context(|| "while trying to create encoder")?; .with_context(|| "while trying to create encoder")?;
@ -269,8 +269,6 @@ impl H264Encoder {
(*frame.as_mut_ptr()).height = encoder.height() as i32; (*frame.as_mut_ptr()).height = encoder.height() as i32;
(*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut(); (*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut();
//ffmpeg::sys::av_frame_get_buffer(frame.as_mut_ptr(), 32);
hw_context.get_buffer(&mut frame)?; hw_context.get_buffer(&mut frame)?;
hw_context.get_buffer(&mut frame)?; hw_context.get_buffer(&mut frame)?;
@ -280,13 +278,6 @@ impl H264Encoder {
} }
} }
} }
/*
*/
todo!("FIXME");
} }
pub fn send_frame(&mut self, frame: &ffmpeg::Frame) { pub fn send_frame(&mut self, frame: &ffmpeg::Frame) {
@ -296,16 +287,13 @@ impl H264Encoder {
} }
Self::NvencSWFrame { encoder } => { Self::NvencSWFrame { encoder } => {
// Realistically this should be the same right?
encoder.send_frame(frame).unwrap(); encoder.send_frame(frame).unwrap();
//todo!("Requires support.");
} }
Self::NvencHWFrame { Self::NvencHWFrame {
encoder, encoder,
hw_context, hw_context: _,
} => { } => {
//todo!("Implement send_frame() for NvencHWFrame");
encoder.send_frame(frame).unwrap(); encoder.send_frame(frame).unwrap();
} }
} }
@ -325,9 +313,9 @@ impl H264Encoder {
Self::NvencHWFrame { Self::NvencHWFrame {
encoder, encoder,
hw_context, hw_context: _,
} => { } => {
todo!("Implement send_eof() for NvencHWFrame"); encoder.send_eof().unwrap();
} }
} }
} }
@ -336,11 +324,9 @@ impl H264Encoder {
return match self { return match self {
Self::Software { encoder } => encoder.receive_packet(packet), Self::Software { encoder } => encoder.receive_packet(packet),
Self::NvencSWFrame { encoder } => encoder.receive_packet(packet), Self::NvencSWFrame { encoder } => encoder.receive_packet(packet),
// this might work?
Self::NvencHWFrame { Self::NvencHWFrame {
encoder, encoder,
hw_context, hw_context: _,
} => encoder.receive_packet(packet), } => encoder.receive_packet(packet),
}; };
} }