some warning cleanup
This commit is contained in:
parent
2ecf9af167
commit
3c2d4cee6f
2 changed files with 54 additions and 49 deletions
|
@ -1,8 +1,11 @@
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use cudarc::{driver::{
|
use cudarc::{
|
||||||
sys::{CUdeviceptr, CUmemorytype},
|
driver::{
|
||||||
CudaDevice, CudaSlice, DevicePtr, LaunchAsync,
|
sys::{CUdeviceptr, CUmemorytype},
|
||||||
}, nvrtc::CompileOptions};
|
CudaDevice, CudaSlice, DevicePtr, LaunchAsync,
|
||||||
|
},
|
||||||
|
nvrtc::CompileOptions,
|
||||||
|
};
|
||||||
use letsplay_gpu::egl_helpers::DeviceContext;
|
use letsplay_gpu::egl_helpers::DeviceContext;
|
||||||
use std::{
|
use std::{
|
||||||
sync::{Arc, Mutex},
|
sync::{Arc, Mutex},
|
||||||
|
@ -281,7 +284,10 @@ fn encoder_thread_hwframe_main(
|
||||||
cuda_device.load_ptx(ptx, "module", &["flip_opengl"])?;
|
cuda_device.load_ptx(ptx, "module", &["flip_opengl"])?;
|
||||||
|
|
||||||
let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default();
|
let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default();
|
||||||
// setup basic src stuff
|
|
||||||
|
// setup the things that won't change about the cuda memcpy
|
||||||
|
|
||||||
|
// src
|
||||||
memcpy.srcXInBytes = 0;
|
memcpy.srcXInBytes = 0;
|
||||||
memcpy.srcY = 0;
|
memcpy.srcY = 0;
|
||||||
memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
|
memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
|
||||||
|
@ -291,12 +297,14 @@ fn encoder_thread_hwframe_main(
|
||||||
memcpy.dstY = 0;
|
memcpy.dstY = 0;
|
||||||
memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
|
memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
|
||||||
|
|
||||||
// Temporary buffer used for opengl flip on the GPU
|
// Temporary buffer used for opengl flip on the GPU. We copy to this buffer,
|
||||||
|
// then copy the flipped version (using the launched support kernel) to the CUDA device memory ffmpeg
|
||||||
|
// allocated.
|
||||||
let mut temp_buffer: CudaSlice<u32> = cuda_device.alloc_zeros::<u32>(48).expect("over");
|
let mut temp_buffer: CudaSlice<u32> = cuda_device.alloc_zeros::<u32>(48).expect("over");
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match rx.try_recv() {
|
match rx.blocking_recv() {
|
||||||
Ok(msg) => match msg {
|
Some(msg) => match msg {
|
||||||
EncodeThreadInput::Init { size } => {
|
EncodeThreadInput::Init { size } => {
|
||||||
frame_number = 0;
|
frame_number = 0;
|
||||||
|
|
||||||
|
@ -318,9 +326,13 @@ fn encoder_thread_hwframe_main(
|
||||||
}
|
}
|
||||||
|
|
||||||
EncodeThreadInput::SendFrame => {
|
EncodeThreadInput::SendFrame => {
|
||||||
// copy gl frame *ON THE GPU*
|
// benchmarking
|
||||||
|
//use std::time::Instant;
|
||||||
|
//let start = Instant::now();
|
||||||
|
|
||||||
|
// copy gl frame *ON THE GPU* to ffmpeg frame
|
||||||
{
|
{
|
||||||
let mut gl_ctx = gl_context.lock().expect("you dumb fuck");
|
let gl_ctx = gl_context.lock().expect("you dumb fuck");
|
||||||
let mut gl_resource =
|
let mut gl_resource =
|
||||||
cuda_resource.lock().expect("couldnt lock GL resource!");
|
cuda_resource.lock().expect("couldnt lock GL resource!");
|
||||||
|
|
||||||
|
@ -351,11 +363,16 @@ fn encoder_thread_hwframe_main(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// copy to the temporary buffer and synchronize
|
||||||
unsafe {
|
unsafe {
|
||||||
cudarc::driver::sys::lib()
|
cudarc::driver::sys::lib()
|
||||||
.cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut())
|
.cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut())
|
||||||
.result()
|
.result()
|
||||||
.expect("cuMemcpy2D fail epic");
|
.expect("cuMemcpy2D fail epic");
|
||||||
|
|
||||||
|
cudarc::driver::sys::lib()
|
||||||
|
.cuStreamSynchronize(std::ptr::null_mut())
|
||||||
|
.result()?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// launch kernel to flip the opengl framebuffer right-side up
|
// launch kernel to flip the opengl framebuffer right-side up
|
||||||
|
@ -369,9 +386,9 @@ fn encoder_thread_hwframe_main(
|
||||||
shared_mem_bytes: 0,
|
shared_mem_bytes: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
let flip_opengl = cuda_device
|
let flip_opengl = cuda_device.get_func("module", "flip_opengl").expect(
|
||||||
.get_func("module", "flip_opengl")
|
"for some reason we couldn't get the support kenrel function",
|
||||||
.expect("dumb fucker");
|
);
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
let frame_ptr = frame.as_mut_ptr();
|
let frame_ptr = frame.as_mut_ptr();
|
||||||
|
@ -381,23 +398,24 @@ fn encoder_thread_hwframe_main(
|
||||||
(width * height) as usize * 4usize,
|
(width * height) as usize * 4usize,
|
||||||
);
|
);
|
||||||
|
|
||||||
flip_opengl
|
flip_opengl.launch(
|
||||||
.launch(
|
launch_config,
|
||||||
launch_config,
|
(&mut temp_buffer, &mut slice, width, height),
|
||||||
(&mut temp_buffer, &mut slice, width, height),
|
)?;
|
||||||
)
|
|
||||||
.expect("I hate you");
|
|
||||||
|
|
||||||
|
// leak so it doesn't free the memory
|
||||||
|
// (the device pointer we convert into a slice is owned by ffmpeg, so we shouldn't be the ones
|
||||||
|
// trying to free it!)
|
||||||
|
let _ = slice.leak();
|
||||||
|
|
||||||
|
// Synchronize for the final time
|
||||||
cudarc::driver::sys::lib()
|
cudarc::driver::sys::lib()
|
||||||
.cuStreamSynchronize(std::ptr::null_mut())
|
.cuStreamSynchronize(std::ptr::null_mut())
|
||||||
.result()
|
.result()?;
|
||||||
.expect("you banned");
|
|
||||||
|
|
||||||
// leak so it doesnt free the memory like a dumbass
|
|
||||||
let _ = slice.leak();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: ideally this would work on-drop but it doesn't.
|
||||||
mapped.unmap().expect("fuck you asshole");
|
mapped.unmap().expect("fuck you asshole");
|
||||||
gl_ctx.release();
|
gl_ctx.release();
|
||||||
}
|
}
|
||||||
|
@ -416,16 +434,17 @@ fn encoder_thread_hwframe_main(
|
||||||
if force_keyframe {
|
if force_keyframe {
|
||||||
force_keyframe = false;
|
force_keyframe = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//tracing::info!("encoding frame {frame_number} took {:2?}", start.elapsed());
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
Err(TryRecvError::Disconnected) => break,
|
None => break,
|
||||||
Err(TryRecvError::Empty) => {
|
|
||||||
std::thread::sleep(Duration::from_millis(1));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//std::thread::sleep(Duration::from_millis(1));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ use anyhow::Context;
|
||||||
use cudarc::driver::CudaDevice;
|
use cudarc::driver::CudaDevice;
|
||||||
use ffmpeg::error::EAGAIN;
|
use ffmpeg::error::EAGAIN;
|
||||||
|
|
||||||
use ffmpeg::{codec as lavc, packet}; // lavc
|
use ffmpeg::codec as lavc; // lavc
|
||||||
|
|
||||||
use crate::types::Size;
|
use crate::types::Size;
|
||||||
|
|
||||||
|
@ -129,7 +129,7 @@ impl H264Encoder {
|
||||||
max_framerate: u32,
|
max_framerate: u32,
|
||||||
bitrate: usize,
|
bitrate: usize,
|
||||||
) -> anyhow::Result<Self> {
|
) -> anyhow::Result<Self> {
|
||||||
let (mut encoder, mut video_encoder_context) =
|
let (encoder, mut video_encoder_context) =
|
||||||
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
|
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
|
||||||
.with_context(|| "while trying to create encoder")?;
|
.with_context(|| "while trying to create encoder")?;
|
||||||
|
|
||||||
|
@ -186,7 +186,7 @@ impl H264Encoder {
|
||||||
.build()
|
.build()
|
||||||
.with_context(|| "while trying to create CUDA frame context")?;
|
.with_context(|| "while trying to create CUDA frame context")?;
|
||||||
|
|
||||||
let (mut encoder, mut video_encoder_context) =
|
let (encoder, mut video_encoder_context) =
|
||||||
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
|
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
|
||||||
.with_context(|| "while trying to create encoder")?;
|
.with_context(|| "while trying to create encoder")?;
|
||||||
|
|
||||||
|
@ -269,8 +269,6 @@ impl H264Encoder {
|
||||||
(*frame.as_mut_ptr()).height = encoder.height() as i32;
|
(*frame.as_mut_ptr()).height = encoder.height() as i32;
|
||||||
(*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut();
|
(*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut();
|
||||||
|
|
||||||
//ffmpeg::sys::av_frame_get_buffer(frame.as_mut_ptr(), 32);
|
|
||||||
|
|
||||||
hw_context.get_buffer(&mut frame)?;
|
hw_context.get_buffer(&mut frame)?;
|
||||||
hw_context.get_buffer(&mut frame)?;
|
hw_context.get_buffer(&mut frame)?;
|
||||||
|
|
||||||
|
@ -280,13 +278,6 @@ impl H264Encoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
todo!("FIXME");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn send_frame(&mut self, frame: &ffmpeg::Frame) {
|
pub fn send_frame(&mut self, frame: &ffmpeg::Frame) {
|
||||||
|
@ -296,16 +287,13 @@ impl H264Encoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::NvencSWFrame { encoder } => {
|
Self::NvencSWFrame { encoder } => {
|
||||||
// Realistically this should be the same right?
|
|
||||||
encoder.send_frame(frame).unwrap();
|
encoder.send_frame(frame).unwrap();
|
||||||
//todo!("Requires support.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::NvencHWFrame {
|
Self::NvencHWFrame {
|
||||||
encoder,
|
encoder,
|
||||||
hw_context,
|
hw_context: _,
|
||||||
} => {
|
} => {
|
||||||
//todo!("Implement send_frame() for NvencHWFrame");
|
|
||||||
encoder.send_frame(frame).unwrap();
|
encoder.send_frame(frame).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -325,9 +313,9 @@ impl H264Encoder {
|
||||||
|
|
||||||
Self::NvencHWFrame {
|
Self::NvencHWFrame {
|
||||||
encoder,
|
encoder,
|
||||||
hw_context,
|
hw_context: _,
|
||||||
} => {
|
} => {
|
||||||
todo!("Implement send_eof() for NvencHWFrame");
|
encoder.send_eof().unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -336,11 +324,9 @@ impl H264Encoder {
|
||||||
return match self {
|
return match self {
|
||||||
Self::Software { encoder } => encoder.receive_packet(packet),
|
Self::Software { encoder } => encoder.receive_packet(packet),
|
||||||
Self::NvencSWFrame { encoder } => encoder.receive_packet(packet),
|
Self::NvencSWFrame { encoder } => encoder.receive_packet(packet),
|
||||||
|
|
||||||
// this might work?
|
|
||||||
Self::NvencHWFrame {
|
Self::NvencHWFrame {
|
||||||
encoder,
|
encoder,
|
||||||
hw_context,
|
hw_context: _,
|
||||||
} => encoder.receive_packet(packet),
|
} => encoder.receive_packet(packet),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue