ffv1enc_vulkan: switch to receive_packet

This allows the encoder to fully saturate all queues the GPU
has, giving a good 10% speedup in certain cases and resolutions.

This also improves error resilience: if an allocation fails,
the encoder now properly cleans up after itself.
This commit is contained in:
Lynne 2024-11-20 08:10:22 +01:00
parent 4fefc6e80c
commit d8f301cdf2
Signed by: Lynne
GPG key ID: A2FEA5F03F034464
2 changed files with 325 additions and 115 deletions

View file

@ -19,6 +19,7 @@
*/ */
#include "libavutil/crc.h" #include "libavutil/crc.h"
#include "libavutil/mem.h"
#include "libavutil/vulkan.h" #include "libavutil/vulkan.h"
#include "libavutil/vulkan_spirv.h" #include "libavutil/vulkan_spirv.h"
@ -36,13 +37,38 @@
#define LG_ALIGN_W 32 #define LG_ALIGN_W 32
#define LG_ALIGN_H 32 #define LG_ALIGN_H 32
/* Per-frame state carried from frame submission to packet output.
 * One instance is attached to each encode execution context through
 * exec->opaque. */
typedef struct VulkanEncodeFFv1FrameData {
/* Output data: buffer the encode pass writes the slices into. It is
 * handed over to the transfer execution context when the slices are
 * downloaded into the packet buffer. */
AVBufferRef *out_data_ref;
/* Results data: host-visible buffer holding one pair of uint64_t values
 * per slice, read back as (slice size, source offset in the output
 * buffer). Released once the packet size has been computed. */
AVBufferRef *results_data_ref;
/* Copied from the source frame at submission time and written to the
 * packet when it is output. */
int64_t pts;
int64_t duration;
/* Only filled in and forwarded to the packet when
 * AV_CODEC_FLAG_COPY_OPAQUE is set. */
void *frame_opaque;
AVBufferRef *frame_opaque_ref;
/* Non-zero if the frame was coded as a keyframe; used to set
 * AV_PKT_FLAG_KEY on the packet. */
int key_frame;
} VulkanEncodeFFv1FrameData;
typedef struct VulkanEncodeFFv1Context { typedef struct VulkanEncodeFFv1Context {
FFV1Context ctx; FFV1Context ctx;
AVFrame *frame;
FFVulkanContext s; FFVulkanContext s;
FFVkQueueFamilyCtx qf; FFVkQueueFamilyCtx qf;
FFVkExecPool exec_pool; FFVkExecPool exec_pool;
FFVkQueueFamilyCtx transfer_qf;
FFVkExecPool transfer_exec_pool;
VkBufferCopy *buf_regions;
VulkanEncodeFFv1FrameData *exec_ctx_info;
int in_flight;
int async_depth;
FFVulkanShader setup; FFVulkanShader setup;
FFVulkanShader reset; FFVulkanShader reset;
FFVulkanShader rct; FFVulkanShader rct;
@ -59,6 +85,7 @@ typedef struct VulkanEncodeFFv1Context {
/* Output data buffer */ /* Output data buffer */
AVBufferPool *out_data_pool; AVBufferPool *out_data_pool;
AVBufferPool *pkt_data_pool;
/* Temporary data buffer */ /* Temporary data buffer */
AVBufferPool *tmp_data_pool; AVBufferPool *tmp_data_pool;
@ -271,15 +298,16 @@ fail:
return err; return err;
} }
static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt, static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
const AVFrame *pict, int *got_packet) FFVkExecContext *exec,
const AVFrame *pict)
{ {
int err; int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data; VulkanEncodeFFv1Context *fv = avctx->priv_data;
FFV1Context *f = &fv->ctx; FFV1Context *f = &fv->ctx;
FFVulkanFunctions *vk = &fv->s.vkfn; FFVulkanFunctions *vk = &fv->s.vkfn;
FFVkExecContext *exec;
VulkanEncodeFFv1FrameData *fd = exec->opaque;
FFv1VkParameters pd; FFv1VkParameters pd;
AVFrame *intermediate_frame = NULL; AVFrame *intermediate_frame = NULL;
@ -298,14 +326,10 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
/* Output data */ /* Output data */
size_t maxsize; size_t maxsize;
AVBufferRef *out_data_ref;
FFVkBuffer *out_data_buf; FFVkBuffer *out_data_buf;
uint8_t *buf_p;
/* Results data */ /* Results data */
AVBufferRef *results_data_ref;
FFVkBuffer *results_data_buf; FFVkBuffer *results_data_buf;
uint64_t *sc;
int has_inter = avctx->gop_size > 1; int has_inter = avctx->gop_size > 1;
uint32_t context_count = f->context_count[f->context_model]; uint32_t context_count = f->context_count[f->context_model];
@ -316,44 +340,36 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
AVFrame *enc_in = (AVFrame *)pict; AVFrame *enc_in = (AVFrame *)pict;
VkImageView *enc_in_views = in_views; VkImageView *enc_in_views = in_views;
VkMappedMemoryRange invalidate_data[2];
int nb_invalidate_data = 0;
VkImageMemoryBarrier2 img_bar[37]; VkImageMemoryBarrier2 img_bar[37];
int nb_img_bar = 0; int nb_img_bar = 0;
VkBufferMemoryBarrier2 buf_bar[8]; VkBufferMemoryBarrier2 buf_bar[8];
int nb_buf_bar = 0; int nb_buf_bar = 0;
if (!pict) /* Start recording */
return 0;
exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
ff_vk_exec_start(&fv->s, exec); ff_vk_exec_start(&fv->s, exec);
/* Frame state */ /* Frame state */
f->cur_enc_frame = pict; f->cur_enc_frame = pict;
if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) { if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
av_buffer_unref(&fv->keyframe_slice_data_ref); av_buffer_unref(&fv->keyframe_slice_data_ref);
f->key_frame = 1; f->key_frame = fd->key_frame = 1;
f->gob_count++; f->gob_count++;
} else { } else {
f->key_frame = 0; f->key_frame = fd->key_frame = 0;
} }
f->max_slice_count = f->num_h_slices * f->num_v_slices;
f->slice_count = f->max_slice_count; f->slice_count = f->max_slice_count;
/* Allocate temporary data buffer */ /* Allocate temporary data buffer */
tmp_data_size = f->slice_count*CONTEXT_SIZE; tmp_data_size = f->slice_count*CONTEXT_SIZE;
err = ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool, RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
&tmp_data_ref, &tmp_data_ref,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
NULL, tmp_data_size, NULL, tmp_data_size,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
if (err < 0)
return err;
tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data; tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
/* Allocate slice buffer data */ /* Allocate slice buffer data */
if (f->ac == AC_GOLOMB_RICE) if (f->ac == AC_GOLOMB_RICE)
@ -368,35 +384,33 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
slice_state_size += slice_data_size; slice_state_size += slice_data_size;
slice_state_size = FFALIGN(slice_state_size, 8); slice_state_size = FFALIGN(slice_state_size, 8);
/* Allocate slice data buffer */
slice_data_ref = fv->keyframe_slice_data_ref; slice_data_ref = fv->keyframe_slice_data_ref;
if (!slice_data_ref) { if (!slice_data_ref) {
/* Allocate slice data buffer */ RET(ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
err = ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool, &slice_data_ref,
&slice_data_ref, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, NULL, slice_state_size*f->slice_count,
NULL, slice_state_size*f->slice_count, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (err < 0)
return err;
/* Only save it if we're going to use it again */ /* Only save it if we're going to use it again */
if (has_inter) if (has_inter)
fv->keyframe_slice_data_ref = slice_data_ref; fv->keyframe_slice_data_ref = slice_data_ref;
} }
slice_data_buf = (FFVkBuffer *)slice_data_ref->data; slice_data_buf = (FFVkBuffer *)slice_data_ref->data;
ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
/* Allocate results buffer */ /* Allocate results buffer */
err = ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool, RET(ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
&results_data_ref, &fd->results_data_ref,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
NULL, 2*f->slice_count*sizeof(uint64_t), NULL, 2*f->slice_count*sizeof(uint64_t),
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
if (err < 0) results_data_buf = (FFVkBuffer *)fd->results_data_ref->data;
return err; ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->results_data_ref, 1, 1);
results_data_buf = (FFVkBuffer *)results_data_ref->data;
/* Output buffer size */ /* Output buffer size */
maxsize = avctx->width*avctx->height*(1 + f->transparency); maxsize = avctx->width*avctx->height*(1 + f->transparency);
@ -414,26 +428,17 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
maxsize += FF_INPUT_BUFFER_MIN_SIZE; maxsize += FF_INPUT_BUFFER_MIN_SIZE;
/* Allocate output buffer */ /* Allocate output buffer */
err = ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool, RET(ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
&out_data_ref, &fd->out_data_ref,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
NULL, maxsize, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | NULL, maxsize,
VK_MEMORY_PROPERTY_HOST_CACHED_BIT); VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
if (err < 0) out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
return err; ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
out_data_buf = (FFVkBuffer *)out_data_ref->data; /* Prepare input frame */
pkt->data = out_data_buf->mapped_mem;
pkt->size = out_data_buf->size;
pkt->buf = out_data_ref;
/* Add dependencies */
ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 0);
ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 1);
RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in, RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
@ -645,6 +650,76 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
0, sizeof(pd), &pd); 0, sizeof(pd), &pd);
vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
/* Submit */
err = ff_vk_exec_submit(&fv->s, exec);
if (err < 0)
return err;
f->picture_number++;
/* This, if needed, was referenced by the execution context
* as it was declared as a dependency. */
av_frame_free(&intermediate_frame);
return 0;
fail:
av_frame_free(&intermediate_frame);
ff_vk_exec_discard_deps(&fv->s, exec);
return err;
}
static int download_slices(AVCodecContext *avctx,
VkBufferCopy *buf_regions, int nb_regions,
VulkanEncodeFFv1FrameData *fd,
AVBufferRef *pkt_data_ref)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
FFVulkanFunctions *vk = &fv->s.vkfn;
FFVkExecContext *exec;
FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
FFVkBuffer *pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
VkBufferMemoryBarrier2 buf_bar[8];
int nb_buf_bar = 0;
/* Transfer the slices */
exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool);
ff_vk_exec_start(&fv->s, exec);
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 0);
fd->out_data_ref = NULL; /* Ownership passed */
ff_vk_exec_add_dep_buf(&fv->s, exec, &pkt_data_ref, 1, 1);
/* Ensure the output buffer is finished */
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
.srcStageMask = out_data_buf->stage,
.dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
.srcAccessMask = out_data_buf->access,
.dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = out_data_buf->buf,
.size = VK_WHOLE_SIZE,
.offset = 0,
};
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
out_data_buf->stage = buf_bar[0].dstStageMask;
out_data_buf->access = buf_bar[0].dstAccessMask;
nb_buf_bar = 0;
vk->CmdCopyBuffer(exec->buf,
out_data_buf->buf, pkt_data_buf->buf,
nb_regions, buf_regions);
/* Submit */ /* Submit */
err = ff_vk_exec_submit(&fv->s, exec); err = ff_vk_exec_submit(&fv->s, exec);
if (err < 0) if (err < 0)
@ -652,68 +727,152 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
/* We need the encoded data immediately */ /* We need the encoded data immediately */
ff_vk_exec_wait(&fv->s, exec); ff_vk_exec_wait(&fv->s, exec);
av_frame_free(&intermediate_frame);
/* Invalidate slice/output data if needed */ /* Invalidate slice/output data if needed */
if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) if (!(pkt_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) { VkMappedMemoryRange invalidate_data = {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
.memory = pkt_data_buf->mem,
.offset = 0,
.size = VK_WHOLE_SIZE,
};
vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
1, &invalidate_data);
}
return 0;
}
static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec,
AVPacket *pkt)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
FFV1Context *f = &fv->ctx;
FFVulkanFunctions *vk = &fv->s.vkfn;
/* Packet data */
AVBufferRef *pkt_data_ref;
FFVkBuffer *pkt_data_buf;
VulkanEncodeFFv1FrameData *fd = exec->opaque;
FFVkBuffer *results_data_buf = (FFVkBuffer *)fd->results_data_ref->data;
uint64_t *sc;
/* Make sure encoding's done */
ff_vk_exec_wait(&fv->s, exec);
/* Invalidate slice/output data if needed */
if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
VkMappedMemoryRange invalidate_data = {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
.memory = results_data_buf->mem, .memory = results_data_buf->mem,
.offset = 0, .offset = 0,
.size = VK_WHOLE_SIZE, .size = VK_WHOLE_SIZE,
}; };
if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
.memory = out_data_buf->mem,
.offset = 0,
.size = VK_WHOLE_SIZE,
};
if (nb_invalidate_data)
vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev, vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
nb_invalidate_data, invalidate_data); 1, &invalidate_data);
/* First slice is in-place */
buf_p = pkt->data;
sc = &((uint64_t *)results_data_buf->mapped_mem)[0];
av_log(avctx, AV_LOG_DEBUG, "Slice size = %"PRIu64" (max %i), src offset = %"PRIu64"\n",
sc[0], pkt->size / f->slice_count, sc[1]);
av_assert0(sc[0] < pd.slice_size_max);
av_assert0(sc[0] < (1 << 24));
buf_p += sc[0];
/* We have to copy the rest */
for (int i = 1; i < f->slice_count; i++) {
uint64_t bytes;
uint8_t *bs_start;
sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
bytes = sc[0];
bs_start = pkt->data + sc[1];
av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64" (max %"PRIu64"), "
"src offset = %"PRIu64"\n",
i, bytes, pd.slice_size_max, sc[1]);
av_assert0(bytes < pd.slice_size_max);
av_assert0(bytes < (1 << 24));
memmove(buf_p, bs_start, bytes);
buf_p += bytes;
} }
f->picture_number++; /* Calculate final size */
pkt->size = buf_p - pkt->data; pkt->size = 0;
pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame; for (int i = 0; i < f->slice_count; i++) {
*got_packet = 1; sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64", "
"src offset = %"PRIu64"\n",
i, sc[0], sc[1]);
av_log(avctx, AV_LOG_VERBOSE, "Total data = %i\n", fv->buf_regions[i] = (VkBufferCopy) {
pkt->size); .srcOffset = sc[1],
.dstOffset = pkt->size,
.size = sc[0],
};
pkt->size += sc[0];
}
av_log(avctx, AV_LOG_VERBOSE, "Encoded data: %iMiB\n", pkt->size / (1024*1024));
av_buffer_unref(&fd->results_data_ref); /* No need for this buffer anymore */
fail: /* Allocate packet buffer */
/* Frames added as a dep are always referenced, so we only need to err = ff_vk_get_pooled_buffer(&fv->s, &fv->pkt_data_pool,
* clean this up. */ &pkt_data_ref,
av_frame_free(&intermediate_frame); VK_BUFFER_USAGE_TRANSFER_DST_BIT,
NULL, pkt->size,
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (err < 0)
return err;
pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
/* Setup packet data */
pkt->data = pkt_data_buf->mapped_mem;
pkt->buf = pkt_data_ref;
pkt->pts = fd->pts;
pkt->dts = fd->pts;
pkt->duration = fd->duration;
pkt->flags |= AV_PKT_FLAG_KEY * fd->key_frame;
if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
pkt->opaque = fd->frame_opaque;
pkt->opaque_ref = fd->frame_opaque_ref;
fd->frame_opaque_ref = NULL;
}
return download_slices(avctx, fv->buf_regions, f->slice_count, fd,
pkt_data_ref);
}
/*
 * receive_packet callback of the encoder.
 *
 * Each loop iteration obtains an execution context from fv->exec_pool:
 *  - if that context has a pending submission, its packet is produced with
 *    get_packet() and that call's result is returned;
 *  - otherwise a new input frame is requested and submitted on that context.
 *    AVERROR(EAGAIN) is returned while fewer than fv->async_depth frames are
 *    in flight; once that depth is reached the loop runs again instead.
 *
 * When ff_encode_get_frame() reports AVERROR_EOF, it is returned only once
 * no frames remain in flight; until then the loop keeps cycling contexts so
 * that pending submissions are output.
 *
 * Returns 0 or a negative error code as returned by get_packet(),
 * AVERROR(EAGAIN) when more input is wanted, AVERROR_EOF when fully
 * drained, or another negative error code on failure.
 */
static int vulkan_encode_ffv1_receive_packet(AVCodecContext *avctx,
AVPacket *pkt)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
VulkanEncodeFFv1FrameData *fd;
FFVkExecContext *exec;
AVFrame *frame;
while (1) {
/* Roll an execution context */
exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
/* If it had a frame, immediately output it */
if (exec->had_submission) {
/* Clear the flag so this context is treated as free the next time
 * it comes up */
exec->had_submission = 0;
fv->in_flight--;
return get_packet(avctx, exec, pkt);
}
/* Get next frame to encode */
frame = fv->frame;
err = ff_encode_get_frame(avctx, frame);
if (err < 0 && err != AVERROR_EOF) {
return err;
} else if (err == AVERROR_EOF) {
/* Nothing left to drain: report EOF */
if (!fv->in_flight)
return err;
/* Frames are still in flight: keep rolling contexts until one with
 * a pending submission is found */
continue;
}
/* Encode frame */
fd = exec->opaque;
fd->pts = frame->pts;
fd->duration = frame->duration;
if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
fd->frame_opaque = frame->opaque;
fd->frame_opaque_ref = frame->opaque_ref;
/* The reference now belongs to fd, so the av_frame_unref() below
 * must not release it */
frame->opaque_ref = NULL;
}
err = vulkan_encode_ffv1_submit_frame(avctx, exec, frame);
av_frame_unref(frame);
/* NOTE(review): on failure fd->frame_opaque_ref is left set here; the
 * only release visible for this path is in the close function.
 * Confirm it cannot be overwritten without an unref if this context
 * is reused. */
if (err < 0)
return err;
fv->in_flight++;
/* Ask for more input until async_depth frames are in flight */
if (fv->in_flight < fv->async_depth)
return AVERROR(EAGAIN);
}
return 0; return 0;
} }
@ -1441,8 +1600,23 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
return err; return err;
} }
if (!fv->async_depth)
fv->async_depth = fv->qf.nb_queues;
err = ff_vk_exec_pool_init(&fv->s, &fv->qf, &fv->exec_pool, err = ff_vk_exec_pool_init(&fv->s, &fv->qf, &fv->exec_pool,
1, /* Single-threaded for now */ FFMIN(fv->qf.nb_queues, fv->async_depth),
0, 0, 0, NULL);
if (err < 0)
return err;
err = ff_vk_qf_init(&fv->s, &fv->transfer_qf, VK_QUEUE_TRANSFER_BIT);
if (err < 0) {
av_log(avctx, AV_LOG_ERROR, "Device has no transfer queues!\n");
return err;
}
err = ff_vk_exec_pool_init(&fv->s, &fv->transfer_qf, &fv->transfer_exec_pool,
1,
0, 0, 0, NULL); 0, 0, 0, NULL);
if (err < 0) if (err < 0)
return err; return err;
@ -1510,6 +1684,24 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
if (err < 0) if (err < 0)
return err; return err;
/* Temporary frame */
fv->frame = av_frame_alloc();
if (!fv->frame)
return AVERROR(ENOMEM);
/* Async data pool */
fv->async_depth = fv->exec_pool.pool_size;
fv->exec_ctx_info = av_calloc(fv->async_depth, sizeof(*fv->exec_ctx_info));
if (!fv->exec_ctx_info)
return AVERROR(ENOMEM);
for (int i = 0; i < fv->async_depth; i++)
fv->exec_pool.contexts[i].opaque = &fv->exec_ctx_info[i];
f->max_slice_count = f->num_h_slices * f->num_v_slices;
fv->buf_regions = av_malloc_array(f->max_slice_count, sizeof(*fv->buf_regions));
if (!fv->buf_regions)
return AVERROR(ENOMEM);
return 0; return 0;
} }
@ -1518,17 +1710,29 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
VulkanEncodeFFv1Context *fv = avctx->priv_data; VulkanEncodeFFv1Context *fv = avctx->priv_data;
ff_vk_exec_pool_free(&fv->s, &fv->exec_pool); ff_vk_exec_pool_free(&fv->s, &fv->exec_pool);
ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool);
ff_vk_shader_free(&fv->s, &fv->enc); ff_vk_shader_free(&fv->s, &fv->enc);
ff_vk_shader_free(&fv->s, &fv->rct); ff_vk_shader_free(&fv->s, &fv->rct);
ff_vk_shader_free(&fv->s, &fv->reset); ff_vk_shader_free(&fv->s, &fv->reset);
ff_vk_shader_free(&fv->s, &fv->setup); ff_vk_shader_free(&fv->s, &fv->setup);
if (fv->exec_ctx_info) {
for (int i = 0; i < fv->async_depth; i++) {
VulkanEncodeFFv1FrameData *fd = &fv->exec_ctx_info[i];
av_buffer_unref(&fd->out_data_ref);
av_buffer_unref(&fd->results_data_ref);
av_buffer_unref(&fd->frame_opaque_ref);
}
}
av_free(fv->exec_ctx_info);
av_buffer_unref(&fv->intermediate_frames_ref); av_buffer_unref(&fv->intermediate_frames_ref);
av_buffer_pool_uninit(&fv->results_data_pool); av_buffer_pool_uninit(&fv->results_data_pool);
av_buffer_pool_uninit(&fv->out_data_pool); av_buffer_pool_uninit(&fv->out_data_pool);
av_buffer_pool_uninit(&fv->pkt_data_pool);
av_buffer_pool_uninit(&fv->tmp_data_pool); av_buffer_pool_uninit(&fv->tmp_data_pool);
av_buffer_unref(&fv->keyframe_slice_data_ref); av_buffer_unref(&fv->keyframe_slice_data_ref);
@ -1538,6 +1742,8 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf); ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf);
ff_vk_free_buf(&fv->s, &fv->crc_tab_buf); ff_vk_free_buf(&fv->s, &fv->crc_tab_buf);
av_free(fv->buf_regions);
av_frame_free(&fv->frame);
ff_vk_uninit(&fv->s); ff_vk_uninit(&fv->s);
return 0; return 0;
@ -1567,6 +1773,9 @@ static const AVOption vulkan_encode_ffv1_options[] = {
{ "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL, { "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL,
{ .i64 = 0 }, 0, 1, VE }, { .i64 = 0 }, 0, 1, VE },
{ "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT,
{ .i64 = 0 }, 0, INT_MAX, VE },
{ NULL } { NULL }
}; };
@ -1594,7 +1803,7 @@ const FFCodec ff_ffv1_vulkan_encoder = {
.p.id = AV_CODEC_ID_FFV1, .p.id = AV_CODEC_ID_FFV1,
.priv_data_size = sizeof(VulkanEncodeFFv1Context), .priv_data_size = sizeof(VulkanEncodeFFv1Context),
.init = &vulkan_encode_ffv1_init, .init = &vulkan_encode_ffv1_init,
FF_CODEC_ENCODE_CB(vulkan_encode_ffv1_frame), FF_CODEC_RECEIVE_PACKET_CB(&vulkan_encode_ffv1_receive_packet),
.close = &vulkan_encode_ffv1_close, .close = &vulkan_encode_ffv1_close,
.p.priv_class = &vulkan_encode_ffv1_class, .p.priv_class = &vulkan_encode_ffv1_class,
.p.capabilities = AV_CODEC_CAP_DELAY | .p.capabilities = AV_CODEC_CAP_DELAY |

View file

@ -146,6 +146,7 @@ typedef uint64_t FFVulkanExtensions;
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBufferToImage) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBufferToImage) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyImageToBuffer) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyImageToBuffer) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBuffer) \
\ \
/* Buffer */ \ /* Buffer */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \