ffmpeg extract video thumbnails
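A small C++ program that uses the FFmpeg libraries to extract ten evenly spaced JPEG thumbnails from a video: it seeks to each target timestamp, decodes one frame (optionally with CUDA hardware acceleration), converts the pixel format with swscale, and encodes the result as MJPEG to output/<index>.jpg.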
extern "C" { | |
#include <libavformat/avformat.h> | |
#include <libavutil/bprint.h> | |
#include <libavutil/opt.h> | |
#include <libavutil/imgutils.h> | |
#include <libswscale/swscale.h> | |
} | |
#pragma comment(lib, "avcodec.lib") | |
#pragma comment(lib, "avformat.lib") | |
#pragma comment(lib, "avutil.lib") | |
#pragma comment(lib, "swscale.lib") | |
#define USE_FFMPEG_HW_ACCEL | |
#ifdef USE_FFMPEG_HW_ACCEL | |
static enum AVHWDeviceType HW_DEVICE_TYPE = AV_HWDEVICE_TYPE_CUDA; | |
#endif | |
// input
static AVFormatContext *input_format_ctx;
static AVStream *in_stream;
#ifdef USE_FFMPEG_HW_ACCEL
static AVBufferRef *hw_device_ctx;
static enum AVPixelFormat hw_pix_fmt;
#endif
static enum AVPixelFormat sw_pix_fmt;
// FFmpeg 5+ returns const AVCodec * from avcodec_find_decoder()/avcodec_find_encoder()
static const AVCodec *decoder_codec;
static AVCodecContext *decoder_ctx;

// output
static const AVCodec *encoder_codec;
static AVCodecContext *encoder_ctx;

// swscale
static SwsContext *sws_ctx = nullptr;

// frames and packets
static AVFrame *in_hw_frame;
static AVFrame *in_sw_frame;
static AVFrame *out_frame;
static AVPacket *decode_packet;
static AVPacket *encode_packet;
static uint8_t *out_frame_buf;
#define check_true(condition) if (!(condition)) { av_log(nullptr, AV_LOG_PANIC, "ffmpeg false condition at %s:%d\n", __FILE__, __LINE__); std::terminate(); }
#define check_gte_0(code) if ((code) < 0) { av_log(nullptr, AV_LOG_PANIC, "ffmpeg error code at %s:%d\n", __FILE__, __LINE__); std::terminate(); }
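
// Open the input file, pick the best video stream, and set up its decoder,
// optionally backed by a hardware device context (CUDA here).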
static auto ffmpeg_open_input(string_view input) -> void {
    input_format_ctx = avformat_alloc_context();
    const int open_ret = avformat_open_input(&input_format_ctx, input.data(), nullptr, nullptr);
    if (open_ret == AVERROR(ENOENT)) {
        std::cerr << "target video does not exist: " << input << std::endl;
        exit(EXIT_FAILURE);
    }
    check_gte_0(open_ret);
    check_gte_0(avformat_find_stream_info(input_format_ctx, nullptr));

    const int input_video_stream_idx = av_find_best_stream(input_format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
    check_gte_0(input_video_stream_idx);
    in_stream = input_format_ctx->streams[input_video_stream_idx];

    decoder_codec = avcodec_find_decoder(in_stream->codecpar->codec_id);
    check_true(decoder_codec);
    decoder_ctx = avcodec_alloc_context3(decoder_codec);
    check_true(decoder_ctx);
    check_gte_0(avcodec_parameters_to_context(decoder_ctx, in_stream->codecpar));

#ifdef USE_FFMPEG_HW_ACCEL
    // initialize hardware acceleration
    for (int i = 0;; ++i) {
        const AVCodecHWConfig *hw_config = avcodec_get_hw_config(decoder_codec, i);
        check_true(hw_config);
        if (hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && hw_config->device_type == HW_DEVICE_TYPE) {
            check_gte_0(av_hwdevice_ctx_create(&hw_device_ctx, HW_DEVICE_TYPE, nullptr, nullptr, 0));
            hw_pix_fmt = hw_config->pix_fmt;
            // nullptr hwconfig queries the device's generic constraints
            AVHWFramesConstraints *hw_constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr);
            check_true(hw_constraints);
            sw_pix_fmt = hw_constraints->valid_sw_formats[0];
            av_hwframe_constraints_free(&hw_constraints);
            break;
        }
    }
    // give the decoder its own reference so both sides can release it independently
    decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
    check_true(decoder_ctx->hw_device_ctx);
#else
    sw_pix_fmt = decoder_ctx->pix_fmt;
#endif

    // thread_count = 0 lets ffmpeg determine the optimal thread count
    decoder_ctx->thread_count = 0;
    check_gte_0(avcodec_open2(decoder_ctx, decoder_codec, nullptr));
}
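
// Integer division rounded to the nearest whole number; used below to turn
// the stream's average frame rate into an integer fps for the encoder time base.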
static auto RoundDivision(int dividend, int divisor) -> int {
    return (dividend + (divisor / 2)) / divisor;
}
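
// Set up an MJPEG encoder with the same dimensions as the input, so each
// thumbnail is written out as a standalone JPEG image.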
static auto init_encoder() -> void {
    encoder_codec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
    check_true(encoder_codec);
    encoder_ctx = avcodec_alloc_context3(encoder_codec);
    check_true(encoder_ctx);

    encoder_ctx->width = decoder_ctx->width;
    encoder_ctx->height = decoder_ctx->height;
    // the MJPEG encoder requires the "deprecated" YUVJ4xxP pixel formats, which are the full-color-range variants of YUV4xxP
    encoder_ctx->pix_fmt = AV_PIX_FMT_YUVJ444P;
    encoder_ctx->time_base = AVRational { .num = 1, .den = RoundDivision(in_stream->avg_frame_rate.num, in_stream->avg_frame_rate.den) };
    // use the best quality: fixed quantizer scale of 1
    encoder_ctx->flags = AV_CODEC_FLAG_QSCALE;
    encoder_ctx->qmin = 1;
    encoder_ctx->qmax = 1;

    check_gte_0(avcodec_open2(encoder_ctx, encoder_codec, nullptr));
}
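
// Build a swscale context that converts decoded frames from the decoder's
// software pixel format to the encoder's YUVJ444P at the same dimensions.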
static auto init_swscale(enum AVPixelFormat src_pix_fmt) -> void {
    sws_ctx = sws_getContext(decoder_ctx->width, decoder_ctx->height, src_pix_fmt,
                             encoder_ctx->width, encoder_ctx->height, encoder_ctx->pix_fmt,
                             SWS_LANCZOS, nullptr, nullptr, nullptr);
    check_true(sws_ctx);
}
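
// Allocate the frames and packets reused across iterations, plus the pixel
// buffer that backs the scaled output frame.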
static auto init_frames() -> void {
    in_hw_frame = av_frame_alloc();
    check_true(in_hw_frame);
#ifdef USE_FFMPEG_HW_ACCEL
    in_sw_frame = av_frame_alloc();
    check_true(in_sw_frame);
#endif
    out_frame = av_frame_alloc();
    check_true(out_frame);
    decode_packet = av_packet_alloc();
    check_true(decode_packet);
    encode_packet = av_packet_alloc();
    check_true(encode_packet);

    out_frame->format = encoder_ctx->pix_fmt;
    out_frame->width = encoder_ctx->width;
    out_frame->height = encoder_ctx->height;
    // instead of sending the decoded frame directly to the encoder, we run it through sws_scale to convert its pixel format and color range,
    // so prepare a buffer for the output frame to hold the scaled image data
    const int out_frame_buf_size = av_image_get_buffer_size(encoder_ctx->pix_fmt, encoder_ctx->width, encoder_ctx->height, 1);
    check_gte_0(out_frame_buf_size);
    out_frame_buf = reinterpret_cast<uint8_t *>(av_malloc(out_frame_buf_size));
    check_true(out_frame_buf);
    check_true(av_image_fill_arrays(out_frame->data, out_frame->linesize, out_frame_buf, encoder_ctx->pix_fmt, encoder_ctx->width, encoder_ctx->height, 1) == out_frame_buf_size);
}

static auto ffmpeg_init() -> void {
    init_encoder();
    init_swscale(sw_pix_fmt);
    init_frames();
}

/*
 * return: tuple of
 *     transcode start time, in in_stream's time base
 *     transcode end time, in in_stream's time base
 *     gap time between each transcode, in in_stream's time base
 */
static auto calculate_seek_time() -> std::tuple<int64_t, int64_t, int64_t> {
    const double video_start_time = static_cast<double>(input_format_ctx->start_time) / AV_TIME_BASE;
    const double video_duration = static_cast<double>(input_format_ctx->duration) / AV_TIME_BASE;
    const double transcode_start_time = video_start_time;
    const double transcode_end_time = video_duration;
    const int thumbnails_count = 10;
    const double frame_gap = (transcode_end_time - transcode_start_time) / thumbnails_count;
    return {
        static_cast<int64_t>(transcode_start_time * in_stream->time_base.den / in_stream->time_base.num),
        static_cast<int64_t>(transcode_end_time * in_stream->time_base.den / in_stream->time_base.num),
        static_cast<int64_t>(frame_gap * in_stream->time_base.den / in_stream->time_base.num),
    };
}

/*
 * return: whether seeking to the specified time was successful.
 */
static auto seek_to_time(int64_t time_in_tb) -> bool {
    /*
     * without AVSEEK_FLAG_ANY, av_seek_frame() always seeks to a keyframe near the specified time:
     *     the default is the keyframe AFTER the time
     *     with AVSEEK_FLAG_BACKWARD, it seeks to the keyframe BEFORE the time
     * with AVSEEK_FLAG_ANY, it seeks to the frame precisely at the specified time;
     * unfortunately, if such a frame is not a keyframe, it will not be fully decodable.
     * instead, we use AVSEEK_FLAG_BACKWARD to get to the nearest preceding keyframe, then keep reading frames
     * from the decoder until we reach the specified time (by comparing each frame's pts)
     * note: AVSEEK_FLAG_FRAME is not available in most demuxers
     */
    const int seek_ret = av_seek_frame(input_format_ctx, in_stream->index, time_in_tb, AVSEEK_FLAG_BACKWARD);
    if (seek_ret >= 0) {
        // must flush the decoder after seeking so stale buffered frames are dropped
        avcodec_flush_buffers(decoder_ctx);
    }
    return seek_ret >= 0;
}

/*
 * return: whether more frames can be read from the input.
 */
static auto read_frame_until_time(int64_t time_in_tb) -> bool {
    while (true) {
        if (av_read_frame(input_format_ctx, decode_packet) < 0) {
            return false;
        }
        if (decode_packet->stream_index != in_stream->index) {
            goto cleanup;
        }
        if (avcodec_send_packet(decoder_ctx, decode_packet) < 0) {
            goto cleanup;
        }
        // may fail with EAGAIN when the decoder needs more packets; just keep reading
        if (avcodec_receive_frame(decoder_ctx, in_hw_frame) < 0) {
            goto cleanup;
        }
#ifdef USE_FFMPEG_HW_ACCEL
        check_true(in_hw_frame->format == hw_pix_fmt);
#endif
        if (in_hw_frame->best_effort_timestamp >= time_in_tb) {
            break;
        }
    cleanup:
        av_frame_unref(in_hw_frame);
        av_packet_unref(decode_packet);
    }
    return true;
}

/*
 * return: whether transcoding was successful.
 */
static auto transcode_frame() -> bool {
#ifdef USE_FFMPEG_HW_ACCEL
    // download the decoded frame from GPU memory into the software frame
    check_gte_0(av_hwframe_transfer_data(in_sw_frame, in_hw_frame, 0));
    av_frame_unref(in_hw_frame);
#else
    in_sw_frame = in_hw_frame;
#endif
    check_true(sws_scale(sws_ctx, in_sw_frame->data, in_sw_frame->linesize, 0, in_sw_frame->height, out_frame->data, out_frame->linesize) == out_frame->height);
    av_frame_unref(in_sw_frame);

    check_gte_0(avcodec_send_frame(encoder_ctx, out_frame));
    check_gte_0(avcodec_receive_packet(encoder_ctx, encode_packet));

    FILE *out_file;
    static int out_file_idx = 0;
    fopen_s(&out_file, std::format(R"(output/{}.jpg)", out_file_idx++).c_str(), "wb");
    check_true(out_file);
    fwrite(encode_packet->data, 1, encode_packet->size, out_file);
    fclose(out_file);
    av_packet_unref(encode_packet);
    return true;
}
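
// Main loop: seek to each target timestamp, decode up to that time, transcode
// one frame into a JPEG, then advance by the gap until the end is reached.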
static auto ffmpeg_transcode() -> void {
    auto [curr_timestamp_in_tb, transcode_end_in_tb, frame_gap_in_tb] = calculate_seek_time();
    int frame_count = 0;
    bool do_seek = true;
    while (true) {
        if (do_seek && !seek_to_time(curr_timestamp_in_tb)) {
            break;
        }
        if (!read_frame_until_time(curr_timestamp_in_tb)) {
            break;
        }
        do_seek = transcode_frame();
        av_packet_unref(decode_packet);
        if (do_seek) {
            curr_timestamp_in_tb += frame_gap_in_tb;
            frame_count += 1;
            if (curr_timestamp_in_tb >= transcode_end_in_tb) {
                break;
            }
        }
    }
    // flush the codec contexts by sending a null frame/packet
    avcodec_send_frame(encoder_ctx, nullptr);
    avcodec_send_packet(decoder_ctx, nullptr);
}

static auto ffmpeg_cleanup() -> void {
    av_packet_free(&encode_packet);
    av_packet_free(&decode_packet);
    av_frame_free(&out_frame);
    // without hardware acceleration, in_sw_frame aliases in_hw_frame, so freeing it covers both
    av_frame_free(&in_sw_frame);
#ifdef USE_FFMPEG_HW_ACCEL
    av_frame_free(&in_hw_frame);
    av_buffer_unref(&hw_device_ctx);
#endif
    av_free(out_frame_buf);
    sws_freeContext(sws_ctx);
    avcodec_free_context(&encoder_ctx);
    avcodec_free_context(&decoder_ctx);
    avformat_close_input(&input_format_ctx);
}

auto main(int argc, char **argv) -> int {
    if (argc < 2) {
        std::cerr << "usage: " << argv[0] << " <input video>" << std::endl;
        return EXIT_FAILURE;
    }
    ffmpeg_open_input(argv[1]);
    ffmpeg_init();
    ffmpeg_transcode();
    ffmpeg_cleanup();
    return EXIT_SUCCESS;
}
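
Notes: the #pragma comment(lib, ...) lines and fopen_s assume MSVC on Windows, and std::format requires C++20 (e.g. /std:c++20); FFmpeg's development headers and import libraries must be on the include and library paths. The output directory must exist before running, since fopen_s does not create directories. Hardware acceleration can be disabled by removing the USE_FFMPEG_HW_ACCEL define, or pointed at another device by changing HW_DEVICE_TYPE (e.g. AV_HWDEVICE_TYPE_D3D11VA), provided the decoder supports it.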