#define MAX_SUPPORTED_WIDTH 1950
#define MAX_SUPPORTED_HEIGHT 1100

#include "libavutil/hwcontext_vaapi.h"
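
// NOTE: partial mirror of FFmpeg's private VAAPI decode context (several fields
// are elided in this listing). It exists only so the code below can read
// va_config out of pCodecCtx->priv_data; the layout has to match the linked
// FFmpeg build, which makes this cast fragile across FFmpeg versions.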
typedef struct VAAPIDecodeContext {
    VAEntrypoint va_entrypoint;
    VAContextID va_context;
#if FF_API_STRUCT_VAAPI_CONTEXT
    struct vaapi_context *old_context;
    AVBufferRef *device_ref;
    AVHWDeviceContext *device;
    AVVAAPIDeviceContext *hwctx;
    AVHWFramesContext *frames;
    AVVAAPIFramesContext *hwfc;
    enum AVPixelFormat surface_format;

    : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
      path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
      seek_audio_frame_found(0), seek_video_frame_found(0), is_duration_known(false), largest_frame_processed(0),
      video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
      pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},

    pts_offset_seconds = NO_PTS_OFFSET;
    video_pts_seconds = NO_PTS_OFFSET;
    audio_pts_seconds = NO_PTS_OFFSET;

    if (abs(diff) <= amount)
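
// Callback installed on pCodecCtx->get_format below: walk the decoder's list of
// supported pixel formats and return the first one that matches the configured
// hardware device type, or AV_PIX_FMT_NONE to fall back to software decoding.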
static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
    const enum AVPixelFormat *p;

    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
#if defined(__linux__)
            case AV_PIX_FMT_VAAPI:
            case AV_PIX_FMT_VDPAU:
            case AV_PIX_FMT_DXVA2_VLD:
            case AV_PIX_FMT_D3D11:
#if defined(__APPLE__)
            case AV_PIX_FMT_VIDEOTOOLBOX:
            case AV_PIX_FMT_CUDA:

    return AV_PIX_FMT_NONE;
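
// Whitelist of codec IDs that are allowed to attempt hardware decoding;
// everything else stays on the software path.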
int FFmpegReader::IsHardwareDecodeSupported(int codecid)
        case AV_CODEC_ID_H264:
        case AV_CODEC_ID_MPEG2VIDEO:
        case AV_CODEC_ID_VC1:
        case AV_CODEC_ID_WMV1:
        case AV_CODEC_ID_WMV2:
        case AV_CODEC_ID_WMV3:

    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0)
    if (avformat_find_stream_info(pFormatCtx, NULL) < 0)

    packet_status.reset(true);

    for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
        if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
        if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {

    if (videoStream == -1 && audioStream == -1)

    if (videoStream != -1) {
        pStream = pFormatCtx->streams[videoStream];
        const AVCodec *pCodec = avcodec_find_decoder(codecId);
        AVDictionary *opts = NULL;
        int retry_decode_open = 2;
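
        // Retry flag for the codec-open loop below: 2 on the first (hardware)
        // attempt, 1 to go around again after hardware setup fails, 0 to leave
        // the do/while loop.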
        if (hw_de_on && (retry_decode_open == 2)) {
            hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
            retry_decode_open = 0;

        if (pCodec == NULL) {
            throw InvalidCodec("A valid video codec could not be found for this file.", path);

        av_dict_set(&opts, "strict", "experimental", 0);

        int i_decoder_hw = 0;
        char *adapter_ptr = NULL;
        fprintf(stderr, "Hardware decoding device number: %d\n", adapter_num);

        pCodecCtx->get_format = get_hw_dec_format;

        if (adapter_num < 3 && adapter_num >= 0) {
#if defined(__linux__)
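            // DRM render nodes are numbered from 128 (/dev/dri/renderD128 is the
            // first GPU), so GPU index N maps onto /dev/dri/renderD<N+128>.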
            snprintf(adapter, sizeof(adapter), "/dev/dri/renderD%d", adapter_num + 128);
            adapter_ptr = adapter;
            switch (i_decoder_hw) {
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;

            switch (i_decoder_hw) {
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;

#elif defined(__APPLE__)
            switch (i_decoder_hw) {
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;

#if defined(__linux__)
        if (adapter_ptr != NULL && access(adapter_ptr, W_OK) == 0) {
        if (adapter_ptr != NULL) {
#elif defined(__APPLE__)
        if (adapter_ptr != NULL) {

        hw_device_ctx = NULL;
        if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
            if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {

        pCodecCtx->thread_type &= ~FF_THREAD_FRAME;

        int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &opts);
        if (avcodec_return < 0) {
            std::stringstream avcodec_error_msg;
            avcodec_error_msg << "A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
            AVHWFramesConstraints *constraints = NULL;
            void *hwconfig = NULL;
            hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);

            ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
            constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx, hwconfig);

            if (pCodecCtx->coded_width < constraints->min_width ||
                pCodecCtx->coded_height < constraints->min_height ||
                pCodecCtx->coded_width > constraints->max_width ||
                pCodecCtx->coded_height > constraints->max_height) {
                retry_decode_open = 1;
                av_buffer_unref(&hw_device_ctx);
                hw_device_ctx = NULL;

                ZmqLogger::Instance()->AppendDebugMethod(
                    "\nDecode hardware acceleration is used\n",
                    "Min width :", constraints->min_width,
                    "Min Height :", constraints->min_height,
                    "MaxWidth :", constraints->max_width,
                    "MaxHeight :", constraints->max_height,
                    "Frame width :", pCodecCtx->coded_width,
                    "Frame height :", pCodecCtx->coded_height);
                retry_decode_open = 0;

            av_hwframe_constraints_free(&constraints);

            if (pCodecCtx->coded_width < 0 ||
                pCodecCtx->coded_height < 0 ||
                pCodecCtx->coded_width > max_w ||
                pCodecCtx->coded_height > max_h) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
                    "Max Width :", max_w,
                    "Max Height :", max_h,
                    "Frame width :", pCodecCtx->coded_width,
                    "Frame height :", pCodecCtx->coded_height);
                retry_decode_open = 1;
                av_buffer_unref(&hw_device_ctx);
                hw_device_ctx = NULL;

                ZmqLogger::Instance()->AppendDebugMethod(
                    "\nDecode hardware acceleration is used\n",
                    "Max Width :", max_w,
                    "Max Height :", max_h,
                    "Frame width :", pCodecCtx->coded_width,
                    "Frame height :", pCodecCtx->coded_height);
                retry_decode_open = 0;

        retry_decode_open = 0;
    } while (retry_decode_open);
    if (audioStream != -1) {
        aStream = pFormatCtx->streams[audioStream];
        const AVCodec *aCodec = avcodec_find_decoder(codecId);
        if (aCodec == NULL) {
            throw InvalidCodec("A valid audio codec could not be found for this file.", path);

        AVDictionary *opts = NULL;
        av_dict_set(&opts, "strict", "experimental", 0);
        if (avcodec_open2(aCodecCtx, aCodec, &opts) < 0)
            throw InvalidCodec("An audio codec was found, but could not be opened.", path);

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();

    previous_packet_location.frame = -1;

    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    AVPacket *recent_packet = packet;
    int max_attempts = 128;

        "attempts", attempts);
    RemoveAVPacket(recent_packet);

    if (avcodec_is_open(pCodecCtx)) {
        avcodec_flush_buffers(pCodecCtx);
        av_buffer_unref(&hw_device_ctx);
        hw_device_ctx = NULL;
    if (avcodec_is_open(aCodecCtx)) {
        avcodec_flush_buffers(aCodecCtx);

    working_cache.Clear();
    avformat_close_input(&pFormatCtx);
    av_freep(&pFormatCtx);

    largest_frame_processed = 0;
    seek_audio_frame_found = 0;
    seek_video_frame_found = 0;
    current_video_frame = 0;
    last_video_frame.reset();
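
// FFmpeg exposes embedded album art as a video stream whose disposition carries
// the AV_DISPOSITION_ATTACHED_PIC flag, which is what this check relies on.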
bool FFmpegReader::HasAlbumArt() {
    return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
        && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);

void FFmpegReader::UpdateAudioInfo() {
    info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;

    if (aStream->duration > 0 && aStream->duration > info.duration) {
    } else if (pFormatCtx->duration > 0 && info.duration <= 0.0f) {
        info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(aStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();

void FFmpegReader::UpdateVideoInfo() {
    info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
    AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);

    if (pStream->sample_aspect_ratio.num != 0) {
    if (!check_interlace) {
        check_interlace = true;
        switch (field_order) {
            case AV_FIELD_PROGRESSIVE:
            case AV_FIELD_UNKNOWN:
                check_interlace = false;

    if (info.duration <= 0.0f && pFormatCtx->duration >= 0) {
        info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;

    if (info.duration <= 0.0f && pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
        is_duration_known = false;
        is_duration_known = true;

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();

    return this->is_duration_known;

    throw ReaderClosed("The FFmpegReader is closed. Call Open() before calling this method.", path);

    if (requested_frame < 1)

    throw InvalidFile("Could not detect the duration of the video or audio stream.", path);

    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    int64_t diff = requested_frame - last_frame;
    if (diff >= 1 && diff <= 20) {
        frame = ReadStream(requested_frame);
        Seek(requested_frame);
        frame = ReadStream(requested_frame);
std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
    bool check_seek = false;
    int packet_error = -1;

    CheckWorkingFrames(requested_frame);
    if (is_cache_found) {

    if (!hold_packet || !packet) {
        packet_error = GetNextPacket();
        if (packet_error < 0 && !packet) {

    check_seek = CheckSeek(false);

    if ((info.has_video && packet && packet->stream_index == videoStream) ||
        ProcessVideoPacket(requested_frame);
    if ((info.has_audio && packet && packet->stream_index == audioStream) ||
        ProcessAudioPacket(requested_frame);

    if ((!info.has_video && packet && packet->stream_index == videoStream) ||
        (!info.has_audio && packet && packet->stream_index == audioStream)) {
        if (packet->stream_index == videoStream) {
        } else if (packet->stream_index == audioStream) {
        RemoveAVPacket(packet);

    ZmqLogger::Instance()->AppendDebugMethod(
        "FFmpegReader::ReadStream (force EOF)",
        "packets_read", packet_status.packets_read(),
        "packets_decoded", packet_status.packets_decoded(),
        "packets_eof", packet_status.packets_eof,
        "video_eof", packet_status.video_eof,
        "audio_eof", packet_status.audio_eof,
        "end_of_file", packet_status.end_of_file);

        "largest_frame_processed", largest_frame_processed,
        "Working Cache Count", working_cache.Count());
    CheckWorkingFrames(requested_frame);

    std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
    if (!frame->has_image_data) {
        frame->AddAudioSilence(samples_in_frame);

    std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
    f->AddAudioSilence(samples_in_frame);

int FFmpegReader::GetNextPacket() {
    int found_packet = 0;
    AVPacket *next_packet;
    next_packet = new AVPacket();
    found_packet = av_read_frame(pFormatCtx, next_packet);

    RemoveAVPacket(packet);
    if (found_packet >= 0) {
        packet = next_packet;
        if (packet->stream_index == videoStream) {
        } else if (packet->stream_index == audioStream) {

    return found_packet;

bool FFmpegReader::GetAVFrame() {
    int frameFinished = 0;

    int send_packet_err = 0;
    int64_t send_packet_pts = 0;
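
    // FFmpeg's send/receive decode API decouples input from output: one packet
    // may yield zero or more frames, and sending a NULL packet flushes the
    // decoder (which is why "|| !packet" is accepted below).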
    if ((packet && packet->stream_index == videoStream) || !packet) {
        send_packet_err = avcodec_send_packet(pCodecCtx, packet);
        if (packet && send_packet_err >= 0) {
            send_packet_pts = GetPacketPTS();
            hold_packet = false;

    if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
        ZmqLogger::Instance()->AppendDebugMethod(
            "FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) + "])",
            "send_packet_err", send_packet_err,
            "send_packet_pts", send_packet_pts);
        if (send_packet_err == AVERROR(EAGAIN)) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()",
                "send_packet_pts", send_packet_pts);
        if (send_packet_err == AVERROR(EINVAL)) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush",
                "send_packet_pts", send_packet_pts);
        if (send_packet_err == AVERROR(ENOMEM)) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors",
                "send_packet_pts", send_packet_pts);

    int receive_frame_err = 0;
    AVFrame *next_frame2;
    next_frame2 = next_frame;
    while (receive_frame_err >= 0) {
        receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);

        if (receive_frame_err != 0) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [" + av_err2string(receive_frame_err) + "])",
                "receive_frame_err", receive_frame_err,
                "send_packet_pts", send_packet_pts);
            if (receive_frame_err == AVERROR_EOF) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
                    "send_packet_pts", send_packet_pts);
                avcodec_flush_buffers(pCodecCtx);
            if (receive_frame_err == AVERROR(EINVAL)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
                    "send_packet_pts", send_packet_pts);
                avcodec_flush_buffers(pCodecCtx);
            if (receive_frame_err == AVERROR(EAGAIN)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
                    "send_packet_pts", send_packet_pts);
            if (receive_frame_err == AVERROR_INPUT_CHANGED) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
                    "send_packet_pts", send_packet_pts);
        if (next_frame2->format == hw_de_av_pix_fmt) {
            next_frame->format = AV_PIX_FMT_YUV420P;
            if ((err = av_hwframe_transfer_data(next_frame, next_frame2, 0)) < 0) {
            if ((err = av_frame_copy_props(next_frame, next_frame2)) < 0) {
            next_frame = next_frame2;

        av_image_alloc(pFrame->data, pFrame->linesize, info.width, info.height, (AVPixelFormat)(pStream->codecpar->format), 1);
        av_image_copy(pFrame->data, pFrame->linesize, (const uint8_t **)next_frame->data, next_frame->linesize,

        if (next_frame->pts != AV_NOPTS_VALUE) {
            video_pts = next_frame->pts;
        } else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
            video_pts = next_frame->pkt_dts;

        ZmqLogger::Instance()->AppendDebugMethod(
            "FFmpegReader::GetAVFrame (Successful frame received)",
            "video_pts", video_pts,
            "send_packet_pts", send_packet_pts);
    avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);

    if (frameFinished) {
        av_picture_copy((AVPicture *)pFrame, (AVPicture *)next_frame, pCodecCtx->pix_fmt, info.width,

    return frameFinished;

bool FFmpegReader::CheckSeek(bool is_video) {
    if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))

    int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);

    if (max_seeked_frame >= seeking_frame) {
            "is_video_seek", is_video_seek,
            "max_seeked_frame", max_seeked_frame,
            "seeking_frame", seeking_frame,
            "seeking_pts", seeking_pts,
            "seek_video_frame_found", seek_video_frame_found,
            "seek_audio_frame_found", seek_audio_frame_found);
        Seek(seeking_frame - (10 * seek_count * seek_count));

            "is_video_seek", is_video_seek,
            "packet->pts", GetPacketPTS(),
            "seeking_pts", seeking_pts,
            "seeking_frame", seeking_frame,
            "seek_video_frame_found", seek_video_frame_found,
            "seek_audio_frame_found", seek_audio_frame_found);

void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
    int frame_finished = GetAVFrame();

    if (!frame_finished) {
        RemoveAVFrame(pFrame);

    int64_t current_frame = ConvertVideoPTStoFrame(video_pts);

    if (!seek_video_frame_found && is_seeking)
        seek_video_frame_found = current_frame;

    working_cache.Add(CreateFrame(requested_frame));

    AVFrame *pFrameRGB = nullptr;
    uint8_t *buffer = nullptr;

    if (pFrameRGB == nullptr)

    max_width = std::max(float(max_width), max_width * max_scale_x);
    max_height = std::max(float(max_height), max_height * max_scale_y);

    QSize width_size(max_width * max_scale_x,
        max_height * max_scale_y);

    if (width_size.width() >= max_width && width_size.height() >= max_height) {
        max_width = std::max(max_width, width_size.width());
        max_height = std::max(max_height, width_size.height());
        max_width = std::max(max_width, height_size.width());
        max_height = std::max(max_height, height_size.height());
    float preview_ratio = 1.0;
    max_width = info.width * max_scale_x * preview_ratio;
    max_height = info.height * max_scale_y * preview_ratio;

    int original_height = height;
    if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
        float ratio = float(width) / float(height);
        int possible_width = round(max_height * ratio);
        int possible_height = round(max_width / ratio);

        if (possible_width <= max_width) {
            width = possible_width;
            height = max_height;
            height = possible_height;

    const int bytes_per_pixel = 4;
    int buffer_size = (width * height * bytes_per_pixel) + 128;
    buffer = new unsigned char[buffer_size]();
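
    // swscale converts (and resizes) the decoded frame to RGBA directly into the
    // buffer allocated above, which pFrameRGB is expected to wrap for the
    // QImage handed to the Frame below.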
    int scale_mode = SWS_FAST_BILINEAR;
        scale_mode = SWS_BICUBIC;

    sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
              original_height, pFrameRGB->data, pFrameRGB->linesize);

    std::shared_ptr<Frame> f = CreateFrame(current_frame);
        f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
        f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);

    working_cache.Add(f);
    last_video_frame = f;

    RemoveAVFrame(pFrame);
    sws_freeContext(img_convert_ctx);

    ZmqLogger::Instance()->AppendDebugMethod(
        "FFmpegReader::ProcessVideoPacket (After)",
        "requested_frame", requested_frame,
        "current_frame", current_frame,
        "f->number", f->number,
        "video_pts_seconds", video_pts_seconds);

void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
    if (packet && packet->pts != AV_NOPTS_VALUE) {
        location = GetAudioPTSLocation(packet->pts);

        if (!seek_audio_frame_found && is_seeking)
            seek_audio_frame_found = location.frame;

    working_cache.Add(CreateFrame(requested_frame));

        "requested_frame", requested_frame,
        "target_frame", location.frame,
    int frame_finished = 0;
    int packet_samples = 0;

    int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
    if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {

    int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
    if (receive_frame_err >= 0) {
    if (receive_frame_err == AVERROR_EOF) {
    if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
        avcodec_flush_buffers(aCodecCtx);
    if (receive_frame_err != 0) {

    int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);

    if (frame_finished) {
        audio_pts = audio_frame->pts;
        location = GetAudioPTSLocation(audio_pts);

    int plane_size = -1;
    data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
                                           audio_frame->nb_samples,
    packet_samples = audio_frame->nb_samples * nb_channels;

    int pts_remaining_samples = packet_samples / info.channels;

    if (pts_remaining_samples == 0) {
            "packet_samples", packet_samples,
            "pts_remaining_samples", pts_remaining_samples);

    while (pts_remaining_samples) {
        int samples = samples_per_frame - previous_packet_location.sample_start;
        if (samples > pts_remaining_samples)
            samples = pts_remaining_samples;

        pts_remaining_samples -= samples;

        if (pts_remaining_samples > 0) {
            previous_packet_location.frame++;

            "packet_samples", packet_samples,
            "AV_SAMPLE_FMT_S16", AV_SAMPLE_FMT_S16);
    audio_converted->nb_samples = audio_frame->nb_samples;
    av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels, audio_frame->nb_samples, AV_SAMPLE_FMT_S16, 0);

    av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);

        audio_converted->data,
        audio_converted->linesize[0],
        audio_converted->nb_samples,
        audio_frame->linesize[0],
        audio_frame->nb_samples);

        audio_converted->data[0],
        static_cast<size_t>(audio_converted->nb_samples)
            * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16)

    av_free(audio_converted->data[0]);

    int64_t starting_frame_number = -1;
    bool partial_frame = true;
    for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {
        starting_frame_number = location.frame;
        int channel_buffer_size = packet_samples / info.channels;
        float *channel_buffer = new float[channel_buffer_size];

        for (int z = 0; z < channel_buffer_size; z++)
            channel_buffer[z] = 0.0f;
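
        // De-interleave this channel from the packed sample buffer, converting
        // signed 16-bit PCM to float in [-1.0, 1.0] by scaling with 1/2^15.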
        for (int sample = 0; sample < packet_samples; sample++) {
            if (channel_filter == channel) {
                channel_buffer[position] = audio_buf[sample] * (1.0f / (1 << 15));

        int remaining_samples = channel_buffer_size;
        float *iterate_channel_buffer = channel_buffer;
        while (remaining_samples > 0) {
            int samples = samples_per_frame - start;
            if (samples > remaining_samples)
                samples = remaining_samples;

            std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);

            if (samples_per_frame == start + samples)
                partial_frame = false;
                partial_frame = true;

            f->AddAudio(true, channel_filter, start, iterate_channel_buffer,

                "frame", starting_frame_number,
                "channel", channel_filter,
                "partial_frame", partial_frame,
                "samples_per_frame", samples_per_frame);

            working_cache.Add(f);

            remaining_samples -= samples;
            if (remaining_samples > 0)
                iterate_channel_buffer += samples;

            starting_frame_number++;

        delete[] channel_buffer;
        channel_buffer = NULL;
        iterate_channel_buffer = NULL;

        "requested_frame", requested_frame,
        "starting_frame", location.frame,
        "end_frame", starting_frame_number - 1,
        "audio_pts_seconds", audio_pts_seconds);
void FFmpegReader::Seek(int64_t requested_frame) {
    if (requested_frame < 1)
        requested_frame = 1;

    if (requested_frame > largest_frame_processed && packet_status.end_of_file) {

        "requested_frame", requested_frame,
        "seek_count", seek_count,
        "last_frame", last_frame);

    working_cache.Clear();

    video_pts_seconds = NO_PTS_OFFSET;
    audio_pts_seconds = NO_PTS_OFFSET;
    hold_packet = false;
    current_video_frame = 0;
    largest_frame_processed = 0;

    packet_status.reset(false);

    int buffer_amount = std::max(max_concurrent_frames, 8);
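
    // Seek a little earlier than the requested frame so the decoder can land on
    // a keyframe and ramp up before reaching the target.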
    if (requested_frame - buffer_amount < 20) {

    if (seek_count == 1) {
        seeking_pts = ConvertFrameToVideoPTS(1);
    seek_audio_frame_found = 0;
    seek_video_frame_found = 0;

    bool seek_worked = false;
    int64_t seek_target = 0;

        seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
            fprintf(stderr, "%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);
        is_video_seek = true;

        seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
            fprintf(stderr, "%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);
        is_video_seek = false;

        avcodec_flush_buffers(aCodecCtx);
        avcodec_flush_buffers(pCodecCtx);

        previous_packet_location.frame = -1;

        if (seek_count == 1) {
            seeking_pts = seek_target;
            seeking_frame = requested_frame;
        seek_audio_frame_found = 0;
        seek_video_frame_found = 0;
int64_t FFmpegReader::GetPacketPTS() {
    int64_t current_pts = packet->pts;
    if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
        current_pts = packet->dts;

    return AV_NOPTS_VALUE;

void FFmpegReader::UpdatePTSOffset() {
    if (pts_offset_seconds != NO_PTS_OFFSET) {

    pts_offset_seconds = 0.0;
    double video_pts_offset_seconds = 0.0;
    double audio_pts_offset_seconds = 0.0;
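
    // Scan the first packets of each stream for an initial PTS; offsets more
    // than 10 seconds from zero are treated as bogus and ignored.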
    bool has_video_pts = false;
        has_video_pts = true;
    bool has_audio_pts = false;
        has_audio_pts = true;

    while (!has_video_pts || !has_audio_pts) {
        if (GetNextPacket() < 0)

        int64_t pts = GetPacketPTS();

        if (!has_video_pts && packet->stream_index == videoStream) {
            if (std::abs(video_pts_offset_seconds) <= 10.0) {
                has_video_pts = true;
        } else if (!has_audio_pts && packet->stream_index == audioStream) {
            if (std::abs(audio_pts_offset_seconds) <= 10.0) {
                has_audio_pts = true;

    if (has_video_pts && has_audio_pts) {
        pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);

int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
    int64_t previous_video_frame = current_video_frame;

    if (current_video_frame == 0)
        current_video_frame = frame;

    if (frame == previous_video_frame) {
        current_video_frame++;
int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
    double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;

int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
    double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;

AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
    int64_t whole_frame = int64_t(frame);
    double sample_start_percentage = frame - double(whole_frame);
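
    // The fractional part of the frame position selects the starting sample
    // within that frame: sample_start = round(samples_per_frame * fraction).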
    int sample_start = round(double(samples_per_frame) * sample_start_percentage);

    if (whole_frame < 1)
    if (sample_start < 0)

    if (previous_packet_location.frame != -1) {
        if (location.is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
            int64_t orig_frame = location.frame;
            location.frame = previous_packet_location.frame;

            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
                "Source Frame", orig_frame,
                "Source Audio Sample", orig_start,
                "Target Frame", location.frame,
                "Target Audio Sample", location.sample_start,
                "pts", pts);

    previous_packet_location = location;

std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
    std::shared_ptr<Frame> output = working_cache.GetFrame(requested_frame);

    output = working_cache.GetFrame(requested_frame);
    if (output)
        return output;

    working_cache.Add(output);

    if (requested_frame > largest_frame_processed)
        largest_frame_processed = requested_frame;
bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
    bool seek_trash = false;
    int64_t max_seeked_frame = seek_audio_frame_found;
    if (seek_video_frame_found > max_seeked_frame) {
        max_seeked_frame = seek_video_frame_found;

    if ((info.has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
        (info.has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {

void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.GetFrames();
    std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;

    for (working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
        std::shared_ptr<Frame> f = *working_itr;

        if (!f || f->number > requested_frame) {

        double frame_pts_seconds = (double(f->number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
        double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);

        bool is_video_ready = false;
        bool is_audio_ready = false;
        double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
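
        // Readiness heuristic: a frame's video (or audio) is considered ready
        // once decoding has progressed past its PTS, or well past it (> 1.5 s),
        // meaning no more data for this frame can still arrive.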
        if ((frame_pts_seconds <= video_pts_seconds)
            || (recent_pts_diff > 1.5)
            is_video_ready = true;

            "frame_number", f->number,
            "frame_pts_seconds", frame_pts_seconds,
            "video_pts_seconds", video_pts_seconds,
            "recent_pts_diff", recent_pts_diff);

            for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {
                if (previous_frame_instance && previous_frame_instance->has_image_data) {
                    f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));

            if (last_video_frame && !f->has_image_data) {
                f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
            } else if (!f->has_image_data) {
                f->AddColor("#000000");

        double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
        if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
            || (recent_pts_diff > 1.5)
            is_audio_ready = true;

            "frame_number", f->number,
            "frame_pts_seconds", frame_pts_seconds,
            "audio_pts_seconds", audio_pts_seconds,
            "audio_pts_diff", audio_pts_diff,
            "recent_pts_diff", recent_pts_diff);

        bool is_seek_trash = IsPartialFrame(f->number);

            "frame_number", f->number,
            "is_video_ready", is_video_ready,
            "is_audio_ready", is_audio_ready,

        if ((!packet_status.end_of_file && is_video_ready && is_audio_ready) || packet_status.end_of_file || is_seek_trash) {

            "requested_frame", requested_frame,
            "f->number", f->number,
            "is_seek_trash", is_seek_trash,
            "Working Cache Count", working_cache.Count(),
            if (!is_seek_trash) {
                working_cache.Remove(f->number);
                last_frame = f->number;
                working_cache.Remove(f->number);

    working_frames.clear();
    working_frames.shrink_to_fit();

void FFmpegReader::CheckFPS() {
    int frames_per_second[3] = {0, 0, 0};
    int max_fps_index = sizeof(frames_per_second) / sizeof(frames_per_second[0]);

    int all_frames_detected = 0;
    int starting_frames_detected = 0;

        if (GetNextPacket() < 0)

        if (packet->stream_index == videoStream) {
            fps_index = int(video_seconds);

            if (fps_index >= 0 && fps_index < max_fps_index) {
                starting_frames_detected++;
                frames_per_second[fps_index]++;

            all_frames_detected++;

    float avg_fps = 30.0;
    if (starting_frames_detected > 0 && fps_index > 0) {
        avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);

    if (avg_fps < 8.0) {

    if (all_frames_detected > 0) {
void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
    av_freep(&remove_frame->data[0]);

void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
    delete remove_packet;

    root["type"] = "FFmpegReader";
    root["path"] = path;

    catch (const std::exception& e) {
        throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");

    if (!root["path"].isNull())
        path = root["path"].asString();
Size of file (in bytes)