diff --git a/data/missing-sei-and-pes.ts b/data/missing-sei-and-pes.ts new file mode 100644 index 0000000000..c164aa0862 Binary files /dev/null and b/data/missing-sei-and-pes.ts differ diff --git a/data/skip_1.ts b/data/skip_1.ts new file mode 100644 index 0000000000..7ee2bfb8e2 Binary files /dev/null and b/data/skip_1.ts differ diff --git a/data/skip_3.ts b/data/skip_3.ts new file mode 100644 index 0000000000..8760283543 Binary files /dev/null and b/data/skip_3.ts differ diff --git a/ffmpeg/api_test.go b/ffmpeg/api_test.go index d5c4946ab0..ee7f75beb5 100755 --- a/ffmpeg/api_test.go +++ b/ffmpeg/api_test.go @@ -51,7 +51,7 @@ func TestAPI_SkippedSegment(t *testing.T) { if res.Decoded.Frames != 120 { t.Error("Did not get decoded frames", res.Decoded.Frames) } - if res.Encoded[1].Frames != 245 { + if res.Encoded[1].Frames != 246 { t.Error("Did not get encoded frames ", res.Encoded[1].Frames) } } @@ -68,7 +68,7 @@ func TestAPI_SkippedSegment(t *testing.T) { # sanity check ffmpeg frame count against ours ffprobe -count_frames -show_streams -select_streams v ffmpeg_sw_$1.ts | grep nb_read_frames=246 - ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=245 + ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246 # check image quality # TODO really should have frame counts match for ssim @@ -224,18 +224,14 @@ func countEncodedFrames(t *testing.T, accel Acceleration) { if err != nil { t.Error(err) } - expectedFrames := 60 - if i == 1 || i == 3 { - expectedFrames = 61 // TODO figure out why this is! 
- } - if res.Encoded[0].Frames != expectedFrames { - t.Error(in.Fname, " Mismatched frame count: expected ", expectedFrames, " got ", res.Encoded[0].Frames) + if res.Encoded[0].Frames != 60 { + t.Error(in.Fname, " Mismatched frame count: expected 60 got ", res.Encoded[0].Frames) } if res.Encoded[1].Frames != 120 { t.Error(in.Fname, " Mismatched frame count: expected 120 got ", res.Encoded[1].Frames) } - if res.Encoded[2].Frames != 239 { - t.Error(in.Fname, " Mismatched frame count: expected 239 got ", res.Encoded[2].Frames) + if res.Encoded[2].Frames != 240 { + t.Error(in.Fname, " Mismatched frame count: expected 240 got ", res.Encoded[2].Frames) } if res.Encoded[3].Frames != 120 { t.Error(in.Fname, " Mismatched frame count: expected 120 got ", res.Encoded[3].Frames) @@ -257,33 +253,33 @@ func countEncodedFrames(t *testing.T, accel Acceleration) { pts=129000 pts=129750 pts=130500 -pts=306000 pts=306750 pts=307500 +pts=308250 ==> out_120fps_1.ts.pts <== pts=309000 pts=309750 pts=310500 -pts=486000 pts=486750 pts=487500 +pts=488250 ==> out_120fps_2.ts.pts <== pts=489000 pts=489750 pts=490500 -pts=666000 pts=666750 pts=667500 +pts=668250 ==> out_120fps_3.ts.pts <== pts=669000 pts=669750 pts=670500 -pts=846000 pts=846750 pts=847500 +pts=848250 ==> out_30fps_0.ts.pts <== pts=129000 @@ -297,9 +293,9 @@ pts=306000 pts=309000 pts=312000 pts=315000 +pts=480000 pts=483000 pts=486000 -pts=489000 ==> out_30fps_2.ts.pts <== pts=489000 @@ -313,9 +309,9 @@ pts=666000 pts=669000 pts=672000 pts=675000 +pts=840000 pts=843000 pts=846000 -pts=849000 ==> out_60fps_0.ts.pts <== pts=129000 @@ -463,8 +459,8 @@ func TestTranscoder_API_AlternatingTimestamps(t *testing.T) { # sanity check ffmpeg frame count against ours ffprobe -count_frames -show_streams -select_streams v ffmpeg_sw_$1.ts | grep nb_read_frames=246 - ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=245 - ffprobe -count_frames -show_streams -select_streams v sw_audio_encode_$1.ts | grep 
nb_read_frames=245 + ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246 + ffprobe -count_frames -show_streams -select_streams v sw_audio_encode_$1.ts | grep nb_read_frames=246 # check image quality # TODO frame count should really match for ssim @@ -476,12 +472,146 @@ func TestTranscoder_API_AlternatingTimestamps(t *testing.T) { # Really should check relevant audio as well... } - - # re-enable for seg 0 and 1 when alternating timestamps can be handled check 0 check 1 check 2 check 3 + ` + run(cmd) + +} + +func TestTranscoder_API_DTSOverlap(t *testing.T) { + dtsOverlap(t, Software) +} + +func dtsOverlap(t *testing.T, accel Acceleration) { + // Non-monotonic DTS timestamps are a major problem. + // We have one such case here when: + // 1. first segment pts starts near zero + // 2. B-frames are in use + // 3. mpegts is the output format + // + // the transcoder can produce DTS < 0, PTS = 0 which gets + // offset to DTS = 0, PTS = -DTS in the mpegts muxer + // + // However, transcodes for other segments will not be aware + // of this delay, leading to overlap between the first and + // second segments. + // + // This is not an LPMS-specific issue but rather one in the + // use of mpegts (always add an offset!); ffmpeg has + // the exact same issue. + + // This test case codifies this behavior for now as a sign + // that we are aware of it, and if it ever changes somehow, + // this should fail and let us know. 
+ + run, dir := setupTest(t) + defer os.RemoveAll(dir) + + err := RTMPToHLS("../transcoder/test.ts", dir+"/out.m3u8", dir+"/out_%d.ts", "2", 0) + if err != nil { + t.Error(err) + } + + profile := P144p30fps16x9 + profile.Framerate = 15 + profile.Profile = ProfileH264Main + // and check no bframes case (which is ok) + profileNoB := profile + profileNoB.Profile = ProfileH264ConstrainedHigh + tc := NewTranscoder() + defer tc.StopTranscoder() + idx := []int{1, 0} + for _, i := range idx { + in := &TranscodeOptionsIn{Fname: fmt.Sprintf("%s/out_%d.ts", dir, i)} + out := []TranscodeOptions{{ + Oname: fmt.Sprintf("%s/bf_%d.ts", dir, i), + Profile: profile, + AudioEncoder: ComponentOptions{Name: "copy"}, + Accel: accel, + }, { + Oname: fmt.Sprintf("%s/nobf_%d.ts", dir, i), + Profile: profileNoB, + AudioEncoder: ComponentOptions{Name: "copy"}, + Accel: accel, + }} + res, err := tc.Transcode(in, out) + if err != nil { + t.Error(err) + } + if res != nil { + if res.Decoded.Frames != 120 { + t.Error("Did not get decoded frames", res.Decoded.Frames) + } + if res.Encoded[0].Frames != 30 { + t.Error("Mismatched frame count for hw/nv") + } + } + } + + cmd := ` + + # ffmpeg has the exact same problem so let's demonstrate that too + ffmpeg -loglevel warning -i out_0.ts -c:a copy \ + -vf fps=15,scale=w=256:h=144 -c:v libx264 -muxdelay 0 -muxpreload 0 -copyts \ + ffmpeg_sw_0.ts + ffmpeg -loglevel warning -i out_1.ts -c:a copy \ + -vf fps=15,scale=w=256:h=144 -c:v libx264 -muxdelay 0 -muxpreload 0 -copyts \ + ffmpeg_sw_1.ts + + + # Ensure timestamps are monotonic by checking deltas + # do this for the low fps rendition since those are more + # likely to have issues while downsampling fps + function calc_delta { + cat $1_0.ts $1_1.ts > $1_concat.ts + mapfile -t dts_times < <(ffprobe -hide_banner -select_streams v -of csv=p=0 -show_entries packet=dts_time $1_concat.ts | awk -F',' '{print $1}') + # Loop through the array and calculate the delta + for ((i = 1; i < ${#dts_times[@]}; i++)); do 
+ delta=$(awk "BEGIN {printf \"%.6f\", ${dts_times[$i]} - ${dts_times[$i-1]}}" | sed -e 's/^0\./\./' -e 's/^-0\./-\./') + echo "$delta" >> $1_concat.delta + done + sort $1_concat.delta | uniq -c | sed 's/^ *//g' > $2 + } + + calc_delta bf deltas.out + calc_delta nobf deltas_nobf.out + calc_delta ffmpeg_sw ffmpeg_deltas.out + ` + + if accel == Nvidia { + cmd = cmd + ` + cat <<-EOF > expected_deltas.out + 1 -.133333 + 20 .066666 + 38 .066667 + EOF + ` + } else { + // for sw transcode, ffmpeg and lpms are exactly the same + cmd = cmd + ` + diff -u deltas.out ffmpeg_deltas.out + + cat <<-EOF > expected_deltas.out + 1 -.066666 + 20 .066666 + 38 .066667 + EOF + ` + } + + cmd = cmd + ` + diff -u expected_deltas.out deltas.out + + # no b-frames case + cat <<-EOF > expected_deltas_nobf.out + 20 .066666 + 39 .066667 + EOF + diff -u expected_deltas_nobf.out deltas_nobf.out + ` run(cmd) } @@ -795,11 +925,7 @@ func consecutiveMP4s(t *testing.T, accel Acceleration) { t.Error("Unexpected error ", err) continue } - expectedFrames := 60 - if i == 1 || i == 3 { - expectedFrames = 61 // TODO figure out why this is! - } - if res.Decoded.Frames != 120 || res.Encoded[0].Frames != expectedFrames { + if res.Decoded.Frames != 120 || res.Encoded[0].Frames != 60 { t.Error("Unexpected results ", i, inExt, outExt, res) } } @@ -1119,9 +1245,8 @@ func setGops(t *testing.T, accel Acceleration) { # intra checks with fixed fps. 
# sanity check number of packets vs keyframes - # TODO look into why lpms generates 91 frames instead of 100 - ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags= | wc -l | grep 91 - ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags=K | wc -l | grep 91 + ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags= | wc -l | grep 100 + ffprobe -loglevel warning lpms_intra_10fps.ts -select_streams v -show_packets | grep flags=K | wc -l | grep 100 ` run(cmd) diff --git a/ffmpeg/decoder.c b/ffmpeg/decoder.c index f8101de2d4..29823aee02 100755 --- a/ffmpeg/decoder.c +++ b/ffmpeg/decoder.c @@ -11,11 +11,43 @@ static int lpms_send_packet(struct input_ctx *ictx, AVCodecContext *dec, AVPacke return ret; } +static int64_t decoded_video_pts_step(struct input_ctx *ictx, AVFrame *frame) +{ + if (frame && frame->duration > 0) return frame->duration; + AVStream *vst = (ictx && ictx->ic && ictx->vi >= 0) ? ictx->ic->streams[ictx->vi] : NULL; + if (vst && vst->r_frame_rate.num > 0 && vst->r_frame_rate.den > 0) { + int64_t step = av_rescale_q(1, av_inv_q(vst->r_frame_rate), vst->time_base); + if (step > 0) return step; + } + return 1; +} + +// Fix malformed decode timestamps (missing/regressive PTS) so downstream stages +// receive a stable, non-AV_NOPTS_VALUE video timeline. +static void fix_video_pts(struct input_ctx *ictx, AVFrame *frame) +{ + int64_t pts = frame->pts; + int synthesized = 0; + if (pts == AV_NOPTS_VALUE) pts = frame->best_effort_timestamp; + if (pts == AV_NOPTS_VALUE) { + pts = decoded_video_pts_step(ictx, frame); + if (ictx->last_video_pts != AV_NOPTS_VALUE) pts += ictx->last_video_pts; + synthesized = 1; + } + if (ictx->last_video_pts != AV_NOPTS_VALUE && pts <= ictx->last_video_pts) { + int64_t step = synthesized ? 
decoded_video_pts_step(ictx, frame) : 1; + pts = ictx->last_video_pts + step; + } + frame->pts = pts; + ictx->last_video_pts = pts; +} + static int lpms_receive_frame(struct input_ctx *ictx, AVCodecContext *dec, AVFrame *frame) { int ret = avcodec_receive_frame(dec, frame); if (dec != ictx->vc) return ret; if (!ret && frame && !is_flush_frame(frame)) { + fix_video_pts(ictx, frame); ictx->pkt_diff--; // decrease buffer count for non-sentinel video frames if (ictx->flushing) ictx->sentinel_count = 0; } @@ -328,6 +360,7 @@ int open_input(input_params *params, struct input_ctx *ctx) if (!ctx->last_frame_v) LPMS_ERR(open_input_err, "Unable to alloc last_frame_v"); ctx->last_frame_a = av_frame_alloc(); if (!ctx->last_frame_a) LPMS_ERR(open_input_err, "Unable to alloc last_frame_a"); + ctx->last_video_pts = AV_NOPTS_VALUE; return 0; diff --git a/ffmpeg/decoder.h b/ffmpeg/decoder.h index ab82948aab..7ffde76882 100755 --- a/ffmpeg/decoder.h +++ b/ffmpeg/decoder.h @@ -36,6 +36,8 @@ struct input_ctx { #define SENTINEL_MAX 8 uint16_t sentinel_count; + int64_t last_video_pts; // Resets after each segment + // Packet held while decoder is blocked and needs to drain AVPacket *blocked_pkt; diff --git a/ffmpeg/encoder.c b/ffmpeg/encoder.c index 3d049e0cf5..2a06d4c3b2 100755 --- a/ffmpeg/encoder.c +++ b/ffmpeg/encoder.c @@ -48,6 +48,7 @@ static int add_video_stream(struct output_ctx *octx, struct input_ctx *ictx) } else LPMS_ERR(add_video_err, "No video encoder, not a copy; what is this?"); octx->last_video_dts = AV_NOPTS_VALUE; + octx->last_enc_pts = AV_NOPTS_VALUE; return 0; add_video_err: @@ -160,11 +161,9 @@ void close_output(struct output_ctx *octx) } if (octx->vc && octx->hw_type == AV_HWDEVICE_TYPE_NONE) avcodec_free_context(&octx->vc); if (octx->ac) avcodec_free_context(&octx->ac); + free_filter(&octx->vf); octx->af.flushed = octx->vf.flushed = 0; octx->af.flushing = octx->vf.flushing = 0; - octx->vf.pts_diff = INT64_MIN; - octx->vf.prev_frame_pts = 0; - 
octx->vf.segments_complete++; } void free_output(struct output_ctx *octx) @@ -473,6 +472,19 @@ int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost) av_packet_rescale_ts(pkt, tb, ost->time_base); } + /* Enable this if it seems we have issues + with the first and second segments overlapping due to bframes + See TestTranscoder_API_DTSOverlap + + int delay = av_rescale_q(10, (AVRational){1, 1}, ost->time_base); + if (pkt->dts != AV_NOPTS_VALUE) { + pkt->dts += delay; + } + if (pkt->pts != AV_NOPTS_VALUE) { + pkt->pts += delay; + } + */ + // drop any preroll audio. may need to drop multiple packets for multichannel // XXX this breaks if preroll isn't exactly one AVPacket or drop_ts == 0 // hasn't been a problem in practice (so far) @@ -508,8 +520,11 @@ int mux(AVPacket *pkt, AVRational tb, struct output_ctx *octx, AVStream *ost) pkt->pts = pkt->dts = pkt->pts + pkt->dts + octx->last_video_dts + 1 - FFMIN3(pkt->pts, pkt->dts, octx->last_video_dts + 1) - FFMAX3(pkt->pts, pkt->dts, octx->last_video_dts + 1); + } + // Match ffmpeg's mux behavior and clamp non-monotonic DTS separately, + // even when the packet did not trip the decoder's DTS > PTS repair path. 
+ if (pkt->dts != AV_NOPTS_VALUE && octx->last_video_dts != AV_NOPTS_VALUE) { int64_t max = octx->last_video_dts + !(octx->oc->oformat->flags & AVFMT_TS_NONSTRICT); - // check if dts is bigger than previous last dts or not, not then that's non-monotonic if (pkt->dts < max) { if (pkt->pts >= pkt->dts) pkt->pts = FFMAX(pkt->pts, max); pkt->dts = max; @@ -544,13 +559,26 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext if (!encoder) LPMS_ERR(proc_cleanup, "Trying to transmux; not supported") + int is_video = (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type); + int is_audio = (AVMEDIA_TYPE_AUDIO == ost->codecpar->codec_type); + + if (is_video && filter && !filter->active && inf) { + ret = init_video_filters(ictx, octx, inf); + if (ret < 0) LPMS_ERR(proc_cleanup, "Unable to initialize video filter"); + } + if (!filter || !filter->active) { + // Don't call encode if nothing has been sent to CUDA yet (via filter + // lazy init) because it may cause odd interactions with flushing + if (is_video && !inf && + octx->hw_type > AV_HWDEVICE_TYPE_NONE && + AV_HWDEVICE_TYPE_MEDIACODEC != octx->hw_type) { + return AVERROR_EOF; + } // No filter in between decoder and encoder, so use input frame directly return encode(encoder, inf, octx, ost); } - int is_video = (AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type); - int is_audio = (AVMEDIA_TYPE_AUDIO == ost->codecpar->codec_type); ret = filtergraph_write(inf, ictx, octx, filter, is_video); if (ret < 0) goto proc_cleanup; @@ -620,10 +648,20 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext if (frame) { // rescale pts to match encoder timebase if necessary (eg, fps passthrough) AVRational filter_tb = av_buffersink_get_time_base(filter->sink_ctx); - if (av_cmp_q(filter_tb, encoder->time_base)) { + int pts_rescaled = av_cmp_q(filter_tb, encoder->time_base); + if (pts_rescaled) { frame->pts = av_rescale_q(frame->pts, filter_tb, encoder->time_base); // TODO does 
frame->duration needs to be rescaled too? } + // Handle timebase conversion collapsing adjacent PTS into the same encoder tick + if (is_video && pts_rescaled) { + if (octx->last_enc_pts != AV_NOPTS_VALUE && frame->pts <= octx->last_enc_pts) { + frame->pts = octx->last_enc_pts + 1; + AVRational ftb = av_buffersink_get_time_base(filter->sink_ctx); + frame->opaque = (void *)av_rescale_q(frame->pts, encoder->time_base, ftb); + } + octx->last_enc_pts = frame->pts; + } } // Check for runaway encodes where the FPS filter produces too many frames diff --git a/ffmpeg/ffmpeg.go b/ffmpeg/ffmpeg.go index 04c6fb93bf..38410203c0 100755 --- a/ffmpeg/ffmpeg.go +++ b/ffmpeg/ffmpeg.go @@ -912,6 +912,14 @@ func (t *Transcoder) Transcode(input *TranscodeOptionsIn, ps []TranscodeOptions) } } } + if format.Format == "mpegts" && format.Vcodec == "h264" { + if fixedPath, fixErr := FixMisplacedSEI(input.Fname); fixErr != nil { + glog.Warningf("SEI fix-up check failed for %s: %v", input.Fname, fixErr) + } else if fixedPath != input.Fname { + defer os.Remove(fixedPath) + input.Fname = fixedPath + } + } } hw_type, err := accelDeviceType(input.Accel) if err != nil { diff --git a/ffmpeg/ffmpeg_test.go b/ffmpeg/ffmpeg_test.go index f3896a9226..9ff502eaca 100644 --- a/ffmpeg/ffmpeg_test.go +++ b/ffmpeg/ffmpeg_test.go @@ -494,16 +494,17 @@ func TestTranscoder_Statistics_Encoded(t *testing.T) { t.Error("Mismatched pixel counts") } // Since this is a 1-second input we should ideally have count of frames - if r.Frames != int(out[i].Profile.Framerate+1) { - + if r.Frames == int(out[i].Profile.Framerate) { + // cool all good + } else { // Some "special" cases (already have test cases covering these) if p144p60fps == out[i].Profile { if r.Frames != int(out[i].Profile.Framerate)+1 { t.Error("Mismatched frame counts for 60fps; expected 61 frames but got ", r.Frames) } } else if podd123fps == out[i].Profile { - if r.Frames != 124 { - t.Error("Mismatched frame counts for 123fps; expected 124 frames but got 
", r.Frames) + if r.Frames != 125 { + t.Error("Mismatched frame counts for 123fps; expected 125 frames but got ", r.Frames) } } else { t.Error("Mismatched frame counts ", r.Frames, out[i].Profile.Framerate) @@ -560,7 +561,7 @@ func TestTranscoder_StatisticsAspectRatio(t *testing.T) { t.Error(err) } r := res.Encoded[0] - if r.Frames != int(pAdj.Framerate+1) || r.Pixels != int64(r.Frames*146*82) { + if r.Frames != int(pAdj.Framerate) || r.Pixels != int64(r.Frames*146*82) { t.Error(fmt.Errorf("Results did not match: %v ", r)) } } @@ -852,7 +853,7 @@ func TestTranscoder_StreamCopy(t *testing.T) { if err != nil { t.Error(err) } - if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 31 || + if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 30 || res.Encoded[1].Frames != 0 { t.Error("Unexpected frame counts from stream copy") t.Error(res) @@ -996,7 +997,7 @@ func TestTranscoder_Drop(t *testing.T) { if err != nil { t.Error(err) } - if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 31 { + if res.Decoded.Frames != 60 || res.Encoded[0].Frames != 30 { t.Error("Unexpected count of decoded frames ", res.Decoded.Frames, res.Decoded.Pixels) } @@ -1028,7 +1029,7 @@ func TestTranscoder_Drop(t *testing.T) { if err != nil { t.Error(err) } - if res.Decoded.Frames != 31 || res.Encoded[0].Frames != 31 { + if res.Decoded.Frames != 30 || res.Encoded[0].Frames != 30 { t.Error("Unexpected encoded/decoded frame counts ", res.Decoded.Frames, res.Encoded[0].Frames) } in.Fname = dir + "/novideo.ts" @@ -1224,7 +1225,7 @@ func TestTranscoder_RepeatedTranscodes(t *testing.T) { in = &TranscodeOptionsIn{Fname: dir + "/test-short-with-audio.ts"} out = []TranscodeOptions{{Oname: dir + "/audio-0.ts", Profile: P144p30fps16x9}} res, err = Transcode3(in, out) - if err != nil || res.Decoded.Frames != 60 || res.Encoded[0].Frames != 31 { + if err != nil || res.Decoded.Frames != 60 || res.Encoded[0].Frames != 30 { t.Error("Unexpected preconditions ", err, res) } frames = res.Encoded[0].Frames @@ 
-1399,7 +1400,7 @@ nb_read_frames=%d b.Flush() // Run a ffmpeg command that attempts to match the given encode settings - run(fmt.Sprintf(`ffmpeg -loglevel warning -hide_banner -i %s -vsync passthrough -c:a aac -ar 44100 -ac 2 -c:v libx264 -vf fps=%d/1:eof_action=pass,scale=%dx%d -copyts -muxdelay 0 -y ffmpeg.ts`, in.Fname, out.Profile.Framerate, w, h)) + run(fmt.Sprintf(`ffmpeg -loglevel warning -hide_banner -i %s -vsync passthrough -c:a aac -ar 44100 -ac 2 -c:v libx264 -vf fps=%d/1,scale=%dx%d -copyts -muxdelay 0 -y ffmpeg.ts`, in.Fname, out.Profile.Framerate, w, h)) // Gather some ffprobe stats on the output of the above ffmpeg command run(`ffprobe -loglevel warning -hide_banner -count_frames -select_streams v -show_streams 2>&1 ffmpeg.ts | grep '^width=\|^height=\|nb_read_frames=' > ffmpeg.stats`) @@ -1439,7 +1440,7 @@ nb_read_frames=%d if err != nil { t.Error(err) } - if res.Encoded[0].Frames != 31 { + if res.Encoded[0].Frames != 30 { t.Error("Did not get expected frame count ", res.Encoded[0].Frames) } checkStatsFile(in, &out[0], res) @@ -1465,10 +1466,10 @@ nb_read_frames=%d if err != nil { t.Error(err) } - if res.Encoded[0].Frames != 124 { // TODO Find out why this isn't 126 (ffmpeg) + if res.Encoded[0].Frames != 125 { // TODO Find out why this isn't 126 (ffmpeg) t.Error("Did not get expected frame count ", res.Encoded[0].Frames) } - // checkStatsFile(in, &out[0], res) // TODO framecounts don't match ffmpeg + checkStatsFile(in, &out[0], res) } func TestTranscoder_PassthroughFPS(t *testing.T) { @@ -2065,9 +2066,10 @@ PTS_EOF // check output cmd = ` # reproduce expected lpms output using ffmpeg - ffmpeg -debug_ts -loglevel trace -i in.ts -vf 'scale=136x240,fps=30/1:eof_action=pass' -c:v libx264 -copyts -muxdelay 0 out-ffmpeg.ts + ffmpeg -debug_ts -loglevel trace -i in.ts -vf 'scale=136x240,fps=30/1' -c:v libx264 -copyts -muxdelay 0 out-ffmpeg.ts - ffprobe -show_packets out-ffmpeg.ts | grep dts= > ffmpeg-dts.out + # ffmpeg produces one more packet than lpms in 
this case so just trim that out + ffprobe -show_packets out-ffmpeg.ts | grep dts= | head -n -1 > ffmpeg-dts.out ffprobe -show_packets out0in.ts | grep dts= > lpms-dts.out diff -u lpms-dts.out ffmpeg-dts.out @@ -2223,7 +2225,7 @@ func runRotationTests(t *testing.T, accel Acceleration) { require.NoError(t, err) assert.Equal(t, 360, res.Decoded.Frames) - assert.Equal(t, 181, res.Encoded[0].Frames) // should be 180 ... ts rounding ? + assert.Equal(t, 180, res.Encoded[0].Frames) assert.Equal(t, 360, res.Encoded[1].Frames) // TODO test rollover of gop interval during flush @@ -2256,7 +2258,7 @@ func runRotationTests(t *testing.T, accel Acceleration) { cat <<-EOF2 > expected-30fps.dims 60 256,144 60 146,260 - 61 256,144 + 60 256,144 EOF2 diff -u expected.dims out.dims @@ -2341,7 +2343,7 @@ func runRotationTests(t *testing.T, accel Acceleration) { require.NoError(t, err) assert.Equal(t, 240, res.Decoded.Frames) - assert.Equal(t, 121, res.Encoded[0].Frames) // should be 120 ... ts rounding ? + assert.Equal(t, 120, res.Encoded[0].Frames) assert.Equal(t, 240, res.Encoded[1].Frames) cmd = ` @@ -2358,7 +2360,7 @@ func runRotationTests(t *testing.T, accel Acceleration) { cat <<-EOF2 > single-expected-30fps.dims 60 256,144 - 61 146,260 + 60 146,260 EOF2 diff -u single-expected.dims single-out.dims @@ -2434,10 +2436,20 @@ func TestTranscoder_PNGDemuxerOpts(t *testing.T) { // so test those run, dir := setupTest(t) defer os.RemoveAll(dir) + // generate 3 png frames cmd := ` ffmpeg -i $1/../transcoder/test.ts -an -frames:v 3 test-%d.png + + # Run an ffmpeg CLI equivalent for this PNG demuxer path. + ffmpeg -framerate 1/3 -f image2 -i test-%d.png -an \ + -vf "fps=30/1,scale=256:144" -c:v libx264 -pix_fmt yuv420p \ + -f mpegts -y ffmpeg-equivalent.ts + + # Check packet count from ffprobe. 
+ ffprobe -v warning -count_packets -show_streams -select_streams v ffmpeg-equivalent.ts | grep nb_read_packets=270 ` run(cmd) + // 1 frame every 3 seconds, with 3 frames == 9 seconds res, err := Transcode3(&TranscodeOptionsIn{ Fname: dir + "/test-%d.png", Profile: VideoProfile{ @@ -2450,7 +2462,7 @@ func TestTranscoder_PNGDemuxerOpts(t *testing.T) { }}) assert.Nil(t, err) assert.Equal(t, 3, res.Decoded.Frames) - assert.Equal(t, 180, res.Encoded[0].Frames) + assert.Equal(t, 270, res.Encoded[0].Frames) // 9 seconds * 30fps == 270 } func TestTranscode_DurationLimit(t *testing.T) { @@ -2531,8 +2543,8 @@ func TestTranscoder_LargeOutputs(t *testing.T) { close(closeCh) assert.Nil(err) assert.Equal(120, res.Decoded.Frames) - assert.Equal(116, res.Encoded[0].Frames) // ffmpeg probably drops missing timestamp frames - assert.Equal(56, res.Encoded[1].Frames) + assert.Equal(120, res.Encoded[0].Frames) // passthrough + assert.Equal(60, res.Encoded[1].Frames) // 30fps, 2 second input cmd := ` # check input properties to ensure they still have the weird timestamps ffprobe -of csv -hide_banner -show_entries frame=pts_time,pkt_dts_time,media_type,pict_type $1/../data/missing-dts.ts 2>&1 | grep video > input.out @@ -2663,8 +2675,6 @@ func TestTranscoder_LargeOutputs(t *testing.T) { # check output - ls -lha - #ffprobe -of csv -hide_banner -show_entries frame=pts_time,pkt_dts_time,media_type,pict_type out-30fps.ts ffprobe -of csv -hide_banner -show_entries frame=pts_time,pkt_dts_time,media_type,pict_type out-30fps.ts 2>&1 | grep video > output.out cat <<- 'EOF2' > expected-output.out frame,video,25994.033333,25994.033333,I, @@ -2684,10 +2694,11 @@ func TestTranscoder_LargeOutputs(t *testing.T) { frame,video,25994.500000,25994.500000,B frame,video,25994.533333,25994.533333,B frame,video,25994.566667,25994.566667,B - frame,video,25994.600000,25994.600000,P - frame,video,25994.666667,25994.666667,P, + frame,video,25994.600000,25994.600000,P, + frame,video,25994.633333,25994.633333,P, + 
frame,video,25994.666667,25994.666667,B, frame,video,25994.700000,25994.700000,B, - frame,video,25994.733333,25994.733333,B, + frame,video,25994.733333,25994.733333,P, frame,video,25994.766667,25994.766667,B, frame,video,25994.800000,25994.800000,P, frame,video,25994.833333,25994.833333,B, @@ -2698,33 +2709,281 @@ func TestTranscoder_LargeOutputs(t *testing.T) { frame,video,25995.000000,25995.000000,P, frame,video,25995.033333,25995.033333,B, frame,video,25995.066667,25995.066667,B, + frame,video,25995.100000,25995.100000,P, frame,video,25995.133333,25995.133333,B, frame,video,25995.166667,25995.166667,P, frame,video,25995.200000,25995.200000,B, + frame,video,25995.233333,25995.233333,B, frame,video,25995.266667,25995.266667,B, - frame,video,25995.300000,25995.300000,B, - frame,video,25995.333333,25995.333333,P, + frame,video,25995.300000,25995.300000,P, + frame,video,25995.333333,25995.333333,B, frame,video,25995.366667,25995.366667,B, frame,video,25995.400000,25995.400000,B, - frame,video,25995.433333,25995.433333,B, - frame,video,25995.466667,25995.466667,P, + frame,video,25995.433333,25995.433333,P, + frame,video,25995.466667,25995.466667,B, frame,video,25995.500000,25995.500000,B, frame,video,25995.533333,25995.533333,B, - frame,video,25995.566667,25995.566667,B, - frame,video,25995.600000,25995.600000,P, + frame,video,25995.566667,25995.566667,P, + frame,video,25995.600000,25995.600000,B, frame,video,25995.633333,25995.633333,B, frame,video,25995.666667,25995.666667,B, + frame,video,25995.700000,25995.700000,P, frame,video,25995.733333,25995.733333,B, - frame,video,25995.766667,25995.766667,P, + frame,video,25995.766667,25995.766667,B, frame,video,25995.800000,25995.800000,B, - frame,video,25995.833333,25995.833333,B, + frame,video,25995.833333,25995.833333,P, frame,video,25995.866667,25995.866667,B, - frame,video,25995.900000,25995.900000,P, + frame,video,25995.900000,25995.900000,B, frame,video,25995.933333,25995.933333,B, - frame,video,25995.966667,N/A,B, 
+ frame,video,25995.966667,N/A,P, frame,video,25996.000000,N/A,P, EOF2 diff -u expected-output.out output.out ` assert.True(run(cmd)) } + +func TestTranscoder_NOPTS_SkipSegment(t *testing.T) { + + // This test case exercises 2 samples that have produced extremely large + // outputs due to mis-placed SEI resulting in the H.264 parser omitting + // timing information. When a segment is skipped + // (eg, jumping from segment 1 -> 3), this led to a PTS underflow + // in the FPS filter (AV_NOPTS_VALUE is a large negative number), causing + // millions of frames to be output. This is fixed at several layers now + // but transcoding the samples here would trigger exactly that problem. + + run, dir := setupTest(t) + defer os.RemoveAll(dir) + tc := NewTranscoder() + defer tc.StopTranscoder() + segs := []int{1, 3} + + // Double check that the inputs have a broken 'shape' + // which is a SEI that comes late after a picture NAL + // that results in N/A timestamp and position values (AV_NOPTS_VALUE) + // this behavior comes from deep inside the ffmpeg h264 parser + // + // Frame 6 of each segment has N/A pts; frames 15-21 of skip_1 + // show non-monotonic PTS from B-frame reordering. 
+ cmd := ` + cat <<- 'EOF' > expected-skip.out + ==> skip_1.begin <== + 12.844767,N/A,1156029,I + 12.878133,12.878133,1159032,B + 12.911500,12.911500,1162035,B + 12.944867,12.944867,1165038,B + 12.978233,12.978233,1168041,B + N/A,13.011600,1171044,P + 13.044967,13.044967,1174047,B + 13.078333,13.078333,1177050,B + + ==> skip_1.mid <== + 13.478733,13.311900,1213086,B + 13.311900,13.345267,1201074,P + 13.378633,13.378633,1204077,B + 13.412000,13.412000,1207080,B + 13.445367,13.445367,1210083,B + 13.645567,13.478733,1213086,B + 13.345267,13.512100,1216089,P + + ==> skip_1.end <== + 18.483733,18.450367,1660533,B + 18.517100,18.483733,1663536,B + 18.350267,N/A,1651524,P + + ==> skip_3.begin <== + 24.256167,N/A,2183055,I + 24.289533,24.289533,2186058,B + 24.322900,24.322900,2189061,B + 24.356267,24.356267,2192064,B + 24.389633,24.389633,2195067,B + N/A,24.423000,2198070,P + 24.456367,24.456367,2201073,B + 24.489733,24.489733,2204076,B + + ==> skip_3.mid <== + 24.723300,24.723300,2225097,B + 24.756667,24.756667,2228100,P + 24.790033,24.790033,2231103,B + 24.823400,24.823400,2234106,B + 24.856767,24.856767,2237109,B + 24.890133,24.890133,2240112,B + 24.923500,24.923500,2243115,P + + ==> skip_3.end <== + 29.861767,29.861767,2687559,B + 29.895133,29.895133,2690562,B + 29.928500,N/A,2693565,P + EOF + + for i in 1 3 + do + name="skip_$i" + ffprobe -loglevel warning -select_streams v:0 \ + -show_entries frame=pts_time,pkt_dts_time,best_effort_timestamp,pict_type \ + -of csv=p=0 "$1/../data/${name}.ts" | sed '/^$/d; s/,*$//g' > "$name.frames" + head -n 8 "$name.frames" > "$name.begin" + sed -n '15,21p' "$name.frames" > "$name.mid" + tail -n 3 "$name.frames" > "$name.end" + tail -n +1 "$name.begin" "$name.mid" "$name.end" >> skip.out + [ "$i" = 1 ] && printf '\n' >> skip.out + done + diff -u expected-skip.out skip.out + ` + require.True(t, run(cmd), "unable to verify input; ffmpeg behavior may have changed") + + passthrough := P240p30fps16x9 + passthrough.Framerate = 0 + + for _, 
i := range segs { + in := &TranscodeOptionsIn{ + Fname: fmt.Sprintf("../data/skip_%d.ts", i), + } + out := []TranscodeOptions{{ + Oname: "-", + Profile: P240p30fps16x9, + AudioEncoder: ComponentOptions{Name: "copy"}, + Muxer: ComponentOptions{Name: "null"}, + }, { + Oname: fmt.Sprintf("%s/out-%d-pass.ts", dir, i), + Profile: passthrough, + AudioEncoder: ComponentOptions{Name: "copy"}, + }} + res, err := tc.Transcode(in, out) + require.Nil(t, err) + assert := assert.New(t) + assert.Equal(171, res.Decoded.Frames) + assert.Equal(171, res.Encoded[1].Frames, "passthrough output frame count for segment %d", i) + if i == 0 { + assert.Equal(172, res.Encoded[1].Frames) // unclear why; ts rounding? + } else { + assert.Equal(171, res.Encoded[1].Frames) + } + } + cmd = ` + cat <<- 'EOF' > expected-out-pass.out + ==> out-1-pass.begin <== + 12.844767,12.778033,0.033367,K__ + 12.978233,12.811400,0.033367,___ + 12.911500,12.844767,0.033367,___ + 12.878133,12.878133,0.033367,___ + 12.944867,12.911500,0.033367,___ + 13.111700,12.944867,0.033367,___ + 13.044967,12.978233,0.033367,___ + 13.011600,13.011600,0.033367,___ + + ==> out-1-pass.end <== + 18.385033,18.385033,0.033367,___ + 18.450367,18.417000,0.033367,___ + 18.618600,18.451767,0.033367,___ + 18.551867,18.485133,0.033367,___ + 18.518500,18.518500,0.033367,___ + 18.585233,18.551867,0.033367,___ + 18.685333,18.585233,0.033367,___ + 18.651967,18.618600,0.033367,___ + + ==> out-3-pass.begin <== + 24.256167,24.189433,0.033367,K__ + 24.389633,24.222800,0.033367,___ + 24.322900,24.256167,0.033367,___ + 24.289533,24.289533,0.033367,___ + 24.356267,24.322900,0.033367,___ + 24.523100,24.356267,0.033367,___ + 24.456367,24.389633,0.033367,___ + 24.423000,24.423000,0.033367,___ + + ==> out-3-pass.end <== + 29.661567,29.628200,0.033367,___ + 29.828400,29.661567,0.033367,___ + 29.761667,29.694933,0.033367,___ + 29.728300,29.728300,0.033367,___ + 29.795033,29.761667,0.033367,___ + 29.928500,29.795033,0.033367,___ + 
29.861767,29.828400,0.033367,___ + 29.895133,29.861767,0.033367,___ + EOF + + for i in 1 3 + do + ffprobe -loglevel warning -select_streams v:0 -show_entries packet=pts_time,dts_time,duration_time,flags -of csv=p=0 "out-$i-pass.ts" | sed '/^$/d; s/,*$//g' > "out-$i-pass.packets" + head -n 8 "out-$i-pass.packets" > "out-$i-pass.begin" + tail -n 8 "out-$i-pass.packets" > "out-$i-pass.end" + tail -n +1 "out-$i-pass.begin" "out-$i-pass.end" >> out-pass.out + [ "$i" = 1 ] && printf '\n' >> out-pass.out + done + diff -u expected-out-pass.out out-pass.out + ` + assert.True(t, run(cmd)) +} + +func TestTranscoder_NOPTS_MissingSEIAndPES(t *testing.T) { + // This test case is constructed such that the H.264 parser can't produce + // timestamps (missing SEI timing information) but ensure that LPMS can + // still derive produce timestamps for that. + run, dir := setupTest(t) + defer os.RemoveAll(dir) + + cmd := ` + cat <<- 'EOF' > expected-input.out + 0.200000,0.000000,0.100000,K__ + 0.500000,0.100000,0.100000,___ + 0.300000,0.200000,0.100000,___ + 0.400000,0.300000,0.100000,___ + N/A,N/A,0.100000,___ + N/A,N/A,0.100000,___ + N/A,N/A,0.100000,___ + N/A,N/A,0.100000,___ + 0.900000,0.800000,0.100000,___ + 1.000000,0.900000,0.100000,___ + 1.300000,1.000000,0.100000,___ + 1.200000,1.100000,0.100000,___ + EOF + + ffprobe -loglevel warning -select_streams v:0 \ + -show_entries packet=pts_time,dts_time,duration_time,flags -of csv=p=0 \ + "$1/../data/missing-sei-and-pes.ts" | sed '/^$/d; s/,*$//g' > input.out + diff -u expected-input.out input.out + ` + require.True(t, run(cmd), "unable to verify input; ffmpeg behavior may have changed") + + passthrough := P240p30fps16x9 + passthrough.Framerate = 0 + + in := &TranscodeOptionsIn{ + Fname: "../data/missing-sei-and-pes.ts", + } + out := []TranscodeOptions{{ + Oname: fmt.Sprintf("%s/out.ts", dir), + Profile: passthrough, + AudioEncoder: ComponentOptions{Name: "drop"}, + }} + res, err := Transcode3(in, out) + require.Nil(t, err) + assert := 
assert.New(t) + assert.Equal(12, res.Decoded.Frames) + assert.Equal(12, res.Encoded[0].Frames) + + cmd = ` + cat <<- 'EOF' > expected-output.out + 0.200000,0.000000,0.100000,K__ + 0.600000,0.100000,0.100000,___ + 0.400000,0.200000,0.100000,___ + 0.300000,0.300000,0.100000,___ + 0.500000,0.400000,0.100000,___ + 1.000000,0.500000,0.100000,___ + 0.800000,0.600000,0.100000,___ + 0.700000,0.700000,0.100000,___ + 0.900000,0.800000,0.100000,___ + 1.300000,0.900000,0.100000,___ + 1.100000,1.000000,0.100000,___ + 1.200000,1.100000,0.100000,___ + EOF + + ffprobe -loglevel warning -select_streams v:0 \ + -show_entries packet=pts_time,dts_time,duration_time,flags -of csv=p=0 \ + out.ts | sed '/^$/d; s/,*$//g' > output.out + diff -u expected-output.out output.out + ` + require.True(t, run(cmd), "Unable to verify output, LPMS behavior may have changed") +} diff --git a/ffmpeg/filter.c b/ffmpeg/filter.c index bb0cf93f4a..7ac495130d 100644 --- a/ffmpeg/filter.c +++ b/ffmpeg/filter.c @@ -70,7 +70,6 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx, AVFrame if (vf->graph == NULL) { vf->graph = avfilter_graph_alloc(); } - vf->pts_diff = INT64_MIN; if (!outputs || !inputs || !vf->graph) { ret = AVERROR(ENOMEM); LPMS_ERR(vf_init_cleanup, "Unable to allocate filters"); @@ -125,6 +124,7 @@ int init_video_filters(struct input_ctx *ictx, struct output_ctx *octx, AVFrame if (!vf->frame) LPMS_ERR(vf_init_cleanup, "Unable to allocate video frame"); vf->active = 1; + vf->closed = 0; vf_init_cleanup: avfilter_inout_free(&inputs); @@ -223,7 +223,6 @@ int init_signature_filters(struct output_ctx *octx, AVFrame *inf) outputs = avfilter_inout_alloc(); inputs = avfilter_inout_alloc(); sf->graph = avfilter_graph_alloc(); - sf->pts_diff = INT64_MIN; if (!outputs || !inputs || !sf->graph) { ret = AVERROR(ENOMEM); LPMS_ERR(sf_init_cleanup, "Unable to allocate filters"); @@ -284,6 +283,7 @@ int init_signature_filters(struct output_ctx *octx, AVFrame *inf) int 
filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *octx, struct filter_ctx *filter, int is_video) { + if (filter->closed) return 0; int ret = 0; // We have to reset the filter because we initially set the filter // before the decoder is fully ready, and the decoder may change HW params @@ -335,21 +335,15 @@ int filtergraph_write(AVFrame *inf, struct input_ctx *ictx, struct output_ctx *o AVStream *vst = ictx->ic->streams[ictx->vi]; if (inf) { // Non-Flush Frame inf->opaque = (void *) inf->pts; // Store original PTS for calc later - if (is_video && octx->fps.den) { - // Custom PTS set when FPS filter is used - int64_t ts_step = inf->pts - filter->prev_frame_pts; - if (filter->segments_complete && !filter->prev_frame_pts) { - // We are on the first frame of the second (or later) segment - // So in this case just increment the pts by 1/fps - ts_step = av_rescale_q_rnd(1, av_inv_q(octx->fps), vst->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX); - } - filter->custom_pts += ts_step; - filter->prev_frame_pts = inf->pts; - } else { - // FPS Passthrough or Audio case - filter->custom_pts = inf->pts; - } + filter->custom_pts = inf->pts; } else if (!filter->flushed) { // Flush Frame + // close filter right away if we already have some frames + if (octx->res->frames) { + filter->closed = 1; + return av_buffersrc_write_frame(filter->src_ctx, NULL); + } + // we don't have frames yet so flush the filter + // needed for extremely short or low-fps content int64_t ts_step; inf = (is_video) ? ictx->last_frame_v : ictx->last_frame_a; inf->opaque = (void *) (INT64_MIN); // Store INT64_MIN as pts for flush frames @@ -392,17 +386,6 @@ int filtergraph_read(struct input_ctx *ictx, struct output_ctx *octx, struct fil // don't set flushed flag in case this is a flush from a previous segment if (filter->flushing) filter->flushed = 1; ret = lpms_ERR_FILTER_FLUSHED; - } else if (frame && is_video && octx->fps.den) { - // TODO why limit to fps filter? 
what about non-fps filtergraphs, eg scale? - // We set custom PTS as an input of the filtergraph so we need to - // re-calculate our output PTS before passing it on to the encoder - if (filter->pts_diff == INT64_MIN) { - int64_t pts = (int64_t)frame->opaque; // original input PTS - pts = av_rescale_q_rnd(pts, ictx->ic->streams[ictx->vi]->time_base, av_buffersink_get_time_base(filter->sink_ctx), AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX); - // difference between rescaled input PTS and the segment's first frame PTS of the filtergraph output - filter->pts_diff = pts - frame->pts; - } - frame->pts += filter->pts_diff; // Re-calculate by adding back this segment's difference calculated at start } fg_read_cleanup: return ret; diff --git a/ffmpeg/filter.h b/ffmpeg/filter.h index 026a8ada8b..7a6984caa3 100755 --- a/ffmpeg/filter.h +++ b/ffmpeg/filter.h @@ -22,19 +22,6 @@ struct filter_ctx { // uniformly and monotonically increasing. int64_t custom_pts; - // Previous PTS to be used to manually calculate duration for custom_pts - int64_t prev_frame_pts; - - // Count of complete segments that have been processed by this filtergraph - int segments_complete; - - // We need to update the post-filtergraph PTS before sending the frame for - // encoding because we modified the input PTS. - // We do this by calculating the difference between our custom PTS and actual - // PTS for the first-frame of every segment, and then applying this diff to - // every subsequent frame in the segment. - int64_t pts_diff; - // When draining the filtergraph, we inject fake frames. // These frames have monotonically increasing timestamps at the same interval // as a normal stream of frames. The custom_pts is set to more than usual jump @@ -43,6 +30,8 @@ struct filter_ctx { // We mark this boolean as flushed when done flushing. 
int flushed; int flushing; + + int closed; }; struct output_ctx { @@ -73,6 +62,7 @@ struct output_ctx { int64_t last_audio_dts; //dts of the last audio packet sent to the muxer int64_t last_video_dts; //dts of the last video packet sent to the muxer + int64_t last_enc_pts; // last pts sent to the video encoder (in encoder timebase) int64_t gop_time, gop_pts_len, next_kf_pts; // for gop reset diff --git a/ffmpeg/nvidia_test.go b/ffmpeg/nvidia_test.go index d53826499d..0a7df8d2ea 100755 --- a/ffmpeg/nvidia_test.go +++ b/ffmpeg/nvidia_test.go @@ -423,12 +423,10 @@ func TestNvidia_DrainFilters(t *testing.T) { # sanity check with ffmpeg itself ffmpeg -loglevel warning -i test.ts -c:a copy -c:v libx264 -vf fps=100 -vsync 0 ffmpeg-out.ts - ffprobe -loglevel warning -show_streams -select_streams v -count_frames ffmpeg-out.ts > ffmpeg.out - ffprobe -loglevel warning -show_streams -select_streams v -count_frames out.ts > probe.out + ffprobe -loglevel warning -select_streams v -count_frames -show_entries stream=nb_read_frames,time_base,duration,r_frame_rate,avg_frame_rate,start_pts,start_time,duration_ts,duration ffmpeg-out.ts > ffmpeg.out + ffprobe -loglevel warning -select_streams v -count_frames -show_entries stream=nb_read_frames,time_base,duration,r_frame_rate,avg_frame_rate,start_pts,start_time,duration_ts,duration out.ts > probe.out - # These used to be same, but aren't since we've diverged the flushing and PTS handling from ffmpeg - grep nb_read_frames=101 probe.out - grep duration=1.0100 probe.out + diff -u ffmpeg.out probe.out grep nb_read_frames=102 ffmpeg.out grep duration=1.0200 ffmpeg.out ` @@ -559,9 +557,9 @@ func TestNvidia_API_MixedOutput(t *testing.T) { # sanity check ffmpeg frame count against ours ffprobe -count_frames -show_streams -select_streams v ffmpeg_nv_$1.ts | grep nb_read_frames=246 - ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=245 - ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep 
nb_read_frames=245 - ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=245 + ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=246 + ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246 + ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=246 # check image quality ffmpeg -loglevel warning -i nv_$1.ts -i ffmpeg_nv_$1.ts \ @@ -601,6 +599,7 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) { profile := P144p30fps16x9 profile.Framerate = 123 tc := NewTranscoder() + defer tc.StopTranscoder() idx := []int{1, 0, 3, 2} for _, i := range idx { // TODO this breaks with nvidia acceleration on the input! @@ -624,9 +623,6 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) { Profile: profile, }} res, err := tc.Transcode(in, out) - if (i == 1 || i == 3) && err != nil { - t.Error(err) - } if err != nil { t.Error(err) } @@ -651,9 +647,9 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) { # sanity check ffmpeg frame count against ours ffprobe -count_frames -show_streams -select_streams v ffmpeg_nv_$1.ts | grep nb_read_frames=246 - ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=245 - ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=245 - ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=245 + ffprobe -count_frames -show_streams -select_streams v nv_$1.ts | grep nb_read_frames=246 + ffprobe -count_frames -show_streams -select_streams v sw_$1.ts | grep nb_read_frames=246 + ffprobe -count_frames -show_streams -select_streams v nv_audio_encode_$1.ts | grep nb_read_frames=246 # check image quality ffmpeg -loglevel warning -i nv_$1.ts -i ffmpeg_nv_$1.ts \ @@ -674,11 +670,13 @@ func TestNvidia_API_AlternatingTimestamps(t *testing.T) { check 1 check 2 check 3 - ` + ` run(cmd) - 
tc.StopTranscoder() } +func TestNvidia_DTSOverlap(t *testing.T) { + dtsOverlap(t, Nvidia) +} func TestNvidia_ShortSegments(t *testing.T) { shortSegments(t, Nvidia, 1) shortSegments(t, Nvidia, 2) diff --git a/ffmpeg/sei_fixup.go b/ffmpeg/sei_fixup.go new file mode 100644 index 0000000000..1587b04106 --- /dev/null +++ b/ffmpeg/sei_fixup.go @@ -0,0 +1,341 @@ +package ffmpeg + +import ( + "io/ioutil" + "os" + "path/filepath" + + "github.com/livepeer/joy4/format/ts/tsio" +) + +const ( + tsPacketSize = 188 + invalidPID uint16 = 0x1fff +) + +type byteRange struct { + start int + end int +} + +type nalInfo struct { + start int + end int + typ uint8 +} + +// FixMisplacedSEI rewrites a TS segment into a temp file when SEI NAL units are +// found after VCL NAL units within an access unit. If no fix is needed, it +// returns the original input path. +func FixMisplacedSEI(inputPath string) (fixedPath string, err error) { + data, err := ioutil.ReadFile(inputPath) + if err != nil { + return "", err + } + fixedData, changed := fixSEIOrder(data) + if !changed { + return inputPath, nil + } + + dir := filepath.Dir(inputPath) + tmp, err := ioutil.TempFile(dir, "sei-fixup-*.ts") + if err != nil { + return "", err + } + tmpPath := tmp.Name() + if _, err := tmp.Write(fixedData); err != nil { + tmp.Close() + os.Remove(tmpPath) + return "", err + } + if err := tmp.Close(); err != nil { + os.Remove(tmpPath) + return "", err + } + return tmpPath, nil +} + +func fixSEIOrder(data []byte) ([]byte, bool) { + if len(data) < tsPacketSize { + return data, false + } + videoPID := findVideoPID(data) + if videoPID == invalidPID { + return data, false + } + + result := make([]byte, len(data)) + copy(result, data) + + var allPayload []byteRange + inVideoPES := false + for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize { + pkt := data[off : off+tsPacketSize] + pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt) + if err != nil || hdrlen >= tsPacketSize { + continue + } + if pid != 
videoPID { + continue + } + payloadStart := off + hdrlen + payloadEnd := off + tsPacketSize + + if start { + inVideoPES = false + payload := pkt[hdrlen:] + if len(payload) < 9 || payload[0] != 0 || payload[1] != 0 || payload[2] != 1 { + continue + } + pesHdrLen, streamid, _, _, _, err := tsio.ParsePESHeader(payload) + if err != nil || streamid < 0xe0 || streamid > 0xef { + continue + } + payloadStart += pesHdrLen + if payloadStart > payloadEnd { + // Header spans this packet; skip payload bytes from this packet. + continue + } + inVideoPES = true + } + + if !inVideoPES { + continue + } + if payloadStart < payloadEnd { + allPayload = append(allPayload, byteRange{start: payloadStart, end: payloadEnd}) + } + } + if len(allPayload) == 0 { + return result, false + } + return result, fixPES(result, result, allPayload) +} + +func findVideoPID(data []byte) uint16 { + pmtPID := invalidPID + for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize { + pkt := data[off : off+tsPacketSize] + pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt) + if err != nil || !start || hdrlen >= tsPacketSize { + continue + } + if pid != tsio.PAT_PID { + continue + } + payload := pkt[hdrlen:] + tableid, _, psihdrlen, datalen, err := tsio.ParsePSI(payload) + if err != nil || tableid != tsio.TableIdPAT { + continue + } + end := psihdrlen + datalen + if end > len(payload) || datalen <= 0 { + continue + } + var pat tsio.PAT + if _, err := pat.Unmarshal(payload[psihdrlen:end]); err != nil { + continue + } + for _, e := range pat.Entries { + if e.ProgramMapPID != 0 { + pmtPID = e.ProgramMapPID + break + } + } + if pmtPID != invalidPID { + break + } + } + + if pmtPID != invalidPID { + for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize { + pkt := data[off : off+tsPacketSize] + pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt) + if err != nil || !start || hdrlen >= tsPacketSize || pid != pmtPID { + continue + } + payload := pkt[hdrlen:] + tableid, _, psihdrlen, datalen, err 
:= tsio.ParsePSI(payload) + if err != nil || tableid != tsio.TableIdPMT { + continue + } + end := psihdrlen + datalen + if end > len(payload) || datalen <= 0 { + continue + } + var pmt tsio.PMT + if _, err := pmt.Unmarshal(payload[psihdrlen:end]); err != nil { + continue + } + for _, es := range pmt.ElementaryStreamInfos { + if es.StreamType == tsio.ElementaryStreamTypeH264 { + return es.ElementaryPID + } + } + } + } + + // Fallback for truncated segments that may not include PAT/PMT. + for off := 0; off+tsPacketSize <= len(data); off += tsPacketSize { + pkt := data[off : off+tsPacketSize] + pid, start, _, hdrlen, err := tsio.ParseTSHeader(pkt) + if err != nil || !start || hdrlen >= tsPacketSize { + continue + } + payload := pkt[hdrlen:] + if len(payload) >= 4 && payload[0] == 0 && payload[1] == 0 && payload[2] == 1 { + if payload[3] >= 0xe0 && payload[3] <= 0xef { + return pid + } + } + } + return invalidPID +} + +func fixPES(orig, result []byte, ranges []byteRange) bool { + total := 0 + for _, r := range ranges { + if r.end > r.start { + total += r.end - r.start + } + } + if total == 0 { + return false + } + + es := make([]byte, 0, total) + for _, r := range ranges { + if r.end <= r.start || r.start < 0 || r.end > len(orig) { + return false + } + es = append(es, orig[r.start:r.end]...) + } + nals := scanNALs(es) + if len(nals) == 0 { + return false + } + + leading := es[:nals[0].start] + reordered := make([]byte, 0, len(es)) + reordered = append(reordered, leading...) + + var changed bool + appendSegment := func(seg []nalInfo) { + if len(seg) == 0 { + return + } + firstVCL := -1 + for i, n := range seg { + if n.typ >= 1 && n.typ <= 5 { + firstVCL = i + break + } + } + if firstVCL < 0 { + for _, n := range seg { + reordered = append(reordered, es[n.start:n.end]...) 
+ } + return + } + misplacedSEI := false + for i := firstVCL + 1; i < len(seg); i++ { + if seg[i].typ == 6 { + misplacedSEI = true + break + } + } + if !misplacedSEI { + for _, n := range seg { + reordered = append(reordered, es[n.start:n.end]...) + } + return + } + + changed = true + for i := 0; i < firstVCL; i++ { + n := seg[i] + reordered = append(reordered, es[n.start:n.end]...) + } + for i := firstVCL; i < len(seg); i++ { + n := seg[i] + if n.typ == 6 { + reordered = append(reordered, es[n.start:n.end]...) + } + } + for i := firstVCL; i < len(seg); i++ { + n := seg[i] + if n.typ != 6 { + reordered = append(reordered, es[n.start:n.end]...) + } + } + } + + segStart := 0 + for i := 0; i <= len(nals); i++ { + segmentBoundary := i == len(nals) || (i > segStart && nals[i].typ == 9) + if !segmentBoundary { + continue + } + appendSegment(nals[segStart:i]) + segStart = i + } + if !changed { + return false + } + if len(reordered) != len(es) { + return false + } + + pos := 0 + for _, r := range ranges { + n := r.end - r.start + if n <= 0 { + continue + } + copy(result[r.start:r.end], reordered[pos:pos+n]) + pos += n + } + return pos == len(reordered) +} + +func scanNALs(es []byte) []nalInfo { + var nals []nalInfo + for pos := 0; pos < len(es); { + start, scLen := findStartCode(es, pos) + if start < 0 { + break + } + nextStart, _ := findStartCode(es, start+scLen) + end := len(es) + if nextStart >= 0 { + end = nextStart + } + if start+scLen < end { + nals = append(nals, nalInfo{ + start: start, + end: end, + typ: es[start+scLen] & 0x1f, + }) + } + if nextStart < 0 { + break + } + pos = nextStart + } + return nals +} + +func findStartCode(b []byte, from int) (int, int) { + for i := from; i+3 < len(b); i++ { + if b[i] != 0 || b[i+1] != 0 { + continue + } + if b[i+2] == 1 { + return i, 3 + } + if i+4 < len(b) && b[i+2] == 0 && b[i+3] == 1 { + return i, 4 + } + } + return -1, 0 +} diff --git a/ffmpeg/sei_fixup_test.go b/ffmpeg/sei_fixup_test.go new file mode 100644 index 
0000000000..4a3a2f532e --- /dev/null +++ b/ffmpeg/sei_fixup_test.go @@ -0,0 +1,123 @@ +package ffmpeg + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFixMisplacedSEI_BrokenFiles(t *testing.T) { + run, dir := setupTest(t) + defer os.RemoveAll(dir) + + run(` + cat <<- 'EOF1' > input-sei-order.out + Access Unit Delimiter + Slice Header + Supplemental Enhancement Information + Access Unit Delimiter + EOF1 + + cat <<- 'EOF2' > fixed-sei-order.out + Access Unit Delimiter + Supplemental Enhancement Information + Slice Header + Access Unit Delimiter + EOF2 + + # missing-dts.ts has a SEI pre-prended, check that's preserved + cat <<- 'EOF3' > leading-sei.out + Supplemental Enhancement Information + Access Unit Delimiter + Slice Header + Supplemental Enhancement Information + EOF3 + + # missing-dts.ts has a SEI pre-prended, check that's preserved + cat <<- 'EOF3' > fixed-leading-sei.out + Supplemental Enhancement Information + Access Unit Delimiter + Supplemental Enhancement Information + Slice Header + EOF3 + + `) + + for _, name := range []string{"skip_1.ts", "skip_3.ts", "missing-dts.ts"} { + t.Run(name, func(t *testing.T) { + input := dataFilePath(t, name) + if "missing-dts.ts" == name { + checkNALSequence(t, run, input, "leading-sei.out") + } else { + checkNALSequence(t, run, input, "input-sei-order.out") + } + + inputData, err := ioutil.ReadFile(input) + require.NoError(t, err) + + fixedPath, err := FixMisplacedSEI(input) + require.NoError(t, err) + require.NotEqual(t, input, fixedPath, "expected fix-up to trigger") + defer os.Remove(fixedPath) + + fixedData, err := ioutil.ReadFile(fixedPath) + require.NoError(t, err) + require.Equal(t, len(inputData), len(fixedData), "fix-up must preserve byte size") + if "missing-dts.ts" == name { + checkNALSequence(t, run, fixedPath, "fixed-leading-sei.out") + } else { + checkNALSequence(t, run, fixedPath, "fixed-sei-order.out") + } + }) + } +} + +func 
TestFixMisplacedSEI_NoChanges(t *testing.T) { + run, dir := setupTest(t) + defer os.RemoveAll(dir) + + run(` + # normally SEI comes before any picture data + cat <<- 'EOF1' > vertical-sei-order.out + Access Unit Delimiter + Supplemental Enhancement Information + Slice Header + Access Unit Delimiter + EOF1 + + # this sample should NOT have any SEI + ! ffmpeg -hide_banner -i "$1/../data/portrait.ts" -c copy -bsf:v trace_headers -f null - 2>&1 | grep "Supplemental Enhancement Information" + `) + + checkNALSequence(t, run, dataFilePath(t, "vertical-sample.ts"), "vertical-sei-order.out") + + for _, name := range []string{"vertical-sample.ts", "portrait.ts", "broken-h264-parser.ts"} { + t.Run(name, func(t *testing.T) { + input := dataFilePath(t, name) + fixedPath, err := FixMisplacedSEI(input) + require.NoError(t, err) + require.Equal(t, input, fixedPath, "known-good sample should pass through unchanged") + }) + } +} + +func dataFilePath(t *testing.T, name string) string { + t.Helper() + wd, err := os.Getwd() + require.NoError(t, err) + return path.Join(wd, "..", "data", name) +} + +func checkNALSequence(t *testing.T, run func(cmd string) bool, inputPath, expectedPath string) { + t.Helper() + cmd := fmt.Sprintf(` + ffmpeg -hide_banner -i "%s" -c copy -bsf:v trace_headers -f null - 2>&1 | grep -e 'Access Unit\|Slice Header\|Supplement' | head -4 > pre.raw + sed -E 's/^\[[^]]+\] //' pre.raw > pre.out + diff -u %s pre.out + `, inputPath, expectedPath) + require.True(t, run(cmd), "NAL ordering check failed for %s", inputPath) +} diff --git a/ffmpeg/transcoder.c b/ffmpeg/transcoder.c index e7cc16182c..b4c7a5451c 100755 --- a/ffmpeg/transcoder.c +++ b/ffmpeg/transcoder.c @@ -161,6 +161,7 @@ int transcode_init(struct transcode_thread *h, input_params *inp, int nb_outputs = h->nb_outputs; if (!inp) LPMS_ERR(transcode_cleanup, "Missing input params") + ictx->last_video_pts = AV_NOPTS_VALUE; AVDictionary **demuxer_opts = NULL; if (inp->demuxer.opts) demuxer_opts = 
&inp->demuxer.opts;