1#![allow(clippy::single_match)]
16#![deny(unsafe_code)]
18#![deny(unreachable_pub)]
19
20use std::collections::VecDeque;
21use std::fmt::Debug;
22use std::io;
23
24use byteorder::{BigEndian, ReadBytesExt};
25use bytes::{Buf, Bytes};
26use scuffle_flv::audio::AudioData;
27use scuffle_flv::audio::body::AudioTagBody;
28use scuffle_flv::audio::body::legacy::LegacyAudioTagBody;
29use scuffle_flv::audio::body::legacy::aac::AacAudioData;
30use scuffle_flv::audio::header::AudioTagHeader;
31use scuffle_flv::audio::header::legacy::{LegacyAudioTagHeader, SoundType};
32use scuffle_flv::script::{OnMetaData, ScriptData};
33use scuffle_flv::tag::{FlvTag, FlvTagData};
34use scuffle_flv::video::VideoData;
35use scuffle_flv::video::body::VideoTagBody;
36use scuffle_flv::video::body::enhanced::{ExVideoTagBody, VideoPacket, VideoPacketCodedFrames, VideoPacketSequenceStart};
37use scuffle_flv::video::body::legacy::LegacyVideoTagBody;
38use scuffle_flv::video::header::enhanced::VideoFourCc;
39use scuffle_flv::video::header::legacy::{LegacyVideoTagHeader, LegacyVideoTagHeaderAvcPacket};
40use scuffle_flv::video::header::{VideoFrameType, VideoTagHeader, VideoTagHeaderData};
41use scuffle_h264::Sps;
42use scuffle_mp4::BoxType;
43use scuffle_mp4::codec::{AudioCodec, VideoCodec};
44use scuffle_mp4::types::ftyp::{FourCC, Ftyp};
45use scuffle_mp4::types::hdlr::{HandlerType, Hdlr};
46use scuffle_mp4::types::mdat::Mdat;
47use scuffle_mp4::types::mdhd::Mdhd;
48use scuffle_mp4::types::mdia::Mdia;
49use scuffle_mp4::types::mfhd::Mfhd;
50use scuffle_mp4::types::minf::Minf;
51use scuffle_mp4::types::moof::Moof;
52use scuffle_mp4::types::moov::Moov;
53use scuffle_mp4::types::mvex::Mvex;
54use scuffle_mp4::types::mvhd::Mvhd;
55use scuffle_mp4::types::smhd::Smhd;
56use scuffle_mp4::types::stbl::Stbl;
57use scuffle_mp4::types::stco::Stco;
58use scuffle_mp4::types::stsc::Stsc;
59use scuffle_mp4::types::stsd::Stsd;
60use scuffle_mp4::types::stsz::Stsz;
61use scuffle_mp4::types::stts::Stts;
62use scuffle_mp4::types::tfdt::Tfdt;
63use scuffle_mp4::types::tfhd::Tfhd;
64use scuffle_mp4::types::tkhd::Tkhd;
65use scuffle_mp4::types::traf::Traf;
66use scuffle_mp4::types::trak::Trak;
67use scuffle_mp4::types::trex::Trex;
68use scuffle_mp4::types::trun::Trun;
69use scuffle_mp4::types::vmhd::Vmhd;
70
71mod codecs;
72mod define;
73mod errors;
74
75pub use define::*;
76pub use errors::TransmuxError;
77
78struct Tags<'a> {
79 video_sequence_header: Option<VideoSequenceHeader>,
80 audio_sequence_header: Option<AudioSequenceHeader>,
81 scriptdata_tag: Option<OnMetaData<'a>>,
82}
83
84#[derive(Debug, Clone)]
85pub struct Transmuxer<'a> {
86 audio_duration: u64,
89 video_duration: u64,
91 sequence_number: u32,
92 last_video_timestamp: u32,
93 settings: Option<(VideoSettings, AudioSettings)>,
94 tags: VecDeque<FlvTag<'a>>,
95}
96
97impl Default for Transmuxer<'_> {
98 fn default() -> Self {
99 Self::new()
100 }
101}
102
103impl<'a> Transmuxer<'a> {
104 pub fn new() -> Self {
105 Self {
106 sequence_number: 1,
107 tags: VecDeque::new(),
108 audio_duration: 0,
109 video_duration: 0,
110 last_video_timestamp: 0,
111 settings: None,
112 }
113 }
114
115 pub fn demux(&mut self, data: Bytes) -> Result<(), TransmuxError> {
117 let mut cursor = io::Cursor::new(data);
118 while cursor.has_remaining() {
119 cursor.read_u32::<BigEndian>()?; if !cursor.has_remaining() {
121 break;
122 }
123
124 let tag = FlvTag::demux(&mut cursor)?;
125 self.tags.push_back(tag);
126 }
127
128 Ok(())
129 }
130
131 pub fn add_tag(&mut self, tag: FlvTag<'a>) {
133 self.tags.push_back(tag);
134 }
135
136 pub fn mux(&mut self) -> Result<Option<TransmuxResult>, TransmuxError> {
139 let mut writer = Vec::new();
140
141 let Some((video_settings, _)) = &self.settings else {
142 let Some((video_settings, audio_settings)) = self.init_sequence(&mut writer)? else {
143 if self.tags.len() > 30 {
144 return Err(TransmuxError::NoSequenceHeaders);
146 }
147
148 return Ok(None);
150 };
151
152 self.settings = Some((video_settings.clone(), audio_settings.clone()));
153
154 return Ok(Some(TransmuxResult::InitSegment {
155 data: Bytes::from(writer),
156 audio_settings,
157 video_settings,
158 }));
159 };
160
161 loop {
162 let Some(tag) = self.tags.pop_front() else {
163 return Ok(None);
164 };
165
166 let mdat_data;
167 let total_duration;
168 let trun_sample;
169 let mut is_audio = false;
170 let mut is_keyframe = false;
171
172 let duration =
173 if self.last_video_timestamp == 0 || tag.timestamp_ms == 0 || tag.timestamp_ms < self.last_video_timestamp {
174 1000 } else {
177 let delta = tag.timestamp_ms as f64 - self.last_video_timestamp as f64;
187 let expected_delta = 1000.0 / video_settings.framerate;
188 if (delta - expected_delta).abs() <= 1.0 {
189 1000
190 } else {
191 (delta * video_settings.framerate) as u32
192 }
193 };
194
195 match tag.data {
196 FlvTagData::Audio(AudioData {
197 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::Raw(data))),
198 ..
199 }) => {
200 let (sample, duration) = codecs::aac::trun_sample(&data)?;
201
202 trun_sample = sample;
203 mdat_data = data;
204 total_duration = duration;
205 is_audio = true;
206 }
207 FlvTagData::Video(VideoData {
208 header:
209 VideoTagHeader {
210 frame_type,
211 data:
212 VideoTagHeaderData::Legacy(LegacyVideoTagHeader::AvcPacket(
213 LegacyVideoTagHeaderAvcPacket::Nalu { composition_time_offset },
214 )),
215 },
216 body: VideoTagBody::Legacy(LegacyVideoTagBody::Other { data }),
217 ..
218 }) => {
219 let composition_time =
220 ((composition_time_offset as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
221
222 let sample = codecs::avc::trun_sample(frame_type, composition_time as u32, duration, &data)?;
223
224 trun_sample = sample;
225 total_duration = duration;
226 mdat_data = data;
227
228 is_keyframe = frame_type == VideoFrameType::KeyFrame;
229 }
230 FlvTagData::Video(VideoData {
231 header: VideoTagHeader { frame_type, .. },
232 body:
233 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
234 video_four_cc: VideoFourCc::Av1,
235 packet: VideoPacket::CodedFrames(VideoPacketCodedFrames::Other(data)),
236 }),
237 ..
238 }) => {
239 let sample = codecs::av1::trun_sample(frame_type, duration, &data)?;
240
241 trun_sample = sample;
242 total_duration = duration;
243 mdat_data = data;
244
245 is_keyframe = frame_type == VideoFrameType::KeyFrame;
246 }
247 FlvTagData::Video(VideoData {
248 header: VideoTagHeader { frame_type, .. },
249 body:
250 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
251 video_four_cc: VideoFourCc::Hevc,
252 packet,
253 }),
254 ..
255 }) => {
256 let (composition_time, data) = match packet {
257 VideoPacket::CodedFrames(VideoPacketCodedFrames::Hevc {
258 composition_time_offset,
259 data,
260 }) => (Some(composition_time_offset), data),
261 VideoPacket::CodedFramesX { data } => (None, data),
262 _ => continue,
263 };
264
265 let composition_time =
266 ((composition_time.unwrap_or_default() as f64 * video_settings.framerate) / 1000.0).floor() * 1000.0;
267
268 let sample = codecs::hevc::trun_sample(frame_type, composition_time as i32, duration, &data)?;
269
270 trun_sample = sample;
271 total_duration = duration;
272 mdat_data = data;
273
274 is_keyframe = frame_type == VideoFrameType::KeyFrame;
275 }
276 _ => {
277 continue;
279 }
280 }
281
282 let trafs = {
283 let (main_duration, main_id) = if is_audio {
284 (self.audio_duration, 2)
285 } else {
286 (self.video_duration, 1)
287 };
288
289 let mut traf = Traf::new(
290 Tfhd::new(main_id, None, None, None, None, None),
291 Some(Trun::new(vec![trun_sample], None)),
292 Some(Tfdt::new(main_duration)),
293 );
294 traf.optimize();
295
296 vec![traf]
297 };
298
299 let mut moof = Moof::new(Mfhd::new(self.sequence_number), trafs);
300
301 let moof_size = moof.size();
303
304 let traf = moof.traf.get_mut(0).expect("we just created the moof with a traf");
308
309 let trun = traf.trun.as_mut().expect("we just created the video traf with a trun");
311
312 trun.data_offset = Some(moof_size as i32 + 8);
316
317 moof.mux(&mut writer)?;
319
320 Mdat::new(vec![mdat_data]).mux(&mut writer)?;
322
323 self.sequence_number += 1;
325
326 if is_audio {
327 self.audio_duration += total_duration as u64;
328 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
329 data: Bytes::from(writer),
330 ty: MediaType::Audio,
331 keyframe: false,
332 timestamp: self.audio_duration - total_duration as u64,
333 })));
334 } else {
335 self.video_duration += total_duration as u64;
336 self.last_video_timestamp = tag.timestamp_ms;
337 return Ok(Some(TransmuxResult::MediaSegment(MediaSegment {
338 data: Bytes::from(writer),
339 ty: MediaType::Video,
340 keyframe: is_keyframe,
341 timestamp: self.video_duration - total_duration as u64,
342 })));
343 }
344 }
345 }
346
347 fn find_tags(&self) -> Tags<'a> {
349 let tags = self.tags.iter();
350 let mut video_sequence_header = None;
351 let mut audio_sequence_header = None;
352 let mut scriptdata_tag = None;
353
354 for tag in tags {
355 if video_sequence_header.is_some() && audio_sequence_header.is_some() && scriptdata_tag.is_some() {
356 break;
357 }
358
359 match &tag.data {
360 FlvTagData::Video(VideoData {
361 body: VideoTagBody::Legacy(LegacyVideoTagBody::AvcVideoPacketSeqHdr(data)),
362 ..
363 }) => {
364 video_sequence_header = Some(VideoSequenceHeader::Avc(data.clone()));
365 }
366 FlvTagData::Video(VideoData {
367 body:
368 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
369 video_four_cc: VideoFourCc::Av1,
370 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Av1(config)),
371 }),
372 ..
373 }) => {
374 video_sequence_header = Some(VideoSequenceHeader::Av1(config.clone()));
375 }
376 FlvTagData::Video(VideoData {
377 body:
378 VideoTagBody::Enhanced(ExVideoTagBody::NoMultitrack {
379 video_four_cc: VideoFourCc::Hevc,
380 packet: VideoPacket::SequenceStart(VideoPacketSequenceStart::Hevc(config)),
381 }),
382 ..
383 }) => {
384 video_sequence_header = Some(VideoSequenceHeader::Hevc(config.clone()));
385 }
386 FlvTagData::Audio(AudioData {
387 body: AudioTagBody::Legacy(LegacyAudioTagBody::Aac(AacAudioData::SequenceHeader(data))),
388 header:
389 AudioTagHeader::Legacy(LegacyAudioTagHeader {
390 sound_size, sound_type, ..
391 }),
392 ..
393 }) => {
394 audio_sequence_header = Some(AudioSequenceHeader {
395 data: AudioSequenceHeaderData::Aac(data.clone()),
396 sound_size: *sound_size,
397 sound_type: *sound_type,
398 });
399 }
400 FlvTagData::ScriptData(ScriptData::OnMetaData(metadata)) => {
401 scriptdata_tag = Some(*metadata.clone());
402 }
403 _ => {}
404 }
405 }
406
407 Tags {
408 video_sequence_header,
409 audio_sequence_header,
410 scriptdata_tag,
411 }
412 }
413
414 fn init_sequence(
416 &mut self,
417 writer: &mut impl io::Write,
418 ) -> Result<Option<(VideoSettings, AudioSettings)>, TransmuxError> {
419 let Tags {
422 video_sequence_header,
423 audio_sequence_header,
424 scriptdata_tag,
425 } = self.find_tags();
426
427 let Some(video_sequence_header) = video_sequence_header else {
428 return Ok(None);
429 };
430 let Some(audio_sequence_header) = audio_sequence_header else {
431 return Ok(None);
432 };
433
434 let video_codec;
435 let audio_codec;
436 let video_width;
437 let video_height;
438 let audio_channels;
439 let audio_sample_rate;
440 let mut video_fps = 0.0;
441
442 let mut estimated_video_bitrate = 0;
443 let mut estimated_audio_bitrate = 0;
444
445 if let Some(scriptdata_tag) = scriptdata_tag {
446 video_fps = scriptdata_tag.framerate.unwrap_or(0.0);
447 estimated_video_bitrate = scriptdata_tag.videodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
448 estimated_audio_bitrate = scriptdata_tag.audiodatarate.map(|v| (v * 1024.0) as u32).unwrap_or(0);
449 }
450
451 let mut compatable_brands = vec![FourCC::Iso5, FourCC::Iso6];
452
453 let video_stsd_entry = match video_sequence_header {
454 VideoSequenceHeader::Avc(config) => {
455 compatable_brands.push(FourCC::Avc1);
456 video_codec = VideoCodec::Avc {
457 constraint_set: config.profile_compatibility,
458 level: config.level_indication,
459 profile: config.profile_indication,
460 };
461
462 let sps = Sps::parse_with_emulation_prevention(io::Cursor::new(&config.sps[0]))
463 .map_err(|_| TransmuxError::InvalidAVCDecoderConfigurationRecord)?;
464 video_width = sps.width() as u32;
465 video_height = sps.height() as u32;
466
467 let frame_rate = sps.frame_rate();
468 if let Some(frame_rate) = frame_rate {
469 video_fps = frame_rate;
470 }
471
472 codecs::avc::stsd_entry(config, &sps)?
473 }
474 VideoSequenceHeader::Av1(config) => {
475 compatable_brands.push(FourCC::Av01);
476 let (entry, seq_obu) = codecs::av1::stsd_entry(config)?;
477
478 video_height = seq_obu.max_frame_height as u32;
479 video_width = seq_obu.max_frame_width as u32;
480
481 let op_point = &seq_obu.operating_points[0];
482
483 video_codec = VideoCodec::Av1 {
484 profile: seq_obu.seq_profile,
485 level: op_point.seq_level_idx,
486 tier: op_point.seq_tier,
487 depth: seq_obu.color_config.bit_depth as u8,
488 monochrome: seq_obu.color_config.mono_chrome,
489 sub_sampling_x: seq_obu.color_config.subsampling_x,
490 sub_sampling_y: seq_obu.color_config.subsampling_y,
491 color_primaries: seq_obu.color_config.color_primaries,
492 transfer_characteristics: seq_obu.color_config.transfer_characteristics,
493 matrix_coefficients: seq_obu.color_config.matrix_coefficients,
494 full_range_flag: seq_obu.color_config.full_color_range,
495 };
496
497 entry
498 }
499 VideoSequenceHeader::Hevc(config) => {
500 compatable_brands.push(FourCC::Hev1);
501 video_codec = VideoCodec::Hevc {
502 constraint_indicator: config.general_constraint_indicator_flags,
503 level: config.general_level_idc,
504 profile: config.general_profile_idc,
505 profile_compatibility: config.general_profile_compatibility_flags,
506 tier: config.general_tier_flag,
507 general_profile_space: config.general_profile_space,
508 };
509
510 let (entry, sps) = codecs::hevc::stsd_entry(config)?;
511 if let Some(info) = sps.vui_parameters.as_ref().and_then(|p| p.vui_timing_info.as_ref()) {
512 video_fps = info.time_scale.get() as f64 / info.num_units_in_tick.get() as f64;
513 }
514
515 video_width = sps.cropped_width() as u32;
516 video_height = sps.cropped_height() as u32;
517
518 entry
519 }
520 };
521
522 let audio_stsd_entry = match audio_sequence_header.data {
523 AudioSequenceHeaderData::Aac(data) => {
524 compatable_brands.push(FourCC::Mp41);
525 let (entry, config) =
526 codecs::aac::stsd_entry(audio_sequence_header.sound_size, audio_sequence_header.sound_type, data)?;
527
528 audio_sample_rate = config.sampling_frequency;
529
530 audio_codec = AudioCodec::Aac {
531 object_type: config.audio_object_type,
532 };
533 audio_channels = match audio_sequence_header.sound_type {
534 SoundType::Mono => 1,
535 SoundType::Stereo => 2,
536 _ => return Err(TransmuxError::InvalidAudioChannels),
537 };
538
539 entry
540 }
541 };
542
543 if video_fps == 0.0 {
544 return Err(TransmuxError::InvalidVideoFrameRate);
545 }
546
547 if video_width == 0 || video_height == 0 {
548 return Err(TransmuxError::InvalidVideoDimensions);
549 }
550
551 if audio_sample_rate == 0 {
552 return Err(TransmuxError::InvalidAudioSampleRate);
553 }
554
555 let video_timescale = (1000.0 * video_fps) as u32;
561
562 Ftyp::new(FourCC::Iso5, 512, compatable_brands).mux(writer)?;
563 Moov::new(
564 Mvhd::new(0, 0, 1000, 0, 1),
565 vec![
566 Trak::new(
567 Tkhd::new(0, 0, 1, 0, Some((video_width, video_height))),
568 None,
569 Mdia::new(
570 Mdhd::new(0, 0, video_timescale, 0),
571 Hdlr::new(HandlerType::Vide, "VideoHandler".to_string()),
572 Minf::new(
573 Stbl::new(
574 Stsd::new(vec![video_stsd_entry]),
575 Stts::new(vec![]),
576 Stsc::new(vec![]),
577 Stco::new(vec![]),
578 Some(Stsz::new(0, vec![])),
579 ),
580 Some(Vmhd::new()),
581 None,
582 ),
583 ),
584 ),
585 Trak::new(
586 Tkhd::new(0, 0, 2, 0, None),
587 None,
588 Mdia::new(
589 Mdhd::new(0, 0, audio_sample_rate, 0),
590 Hdlr::new(HandlerType::Soun, "SoundHandler".to_string()),
591 Minf::new(
592 Stbl::new(
593 Stsd::new(vec![audio_stsd_entry]),
594 Stts::new(vec![]),
595 Stsc::new(vec![]),
596 Stco::new(vec![]),
597 Some(Stsz::new(0, vec![])),
598 ),
599 None,
600 Some(Smhd::new()),
601 ),
602 ),
603 ),
604 ],
605 Some(Mvex::new(vec![Trex::new(1), Trex::new(2)], None)),
606 )
607 .mux(writer)?;
608
609 Ok(Some((
610 VideoSettings {
611 width: video_width,
612 height: video_height,
613 framerate: video_fps,
614 codec: video_codec,
615 bitrate: estimated_video_bitrate,
616 timescale: video_timescale,
617 },
618 AudioSettings {
619 codec: audio_codec,
620 sample_rate: audio_sample_rate,
621 channels: audio_channels,
622 bitrate: estimated_audio_bitrate,
623 timescale: audio_sample_rate,
624 },
625 )))
626 }
627}
628
629#[cfg(test)]
630mod tests;