"""Use ffmpeg to decode audio and video media. """ import sys from collections import deque from ctypes import (c_int, c_int32, c_uint8, c_char_p, addressof, byref, cast, POINTER, Structure, create_string_buffer, memmove) import pyglet import pyglet.lib from pyglet import image from pyglet.util import asbytes, asstr from . import MediaDecoder from .base import AudioData, SourceInfo, StaticSource from .base import StreamingSource, VideoFormat, AudioFormat from .ffmpeg_lib import * from ..exceptions import MediaFormatException class FileInfo: def __init__(self): self.n_streams = None self.start_time = None self.duration = None self.title = "" self.author = "" self.copyright = "" self.comment = "" self.album = "" self.year = None self.track = "" self.genre = "" class StreamVideoInfo: def __init__(self, width, height, sample_aspect_num, sample_aspect_den, frame_rate_num, frame_rate_den, codec_id): self.width = width self.height = height self.sample_aspect_num = sample_aspect_num self.sample_aspect_den = sample_aspect_den self.frame_rate_num = frame_rate_num self.frame_rate_den = frame_rate_den self.codec_id = codec_id class StreamAudioInfo: def __init__(self, sample_format, sample_rate, channels): self.sample_format = sample_format self.sample_rate = sample_rate self.sample_bits = None self.channels = channels class FFmpegFile(Structure): _fields_ = [ ('context', POINTER(AVFormatContext)) ] class FFmpegStream(Structure): _fields_ = [ ('type', c_int32), ('format_context', POINTER(AVFormatContext)), ('codec_context', POINTER(AVCodecContext)), ('frame', POINTER(AVFrame)), ('time_base', AVRational) ] class FFmpegException(MediaFormatException): pass def ffmpeg_get_audio_buffer_size(audio_format): """Return the audio buffer size Buffer size can accomodate 1 sec of audio data. """ return audio_format.bytes_per_second + FF_INPUT_BUFFER_PADDING_SIZE def ffmpeg_init(): """Initialize libavformat and register all the muxers, demuxers and protocols.""" pass class MemoryFileObject: """A class to manage reading and seeking of a ffmpeg file object.""" buffer_size = 32768 def __init__(self, file): self.file = file self.fmt_context = None self.buffer = None if not getattr(self.file, 'seek', None) or not getattr(self.file, 'tell', None): raise Exception("File object does not support seeking.") # Seek to end of file to get the filesize. self.file.seek(0, 2) self.file_size = self.file.tell() self.file.seek(0) # Put cursor back at the beginning. def read_data_cb(_, buff, buf_size): data = self.file.read(buf_size) read_size = len(data) memmove(buff, data, read_size) return read_size def seek_data_cb(_, offset, whence): if whence == libavformat.AVSEEK_SIZE: return self.file_size pos = self.file.seek(offset, whence) return pos self.read_func = libavformat.ffmpeg_read_func(read_data_cb) self.seek_func = libavformat.ffmpeg_seek_func(seek_data_cb) def __del__(self): """These are usually freed when the source is, but no guarantee.""" if self.buffer: try: avutil.av_freep(self.buffer) except OSError: pass if self.fmt_context: try: avutil.av_freep(self.fmt_context) except OSError: pass def ffmpeg_open_memory_file(filename, file_object): """Open a media file from a file object. :rtype: FFmpegFile :return: The structure containing all the information for the media. """ file = FFmpegFile() file.context = libavformat.avformat.avformat_alloc_context() file.context.contents.seekable = 1 memory_file = MemoryFileObject(file_object) av_buf = libavutil.avutil.av_malloc(memory_file.buffer_size) memory_file.buffer = cast(av_buf, c_char_p) ptr = create_string_buffer(memory_file.buffer_size) memory_file.fmt_context = libavformat.avformat.avio_alloc_context( memory_file.buffer, memory_file.buffer_size, 0, ptr, memory_file.read_func, None, memory_file.seek_func ) file.context.contents.pb = memory_file.fmt_context file.context.contents.flags |= libavformat.AVFMT_FLAG_CUSTOM_IO result = avformat.avformat_open_input(byref(file.context), filename, None, None) if result != 0: raise FFmpegException('avformat_open_input in ffmpeg_open_filename returned an error opening file ' + filename.decode("utf8") + ' Error code: ' + str(result)) result = avformat.avformat_find_stream_info(file.context, None) if result < 0: raise FFmpegException('Could not find stream info') return file, memory_file def ffmpeg_open_filename(filename): """Open the media file. :rtype: FFmpegFile :return: The structure containing all the information for the media. """ file = FFmpegFile() # TODO: delete this structure and use directly AVFormatContext result = avformat.avformat_open_input(byref(file.context), filename, None, None) if result != 0: raise FFmpegException('avformat_open_input in ffmpeg_open_filename returned an error opening file ' + filename.decode("utf8") + ' Error code: ' + str(result)) result = avformat.avformat_find_stream_info(file.context, None) if result < 0: raise FFmpegException('Could not find stream info') return file def ffmpeg_close_file(file): """Close the media file and free resources.""" avformat.avformat_close_input(byref(file.context)) def ffmpeg_file_info(file): """Get information on the file: - number of streams - duration - artist - album - date - track :rtype: FileInfo :return: The file info instance containing all the meta information. """ info = FileInfo() info.n_streams = file.context.contents.nb_streams info.start_time = file.context.contents.start_time info.duration = file.context.contents.duration entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('title'), None, 0) if entry: info.title = asstr(entry.contents.value) entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('artist'), None, 0) \ or avutil.av_dict_get(file.context.contents.metadata, asbytes('album_artist'), None, 0) if entry: info.author = asstr(entry.contents.value) entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('copyright'), None, 0) if entry: info.copyright = asstr(entry.contents.value) entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('comment'), None, 0) if entry: info.comment = asstr(entry.contents.value) entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('album'), None, 0) if entry: info.album = asstr(entry.contents.value) entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('date'), None, 0) if entry: info.year = asstr(entry.contents.value) entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('track'), None, 0) if entry: info.track = asstr(entry.contents.value) entry = avutil.av_dict_get(file.context.contents.metadata, asbytes('genre'), None, 0) if entry: info.genre = asstr(entry.contents.value) return info def ffmpeg_stream_info(file, stream_index): """Open the stream """ av_stream = file.context.contents.streams[stream_index].contents context = av_stream.codecpar.contents if context.codec_type == AVMEDIA_TYPE_VIDEO: if _debug: print("codec_type=", context.codec_type) print(" codec_id=", context.codec_id) print(" codec name=", avcodec.avcodec_get_name(context.codec_id).decode('utf-8')) print(" codec_tag=", context.codec_tag) print(" extradata=", context.extradata) print(" extradata_size=", context.extradata_size) print(" format=", context.format) print(" bit_rate=", context.bit_rate) print(" bits_per_coded_sample=", context.bits_per_coded_sample) print(" bits_per_raw_sample=", context.bits_per_raw_sample) print(" profile=", context.profile) print(" level=", context.level) print(" width=", context.width) print(" height=", context.height) print(" sample_aspect_ratio=", context.sample_aspect_ratio.num, context.sample_aspect_ratio.den) print(" field_order=", context.field_order) print(" color_range=", context.color_range) print(" color_primaries=", context.color_primaries) print(" color_trc=", context.color_trc) print(" color_space=", context.color_space) print(" chroma_location=", context.chroma_location) print(" video_delay=", context.video_delay) print(" channel_layout=", context.channel_layout) print(" channels=", context.channels) print(" sample_rate=", context.sample_rate) print(" block_align=", context.block_align) print(" frame_size=", context.frame_size) print(" initial_padding=", context.initial_padding) print(" trailing_padding=", context.trailing_padding) print(" seek_preroll=", context.seek_preroll) # frame_rate = avformat.av_guess_frame_rate(file.context, av_stream, None) info = StreamVideoInfo( context.width, context.height, context.sample_aspect_ratio.num, context.sample_aspect_ratio.den, frame_rate.num, frame_rate.den, context.codec_id ) elif context.codec_type == AVMEDIA_TYPE_AUDIO: info = StreamAudioInfo( context.format, context.sample_rate, context.channels ) if context.format in (AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8P): info.sample_bits = 8 elif context.format in (AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP): info.sample_bits = 16 elif context.format in (AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32P): info.sample_bits = 32 else: info.sample_format = None info.sample_bits = None else: return None return info def ffmpeg_open_stream(file, index): if not 0 <= index < file.context.contents.nb_streams: raise FFmpegException('index out of range. Only {} streams.'.format(file.context.contents.nb_streams)) codec_context = avcodec.avcodec_alloc_context3(None) if not codec_context: raise MemoryError('Could not allocate Codec Context.') result = avcodec.avcodec_parameters_to_context( codec_context, file.context.contents.streams[index].contents.codecpar) if result < 0: avcodec.avcodec_free_context(byref(codec_context)) raise FFmpegException('Could not copy the AVCodecContext.') codec_id = codec_context.contents.codec_id codec = avcodec.avcodec_find_decoder(codec_id) if _debug: print("Found Codec=", codec_id, "=", codec.contents.long_name.decode()) # VP8 and VP9 default codec don't support alpha transparency. # Force libvpx codec in this case. if codec_id == AV_CODEC_ID_VP9: newcodec = avcodec.avcodec_find_decoder_by_name("libvpx-vp9".encode('utf-8')) codec = newcodec or codec if codec_id == AV_CODEC_ID_VP8: newcodec = avcodec.avcodec_find_decoder_by_name("libvpx".encode('utf-8')) codec = newcodec or codec if not codec: raise FFmpegException('No codec found for this media. ' 'codecID={}'.format(codec_id)) codec_id = codec.contents.id if _debug: print("Loaded codec: ", codec.contents.long_name.decode()) result = avcodec.avcodec_open2(codec_context, codec, None) if result < 0: raise FFmpegException('Could not open the media with the codec.') stream = FFmpegStream() stream.format_context = file.context stream.codec_context = codec_context stream.type = codec_context.contents.codec_type stream.frame = avutil.av_frame_alloc() stream.time_base = file.context.contents.streams[index].contents.time_base return stream def ffmpeg_close_stream(stream): if stream.frame: avutil.av_frame_free(byref(stream.frame)) avcodec.avcodec_free_context(byref(stream.codec_context)) def ffmpeg_seek_file(file, timestamp, ): flags = 0 max_ts = file.context.contents.duration * AV_TIME_BASE result = avformat.avformat_seek_file( file.context, -1, 0, timestamp, timestamp, flags ) if result < 0: buf = create_string_buffer(128) avutil.av_strerror(result, buf, 128) descr = buf.value raise FFmpegException('Error occured while seeking. ' + descr.decode()) def ffmpeg_read(file, packet): """Read from the stream a packet. :rtype: bool :return: True if the packet was correctly read. False if the end of stream was reached or an error occured. """ avcodec.av_packet_unref(packet) result = avformat.av_read_frame(file.context, packet) if result < 0: return False return True def ffmpeg_get_packet_pts(file, packet): if packet.contents.dts != AV_NOPTS_VALUE: pts = packet.contents.dts else: pts = 0 timestamp = avutil.av_rescale_q(pts, file.context.contents.streams[ packet.contents.stream_index].contents.time_base, AV_TIME_BASE_Q) return timestamp def ffmpeg_get_frame_ts(stream): ts = stream.frame.contents.best_effort_timestamp timestamp = avutil.av_rescale_q(ts, stream.time_base, AV_TIME_BASE_Q) return timestamp def ffmpeg_init_packet(): p = avcodec.av_packet_alloc() if not p: raise MemoryError("Could not allocate AVPacket.") return p def ffmpeg_free_packet(packet): avcodec.av_packet_free(byref(packet)) def ffmpeg_unref_packet(packet): avcodec.av_packet_unref(byref(packet)) def ffmpeg_transfer_packet(dst, src): avcodec.av_packet_move_ref(dst, src) def get_version(): """Return an informative version string of FFmpeg""" return avutil.av_version_info().decode() def timestamp_from_ffmpeg(timestamp): return float(timestamp) / 1000000 def timestamp_to_ffmpeg(timestamp): return int(timestamp * 1000000) class _Packet: def __init__(self, packet, timestamp): self.packet = AVPacket() ffmpeg_transfer_packet(byref(self.packet), packet) self.timestamp = timestamp def __del__(self): if ffmpeg_unref_packet is not None: ffmpeg_unref_packet(self.packet) class VideoPacket(_Packet): _next_id = 0 def __init__(self, packet, timestamp): super(VideoPacket, self).__init__(packet, timestamp) # Decoded image. 0 == not decoded yet; None == Error or discarded self.image = 0 self.id = self._next_id VideoPacket._next_id += 1 class AudioPacket(_Packet): pass class FFmpegSource(StreamingSource): # Max increase/decrease of original sample size SAMPLE_CORRECTION_PERCENT_MAX = 10 # Maximum amount of packets to create for video and audio queues. MAX_QUEUE_SIZE = 100 def __init__(self, filename, file=None): self._packet = None self._video_stream = None self._audio_stream = None self._stream_end = False self._file = None self._memory_file = None encoded_filename = filename.encode(sys.getfilesystemencoding()) if file: self._file, self._memory_file = ffmpeg_open_memory_file(encoded_filename, file) else: self._file = ffmpeg_open_filename(encoded_filename) if not self._file: raise FFmpegException('Could not open "{0}"'.format(filename)) self._video_stream_index = None self._audio_stream_index = None self._audio_format = None self.img_convert_ctx = POINTER(SwsContext)() self.audio_convert_ctx = POINTER(SwrContext)() file_info = ffmpeg_file_info(self._file) self.info = SourceInfo() self.info.title = file_info.title self.info.author = file_info.author self.info.copyright = file_info.copyright self.info.comment = file_info.comment self.info.album = file_info.album self.info.year = file_info.year self.info.track = file_info.track self.info.genre = file_info.genre # Pick the first video and audio streams found, ignore others. for i in range(file_info.n_streams): info = ffmpeg_stream_info(self._file, i) if isinstance(info, StreamVideoInfo) and self._video_stream is None: stream = ffmpeg_open_stream(self._file, i) self.video_format = VideoFormat( width=info.width, height=info.height) if info.sample_aspect_num != 0: self.video_format.sample_aspect = ( float(info.sample_aspect_num) / info.sample_aspect_den) self.video_format.frame_rate = ( float(info.frame_rate_num) / info.frame_rate_den) self._video_stream = stream self._video_stream_index = i elif isinstance(info, StreamAudioInfo) and info.sample_bits in (8, 16, 24) and self._audio_stream is None: stream = ffmpeg_open_stream(self._file, i) self.audio_format = AudioFormat( channels=min(2, info.channels), sample_size=info.sample_bits, sample_rate=info.sample_rate) self._audio_stream = stream self._audio_stream_index = i channel_input = avutil.av_get_default_channel_layout(info.channels) channels_out = min(2, info.channels) channel_output = avutil.av_get_default_channel_layout(channels_out) sample_rate = stream.codec_context.contents.sample_rate sample_format = stream.codec_context.contents.sample_fmt if sample_format in (AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8P): self.tgt_format = AV_SAMPLE_FMT_U8 elif sample_format in (AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P): self.tgt_format = AV_SAMPLE_FMT_S16 elif sample_format in (AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32P): self.tgt_format = AV_SAMPLE_FMT_S32 elif sample_format in (AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP): self.tgt_format = AV_SAMPLE_FMT_S16 else: raise FFmpegException('Audio format not supported.') self.audio_convert_ctx = swresample.swr_alloc_set_opts(None, channel_output, self.tgt_format, sample_rate, channel_input, sample_format, sample_rate, 0, None) if (not self.audio_convert_ctx or swresample.swr_init(self.audio_convert_ctx) < 0): swresample.swr_free(self.audio_convert_ctx) raise FFmpegException('Cannot create sample rate converter.') self._packet = ffmpeg_init_packet() self._events = [] # They don't seem to be used! self.audioq = deque() # Make queue big enough to accomodate 1.2 sec? self._max_len_audioq = self.MAX_QUEUE_SIZE # Need to figure out a correct amount if self.audio_format: # Buffer 1 sec worth of audio nbytes = ffmpeg_get_audio_buffer_size(self.audio_format) self._audio_buffer = (c_uint8 * nbytes)() self.videoq = deque() self._max_len_videoq = self.MAX_QUEUE_SIZE # Need to figure out a correct amount self.start_time = self._get_start_time() self._duration = timestamp_from_ffmpeg(file_info.duration) self._duration -= self.start_time # Flag to determine if the _fillq method was already scheduled self._fillq_scheduled = False self._fillq() # Don't understand why, but some files show that seeking without # reading the first few packets results in a seeking where we lose # many packets at the beginning. # We only seek back to 0 for media which have a start_time > 0 if self.start_time > 0: self.seek(0.0) def __del__(self): if self._packet and ffmpeg_free_packet is not None: ffmpeg_free_packet(self._packet) if self._video_stream and swscale is not None: swscale.sws_freeContext(self.img_convert_ctx) ffmpeg_close_stream(self._video_stream) if self._audio_stream: swresample.swr_free(self.audio_convert_ctx) ffmpeg_close_stream(self._audio_stream) if self._file and ffmpeg_close_file is not None: ffmpeg_close_file(self._file) def seek(self, timestamp): if _debug: print('FFmpeg seek', timestamp) ffmpeg_seek_file( self._file, timestamp_to_ffmpeg(timestamp + self.start_time) ) del self._events[:] self._stream_end = False self._clear_video_audio_queues() self._fillq() # Consume video and audio packets until we arrive at the correct # timestamp location if not self.audio_format: while len(self.videoq) > 1: # We only advance if there is at least 2 packets in the queue # The queue is only left with 1 packet if we have reached the # end of the stream. if timestamp < self.videoq[1].timestamp: break else: self.get_next_video_frame(skip_empty_frame=False) elif not self.video_format: while len(self.audioq) > 1: # We only advance if there is at least 2 packets in the queue # The queue is only left with 1 packet if we have reached the # end of the stream. if timestamp < self.audioq[1].timestamp: break else: self._get_audio_packet() else: while len(self.audioq) > 1 and len(self.videoq) > 1: # We only advance if there is at least 2 packets in the queue # The queue is only left with 1 packet if we have reached the # end of the stream. audioq_is_first = self.audioq[0].timestamp < self.videoq[0].timestamp correct_audio_pos = timestamp < self.audioq[1].timestamp correct_video_pos = timestamp < self.videoq[1].timestamp if audioq_is_first and not correct_audio_pos: self._get_audio_packet() elif not correct_video_pos: self.get_next_video_frame(skip_empty_frame=False) else: break def _get_audio_packet(self): """Take an audio packet from the queue. This function will schedule its `_fillq` function to fill up the queues if space is available. Multiple calls to this method will only result in one scheduled call to `_fillq`. """ audio_data = self.audioq.popleft() low_lvl = self._check_low_level() if not low_lvl and not self._fillq_scheduled: pyglet.clock.schedule_once(lambda dt: self._fillq(), 0) self._fillq_scheduled = True return audio_data def _get_video_packet(self): """Take a video packet from the queue. This function will schedule its `_fillq` function to fill up the queues if space is available. Multiple calls to this method will only result in one scheduled call to `_fillq`. """ if not self.videoq: return None video_packet = self.videoq.popleft() low_lvl = self._check_low_level() if not low_lvl and not self._fillq_scheduled: pyglet.clock.schedule_once(lambda dt: self._fillq(), 0) self._fillq_scheduled = True return video_packet def _clear_video_audio_queues(self): """Empty both audio and video queues.""" self.audioq.clear() self.videoq.clear() def _fillq(self): """Fill up both Audio and Video queues if space is available in both""" # We clear our flag. self._fillq_scheduled = False while (len(self.audioq) < self._max_len_audioq and len(self.videoq) < self._max_len_videoq): if self._get_packet(): self._process_packet() else: self._stream_end = True break def _check_low_level(self): """Check if both audio and video queues are getting very low. If one of them has less than 2 elements, we fill the queue immediately with new packets. We don't wait for a scheduled call because we need them immediately. This would normally happens only during seek operations where we consume many packets to find the correct timestamp. """ if len(self.audioq) < 2 or len(self.videoq) < 2: if len(self.audioq) < self._max_len_audioq and len(self.videoq) < self._max_len_videoq: self._fillq() return True return False def _get_packet(self): # Read a packet into self._packet. Returns True if OK, False if no # more packets are in stream. return ffmpeg_read(self._file, self._packet) def _process_packet(self): """Process the packet that has been just read. Determines whether it's a video or audio packet and queue it in the appropriate queue. """ timestamp = ffmpeg_get_packet_pts(self._file, self._packet) timestamp = timestamp_from_ffmpeg(timestamp) timestamp -= self.start_time if self._packet.contents.stream_index == self._video_stream_index: video_packet = VideoPacket(self._packet, timestamp) if _debug: print('Created and queued packet %d (%f)' % (video_packet.id, video_packet.timestamp)) self.videoq.append(video_packet) return video_packet elif self.audio_format and self._packet.contents.stream_index == self._audio_stream_index: audio_packet = AudioPacket(self._packet, timestamp) self.audioq.append(audio_packet) return audio_packet def get_audio_data(self, num_bytes, compensation_time=0.0): data = b'' timestamp = duration = 0 while len(data) < num_bytes: if not self.audioq: break audio_packet = self._get_audio_packet() buffer, timestamp, duration = self._decode_audio_packet(audio_packet, compensation_time) if not buffer: break data += buffer # No data and no audio queue left if not data and not self.audioq: if not self._stream_end: # No more audio data in queue, but we haven't hit the stream end. if _debug: print("Audio queue was starved by the audio driver.") return None audio_data = AudioData(data, len(data), timestamp, duration, []) while self._events and self._events[0].timestamp <= (timestamp + duration): event = self._events.pop(0) if event.timestamp >= timestamp: event.timestamp -= timestamp audio_data.events.append(event) if _debug: print('get_audio_data returning ts {0} with events {1}'.format(audio_data.timestamp, audio_data.events)) print('remaining events are', self._events) return audio_data def _decode_audio_packet(self, audio_packet, compensation_time): while True: try: size_out = self._ffmpeg_decode_audio( audio_packet.packet, self._audio_buffer, compensation_time) except FFmpegException: break if size_out <= 0: break buffer = create_string_buffer(size_out) memmove(buffer, self._audio_buffer, len(buffer)) buffer = buffer.raw duration = float(len(buffer)) / self.audio_format.bytes_per_second timestamp = ffmpeg_get_frame_ts(self._audio_stream) timestamp = timestamp_from_ffmpeg(timestamp) return buffer, timestamp, duration return None, 0, 0 def _ffmpeg_decode_audio(self, packet, data_out, compensation_time): stream = self._audio_stream if stream.type != AVMEDIA_TYPE_AUDIO: raise FFmpegException('Trying to decode audio on a non-audio stream.') sent_result = avcodec.avcodec_send_packet( stream.codec_context, packet, ) if sent_result < 0: buf = create_string_buffer(128) avutil.av_strerror(sent_result, buf, 128) descr = buf.value raise FFmpegException('Error occurred sending packet to decoder. {}'.format(descr.decode())) receive_result = avcodec.avcodec_receive_frame( stream.codec_context, stream.frame ) if receive_result < 0: buf = create_string_buffer(128) avutil.av_strerror(receive_result, buf, 128) descr = buf.value raise FFmpegException('Error occurred receiving frame. {}'.format(descr.decode())) plane_size = c_int(0) data_size = avutil.av_samples_get_buffer_size( byref(plane_size), stream.codec_context.contents.channels, stream.frame.contents.nb_samples, stream.codec_context.contents.sample_fmt, 1) if data_size < 0: raise FFmpegException('Error in av_samples_get_buffer_size') if len(self._audio_buffer) < data_size: raise FFmpegException('Output audio buffer is too small for current audio frame!') nb_samples = stream.frame.contents.nb_samples sample_rate = stream.codec_context.contents.sample_rate bytes_per_sample = avutil.av_get_bytes_per_sample(self.tgt_format) channels_out = min(2, self.audio_format.channels) wanted_nb_samples = nb_samples + compensation_time * sample_rate min_nb_samples = (nb_samples * (100 - self.SAMPLE_CORRECTION_PERCENT_MAX) / 100) max_nb_samples = (nb_samples * (100 + self.SAMPLE_CORRECTION_PERCENT_MAX) / 100) wanted_nb_samples = min(max(wanted_nb_samples, min_nb_samples), max_nb_samples) wanted_nb_samples = int(wanted_nb_samples) if wanted_nb_samples != nb_samples: res = swresample.swr_set_compensation( self.audio_convert_ctx, (wanted_nb_samples - nb_samples), wanted_nb_samples ) if res < 0: raise FFmpegException('swr_set_compensation failed.') data_in = stream.frame.contents.extended_data p_data_out = cast(data_out, POINTER(c_uint8)) out_samples = swresample.swr_get_out_samples(self.audio_convert_ctx, nb_samples) total_samples_out = swresample.swr_convert(self.audio_convert_ctx, byref(p_data_out), out_samples, data_in, nb_samples) while True: # We loop because there could be some more samples buffered in # SwrContext. We advance the pointer where we write our samples. offset = (total_samples_out * channels_out * bytes_per_sample) p_data_offset = cast( addressof(p_data_out.contents) + offset, POINTER(c_uint8) ) samples_out = swresample.swr_convert(self.audio_convert_ctx, byref(p_data_offset), out_samples - total_samples_out, None, 0) if samples_out == 0: # No more samples. We can continue. break total_samples_out += samples_out size_out = (total_samples_out * channels_out * bytes_per_sample) return size_out def _decode_video_packet(self, video_packet): # # Some timing and profiling # pr = cProfile.Profile() # pr.enable() # clock = pyglet.clock.get_default() # t0 = clock.time() width = self.video_format.width height = self.video_format.height pitch = width * 4 # https://ffmpeg.org/doxygen/3.3/group__lavc__decoding.html#ga8f5b632a03ce83ac8e025894b1fc307a nbytes = (pitch * height + FF_INPUT_BUFFER_PADDING_SIZE) buffer = (c_uint8 * nbytes)() try: result = self._ffmpeg_decode_video(video_packet.packet, buffer) except FFmpegException: image_data = None else: image_data = image.ImageData(width, height, 'RGBA', buffer, pitch) timestamp = ffmpeg_get_frame_ts(self._video_stream) timestamp = timestamp_from_ffmpeg(timestamp) video_packet.timestamp = timestamp - self.start_time video_packet.image = image_data if _debug: print('Decoding video packet at timestamp', video_packet, video_packet.timestamp) # t2 = clock.time() # pr.disable() # print("Time in _decode_video_packet: {:.4f} s for timestamp {} s".format(t2-t0, packet.timestamp)) # if t2-t0 > 0.01: # import pstats # ps = pstats.Stats(pr).sort_stats("cumulative") # ps.print_stats() def _ffmpeg_decode_video(self, packet, data_out): stream = self._video_stream rgba_ptrs = (POINTER(c_uint8) * 4)() rgba_stride = (c_int * 4)() width = stream.codec_context.contents.width height = stream.codec_context.contents.height if stream.type != AVMEDIA_TYPE_VIDEO: raise FFmpegException('Trying to decode video on a non-video stream.') sent_result = avcodec.avcodec_send_packet( stream.codec_context, packet, ) if sent_result < 0: buf = create_string_buffer(128) avutil.av_strerror(sent_result, buf, 128) descr = buf.value raise FFmpegException('Video: Error occurred sending packet to decoder. {}'.format(descr.decode())) receive_result = avcodec.avcodec_receive_frame( stream.codec_context, stream.frame ) if receive_result < 0: buf = create_string_buffer(128) avutil.av_strerror(receive_result, buf, 128) descr = buf.value raise FFmpegException('Video: Error occurred receiving frame. {}'.format(descr.decode())) avutil.av_image_fill_arrays(rgba_ptrs, rgba_stride, data_out, AV_PIX_FMT_RGBA, width, height, 1) self.img_convert_ctx = swscale.sws_getCachedContext( self.img_convert_ctx, width, height, stream.codec_context.contents.pix_fmt, width, height, AV_PIX_FMT_RGBA, SWS_FAST_BILINEAR, None, None, None) swscale.sws_scale(self.img_convert_ctx, cast(stream.frame.contents.data, POINTER(POINTER(c_uint8))), stream.frame.contents.linesize, 0, height, rgba_ptrs, rgba_stride) return receive_result def get_next_video_timestamp(self): if not self.video_format: return ts = None if self.videoq: while True: # We skip video packets which are not video frames # This happens in mkv files for the first few frames. try: video_packet = self.videoq.popleft() except IndexError: break if video_packet.image == 0: self._decode_video_packet(video_packet) if video_packet.image is not None: ts = video_packet.timestamp self.videoq.appendleft(video_packet) # put it back break self._get_video_packet() else: ts = None if _debug: print('Next video timestamp is', ts) return ts def get_next_video_frame(self, skip_empty_frame=True): if not self.video_format: return while True: # We skip video packets which are not video frames # This happens in mkv files for the first few frames. video_packet = self._get_video_packet() if not video_packet: return None if video_packet.image == 0: self._decode_video_packet(video_packet) if video_packet.image is not None or not skip_empty_frame: break if _debug: print('Returning', video_packet) return video_packet.image def _get_start_time(self): def streams(): format_context = self._file.context for idx in (self._video_stream_index, self._audio_stream_index): if idx is None: continue stream = format_context.contents.streams[idx].contents yield stream def start_times(streams): yield 0 for stream in streams: start = stream.start_time if start == AV_NOPTS_VALUE: yield 0 start_time = avutil.av_rescale_q(start, stream.time_base, AV_TIME_BASE_Q) start_time = timestamp_from_ffmpeg(start_time) yield start_time return max(start_times(streams())) @property def audio_format(self): return self._audio_format @audio_format.setter def audio_format(self, value): self._audio_format = value if value is None: self.audioq.clear() ffmpeg_init() if pyglet.options['debug_media']: _debug = True else: _debug = False avutil.av_log_set_level(8) ######################################### # Decoder class: ######################################### class FFmpegDecoder(MediaDecoder): def get_file_extensions(self): return '.mp3', '.ogg' def decode(self, filename, file, streaming=True): if streaming: return FFmpegSource(filename, file) else: return StaticSource(FFmpegSource(filename, file)) def get_decoders(): return [FFmpegDecoder()] def get_encoders(): return []