Whisper - eliminate "missing/bad audio track" errors when files have multiple audio streams

This commit is contained in:
JayZed 2025-04-12 14:12:09 -04:00 committed by GitHub
parent 85d75a72b8
commit d6e7773a0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -158,10 +158,18 @@ def encode_audio_stream(path, ffmpeg_path, audio_stream_language=None):
# Use the ISO 639-2 code if available
audio_stream_language = get_ISO_639_2_code(audio_stream_language)
logger.debug(f"Whisper will use the '{audio_stream_language}' audio stream for {path}")
inp = inp[f'a:m:language:{audio_stream_language}']
out, _ = inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1") \
.run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True)
# 0 = Pick first stream in case there are multiple language streams of the same language,
# otherwise ffmpeg will try to combine multiple streams, but our output format doesn't support that.
# The first stream is probably the correct one, as later streams are usually commentaries
lang_map = f"0:m:language:{audio_stream_language}"
else:
# there is only one stream, so just use that one
lang_map = ""
out, _ = (
inp.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000, af="aresample=async=1")
.global_args("-map", lang_map)
.run(cmd=[ffmpeg_path, "-nostdin"], capture_stdout=True, capture_stderr=True)
)
except ffmpeg.Error as e:
logger.warning(f"ffmpeg failed to load audio: {e.stderr.decode()}")