Improved whisper provider to not throttle when an unsupported audio language is encountered. #2474

As we have noted before, bad input data should be no reason to throttle a provider. In this case, if the input language was not supported by whisper, we raised a ValueError that was never caught; the resulting error in the whisper provider caused it to be throttled. Instead, we now detect this case and log a message. However, given that the input language was not one of the 99 currently known to whisper, it's probably a mislabeled audio track. If the user's desired output language is English, then we will tell whisper that the input audio is also English and ask it to transcribe it. Whisper does a very good job of transcribing almost anything to English, so it's worth a try. For any other desired output language, the subtitle is skipped without raising an error. This should address the throttling in issue #2474.
2025-04-24 06:37:16 -04:00 · 2024-04-29 22:11:47 -04:00 · 2024-04-29 22:11:47 -04:00 · 5749971d67
commit 5749971d67
parent c5a5dc9ddf
1 changed files with 15 additions and 3 deletions
--- a/custom_libs/subliminal_patch/providers/whisperai.py
+++ b/custom_libs/subliminal_patch/providers/whisperai.py
@ -169,7 +169,7 @@ def whisper_get_language_reverse(alpha3):
        lan = whisper_get_language(wl, whisper_languages[wl])
        if lan.alpha3 == alpha3:
            return wl
-    raise ValueError
+    return None

 def language_from_alpha3(lang):
    name = Language(lang).name
@ -317,7 +317,7 @@ class WhisperAIProvider(Provider):
        if out == None:
            logger.info(f"Whisper cannot process {subtitle.video.original_path} because of missing/bad audio track")
            subtitle.content = None
-            return         
+            return  

        logger.debug(f'Audio stream length (in WAV format) is {len(out):,} bytes')

@ -326,11 +326,23 @@ class WhisperAIProvider(Provider):
        else:
            output_language = "eng"

+        input_language = whisper_get_language_reverse(subtitle.audio_language)
+        if input_language is None:
+            if output_language == "eng":
+                # guess that audio track is mislabelled English and let whisper try to transcribe it
+                input_language = "en"
+                subtitle.task = "transcribe"
+                logger.info(f"Whisper treating unsupported audio track language: '{subtitle.audio_language}' as English")
+            else:
+                logger.info(f"Whisper cannot process {subtitle.video.original_path} because of unsupported audio track language: '{subtitle.audio_language}'")
+                subtitle.content = None
+                return
+        
        logger.info(f'Starting WhisperAI {subtitle.task} to {language_from_alpha3(output_language)} for {subtitle.video.original_path}')
        startTime = time.time()

        r = self.session.post(f"{self.endpoint}/asr",
-                              params={'task': subtitle.task, 'language': whisper_get_language_reverse(subtitle.audio_language), 'output': 'srt', 'encode': 'false'},
+                              params={'task': subtitle.task, 'language': input_language, 'output': 'srt', 'encode': 'false'},
                              files={'audio_file': out},
                              timeout=(self.response, self.timeout))