ai_speech_research_papers/tts.py at main

 # Written by retoor@molodetz.nl
 # This script listens to audio input via a microphone, recognizes speech using the Google API, sends the recognized text to a server for processing, and uses Google Cloud to convert the server response to speech.
 # Imports:
 # - speech_recognition: For speech recognition functionality.
 # - xmlrpc.client: To communicate with a remote server using the XML-RPC protocol.
 # - gcloud: Supplies the awaitable `tts` function used to speak the server's reply (presumably a project-local Google Cloud text-to-speech wrapper; confirm which module is intended).
 # MIT License
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 import speech_recognition as sr
 from xmlrpc.client import ServerProxy
 import gcloud
 # XML-RPC proxy for the remote API; the conversation loop calls its
 # gpt4o_mini method with the recognized text to obtain a reply.
 molodetz = ServerProxy("https://api.molodetz.nl/rpc")
 def listen(*, language="nl-NL", timeout=10, ambient_duration=0):
     """Block until one spoken phrase is captured and transcribed.

     Opens the default microphone and repeatedly records a phrase, sending it
     to Google's speech recognition via ``recognize_google`` until a
     transcription succeeds. Failed attempts are retried indefinitely.

     Args:
         language: Language tag handed to the recognizer, e.g. ``"nl-NL"`` or
             ``"en-US"``.
         timeout: Seconds to wait for speech to start before a new attempt.
         ambient_duration: Seconds spent calibrating for ambient noise before
             listening. ``0`` (the default) skips calibration, so calling
             ``listen()`` without arguments adds no start-up delay.

     Returns:
         The recognized text.
     """
     recognizer = sr.Recognizer()
     with sr.Microphone() as source:
         if ambient_duration > 0:
             # Only announce calibration when it really happens.
             print(f"Adjusting to surroundings for {ambient_duration} seconds.")
             recognizer.adjust_for_ambient_noise(source, duration=ambient_duration)
         while True:
             print("Listening...")
             try:
                 audio_data = recognizer.listen(source, timeout=timeout)
                 return recognizer.recognize_google(audio_data, language=language)
             except (sr.WaitTimeoutError, sr.UnknownValueError):
                 # Nobody spoke within the timeout, or the audio could not be
                 # understood: simply try again.
                 continue
             except sr.RequestError as error:
                 # Keep retrying (best effort), but surface the failure so a
                 # dead network or API problem is not an invisible endless loop.
                 print(f"Speech recognition request failed: {error}")
                 continue
 async def _converse_once():
     """Handle a single exchange: hear a phrase, query the server, speak the reply."""
     # listen() blocks until a phrase has been transcribed.
     text = listen()
     print(f"You said:\n\t{text}")
     # Synchronous XML-RPC round trip to the remote model.
     response_llm = molodetz.gpt4o_mini(text)
     print(f"GPT4o mini said:\n\t{response_llm}")
     await gcloud.tts(response_llm)


 async def main():
     """Run the voice conversation loop.

     Repeats one exchange after another without end; the coroutine only
     finishes if an exception propagates out of an exchange.
     """
     while True:
         await _converse_once()
 # Script entry point: run the conversation loop, which repeats indefinitely,
 # on a fresh asyncio event loop.
 if __name__ == "__main__":
     # Imported here because only the script entry point needs asyncio.
     import asyncio
     asyncio.run(main())

No results found.