New speech api.
This commit is contained in:
commit
b6871a85c1
166
.gitignore
vendored
Normal file
166
.gitignore
vendored
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
.vscode
|
||||||
|
.history
|
||||||
|
*.db*
|
||||||
|
|
||||||
|
# ---> Python
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
8
Makefile
Normal file
8
Makefile
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
install:
|
||||||
|
python3 -m venv .venv
|
||||||
|
./.venv/bin/pip install -e .
|
||||||
|
|
||||||
|
tts:
|
||||||
|
./.venv/bin/rtts
|
||||||
|
stt:
|
||||||
|
./.venv/bin/rstt
|
3
pyproject.toml
Normal file
3
pyproject.toml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools", "wheel"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
pyaudio
|
||||||
|
SpeechRecognition
|
||||||
|
google-cloud-speech
|
||||||
|
google-cloud-texttospeech
|
||||||
|
google-auth
|
||||||
|
pygame
|
32
setup.cfg
Normal file
32
setup.cfg
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
[metadata]
|
||||||
|
name = rspeech
|
||||||
|
version = 1.0.0
|
||||||
|
description = Library for speech processing
|
||||||
|
author = retoor
|
||||||
|
author_email = retoor@molodetz.nl
|
||||||
|
license = MIT
|
||||||
|
long_description = file: README.md
|
||||||
|
long_description_content_type = text/markdown
|
||||||
|
|
||||||
|
[options]
|
||||||
|
packages = find:
|
||||||
|
package_dir =
|
||||||
|
= src
|
||||||
|
python_requires = >=3.7
|
||||||
|
install_requires =
|
||||||
|
pyaudio
|
||||||
|
SpeechRecognition
|
||||||
|
google-cloud-speech
|
||||||
|
google-cloud-texttospeech
|
||||||
|
google-auth
|
||||||
|
pygame
|
||||||
|
aiohttp
|
||||||
|
packaging
|
||||||
|
|
||||||
|
[options.packages.find]
|
||||||
|
where = src
|
||||||
|
|
||||||
|
[options.entry_points]
|
||||||
|
console_scripts =
|
||||||
|
rtts = rspeech.tts:main
|
||||||
|
rstt = rspeech.stt:main
|
0
src/rspeech/__init__.py
Normal file
0
src/rspeech/__init__.py
Normal file
0
src/rspeech/__main__.py
Normal file
0
src/rspeech/__main__.py
Normal file
137
src/rspeech/gcloud.py
Normal file
137
src/rspeech/gcloud.py
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
# Written by retoor@molodetz.nl
|
||||||
|
|
||||||
|
# This script interfaces with Google's Text-to-Speech API to synthesize spoken audio from text.
|
||||||
|
# It also includes functionality to handle Google authentication tokens.
|
||||||
|
|
||||||
|
# External imports:
|
||||||
|
# - aiohttp: Asynchronous HTTP requests.
|
||||||
|
# - google-auth packages: For managing Google authentication tokens.
|
||||||
|
# - rspeech.play: Local module used to play the synthesized audio.
|
||||||
|
|
||||||
|
# MIT License
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
import base64
|
||||||
|
import sys
|
||||||
|
from functools import cache
|
||||||
|
from google.oauth2 import id_token
|
||||||
|
from google.auth.transport import requests
|
||||||
|
import google.auth
|
||||||
|
from rspeech.play import play_audio
|
||||||
|
import google.oauth2.credentials
|
||||||
|
import uuid
|
||||||
|
import pathlib
|
||||||
|
from rspeech.play import play_audio
|
||||||
|
|
||||||
|
# Markdown/punctuation fragments that are stripped from text before it is
# sent to the speech synthesizer (applied in this order by tts()).
IGNORE_CHARS = [
    "*",
    "#",
    "`",
    "'",
    '"',
    "\\",
    "/",
    "---",
]
|
||||||
|
|
||||||
|
# Holds the most recent (access_token, monotonic_deadline) pair under the key
# "token". OAuth access tokens are short-lived, so an unbounded @cache would
# keep returning an expired token in long-running sessions.
_GOOGLE_TOKEN_CACHE = {}


def google_token():
    """Return a Google OAuth2 access token for the cloud-platform scope.

    The application-default credentials are loaded with
    ``google.auth.default()`` and their ``refresh_token``, ``client_id`` and
    ``client_secret`` are exchanged at Google's token endpoint.  The
    credentials therefore have to expose those three attributes
    (presumably user credentials created by
    ``gcloud auth application-default login`` -- verify for other types).

    The token is cached until shortly before the lifetime reported by the
    token endpoint (``expires_in``) runs out; when no lifetime is reported
    the token is not cached at all.

    Returns:
        str: The bearer access token.
    """
    import time

    from google.oauth2 import _client as google_auth_client
    import google.auth.transport.urllib3 as google_auth_urllib3
    import urllib3

    cached = _GOOGLE_TOKEN_CACHE.get("token")
    if cached is not None and time.monotonic() < cached[1]:
        return cached[0]

    gcloud_default, _project = google.auth.default()
    request = google_auth_urllib3.Request(urllib3.PoolManager())

    access_token, _, _, grant = google_auth_client.refresh_grant(
        request,
        'https://oauth2.googleapis.com/token',
        gcloud_default.refresh_token,
        gcloud_default.client_id,
        gcloud_default.client_secret,
        ['https://www.googleapis.com/auth/cloud-platform'],
    )

    # Renew one minute early so a request never starts with a token that is
    # about to expire while in flight.
    lifetime = float(grant.get("expires_in") or 0)
    deadline = time.monotonic() + max(lifetime - 60.0, 0.0)
    _GOOGLE_TOKEN_CACHE["token"] = (access_token, deadline)
    return access_token
|
||||||
|
|
||||||
|
|
||||||
|
async def tts(
    text: str,
    google_project: str = "lisa-448004",
    language_code: str = "nl-NL",
    ssml_gender: str = "FEMALE",
    speaking_rate: float = 1.0,
    pitch: float = 0.0,
    name: str = "nl-NL-Standard-D",
    ignore_chars=None,
):
    """Synthesize ``text`` with Google Text-to-Speech and play the result.

    Args:
        text: Text to speak.  Every entry of ``ignore_chars`` is removed and
            the remainder is stripped; if nothing is left the call is a no-op.
        google_project: Value sent in the ``X-Goog-User-Project`` header.
        language_code: ``voice.languageCode`` of the request.
        ssml_gender: ``voice.ssmlGender`` of the request.
        speaking_rate: ``audioConfig.speakingRate`` of the request.
        pitch: ``audioConfig.pitch`` of the request.
        name: ``voice.name`` of the request.
        ignore_chars: Substrings to delete from ``text``; defaults to
            ``IGNORE_CHARS``.

    Returns:
        None.

    Raises:
        RuntimeError: If the response contains no ``audioContent`` (for
            example because the API returned an error object).
    """
    if ignore_chars is None:
        ignore_chars = IGNORE_CHARS

    # Remove markdown
    for char in ignore_chars:
        text = text.replace(char, "")
    text = text.strip()
    if not text:
        return

    url = "https://texttospeech.googleapis.com/v1/text:synthesize"
    headers = {
        "Authorization": f"Bearer {google_token()}",
        "Content-Type": "application/json",
        "X-Goog-User-Project": google_project,
    }
    data = {
        "input": {
            "text": text,
        },
        "voice": {
            "languageCode": language_code,
            "name": name,
            "ssmlGender": ssml_gender,
        },
        "audioConfig": {
            "audioEncoding": "MP3",
            "speakingRate": speaking_rate,
            "pitch": pitch,
        },
    }

    async with aiohttp.ClientSession() as session:
        # The inner context manager releases the connection once the body
        # has been read.
        async with session.post(url, headers=headers, json=data) as response:
            status = response.status
            response_json = await response.json()

    audio_content = response_json.get("audioContent")
    if not audio_content:
        # Fail with the API's own error payload instead of an AttributeError
        # on None further down.
        raise RuntimeError(
            f"Text-to-Speech request failed (HTTP {status}): "
            f"{response_json.get('error', response_json)}"
        )

    # The audio is written to a uniquely named temporary mp3 in the current
    # working directory and removed again even if playback fails.
    file = pathlib.Path(str(uuid.uuid4()) + ".mp3")
    try:
        file.write_bytes(base64.b64decode(audio_content))
        play_audio(file)
    finally:
        file.unlink(missing_ok=True)
    return
|
||||||
|
|
||||||
|
|
||||||
|
def oud(file_path=None):
    """Transcribe an audio file with Google Cloud Speech-to-Text and print it.

    Legacy helper ("oud" is Dutch for "old").  It previously referenced the
    undefined names ``speech`` and ``file_path`` and could only raise
    ``NameError``; the client library is now imported locally and the file
    to transcribe is passed in.

    Args:
        file_path: Path of the audio file to send.  The request is configured
            for LINEAR16 audio at 16000 Hz in "en-US".  The default of
            ``None`` only exists to keep the zero-argument signature valid.

    Raises:
        ValueError: If ``file_path`` is not given.
    """
    if file_path is None:
        raise ValueError("oud() requires the path of the audio file to transcribe")

    # Imported here because nothing else in this module needs the
    # Speech-to-Text client (google-cloud-speech is a declared dependency).
    from google.cloud import speech

    client = speech.SpeechClient()

    with open(file_path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )
    response = client.recognize(config=config, audio=audio)
    for result in response.results:
        print("Transcript:", result.alternatives[0].transcript)
|
||||||
|
|
||||||
|
|
||||||
|
async def main_async():
    """Smoke test: print the current access token, then speak one sentence."""
    # NOTE(review): this writes a live bearer token to stdout -- confirm that
    # is intended before running it where output is logged.
    token = google_token()
    print(token)
    await tts("If you hear this sentence, the google part works fine. Congrats.")
|
||||||
|
|
||||||
|
def main():
    """Synchronous entry point: run the async smoke test to completion."""
    smoke_test = main_async()
    asyncio.run(smoke_test)


# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
|
76
src/rspeech/play.py
Normal file
76
src/rspeech/play.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# Written by retoor@molodetz.nl
|
||||||
|
|
||||||
|
# This module plays audio files, either through pygame's mixer (play_audio) or by decoding them with ffmpeg and streaming the raw PCM data to PyAudio (play_audio2).
|
||||||
|
|
||||||
|
# Libraries imported: 'pyaudio', 'functools', 'os', 'subprocess', 'sys', 'pygame'
|
||||||
|
|
||||||
|
# The MIT License (MIT)
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
|
||||||
|
import pyaudio
|
||||||
|
import functools
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import pygame
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def play_audio(filename):
    """Play an audio file through pygame's mixer and block until it is done.

    Args:
        filename: The file to play; handed unchanged to
            ``pygame.mixer.music.load``.
    """
    pygame.mixer.init()
    # One clock for the whole playback instead of a new one per poll.
    clock = pygame.time.Clock()
    try:
        pygame.mixer.music.load(filename)
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():
            clock.tick(10)  # poll roughly ten times per second
    finally:
        # Release the file handle so callers can delete the file afterwards
        # (an open handle blocks deletion on some platforms).
        pygame.mixer.music.unload()
|
||||||
|
|
||||||
|
|
||||||
|
def play_audio2(filename):
    """Decode ``filename`` with ffmpeg and stream the PCM data to PyAudio.

    ffmpeg is asked for signed 16-bit little-endian samples at 44100 Hz with
    two channels on its stdout; the PyAudio output stream is opened with the
    matching format and fed in 4096-byte chunks until ffmpeg's output ends.

    Args:
        filename: Input file passed to ffmpeg's ``-i`` option.
    """
    ffmpeg_cmd = [
        "ffmpeg",
        "-i", filename,
        "-f", "s16le",
        "-ar", "44100",
        "-ac", "2",
        "pipe:1",
    ]
    # stderr is discarded: it was never read, and an unread pipe can fill up
    # and stall ffmpeg in the middle of playback.
    process = subprocess.Popen(
        ffmpeg_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        bufsize=10**6,
    )
    chunk_size = 4096

    try:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=p.get_format_from_width(2),
                channels=2,
                rate=44100,
                output=True,
            )
            try:
                # read() returns b"" once ffmpeg has closed its stdout.
                for data in iter(lambda: process.stdout.read(chunk_size), b""):
                    stream.write(data)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
    finally:
        # Always reap the child, even if opening the audio device failed.
        process.stdout.close()
        process.wait()
|
103
src/rspeech/stt.py
Normal file
103
src/rspeech/stt.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# Written by retoor@molodetz.nl
|
||||||
|
|
||||||
|
# This script listens to audio input via a microphone and recognizes speech using the Google API through the speech_recognition package; when run as a program it prints each recognized phrase.
|
||||||
|
|
||||||
|
# Imports:
|
||||||
|
# - speech_recognition: For speech recognition functionality.
|
||||||
|
# - logging, asyncio, time: Standard-library modules for logging, running the blocking listener from async code, and pausing between rounds.
|
||||||
|
# - rspeech.gcloud: Local module that wraps Google Cloud Text-to-Speech (imported here but not used by the functions below).
|
||||||
|
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all
|
||||||
|
# copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
# SOFTWARE.
|
||||||
|
|
||||||
|
import speech_recognition as sr
|
||||||
|
from rspeech import gcloud
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def listen(
    timeout: int = 0,
    phrase_time_limit: int = 0,
    language: str = "nl-NL",
    adjust_ambiance_seconds: int = 1,
    save_to=None,
    recognize=True,
):
    """
    Listen on the microphone until a phrase has been captured (and, by
    default, recognized) using the Google API. For this function there are
    no credentials or gcloud account required.

    The function keeps listening until it can return: a listen timeout, an
    unintelligible phrase or a failed recognition request all start a new
    listening round.

    Args:
        timeout (int): The maximum amount of time in seconds to wait for speech to start.
            Passed unchanged to ``Recognizer.listen``; the default 0 presumably means
            "no limit" -- verify against the speech_recognition documentation.
        phrase_time_limit (int): The maximum amount of time in seconds for a single phrase
            of speech. Passed unchanged to ``Recognizer.listen`` (same note as ``timeout``).
        language (str): The language code for the speech recognition. Default is "nl-NL". For English use "en-US".
        adjust_ambiance_seconds (int): The number of seconds to adjust to ambient noise. Default is 1;
            a falsy value skips the adjustment.
        save_to (str): The path to save the captured audio (WAV data) to. Default is None.
        recognize (bool): Whether to recognize speech or not. Default is True.
    Returns:
        str: The recognized speech as a string or True if recognize is set to False.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        if adjust_ambiance_seconds:
            logger.info(
                "Adjusting to surroundings for %s seconds.",
                adjust_ambiance_seconds,
            )
            recognizer.adjust_for_ambient_noise(source, duration=adjust_ambiance_seconds)

        while True:
            logger.info("Listening...")
            try:
                audio_data = recognizer.listen(
                    source,
                    timeout=timeout,
                    phrase_time_limit=phrase_time_limit,
                )
            except sr.WaitTimeoutError:
                # Nobody spoke within the timeout; listen again.
                continue

            if save_to:
                with open(save_to, "wb") as f:
                    f.write(audio_data.get_wav_data())
                # Logged after the write so the message is only emitted when
                # the file really exists.
                logger.debug("Saved to %s", save_to)

            if not recognize:
                logger.info("Recognition is disabled so returning True.")
                return True

            try:
                text = recognizer.recognize_google(audio_data, language=language)
            except sr.UnknownValueError:
                logger.debug("Speech was not understood; listening again.")
                continue
            except sr.RequestError as exc:
                # Still best effort, but no longer silent: a persistent
                # failure would otherwise look like the function is hanging.
                logger.warning("Speech recognition request failed; retrying: %s", exc)
                continue

            logger.info("Returning %s", text)
            return text
|
||||||
|
|
||||||
|
async def listen_async(
    timeout: int = 0,
    phrase_time_limit: int = 0,
    language: str = "nl-NL",
    adjust_ambiance_seconds: int = 1,
    save_to=None,
    recognize=True,
):
    """Awaitable wrapper around the blocking listen().

    listen() is run through the event loop's ``run_in_executor`` with the
    default executor, and its result is returned. All arguments are
    forwarded to listen() unchanged.
    """
    running_loop = asyncio.get_running_loop()
    return await running_loop.run_in_executor(
        None,
        lambda: listen(
            timeout=timeout,
            phrase_time_limit=phrase_time_limit,
            language=language,
            adjust_ambiance_seconds=adjust_ambiance_seconds,
            save_to=save_to,
            recognize=recognize,
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def main_async():
    """Endlessly listen for a phrase, print it, and pause three seconds."""
    while True:
        print("Listening...")
        print(await listen_async())
        # asyncio.sleep instead of time.sleep: a blocking sleep inside a
        # coroutine freezes the whole event loop for its duration.
        await asyncio.sleep(3)
|
||||||
|
|
||||||
|
def main():
    """Console entry point: run the listen loop until interrupted with Ctrl-C."""
    listen_loop = main_async()
    try:
        asyncio.run(listen_loop)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; exit without a traceback.
        return


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
18
src/rspeech/tts.py
Normal file
18
src/rspeech/tts.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
import asyncio
|
||||||
|
from rspeech.gcloud import tts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def main_async():
    """Interactive prompt: speak every line typed until an empty line or EOF."""
    await tts("Type a few times return to stop.")
    while True:
        try:
            text = input("> ").strip()
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): stop cleanly
            # instead of crashing with a traceback.
            break
        if not text:
            break
        await tts(text)
|
||||||
|
|
||||||
|
def main():
    """Console entry point: run the interactive prompt until it ends."""
    try:
        asyncio.run(main_async())
    except KeyboardInterrupt:
        # Same behaviour as the speech-to-text entry point: Ctrl-C ends the
        # program without a traceback.
        pass


# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user