From a847df0b81be7bd16997cecbde501badf6f29ab7 Mon Sep 17 00:00:00 2001 From: sarane22 Date: Mon, 8 Jun 2026 09:47:08 +0530 Subject: [PATCH 1/6] chore(all): Renaming Riva to NemotronSpeech --- README.md | 20 +++++----- {riva => nemotronspeech}/client/__init__.py | 20 +++++----- .../client/argparse_utils.py | 0 {riva => nemotronspeech}/client/asr.py | 24 ++++++------ {riva => nemotronspeech}/client/audio_io.py | 0 {riva => nemotronspeech}/client/auth.py | 4 +- {riva => nemotronspeech}/client/health.py | 0 {riva => nemotronspeech}/client/nlp.py | 26 ++++++------- {riva => nemotronspeech}/client/nmt.py | 24 ++++++------ .../client/package_info.py | 6 +-- .../client/proto/__init__.py | 0 {riva => nemotronspeech}/client/realtime.py | 0 {riva => nemotronspeech}/client/tts.py | 18 ++++----- ...nt.py => nemotron_streaming_asr_client.py} | 30 +++++++-------- scripts/asr/realtime_asr_client.py | 18 ++++----- scripts/asr/transcribe_file.py | 38 +++++++++---------- scripts/asr/transcribe_file_offline.py | 24 ++++++------ scripts/asr/transcribe_mic.py | 32 ++++++++-------- scripts/nlp/punctuation_client.py | 18 ++++----- scripts/nmt/nmt.py | 14 +++---- scripts/nmt/nmt_speech_to_speech.py | 18 ++++----- scripts/nmt/nmt_speech_to_text.py | 16 ++++---- scripts/tts/realtime_tts_client.py | 10 ++--- scripts/tts/talk.py | 22 +++++------ setup.py | 6 +-- tests/integration/asr.sh | 4 +- .../asr/define_test_control_vars.sh | 2 +- .../time_stamps_AntiBERTa.txt | 0 ... => test_nemotron_streaming_asr_client.sh} | 16 ++++---- tests/unit/test_asr.py | 8 ++-- tests/unit/test_auth.py | 2 +- tests/unit/test_nlp.py | 6 +-- tests/unit/test_tts.py | 8 ++-- 33 files changed, 217 insertions(+), 217 deletions(-) rename {riva => nemotronspeech}/client/__init__.py (55%) rename {riva => nemotronspeech}/client/argparse_utils.py (100%) rename {riva => nemotronspeech}/client/asr.py (95%) rename {riva => nemotronspeech}/client/audio_io.py (100%) rename {riva => nemotronspeech}/client/auth.py (97%) rename {riva => nemotronspeech}/client/health.py (100%) rename {riva => nemotronspeech}/client/nlp.py (93%) rename {riva => nemotronspeech}/client/nmt.py (85%) rename {riva => nemotronspeech}/client/package_info.py (82%) rename {riva => nemotronspeech}/client/proto/__init__.py (100%) rename {riva => nemotronspeech}/client/realtime.py (100%) rename {riva => nemotronspeech}/client/tts.py (91%) rename scripts/asr/{riva_streaming_asr_client.py => nemotron_streaming_asr_client.py} (83%) rename tests/integration/asr/reference_outputs/{test_riva_streaming_asr_client => test_nemotron_streaming_asr_client}/time_stamps_AntiBERTa.txt (100%) rename tests/integration/asr/{test_riva_streaming_asr_client.sh => test_nemotron_streaming_asr_client.sh} (89%) diff --git a/README.md b/README.md index 31f62248..7f3fa781 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,19 @@ [![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT) -# NVIDIA Riva Clients +# NVIDIA NemotronSpeech Clients -NVIDIA Riva is a GPU-accelerated SDK for building Speech AI applications that are customized for your use +NemotronSpeech is a GPU-accelerated SDK for building Speech AI applications that are customized for your use case and deliver real-time performance. This repo provides performant client example command-line clients. ## Main API -- `riva.client.ASRService` is a class for speech recognition, -- `riva.client.TTSService` is a class for speech synthesis, -- `riva.client.NLPService` is a class for natural language processing. +- `nemotronspeech.client.ASRService` is a class for speech recognition, +- `nemotronspeech.client.TTSService` is a class for speech synthesis, +- `nemotronspeech.client.NLPService` is a class for natural language processing. ## CLI interface - **Automatic Speech Recognition (ASR)** - - `scripts/asr/riva_streaming_asr_client.py` demonstrates streaming transcription in several threads, can print time stamps. + - `scripts/asr/nemotron_streaming_asr_client.py` demonstrates streaming transcription in several threads, can print time stamps. - `scripts/asr/transcribe_file.py` performs streaming transcription, - `scripts/asr/transcribe_file_offline.py` performs offline transcription, - `scripts/asr/transcribe_mic.py` performs streaming transcription of audio acquired through microphone. @@ -47,12 +47,12 @@ pip install --force-reinstall dist/*.whl ``` 3. `pip`: ```bash -pip install nvidia-riva-client +pip install nvidia-nemotronspeech-client ``` If you would like to use output and input audio devices (scripts `scripts/asr/transcribe_file_rt.py`, `scripts/asr/transcribe_mic.py`, `scripts/tts/talk.py`, `scripts/asr/realtime_asr_client.py`, `scripts/tts/realtime_tts_client.py` or module -`riva.client/audio_io.py`), you will need to install `PyAudio`. +`nemotronspeech.client/audio_io.py`), you will need to install `PyAudio`. ```bash conda install -c anaconda pyaudio ``` @@ -82,7 +82,7 @@ and restart. ### Server -Before running client part of Riva, please set up a server. The simplest +Before running client part of NemotronSpeech, please set up a server. The simplest way to do this is to follow [quick start guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html#local-deployment-using-quick-start-scripts). @@ -280,7 +280,7 @@ See tutorial notebooks in directory `tutorials`. ## Documentation -Additional documentation on the Riva Speech Skills SDK can be found [here](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/). +Additional documentation on the NemotronSpeech Skills SDK can be found [here](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/). ## License diff --git a/riva/client/__init__.py b/nemotronspeech/client/__init__.py similarity index 55% rename from riva/client/__init__.py rename to nemotronspeech/client/__init__.py index 7656bd69..9f248cdc 100644 --- a/riva/client/__init__.py +++ b/nemotronspeech/client/__init__.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: MIT -from riva.client.asr import ( +from nemotronspeech.client.asr import ( AudioChunkFileIterator, ASRService, add_audio_file_specs_to_config, @@ -14,15 +14,15 @@ add_endpoint_parameters_to_config, add_custom_configuration_to_config, ) -from riva.client.auth import Auth -from riva.client.nlp import ( +from nemotronspeech.client.auth import Auth +from nemotronspeech.client.nlp import ( NLPService, extract_all_text_classes_and_confidences, extract_all_token_classification_predictions, extract_most_probable_text_class_and_confidence, extract_most_probable_token_classification_predictions, ) -from riva.client.package_info import ( +from nemotronspeech.client.package_info import ( __contact_emails__, __contact_names__, __description__, @@ -35,9 +35,9 @@ __shortversion__, __version__, ) -from riva.client.proto.riva_asr_pb2 import RecognitionConfig, StreamingRecognitionConfig, EndpointingConfig -from riva.client.proto.riva_audio_pb2 import AudioEncoding -from riva.client.proto.riva_nlp_pb2 import AnalyzeIntentOptions -from riva.client.proto.riva_nmt_pb2 import StreamingTranslateSpeechToSpeechConfig, TranslationConfig, SynthesizeSpeechConfig, StreamingTranslateSpeechToTextConfig -from riva.client.tts import SpeechSynthesisService -from riva.client.nmt import NeuralMachineTranslationClient +from nemotronspeech.client.proto.riva_asr_pb2 import RecognitionConfig, StreamingRecognitionConfig, EndpointingConfig +from nemotronspeech.client.proto.riva_audio_pb2 import AudioEncoding +from nemotronspeech.client.proto.riva_nlp_pb2 import AnalyzeIntentOptions +from nemotronspeech.client.proto.riva_nmt_pb2 import StreamingTranslateSpeechToSpeechConfig, TranslationConfig, SynthesizeSpeechConfig, StreamingTranslateSpeechToTextConfig +from nemotronspeech.client.tts import SpeechSynthesisService +from nemotronspeech.client.nmt import NeuralMachineTranslationClient diff --git a/riva/client/argparse_utils.py b/nemotronspeech/client/argparse_utils.py similarity index 100% rename from riva/client/argparse_utils.py rename to nemotronspeech/client/argparse_utils.py diff --git a/riva/client/asr.py b/nemotronspeech/client/asr.py similarity index 95% rename from riva/client/asr.py rename to nemotronspeech/client/asr.py index 3c86fb51..f1dab0ad 100644 --- a/riva/client/asr.py +++ b/nemotronspeech/client/asr.py @@ -16,10 +16,10 @@ from google.protobuf.json_format import MessageToJson from grpc._channel import _MultiThreadedRendezvous -import riva.client -import riva.client.proto.riva_asr_pb2 as rasr -import riva.client.proto.riva_asr_pb2_grpc as rasr_srv -from riva.client.auth import Auth +import nemotronspeech.client +import nemotronspeech.client.proto.riva_asr_pb2 as rasr +import nemotronspeech.client.proto.riva_asr_pb2_grpc as rasr_srv +from nemotronspeech.client.auth import Auth def get_wav_file_parameters(input_file: Union[str, os.PathLike]) -> Dict[str, Union[int, float]]: @@ -194,7 +194,7 @@ def print_streaming( Prints streaming speech recognition results to provided files or streams. Args: - responses (:obj:`Iterable[riva.client.proto.riva_asr_pb2.StreamingRecognizeResponse]`): responses acquired during + responses (:obj:`Iterable[nemotronspeech.client.proto.riva_asr_pb2.StreamingRecognizeResponse]`): responses acquired during streaming speech recognition. output_file (:obj:`Union[Union[os.PathLike, str, TextIO], List[Union[os.PathLike, str, TextIO]]]`, `optional`): a path to an output file or a text stream or a list of paths/streams. If contains several elements, then @@ -394,7 +394,7 @@ def __init__(self, auth: Auth) -> None: Initializes an instance of the class. Args: - auth (:obj:`riva.client.auth.Auth`): an instance of :class:`riva.client.auth.Auth` which is used for + auth (:obj:`nemotronspeech.client.auth.Auth`): an instance of :class:`nemotronspeech.client.auth.Auth` which is used for authentication metadata generation. """ self.auth = auth @@ -420,7 +420,7 @@ def streaming_response_generator( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - streaming_config (:obj:`riva.client.proto.riva_asr_pb2.StreamingRecognitionConfig`): a config for streaming. + streaming_config (:obj:`nemotronspeech.client.proto.riva_asr_pb2.StreamingRecognitionConfig`): a config for streaming. You may find description of config fields in message ``StreamingRecognitionConfig`` in `common repo `_. @@ -428,12 +428,12 @@ def streaming_response_generator( .. code-style:: python - from riva.client import RecognitionConfig, StreamingRecognitionConfig + from nemotronspeech.client import RecognitionConfig, StreamingRecognitionConfig config = RecognitionConfig(enable_automatic_punctuation=True) streaming_config = StreamingRecognitionConfig(config, interim_results=True) Yields: - :obj:`riva.client.proto.riva_asr_pb2.StreamingRecognizeResponse`: responses for audio chunks in + :obj:`nemotronspeech.client.proto.riva_asr_pb2.StreamingRecognizeResponse`: responses for audio chunks in :param:`audio_chunks`. You may find description of response fields in declaration of ``StreamingRecognizeResponse`` message `here @@ -459,7 +459,7 @@ def offline_recognize( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - config (:obj:`riva.client.proto.riva_asr_pb2.RecognitionConfig`): a config for offline speech recognition. + config (:obj:`nemotronspeech.client.proto.riva_asr_pb2.RecognitionConfig`): a config for offline speech recognition. You may find description of config fields in message ``RecognitionConfig`` in `common repo `_. @@ -467,13 +467,13 @@ def offline_recognize( .. code-style:: python - from riva.client import RecognitionConfig + from nemotronspeech.client import RecognitionConfig config = RecognitionConfig(enable_automatic_punctuation=True) future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_asr_pb2.RecognizeResponse, grpc._channel._MultiThreadedRendezvous]``: a + :obj:`Union[nemotronspeech.client.proto.riva_asr_pb2.RecognizeResponse, grpc._channel._MultiThreadedRendezvous]``: a response with results of :param:`audio_bytes` processing. You may find description of response fields in declaration of ``RecognizeResponse`` message `here `_. diff --git a/riva/client/audio_io.py b/nemotronspeech/client/audio_io.py similarity index 100% rename from riva/client/audio_io.py rename to nemotronspeech/client/audio_io.py diff --git a/riva/client/auth.py b/nemotronspeech/client/auth.py similarity index 97% rename from riva/client/auth.py rename to nemotronspeech/client/auth.py index 8a4688d4..63ed039b 100644 --- a/riva/client/auth.py +++ b/nemotronspeech/client/auth.py @@ -68,14 +68,14 @@ def __init__( Initialize the Auth class for establishing secure connections with a server. This class handles SSL/TLS configuration, authentication metadata, and gRPC channel creation - for secure communication with Riva services. + for secure communication with NemotronSpeech services. Args: ssl_root_cert (Optional[Union[str, os.PathLike]], optional): Path to the SSL root certificate file. If provided and use_ssl is False, SSL will still be enabled. Defaults to None. use_ssl (bool, optional): Whether to use SSL/TLS encryption. If True and ssl_root_cert is None, SSL will be used with default credentials. Defaults to False. - uri (str, optional): The Riva server URI in format "host:port". Defaults to "localhost:50051". + uri (str, optional): The NemotronSpeech server URI in format "host:port". Defaults to "localhost:50051". metadata_args (List[List[str]], optional): List of metadata key-value pairs for authentication. Each inner list should contain exactly 2 elements: [key, value]. Defaults to None. ssl_client_cert (Optional[Union[str, os.PathLike]], optional): Path to the SSL client certificate file. diff --git a/riva/client/health.py b/nemotronspeech/client/health.py similarity index 100% rename from riva/client/health.py rename to nemotronspeech/client/health.py diff --git a/riva/client/nlp.py b/nemotronspeech/client/nlp.py similarity index 93% rename from riva/client/nlp.py rename to nemotronspeech/client/nlp.py index 121524a7..2405c346 100644 --- a/riva/client/nlp.py +++ b/nemotronspeech/client/nlp.py @@ -6,9 +6,9 @@ from google.protobuf.message import Message from grpc._channel import _MultiThreadedRendezvous -import riva.client.proto.riva_nlp_pb2 as rnlp -import riva.client.proto.riva_nlp_pb2_grpc as rnlp_srv -from riva.client import Auth +import nemotronspeech.client.proto.riva_nlp_pb2 as rnlp +import nemotronspeech.client.proto.riva_nlp_pb2_grpc as rnlp_srv +from nemotronspeech.client import Auth def extract_all_text_classes_and_confidences( @@ -103,7 +103,7 @@ def __init__(self, auth: Auth) -> None: Initializes an instance of the class. Args: - auth (:obj:`Auth`): an instance of :class:`riva.client.auth.Auth` which is used for + auth (:obj:`Auth`): an instance of :class:`nemotronspeech.client.auth.Auth` which is used for authentication metadata generation. """ self.auth = auth @@ -125,7 +125,7 @@ def classify_text( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nlp_pb2.TextClassResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TextClassResponse, grpc._channel._MultiThreadedRendezvous]`: a response with :param:`input_strings` classification results. You may find :class:`TextClassResponse` fields description `here `_. @@ -158,7 +158,7 @@ def classify_tokens( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a response with results. You may find :class:`TokenClassResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -189,7 +189,7 @@ def transform_text( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a model response. You may find :class:`TextTransformResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -211,7 +211,7 @@ def analyze_entities( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a model response. You may find :class:`TokenClassResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -233,15 +233,15 @@ def analyze_intent( Args: input_string (:obj:`str`): a string which will be classified. - options (:obj:`riva.client.proto.riva_nlp_pb2.AnalyzeIntentOptions`, `optional`, - defaults to :obj:`riva.client.proto.riva_nlp_pb2.AnalyzeIntentOptions()`): + options (:obj:`nemotronspeech.client.proto.riva_nlp_pb2.AnalyzeIntentOptions`, `optional`, + defaults to :obj:`nemotronspeech.client.proto.riva_nlp_pb2.AnalyzeIntentOptions()`): an intent options. You may find fields description `here `_. Defaults to an instance of :obj:`AnalyzeIntentOptions` created without parameters. future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nlp_pb2.AnalyzeIntentResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.AnalyzeIntentResponse, grpc._channel._MultiThreadedRendezvous]`: a response with results. You may find fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -272,7 +272,7 @@ def punctuate_text( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a response with results. You may find fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -296,7 +296,7 @@ def natural_query( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nlp_pb2.NaturalQueryResult, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.NaturalQueryResult, grpc._channel._MultiThreadedRendezvous]`: a response with a result. You may find fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a diff --git a/riva/client/nmt.py b/nemotronspeech/client/nmt.py similarity index 85% rename from riva/client/nmt.py rename to nemotronspeech/client/nmt.py index 39a0174e..ad7d44f0 100644 --- a/riva/client/nmt.py +++ b/nemotronspeech/client/nmt.py @@ -4,9 +4,9 @@ from typing import Callable, Dict, Generator, Iterable, List, Optional, TextIO, Union from grpc._channel import _MultiThreadedRendezvous -import riva.client.proto.riva_nmt_pb2 as riva_nmt -import riva.client.proto.riva_nmt_pb2_grpc as riva_nmt_srv -from riva.client import Auth +import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt +import nemotronspeech.client.proto.riva_nmt_pb2_grpc as riva_nmt_srv +from nemotronspeech.client import Auth def streaming_s2s_request_generator( audio_chunks: Iterable[bytes], streaming_config: riva_nmt.StreamingTranslateSpeechToSpeechConfig @@ -39,7 +39,7 @@ def __init__(self, auth: Auth) -> None: Initializes an instance of the class. Args: - auth (:obj:`Auth`): an instance of :class:`riva.client.auth.Auth` which is used for authentication metadata + auth (:obj:`Auth`): an instance of :class:`nemotronspeech.client.auth.Auth` which is used for authentication metadata generation. """ self.auth = auth @@ -65,7 +65,7 @@ def streaming_s2s_response_generator( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - streaming_config (:obj:`riva.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToSpeechConfig`): a config for streaming. + streaming_config (:obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToSpeechConfig`): a config for streaming. You may find description of config fields in message ``StreamingTranslateSpeechToSpeechConfig`` in `common repo `_. @@ -73,7 +73,7 @@ def streaming_s2s_response_generator( .. code-style:: python - from riva.client import RecognitionConfig, StreamingRecognitionConfig, StreamingTranslateSpeechToSpeechConfig, TranslationConfig, SynthesizeSpeechConfig + from nemotronspeech.client import RecognitionConfig, StreamingRecognitionConfig, StreamingTranslateSpeechToSpeechConfig, TranslationConfig, SynthesizeSpeechConfig config = RecognitionConfig(enable_automatic_punctuation=True) asr_config = StreamingRecognitionConfig(config, interim_results=True) translation_config = TranslationConfig(source_language_code="es-US", target_language_code="en-US") @@ -81,7 +81,7 @@ def streaming_s2s_response_generator( streaming_config = StreamingTranslateSpeechToSpeechConfig(asr_config, translation_config, tts_config) Yields: - :obj:`riva.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToSpeechResponse`: responses for audio chunks in + :obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToSpeechResponse`: responses for audio chunks in :param:`audio_chunks`. You may find description of response fields in declaration of ``StreamingTranslateSpeechToSpeechResponse`` message `here @@ -112,7 +112,7 @@ def streaming_s2t_response_generator( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - streaming_config (:obj:`riva.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToTextConfig`): a config for streaming. + streaming_config (:obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToTextConfig`): a config for streaming. You may find description of config fields in message ``StreamingTranslateSpeechToTextConfig`` in `common repo `_. @@ -120,14 +120,14 @@ def streaming_s2t_response_generator( .. code-style:: python - from riva.client import RecognitionConfig, StreamingRecognitionConfig, StreamingTranslateSpeechToTextConfig, TranslationConfig + from nemotronspeech.client import RecognitionConfig, StreamingRecognitionConfig, StreamingTranslateSpeechToTextConfig, TranslationConfig config = RecognitionConfig(enable_automatic_punctuation=True) asr_config = StreamingRecognitionConfig(config, interim_results=True) translation_config = TranslationConfig(source_language_code="es-US", target_language_code="en-US") streaming_config = StreamingTranslateSpeechToTextConfig(asr_config, translation_config) Yields: - :obj:`riva.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToTextResponse`: responses for audio chunks in + :obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToTextResponse`: responses for audio chunks in :param:`audio_chunks`. You may find description of response fields in declaration of ``StreamingTranslateSpeechToTextResponse`` message `here @@ -157,8 +157,8 @@ def translate( response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[riva.client.proto.riva_nmt_pb2.TranslateTextResponse, grpc._channel._MultiThreadedRendezvous]`: - a response with output. You may find :class:`riva.client.proto.riva_nmt_pb2.TranslateTextResponse` fields + :obj:`Union[nemotronspeech.client.proto.riva_nmt_pb2.TranslateTextResponse, grpc._channel._MultiThreadedRendezvous]`: + a response with output. You may find :class:`nemotronspeech.client.proto.riva_nmt_pb2.TranslateTextResponse` fields description `here `_. """ diff --git a/riva/client/package_info.py b/nemotronspeech/client/package_info.py similarity index 82% rename from riva/client/package_info.py rename to nemotronspeech/client/package_info.py index b2297d11..ca794cd8 100644 --- a/riva/client/package_info.py +++ b/nemotronspeech/client/package_info.py @@ -12,15 +12,15 @@ __shortversion__ = '.'.join(map(str, VERSION[:3])) __version__ = '.'.join(map(str, VERSION[:3])) + ''.join(VERSION[3:]) -__package_name__ = 'nvidia-riva-client' +__package_name__ = 'nvidia-nemotronspeech-client' __contact_names__ = 'Anton Peganov' __contact_emails__ = 'apeganov@nvidia.com' __homepage__ = 'https://github.com/nvidia-riva/python-clients' __repository_url__ = 'https://github.com/nvidia-riva/python-clients' __download_url__ = 'hhttps://github.com/nvidia-riva/python-clients/releases' -__description__ = "Python implementation of the Riva Client API" +__description__ = "Python implementation of the NemotronSpeech Client API" __license__ = 'MIT' -__keywords__ = 'deep learning, machine learning, gpu, NLP, ASR, TTS, NMT, nvidia, speech, language, Riva, client' +__keywords__ = 'deep learning, machine learning, gpu, NLP, ASR, TTS, NMT, nvidia, speech, language, NemotronSpeech, client' __riva_version__ = "2.26.0" __riva_release__ = "26.05" __riva_models_version__ = "2.26.0" diff --git a/riva/client/proto/__init__.py b/nemotronspeech/client/proto/__init__.py similarity index 100% rename from riva/client/proto/__init__.py rename to nemotronspeech/client/proto/__init__.py diff --git a/riva/client/realtime.py b/nemotronspeech/client/realtime.py similarity index 100% rename from riva/client/realtime.py rename to nemotronspeech/client/realtime.py diff --git a/riva/client/tts.py b/nemotronspeech/client/tts.py similarity index 91% rename from riva/client/tts.py rename to nemotronspeech/client/tts.py index 8b0ce9a1..780ac653 100644 --- a/riva/client/tts.py +++ b/nemotronspeech/client/tts.py @@ -5,10 +5,10 @@ from grpc._channel import _MultiThreadedRendezvous -import riva.client.proto.riva_tts_pb2 as rtts -import riva.client.proto.riva_tts_pb2_grpc as rtts_srv -from riva.client import Auth -from riva.client.proto.riva_audio_pb2 import AudioEncoding +import nemotronspeech.client.proto.riva_tts_pb2 as rtts +import nemotronspeech.client.proto.riva_tts_pb2_grpc as rtts_srv +from nemotronspeech.client import Auth +from nemotronspeech.client.proto.riva_audio_pb2 import AudioEncoding import wave def parse_custom_configuration(custom_configuration: str) -> Dict[str, str]: @@ -54,7 +54,7 @@ def __init__(self, auth: Auth) -> None: Initializes an instance of the class. Args: - auth (:obj:`Auth`): an instance of :class:`riva.client.auth.Auth` which is used for authentication metadata + auth (:obj:`Auth`): an instance of :class:`nemotronspeech.client.auth.Auth` which is used for authentication metadata generation. """ self.auth = auth @@ -96,8 +96,8 @@ def synthesize( custom_configuration (:obj:`Dict[str, str]`, `optional`): Free-form key/value parameters forwarded to the synthesizer (e.g. ``{"exaggeration_factor": "1.5"}``). Model-specific. Returns: - :obj:`Union[riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse, grpc._channel._MultiThreadedRendezvous]`: - a response with output. You may find :class:`riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse` fields + :obj:`Union[nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse, grpc._channel._MultiThreadedRendezvous]`: + a response with output. You may find :class:`nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse` fields description `here `_. """ @@ -162,8 +162,8 @@ def synthesize_online( custom_configuration (:obj:`Dict[str, str]`, `optional`): Free-form key/value parameters forwarded to the synthesizer (e.g. ``{"exaggeration_factor": "1.5"}``). Model-specific. Yields: - :obj:`riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse`: a response with output. You may find - :class:`riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse` fields description `here + :obj:`nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse`: a response with output. You may find + :class:`nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a future object by calling ``result()`` method. diff --git a/scripts/asr/riva_streaming_asr_client.py b/scripts/asr/nemotron_streaming_asr_client.py similarity index 83% rename from scripts/asr/riva_streaming_asr_client.py rename to scripts/asr/nemotron_streaming_asr_client.py index 95fb3c99..e06c1af1 100644 --- a/scripts/asr/riva_streaming_asr_client.py +++ b/scripts/asr/nemotron_streaming_asr_client.py @@ -10,14 +10,14 @@ from threading import Thread from typing import Union -import riva.client -from riva.client.asr import get_wav_file_parameters -from riva.client.argparse_utils import ( +import nemotronspeech.client +from nemotronspeech.client.asr import get_wav_file_parameters +from nemotronspeech.client.argparse_utils import ( add_asr_config_argparse_parameters, add_connection_argparse_parameters, ) try: - from riva.client.argparse_utils import cli_main + from nemotronspeech.client.argparse_utils import cli_main except ImportError: def cli_main(func): return func @@ -59,7 +59,7 @@ def streaming_transcription_worker( ) -> None: output_file = Path(output_file).expanduser() try: - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -68,9 +68,9 @@ def streaming_transcription_worker( metadata_args=args.metadata, options=args.options ) - asr_service = riva.client.ASRService(auth) - config = riva.client.StreamingRecognitionConfig( - config=riva.client.RecognitionConfig( + asr_service = nemotronspeech.client.ASRService(auth) + config = nemotronspeech.client.StreamingRecognitionConfig( + config=nemotronspeech.client.RecognitionConfig( language_code=args.language_code, model=args.model_name, max_alternatives=args.max_alternatives, @@ -81,7 +81,7 @@ def streaming_transcription_worker( ), interim_results=True, ) - riva.client.add_endpoint_parameters_to_config( + nemotronspeech.client.add_endpoint_parameters_to_config( config, args.start_history, args.start_threshold, @@ -90,19 +90,19 @@ def streaming_transcription_worker( args.stop_threshold, args.stop_threshold_eou ) - riva.client.add_custom_configuration_to_config( + nemotronspeech.client.add_custom_configuration_to_config( config, args.custom_configuration ) - riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) - riva.client.add_speaker_diarization_to_config(config, args.speaker_diarization, args.diarization_max_speakers) + nemotronspeech.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) + nemotronspeech.client.add_speaker_diarization_to_config(config, args.speaker_diarization, args.diarization_max_speakers) for _ in range(args.num_iterations): - with riva.client.AudioChunkFileIterator( + with nemotronspeech.client.AudioChunkFileIterator( args.input_file, args.file_streaming_chunk, - delay_callback=riva.client.sleep_audio_length if args.simulate_realtime else None, + delay_callback=nemotronspeech.client.sleep_audio_length if args.simulate_realtime else None, ) as audio_chunk_iterator: - riva.client.print_streaming( + nemotronspeech.client.print_streaming( responses=asr_service.streaming_response_generator( audio_chunks=audio_chunk_iterator, streaming_config=config, diff --git a/scripts/asr/realtime_asr_client.py b/scripts/asr/realtime_asr_client.py index 9c6a5bd1..4391a353 100644 --- a/scripts/asr/realtime_asr_client.py +++ b/scripts/asr/realtime_asr_client.py @@ -6,15 +6,15 @@ import signal import sys -from riva.client.asr import get_wav_file_parameters, AudioChunkFileIterator -from riva.client.realtime import RealtimeClientASR -from riva.client.argparse_utils import ( +from nemotronspeech.client.asr import get_wav_file_parameters, AudioChunkFileIterator +from nemotronspeech.client.realtime import RealtimeClientASR +from nemotronspeech.client.argparse_utils import ( add_asr_config_argparse_parameters, add_realtime_config_argparse_parameters, add_connection_argparse_parameters, ) try: - from riva.client.argparse_utils import cli_main + from nemotronspeech.client.argparse_utils import cli_main except ImportError: def cli_main(func): return func @@ -120,8 +120,8 @@ def parse_args() -> argparse.Namespace: def get_default_device_index(): """Get default audio device index only when needed.""" try: - import riva.client.audio_io - default_device_info = riva.client.audio_io.get_default_input_device_info() + import nemotronspeech.client.audio_io + default_device_info = nemotronspeech.client.audio_io.get_default_input_device_info() return None if default_device_info is None else default_device_info['index'] except ModuleNotFoundError: return None @@ -145,7 +145,7 @@ async def create_audio_iterator(args): Audio iterator for streaming audio data """ if args.mic: - from riva.client.audio_io import MicrophoneStream + from nemotronspeech.client.audio_io import MicrophoneStream # Get default device index if not specified device_index = args.input_device @@ -302,8 +302,8 @@ async def main() -> None: # Handle list devices option if args.list_devices: try: - import riva.client.audio_io - riva.client.audio_io.list_input_devices() + import nemotronspeech.client.audio_io + nemotronspeech.client.audio_io.list_input_devices() except ModuleNotFoundError: print( "PyAudio not available. Install the system PortAudio headers first " diff --git a/scripts/asr/transcribe_file.py b/scripts/asr/transcribe_file.py index f474469a..a98a9f67 100644 --- a/scripts/asr/transcribe_file.py +++ b/scripts/asr/transcribe_file.py @@ -5,13 +5,13 @@ import os import sys -import riva.client -from riva.client.argparse_utils import ( +import nemotronspeech.client +from nemotronspeech.client.argparse_utils import ( add_asr_config_argparse_parameters, add_connection_argparse_parameters, ) try: - from riva.client.argparse_utils import cli_main, EXIT_BAD_INPUT + from nemotronspeech.client.argparse_utils import cli_main, EXIT_BAD_INPUT except ImportError: EXIT_BAD_INPUT = 2 def cli_main(func): @@ -67,7 +67,7 @@ def parse_args() -> argparse.Namespace: parser = add_asr_config_argparse_parameters(parser, max_alternatives=True, profanity_filter=True, word_time_offsets=True) args = parser.parse_args() if args.play_audio or args.output_device is not None or args.list_devices: - import riva.client.audio_io + import nemotronspeech.client.audio_io return args @@ -75,9 +75,9 @@ def parse_args() -> argparse.Namespace: def main() -> int: args = parse_args() if args.list_devices: - riva.client.audio_io.list_output_devices() + nemotronspeech.client.audio_io.list_output_devices() return - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -86,11 +86,11 @@ def main() -> int: metadata_args=args.metadata, options=args.options ) - asr_service = riva.client.ASRService(auth) + asr_service = nemotronspeech.client.ASRService(auth) if args.list_models: asr_models = dict() - config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(riva.client.proto.riva_asr_pb2.RivaSpeechRecognitionConfigRequest()) + config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(nemotronspeech.client.proto.riva_asr_pb2.RivaSpeechRecognitionConfigRequest()) for model_config in config_response.model_config: if model_config.parameters["type"] == "online": language_code = model_config.parameters['language_code'] @@ -109,8 +109,8 @@ def main() -> int: print(f"Invalid input file path: {args.input_file}", file=sys.stderr) return EXIT_BAD_INPUT - config = riva.client.StreamingRecognitionConfig( - config=riva.client.RecognitionConfig( + config = nemotronspeech.client.StreamingRecognitionConfig( + config=nemotronspeech.client.RecognitionConfig( language_code=args.language_code, model=args.model_name, max_alternatives=1, @@ -121,9 +121,9 @@ def main() -> int: ), interim_results=True, ) - riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) - riva.client.add_speaker_diarization_to_config(config, args.speaker_diarization, args.diarization_max_speakers) - riva.client.add_endpoint_parameters_to_config( + nemotronspeech.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) + nemotronspeech.client.add_speaker_diarization_to_config(config, args.speaker_diarization, args.diarization_max_speakers) + nemotronspeech.client.add_endpoint_parameters_to_config( config, args.start_history, args.start_threshold, @@ -132,7 +132,7 @@ def main() -> int: args.stop_threshold, args.stop_threshold_eou ) - riva.client.add_custom_configuration_to_config( + nemotronspeech.client.add_custom_configuration_to_config( config, args.custom_configuration ) @@ -143,17 +143,17 @@ def main() -> int: seglst_output_file = os.path.basename(args.input_file).split(".")[0] if args.play_audio or args.output_device is not None: - wp = riva.client.get_wav_file_parameters(args.input_file) - sound_callback = riva.client.audio_io.SoundCallBack( + wp = nemotronspeech.client.get_wav_file_parameters(args.input_file) + sound_callback = nemotronspeech.client.audio_io.SoundCallBack( args.output_device, wp['sampwidth'], wp['nchannels'], wp['framerate'], ) delay_callback = sound_callback else: - delay_callback = riva.client.sleep_audio_length if args.simulate_realtime else None - with riva.client.AudioChunkFileIterator( + delay_callback = nemotronspeech.client.sleep_audio_length if args.simulate_realtime else None + with nemotronspeech.client.AudioChunkFileIterator( args.input_file, args.file_streaming_chunk, delay_callback, ) as audio_chunk_iterator: - riva.client.print_streaming( + nemotronspeech.client.print_streaming( responses=asr_service.streaming_response_generator( audio_chunks=audio_chunk_iterator, streaming_config=config, diff --git a/scripts/asr/transcribe_file_offline.py b/scripts/asr/transcribe_file_offline.py index f637b0b6..3d5ffddc 100644 --- a/scripts/asr/transcribe_file_offline.py +++ b/scripts/asr/transcribe_file_offline.py @@ -6,13 +6,13 @@ import argparse from pathlib import Path -import riva.client -from riva.client.argparse_utils import ( +import nemotronspeech.client +from nemotronspeech.client.argparse_utils import ( add_asr_config_argparse_parameters, add_connection_argparse_parameters, ) try: - from riva.client.argparse_utils import cli_main, EXIT_BAD_INPUT + from nemotronspeech.client.argparse_utils import cli_main, EXIT_BAD_INPUT except ImportError: EXIT_BAD_INPUT = 2 def cli_main(func): @@ -44,7 +44,7 @@ def main() -> int: args = parse_args() options = [('grpc.max_receive_message_length', args.max_message_length), ('grpc.max_send_message_length', args.max_message_length)] - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -52,11 +52,11 @@ def main() -> int: uri=args.server, metadata_args=args.metadata, options=options) - asr_service = riva.client.ASRService(auth) + asr_service = nemotronspeech.client.ASRService(auth) if args.list_models: asr_models = dict() - config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(riva.client.proto.riva_asr_pb2.RivaSpeechRecognitionConfigRequest()) + config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(nemotronspeech.client.proto.riva_asr_pb2.RivaSpeechRecognitionConfigRequest()) for model_config in config_response.model_config: if model_config.parameters["type"] == "offline": language_code = model_config.parameters['language_code'] @@ -75,7 +75,7 @@ def main() -> int: print(f"Invalid input file path: {args.input_file}", file=sys.stderr) return EXIT_BAD_INPUT - config = riva.client.RecognitionConfig( + config = nemotronspeech.client.RecognitionConfig( language_code=args.language_code, model=args.model_name, max_alternatives=args.max_alternatives, @@ -84,9 +84,9 @@ def main() -> int: verbatim_transcripts=not args.no_verbatim_transcripts, enable_word_time_offsets=args.word_time_offsets or args.speaker_diarization, ) - riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) - riva.client.add_speaker_diarization_to_config(config, args.speaker_diarization, args.diarization_max_speakers) - riva.client.add_endpoint_parameters_to_config( + nemotronspeech.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) + nemotronspeech.client.add_speaker_diarization_to_config(config, args.speaker_diarization, args.diarization_max_speakers) + nemotronspeech.client.add_endpoint_parameters_to_config( config, args.start_history, args.start_threshold, @@ -95,7 +95,7 @@ def main() -> int: args.stop_threshold, args.stop_threshold_eou ) - riva.client.add_custom_configuration_to_config( + nemotronspeech.client.add_custom_configuration_to_config( config, args.custom_configuration ) @@ -104,7 +104,7 @@ def main() -> int: seglst_output_file = None if args.output_seglst: seglst_output_file = os.path.basename(args.input_file).split(".")[0] - riva.client.print_offline( + nemotronspeech.client.print_offline( response=asr_service.offline_recognize(data, config), speaker_diarization=args.speaker_diarization, seglst_output_file=seglst_output_file, diff --git a/scripts/asr/transcribe_mic.py b/scripts/asr/transcribe_mic.py index 320562dd..461f1b8c 100644 --- a/scripts/asr/transcribe_mic.py +++ b/scripts/asr/transcribe_mic.py @@ -4,20 +4,20 @@ import argparse import sys -import riva.client -from riva.client.argparse_utils import ( +import nemotronspeech.client +from nemotronspeech.client.argparse_utils import ( add_asr_config_argparse_parameters, add_connection_argparse_parameters, ) try: - from riva.client.argparse_utils import cli_main, EXIT_BAD_INPUT + from nemotronspeech.client.argparse_utils import cli_main, EXIT_BAD_INPUT except ImportError: EXIT_BAD_INPUT = 2 def cli_main(func): return func try: - import riva.client.audio_io + import nemotronspeech.client.audio_io except ModuleNotFoundError as e: print(f"ModuleNotFoundError: {e}", file=sys.stderr) print( @@ -29,7 +29,7 @@ def cli_main(func): sys.exit(EXIT_BAD_INPUT) def parse_args() -> argparse.Namespace: - default_device_info = riva.client.audio_io.get_default_input_device_info() + default_device_info = nemotronspeech.client.audio_io.get_default_input_device_info() default_device_index = None if default_device_info is None else default_device_info['index'] parser = argparse.ArgumentParser( description="Streaming transcription from microphone via Riva AI Services", @@ -59,9 +59,9 @@ def parse_args() -> argparse.Namespace: def main() -> int: args = parse_args() if args.list_devices: - riva.client.audio_io.list_input_devices() + nemotronspeech.client.audio_io.list_input_devices() return - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -70,10 +70,10 @@ def main() -> int: metadata_args=args.metadata, options=args.options ) - asr_service = riva.client.ASRService(auth) - config = riva.client.StreamingRecognitionConfig( - config=riva.client.RecognitionConfig( - encoding=riva.client.AudioEncoding.LINEAR_PCM, + asr_service = nemotronspeech.client.ASRService(auth) + config = nemotronspeech.client.StreamingRecognitionConfig( + config=nemotronspeech.client.RecognitionConfig( + encoding=nemotronspeech.client.AudioEncoding.LINEAR_PCM, language_code=args.language_code, model=args.model_name, max_alternatives=1, @@ -85,8 +85,8 @@ def main() -> int: ), interim_results=True, ) - riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) - riva.client.add_endpoint_parameters_to_config( + nemotronspeech.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) + nemotronspeech.client.add_endpoint_parameters_to_config( config, args.start_history, args.start_threshold, @@ -95,16 +95,16 @@ def main() -> int: args.stop_threshold, args.stop_threshold_eou ) - riva.client.add_custom_configuration_to_config( + nemotronspeech.client.add_custom_configuration_to_config( config, args.custom_configuration ) - with riva.client.audio_io.MicrophoneStream( + with nemotronspeech.client.audio_io.MicrophoneStream( args.sample_rate_hz, args.file_streaming_chunk, device=args.input_device, ) as audio_chunk_iterator: - riva.client.print_streaming( + nemotronspeech.client.print_streaming( responses=asr_service.streaming_response_generator( audio_chunks=audio_chunk_iterator, streaming_config=config, diff --git a/scripts/nlp/punctuation_client.py b/scripts/nlp/punctuation_client.py index 437cce21..057dddec 100644 --- a/scripts/nlp/punctuation_client.py +++ b/scripts/nlp/punctuation_client.py @@ -4,8 +4,8 @@ import argparse import time -import riva.client -from riva.client.argparse_utils import add_connection_argparse_parameters +import nemotronspeech.client +from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters def parse_args() -> argparse.Namespace: @@ -39,7 +39,7 @@ def parse_args() -> argparse.Namespace: def run_punct_capit(args: argparse.Namespace) -> None: - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -48,12 +48,12 @@ def run_punct_capit(args: argparse.Namespace) -> None: metadata_args=args.metadata, options=args.options ) - nlp_service = riva.client.NLPService(auth) + nlp_service = nemotronspeech.client.NLPService(auth) if args.interactive: while True: query = input("Enter a query: ") start = time.time() - result = riva.client.nlp.extract_most_probable_transformed_text( + result = nemotronspeech.client.nlp.extract_most_probable_transformed_text( nlp_service.punctuate_text( input_strings=query, model_name=args.model, language_code=args.language_code ) @@ -63,7 +63,7 @@ def run_punct_capit(args: argparse.Namespace) -> None: print(result, end='\n' * 2) else: print( - riva.client.nlp.extract_most_probable_transformed_text( + nemotronspeech.client.nlp.extract_most_probable_transformed_text( nlp_service.punctuate_text( input_strings=args.query, model_name=args.model, language_code=args.language_code ) @@ -142,7 +142,7 @@ def run_tests(args: argparse.Namespace) -> int: ], } - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -151,11 +151,11 @@ def run_tests(args: argparse.Namespace) -> int: metadata_args=args.metadata, options=args.options ) - nlp_service = riva.client.NLPService(auth) + nlp_service = nemotronspeech.client.NLPService(auth) fail_count = 0 for input_, output_ref in zip(test_inputs[args.language_code], test_output_ref[args.language_code]): - pred = riva.client.nlp.extract_most_probable_transformed_text( + pred = nemotronspeech.client.nlp.extract_most_probable_transformed_text( nlp_service.punctuate_text( input_strings=input_, model_name=args.model, diff --git a/scripts/nmt/nmt.py b/scripts/nmt/nmt.py index 85861efc..b928e25b 100644 --- a/scripts/nmt/nmt.py +++ b/scripts/nmt/nmt.py @@ -30,13 +30,13 @@ import os import sys -import riva.client.proto.riva_nmt_pb2 as riva_nmt -import riva.client.proto.riva_nmt_pb2_grpc as riva_nmt_srv +import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt +import nemotronspeech.client.proto.riva_nmt_pb2_grpc as riva_nmt_srv -import riva.client -from riva.client.argparse_utils import add_connection_argparse_parameters +import nemotronspeech.client +from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters try: - from riva.client.argparse_utils import cli_main, EXIT_BAD_INPUT + from nemotronspeech.client.argparse_utils import cli_main, EXIT_BAD_INPUT except ImportError: EXIT_BAD_INPUT = 2 def cli_main(func): @@ -110,7 +110,7 @@ def parse_args() -> argparse.Namespace: def main() -> int: args = parse_args() - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -119,7 +119,7 @@ def main() -> int: metadata_args=args.metadata, options=args.options ) - nmt_client = riva.client.NeuralMachineTranslationClient(auth) + nmt_client = nemotronspeech.client.NeuralMachineTranslationClient(auth) def request(inputs): dnt_phrases_input = {} diff --git a/scripts/nmt/nmt_speech_to_speech.py b/scripts/nmt/nmt_speech_to_speech.py index d06f5819..7542861e 100644 --- a/scripts/nmt/nmt_speech_to_speech.py +++ b/scripts/nmt/nmt_speech_to_speech.py @@ -4,12 +4,12 @@ import wave from typing import Iterator -import riva.client -import riva.client.proto.riva_asr_pb2 as riva_asr_pb2 -import riva.client.proto.riva_nmt_pb2 as riva_nmt_pb2 -from riva.client.argparse_utils import add_connection_argparse_parameters +import nemotronspeech.client +import nemotronspeech.client.proto.riva_asr_pb2 as riva_asr_pb2 +import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt_pb2 +from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters try: - from riva.client.argparse_utils import cli_main + from nemotronspeech.client.argparse_utils import cli_main except ImportError: def cli_main(func): return func @@ -37,7 +37,7 @@ def main(): if not os.path.exists(args.audio_file): raise FileNotFoundError(f"Input audio file not found: {args.audio_file}") - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -46,7 +46,7 @@ def main(): metadata_args=args.metadata, options=args.options ) - nmt_client = riva.client.NeuralMachineTranslationClient(auth) + nmt_client = nemotronspeech.client.NeuralMachineTranslationClient(auth) if args.list_models: response = nmt_client.get_config() @@ -72,7 +72,7 @@ def main(): # Create synthesis config tts_config = riva_nmt_pb2.SynthesizeSpeechConfig( - encoding=riva.client.AudioEncoding.LINEAR_PCM, + encoding=nemotronspeech.client.AudioEncoding.LINEAR_PCM, language_code=args.target_language, voice_name=args.voice, sample_rate_hz=args.sample_rate_hz, @@ -84,7 +84,7 @@ def main(): ) responses = nmt_client.streaming_s2s_response_generator( - audio_chunks=riva.client.AudioChunkFileIterator(args.audio_file, 100), streaming_config=streaming_config + audio_chunks=nemotronspeech.client.AudioChunkFileIterator(args.audio_file, 100), streaming_config=streaming_config ) output_file = None diff --git a/scripts/nmt/nmt_speech_to_text.py b/scripts/nmt/nmt_speech_to_text.py index dbbd7607..adef304f 100644 --- a/scripts/nmt/nmt_speech_to_text.py +++ b/scripts/nmt/nmt_speech_to_text.py @@ -2,12 +2,12 @@ import os import sys -import riva.client -import riva.client.proto.riva_asr_pb2 as riva_asr_pb2 -import riva.client.proto.riva_nmt_pb2 as riva_nmt_pb2 -from riva.client.argparse_utils import add_connection_argparse_parameters +import nemotronspeech.client +import nemotronspeech.client.proto.riva_asr_pb2 as riva_asr_pb2 +import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt_pb2 +from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters try: - from riva.client.argparse_utils import cli_main + from nemotronspeech.client.argparse_utils import cli_main except ImportError: def cli_main(func): return func @@ -51,7 +51,7 @@ def main(): if not os.path.exists(args.audio_file): raise FileNotFoundError(f"Input audio file not found: {args.audio_file}") - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -60,7 +60,7 @@ def main(): metadata_args=args.metadata, options=args.options ) - nmt_client = riva.client.NeuralMachineTranslationClient(auth) + nmt_client = nemotronspeech.client.NeuralMachineTranslationClient(auth) if args.list_models: response = nmt_client.get_config(args.model) @@ -95,7 +95,7 @@ def main(): ) responses = nmt_client.streaming_s2t_response_generator( - audio_chunks=riva.client.AudioChunkFileIterator(args.audio_file, 100), + audio_chunks=nemotronspeech.client.AudioChunkFileIterator(args.audio_file, 100), streaming_config=streaming_config ) diff --git a/scripts/tts/realtime_tts_client.py b/scripts/tts/realtime_tts_client.py index 3144450b..6b6c1972 100644 --- a/scripts/tts/realtime_tts_client.py +++ b/scripts/tts/realtime_tts_client.py @@ -20,14 +20,14 @@ import websockets from websockets.exceptions import WebSocketException -from riva.client.argparse_utils import add_connection_argparse_parameters +from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters try: - from riva.client.argparse_utils import cli_main + from nemotronspeech.client.argparse_utils import cli_main except ImportError: def cli_main(func): return func -from riva.client.realtime import RealtimeClientTTS +from nemotronspeech.client.realtime import RealtimeClientTTS logging.basicConfig( level=logging.INFO, @@ -489,8 +489,8 @@ async def main() -> int: voices = RealtimeClientTTS(args=args).list_voices() print(json.dumps(voices, indent=4)) elif args.list_devices: - import riva.client.audio_io - riva.client.audio_io.list_output_devices() + import nemotronspeech.client.audio_io + nemotronspeech.client.audio_io.list_output_devices() else: # Use parallel processing if num_parallel_requests > 1 if args.num_parallel_requests > 1: diff --git a/scripts/tts/talk.py b/scripts/tts/talk.py index cca41817..ca2f944e 100644 --- a/scripts/tts/talk.py +++ b/scripts/tts/talk.py @@ -8,15 +8,15 @@ import json from pathlib import Path -import riva.client -from riva.client.argparse_utils import add_connection_argparse_parameters +import nemotronspeech.client +from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters try: - from riva.client.argparse_utils import cli_main, EXIT_BAD_INPUT + from nemotronspeech.client.argparse_utils import cli_main, EXIT_BAD_INPUT except ImportError: EXIT_BAD_INPUT = 2 def cli_main(func): return func -from riva.client.proto.riva_audio_pb2 import AudioEncoding +from nemotronspeech.client.proto.riva_audio_pb2 import AudioEncoding def read_file_to_dict(file_path): result_dict = {} @@ -97,7 +97,7 @@ def parse_args() -> argparse.Namespace: args.output = args.output.expanduser() try: if args.list_devices or args.output_device or args.play_audio: - import riva.client.audio_io + import nemotronspeech.client.audio_io except ModuleNotFoundError as e: print(f"ModuleNotFoundError: {e}") print( @@ -116,7 +116,7 @@ def main() -> int: print("Empty output file path not allowed", file=sys.stderr) return EXIT_BAD_INPUT if args.list_devices: - riva.client.audio_io.list_output_devices() + nemotronspeech.client.audio_io.list_output_devices() return if args.options is None: @@ -124,7 +124,7 @@ def main() -> int: args.options.append(('grpc.max_receive_message_length', args.max_message_length)) args.options.append(('grpc.max_send_message_length', args.max_message_length)) - auth = riva.client.Auth( + auth = nemotronspeech.client.Auth( ssl_root_cert=args.ssl_root_cert, ssl_client_cert=args.ssl_client_cert, ssl_client_key=args.ssl_client_key, @@ -133,14 +133,14 @@ def main() -> int: metadata_args=args.metadata, options=args.options ) - service = riva.client.SpeechSynthesisService(auth) + service = nemotronspeech.client.SpeechSynthesisService(auth) nchannels = 1 sampwidth = 2 sound_stream, out_f = None, None if args.list_voices: config_response = service.stub.GetRivaSynthesisConfig( - riva.client.proto.riva_tts_pb2.RivaSynthesisConfigRequest() + nemotronspeech.client.proto.riva_tts_pb2.RivaSynthesisConfigRequest() ) tts_models = dict() for model_config in config_response.model_config: @@ -169,7 +169,7 @@ def main() -> int: return EXIT_BAD_INPUT try: if args.output_device is not None or args.play_audio: - sound_stream = riva.client.audio_io.SoundCallBack( + sound_stream = nemotronspeech.client.audio_io.SoundCallBack( args.output_device, nchannels=nchannels, sampwidth=sampwidth, framerate=args.sample_rate_hz ) if args.output is not None: @@ -195,7 +195,7 @@ def main() -> int: custom_configuration_kwargs = {} if args.custom_configuration: - from riva.client.tts import parse_custom_configuration + from nemotronspeech.client.tts import parse_custom_configuration custom_configuration_kwargs['custom_configuration'] = parse_custom_configuration(args.custom_configuration) print("Generating audio for request...") diff --git a/setup.py b/setup.py index bf256402..aecc12fd 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ from setuptools.command.build_py import build_py -spec = importlib.util.spec_from_file_location('package_info', 'riva/client/package_info.py') +spec = importlib.util.spec_from_file_location('package_info', 'nemotronspeech/client/package_info.py') package_info = importlib.util.module_from_spec(spec) spec.loader.exec_module(package_info) @@ -43,7 +43,7 @@ class BuildPyCommand(build_py): def run(self): if not self.dry_run: - target_dir = setup_py_dir / 'riva/client/proto' + target_dir = setup_py_dir / 'nemotronspeech/client/proto' for elem in target_dir.iterdir(): if elem.name != '__init__.py': if elem.is_dir(): @@ -139,7 +139,7 @@ def req_file(filename): maintainer_email=__contact_emails__, keywords=__keywords__, # packages=setuptools.find_packages(exclude=['tests', 'tutorials', 'scripts']), - package_dir={"riva.client": "riva/client"}, + package_dir={"nemotronspeech.client": "nemotronspeech/client"}, cmdclass={"build_py": BuildPyCommand}, classifiers=[ "Development Status :: 4 - Beta", diff --git a/tests/integration/asr.sh b/tests/integration/asr.sh index 0ea43206..52a7228d 100644 --- a/tests/integration/asr.sh +++ b/tests/integration/asr.sh @@ -23,8 +23,8 @@ rm -rf out echo "Testing script transcribe_mic.py" bash "$(dirname $0)/asr/test_transcribe_mic.sh" -echo "Testing script riva_streaming_asr_client.py ..." -bash "$(dirname $0)/asr/test_riva_streaming_asr_client.sh" +echo "Testing script nemotron_streaming_asr_client.py ..." +bash "$(dirname $0)/asr/test_nemotron_streaming_asr_client.sh" echo "Testing script transcribe_file_offline.py" bash "$(dirname $0)/asr/test_transcribe_file_offline.sh" echo "Testing script transcribe_file.py ..." diff --git a/tests/integration/asr/define_test_control_vars.sh b/tests/integration/asr/define_test_control_vars.sh index 339a5ca9..52b081dd 100644 --- a/tests/integration/asr/define_test_control_vars.sh +++ b/tests/integration/asr/define_test_control_vars.sh @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: MIT -if [[ "${script_name}" == "riva_streaming_asr_client.py" ]]; then +if [[ "${script_name}" == "nemotron_streaming_asr_client.py" ]]; then use_stdout_for_testing=0 time_info_before_final_transcript=1 test_max_alternatives=1 diff --git a/tests/integration/asr/reference_outputs/test_riva_streaming_asr_client/time_stamps_AntiBERTa.txt b/tests/integration/asr/reference_outputs/test_nemotron_streaming_asr_client/time_stamps_AntiBERTa.txt similarity index 100% rename from tests/integration/asr/reference_outputs/test_riva_streaming_asr_client/time_stamps_AntiBERTa.txt rename to tests/integration/asr/reference_outputs/test_nemotron_streaming_asr_client/time_stamps_AntiBERTa.txt diff --git a/tests/integration/asr/test_riva_streaming_asr_client.sh b/tests/integration/asr/test_nemotron_streaming_asr_client.sh similarity index 89% rename from tests/integration/asr/test_riva_streaming_asr_client.sh rename to tests/integration/asr/test_nemotron_streaming_asr_client.sh index 42c9e8ce..a3806b1b 100644 --- a/tests/integration/asr/test_riva_streaming_asr_client.sh +++ b/tests/integration/asr/test_nemotron_streaming_asr_client.sh @@ -7,19 +7,19 @@ source "$(dirname $0)/../init_server_cli_params.sh" source "$(dirname $0)/test_functions.sh" -reference_outputs="$(dirname $0)/reference_outputs/test_riva_streaming_asr_client" -source "$(dirname $0)/../prepare_test_output_dir.sh" "$(dirname $0)" "riva_streaming_asr_client.py" +reference_outputs="$(dirname $0)/reference_outputs/test_nemotron_streaming_asr_client" +source "$(dirname $0)/../prepare_test_output_dir.sh" "$(dirname $0)" "nemotron_streaming_asr_client.py" rm -f output_0.txt test_string_presence \ - riva_streaming_asr_client.py \ + nemotron_streaming_asr_client.py \ "--input-file data/examples/en-US_sample.wav --language-code ru-RU" \ "details = \"Error: Unavailable model requested. Lang: ru-RU, Type: online\"" \ language_code_ru_RU \ 1 -test_simulate_realtime riva_streaming_asr_client.py -test_transcript_affecting_params riva_streaming_asr_client.py +test_simulate_realtime nemotron_streaming_asr_client.py +test_transcript_affecting_params nemotron_streaming_asr_client.py # Testing --word-time-offsets function test_word_time_offsets(){ @@ -30,7 +30,7 @@ function test_word_time_offsets(){ stdout_file="${test_output_dir}/stdout_word_time_offsets.txt" stderr_file="${test_output_dir}/stderr_word_time_offsets.txt" set +e - python scripts/asr/riva_streaming_asr_client.py ${server_args} ${exp_options} \ + python scripts/asr/nemotron_streaming_asr_client.py ${server_args} ${exp_options} \ 1>"${stdout_file}" 2>"${stderr_file}" retVal=$? process_exit_status @@ -74,7 +74,7 @@ function test_num_clients(){ stdout_file="${test_output_dir}/stdout_num_clients_2.txt" stderr_file="${test_output_dir}/stderr_num_clients_2.txt" set +e - python scripts/asr/riva_streaming_asr_client.py ${server_args} ${exp_options} \ + python scripts/asr/nemotron_streaming_asr_client.py ${server_args} ${exp_options} \ 1>"${stdout_file}" 2>"${stderr_file}" retVal=$? process_exit_status @@ -109,7 +109,7 @@ function test_num_iterations(){ stdout_file="${test_output_dir}/stdout_num_iterations.txt" stderr_file="${test_output_dir}/stderr_num_iterations.txt" set +e - python scripts/asr/riva_streaming_asr_client.py ${server_args} ${exp_options} \ + python scripts/asr/nemotron_streaming_asr_client.py ${server_args} ${exp_options} \ 1>"${stdout_file}" 2>"${stderr_file}" retVal=$? process_exit_status diff --git a/tests/unit/test_asr.py b/tests/unit/test_asr.py index 76b1376b..93492de2 100644 --- a/tests/unit/test_asr.py +++ b/tests/unit/test_asr.py @@ -5,9 +5,9 @@ from typing import Any, Generator, List, Union from unittest.mock import patch, Mock -import riva.client.proto.riva_asr_pb2 as rasr -from riva.client import ASRService -from riva.client.asr import streaming_request_generator +import nemotronspeech.client.proto.riva_asr_pb2 as rasr +from nemotronspeech.client import ASRService +from nemotronspeech.client.asr import streaming_request_generator from .helpers import set_auth_mock @@ -54,7 +54,7 @@ def is_iterable(obj: Any) -> bool: return True -@patch("riva.client.proto.riva_asr_pb2_grpc.RivaSpeechRecognitionStub.__init__", riva_asr_stub_init_patch) +@patch("nemotronspeech.client.proto.riva_asr_pb2_grpc.RivaSpeechRecognitionStub.__init__", riva_asr_stub_init_patch) class TestSpeechSynthesisService: def test_offline_recognize(self) -> None: auth, return_value_of_get_auth_metadata = set_auth_mock() diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index ee863224..f18cbd9e 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -5,7 +5,7 @@ import grpc -from riva.client.auth import create_channel, Auth +from nemotronspeech.client.auth import create_channel, Auth @patch("grpc.insecure_channel", Mock(return_value="insecure_channel")) diff --git a/tests/unit/test_nlp.py b/tests/unit/test_nlp.py index 9759a04f..1aa2e5ef 100644 --- a/tests/unit/test_nlp.py +++ b/tests/unit/test_nlp.py @@ -4,8 +4,8 @@ from typing import List, Union from unittest.mock import patch, Mock -import riva.client.proto.riva_nlp_pb2 as rnlp -from riva.client import NLPService +import nemotronspeech.client.proto.riva_nlp_pb2 as rnlp +from nemotronspeech.client import NLPService from .helpers import set_auth_mock @@ -76,7 +76,7 @@ def riva_nlp_stub_init_patch(self, channel): self.NaturalQuery = NATURAL_QUERY_MOCK -@patch("riva.client.proto.riva_nlp_pb2_grpc.RivaLanguageUnderstandingStub.__init__", riva_nlp_stub_init_patch) +@patch("nemotronspeech.client.proto.riva_nlp_pb2_grpc.RivaLanguageUnderstandingStub.__init__", riva_nlp_stub_init_patch) class TestSpeechSynthesisService: def test_classify_text(self) -> None: auth, return_value_of_get_auth_metadata = set_auth_mock() diff --git a/tests/unit/test_tts.py b/tests/unit/test_tts.py index 15f5f3f4..4452e0ef 100644 --- a/tests/unit/test_tts.py +++ b/tests/unit/test_tts.py @@ -5,9 +5,9 @@ from typing import Any, Generator from unittest.mock import patch, Mock -import riva.client.proto.riva_tts_pb2 as rtts -from riva.client import AudioEncoding -from riva.client.tts import SpeechSynthesisService +import nemotronspeech.client.proto.riva_tts_pb2 as rtts +from nemotronspeech.client import AudioEncoding +from nemotronspeech.client.tts import SpeechSynthesisService from .helpers import set_auth_mock @@ -50,7 +50,7 @@ def is_iterable(obj: Any) -> bool: return True -@patch("riva.client.proto.riva_tts_pb2_grpc.RivaSpeechSynthesisStub.__init__", riva_tts_stub_init_patch) +@patch("nemotronspeech.client.proto.riva_tts_pb2_grpc.RivaSpeechSynthesisStub.__init__", riva_tts_stub_init_patch) class TestSpeechSynthesisService: def test_synthesize(self) -> None: auth, return_value_of_get_auth_metadata = set_auth_mock() From d1ce4f31fc11ac84dacfc8135585cdf49ff355e8 Mon Sep 17 00:00:00 2001 From: sarane22 Date: Mon, 8 Jun 2026 12:37:32 +0530 Subject: [PATCH 2/6] chore(all): Updating protos --- nemotronspeech/client/__init__.py | 8 ++++---- nemotronspeech/client/asr.py | 14 +++++++------- nemotronspeech/client/nlp.py | 22 +++++++++++----------- nemotronspeech/client/nmt.py | 16 ++++++++-------- nemotronspeech/client/tts.py | 14 +++++++------- scripts/asr/transcribe_file.py | 2 +- scripts/asr/transcribe_file_offline.py | 2 +- scripts/nmt/nmt.py | 4 ++-- scripts/nmt/nmt_speech_to_speech.py | 14 +++++++------- scripts/nmt/nmt_speech_to_text.py | 12 ++++++------ scripts/tts/talk.py | 4 ++-- setup.py | 12 ++++++------ tests/unit/test_asr.py | 4 ++-- tests/unit/test_nlp.py | 4 ++-- tests/unit/test_tts.py | 4 ++-- 15 files changed, 68 insertions(+), 68 deletions(-) diff --git a/nemotronspeech/client/__init__.py b/nemotronspeech/client/__init__.py index 9f248cdc..e98d05e8 100644 --- a/nemotronspeech/client/__init__.py +++ b/nemotronspeech/client/__init__.py @@ -35,9 +35,9 @@ __shortversion__, __version__, ) -from nemotronspeech.client.proto.riva_asr_pb2 import RecognitionConfig, StreamingRecognitionConfig, EndpointingConfig -from nemotronspeech.client.proto.riva_audio_pb2 import AudioEncoding -from nemotronspeech.client.proto.riva_nlp_pb2 import AnalyzeIntentOptions -from nemotronspeech.client.proto.riva_nmt_pb2 import StreamingTranslateSpeechToSpeechConfig, TranslationConfig, SynthesizeSpeechConfig, StreamingTranslateSpeechToTextConfig +from nemotronspeech.client.proto.nemotron_asr_pb2 import RecognitionConfig, StreamingRecognitionConfig, EndpointingConfig +from nemotronspeech.client.proto.nemotron_audio_pb2 import AudioEncoding +from nemotronspeech.client.proto.nemotron_nlp_pb2 import AnalyzeIntentOptions +from nemotronspeech.client.proto.nemotron_nmt_pb2 import StreamingTranslateSpeechToSpeechConfig, TranslationConfig, SynthesizeSpeechConfig, StreamingTranslateSpeechToTextConfig from nemotronspeech.client.tts import SpeechSynthesisService from nemotronspeech.client.nmt import NeuralMachineTranslationClient diff --git a/nemotronspeech/client/asr.py b/nemotronspeech/client/asr.py index f1dab0ad..9d35b817 100644 --- a/nemotronspeech/client/asr.py +++ b/nemotronspeech/client/asr.py @@ -17,8 +17,8 @@ from grpc._channel import _MultiThreadedRendezvous import nemotronspeech.client -import nemotronspeech.client.proto.riva_asr_pb2 as rasr -import nemotronspeech.client.proto.riva_asr_pb2_grpc as rasr_srv +import nemotronspeech.client.proto.nemotron_asr_pb2 as rasr +import nemotronspeech.client.proto.nemotron_asr_pb2_grpc as rasr_srv from nemotronspeech.client.auth import Auth @@ -194,7 +194,7 @@ def print_streaming( Prints streaming speech recognition results to provided files or streams. Args: - responses (:obj:`Iterable[nemotronspeech.client.proto.riva_asr_pb2.StreamingRecognizeResponse]`): responses acquired during + responses (:obj:`Iterable[nemotronspeech.client.proto.nemotron_asr_pb2.StreamingRecognizeResponse]`): responses acquired during streaming speech recognition. output_file (:obj:`Union[Union[os.PathLike, str, TextIO], List[Union[os.PathLike, str, TextIO]]]`, `optional`): a path to an output file or a text stream or a list of paths/streams. If contains several elements, then @@ -420,7 +420,7 @@ def streaming_response_generator( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - streaming_config (:obj:`nemotronspeech.client.proto.riva_asr_pb2.StreamingRecognitionConfig`): a config for streaming. + streaming_config (:obj:`nemotronspeech.client.proto.nemotron_asr_pb2.StreamingRecognitionConfig`): a config for streaming. You may find description of config fields in message ``StreamingRecognitionConfig`` in `common repo `_. @@ -433,7 +433,7 @@ def streaming_response_generator( streaming_config = StreamingRecognitionConfig(config, interim_results=True) Yields: - :obj:`nemotronspeech.client.proto.riva_asr_pb2.StreamingRecognizeResponse`: responses for audio chunks in + :obj:`nemotronspeech.client.proto.nemotron_asr_pb2.StreamingRecognizeResponse`: responses for audio chunks in :param:`audio_chunks`. You may find description of response fields in declaration of ``StreamingRecognizeResponse`` message `here @@ -459,7 +459,7 @@ def offline_recognize( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - config (:obj:`nemotronspeech.client.proto.riva_asr_pb2.RecognitionConfig`): a config for offline speech recognition. + config (:obj:`nemotronspeech.client.proto.nemotron_asr_pb2.RecognitionConfig`): a config for offline speech recognition. You may find description of config fields in message ``RecognitionConfig`` in `common repo `_. @@ -473,7 +473,7 @@ def offline_recognize( response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_asr_pb2.RecognizeResponse, grpc._channel._MultiThreadedRendezvous]``: a + :obj:`Union[nemotronspeech.client.proto.nemotron_asr_pb2.RecognizeResponse, grpc._channel._MultiThreadedRendezvous]``: a response with results of :param:`audio_bytes` processing. You may find description of response fields in declaration of ``RecognizeResponse`` message `here `_. diff --git a/nemotronspeech/client/nlp.py b/nemotronspeech/client/nlp.py index 2405c346..fabcb399 100644 --- a/nemotronspeech/client/nlp.py +++ b/nemotronspeech/client/nlp.py @@ -6,8 +6,8 @@ from google.protobuf.message import Message from grpc._channel import _MultiThreadedRendezvous -import nemotronspeech.client.proto.riva_nlp_pb2 as rnlp -import nemotronspeech.client.proto.riva_nlp_pb2_grpc as rnlp_srv +import nemotronspeech.client.proto.nemotron_nlp_pb2 as rnlp +import nemotronspeech.client.proto.nemotron_nlp_pb2_grpc as rnlp_srv from nemotronspeech.client import Auth @@ -125,7 +125,7 @@ def classify_text( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TextClassResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.nemotron_nlp_pb2.TextClassResponse, grpc._channel._MultiThreadedRendezvous]`: a response with :param:`input_strings` classification results. You may find :class:`TextClassResponse` fields description `here `_. @@ -158,7 +158,7 @@ def classify_tokens( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.nemotron_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a response with results. You may find :class:`TokenClassResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -189,7 +189,7 @@ def transform_text( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.nemotron_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a model response. You may find :class:`TextTransformResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -211,7 +211,7 @@ def analyze_entities( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.nemotron_nlp_pb2.TokenClassResponse, grpc._channel._MultiThreadedRendezvous]`: a model response. You may find :class:`TokenClassResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -233,15 +233,15 @@ def analyze_intent( Args: input_string (:obj:`str`): a string which will be classified. - options (:obj:`nemotronspeech.client.proto.riva_nlp_pb2.AnalyzeIntentOptions`, `optional`, - defaults to :obj:`nemotronspeech.client.proto.riva_nlp_pb2.AnalyzeIntentOptions()`): + options (:obj:`nemotronspeech.client.proto.nemotron_nlp_pb2.AnalyzeIntentOptions`, `optional`, + defaults to :obj:`nemotronspeech.client.proto.nemotron_nlp_pb2.AnalyzeIntentOptions()`): an intent options. You may find fields description `here `_. Defaults to an instance of :obj:`AnalyzeIntentOptions` created without parameters. future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.AnalyzeIntentResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.nemotron_nlp_pb2.AnalyzeIntentResponse, grpc._channel._MultiThreadedRendezvous]`: a response with results. You may find fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -272,7 +272,7 @@ def punctuate_text( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.nemotron_nlp_pb2.TextTransformResponse, grpc._channel._MultiThreadedRendezvous]`: a response with results. You may find fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a @@ -296,7 +296,7 @@ def natural_query( future (:obj:`bool`, defaults to :obj:`False`): whether to return an async result instead of usual response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nlp_pb2.NaturalQueryResult, grpc._channel._MultiThreadedRendezvous]`: a + :obj:`Union[nemotronspeech.client.proto.nemotron_nlp_pb2.NaturalQueryResult, grpc._channel._MultiThreadedRendezvous]`: a response with a result. You may find fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a diff --git a/nemotronspeech/client/nmt.py b/nemotronspeech/client/nmt.py index ad7d44f0..c55c2728 100644 --- a/nemotronspeech/client/nmt.py +++ b/nemotronspeech/client/nmt.py @@ -4,8 +4,8 @@ from typing import Callable, Dict, Generator, Iterable, List, Optional, TextIO, Union from grpc._channel import _MultiThreadedRendezvous -import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt -import nemotronspeech.client.proto.riva_nmt_pb2_grpc as riva_nmt_srv +import nemotronspeech.client.proto.nemotron_nmt_pb2 as riva_nmt +import nemotronspeech.client.proto.nemotron_nmt_pb2_grpc as riva_nmt_srv from nemotronspeech.client import Auth def streaming_s2s_request_generator( @@ -65,7 +65,7 @@ def streaming_s2s_response_generator( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - streaming_config (:obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToSpeechConfig`): a config for streaming. + streaming_config (:obj:`nemotronspeech.client.proto.nemotron_nmt_pb2.StreamingTranslateSpeechToSpeechConfig`): a config for streaming. You may find description of config fields in message ``StreamingTranslateSpeechToSpeechConfig`` in `common repo `_. @@ -81,7 +81,7 @@ def streaming_s2s_response_generator( streaming_config = StreamingTranslateSpeechToSpeechConfig(asr_config, translation_config, tts_config) Yields: - :obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToSpeechResponse`: responses for audio chunks in + :obj:`nemotronspeech.client.proto.nemotron_nmt_pb2.StreamingTranslateSpeechToSpeechResponse`: responses for audio chunks in :param:`audio_chunks`. You may find description of response fields in declaration of ``StreamingTranslateSpeechToSpeechResponse`` message `here @@ -112,7 +112,7 @@ def streaming_s2t_response_generator( with wave.open(file_name, 'rb') as wav_f: raw_audio = wav_f.readframes(n_frames) - streaming_config (:obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToTextConfig`): a config for streaming. + streaming_config (:obj:`nemotronspeech.client.proto.nemotron_nmt_pb2.StreamingTranslateSpeechToTextConfig`): a config for streaming. You may find description of config fields in message ``StreamingTranslateSpeechToTextConfig`` in `common repo `_. @@ -127,7 +127,7 @@ def streaming_s2t_response_generator( streaming_config = StreamingTranslateSpeechToTextConfig(asr_config, translation_config) Yields: - :obj:`nemotronspeech.client.proto.riva_nmt_pb2.StreamingTranslateSpeechToTextResponse`: responses for audio chunks in + :obj:`nemotronspeech.client.proto.nemotron_nmt_pb2.StreamingTranslateSpeechToTextResponse`: responses for audio chunks in :param:`audio_chunks`. You may find description of response fields in declaration of ``StreamingTranslateSpeechToTextResponse`` message `here @@ -157,8 +157,8 @@ def translate( response. You can get a response by calling ``result()`` method of the future object. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_nmt_pb2.TranslateTextResponse, grpc._channel._MultiThreadedRendezvous]`: - a response with output. You may find :class:`nemotronspeech.client.proto.riva_nmt_pb2.TranslateTextResponse` fields + :obj:`Union[nemotronspeech.client.proto.nemotron_nmt_pb2.TranslateTextResponse, grpc._channel._MultiThreadedRendezvous]`: + a response with output. You may find :class:`nemotronspeech.client.proto.nemotron_nmt_pb2.TranslateTextResponse` fields description `here `_. """ diff --git a/nemotronspeech/client/tts.py b/nemotronspeech/client/tts.py index 780ac653..0585027c 100644 --- a/nemotronspeech/client/tts.py +++ b/nemotronspeech/client/tts.py @@ -5,10 +5,10 @@ from grpc._channel import _MultiThreadedRendezvous -import nemotronspeech.client.proto.riva_tts_pb2 as rtts -import nemotronspeech.client.proto.riva_tts_pb2_grpc as rtts_srv +import nemotronspeech.client.proto.nemotron_tts_pb2 as rtts +import nemotronspeech.client.proto.nemotron_tts_pb2_grpc as rtts_srv from nemotronspeech.client import Auth -from nemotronspeech.client.proto.riva_audio_pb2 import AudioEncoding +from nemotronspeech.client.proto.nemotron_audio_pb2 import AudioEncoding import wave def parse_custom_configuration(custom_configuration: str) -> Dict[str, str]: @@ -96,8 +96,8 @@ def synthesize( custom_configuration (:obj:`Dict[str, str]`, `optional`): Free-form key/value parameters forwarded to the synthesizer (e.g. ``{"exaggeration_factor": "1.5"}``). Model-specific. Returns: - :obj:`Union[nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse, grpc._channel._MultiThreadedRendezvous]`: - a response with output. You may find :class:`nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse` fields + :obj:`Union[nemotronspeech.client.proto.nemotron_tts_pb2.SynthesizeSpeechResponse, grpc._channel._MultiThreadedRendezvous]`: + a response with output. You may find :class:`nemotronspeech.client.proto.nemotron_tts_pb2.SynthesizeSpeechResponse` fields description `here `_. """ @@ -162,8 +162,8 @@ def synthesize_online( custom_configuration (:obj:`Dict[str, str]`, `optional`): Free-form key/value parameters forwarded to the synthesizer (e.g. ``{"exaggeration_factor": "1.5"}``). Model-specific. Yields: - :obj:`nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse`: a response with output. You may find - :class:`nemotronspeech.client.proto.riva_tts_pb2.SynthesizeSpeechResponse` fields description `here + :obj:`nemotronspeech.client.proto.nemotron_tts_pb2.SynthesizeSpeechResponse`: a response with output. You may find + :class:`nemotronspeech.client.proto.nemotron_tts_pb2.SynthesizeSpeechResponse` fields description `here `_. If :param:`future` is :obj:`True`, then a future object is returned. You may retrieve a response from a future object by calling ``result()`` method. diff --git a/scripts/asr/transcribe_file.py b/scripts/asr/transcribe_file.py index a98a9f67..6ae0708e 100644 --- a/scripts/asr/transcribe_file.py +++ b/scripts/asr/transcribe_file.py @@ -90,7 +90,7 @@ def main() -> int: if args.list_models: asr_models = dict() - config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(nemotronspeech.client.proto.riva_asr_pb2.RivaSpeechRecognitionConfigRequest()) + config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(nemotronspeech.client.proto.nemotron_asr_pb2.RivaSpeechRecognitionConfigRequest()) for model_config in config_response.model_config: if model_config.parameters["type"] == "online": language_code = model_config.parameters['language_code'] diff --git a/scripts/asr/transcribe_file_offline.py b/scripts/asr/transcribe_file_offline.py index 3d5ffddc..6276bafa 100644 --- a/scripts/asr/transcribe_file_offline.py +++ b/scripts/asr/transcribe_file_offline.py @@ -56,7 +56,7 @@ def main() -> int: if args.list_models: asr_models = dict() - config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(nemotronspeech.client.proto.riva_asr_pb2.RivaSpeechRecognitionConfigRequest()) + config_response = asr_service.stub.GetRivaSpeechRecognitionConfig(nemotronspeech.client.proto.nemotron_asr_pb2.RivaSpeechRecognitionConfigRequest()) for model_config in config_response.model_config: if model_config.parameters["type"] == "offline": language_code = model_config.parameters['language_code'] diff --git a/scripts/nmt/nmt.py b/scripts/nmt/nmt.py index b928e25b..96ea0f9d 100644 --- a/scripts/nmt/nmt.py +++ b/scripts/nmt/nmt.py @@ -30,8 +30,8 @@ import os import sys -import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt -import nemotronspeech.client.proto.riva_nmt_pb2_grpc as riva_nmt_srv +import nemotronspeech.client.proto.nemotron_nmt_pb2 as riva_nmt +import nemotronspeech.client.proto.nemotron_nmt_pb2_grpc as riva_nmt_srv import nemotronspeech.client from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters diff --git a/scripts/nmt/nmt_speech_to_speech.py b/scripts/nmt/nmt_speech_to_speech.py index 7542861e..5e36b22b 100644 --- a/scripts/nmt/nmt_speech_to_speech.py +++ b/scripts/nmt/nmt_speech_to_speech.py @@ -5,8 +5,8 @@ from typing import Iterator import nemotronspeech.client -import nemotronspeech.client.proto.riva_asr_pb2 as riva_asr_pb2 -import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt_pb2 +import nemotronspeech.client.proto.nemotron_asr_pb2 as nemotron_asr_pb2 +import nemotronspeech.client.proto.nemotron_nmt_pb2 as nemotron_nmt_pb2 from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters try: from nemotronspeech.client.argparse_utils import cli_main @@ -58,20 +58,20 @@ def main(): print(f"Server address: {args.server}") # Create ASR config - asr_config = riva_asr_pb2.StreamingRecognitionConfig( - config=riva_asr_pb2.RecognitionConfig( + asr_config = nemotron_asr_pb2.StreamingRecognitionConfig( + config=nemotron_asr_pb2.RecognitionConfig( language_code=args.source_language, max_alternatives=1, enable_automatic_punctuation=True ), interim_results=True, ) # Create translation config - translation_config = riva_nmt_pb2.TranslationConfig( + translation_config = nemotron_nmt_pb2.TranslationConfig( source_language_code=args.source_language, target_language_code=args.target_language, ) # Create synthesis config - tts_config = riva_nmt_pb2.SynthesizeSpeechConfig( + tts_config = nemotron_nmt_pb2.SynthesizeSpeechConfig( encoding=nemotronspeech.client.AudioEncoding.LINEAR_PCM, language_code=args.target_language, voice_name=args.voice, @@ -79,7 +79,7 @@ def main(): ) # Create streaming config - streaming_config = riva_nmt_pb2.StreamingTranslateSpeechToSpeechConfig( + streaming_config = nemotron_nmt_pb2.StreamingTranslateSpeechToSpeechConfig( asr_config=asr_config, translation_config=translation_config, tts_config=tts_config ) diff --git a/scripts/nmt/nmt_speech_to_text.py b/scripts/nmt/nmt_speech_to_text.py index adef304f..82df60e3 100644 --- a/scripts/nmt/nmt_speech_to_text.py +++ b/scripts/nmt/nmt_speech_to_text.py @@ -3,8 +3,8 @@ import sys import nemotronspeech.client -import nemotronspeech.client.proto.riva_asr_pb2 as riva_asr_pb2 -import nemotronspeech.client.proto.riva_nmt_pb2 as riva_nmt_pb2 +import nemotronspeech.client.proto.nemotron_asr_pb2 as nemotron_asr_pb2 +import nemotronspeech.client.proto.nemotron_nmt_pb2 as nemotron_nmt_pb2 from nemotronspeech.client.argparse_utils import add_connection_argparse_parameters try: from nemotronspeech.client.argparse_utils import cli_main @@ -72,8 +72,8 @@ def main(): print(f"Server address: {args.server}") # Create ASR config - asr_config = riva_asr_pb2.StreamingRecognitionConfig( - config=riva_asr_pb2.RecognitionConfig( + asr_config = nemotron_asr_pb2.StreamingRecognitionConfig( + config=nemotron_asr_pb2.RecognitionConfig( language_code=args.source_language, max_alternatives=1, enable_automatic_punctuation=True @@ -82,14 +82,14 @@ def main(): ) # Create translation config - translation_config = riva_nmt_pb2.TranslationConfig( + translation_config = nemotron_nmt_pb2.TranslationConfig( source_language_code=args.source_language, target_language_code=args.target_language, model_name=args.model ) # Create streaming config - streaming_config = riva_nmt_pb2.StreamingTranslateSpeechToTextConfig( + streaming_config = nemotron_nmt_pb2.StreamingTranslateSpeechToTextConfig( asr_config=asr_config, translation_config=translation_config ) diff --git a/scripts/tts/talk.py b/scripts/tts/talk.py index ca2f944e..4e819ec8 100644 --- a/scripts/tts/talk.py +++ b/scripts/tts/talk.py @@ -16,7 +16,7 @@ EXIT_BAD_INPUT = 2 def cli_main(func): return func -from nemotronspeech.client.proto.riva_audio_pb2 import AudioEncoding +from nemotronspeech.client.proto.nemotron_audio_pb2 import AudioEncoding def read_file_to_dict(file_path): result_dict = {} @@ -140,7 +140,7 @@ def main() -> int: if args.list_voices: config_response = service.stub.GetRivaSynthesisConfig( - nemotronspeech.client.proto.riva_tts_pb2.RivaSynthesisConfigRequest() + nemotronspeech.client.proto.nemotron_tts_pb2.RivaSynthesisConfigRequest() ) tts_models = dict() for model_config in config_response.model_config: diff --git a/setup.py b/setup.py index aecc12fd..02da23be 100644 --- a/setup.py +++ b/setup.py @@ -78,7 +78,7 @@ def run(self): # ) os.chdir(cwd) - glob_dir = str(setup_py_dir / 'common/riva/proto/*.proto') + glob_dir = str(setup_py_dir / 'common/nemotronspeech/proto/*.proto') print("glob dir: ", glob_dir) protos = glob(glob_dir) if not protos: @@ -99,17 +99,17 @@ def run(self): proto, ] ) - for fn in glob(str(target_dir / 'riva/proto/*_pb2*.py')): + for fn in glob(str(target_dir / 'nemotronspeech/proto/*_pb2*.py')): with open(fn) as f: text = f.read() with open(fn, 'w') as f: f.write(CHANGE_PB2_LOC_PATTERN.sub(r'from . import \1', text)) - # Move Python files to riva/client - for f in glob(str(target_dir / 'riva/proto/*.py')): + # Move Python files to nemotronspeech/client + for f in glob(str(target_dir / 'nemotronspeech/proto/*.py')): shutil.move(f, target_dir) # Remove leftover empty dirs - shutil.rmtree(target_dir / 'riva/proto') - shutil.rmtree(target_dir / 'riva') + shutil.rmtree(target_dir / 'nemotronspeech/proto') + shutil.rmtree(target_dir / 'nemotronspeech') open(target_dir / '__init__.py', 'w').close() super(BuildPyCommand, self).run() diff --git a/tests/unit/test_asr.py b/tests/unit/test_asr.py index 93492de2..8ddc3152 100644 --- a/tests/unit/test_asr.py +++ b/tests/unit/test_asr.py @@ -5,7 +5,7 @@ from typing import Any, Generator, List, Union from unittest.mock import patch, Mock -import nemotronspeech.client.proto.riva_asr_pb2 as rasr +import nemotronspeech.client.proto.nemotron_asr_pb2 as rasr from nemotronspeech.client import ASRService from nemotronspeech.client.asr import streaming_request_generator @@ -54,7 +54,7 @@ def is_iterable(obj: Any) -> bool: return True -@patch("nemotronspeech.client.proto.riva_asr_pb2_grpc.RivaSpeechRecognitionStub.__init__", riva_asr_stub_init_patch) +@patch("nemotronspeech.client.proto.nemotron_asr_pb2_grpc.RivaSpeechRecognitionStub.__init__", riva_asr_stub_init_patch) class TestSpeechSynthesisService: def test_offline_recognize(self) -> None: auth, return_value_of_get_auth_metadata = set_auth_mock() diff --git a/tests/unit/test_nlp.py b/tests/unit/test_nlp.py index 1aa2e5ef..4004361a 100644 --- a/tests/unit/test_nlp.py +++ b/tests/unit/test_nlp.py @@ -4,7 +4,7 @@ from typing import List, Union from unittest.mock import patch, Mock -import nemotronspeech.client.proto.riva_nlp_pb2 as rnlp +import nemotronspeech.client.proto.nemotron_nlp_pb2 as rnlp from nemotronspeech.client import NLPService from .helpers import set_auth_mock @@ -76,7 +76,7 @@ def riva_nlp_stub_init_patch(self, channel): self.NaturalQuery = NATURAL_QUERY_MOCK -@patch("nemotronspeech.client.proto.riva_nlp_pb2_grpc.RivaLanguageUnderstandingStub.__init__", riva_nlp_stub_init_patch) +@patch("nemotronspeech.client.proto.nemotron_nlp_pb2_grpc.RivaLanguageUnderstandingStub.__init__", riva_nlp_stub_init_patch) class TestSpeechSynthesisService: def test_classify_text(self) -> None: auth, return_value_of_get_auth_metadata = set_auth_mock() diff --git a/tests/unit/test_tts.py b/tests/unit/test_tts.py index 4452e0ef..7eb5e7b9 100644 --- a/tests/unit/test_tts.py +++ b/tests/unit/test_tts.py @@ -5,7 +5,7 @@ from typing import Any, Generator from unittest.mock import patch, Mock -import nemotronspeech.client.proto.riva_tts_pb2 as rtts +import nemotronspeech.client.proto.nemotron_tts_pb2 as rtts from nemotronspeech.client import AudioEncoding from nemotronspeech.client.tts import SpeechSynthesisService @@ -50,7 +50,7 @@ def is_iterable(obj: Any) -> bool: return True -@patch("nemotronspeech.client.proto.riva_tts_pb2_grpc.RivaSpeechSynthesisStub.__init__", riva_tts_stub_init_patch) +@patch("nemotronspeech.client.proto.nemotron_tts_pb2_grpc.RivaSpeechSynthesisStub.__init__", riva_tts_stub_init_patch) class TestSpeechSynthesisService: def test_synthesize(self) -> None: auth, return_value_of_get_auth_metadata = set_auth_mock() From b065551779b81ac5321a9598aec7f3eeff439d66 Mon Sep 17 00:00:00 2001 From: sarane22 Date: Mon, 8 Jun 2026 15:52:16 +0530 Subject: [PATCH 3/6] chore(all): Updating protos --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index bfe31f88..d4b32f2a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "common"] path = common - url = https://github.com/nvidia-riva/common.git - branch = main + url = https://gitlab-master.nvidia.com/sarane/common.git + branch = nemotron_name_change From b39ce8a23c69810252c5f952ce0c4ce023af8833 Mon Sep 17 00:00:00 2001 From: sarane22 Date: Mon, 8 Jun 2026 16:08:49 +0530 Subject: [PATCH 4/6] chore(all): Update common submodule to NemotronSpeech rename commit Pin common submodule to d7276290 (nemotron_name_change branch) so that setup.py finds proto files at common/nemotronspeech/proto/ during build. Co-Authored-By: Claude Sonnet 4.6 --- README.md | 8 ++++---- common | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7f3fa781..aa35295d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT) -# NVIDIA NemotronSpeech Clients +# NVIDIA Nemotron Speech Clients -NemotronSpeech is a GPU-accelerated SDK for building Speech AI applications that are customized for your use +Nemotron Speech is a GPU-accelerated SDK for building Speech AI applications that are customized for your use case and deliver real-time performance. This repo provides performant client example command-line clients. ## Main API @@ -82,7 +82,7 @@ and restart. ### Server -Before running client part of NemotronSpeech, please set up a server. The simplest +Before running client part of Nemotron Speech, please set up a server. The simplest way to do this is to follow [quick start guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html#local-deployment-using-quick-start-scripts). @@ -280,7 +280,7 @@ See tutorial notebooks in directory `tutorials`. ## Documentation -Additional documentation on the NemotronSpeech Skills SDK can be found [here](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/). +Additional documentation on the Nemotron Speech Skills SDK can be found [here](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/). ## License diff --git a/common b/common index 71df9826..d7276290 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 71df98266725320a6b6b3a9f32a6da832dc93691 +Subproject commit d7276290d1031e69145140015542c6bd4c13f09a From 3b7964a1caac57889697daa031adc12cca67214b Mon Sep 17 00:00:00 2001 From: sarane22 Date: Wed, 10 Jun 2026 10:04:51 +0530 Subject: [PATCH 5/6] fix(setup.py): Update pb2 import rewrite pattern for renamed proto paths protoc now generates 'from nemotronspeech.proto import ...' (driven by the renamed import paths in the .proto files). Update CHANGE_PB2_LOC_PATTERN to match the new prefix so generated pb2 files get their cross-imports rewritten to relative 'from . import ...' correctly. Co-Authored-By: Claude Sonnet 4.6 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 02da23be..248e0de3 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ long_description = fh.read() long_description_content_type = "text/markdown" -CHANGE_PB2_LOC_PATTERN = re.compile('from riva.proto import (.+_pb2.*)') +CHANGE_PB2_LOC_PATTERN = re.compile('from nemotronspeech.proto import (.+_pb2.*)') class BuildPyCommand(build_py): From ffb2ab1bafa244d0342005eea8be7e17969a2da3 Mon Sep 17 00:00:00 2001 From: sarane22 Date: Wed, 10 Jun 2026 10:06:20 +0530 Subject: [PATCH 6/6] chore: Update common submodule to pick up Makefile pb2 pattern fix Co-Authored-By: Claude Sonnet 4.6 --- common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common b/common index d7276290..7ad51159 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit d7276290d1031e69145140015542c6bd4c13f09a +Subproject commit 7ad51159e7489ccc8c8341894999ec7142160702