6 changes: 6 additions & 0 deletions demo/seamless_server/.gitignore
@@ -0,0 +1,6 @@
google_credentials.json
__pycache__/
src/__pycache__/
debug/
models/
.vscode/
33 changes: 33 additions & 0 deletions demo/seamless_server/README.md
@@ -0,0 +1,33 @@
## To start the server
Contributor
@lh5844 Could you update this README to reflect the steps you needed to get the server backend set up? I think you've had the most recent experience setting it up, so it would be helpful to incorporate anything you noticed which wasn't in the instructions. Also, we can remove the part about the google cloud account, and downloading the streaming models (since this is intended as a general-purpose demo).

For now, you could update the README to describe how to set up the demo for the dummy S2T/S2S models, but in the future we could switch it to e.g. the Whisper tiny model for running locally

Contributor

For context, I was hoping to test the installation from scratch to make sure there are not any unexpected installation issues for users


Clone the repo, then change into the server directory:
`cd seamless-experiences/seamless_vc/seamless_server`

If running for the first time, create the conda environment from `environment.yml`: `conda env create -f environment.yml`
(or, if you are on macOS, replace `environment.yml` with `environment_mac.yml`)

In each new terminal you use, you will need to activate the conda environment:
`conda activate smlss_server`

To install the Seamless-related code, run:
`pip install git+ssh://git@github.com/facebookresearch/SimulEval.git`
`pip install git+ssh://git@github.com/fairinternal/fairseq-py.git@seamless_main`

Run the server:
`python app.py`

## Set up a Google account if not already set up

https://cloud.google.com/translate/media/docs/streaming
Get Google credentials and put them into a `google_credentials.json` file in the root of the repo.

## Download the Seamless models

See the [list of currently available demo models](https://www.internalfb.com/intern/wiki/FAIR_Accel_Language/Projects/Seamless/Workstreams/Streaming/Tutorials/Demo/#available-models) in the FAIR Seamless wiki. The following models are needed to make the server run:

- es->en s2t model: Put the checkpoint file in `models/s2t_es-en_emma_multidomain_v0.1` under the root directory. To get the model files, contact the researchers ([Anna Sun](https://www.internalfb.com/profile/view/1115461094) or [Xutai Ma](https://www.internalfb.com/profile/view/100004735920998)).

## Debugging

For the `start_seamless_stream_es_en_s2t` endpoint you can set `debug=true` when sending the `config` event.
This enables extensive debug logging and saves audio files in the `/debug` folder; `test_no_silence.wav` contains the audio data with the silence chunks removed.
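
For reference, a minimal sketch of the `config` event the websocket handler expects. The field names match what `app.py` reads from the config message; the concrete values here (e.g. the 16 kHz sample rate) are illustrative assumptions:

```python
import json

# Illustrative "config" event for the streaming endpoint. Field names match
# what app.py reads from the config message; the values are example choices.
config_event = {
    "event": "config",
    "model_type": "s2t",        # "s2t" or "s2s"
    "source_language": "es",    # only the first two letters are used
    "target_language": "en",
    "rate": 16000,              # sample rate of the audio you will stream (assumed)
    "buffer_limit": 1,          # passed through to SimulevalTranscoder
    "debug": True,              # enables debug logging and /debug audio dumps
    "async_processing": False,
}
message = json.dumps(config_event)  # send as a text frame over the websocket
```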
53 changes: 53 additions & 0 deletions demo/seamless_server/SimulevalAgentDirectory.py
@@ -0,0 +1,53 @@
# Creates a directory in which to look up available agents


class NoAvailableAgentException(Exception):
pass


class AgentWithInfo:
def __init__(self, agent, name, modality, source_lang, target_lang):
self.agent = agent
self.name = name
self.modality = modality
self.source_lang = source_lang
self.target_lang = target_lang


class SimulevalAgentDirectory:
# Available models. These are the directories where the models can be found, and also serve as an ID for the model.
# s2t:
s2t_es_en_agent = "s2t_es-en_tt-waitk_multidomain"
s2t_en_es_agent = "s2t_en-es_tt-waitk_multidomain"
s2t_es_en_emma_agent = "s2t_es-en_emma_multidomain_v0.3"
s2t_en_es_emma_agent = "s2t_en-es_emma_multidomain_v0.3"
# s2s:
s2s_es_en_agent = "s2s_es-en_tt-waitk-unity2_multidomain"
s2s_es_en_emma_agent = "s2s_es-en_emma-unity2_multidomain_v0.2"

def __init__(self):
self.agents = []

def add_agent(self, agent, name, modality, source_lang, target_lang):
self.agents.append(
AgentWithInfo(agent, name, modality, source_lang, target_lang)
)

def get_agent(self, modality, source_lang, target_lang):
for agent in self.agents:
if (
agent.modality == modality
and agent.source_lang == source_lang
and agent.target_lang == target_lang
):
return agent.agent
return None

def get_agent_or_throw(self, modality, source_lang, target_lang):
agent = self.get_agent(modality, source_lang, target_lang)
if agent is None:
raise NoAvailableAgentException(
"No agent found for modality=%s, source_lang=%s, target_lang=%s"
% (modality, source_lang, target_lang)
)
return agent
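
A minimal usage sketch of the directory, assuming `agent` is an already-built SimulEval agent (`app.py` below registers real agents via `SimulevalTranscoder.build_agent`):

```python
directory = SimulevalAgentDirectory()
directory.add_agent(
    agent,  # assumed: a prebuilt SimulEval agent object
    SimulevalAgentDirectory.s2t_es_en_emma_agent,
    "s2t",
    "es",
    "en",
)

# Look up by (modality, source_lang, target_lang); get_agent_or_throw raises
# NoAvailableAgentException if no registered agent matches.
try:
    s2t_agent = directory.get_agent_or_throw("s2t", "es", "en")
except NoAvailableAgentException:
    print("No es->en s2t agent registered")
```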
219 changes: 219 additions & 0 deletions demo/seamless_server/app.py
@@ -0,0 +1,219 @@
from logging.config import dictConfig
from flask import Flask
from flask_sockets import Sockets
from SimulevalAgentDirectory import (
    SimulevalAgentDirectory,
    NoAvailableAgentException,
)

from src.connection_tracker import ConnectionTracker

from src.simuleval_transcoder import SimulevalTranscoder
import json
import logging
from werkzeug.routing import Rule
import time

dictConfig(
{
"version": 1,
"formatters": {
"default": {
"format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
}
},
"handlers": {
"wsgi": {
"class": "logging.StreamHandler",
"stream": "ext://flask.logging.wsgi_errors_stream",
"formatter": "default",
}
},
"root": {"level": "INFO", "handlers": ["wsgi"]},
}
)


app = Flask(__name__)
sockets = Sockets(app)
app.logger.setLevel(logging.INFO)

available_agents = SimulevalAgentDirectory()

connection_tracker = ConnectionTracker(app.logger)


def start_seamless_stream_s2t(ws):
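    """Handle a streaming translation websocket connection.

    The client first sends a JSON "config" event, then streams raw audio as
    binary frames; the server pushes back "translation_text" /
    "translation_speech" events (plus latency and active-connection info)
    until a "closed" event arrives or the socket closes.
    """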
app.logger.info("WS Connection accepted")
remote_address = ws.environ.get("REMOTE_ADDR")

connection_tracker.add_connection(remote_address)

app.logger.info("Current connection tracker info:")
app.logger.info(str(connection_tracker))

transcoder = None
debug = False
async_processing = False

def log_debug(*args):
if debug:
app.logger.info(*args)

def ws_send(obj):
to_send = json.dumps(obj)
log_to_send = to_send
if "sample_rate" in to_send:
# don't log the speech payload
log_to_send = json.dumps({k: v for k, v in obj.items() if k != "payload"})
log_debug(f"Gonna send to client: {log_to_send}")
ws.send(to_send)

latency_sent = False

while not ws.closed:
message = ws.receive()
if message is None:
log_debug("No message received...")
continue

connection_tracker.log_recent_message(remote_address)

if transcoder:
speech_and_text_output = transcoder.get_buffered_output()
if speech_and_text_output is not None:
lat = None
if speech_and_text_output.speech_samples:
to_send = {
"event": "translation_speech",
"payload": speech_and_text_output.speech_samples,
"sample_rate": speech_and_text_output.speech_sample_rate,
}
elif speech_and_text_output.text:
to_send = {
"event": "translation_text",
"payload": speech_and_text_output.text,
}
else:
app.logger.warn(
"Got model output with neither speech nor text content"
)
to_send = {} # unexpected case, but not breaking the flow
to_send["eos"] = speech_and_text_output.final

to_send[
"server_active_connections"
] = connection_tracker.get_active_connection_count()

if not latency_sent:
lat = transcoder.first_translation_time()
latency_sent = True
to_send["latency"] = lat

ws_send(to_send)

if isinstance(message, bytearray) and transcoder is not None:
transcoder.process_incoming_bytes(message)
else:
data = json.loads(message)
if data["event"] == "config":
app.logger.debug("Received ws config")
debug = data.get("debug")
async_processing = data.get("async_processing")

source_language_2_letter = data.get("source_language")[:2]
target_language_2_letter = data.get("target_language")[:2]

# Currently s2s or s2t
model_type = data.get("model_type")

try:
agent = available_agents.get_agent_or_throw(
model_type,
source_language_2_letter,
target_language_2_letter,
)
except NoAvailableAgentException as e:
app.logger.warn(f"Error while getting agent: {e}")
ws_send({"event": "error", "payload": str(e)})
ws.close()
break

t0 = time.time()
transcoder = SimulevalTranscoder(
agent,
data["rate"],
debug=debug,
buffer_limit=int(data["buffer_limit"]),
)
t1 = time.time()
log_debug(f"Booting up VAD and transcoder took {t1-t0} sec")
ws_send({"event": "server_ready"})
if async_processing:
transcoder.start()

if data["event"] == "closed":
transcoder.close = True
log_debug("Closed Message received: {}".format(message))
ws.close()
break

if transcoder and not async_processing:
transcoder.process_pipeline_once()

if transcoder and transcoder.close:
ws.close()

app.logger.info("WS Connection closed")

connection_tracker.remove_connection(remote_address)
app.logger.info("Current connection tracker info:")
app.logger.info(str(connection_tracker))

if transcoder:
log_debug("closing transcoder")
transcoder.close = True


sockets.url_map.add(
Rule(
"/api/seamless_stream_es_en_s2t",
endpoint=start_seamless_stream_s2t,
websocket=True,
)
)

if __name__ == "__main__":
# Build all the agents before starting the server
# s2t:
available_agents.add_agent(
SimulevalTranscoder.build_agent(SimulevalAgentDirectory.s2t_es_en_emma_agent),
SimulevalAgentDirectory.s2t_es_en_emma_agent,
"s2t",
"es",
"en",
)
available_agents.add_agent(
SimulevalTranscoder.build_agent(SimulevalAgentDirectory.s2t_en_es_emma_agent),
SimulevalAgentDirectory.s2t_en_es_emma_agent,
"s2t",
"en",
"es",
)
# s2s:
available_agents.add_agent(
SimulevalTranscoder.build_agent(SimulevalAgentDirectory.s2s_es_en_emma_agent),
SimulevalAgentDirectory.s2s_es_en_emma_agent,
"s2s",
"es",
"en",
)

from gevent import pywsgi
from geventwebsocket.handler import WebSocketHandler

server = pywsgi.WSGIServer(("0.0.0.0", 8000), app, handler_class=WebSocketHandler)
app.logger.info("Starting server on port 8000...")
server.serve_forever()
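
For completeness, a minimal client sketch for the protocol above. It assumes the third-party `websocket-client` package and a local server on port 8000; `read_audio_chunks()` is a hypothetical helper that yields raw audio bytes at the configured rate:

```python
import json
from websocket import create_connection  # pip install websocket-client (assumed)

ws = create_connection("ws://localhost:8000/api/seamless_stream_es_en_s2t")

# Configure the stream (see the "config" handling in app.py above).
ws.send(json.dumps({
    "event": "config",
    "model_type": "s2t",
    "source_language": "es",
    "target_language": "en",
    "rate": 16000,          # example sample rate
    "buffer_limit": 1,
    "debug": False,
    "async_processing": False,
}))
print(ws.recv())  # expect a "server_ready" event

# Stream audio as binary frames. The server pushes translation events only
# when it has buffered output, so a real client would read replies on a
# separate thread; here we naively alternate sends and receives.
for chunk in read_audio_chunks():  # hypothetical audio source
    ws.send_binary(chunk)
    reply = json.loads(ws.recv())
    if reply.get("event") == "translation_text":
        print(reply["payload"])

ws.send(json.dumps({"event": "closed"}))
ws.close()
```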