6 changes: 6 additions & 0 deletions demo/seamless_server/.gitignore
@@ -0,0 +1,6 @@
google_credentials.json
__pycache__/
src/__pycache__/
debug/
models/
.vscode/
33 changes: 33 additions & 0 deletions demo/seamless_server/README.md
@@ -0,0 +1,33 @@
## To start the server
Contributor
@lh5844 Could you update this README to reflect the steps you needed to get the server backend set up? I think you've had the most recent experience setting it up, so it would be helpful to incorporate anything you noticed which wasn't in the instructions. Also, we can remove the part about the google cloud account, and downloading the streaming models (since this is intended as a general-purpose demo).

For now, you could update the README to describe how to set up the demo for the dummy S2T/S2S models, but in the future we could switch it to e.g. the Whisper tiny model for running locally

Contributor

For context, I was hoping to test the installation from scratch to make sure there are not any unexpected installation issues for users


Clone the repo, then change into the server directory:
`cd seamless-experiences/seamless_vc/seamless_server`

If running for the first time, create the conda environment from `environment.yml`: `conda env create -f environment.yml`
(or, if you are on macOS, replace `environment.yml` with `environment_mac.yml`)

In each new terminal you use, you will need to activate the conda environment:
`conda activate smlss_server`

To install the Seamless-related code, run:
`pip install git+ssh://git@github.com/facebookresearch/SimulEval.git`
`pip install git+ssh://git@github.com/fairinternal/fairseq-py.git@seamless_main`

Run the server:
`python app.py`

## Set up a Google account if not already set up

https://cloud.google.com/translate/media/docs/streaming
Get Google credentials and put them into a `google_credentials.json` file in the root of the repo.

## Download the Seamless models

See the [list of currently available demo models](https://www.internalfb.com/intern/wiki/FAIR_Accel_Language/Projects/Seamless/Workstreams/Streaming/Tutorials/Demo/#available-models) in the FAIR Seamless wiki. The following models are needed to make the server run:

- es->en s2t model: Put the checkpoint file in `models/s2t_es-en_emma_multidomain_v0.1` under the root directory. To get the model files, contact the researchers ([Anna Sun](https://www.internalfb.com/profile/view/1115461094) or [Xutai Ma](https://www.internalfb.com/profile/view/100004735920998)).

## Debugging

For the `start_seamless_stream_es_en_s2t` endpoint you can set `debug=true` when sending the `config` event.
This enables extensive debug logging and saves audio files in the `/debug` folder; `test_no_silence.wav` contains the audio data with the silence chunks removed.
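
For reference, a minimal sketch of the `config` event the websocket handler expects. The field names match what `app.py` reads from the config message; the concrete values here (e.g. the 16 kHz sample rate) are illustrative assumptions:

```python
import json

# Illustrative "config" event for the streaming endpoint. Field names match
# what app.py reads from the config message; the values are example choices.
config_event = {
    "event": "config",
    "model_type": "s2t",        # "s2t" or "s2s"
    "source_language": "es",    # only the first two letters are used
    "target_language": "en",
    "rate": 16000,              # sample rate of the audio you will stream (assumed)
    "buffer_limit": 1,          # passed through to SimulevalTranscoder
    "debug": True,              # enables debug logging and /debug audio dumps
    "async_processing": False,
}
message = json.dumps(config_event)  # send as a text frame over the websocket
```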
53 changes: 53 additions & 0 deletions demo/seamless_server/SimulevalAgentDirectory.py
@@ -0,0 +1,53 @@
# Creates a directory in which to look up available agents


class NoAvailableAgentException(Exception):
pass


class AgentWithInfo:
def __init__(self, agent, name, modality, source_lang, target_lang):
self.agent = agent
self.name = name
self.modality = modality
self.source_lang = source_lang
self.target_lang = target_lang


class SimulevalAgentDirectory:
# Available models. These are the directories where the models can be found, and also serve as an ID for the model.
# s2t:
s2t_es_en_agent = "s2t_es-en_tt-waitk_multidomain"
s2t_en_es_agent = "s2t_en-es_tt-waitk_multidomain"
s2t_es_en_emma_agent = "s2t_es-en_emma_multidomain_v0.3"
s2t_en_es_emma_agent = "s2t_en-es_emma_multidomain_v0.3"
# s2s:
s2s_es_en_agent = "s2s_es-en_tt-waitk-unity2_multidomain"
s2s_es_en_emma_agent = "s2s_es-en_emma-unity2_multidomain_v0.2"

def __init__(self):
self.agents = []

def add_agent(self, agent, name, modality, source_lang, target_lang):
self.agents.append(
AgentWithInfo(agent, name, modality, source_lang, target_lang)
)

def get_agent(self, modality, source_lang, target_lang):
for agent in self.agents:
if (
agent.modality == modality
and agent.source_lang == source_lang
and agent.target_lang == target_lang
):
return agent.agent
return None

def get_agent_or_throw(self, modality, source_lang, target_lang):
agent = self.get_agent(modality, source_lang, target_lang)
if agent is None:
raise NoAvailableAgentException(
"No agent found for modality=%s, source_lang=%s, target_lang=%s"
% (modality, source_lang, target_lang)
)
return agent
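
A minimal usage sketch of the directory, assuming `agent` is an already-built SimulEval agent (`app.py` below registers real agents via `SimulevalTranscoder.build_agent`):

```python
directory = SimulevalAgentDirectory()
directory.add_agent(
    agent,  # assumed: a prebuilt SimulEval agent object
    SimulevalAgentDirectory.s2t_es_en_emma_agent,
    "s2t",
    "es",
    "en",
)

# Look up by (modality, source_lang, target_lang); get_agent_or_throw raises
# NoAvailableAgentException if no registered agent matches.
try:
    s2t_agent = directory.get_agent_or_throw("s2t", "es", "en")
except NoAvailableAgentException:
    print("No es->en s2t agent registered")
```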
219 changes: 219 additions & 0 deletions demo/seamless_server/app.py
@@ -0,0 +1,219 @@
from logging.config import dictConfig
from flask import Flask
from flask_sockets import Sockets
from SimulevalAgentDirectory import (
    SimulevalAgentDirectory,
    NoAvailableAgentException,
)

from src.connection_tracker import ConnectionTracker

from src.simuleval_transcoder import SimulevalTranscoder
import json
import logging
from werkzeug.routing import Rule
import time

dictConfig(
{
"version": 1,
"formatters": {
"default": {
"format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
}
},
"handlers": {
"wsgi": {
"class": "logging.StreamHandler",
"stream": "ext://flask.logging.wsgi_errors_stream",
"formatter": "default",
}
},
"root": {"level": "INFO", "handlers": ["wsgi"]},
}
)


app = Flask(__name__)
sockets = Sockets(app)
app.logger.setLevel(logging.INFO)

available_agents = SimulevalAgentDirectory()

connection_tracker = ConnectionTracker(app.logger)


def start_seamless_stream_s2t(ws):
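    """Handle a streaming translation websocket connection.

    The client first sends a JSON "config" event, then streams raw audio as
    binary frames; the server pushes back "translation_text" /
    "translation_speech" events (plus latency and active-connection info)
    until a "closed" event arrives or the socket closes.
    """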
app.logger.info("WS Connection accepted")
remote_address = ws.environ.get("REMOTE_ADDR")

connection_tracker.add_connection(remote_address)

app.logger.info("Current connection tracker info:")
app.logger.info(str(connection_tracker))

transcoder = None
debug = False
async_processing = False

def log_debug(*args):
if debug:
app.logger.info(*args)

def ws_send(obj):
to_send = json.dumps(obj)
log_to_send = to_send
if "sample_rate" in to_send:
# don't log the speech payload
log_to_send = json.dumps({k: v for k, v in obj.items() if k != "payload"})
log_debug(f"Gonna send to client: {log_to_send}")
ws.send(to_send)

latency_sent = False

while not ws.closed:
message = ws.receive()
if message is None:
log_debug("No message received...")
continue

connection_tracker.log_recent_message(remote_address)

if transcoder:
speech_and_text_output = transcoder.get_buffered_output()
if speech_and_text_output is not None:
lat = None
if speech_and_text_output.speech_samples:
to_send = {
"event": "translation_speech",
"payload": speech_and_text_output.speech_samples,
"sample_rate": speech_and_text_output.speech_sample_rate,
}
elif speech_and_text_output.text:
to_send = {
"event": "translation_text",
"payload": speech_and_text_output.text,
}
else:
app.logger.warn(
"Got model output with neither speech nor text content"
)
to_send = {} # unexpected case, but not breaking the flow
to_send["eos"] = speech_and_text_output.final

to_send[
"server_active_connections"
] = connection_tracker.get_active_connection_count()

if not latency_sent:
lat = transcoder.first_translation_time()
latency_sent = True
to_send["latency"] = lat

ws_send(to_send)

if isinstance(message, bytearray) and transcoder is not None:
transcoder.process_incoming_bytes(message)
else:
data = json.loads(message)
if data["event"] == "config":
app.logger.debug("Received ws config")
debug = data.get("debug")
async_processing = data.get("async_processing")

source_language_2_letter = data.get("source_language")[:2]
target_language_2_letter = data.get("target_language")[:2]

# Currently s2s or s2t
model_type = data.get("model_type")

try:
agent = available_agents.get_agent_or_throw(
model_type,
source_language_2_letter,
target_language_2_letter,
)
except NoAvailableAgentException as e:
app.logger.warn(f"Error while getting agent: {e}")
ws_send({"event": "error", "payload": str(e)})
ws.close()
break

t0 = time.time()
transcoder = SimulevalTranscoder(
agent,
data["rate"],
debug=debug,
buffer_limit=int(data["buffer_limit"]),
)
t1 = time.time()
log_debug(f"Booting up VAD and transcoder took {t1-t0} sec")
ws_send({"event": "server_ready"})
if async_processing:
transcoder.start()

if data["event"] == "closed":
transcoder.close = True
log_debug("Closed Message received: {}".format(message))
ws.close()
break

if transcoder and not async_processing:
transcoder.process_pipeline_once()

if transcoder and transcoder.close:
ws.close()

app.logger.info("WS Connection closed")

connection_tracker.remove_connection(remote_address)
app.logger.info("Current connection tracker info:")
app.logger.info(str(connection_tracker))

if transcoder:
log_debug("closing transcoder")
transcoder.close = True


sockets.url_map.add(
Rule(
"/api/seamless_stream_es_en_s2t",
endpoint=start_seamless_stream_s2t,
websocket=True,
)
)

if __name__ == "__main__":
# Build all the agents before starting the server
# s2t:
available_agents.add_agent(
SimulevalTranscoder.build_agent(SimulevalAgentDirectory.s2t_es_en_emma_agent),
SimulevalAgentDirectory.s2t_es_en_emma_agent,
"s2t",
"es",
"en",
)
available_agents.add_agent(
SimulevalTranscoder.build_agent(SimulevalAgentDirectory.s2t_en_es_emma_agent),
SimulevalAgentDirectory.s2t_en_es_emma_agent,
"s2t",
"en",
"es",
)
# s2s:
available_agents.add_agent(
SimulevalTranscoder.build_agent(SimulevalAgentDirectory.s2s_es_en_emma_agent),
SimulevalAgentDirectory.s2s_es_en_emma_agent,
"s2s",
"es",
"en",
)

from gevent import pywsgi
from geventwebsocket.handler import WebSocketHandler

server = pywsgi.WSGIServer(("0.0.0.0", 8000), app, handler_class=WebSocketHandler)
app.logger.info("Starting server on port 8000...")
server.serve_forever()
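
For completeness, a minimal client sketch for the protocol above. It assumes the third-party `websocket-client` package and a local server on port 8000; `read_audio_chunks()` is a hypothetical helper that yields raw audio bytes at the configured rate:

```python
import json
from websocket import create_connection  # pip install websocket-client (assumed)

ws = create_connection("ws://localhost:8000/api/seamless_stream_es_en_s2t")

# Configure the stream (see the "config" handling in app.py above).
ws.send(json.dumps({
    "event": "config",
    "model_type": "s2t",
    "source_language": "es",
    "target_language": "en",
    "rate": 16000,          # example sample rate
    "buffer_limit": 1,
    "debug": False,
    "async_processing": False,
}))
print(ws.recv())  # expect a "server_ready" event

# Stream audio as binary frames. The server pushes translation events only
# when it has buffered output, so a real client would read replies on a
# separate thread; here we naively alternate sends and receives.
for chunk in read_audio_chunks():  # hypothetical audio source
    ws.send_binary(chunk)
    reply = json.loads(ws.recv())
    if reply.get("event") == "translation_text":
        print(reply["payload"])

ws.send(json.dumps({"event": "closed"}))
ws.close()
```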