From 0f721a82bcd51996ff38a51e21e4ad758d027609 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Fri, 19 Jul 2024 21:16:37 -0700 Subject: [PATCH 1/2] Replace `load` with `load-by-name` This change removes the `load` function, which generated a `graph` using some opaque bytes, a `graph-encoding` enum, and an `execution-target`. This mechanism allowed WebAssembly guest code (i.e., running within the WebAssembly sandbox) to control _when_ a model is loaded, but by doing so, exposed details that users will likely not need. In FaaS use cases, for example, user code simply does not have the time to retrieve and load a model for every HTTP request. This PR proposes instead that users _always_ load models outside the sandbox and then load them by a host-specified name. This is a proposal intended for discussion, not a foregone conclusion, so please provide feedback! If you have a use case that relies directly on users being able to load models via buffers, that would undermine the assumptions of this PR (that no one will use wasi-nn in this way). But consider the downsides of the current approach: wasi-nn must keep track of an ever-growing list of graph encodings and users must somehow "see through" wasi-nn to set up the model buffers. Switching to `load-by-name`--now called `load`--would resolve these issues, moving any model and framework details into the host configuration, where they already exist anyway. --- wit/wasi-nn.wit | 43 ++++++------------------------------------- 1 file changed, 6 insertions(+), 37 deletions(-) diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit index 872e8cd..2555a46 100644 --- a/wit/wasi-nn.wit +++ b/wit/wasi-nn.wit @@ -62,48 +62,19 @@ interface tensor { /// framework (e.g., TensorFlow): interface graph { use errors.{error}; - use tensor.{tensor}; use inference.{graph-execution-context}; - /// An execution graph for performing inference (i.e., a model). 
- resource graph { - init-execution-context: func() -> result<graph-execution-context, error>; - } - - /// Describes the encoding of the graph. This allows the API to be implemented by various - /// backends that encode (i.e., serialize) their graph IR with different formats. - enum graph-encoding { - openvino, - onnx, - tensorflow, - pytorch, - tensorflowlite, - ggml, - autodetect, - } - - /// Define where the graph should be executed. - enum execution-target { - cpu, - gpu, - tpu - } - - /// The graph initialization data. - /// - /// This gets bundled up into an array of buffers because implementing backends may encode their - /// graph IR in parts (e.g., OpenVINO stores its IR and weights separately). - type graph-builder = list<u8>; - - /// Load a `graph` from an opaque sequence of bytes to use for inference. - load: func(builder: list<graph-builder>, encoding: graph-encoding, target: execution-target) -> result<graph, error>; - /// Load a `graph` by name. /// /// How the host expects the names to be passed and how it stores the graphs for retrieval via /// this function is **implementation-specific**. This allows hosts to choose name schemes that /// range from simple to complex (e.g., URLs?) and caching mechanisms of various kinds. - load-by-name: func(name: string) -> result<graph, error>; + load: func(name: string) -> result<graph, error>; + + /// An execution graph for performing inference (i.e., a model). + resource graph { + init-execution-context: func() -> result<graph-execution-context, error>; + } } /// An inference "session" is encapsulated by a `graph-execution-context`. This structure binds a @@ -137,8 +108,6 @@ interface errors { enum error-code { // Caller module passed an invalid argument. invalid-argument, - // Invalid encoding. - invalid-encoding, // The operation timed out. timeout, // Runtime Error. 
From 2fc6261be785cadd4e83dec66f57cffba5fc98f0 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Fri, 19 Jul 2024 21:29:23 -0700 Subject: [PATCH 2/2] Fixup `ml.md` --- ml.md | 64 +++++++++-------------------------------------------------- 1 file changed, 9 insertions(+), 55 deletions(-) diff --git a/ml.md b/ml.md index bce656f..7100772 100644 --- a/ml.md +++ b/ml.md @@ -102,10 +102,6 @@ containing a single value, use [1] for the tensor dimensions.

Caller module passed an invalid argument.

  • -

    invalid-encoding

    -

    Invalid encoding. -

  • -
  • timeout

    The operation timed out.

  • @@ -231,73 +227,31 @@ framework (e.g., TensorFlow):

    type error

    error

    -#### `type tensor` -[`tensor`](#tensor) -

    #### `type graph-execution-context` [`graph-execution-context`](#graph_execution_context)

    #### `resource graph` -

    An execution graph for performing inference (i.e., a model).

    -

    enum graph-encoding

    -

    Describes the encoding of the graph. This allows the API to be implemented by various -backends that encode (i.e., serialize) their graph IR with different formats.

    -
    Enum Cases
    -
      -
    • openvino
    • -
    • onnx
    • -
    • tensorflow
    • -
    • pytorch
    • -
    • tensorflowlite
    • -
    • ggml
    • -
    • autodetect
    • -
    -

    enum execution-target

    -

    Define where the graph should be executed.

    -
    Enum Cases
    -
      -
    • cpu
    • -
    • gpu
    • -
    • tpu
    • -
    -

    type graph-builder

    -

    graph-builder

    -

    The graph initialization data. -

    This gets bundled up into an array of buffers because implementing backends may encode their -graph IR in parts (e.g., OpenVINO stores its IR and weights separately).

    -
    +

    An execution graph for performing inference (i.e., a model).

    Functions

    -

    [method]graph.init-execution-context: func

    -
    Params
    - -
    Return values
    -

    load: func

    -

    Load a graph from an opaque sequence of bytes to use for inference.

    +

    Load a graph by name.

    +

    How the host expects the names to be passed and how it stores the graphs for retrieval via +this function is implementation-specific. This allows hosts to choose name schemes that +range from simple to complex (e.g., URLs?) and caching mechanisms of various kinds.

    Params
    Return values
    -

    load-by-name: func

    -

    Load a graph by name.

    -

    How the host expects the names to be passed and how it stores the graphs for retrieval via -this function is implementation-specific. This allows hosts to choose name schemes that -range from simple to complex (e.g., URLs?) and caching mechanisms of various kinds.

    +

    [method]graph.init-execution-context: func

    Params
      -
    • name: string
    • +
    • self: borrow<graph>
    Return values