2025-07-27 00:02:14 -04:00
11 changed files with 654 additions and 167 deletions
--- a/.circleci/continue_config.yml
+++ b/.circleci/continue_config.yml
@ -854,7 +854,6 @@ jobs:
          install-yarn: true
          node-version: "18.16"
      - run: node --version
      - run: corepack enable
      - node/install-packages:
          app-dir: gpt4all-bindings/typescript
          pkg-manager: yarn
@ -885,7 +884,6 @@ jobs:
          install-yarn: true
          node-version: "18.16"
      - run: node --version
      - run: corepack enable
      - node/install-packages:
          app-dir: gpt4all-bindings/typescript
          pkg-manager: yarn
@ -898,14 +896,14 @@ jobs:
          name: "Persisting all necessary things to workspace"
          command: |  
            mkdir -p gpt4all-backend/prebuilds/darwin-x64
-            mkdir -p gpt4all-backend/runtimes/darwin
+            mkdir -p gpt4all-backend/runtimes/darwin-x64
-            cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin
+            cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin-x64
            cp gpt4all-bindings/typescript/prebuilds/darwin-x64/*.node gpt4all-backend/prebuilds/darwin-x64
      - persist_to_workspace:
          root: gpt4all-backend
          paths:
            - prebuilds/darwin-x64/*.node
-            - runtimes/darwin/*-*.*
+            - runtimes/darwin-x64/*-*.*
  build-nodejs-windows: 
    executor:
@ -927,7 +925,6 @@ jobs:
              nvm install 18.16.0
              nvm use 18.16.0
      - run: node --version 
      - run: corepack enable
      - run:           
          command: |
            npm install -g yarn
@ -961,7 +958,6 @@ jobs:
          install-yarn: true
          node-version: "18.16"
      - run: node --version
      - run: corepack enable
      - run: 
          command: |
            cd gpt4all-bindings/typescript
@ -976,12 +972,9 @@ jobs:
            cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/ 
            cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/    
-            # darwin has univeral runtime libraries
+            mkdir -p runtimes/darwin-x64/native
            mkdir -p runtimes/darwin/native
            mkdir -p prebuilds/darwin-x64/
-
+            cp /tmp/gpt4all-backend/runtimes/darwin-x64/*-*.* runtimes/darwin-x64/native/
            cp /tmp/gpt4all-backend/runtimes/darwin/*-*.* runtimes/darwin/native/
            cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/    
            # Fallback build if user is not on above prebuilds
--- a/gpt4all-bindings/python/docs/gpt4all_typescript.md
+++ b/gpt4all-bindings/python/docs/gpt4all_typescript.md
@ -1,14 +1,11 @@
 # GPT4All Node.js API
 Native Node.js LLM bindings for all.
 ```sh
-yarn add gpt4all@latest
+yarn add gpt4all@alpha
-npm install gpt4all@latest
+npm install gpt4all@alpha
 pnpm install gpt4all@latest
 pnpm install gpt4all@alpha
 ```
 The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date.
@ -18,12 +15,12 @@ The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-t
 *   Everything should work out of the box.
 *   See [API Reference](#api-reference)
-### Chat Completion
+### Chat Completion (alpha)
 ```js
 import { createCompletion, loadModel } from '../src/gpt4all.js'
-const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true });
+const model = await loadModel('ggml-vicuna-7b-1.1-q4_2', { verbose: true });
 const response = await createCompletion(model, [
    { role : 'system', content: 'You are meant to be annoying and unhelpful.'  },
@ -32,7 +29,7 @@ const response = await createCompletion(model, [
 ```
-### Embedding
+### Embedding (alpha)
 ```js
 import { createEmbedding, loadModel } from '../src/gpt4all.js'
@ -85,6 +82,8 @@ yarn
 git submodule update --init --depth 1 --recursive
 ```
 **AS OF NEW BACKEND** to build the backend,
 ```sh
 yarn build:backend
 ```
@ -153,16 +152,13 @@ This package is in active development, and breaking changes may happen until the
 ##### Table of Contents
 *   [ModelType](#modeltype)
 *   [ModelFile](#modelfile)
    *   [gptj](#gptj)
    *   [llama](#llama)
    *   [mpt](#mpt)
    *   [replit](#replit)
 *   [type](#type)
 *   [InferenceModel](#inferencemodel)
    *   [dispose](#dispose)
 *   [EmbeddingModel](#embeddingmodel)
    *   [dispose](#dispose-1)
 *   [LLModel](#llmodel)
    *   [constructor](#constructor)
        *   [Parameters](#parameters)
@ -180,20 +176,12 @@ This package is in active development, and breaking changes may happen until the
    *   [setLibraryPath](#setlibrarypath)
        *   [Parameters](#parameters-4)
    *   [getLibraryPath](#getlibrarypath)
    *   [initGpuByString](#initgpubystring)
        *   [Parameters](#parameters-5)
    *   [hasGpuDevice](#hasgpudevice)
    *   [listGpu](#listgpu)
    *   [dispose](#dispose-2)
 *   [GpuDevice](#gpudevice)
    *   [type](#type-2)
 *   [LoadModelOptions](#loadmodeloptions)
 *   [loadModel](#loadmodel)
-    *   [Parameters](#parameters-6)
+    *   [Parameters](#parameters-5)
 *   [createCompletion](#createcompletion)
-    *   [Parameters](#parameters-7)
+    *   [Parameters](#parameters-6)
 *   [createEmbedding](#createembedding)
-    *   [Parameters](#parameters-8)
+    *   [Parameters](#parameters-7)
 *   [CompletionOptions](#completionoptions)
    *   [verbose](#verbose)
    *   [systemPromptTemplate](#systemprompttemplate)
@ -226,14 +214,14 @@ This package is in active development, and breaking changes may happen until the
    *   [repeatLastN](#repeatlastn)
    *   [contextErase](#contexterase)
 *   [createTokenStream](#createtokenstream)
-    *   [Parameters](#parameters-9)
+    *   [Parameters](#parameters-8)
 *   [DEFAULT\_DIRECTORY](#default_directory)
 *   [DEFAULT\_LIBRARIES\_DIRECTORY](#default_libraries_directory)
 *   [DEFAULT\_MODEL\_CONFIG](#default_model_config)
-*   [DEFAULT\_PROMPT\_CONTEXT](#default_prompt_context)
+*   [DEFAULT\_PROMT\_CONTEXT](#default_promt_context)
 *   [DEFAULT\_MODEL\_LIST\_URL](#default_model_list_url)
 *   [downloadModel](#downloadmodel)
-    *   [Parameters](#parameters-10)
+    *   [Parameters](#parameters-9)
    *   [Examples](#examples)
 *   [DownloadModelOptions](#downloadmodeloptions)
    *   [modelPath](#modelpath)
@ -244,10 +232,16 @@ This package is in active development, and breaking changes may happen until the
    *   [cancel](#cancel)
    *   [promise](#promise)
 #### ModelType
 Type of the model
 Type: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`)
 #### ModelFile
 Full list of models available
-DEPRECATED!! These model names are outdated and this type will not be maintained, please use a string literal instead
+@deprecated These model names are outdated and this type will not be maintained, please use a string literal instead
 ##### gptj
@ -277,27 +271,7 @@ Type: `"ggml-replit-code-v1-3b.bin"`
 Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.
-Type: ModelType
+Type: [ModelType](#modeltype)
 #### InferenceModel
 InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers.
 ##### dispose
 delete and cleanup the native model
 Returns **void**&#x20;
 #### EmbeddingModel
 EmbeddingModel represents an LLM which can create embeddings, which are float arrays
 ##### dispose
 delete and cleanup the native model
 Returns **void**&#x20;
 #### LLModel
@ -320,7 +294,7 @@ Initialize a new LLModel.
 either 'gpt', 'mpt', or 'llama' or undefined
-Returns **(ModelType | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))**&#x20;
+Returns **([ModelType](#modeltype) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))**&#x20;
 ##### name
@ -402,52 +376,6 @@ Where to get the pluggable backend libraries
 Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
 ##### initGpuByString
 Initiate a GPU by a string identifier.
 ###### Parameters
 *   `memory_required` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Should be in the range size\_t or will throw
 *   `device_name` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name.
    read LoadModelOptions.device for more information
 Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)**&#x20;
 ##### hasGpuDevice
 From C documentation
 Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** True if a GPU device is successfully initialized, false otherwise.
 ##### listGpu
 GPUs that are usable for this LLModel
 *   Throws **any** if hasGpuDevice returns false (i think)
 Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[GpuDevice](#gpudevice)>**&#x20;
 ##### dispose
 delete and cleanup the native model
 Returns **void**&#x20;
 #### GpuDevice
 an object that contains gpu data on this machine.
 ##### type
 same as VkPhysicalDeviceType
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 #### LoadModelOptions
 Options that configure a model's behavior.
 #### loadModel
 Loads a machine learning model with the specified name. The de facto way to create a model.
@ -456,9 +384,9 @@ By default this will download a model from the official GPT4ALL website, if a mo
 ##### Parameters
 *   `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the model to load.
-*   `options` **([LoadModelOptions](#loadmodeloptions) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model.
+*   `options` **(LoadModelOptions | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model.
-Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<([InferenceModel](#inferencemodel) | [EmbeddingModel](#embeddingmodel))>** A promise that resolves to an instance of the loaded LLModel.
+Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<(InferenceModel | EmbeddingModel)>** A promise that resolves to an instance of the loaded LLModel.
 #### createCompletion
@ -466,7 +394,7 @@ The nodejs equivalent to python binding's chat\_completion
 ##### Parameters
-*   `model` **[InferenceModel](#inferencemodel)** The language model object.
+*   `model` **InferenceModel** The language model object.
 *   `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** The array of messages for the conversation.
 *   `options` **[CompletionOptions](#completionoptions)** The options for creating the completion.
@ -479,7 +407,7 @@ meow
 ##### Parameters
-*   `model` **[EmbeddingModel](#embeddingmodel)** The language model object.
+*   `model` **EmbeddingModel** The language model object.
 *   `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** text to embed
 Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The completion result.
@ -724,7 +652,7 @@ Default model configuration.
 Type: ModelConfig
-#### DEFAULT\_PROMPT\_CONTEXT
+#### DEFAULT\_PROMT\_CONTEXT
 Default prompt context.
--- a/gpt4all-bindings/python/mkdocs.yml
+++ b/gpt4all-bindings/python/mkdocs.yml
@ -14,7 +14,7 @@ nav:
      - 'GPT4All in Python':
        - 'Generation': 'gpt4all_python.md'
        - 'Embedding': 'gpt4all_python_embedding.md'
-      - 'GPT4ALL in NodeJs': 'gpt4all_nodejs.md'
+      - 'GPT4ALL in NodeJs': 'gpt4all_typescript.md'
      - 'gpt4all_cli.md'
 #    - 'Tutorials':
 #      - 'gpt4all_modal.md'
--- a/gpt4all-bindings/typescript/README.md
+++ b/gpt4all-bindings/typescript/README.md
@ -1,14 +1,11 @@
 # GPT4All Node.js API
 Native Node.js LLM bindings for all.
 ```sh
 yarn add gpt4all@latest
 npm install gpt4all@latest
 pnpm install gpt4all@latest
 ```
 The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date.
@ -23,7 +20,7 @@ The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-t
 ```js
 import { createCompletion, loadModel } from '../src/gpt4all.js'
-const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true });
+const model = await loadModel('ggml-vicuna-7b-1.1-q4_2', { verbose: true });
 const response = await createCompletion(model, [
    { role : 'system', content: 'You are meant to be annoying and unhelpful.'  },
@ -147,3 +144,587 @@ This package is in active development, and breaking changes may happen until the
 *   \[ ] createChatSession ( the python equivalent to create\_chat\_session )
 ### API Reference
 <!-- Generated by documentation.js. Update this documentation by updating the source code. -->
 ##### Table of Contents
 *   [ModelType](#modeltype)
 *   [ModelFile](#modelfile)
    *   [gptj](#gptj)
    *   [llama](#llama)
    *   [mpt](#mpt)
    *   [replit](#replit)
 *   [type](#type)
 *   [LLModel](#llmodel)
    *   [constructor](#constructor)
        *   [Parameters](#parameters)
    *   [type](#type-1)
    *   [name](#name)
    *   [stateSize](#statesize)
    *   [threadCount](#threadcount)
    *   [setThreadCount](#setthreadcount)
        *   [Parameters](#parameters-1)
    *   [raw\_prompt](#raw_prompt)
        *   [Parameters](#parameters-2)
    *   [embed](#embed)
        *   [Parameters](#parameters-3)
    *   [isModelLoaded](#ismodelloaded)
    *   [setLibraryPath](#setlibrarypath)
        *   [Parameters](#parameters-4)
    *   [getLibraryPath](#getlibrarypath)
 *   [loadModel](#loadmodel)
    *   [Parameters](#parameters-5)
 *   [createCompletion](#createcompletion)
    *   [Parameters](#parameters-6)
 *   [createEmbedding](#createembedding)
    *   [Parameters](#parameters-7)
 *   [CompletionOptions](#completionoptions)
    *   [verbose](#verbose)
    *   [systemPromptTemplate](#systemprompttemplate)
    *   [promptTemplate](#prompttemplate)
    *   [promptHeader](#promptheader)
    *   [promptFooter](#promptfooter)
 *   [PromptMessage](#promptmessage)
    *   [role](#role)
    *   [content](#content)
 *   [prompt\_tokens](#prompt_tokens)
 *   [completion\_tokens](#completion_tokens)
 *   [total\_tokens](#total_tokens)
 *   [CompletionReturn](#completionreturn)
    *   [model](#model)
    *   [usage](#usage)
    *   [choices](#choices)
 *   [CompletionChoice](#completionchoice)
    *   [message](#message)
 *   [LLModelPromptContext](#llmodelpromptcontext)
    *   [logitsSize](#logitssize)
    *   [tokensSize](#tokenssize)
    *   [nPast](#npast)
    *   [nCtx](#nctx)
    *   [nPredict](#npredict)
    *   [topK](#topk)
    *   [topP](#topp)
    *   [temp](#temp)
    *   [nBatch](#nbatch)
    *   [repeatPenalty](#repeatpenalty)
    *   [repeatLastN](#repeatlastn)
    *   [contextErase](#contexterase)
 *   [createTokenStream](#createtokenstream)
    *   [Parameters](#parameters-8)
 *   [DEFAULT\_DIRECTORY](#default_directory)
 *   [DEFAULT\_LIBRARIES\_DIRECTORY](#default_libraries_directory)
 *   [DEFAULT\_MODEL\_CONFIG](#default_model_config)
 *   [DEFAULT\_PROMT\_CONTEXT](#default_promt_context)
 *   [DEFAULT\_MODEL\_LIST\_URL](#default_model_list_url)
 *   [downloadModel](#downloadmodel)
    *   [Parameters](#parameters-9)
    *   [Examples](#examples)
 *   [DownloadModelOptions](#downloadmodeloptions)
    *   [modelPath](#modelpath)
    *   [verbose](#verbose-1)
    *   [url](#url)
    *   [md5sum](#md5sum)
 *   [DownloadController](#downloadcontroller)
    *   [cancel](#cancel)
    *   [promise](#promise)
 #### ModelType
 Type of the model
 Type: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`)
 #### ModelFile
 Full list of models available
@deprecated These model names are outdated and this type will not be maintained, please use a string literal instead
 ##### gptj
 List of GPT-J Models
 Type: (`"ggml-gpt4all-j-v1.3-groovy.bin"` | `"ggml-gpt4all-j-v1.2-jazzy.bin"` | `"ggml-gpt4all-j-v1.1-breezy.bin"` | `"ggml-gpt4all-j.bin"`)
 ##### llama
 List of Llama Models
 Type: (`"ggml-gpt4all-l13b-snoozy.bin"` | `"ggml-vicuna-7b-1.1-q4_2.bin"` | `"ggml-vicuna-13b-1.1-q4_2.bin"` | `"ggml-wizardLM-7B.q4_2.bin"` | `"ggml-stable-vicuna-13B.q4_2.bin"` | `"ggml-nous-gpt4-vicuna-13b.bin"` | `"ggml-v3-13b-hermes-q5_1.bin"`)
 ##### mpt
 List of MPT Models
 Type: (`"ggml-mpt-7b-base.bin"` | `"ggml-mpt-7b-chat.bin"` | `"ggml-mpt-7b-instruct.bin"`)
 ##### replit
 List of Replit Models
 Type: `"ggml-replit-code-v1-3b.bin"`
 #### type
 Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.
 Type: [ModelType](#modeltype)
 #### LLModel
 LLModel class representing a language model.
 This is a base class that provides common functionality for different types of language models.
 ##### constructor
 Initialize a new LLModel.
 ###### Parameters
 *   `path` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Absolute path to the model file.
 <!---->
 *   Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model file does not exist.
 ##### type
 either 'gpt', 'mpt', or 'llama' or undefined
 Returns **([ModelType](#modeltype) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))**&#x20;
 ##### name
 The name of the model.
 Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
 ##### stateSize
 Get the size of the internal state of the model.
 NOTE: This state data is specific to the type of model you have created.
 Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** the size in bytes of the internal state of the model
 ##### threadCount
 Get the number of threads used for model inference.
 The default is the number of physical cores your computer has.
 Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The number of threads used for model inference.
 ##### setThreadCount
 Set the number of threads used for model inference.
 ###### Parameters
 *   `newNumber` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The new number of threads.
 Returns **void**&#x20;
 ##### raw\_prompt
 Prompt the model with a given input and optional parameters.
 This is the raw output from model.
 Use the prompt function exported for a value
 ###### Parameters
 *   `q` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The prompt input.
 *   `params` **Partial<[LLModelPromptContext](#llmodelpromptcontext)>** Optional parameters for the prompt context.
 *   `callback` **function (res: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)): void**&#x20;
 Returns **void** The result of the model prompt.
 ##### embed
 Embed text with the model. Keep in mind that
 not all models can embed text, (only bert can embed as of 07/16/2023 (mm/dd/yyyy))
 Use the prompt function exported for a value
 ###### Parameters
 *   `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
 *   `q`  The prompt input.
 *   `params`  Optional parameters for the prompt context.
 Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The result of the model prompt.
 ##### isModelLoaded
 Whether the model is loaded or not.
 Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)**&#x20;
 ##### setLibraryPath
 Where to search for the pluggable backend libraries
 ###### Parameters
 *   `s` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
 Returns **void**&#x20;
 ##### getLibraryPath
 Where to get the pluggable backend libraries
 Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
 #### loadModel
 Loads a machine learning model with the specified name. The de facto way to create a model.
 By default this will download a model from the official GPT4ALL website, if a model is not present at given path.
 ##### Parameters
 *   `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the model to load.
 *   `options` **(LoadModelOptions | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model.
 Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<(InferenceModel | EmbeddingModel)>** A promise that resolves to an instance of the loaded LLModel.
 #### createCompletion
 The nodejs equivalent to python binding's chat\_completion
 ##### Parameters
 *   `model` **InferenceModel** The language model object.
 *   `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** The array of messages for the conversation.
 *   `options` **[CompletionOptions](#completionoptions)** The options for creating the completion.
 Returns **[CompletionReturn](#completionreturn)** The completion result.
 #### createEmbedding
 The nodejs moral equivalent to python binding's Embed4All().embed()
 meow
 ##### Parameters
 *   `model` **EmbeddingModel** The language model object.
 *   `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** text to embed
 Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The completion result.
 #### CompletionOptions
 **Extends Partial\<LLModelPromptContext>**
 The options for creating the completion.
 ##### verbose
 Indicates if verbose logging is enabled.
 Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
 ##### systemPromptTemplate
 Template for the system message. Will be put before the conversation with %1 being replaced by all system messages.
 Note that if this is not defined, system messages will not be included in the prompt.
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 ##### promptTemplate
 Template for user messages, with %1 being replaced by the message.
 Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
 ##### promptHeader
 The initial instruction for the model, on top of the prompt
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 ##### promptFooter
 The last instruction for the model, appended to the end of the prompt.
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 #### PromptMessage
 A message in the conversation, identical to OpenAI's chat message.
 ##### role
 The role of the message.
 Type: (`"system"` | `"assistant"` | `"user"`)
 ##### content
 The message content.
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 #### prompt\_tokens
 The number of tokens used in the prompt.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 #### completion\_tokens
 The number of tokens used in the completion.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 #### total\_tokens
 The total number of tokens used.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 #### CompletionReturn
 The result of the completion, similar to OpenAI's format.
 ##### model
 The model used for the completion.
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 ##### usage
 Token usage report.
 Type: {prompt\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), completion\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), total\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)}
 ##### choices
 The generated completions.
 Type: [Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[CompletionChoice](#completionchoice)>
 #### CompletionChoice
 A completion choice, similar to OpenAI's format.
 ##### message
 Response message
 Type: [PromptMessage](#promptmessage)
 #### LLModelPromptContext
 Model inference arguments for generating completions.
 ##### logitsSize
 The size of the raw logits vector.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### tokensSize
 The size of the raw tokens vector.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### nPast
 The number of tokens in the past conversation.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### nCtx
 The number of tokens possible in the context window.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### nPredict
 The number of tokens to predict.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### topK
 The top-k logits to sample from.
 Top-K sampling selects the next token only from the top K most likely tokens predicted by the model.
 It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit
 the diversity of the output. A higher value for top-K (eg., 100) will consider more tokens and lead
 to more diverse text, while a lower value (eg., 10) will focus on the most probable tokens and generate
 more conservative text. 30 - 60 is a good range for most tasks.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### topP
 The nucleus sampling probability threshold.
 Top-P limits the selection of the next token to a subset of tokens with a cumulative probability
 above a threshold P. This method, also known as nucleus sampling, finds a balance between diversity
 and quality by considering both token probabilities and the number of tokens available for sampling.
 When using a higher value for top-P (eg., 0.95), the generated text becomes more diverse.
 On the other hand, a lower value (eg., 0.1) produces more focused and conservative text.
 The default value is 0.4, which is aimed to be the middle ground between focus and diversity, but
 for more creative tasks a higher top-p value will be beneficial, about 0.5-0.9 is a good range for that.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### temp
 The temperature to adjust the model's output distribution.
 Temperature is like a knob that adjusts how creative or focused the output becomes. Higher temperatures
 (eg., 1.2) increase randomness, resulting in more imaginative and diverse text. Lower temperatures (eg., 0.5)
 make the output more focused, predictable, and conservative. When the temperature is set to 0, the output
 becomes completely deterministic, always selecting the most probable next token and producing identical results
 each time. A safe range would be around 0.6 - 0.85, but you are free to search what value fits best for you.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### nBatch
 The number of predictions to generate in parallel.
 By splitting the prompt every N tokens, prompt-batch-size reduces RAM usage during processing. However,
 this can increase the processing time as a trade-off. If the N value is set too low (e.g., 10), long prompts
 with 500+ tokens will be most affected, requiring numerous processing runs to complete the prompt processing.
 To ensure optimal performance, setting the prompt-batch-size to 2048 allows processing of all tokens in a single run.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### repeatPenalty
 The penalty factor for repeated tokens.
 Repeat-penalty can help penalize tokens based on how frequently they occur in the text, including the input prompt.
 A token that has already appeared five times is penalized more heavily than a token that has appeared only one time.
 A value of 1 means that there is no penalty and values larger than 1 discourage repeated tokens.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### repeatLastN
 The number of last tokens to penalize.
 The repeat-penalty-tokens N option controls the number of tokens in the history to consider for penalizing repetition.
 A larger value will look further back in the generated text to prevent repetitions, while a smaller value will only
 consider recent tokens.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 ##### contextErase
 The percentage of context to erase if the context window is exceeded.
 Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
 #### createTokenStream
 TODO: Help wanted to implement this
 ##### Parameters
 *   `llmodel` **[LLModel](#llmodel)**&#x20;
 *   `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>**&#x20;
 *   `options` **[CompletionOptions](#completionoptions)**&#x20;
 Returns **function (ll: [LLModel](#llmodel)): AsyncGenerator<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>**&#x20;
 #### DEFAULT\_DIRECTORY
 From python api:
 models will be stored in `(homedir)/.cache/gpt4all/`
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 #### DEFAULT\_LIBRARIES\_DIRECTORY
 From python api:
 The default path for dynamic libraries to be stored.
 You may separate paths by a semicolon to search in multiple areas.
 This searches DEFAULT\_DIRECTORY/libraries, cwd/libraries, and finally cwd.
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 #### DEFAULT\_MODEL\_CONFIG
 Default model configuration.
 Type: ModelConfig
 #### DEFAULT\_PROMT\_CONTEXT
 Default prompt context.
 Type: [LLModelPromptContext](#llmodelpromptcontext)
 #### DEFAULT\_MODEL\_LIST\_URL
 Default model list url.
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 #### downloadModel
 Initiates the download of a model file.
 By default this downloads without waiting. Use the returned controller to alter this behavior.
 ##### Parameters
 *   `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The model to be downloaded.
 *   `options` **[DownloadModelOptions](#downloadmodeloptions)** Options to pass into the downloader. Default is { modelPath: (cwd), verbose: false }.
 ##### Examples
 ```javascript
 const download = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin')
 download.promise.then(() => console.log('Downloaded!'))
 ```
 *   Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model already exists in the specified location.
 *   Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model cannot be found at the specified url.
 Returns **[DownloadController](#downloadcontroller)** object that allows controlling the download process.
 #### DownloadModelOptions
 Options for the model download process.
 ##### modelPath
 location to download the model.
 Default is process.cwd(), or the current working directory
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 ##### verbose
 Debug mode -- check how long it took to download in seconds
 Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
 ##### url
 Remote download url. Defaults to `https://gpt4all.io/models/gguf/<modelName>`
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 ##### md5sum
 MD5 sum of the model file. If this is provided, the downloaded file will be checked against this sum.
 If the sums do not match, an error will be thrown and the file will be deleted.
 Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
 #### DownloadController
 Model download controller.
 ##### cancel
 Cancel the request to download if this is called.
 Type: function (): void
 ##### promise
 A promise resolving to the downloaded model's config once the download is done.
 Type: [Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)\<ModelConfig>
--- a/gpt4all-bindings/typescript/index.cc
+++ b/gpt4all-bindings/typescript/index.cc
@ -81,7 +81,7 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
  Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo& info) 
  {
    auto env = info.Env();
-    size_t memory_required = static_cast<size_t>(info[0].As<Napi::Number>().Uint32Value());
+    uint32_t memory_required = info[0].As<Napi::Number>();
    std::string gpu_device_identifier = info[1].As<Napi::String>();   
@ -149,14 +149,16 @@ Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
    }
    if(device != "cpu") {
        size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str());
        if(mem == 0) {
            std::cout << "WARNING: no memory needed. does this model support gpu?\n";
        }
        std::cout << "Initiating GPU\n";
        std::cout << "Memory required estimation: " << mem << "\n";
        auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
        if(success) {
            std::cout << "GPU init successfully\n";
        } else {
            //https://github.com/nomic-ai/gpt4all/blob/3acbef14b7c2436fe033cae9036e695d77461a16/gpt4all-bindings/python/gpt4all/pyllmodel.py#L215
            //Haven't implemented this but it is still open to contribution
            std::cout << "WARNING: Failed to init GPU\n";
        }
    }
--- a/gpt4all-bindings/typescript/package.json
+++ b/gpt4all-bindings/typescript/package.json
@ -1,6 +1,6 @@
 {
  "name": "gpt4all",
-  "version": "3.1.0",
+  "version": "3.0.0",
  "packageManager": "yarn@3.6.1",
  "main": "src/gpt4all.js",
  "repository": "nomic-ai/gpt4all",
@ -9,7 +9,9 @@
    "test": "jest",
    "build:backend": "node scripts/build.js",
    "build": "node-gyp-build",
-    "docs:build": "node scripts/docs.js && documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_nodejs.md"
+    "predocs:build": "node scripts/docs.js",
    "docs:build": "documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file ../python/docs/gpt4all_typescript.md",
    "postdocs:build": "documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section \"API Reference\" --readme-file README.md"
  },
  "files": [
    "src/**/*",
--- a/gpt4all-bindings/typescript/scripts/docs.js
+++ b/gpt4all-bindings/typescript/scripts/docs.js
@ -2,11 +2,7 @@
 const fs = require('fs');
-const newPath = '../python/docs/gpt4all_nodejs.md';
+const newPath = '../python/docs/gpt4all_typescript.md';
-const filepath = './README.md';
+const filepath = 'README.md';
-const intro = fs.readFileSync(filepath);
+const data = fs.readFileSync(filepath);
-
+fs.writeFileSync(newPath, data);
 fs.writeFileSync(
    newPath, intro
 );
--- a/gpt4all-bindings/typescript/src/config.js
+++ b/gpt4all-bindings/typescript/src/config.js
@ -9,13 +9,7 @@ const librarySearchPaths = [
    path.resolve(
        __dirname,
        "..",
-        `runtimes/${process.platform}-${process.arch}/native`,
+        `runtimes/${process.platform}-${process.arch}/native`
    ),
    //for darwin. This is hardcoded for now but it should work
    path.resolve(
        __dirname,
        "..",
        `runtimes/${process.platform}/native`,
    ),
    process.cwd(),
 ];
--- a/gpt4all-bindings/typescript/src/gpt4all.d.ts
+++ b/gpt4all-bindings/typescript/src/gpt4all.d.ts
@ -1,12 +1,13 @@
 /// <reference types="node" />
 declare module "gpt4all";
 /** Type of the model */
 type ModelType = "gptj" | "llama" | "mpt" | "replit";
 // NOTE: "deprecated" tag in below comment breaks the doc generator https://github.com/documentationjs/documentation/issues/1596
 /**
 * Full list of models available
- * DEPRECATED!! These model names are outdated and this type will not be maintained, please use a string literal instead
+ * @deprecated These model names are outdated and this type will not be maintained, please use a string literal instead
 */
 interface ModelFile {
    /** List of GPT-J Models */
@ -33,6 +34,7 @@ interface ModelFile {
    replit: "ggml-replit-code-v1-3b.bin";
 }
 //mirrors py options
 interface LLModelOptions {
    /**
     * Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.
@ -49,11 +51,7 @@ interface ModelConfig {
    path: string;
    url?: string;
 }
-/**
+
 *
 * InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers.
 *
 */
 declare class InferenceModel {
    constructor(llm: LLModel, config: ModelConfig);
    llm: LLModel;
@ -70,9 +68,6 @@ declare class InferenceModel {
    dispose(): void
 }
 /**
 * EmbeddingModel represents an LLM which can create embeddings, which are float arrays
 */
 declare class EmbeddingModel {
    constructor(llm: LLModel, config: ModelConfig);
    llm: LLModel;
@ -176,7 +171,6 @@ declare class LLModel {
    hasGpuDevice(): boolean
    /**
      * GPUs that are usable for this LLModel
      * @throws if hasGpuDevice returns false (i think)
      * @returns 
      */
    listGpu() : GpuDevice[]
@ -200,9 +194,6 @@ interface GpuDevice {
    vendor: string;
 }
 /**
  * Options that configure a model's behavior.
  */
 interface LoadModelOptions {
    modelPath?: string;
    librariesPath?: string;
--- a/gpt4all-bindings/typescript/src/gpt4all.js
+++ b/gpt4all-bindings/typescript/src/gpt4all.js
@ -18,7 +18,6 @@ const {
    DEFAULT_MODEL_LIST_URL,
 } = require("./config.js");
 const { InferenceModel, EmbeddingModel } = require("./models.js");
 const assert = require("assert");
 /**
 * Loads a machine learning model with the specified name. The de facto way to create a model.
@ -46,17 +45,23 @@ async function loadModel(modelName, options = {}) {
        verbose: loadOptions.verbose,
    });
-    assert.ok(typeof loadOptions.librariesPath === 'string');
+    const libSearchPaths = loadOptions.librariesPath.split(";");
    const existingPaths = loadOptions.librariesPath
        .split(";")
        .filter(existsSync)
        .join(';');
    console.log("Passing these paths into runtime library search:", existingPaths)
    let libPath = null;
    for (const searchPath of libSearchPaths) {
        if (existsSync(searchPath)) {
            libPath = searchPath;
            break;
        }
    }
    if (!libPath) {
        throw Error("Could not find a valid path from " + libSearchPaths);
    }
    const llmOptions = {
        model_name: appendBinSuffixIfMissing(modelName),
        model_path: loadOptions.modelPath,
-        library_path: existingPaths,
+        library_path: libPath,
        device: loadOptions.device,
    };
--- a/gpt4all-bindings/typescript/test/gpt4all.test.js
+++ b/gpt4all-bindings/typescript/test/gpt4all.test.js
@ -35,11 +35,6 @@ describe("config", () => {
                "..",
                `runtimes/${process.platform}-${process.arch}/native`
            ),
            path.resolve(
                __dirname,
                "..",
                `runtimes/${process.platform}/native`,
            ),
            process.cwd(),
        ];
        expect(typeof DEFAULT_LIBRARIES_DIRECTORY).toBe("string");