Compare commits

c5d346ce98a0ff22328fc3c41131155206a5825e..f3f25a99288dbd9f01c907b31e4b2d386f0d5a2b

No commits in common. "c5d346ce98a0ff22328fc3c41131155206a5825e" and "f3f25a99288dbd9f01c907b31e4b2d386f0d5a2b" have entirely different histories.

10 changed files with 21 additions and 82 deletions

View File

@@ -644,10 +644,7 @@ jobs:
           app-dir: gpt4all-bindings/typescript
           pkg-manager: yarn
           override-ci-command: yarn install
-      - run:
-          command: |
-            cd gpt4all-bindings/typescript
-            yarn run test
+      - run: cd gpt4all-bindings/typescript
       - run:
           command: |
             cd gpt4all-bindings/typescript
@@ -776,10 +773,11 @@ workflows:
             branches:
               only:
           requires:
+            - node/test
             - npm-hold
-            - build-bindings-backend-linux
-            - build-bindings-backend-windows-msvc
-            - build-bindings-backend-macos
+#            - build-bindings-backend-linux
+#            - build-bindings-backend-windows-msvc
+#            - build-bindings-backend-macos
       # CSharp Jobs
       - build-csharp-linux:
           filters:

View File

@@ -53,8 +53,7 @@
             '-fno-rtti',
           ],
           'cflags_cc': [
-            '-std=c++2a',
-            '-fexceptions'
+            '-std=c++2a'
           ]
         }]
       ]

View File

@@ -97,20 +97,10 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
     std::string text = info[0].As<Napi::String>().Utf8Value();
     size_t embedding_size = 0;
     float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
-    if(arr == nullptr) {
-      Napi::Error::New(
-        env,
-        "Cannot embed. native embedder returned 'nullptr'"
-      ).ThrowAsJavaScriptException();
-      return env.Undefined();
-    }
-    if(embedding_size == 0 && text.size() != 0 ) {
-      std::cout << "Warning: embedding length 0 but input text length > 0" << std::endl;
-    }
-    Napi::Float32Array js_array = Napi::Float32Array::New(env, embedding_size);
+    auto arr_size = sizeof(arr) / sizeof(float);
+    Napi::Float32Array js_array = Napi::Float32Array::New(info.Env(), arr_size);
-    for (size_t i = 0; i < embedding_size; ++i) {
+    for (size_t i = 0; i < arr_size; ++i) {
       float element = *(arr + i);
       js_array[i] = element;
     }
@@ -230,7 +220,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
 }
 llmodel_model NodeModelWrapper::GetInference() {
-    return *inference_;
+    return *inference_.load();
 }
 //Exports Bindings

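A note on the embedding hunk above: the element count of the buffer returned by llmodel_embedding has to come from the size out-parameter, because sizeof applied to the returned pointer measures the pointer itself, not the array it points to. Below is a minimal sketch of that copy pattern, assuming node-addon-api and the llmodel_embedding signature shown in the diff; the header name and the CopyEmbedding helper are placeholders for illustration.

#include <napi.h>
#include <string>
#include "llmodel_c.h" // assumed header declaring llmodel_model and llmodel_embedding

// Hypothetical helper: copy a C float buffer whose length is reported through
// an out-parameter into a Napi::Float32Array.
Napi::Value CopyEmbedding(Napi::Env env, llmodel_model model, const std::string &text) {
    size_t embedding_size = 0;
    float *arr = llmodel_embedding(model, text.c_str(), &embedding_size);
    if (arr == nullptr) {
        Napi::Error::New(env, "embedding returned nullptr").ThrowAsJavaScriptException();
        return env.Undefined();
    }
    // sizeof(arr) / sizeof(float) would only compute sizeof(float*) / sizeof(float),
    // so the element count must come from embedding_size instead.
    Napi::Float32Array js_array = Napi::Float32Array::New(env, embedding_size);
    for (size_t i = 0; i < embedding_size; ++i) {
        js_array[i] = arr[i];
    }
    return js_array;
}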
View File

@@ -37,7 +37,7 @@ private:
   /**
    * The underlying inference that interfaces with the C interface
    */
-  std::shared_ptr<llmodel_model> inference_;
+  std::atomic<std::shared_ptr<llmodel_model>> inference_;
   std::string type;
   // corresponds to LLModel::name() in typescript

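The inference_ member above changes from a plain std::shared_ptr to std::atomic<std::shared_ptr<...>>, which is a C++20 library feature, consistent with the -std=c++2a flag kept in binding.gyp. A minimal sketch of that holder/accessor shape, using int as a stand-in payload since llmodel_model is not defined in this excerpt:

#include <atomic>
#include <memory> // provides the std::atomic<std::shared_ptr<T>> specialization in C++20

struct ModelHolder {
    // Stand-in for the wrapper's inference_ member.
    std::atomic<std::shared_ptr<int>> inference_;

    // Same access pattern as GetInference() in the diff: take an atomic
    // snapshot of the shared_ptr with load(); the caller then dereferences it.
    std::shared_ptr<int> snapshot() const { return inference_.load(); }
};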
View File

@@ -30,7 +30,7 @@ void threadEntry(TsfnContext* context) {
   context->tsfn.BlockingCall(&context->pc,
     [](Napi::Env env, Napi::Function jsCallback, PromptWorkContext* pc) {
       llmodel_prompt(
-        *pc->inference_,
+        *pc->inference_.load(),
         pc->question.c_str(),
         &prompt_callback,
         &response_callback,
@@ -55,6 +55,7 @@ void FinalizerCallback(Napi::Env env,
   // Resolve the Promise previously returned to JS
   context->deferred_.Resolve(Napi::String::New(env, context->pc.res));
   // Wait for the thread to finish executing before proceeding.
+  std::thread::id this_id = std::this_thread::get_id();
   context->nativeThread.join();
   delete context;
 }

View File

@@ -10,7 +10,7 @@
 #include <memory>
 struct PromptWorkContext {
     std::string question;
-    std::shared_ptr<llmodel_model>& inference_;
+    std::atomic<std::shared_ptr<llmodel_model>>& inference_;
     llmodel_prompt_context prompt_params;
     std::string res;

View File

@@ -1,7 +1,7 @@
-import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel, createEmbedding } from '../src/gpt4all.js'
+import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'
 const ll = await loadModel(
-    'orca-mini-3b.ggmlv3.q4_0.bin',
+    'ggml-gpt4all-j-v1.3-groovy.bin',
     { verbose: true }
 );
@@ -37,32 +37,4 @@ const completion2 = await createCompletion(ll, [
     { role : 'user', content: 'What is two plus two?' },
 ], { verbose: true })
 console.log(completion2.choices[0].message)
-const embedder = await loadModel("ggml-all-MiniLM-L6-v2-f16.bin", { verbose: true })
-console.log(createEmbedding(embedder, "sdfdsfds"))
-// At the moment, from testing this code, concurrent model prompting is not possible.
-// Behavior: The last prompt gets answered, but the rest are cancelled
-// my experience with threading is not the best, so if anyone who is good is willing to give this a shot,
-// maybe this is possible
-// afaik threading with llama.cpp is not the best, so this will be left here as reference
-//const responses = await Promise.all([
-//  createCompletion(ll, [
-//    { role : 'system', content: 'You are an advanced mathematician.' },
-//    { role : 'user', content: 'What is 1 + 1?' },
-//  ], { verbose: true }),
-//  createCompletion(ll, [
-//    { role : 'system', content: 'You are an advanced mathematician.' },
-//    { role : 'user', content: 'What is 1 + 1?' },
-//  ], { verbose: true }),
-//
-//createCompletion(ll, [
-//  { role : 'system', content: 'You are an advanced mathematician.' },
-//  { role : 'user', content: 'What is 1 + 1?' },
-//], { verbose: true })
-//
-//])
-//console.log(responses.map(s => s.choices[0].message))
 console.log(completion2.choices[0].message)

View File

@@ -84,7 +84,7 @@ declare class LLModel {
     /**
      * Prompt the model with a given input and optional parameters.
-     * This is the raw output from model.
+     * This is the raw output from std out.
      * Use the prompt function exported for a value
      * @param q The prompt input.
      * @param params Optional parameters for the prompt context.
@@ -92,15 +92,6 @@
      */
     raw_prompt(q: string, params: Partial<LLModelPromptContext>, callback: (res: string) => void): void; // TODO work on return type
-    /**
-     * Embed text with the model. Keep in mind that
-     * not all models can embed text, (only bert can embed as of 07/16/2023 (mm/dd/yyyy))
-     * Use the prompt function exported for a value
-     * @param q The prompt input.
-     * @param params Optional parameters for the prompt context.
-     * @returns The result of the model prompt.
-     */
-    embed(text: string) : Float32Array
     /**
      * Whether the model is loaded or not.
      */
@@ -161,11 +152,6 @@ declare function createCompletion(
     options?: CompletionOptions
 ): Promise<CompletionReturn>;
-declare function createEmbedding(
-    llmodel: LLModel,
-    text: string,
-): Float32Array
 /**
  * The options for creating the completion.
  */
@@ -397,7 +383,6 @@ export {
     LoadModelOptions,
     loadModel,
     createCompletion,
-    createEmbedding,
     createTokenStream,
     DEFAULT_DIRECTORY,
     DEFAULT_LIBRARIES_DIRECTORY,

View File

@@ -85,10 +85,6 @@ function createPrompt(messages, hasDefaultHeader, hasDefaultFooter) {
     return fullPrompt.join('\n');
 }
-function createEmbedding(llmodel, text) {
-    return llmodel.embed(text)
-}
 async function createCompletion(
     llmodel,
     messages,
@@ -137,7 +133,6 @@ module.exports = {
     DEFAULT_DIRECTORY,
     LLModel,
     createCompletion,
-    createEmbedding,
     downloadModel,
     retrieveModel,
     loadModel,

View File

@@ -1,5 +1,4 @@
-const { createWriteStream, existsSync, statSync } = require("node:fs");
-const fsp = require('node:fs/promises')
+const { createWriteStream, existsSync, unlink } = require("node:fs");
 const { performance } = require("node:perf_hooks");
 const path = require("node:path");
 const {mkdirp} = require("mkdirp");
@@ -123,7 +122,7 @@ function downloadModel(modelName, options = {}) {
             if (options.md5sum) {
                 const fileHash = await md5File(partialModelPath);
                 if (fileHash !== options.md5sum) {
-                    await fsp.unlink(partialModelPath);
+                    await fs.unlink(partialModelPath);
                     return reject(
                         Error(`Model "${modelName}" failed verification: Hashes mismatch`)
                     );
@@ -133,7 +132,7 @@ function downloadModel(modelName, options = {}) {
                 }
             }
-            await fsp.rename(partialModelPath, finalModelPath);
+            await fs.rename(partialModelPath, finalModelPath);
             resolve(finalModelPath);
         })
         .catch(reject);