Compare commits

c5d346ce98a0ff22328fc3c41131155206a5825e..f3f25a99288dbd9f01c907b31e4b2d386f0d5a2b

No commits in common. "c5d346ce98a0ff22328fc3c41131155206a5825e" and "f3f25a99288dbd9f01c907b31e4b2d386f0d5a2b" have entirely different histories.

10 changed files with 21 additions and 82 deletions

View File

@@ -644,10 +644,7 @@ jobs:
           app-dir: gpt4all-bindings/typescript
           pkg-manager: yarn
           override-ci-command: yarn install
-      - run:
-          command: |
-            cd gpt4all-bindings/typescript
-            yarn run test
+      - run: cd gpt4all-bindings/typescript
       - run:
           command: |
             cd gpt4all-bindings/typescript
@@ -776,10 +773,11 @@ workflows:
             branches:
               only:
           requires:
+            - node/test
             - npm-hold
-            - build-bindings-backend-linux
-            - build-bindings-backend-windows-msvc
-            - build-bindings-backend-macos
+#            - build-bindings-backend-linux
+#            - build-bindings-backend-windows-msvc
+#            - build-bindings-backend-macos
       # CSharp Jobs
       - build-csharp-linux:
           filters:

View File

@@ -53,8 +53,7 @@
             '-fno-rtti',
           ],
           'cflags_cc': [
-            '-std=c++2a',
-            '-fexceptions'
+            '-std=c++2a'
           ]
         }]
       ]

View File

@@ -97,20 +97,10 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
     std::string text = info[0].As<Napi::String>().Utf8Value();
     size_t embedding_size = 0;
     float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
-    if(arr == nullptr) {
-      Napi::Error::New(
-        env,
-        "Cannot embed. native embedder returned 'nullptr'"
-      ).ThrowAsJavaScriptException();
-      return env.Undefined();
-    }
-    if(embedding_size == 0 && text.size() != 0 ) {
-      std::cout << "Warning: embedding length 0 but input text length > 0" << std::endl;
-    }
-    Napi::Float32Array js_array = Napi::Float32Array::New(env, embedding_size);
+    auto arr_size = sizeof(arr) / sizeof(float);
+    Napi::Float32Array js_array = Napi::Float32Array::New(info.Env(), arr_size);
-    for (size_t i = 0; i < embedding_size; ++i) {
+    for (size_t i = 0; i < arr_size; ++i) {
       float element = *(arr + i);
       js_array[i] = element;
     }
@@ -230,7 +220,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
 }
 llmodel_model NodeModelWrapper::GetInference() {
-    return *inference_;
+    return *inference_.load();
 }
 //Exports Bindings

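A note on the embedding hunk above: the element count of the buffer returned by llmodel_embedding has to come from the size out-parameter, because sizeof applied to the returned pointer measures the pointer itself, not the array it points to. Below is a minimal sketch of that copy pattern, assuming node-addon-api and the llmodel_embedding signature shown in the diff; the header name and the CopyEmbedding helper are placeholders for illustration.

#include <napi.h>
#include <string>
#include "llmodel_c.h" // assumed header declaring llmodel_model and llmodel_embedding

// Hypothetical helper: copy a C float buffer whose length is reported through
// an out-parameter into a Napi::Float32Array.
Napi::Value CopyEmbedding(Napi::Env env, llmodel_model model, const std::string &text) {
    size_t embedding_size = 0;
    float *arr = llmodel_embedding(model, text.c_str(), &embedding_size);
    if (arr == nullptr) {
        Napi::Error::New(env, "embedding returned nullptr").ThrowAsJavaScriptException();
        return env.Undefined();
    }
    // sizeof(arr) / sizeof(float) would only compute sizeof(float*) / sizeof(float),
    // so the element count must come from embedding_size instead.
    Napi::Float32Array js_array = Napi::Float32Array::New(env, embedding_size);
    for (size_t i = 0; i < embedding_size; ++i) {
        js_array[i] = arr[i];
    }
    return js_array;
}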
View File

@@ -37,7 +37,7 @@ private:
   /**
    * The underlying inference that interfaces with the C interface
    */
-  std::shared_ptr<llmodel_model> inference_;
+  std::atomic<std::shared_ptr<llmodel_model>> inference_;
   std::string type;
   // corresponds to LLModel::name() in typescript

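The inference_ member above changes from a plain std::shared_ptr to std::atomic<std::shared_ptr<...>>, which is a C++20 library feature, consistent with the -std=c++2a flag kept in binding.gyp. A minimal sketch of that holder/accessor shape, using int as a stand-in payload since llmodel_model is not defined in this excerpt:

#include <atomic>
#include <memory> // provides the std::atomic<std::shared_ptr<T>> specialization in C++20

struct ModelHolder {
    // Stand-in for the wrapper's inference_ member.
    std::atomic<std::shared_ptr<int>> inference_;

    // Same access pattern as GetInference() in the diff: take an atomic
    // snapshot of the shared_ptr with load(); the caller then dereferences it.
    std::shared_ptr<int> snapshot() const { return inference_.load(); }
};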
View File

@@ -30,7 +30,7 @@ void threadEntry(TsfnContext* context) {
   context->tsfn.BlockingCall(&context->pc,
     [](Napi::Env env, Napi::Function jsCallback, PromptWorkContext* pc) {
       llmodel_prompt(
-        *pc->inference_,
+        *pc->inference_.load(),
         pc->question.c_str(),
         &prompt_callback,
         &response_callback,
@@ -55,6 +55,7 @@ void FinalizerCallback(Napi::Env env,
   // Resolve the Promise previously returned to JS
   context->deferred_.Resolve(Napi::String::New(env, context->pc.res));
   // Wait for the thread to finish executing before proceeding.
+  std::thread::id this_id = std::this_thread::get_id();
   context->nativeThread.join();
   delete context;
 }

View File

@@ -10,7 +10,7 @@
 #include <memory>
 struct PromptWorkContext {
     std::string question;
-    std::shared_ptr<llmodel_model>& inference_;
+    std::atomic<std::shared_ptr<llmodel_model>>& inference_;
     llmodel_prompt_context prompt_params;
     std::string res;

View File

@@ -1,7 +1,7 @@
-import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel, createEmbedding } from '../src/gpt4all.js'
+import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'
 const ll = await loadModel(
-    'orca-mini-3b.ggmlv3.q4_0.bin',
+    'ggml-gpt4all-j-v1.3-groovy.bin',
     { verbose: true }
 );
@@ -37,32 +37,4 @@ const completion2 = await createCompletion(ll, [
     { role : 'user', content: 'What is two plus two?' },
 ], { verbose: true })
 console.log(completion2.choices[0].message)
-const embedder = await loadModel("ggml-all-MiniLM-L6-v2-f16.bin", { verbose: true })
-console.log(createEmbedding(embedder, "sdfdsfds"))
-// At the moment, from testing this code, concurrent model prompting is not possible.
-// Behavior: The last prompt gets answered, but the rest are cancelled
-// my experience with threading is not the best, so if anyone who is good is willing to give this a shot,
-// maybe this is possible
-// afaik threading with llama.cpp is not the best, so this will be left here as reference
-//const responses = await Promise.all([
-//  createCompletion(ll, [
-//    { role : 'system', content: 'You are an advanced mathematician.' },
-//    { role : 'user', content: 'What is 1 + 1?' },
-//  ], { verbose: true }),
-//  createCompletion(ll, [
-//    { role : 'system', content: 'You are an advanced mathematician.' },
-//    { role : 'user', content: 'What is 1 + 1?' },
-//  ], { verbose: true }),
-//
-//createCompletion(ll, [
-//  { role : 'system', content: 'You are an advanced mathematician.' },
-//  { role : 'user', content: 'What is 1 + 1?' },
-//], { verbose: true })
-//
-//])
-//console.log(responses.map(s => s.choices[0].message))
 console.log(completion2.choices[0].message)

View File

@@ -84,7 +84,7 @@ declare class LLModel {
     /**
      * Prompt the model with a given input and optional parameters.
-     * This is the raw output from model.
+     * This is the raw output from std out.
      * Use the prompt function exported for a value
      * @param q The prompt input.
      * @param params Optional parameters for the prompt context.
@@ -92,15 +92,6 @@
      */
     raw_prompt(q: string, params: Partial<LLModelPromptContext>, callback: (res: string) => void): void; // TODO work on return type
-    /**
-     * Embed text with the model. Keep in mind that
-     * not all models can embed text, (only bert can embed as of 07/16/2023 (mm/dd/yyyy))
-     * Use the prompt function exported for a value
-     * @param q The prompt input.
-     * @param params Optional parameters for the prompt context.
-     * @returns The result of the model prompt.
-     */
-    embed(text: string) : Float32Array
     /**
      * Whether the model is loaded or not.
      */
@@ -161,11 +152,6 @@ declare function createCompletion(
     options?: CompletionOptions
 ): Promise<CompletionReturn>;
-declare function createEmbedding(
-    llmodel: LLModel,
-    text: string,
-): Float32Array
 /**
  * The options for creating the completion.
  */
@@ -397,7 +383,6 @@ export {
     LoadModelOptions,
     loadModel,
     createCompletion,
-    createEmbedding,
     createTokenStream,
     DEFAULT_DIRECTORY,
     DEFAULT_LIBRARIES_DIRECTORY,

View File

@@ -85,10 +85,6 @@ function createPrompt(messages, hasDefaultHeader, hasDefaultFooter) {
     return fullPrompt.join('\n');
 }
-function createEmbedding(llmodel, text) {
-    return llmodel.embed(text)
-}
 async function createCompletion(
     llmodel,
     messages,
@@ -137,7 +133,6 @@ module.exports = {
     DEFAULT_DIRECTORY,
     LLModel,
     createCompletion,
-    createEmbedding,
     downloadModel,
     retrieveModel,
     loadModel,

View File

@@ -1,5 +1,4 @@
-const { createWriteStream, existsSync, statSync } = require("node:fs");
-const fsp = require('node:fs/promises')
+const { createWriteStream, existsSync, unlink } = require("node:fs");
 const { performance } = require("node:perf_hooks");
 const path = require("node:path");
 const {mkdirp} = require("mkdirp");
@@ -123,7 +122,7 @@ function downloadModel(modelName, options = {}) {
             if (options.md5sum) {
                 const fileHash = await md5File(partialModelPath);
                 if (fileHash !== options.md5sum) {
-                    await fsp.unlink(partialModelPath);
+                    await fs.unlink(partialModelPath);
                     return reject(
                         Error(`Model "${modelName}" failed verification: Hashes mismatch`)
                     );
@@ -133,7 +132,7 @@ function downloadModel(modelName, options = {}) {
                 }
             }
-            await fsp.rename(partialModelPath, finalModelPath);
+            await fs.rename(partialModelPath, finalModelPath);
             resolve(finalModelPath);
         })
         .catch(reject);