Compare commits


8 Commits

Author | SHA1 | Message | Date
Jacob Nguyen | c5d346ce98 | fix circle ci | 2023-07-16 22:51:10 -05:00
Jacob Nguyen | 9ffa401c66 | basic embeddings and yarn test" | 2023-07-16 22:43:46 -05:00
Jacob Nguyen | be219e1ce9 | readd build backend | 2023-07-16 18:18:18 -05:00
Jacob Nguyen | 71115a344b | fix warnings, safer way to calculate arrsize | 2023-07-16 18:13:54 -05:00
Jacob Nguyen | 5425688e63 | remove cpp 20 standard | 2023-07-16 17:50:26 -05:00
Jacob Nguyen | 8b6cc45043 | fix fs not found | 2023-07-16 10:34:31 -05:00
Jacob Nguyen | 475076fb1d | Update binding.gyp (Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>) | 2023-07-16 09:37:19 -05:00
Jacob Nguyen | b6c2322c31 | Update continue_config.yml (Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>) | 2023-07-15 23:04:56 -05:00
10 changed files with 82 additions and 21 deletions

View File

@@ -644,7 +644,10 @@ jobs:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
override-ci-command: yarn install
- run: cd gpt4all-bindings/typescript
- run:
command: |
cd gpt4all-bindings/typescript
yarn run test
- run:
command: |
cd gpt4all-bindings/typescript
@@ -773,11 +776,10 @@ workflows:
branches:
only:
requires:
- node/test
- npm-hold
# - build-bindings-backend-linux
# - build-bindings-backend-windows-msvc
# - build-bindings-backend-macos
- build-bindings-backend-linux
- build-bindings-backend-windows-msvc
- build-bindings-backend-macos
# CSharp Jobs
- build-csharp-linux:
filters:

View File

@@ -53,7 +53,8 @@
'-fno-rtti',
],
'cflags_cc': [
'-std=c++2a'
'-std=c++2a',
'-fexceptions'
]
}]
]
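The '-fexceptions' flag added above matters because node-gyp builds typically compile addons with C++ exceptions disabled, so any code path in the native layer that throws has to switch the flag back on explicitly; the commits do not state the exact motivation, so treat this as background on what the flag does. A minimal standalone C++ sketch of the effect, using only the standard library (no gpt4all code is referenced):

    #include <stdexcept>

    // Compiled with -fno-exceptions, GCC and Clang reject this 'throw' at
    // compile time; with -fexceptions, as binding.gyp now passes, it builds
    // and behaves normally.
    int require_positive(int v) {
        if (v < 0) throw std::invalid_argument("value must be non-negative");
        return v;
    }

    int main() {
        return require_positive(3) == 3 ? 0 : 1;
    }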

View File

@@ -97,10 +97,20 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
std::string text = info[0].As<Napi::String>().Utf8Value();
size_t embedding_size = 0;
float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
auto arr_size = sizeof(arr) / sizeof(float);
Napi::Float32Array js_array = Napi::Float32Array::New(info.Env(), arr_size);
if(arr == nullptr) {
Napi::Error::New(
env,
"Cannot embed. native embedder returned 'nullptr'"
).ThrowAsJavaScriptException();
return env.Undefined();
}
for (size_t i = 0; i < arr_size; ++i) {
if(embedding_size == 0 && text.size() != 0 ) {
std::cout << "Warning: embedding length 0 but input text length > 0" << std::endl;
}
Napi::Float32Array js_array = Napi::Float32Array::New(env, embedding_size);
for (size_t i = 0; i < embedding_size; ++i) {
float element = *(arr + i);
js_array[i] = element;
}
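The removed arr_size line is the bug behind the "fix warnings, safer way to calculate arrsize" commit: sizeof applied to a pointer measures the pointer itself, not the buffer it points to, so the old loop always copied sizeof(float*) / sizeof(float) elements (2 on common 64-bit targets) regardless of the real embedding length. The replacement sizes the Float32Array from the embedding_size out-parameter and bails out on a null pointer instead. A self-contained C++ illustration of the pitfall (the array below merely stands in for the buffer returned by llmodel_embedding):

    #include <cstddef>
    #include <iostream>

    int main() {
        float embedding[384] = {};         // stand-in for the native embedding buffer
        float* arr = embedding;            // the API hands back a plain pointer
        std::size_t embedding_size = 384;  // the out-parameter is the only reliable length

        // sizeof(arr) is the size of a float* (typically 8 bytes), so this prints 2.
        std::cout << sizeof(arr) / sizeof(float) << '\n';

        // Correct element count to copy into the Float32Array.
        std::cout << embedding_size << '\n';
        return 0;
    }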
@@ -220,7 +230,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
}
llmodel_model NodeModelWrapper::GetInference() {
return *inference_.load();
return *inference_;
}
//Exports Bindings

View File

@@ -37,7 +37,7 @@ private:
/**
* The underlying inference that interfaces with the C interface
*/
std::atomic<std::shared_ptr<llmodel_model>> inference_;
std::shared_ptr<llmodel_model> inference_;
std::string type;
// corresponds to LLModel::name() in typescript
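The inference_ member change above goes hand in hand with the "remove cpp 20 standard" commit: the std::atomic<std::shared_ptr<T>> specialization only exists from C++20 onward, so once the project no longer builds with -std=c++2a the field falls back to a plain std::shared_ptr, and the *inference_.load() calls elsewhere in this diff become plain *inference_. A compilable C++17-style sketch of the resulting shape (the llmodel_model alias and the wrapper struct are stand-ins, not the real declarations):

    #include <memory>

    using llmodel_model = void*;   // hypothetical stand-in for the opaque C handle

    struct WrapperSketch {
        // A plain shared_ptr works under C++17;
        // std::atomic<std::shared_ptr<llmodel_model>> would demand C++20.
        std::shared_ptr<llmodel_model> inference_;

        llmodel_model GetInference() {
            return *inference_;        // previously: *inference_.load()
        }
    };

    int main() {
        WrapperSketch w;
        w.inference_ = std::make_shared<llmodel_model>(nullptr);
        return w.GetInference() == nullptr ? 0 : 1;
    }

The visible trade-off in the prompt-worker hunks below is that the handle is no longer read through an atomic wrapper; the shared_ptr reference is handed to the worker and dereferenced directly.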

View File

@@ -30,7 +30,7 @@ void threadEntry(TsfnContext* context) {
context->tsfn.BlockingCall(&context->pc,
[](Napi::Env env, Napi::Function jsCallback, PromptWorkContext* pc) {
llmodel_prompt(
*pc->inference_.load(),
*pc->inference_,
pc->question.c_str(),
&prompt_callback,
&response_callback,
@@ -55,7 +55,6 @@ void FinalizerCallback(Napi::Env env,
// Resolve the Promise previously returned to JS
context->deferred_.Resolve(Napi::String::New(env, context->pc.res));
// Wait for the thread to finish executing before proceeding.
std::thread::id this_id = std::this_thread::get_id();
context->nativeThread.join();
delete context;
}

View File

@@ -10,7 +10,7 @@
#include <memory>
struct PromptWorkContext {
std::string question;
std::atomic<std::shared_ptr<llmodel_model>>& inference_;
std::shared_ptr<llmodel_model>& inference_;
llmodel_prompt_context prompt_params;
std::string res;

View File

@@ -1,7 +1,7 @@
import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'
import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel, createEmbedding } from '../src/gpt4all.js'
const ll = await loadModel(
'ggml-gpt4all-j-v1.3-groovy.bin',
'orca-mini-3b.ggmlv3.q4_0.bin',
{ verbose: true }
);
@@ -37,4 +37,32 @@ const completion2 = await createCompletion(ll, [
{ role : 'user', content: 'What is two plus two?' },
], { verbose: true })
console.log(completion2.choices[0].message)
console.log(completion2.choices[0].message)
const embedder = await loadModel("ggml-all-MiniLM-L6-v2-f16.bin", { verbose: true })
console.log(createEmbedding(embedder, "sdfdsfds"))
// At the moment, from testing this code, concurrent model prompting is not possible.
// Behavior: The last prompt gets answered, but the rest are cancelled
// my experience with threading is not the best, so if anyone who is good is willing to give this a shot,
// maybe this is possible
// afaik threading with llama.cpp is not the best, so this will be left here as reference
//const responses = await Promise.all([
// createCompletion(ll, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
// ], { verbose: true }),
// createCompletion(ll, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
// ], { verbose: true }),
//
//createCompletion(ll, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
//], { verbose: true })
//
//])
//console.log(responses.map(s => s.choices[0].message))

View File

@@ -84,7 +84,7 @@ declare class LLModel {
/**
* Prompt the model with a given input and optional parameters.
* This is the raw output from std out.
* This is the raw output from the model.
* Use the prompt function exported for a value
* @param q The prompt input.
* @param params Optional parameters for the prompt context.
@ -92,6 +92,15 @@ declare class LLModel {
*/
raw_prompt(q: string, params: Partial<LLModelPromptContext>, callback: (res: string) => void): void; // TODO work on return type
/**
* Embed text with the model. Keep in mind that
* not all models can embed text (only Bert models can embed as of 2023-07-16).
* Use the createEmbedding function exported for a value.
* @param text The text to embed.
* @returns The embedding of the text as a Float32Array.
*/
embed(text: string) : Float32Array
/**
* Whether the model is loaded or not.
*/
@@ -152,6 +161,11 @@ declare function createCompletion(
options?: CompletionOptions
): Promise<CompletionReturn>;
declare function createEmbedding(
llmodel: LLModel,
text: string,
): Float32Array
/**
* The options for creating the completion.
*/
@@ -383,6 +397,7 @@ export {
LoadModelOptions,
loadModel,
createCompletion,
createEmbedding,
createTokenStream,
DEFAULT_DIRECTORY,
DEFAULT_LIBRARIES_DIRECTORY,

View File

@@ -85,6 +85,10 @@ function createPrompt(messages, hasDefaultHeader, hasDefaultFooter) {
return fullPrompt.join('\n');
}
function createEmbedding(llmodel, text) {
return llmodel.embed(text)
}
async function createCompletion(
llmodel,
messages,
@@ -133,6 +137,7 @@ module.exports = {
DEFAULT_DIRECTORY,
LLModel,
createCompletion,
createEmbedding,
downloadModel,
retrieveModel,
loadModel,

View File

@@ -1,4 +1,5 @@
const { createWriteStream, existsSync, unlink } = require("node:fs");
const { createWriteStream, existsSync, statSync } = require("node:fs");
const fsp = require('node:fs/promises')
const { performance } = require("node:perf_hooks");
const path = require("node:path");
const {mkdirp} = require("mkdirp");
@@ -122,7 +123,7 @@ function downloadModel(modelName, options = {}) {
if (options.md5sum) {
const fileHash = await md5File(partialModelPath);
if (fileHash !== options.md5sum) {
await fs.unlink(partialModelPath);
await fsp.unlink(partialModelPath);
return reject(
Error(`Model "${modelName}" failed verification: Hashes mismatch`)
);
@@ -132,7 +133,7 @@ function downloadModel(modelName, options = {}) {
}
}
await fs.rename(partialModelPath, finalModelPath);
await fsp.rename(partialModelPath, finalModelPath);
resolve(finalModelPath);
})
.catch(reject);