Compare commits


No commits in common. "f3f25a99288dbd9f01c907b31e4b2d386f0d5a2b" and "ceef5d092200882c2e5f4f98d82f588718117cd9" have entirely different histories.

57 changed files with 4339 additions and 7910 deletions

View File

@@ -463,47 +463,50 @@ jobs:
    docker:
      - image: mcr.microsoft.com/dotnet/sdk:7.0-jammy # Ubuntu 22.04
    steps:
-      - checkout
-      - attach_workspace:
-          at: /tmp/workspace
-      - run:
-          name: "Prepare Native Libs"
-          command: |
-            cd gpt4all-bindings/csharp
-            mkdir -p runtimes/linux-x64/native
-            cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
-            ls -R runtimes
-      - restore_cache:
-          keys:
-            - gpt4all-csharp-nuget-packages-nix
-      - run:
-          name: "Install project dependencies"
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet restore Gpt4All
-      - save_cache:
-          paths:
-            - ~/.nuget/packages
-          key: gpt4all-csharp-nuget-packages-nix
-      - run:
-          name: Build C# Project
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet build Gpt4All --configuration Release --nologo
-      - run:
-          name: "Run C# Tests"
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
-      - run:
-          name: Test results
-          command: |
-            cd gpt4all-bindings/csharp/Gpt4All.Tests
-            dotnet tool install -g trx2junit
-            export PATH="$PATH:$HOME/.dotnet/tools"
-            trx2junit TestResults/*.trx
-      - store_test_results:
-          path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
+      - when:
+          condition: << pipeline.parameters.run-csharp-workflow >>
+          steps:
+            - checkout
+            - attach_workspace:
+                at: /tmp/workspace
+            - run:
+                name: "Prepare Native Libs"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  mkdir -p runtimes/linux-x64/native
+                  cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
+                  ls -R runtimes
+            - restore_cache:
+                keys:
+                  - gpt4all-csharp-nuget-packages-nix
+            - run:
+                name: "Install project dependencies"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet restore Gpt4All
+            - save_cache:
+                paths:
+                  - ~/.nuget/packages
+                key: gpt4all-csharp-nuget-packages-nix
+            - run:
+                name: Build C# Project
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet build Gpt4All --configuration Release --nologo
+            - run:
+                name: "Run C# Tests"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
+            - run:
+                name: Test results
+                command: |
+                  cd gpt4all-bindings/csharp/Gpt4All.Tests
+                  dotnet tool install -g trx2junit
+                  export PATH="$PATH:$HOME/.dotnet/tools"
+                  trx2junit TestResults/*.trx
+            - store_test_results:
+                path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults

  build-csharp-windows:
    executor:
@@ -511,99 +514,111 @@ jobs:
      size: large
    shell: powershell.exe -ExecutionPolicy Bypass
    steps:
-      - checkout
-      - restore_cache:
-          keys:
-            - gpt4all-csharp-nuget-packages-win
-      - attach_workspace:
-          at: C:\Users\circleci\workspace
-      - run:
-          name: "Prepare Native Libs"
-          command: |
-            cd gpt4all-bindings/csharp
-            mkdir -p runtimes\win-x64\native
-            cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
-            ls -R runtimes
-      - run:
-          name: "Install project dependencies"
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet.exe restore Gpt4All
-      - save_cache:
-          paths:
-            - C:\Users\circleci\.nuget\packages
-          key: gpt4all-csharp-nuget-packages-win
-      - run:
-          name: Build C# Project
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet.exe build Gpt4All --configuration Release --nologo
-      - run:
-          name: "Run C# Tests"
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
-      - run:
-          name: Test results
-          command: |
-            cd gpt4all-bindings/csharp/Gpt4All.Tests
-            dotnet tool install -g trx2junit
-            $Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
-            trx2junit TestResults/*.trx
-      - store_test_results:
-          path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
+      - when:
+          condition: << pipeline.parameters.run-csharp-workflow >>
+          steps:
+            - checkout
+            - restore_cache:
+                keys:
+                  - gpt4all-csharp-nuget-packages-win
+            - attach_workspace:
+                at: C:\Users\circleci\workspace
+            - run:
+                name: "Prepare Native Libs"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  mkdir -p runtimes\win-x64\native
+                  cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
+                  ls -R runtimes
+            - run:
+                name: "Install project dependencies"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet.exe restore Gpt4All
+            - save_cache:
+                paths:
+                  - C:\Users\circleci\.nuget\packages
+                key: gpt4all-csharp-nuget-packages-win
+            - run:
+                name: Build C# Project
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet.exe build Gpt4All --configuration Release --nologo
+            - run:
+                name: "Run C# Tests"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
+            - run:
+                name: Test results
+                command: |
+                  cd gpt4all-bindings/csharp/Gpt4All.Tests
+                  dotnet tool install -g trx2junit
+                  $Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
+                  trx2junit TestResults/*.trx
+            - store_test_results:
+                path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults

  build-csharp-macos:
    macos:
      xcode: "14.0.0"
    steps:
-      - checkout
-      - restore_cache:
-          keys:
-            - gpt4all-csharp-nuget-packages-nix
-      - run:
-          name: Install dependencies
-          command: |
-            brew install --cask dotnet-sdk
-      - attach_workspace:
-          at: /tmp/workspace
-      - run:
-          name: "Prepare Native Libs"
-          command: |
-            cd gpt4all-bindings/csharp
-            mkdir -p runtimes/osx/native
-            cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
-            cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
-            ls -R runtimes
-      - run:
-          name: "Install project dependencies"
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet restore Gpt4All
-      - save_cache:
-          paths:
-            - ~/.nuget/packages
-          key: gpt4all-csharp-nuget-packages-nix
-      - run:
-          name: Build C# Project
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet build Gpt4All --configuration Release --nologo
-      - run:
-          name: "Run C# Tests"
-          command: |
-            cd gpt4all-bindings/csharp
-            dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
-      - run:
-          name: Test results
-          command: |
-            cd gpt4all-bindings/csharp/Gpt4All.Tests
-            dotnet tool install -g trx2junit
-            export PATH="$PATH:$HOME/.dotnet/tools"
-            trx2junit TestResults/*.trx
-      - store_test_results:
-          path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
+      - when:
+          condition: << pipeline.parameters.run-csharp-workflow >>
+          steps:
+            - checkout
+            - restore_cache:
+                keys:
+                  - gpt4all-csharp-nuget-packages-nix
+            - run:
+                name: Install dependencies
+                command: |
+                  brew install --cask dotnet-sdk
+            - attach_workspace:
+                at: /tmp/workspace
+            - run:
+                name: "Prepare Native Libs"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  mkdir -p runtimes/osx/native
+                  cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
+                  cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
+                  ls -R runtimes
+            - run:
+                name: "Install project dependencies"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet restore Gpt4All
+            - save_cache:
+                paths:
+                  - ~/.nuget/packages
+                key: gpt4all-csharp-nuget-packages-nix
+            - run:
+                name: Build C# Project
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet build Gpt4All --configuration Release --nologo
+            - run:
+                name: "Run C# Tests"
+                command: |
+                  cd gpt4all-bindings/csharp
+                  dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
+            - run:
+                name: Test results
+                command: |
+                  cd gpt4all-bindings/csharp/Gpt4All.Tests
+                  dotnet tool install -g trx2junit
+                  export PATH="$PATH:$HOME/.dotnet/tools"
+                  trx2junit TestResults/*.trx
+            - store_test_results:
+                path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
+
+  build-nodejs-linux:
+    docker:
+      - image: circleci/node:erbium-bullseye-browsers-legacy
+    steps:
+      - when:
+          condition: << pipeline.parameters.run-ts-workflow >>
+      - checkout

  store-and-upload-nupkgs:
    docker:
      - image: mcr.microsoft.com/dotnet/sdk:6.0-jammy # Ubuntu 22.04
@@ -641,27 +656,27 @@ jobs:
          node-version: "18.16"
      - run: node --version
      - node/install-packages:
-          app-dir: gpt4all-bindings/typescript
          pkg-manager: yarn
-          override-ci-command: yarn install
-      - run: cd gpt4all-bindings/typescript
      - run:
-          command: yarn run test
-          name: Run YARN tests
-      - run:
-          command: |
-            cd gpt4all-bindings/typescript
-            # excluding llmodel. nodejs bindings dont need llmodel.dll
-            mkdir -p runtimes/win32-x64/native
-            cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
-            mkdir -p runtimes/linux-x64/native
-            cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
-            mkdir -p runtimes/osx/native
-            cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
-            cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
-            ls -Ra gpt4all-bindings/typescript/runtimes
-      # - run:
-      #     name: Publish to NPM
-      #     command: |
-      #       npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
-      #       npm publish
+          command: |
+            # excluding llmodel. nodejs bindings dont need llmodel.dll
+            cd gpt4all-bindings/typescript
+            mkdir -p runtimes/win32-x64/native
+            cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
+            mkdir -p runtimes/linux-x64/native
+            cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
+            mkdir -p runtimes/osx/native
+            cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
+            cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
+      - run:
+          name: Publish to NPM
+          command: |
+            npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
+            npm publish

workflows:
  version: 2
@@ -741,8 +756,6 @@ workflows:
          type: approval
      - nuget-hold:
          type: approval
-      - npm-hold:
-          type: approval
      - build-bindings-backend-linux:
          filters:
            branches:
@@ -768,16 +781,6 @@ workflows:
          requires:
            - hold
      # NodeJs Jobs
-      - prepare-npm-pkg:
-          filters:
-            branches:
-              only:
-          requires:
-            - node/test
-            - npm-hold
-            # - build-bindings-backend-linux
-            # - build-bindings-backend-windows-msvc
-            # - build-bindings-backend-macos
      # CSharp Jobs
      - build-csharp-linux:
          filters:
@@ -806,3 +809,4 @@ workflows:
          - build-csharp-windows
          - build-csharp-linux
          - build-csharp-macos

.gitignore (vendored): 3 lines changed
View File

@@ -1,6 +1,3 @@
-*.arrow
-squad_*
-*sbert_embedded*
*.pkl
ckpts*
.deepspeed_env

View File

@@ -1,6 +1,5 @@
cmake_minimum_required(VERSION 3.16)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if(APPLE)
  option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
@@ -20,7 +19,7 @@ endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}")

set(LLMODEL_VERSION_MAJOR 0)
-set(LLMODEL_VERSION_MINOR 3)
+set(LLMODEL_VERSION_MINOR 2)
set(LLMODEL_VERSION_PATCH 0)
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
@@ -125,10 +124,6 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
        add_library(mpt-${BUILD_VARIANT} SHARED
            mpt.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
        prepare_target(mpt ggml-230511)
-        add_library(bert-${BUILD_VARIANT} SHARED
-            bert.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
-        prepare_target(bert llama-mainline)
    endif()
endforeach()

File diff suppressed because it is too large.

View File

@@ -1,44 +0,0 @@
#ifndef BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#error This file is NOT meant to be included outside of bert.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#endif
#ifndef BERT_H
#define BERT_H
#include <string>
#include <functional>
#include <vector>
#include <memory>
#include "llmodel.h"
struct BertPrivate;
class Bert : public LLModel {
public:
Bert();
~Bert();
bool supportsEmbedding() const override { return true; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
size_t stateSize() const override;
size_t saveState(uint8_t *dest) const override;
size_t restoreState(const uint8_t *src) override;
void setThreadCount(int32_t n_threads) override;
int32_t threadCount() const override;
std::vector<float> embedding(const std::string &text) override;
private:
std::unique_ptr<BertPrivate> d_ptr;
protected:
std::vector<Token> tokenize(PromptContext &, const std::string&) const override;
Token sampleToken(PromptContext &ctx) const override;
std::string tokenToString(Token) const override;
bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
int32_t contextLength() const override;
const std::vector<Token>& endTokens() const override;
};
#endif // BERT_H

View File

@@ -16,8 +16,6 @@ public:
    Falcon();
    ~Falcon();
-    bool supportsEmbedding() const override { return false; }
-    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath) override;

View File

@@ -15,8 +15,6 @@ public:
    GPTJ();
    ~GPTJ();
-    bool supportsEmbedding() const override { return false; }
-    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath) override;

View File

@@ -15,8 +15,6 @@ public:
    LLamaModel();
    ~LLamaModel();
-    bool supportsEmbedding() const override { return false; }
-    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath) override;

View File

@@ -10,19 +10,17 @@
#include <cassert>
#include <cstdlib>
#include <sstream>
-#ifdef _MSC_VER
-#include <windows.h>
-#include <processthreadsapi.h>
-#endif

std::string s_implementations_search_path = ".";

static bool has_at_least_minimal_hardware() {
-#if defined(__x86_64__) || defined(_M_X64)
+#ifdef __x86_64__
    #ifndef _MSC_VER
    return __builtin_cpu_supports("avx");
    #else
-    return IsProcessorFeaturePresent(PF_AVX_INSTRUCTIONS_AVAILABLE);
+    int cpuInfo[4];
+    __cpuid(cpuInfo, 1);
+    return cpuInfo[2] & (1 << 28);
    #endif
#else
    return true; // Don't know how to handle non-x86_64
@@ -30,53 +28,54 @@ static bool has_at_least_minimal_hardware() {
}

static bool requires_avxonly() {
-#if defined(__x86_64__) || defined(_M_X64)
+#ifdef __x86_64__
    #ifndef _MSC_VER
    return !__builtin_cpu_supports("avx2");
    #else
-    return !IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE);
+    int cpuInfo[4];
+    __cpuidex(cpuInfo, 7, 0);
+    return !(cpuInfo[1] & (1 << 5));
    #endif
#else
    return false; // Don't know how to handle non-x86_64
#endif
}

-LLModel::Implementation::Implementation(Dlhandle &&dlhandle_)
-    : m_dlhandle(new Dlhandle(std::move(dlhandle_))) {
-    auto get_model_type = m_dlhandle->get<const char *()>("get_model_type");
+LLModel::Implementation::Implementation(Dlhandle &&dlhandle_) : dlhandle(new Dlhandle(std::move(dlhandle_))) {
+    auto get_model_type = dlhandle->get<const char *()>("get_model_type");
    assert(get_model_type);
-    m_modelType = get_model_type();
-    auto get_build_variant = m_dlhandle->get<const char *()>("get_build_variant");
+    modelType = get_model_type();
+    auto get_build_variant = dlhandle->get<const char *()>("get_build_variant");
    assert(get_build_variant);
-    m_buildVariant = get_build_variant();
-    m_magicMatch = m_dlhandle->get<bool(std::ifstream&)>("magic_match");
-    assert(m_magicMatch);
-    m_construct = m_dlhandle->get<LLModel *()>("construct");
-    assert(m_construct);
+    buildVariant = get_build_variant();
+    magicMatch = dlhandle->get<bool(std::ifstream&)>("magic_match");
+    assert(magicMatch);
+    construct_ = dlhandle->get<LLModel *()>("construct");
+    assert(construct_);
}

LLModel::Implementation::Implementation(Implementation &&o)
-    : m_magicMatch(o.m_magicMatch)
-    , m_construct(o.m_construct)
-    , m_modelType(o.m_modelType)
-    , m_buildVariant(o.m_buildVariant)
-    , m_dlhandle(o.m_dlhandle) {
-    o.m_dlhandle = nullptr;
+    : construct_(o.construct_)
+    , modelType(o.modelType)
+    , buildVariant(o.buildVariant)
+    , magicMatch(o.magicMatch)
+    , dlhandle(o.dlhandle) {
+    o.dlhandle = nullptr;
}

LLModel::Implementation::~Implementation() {
-    if (m_dlhandle) delete m_dlhandle;
+    if (dlhandle) delete dlhandle;
}

bool LLModel::Implementation::isImplementation(const Dlhandle &dl) {
    return dl.get<bool(uint32_t)>("is_g4a_backend_model_implementation");
}

-const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList() {
+const std::vector<LLModel::Implementation> &LLModel::implementationList() {
    // NOTE: allocated on heap so we leak intentionally on exit so we have a chance to clean up the
    // individual models without the cleanup of the static list interfering
-    static auto* libs = new std::vector<Implementation>([] () {
-        std::vector<Implementation> fres;
+    static auto* libs = new std::vector<LLModel::Implementation>([] () {
+        std::vector<LLModel::Implementation> fres;

        auto search_in_directory = [&](const std::string& paths) {
            std::stringstream ss(paths);
@@ -108,17 +107,17 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
    return *libs;
}

-const LLModel::Implementation* LLModel::Implementation::implementation(std::ifstream& f, const std::string& buildVariant) {
+const LLModel::Implementation* LLModel::implementation(std::ifstream& f, const std::string& buildVariant) {
    for (const auto& i : implementationList()) {
        f.seekg(0);
-        if (!i.m_magicMatch(f)) continue;
-        if (buildVariant != i.m_buildVariant) continue;
+        if (!i.magicMatch(f)) continue;
+        if (buildVariant != i.buildVariant) continue;
        return &i;
    }
    return nullptr;
}

-LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant) {
+LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
    if (!has_at_least_minimal_hardware())
        return nullptr;
@@ -127,15 +126,14 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
    std::ifstream f(modelPath, std::ios::binary);
    if (!f) return nullptr;
    // Get correct implementation
-    const Implementation* impl = nullptr;
+    const LLModel::Implementation* impl = nullptr;

    #if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
    if (buildVariant == "auto") {
        size_t total_mem = getSystemTotalRAMInBytes();
        impl = implementation(f, "metal");
        if(impl) {
-            LLModel* metalimpl = impl->m_construct();
-            metalimpl->m_implementation = impl;
+            LLModel* metalimpl = impl->construct();
            size_t req_mem = metalimpl->requiredMem(modelPath);
            float req_to_total = (float) req_mem / (float) total_mem;
            // on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
@@ -162,17 +160,14 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
        if (!impl) return nullptr;
    }
    f.close();
    // Construct and return llmodel implementation
-    auto fres = impl->m_construct();
-    fres->m_implementation = impl;
-    return fres;
+    return impl->construct();
}

-void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
+void LLModel::setImplementationsSearchPath(const std::string& path) {
    s_implementations_search_path = path;
}

-const std::string& LLModel::Implementation::implementationsSearchPath() {
+const std::string& LLModel::implementationsSearchPath() {
    return s_implementations_search_path;
}
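Editor's note, not part of the diff: the only functional change in the two detection hunks above is how the MSVC branch checks for AVX and AVX2; the "-" side asks Windows via IsProcessorFeaturePresent, the "+" side reads CPUID directly. A minimal, hypothetical C++ sketch of the CPUID variant follows, using the same bit positions as the hunk. It is not code from the repository, and it skips the OSXSAVE/XGETBV validation a fully robust detector would also perform.

```cpp
#include <cstdio>
#ifdef _MSC_VER
#include <intrin.h>   // __cpuid / __cpuidex
#endif

static bool cpu_has_avx() {
#ifndef _MSC_VER
    return __builtin_cpu_supports("avx");   // GCC/Clang path, as in the diff
#else
    int cpuInfo[4];
    __cpuid(cpuInfo, 1);                     // CPUID leaf 1: feature flags
    return cpuInfo[2] & (1 << 28);           // ECX bit 28 = AVX
#endif
}

static bool cpu_has_avx2() {
#ifndef _MSC_VER
    return __builtin_cpu_supports("avx2");
#else
    int cpuInfo[4];
    __cpuidex(cpuInfo, 7, 0);                // CPUID leaf 7, subleaf 0
    return cpuInfo[1] & (1 << 5);            // EBX bit 5 = AVX2
#endif
}

int main() {
    std::printf("AVX: %d  AVX2: %d\n", cpu_has_avx(), cpu_has_avx2());
}
```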

View File

@@ -12,34 +12,32 @@
#define LLMODEL_MAX_PROMPT_BATCH 128

class Dlhandle;
class LLModel {
public:
    using Token = int32_t;
    class Implementation {
+        LLModel *(*construct_)();
    public:
        Implementation(Dlhandle&&);
        Implementation(const Implementation&) = delete;
        Implementation(Implementation&&);
        ~Implementation();
-        std::string_view modelType() const { return m_modelType; }
-        std::string_view buildVariant() const { return m_buildVariant; }
        static bool isImplementation(const Dlhandle&);
-        static const std::vector<Implementation>& implementationList();
-        static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
-        static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
-        static void setImplementationsSearchPath(const std::string& path);
-        static const std::string& implementationsSearchPath();
-    private:
-        bool (*m_magicMatch)(std::ifstream& f);
-        LLModel *(*m_construct)();
-    private:
-        std::string_view m_modelType;
-        std::string_view m_buildVariant;
-        Dlhandle *m_dlhandle;
+        std::string_view modelType, buildVariant;
+        bool (*magicMatch)(std::ifstream& f);
+        Dlhandle *dlhandle;
+        // The only way an implementation should be constructed
+        LLModel *construct() const {
+            auto fres = construct_();
+            fres->m_implementation = this;
+            return fres;
+        }
    };

    struct PromptContext {
@@ -61,25 +59,18 @@ public:
    explicit LLModel() {}
    virtual ~LLModel() {}
-    virtual bool supportsEmbedding() const = 0;
-    virtual bool supportsCompletion() const = 0;
    virtual bool loadModel(const std::string &modelPath) = 0;
    virtual bool isModelLoaded() const = 0;
    virtual size_t requiredMem(const std::string &modelPath) = 0;
    virtual size_t stateSize() const { return 0; }
    virtual size_t saveState(uint8_t */*dest*/) const { return 0; }
    virtual size_t restoreState(const uint8_t */*src*/) { return 0; }
-    // This method requires the model to return true from supportsCompletion otherwise it will throw
-    // an error
    virtual void prompt(const std::string &prompt,
                        std::function<bool(int32_t)> promptCallback,
                        std::function<bool(int32_t, const std::string&)> responseCallback,
                        std::function<bool(bool)> recalculateCallback,
                        PromptContext &ctx);
-    virtual std::vector<float> embedding(const std::string &text);
    virtual void setThreadCount(int32_t /*n_threads*/) {}
    virtual int32_t threadCount() const { return 1; }
@@ -87,6 +78,13 @@ public:
        return *m_implementation;
    }
+    static const std::vector<Implementation>& implementationList();
+    static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
+    static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
+    static void setImplementationsSearchPath(const std::string& path);
+    static const std::string& implementationsSearchPath();
protected:
    // These are pure virtual because subclasses need to implement as the default implementation of
    // 'prompt' above calls these functions
@@ -102,9 +100,5 @@ protected:
    void recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate);
    const Implementation *m_implementation = nullptr;
-private:
-    friend class LLMImplementation;
};
#endif // LLMODEL_H
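Editor's note, not part of the diff: a hypothetical caller sketch written against the "-" side of this header, where the loader statics live on LLModel::Implementation and modelType() is an accessor; on the "+" side the equivalent calls are LLModel::construct, LLModel::setImplementationsSearchPath, and so on, with modelType read as a plain member. The model path and search path are placeholders and error handling is minimal; this is not code from the repository.

```cpp
#include <iostream>
#include <string>
#include "llmodel.h"

int main() {
    // Where the dynamically loaded backend libraries are searched for (placeholder path).
    LLModel::Implementation::setImplementationsSearchPath("./backends");

    // Pick a matching backend for the model file and instantiate it.
    LLModel *model = LLModel::Implementation::construct("./model.bin", "auto");
    if (!model || !model->loadModel("./model.bin")) return 1;

    std::cout << "loaded backend: " << model->implementation().modelType() << "\n";

    LLModel::PromptContext ctx;  // fields left at their defaults for brevity
    model->prompt(
        "Hello, world!",
        [](int32_t) { return true; },              // called per prompt token
        [](int32_t, const std::string &piece) {    // streamed response text
            std::cout << piece;
            return true;
        },
        [](bool) { return true; },                 // recalculate notification
        ctx);

    delete model;
}
```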

View File

@@ -29,7 +29,7 @@ llmodel_model llmodel_model_create2(const char *model_path, const char *build_va
    int error_code = 0;
    try {
-        wrapper->llModel = LLModel::Implementation::construct(model_path, build_variant);
+        wrapper->llModel = LLModel::construct(model_path, build_variant);
    } catch (const std::exception& e) {
        error_code = EINVAL;
        last_error_message = e.what();
@@ -166,25 +166,6 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
    ctx->context_erase = wrapper->promptContext.contextErase;
}

-float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size)
-{
-    LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
-    std::vector<float> embeddingVector = wrapper->llModel->embedding(text);
-    float *embedding = (float *)malloc(embeddingVector.size() * sizeof(float));
-    if(embedding == nullptr) {
-        *embedding_size = 0;
-        return nullptr;
-    }
-    std::copy(embeddingVector.begin(), embeddingVector.end(), embedding);
-    *embedding_size = embeddingVector.size();
-    return embedding;
-}
-
-void llmodel_free_embedding(float *ptr)
-{
-    free(ptr);
-}
-
void llmodel_setThreadCount(llmodel_model model, int32_t n_threads)
{
    LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
@@ -199,10 +180,10 @@ int32_t llmodel_threadCount(llmodel_model model)
void llmodel_set_implementation_search_path(const char *path)
{
-    LLModel::Implementation::setImplementationsSearchPath(path);
+    LLModel::setImplementationsSearchPath(path);
}

const char *llmodel_get_implementation_search_path()
{
-    return LLModel::Implementation::implementationsSearchPath().c_str();
+    return LLModel::implementationsSearchPath().c_str();
}

View File

@@ -171,23 +171,6 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
                    llmodel_recalculate_callback recalculate_callback,
                    llmodel_prompt_context *ctx);

-/**
- * Generate an embedding using the model.
- * @param model A pointer to the llmodel_model instance.
- * @param text A string representing the text to generate an embedding for.
- * @param embedding_size A pointer to a size_t type that will be set by the call indicating the length
- * of the returned floating point array.
- * @return A pointer to an array of floating point values passed to the calling method which then will
- * be responsible for lifetime of this memory.
- */
-float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size);
-
-/**
- * Frees the memory allocated by the llmodel_embedding function.
- * @param ptr A pointer to the embedding as returned from llmodel_embedding.
- */
-void llmodel_free_embedding(float *ptr);
-
/**
 * Set the number of threads to be used by the model.
 * @param model A pointer to the llmodel_model instance.
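Editor's note, not part of the diff: a hypothetical usage sketch for the two C API functions removed above, using exactly the signatures shown in the hunk. It assumes a llmodel_model handle that was created and loaded elsewhere, since those calls fall outside this hunk.

```cpp
#include <cstddef>
#include <cstdio>
#include "llmodel_c.h"

void print_embedding(llmodel_model model, const char *text) {
    size_t embedding_size = 0;
    float *embedding = llmodel_embedding(model, text, &embedding_size);
    if (embedding == nullptr || embedding_size == 0) {
        std::fprintf(stderr, "embedding failed\n");
        return;
    }
    for (size_t i = 0; i < embedding_size; ++i)
        std::printf("%f ", embedding[i]);
    std::printf("\n");
    llmodel_free_embedding(embedding);  // the caller owns the returned buffer
}
```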

View File

@@ -33,14 +33,7 @@ void LLModel::prompt(const std::string &prompt,
                     PromptContext &promptCtx)
{
    if (!isModelLoaded()) {
-        std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
-        return;
-    }
-
-    if (!supportsCompletion()) {
-        std::string errorMessage = "ERROR: this model does not support text completion or chat!\n";
-        responseCallback(-1, errorMessage);
-        std::cerr << implementation().modelType() << errorMessage;
+        std::cerr << implementation().modelType << " ERROR: prompt won't work with an unloaded model!\n";
        return;
    }
@@ -52,8 +45,8 @@ void LLModel::prompt(const std::string &prompt,
    if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
        responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
-        std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
-            " tokens and the context window is " << promptCtx.n_ctx << "!\n";
+        std::cerr << implementation().modelType << " ERROR: The prompt is" << embd_inp.size() <<
+            "tokens and the context window is" << promptCtx.n_ctx << "!\n";
        return;
    }
@@ -71,7 +64,7 @@ void LLModel::prompt(const std::string &prompt,
        if (promptCtx.n_past + int32_t(batch.size()) > promptCtx.n_ctx) {
            const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
            // Erase the first percentage of context from the tokens...
-            std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
+            std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
            promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
            promptCtx.n_past = promptCtx.tokens.size();
            recalculateContext(promptCtx, recalculateCallback);
@@ -79,7 +72,7 @@ void LLModel::prompt(const std::string &prompt,
        }

        if (!evalTokens(promptCtx, batch)) {
-            std::cerr << implementation().modelType() << " ERROR: Failed to process prompt\n";
+            std::cerr << implementation().modelType << " ERROR: Failed to process prompt\n";
            return;
        }
@@ -110,7 +103,7 @@ void LLModel::prompt(const std::string &prompt,
        if (promptCtx.n_past + 1 > promptCtx.n_ctx) {
            const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
            // Erase the first percentage of context from the tokens...
-            std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
+            std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
            promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
            promptCtx.n_past = promptCtx.tokens.size();
            recalculateContext(promptCtx, recalculateCallback);
@@ -118,7 +111,7 @@ void LLModel::prompt(const std::string &prompt,
        }

        if (!evalTokens(promptCtx, { id })) {
-            std::cerr << implementation().modelType() << " ERROR: Failed to predict next token\n";
+            std::cerr << implementation().modelType << " ERROR: Failed to predict next token\n";
            return;
        }
@@ -165,12 +158,3 @@ void LLModel::prompt(const std::string &prompt,
        cachedTokens.clear();
    }
}
-
-std::vector<float> LLModel::embedding(const std::string &/*text*/)
-{
-    if (!supportsCompletion()) {
-        std::string errorMessage = "ERROR: this model does not support generating embeddings!\n";
-        std::cerr << implementation().modelType() << errorMessage;
-    }
-    return std::vector<float>();
-}

View File

@@ -15,8 +15,6 @@ public:
    MPT();
    ~MPT();
-    bool supportsEmbedding() const override { return false; }
-    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath) override;

View File

@@ -17,8 +17,6 @@ public:
    Replit();
    ~Replit();
-    bool supportsEmbedding() const override { return false; }
-    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string & modelPath) override;

View File

@@ -1,102 +0,0 @@
import sys
import struct
import json
import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
if len(sys.argv) < 3:
print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
print(" ftype == 0 -> float32")
print(" ftype == 1 -> float16")
sys.exit(1)
# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"
with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
encoder = json.load(f)
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
with open(dir_model + "/vocab.txt", "r", encoding="utf-8") as f:
vocab = f.readlines()
# possible data types
# ftype == 0 -> float32
# ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]
ftype = 1
if len(sys.argv) > 2:
ftype = int(sys.argv[2])
if ftype < 0 or ftype > 1:
print("Invalid ftype: " + str(ftype))
sys.exit(1)
fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
tokenizer = AutoTokenizer.from_pretrained(dir_model)
model = AutoModel.from_pretrained(dir_model, low_cpu_mem_usage=True)
print (model)
print(tokenizer.encode('I believe the meaning of life is'))
list_vars = model.state_dict()
for name in list_vars.keys():
print(name, list_vars[name].shape, list_vars[name].dtype)
fout = open(fname_out, "wb")
print(hparams)
fout.write(struct.pack("i", 0x62657274)) # magic: ggml in hex
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", hparams["max_position_embeddings"]))
fout.write(struct.pack("i", hparams["hidden_size"]))
fout.write(struct.pack("i", hparams["intermediate_size"]))
fout.write(struct.pack("i", hparams["num_attention_heads"]))
fout.write(struct.pack("i", hparams["num_hidden_layers"]))
fout.write(struct.pack("i", ftype))
for i in range(hparams["vocab_size"]):
text = vocab[i][:-1] # strips newline at the end
#print(f"{i}:{text}")
data = bytes(text, 'utf-8')
fout.write(struct.pack("i", len(data)))
fout.write(data)
for name in list_vars.keys():
data = list_vars[name].squeeze().numpy()
if name in ['embeddings.position_ids', 'pooler.dense.weight', 'pooler.dense.bias']:
continue
print("Processing variable: " + name + " with shape: ", data.shape)
n_dims = len(data.shape);
# ftype == 0 -> float32, ftype == 1 -> float16
if ftype == 1 and name[-7:] == ".weight" and n_dims == 2:
print(" Converting to float16")
data = data.astype(np.float16)
l_type = 1
else:
l_type = 0
# header
str = name.encode('utf-8')
fout.write(struct.pack("iii", n_dims, len(str), l_type))
for i in range(n_dims):
fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
fout.write(str);
# data
data.tofile(fout)
fout.close()
print("Done. Output file: " + fname_out)
print("")

View File

@@ -2,13 +2,11 @@
## What models are supported by the GPT4All ecosystem?

-Currently, there are five different model architectures that are supported:
+Currently, there are three different model architectures that are supported:

-1. GPT-J - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
-2. LLaMA - Based off of the LLaMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
+1. GPTJ - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
+2. LLAMA - Based off of the LLAMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
3. MPT - Based off of Mosaic ML's MPT architecture with examples found [here](https://huggingface.co/mosaicml/mpt-7b)
-4. Replit - Based off of Replit Inc.'s Replit architecture with examples found [here](https://huggingface.co/replit/replit-code-v1-3b)
-5. Falcon - Based off of TII's Falcon architecture with examples found [here](https://huggingface.co/tiiuae/falcon-40b)

## Why so many different architectures? What differentiates them?
@@ -27,10 +25,6 @@ The upstream [llama.cpp](https://github.com/ggerganov/llama.cpp) project has int
Fortunately, we have engineered a submoduling system allowing us to dynamically load different versions of the underlying library so that
GPT4All just works.

-## What are the system requirements?
-Your CPU needs to support [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) and you need enough RAM to load a model into memory.
-
## What about GPU inference?

In newer versions of llama.cpp, there has been some added support for NVIDIA GPU's for inference. We're investigating how to incorporate this into our downloadable installers.

View File

@@ -1,7 +1,8 @@
-# GPT4All Python Generation API
+# GPT4All Python API

The `GPT4All` python package provides bindings to our C/C++ model backend libraries.
The source code and local build instructions can be found [here](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python).

## Quickstart

```bash
@@ -108,5 +109,5 @@ with model.chat_session():
    print(model.current_chat_session)
```

-### API documentation
::: gpt4all.gpt4all.GPT4All

View File

@@ -1,35 +0,0 @@
# Embeddings
GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained [Sentence Transformer](https://www.sbert.net/). These embeddings are comparable in quality for many tasks with OpenAI.
## Quickstart
```bash
pip install gpt4all
```
### Generating embeddings
The embedding model will automatically be downloaded if not installed.
=== "Embed4All Example"
``` py
from gpt4all import GPT4All, Embed4All
text = 'The quick brown fox jumps over the lazy dog'
embedder = Embed4All()
output = embedder.embed(text)
print(output)
```
=== "Output"
```
[0.034696947783231735, -0.07192722707986832, 0.06923297047615051, ...]
```
### Speed of embedding generation
The following table lists the generation speed for text document captured on an Intel i913900HX CPU with DDR5 5600 running with 8 threads under stable load.
| Tokens | 128 | 512 | 2048 | 8129 | 16,384 |
| --------------- | ---- | ---- | ---- | ---- | ---- |
| Wall time (s) | .02 | .08 | .24 | .96 | 1.9 |
| Tokens / Second | 6508 | 6431 | 8622 | 8509 | 8369 |
### API documentation
::: gpt4all.gpt4all.Embed4All

View File

@@ -1,2 +1,2 @@
-from .gpt4all import GPT4All, Embed4All # noqa
+from .gpt4all import GPT4All # noqa
from .pyllmodel import LLModel # noqa

View File

@@ -15,36 +15,6 @@ from . import pyllmodel
# TODO: move to config
DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")

-class Embed4All:
-    """
-    Python class that handles embeddings for GPT4All.
-    """
-    def __init__(
-        self,
-        n_threads: Optional[int] = None,
-    ):
-        """
-        Constructor
-        Args:
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
-        """
-        self.gpt4all = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin', n_threads=n_threads)
-    def embed(
-        self,
-        text: str
-    ) -> list[float]:
-        """
-        Generate an embedding.
-        Args:
-            text: The text document to generate an embedding for.
-        Returns:
-            An embedding of your document of text.
-        """
-        return self.gpt4all.model.generate_embedding(text)

class GPT4All:
    """
@@ -69,7 +39,7 @@ class GPT4All:
            model_type: Model architecture. This argument currently does not have any functionality and is just used as
                descriptive identifier for user. Default is None.
            allow_download: Allow API to download models from gpt4all.io. Default is True.
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
+            n_threads: number of CPU threads used by GPT4All. Default is None, than the number of threads are determined automatically.
        """
        self.model_type = model_type
        self.model = pyllmodel.LLModel()

View File

@@ -112,19 +112,6 @@ llmodel.llmodel_prompt.argtypes = [
llmodel.llmodel_prompt.restype = None

-llmodel.llmodel_embedding.argtypes = [
-    ctypes.c_void_p,
-    ctypes.c_char_p,
-    ctypes.POINTER(ctypes.c_size_t),
-]
-llmodel.llmodel_embedding.restype = ctypes.POINTER(ctypes.c_float)
-llmodel.llmodel_free_embedding.argtypes = [
-    ctypes.POINTER(ctypes.c_float)
-]
-llmodel.llmodel_free_embedding.restype = None

llmodel.llmodel_setThreadCount.argtypes = [ctypes.c_void_p, ctypes.c_int32]
llmodel.llmodel_setThreadCount.restype = None
@@ -154,11 +141,10 @@ class LLModel:
        self.model = None
        self.model_name = None
        self.context = None
-        self.llmodel_lib = llmodel

    def __del__(self):
        if self.model is not None:
-            self.llmodel_lib.llmodel_model_destroy(self.model)
+            llmodel.llmodel_model_destroy(self.model)

    def memory_needed(self, model_path: str) -> int:
        model_path_enc = model_path.encode("utf-8")
@@ -247,17 +233,6 @@ class LLModel:
        self.context.repeat_last_n = repeat_last_n
        self.context.context_erase = context_erase

-    def generate_embedding(
-        self,
-        text: str
-    ) -> list[float]:
-        embedding_size = ctypes.c_size_t()
-        c_text = ctypes.c_char_p(text.encode('utf-8'))
-        embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
-        embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
-        llmodel.llmodel_free_embedding(embedding_ptr)
-        return list(embedding_array)

    def prompt_model(
        self,
        prompt: str,

View File

@@ -1,18 +0,0 @@
import sys
from io import StringIO
from gpt4all import GPT4All, Embed4All
import time
def time_embedding(i, embedder):
text = 'foo bar ' * i
start_time = time.time()
output = embedder.embed(text)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Time report: {2 * i / elapsed_time} tokens/second with {2 * i} tokens taking {elapsed_time} seconds")
if __name__ == "__main__":
embedder = Embed4All(n_threads=8)
for i in [2**n for n in range(6, 14)]:
time_embedding(i, embedder)

View File

@@ -1,8 +1,8 @@
import sys
from io import StringIO
-from gpt4all import GPT4All, Embed4All
-import time
+from gpt4all import GPT4All

def test_inference():
    model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
@@ -99,11 +99,3 @@ def test_inference_mpt():
    output = model.generate(prompt)
    assert isinstance(output, str)
    assert len(output) > 0
def test_embedding():
text = 'The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox'
embedder = Embed4All()
output = embedder.embed(text)
#for i, value in enumerate(output):
#print(f'Value at index {i}: {value}')
assert len(output) == 384

View File

@@ -10,9 +10,7 @@ use_directory_urls: false
nav:
  - 'index.md'
  - 'Bindings':
-    - 'GPT4All in Python':
-      - 'Generation': 'gpt4all_python.md'
-      - 'Embedding': 'gpt4all_python_embedding.md'
+    - 'GPT4All in Python': 'gpt4all_python.md'
    - 'GPT4ALL in NodeJs': 'gpt4all_typescript.md'
    - 'GPT4All Chat Client': 'gpt4all_chat.md'
    - 'gpt4all_cli.md'

View File

@@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,
setup(
    name=package_name,
-    version="1.0.6",
+    version="1.0.3",
    description="Python bindings for GPT4All",
    author="Richard Guo",
    author_email="richard@nomic.ai",

View File

@@ -53,7 +53,7 @@ const response = await createCompletion(ll, [
* (win) msvc version 143
   * Can be obtained with visual studio 2022 build tools

-### Build (from source)
+### Build

```sh
git clone https://github.com/nomic-ai/gpt4all.git
@@ -138,7 +138,7 @@ This package is in active development, and breaking changes may happen until the
* \[ ] createTokenStream, an async iterator that streams each token emitted from the model. Planning on following this [example](https://github.com/nodejs/node-addon-examples/tree/main/threadsafe-async-iterator)
* \[ ] proper unit testing (integrate with circle ci)
* \[ ] publish to npm under alpha tag `gpt4all@alpha`
-* \[x] have more people test on other platforms (mac tester needed)
+* \[ ] have more people test on other platforms (mac tester needed)
* \[x] switch to new pluggable backend

### Documentation

View File

@@ -53,7 +53,7 @@
        '-fno-rtti',
    ],
    'cflags_cc': [
-        '-std=c++2a'
+        '-std=c++20'
    ]
  }]
]

View File

@@ -10,7 +10,6 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
    InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
    InstanceMethod("raw_prompt", &NodeModelWrapper::Prompt),
    InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
-    InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
    InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
    InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
  });
@@ -92,23 +91,6 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
    return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
  }

-  Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo& info) {
-    auto env = info.Env();
-    std::string text = info[0].As<Napi::String>().Utf8Value();
-    size_t embedding_size = 0;
-    float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
-    auto arr_size = sizeof(arr) / sizeof(float);
-    Napi::Float32Array js_array = Napi::Float32Array::New(info.Env(), arr_size);
-    for (size_t i = 0; i < arr_size; ++i) {
-      float element = *(arr + i);
-      js_array[i] = element;
-    }
-    llmodel_free_embedding(arr);
-    return js_array;
-  }

  /**
   * Generate a response using the model.

View File

@@ -23,7 +23,6 @@ public:
    void SetThreadCount(const Napi::CallbackInfo& info);
    Napi::Value getName(const Napi::CallbackInfo& info);
    Napi::Value ThreadCount(const Napi::CallbackInfo& info);
-    Napi::Value GenerateEmbedding(const Napi::CallbackInfo& info);
    /*
     * The path that is used to search for the dynamic libraries
     */

View File

@@ -1,6 +1,6 @@
{
  "name": "gpt4all",
-  "version": "2.0.0rc",
+  "version": "2.0.0",
  "packageManager": "yarn@3.5.1",
  "main": "src/gpt4all.js",
  "repository": "nomic-ai/gpt4all",

View File

@@ -6,7 +6,7 @@ async function createPrebuilds(combinations) {
        platform,
        arch,
        napi: true,
-        targets: ["18.16.0"]
+        targets: ["18.15.0"]
    };
    try {
        await createPrebuild(opts);

File diff suppressed because it is too large.

View File

@@ -1,6 +1,5 @@
cmake_minimum_required(VERSION 3.16)
-set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -18,7 +17,7 @@ endif()
set(APP_VERSION_MAJOR 2)
set(APP_VERSION_MINOR 4)
-set(APP_VERSION_PATCH 14)
+set(APP_VERSION_PATCH 13)
set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")

# Include the binary directory for the generated header file
@@ -206,8 +205,6 @@ install(TARGETS replit-mainline-default DESTINATION lib COMPONENT ${COMPONENT_NA
if(APPLE)
  install(TARGETS replit-mainline-metal DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
endif()
-install(TARGETS bert-avxonly DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
-install(TARGETS bert-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})

set(CPACK_GENERATOR "IFW")
set(CPACK_VERBATIM_VARIABLES YES)

View File

@ -51,7 +51,19 @@ One click installers for macOS, Linux, and Windows at https://gpt4all.io
If you've already checked out the source code and/or built the program, make sure that when you do a git fetch to get the latest changes you also run ```git submodule update --init --recursive``` to update the submodules. If you've already checked out the source code and/or built the program, make sure that when you do a git fetch to get the latest changes you also run ```git submodule update --init --recursive``` to update the submodules.
## Manual download of models ## Manual download of models
* You can find a 'Model Explorer' on the official website where you can manually download models that we support: https://gpt4all.io/index.html * https://gpt4all.io/models/ggml-mpt-7b-chat.bin (default) (md5sum 756249d3d6abe23bde3b1ae272628640) Current best non-commercially licensable chat model based on MPT and trained by Mosaic ML.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin (default) (md5sum 81a09a0ddf89690372fc296ff7f625af) Current best commercially licensable model based on GPT-J and trained by Nomic AI on the latest curated GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin (md5sum 91f886b68fbce697e9a3cd501951e455) Current best non-commercially licensable model based on Llama 13b and trained by Nomic AI on the latest curated GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin (md5sum 879344aaa9d62fdccbda0be7a09e7976) A commercially licensable model based on GPT-J and trained by Nomic AI on the v2 GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.1-breezy.bin (md5sum 61d48a82cb188cceb14ebb8082bfec37) A commercially licensable model based on GPT-J and trained by Nomic AI on the v1 GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j.bin (md5sum 5b5a3f9b858d33b29b52b89692415595) A commercially licensable model based on GPT-J and trained by Nomic AI on the v0 GPT4All dataset.
* https://gpt4all.io/models/ggml-vicuna-7b-1.1-q4_2.bin (md5sum 29119f8fa11712704c6b22ac5ab792ea) A non-commercially licensable model based on Llama 7b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
* https://gpt4all.io/models/ggml-vicuna-13b-1.1-q4_2.bin (md5sum 95999b7b0699e2070af63bf5d34101a8) A non-commercially licensable model based on Llama 13b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
* https://gpt4all.io/models/ggml-wizardLM-7B.q4_2.bin (md5sum 99e6d129745a3f1fb1121abed747b05a) A non-commercially licensable model based on Llama 7b and trained by Microsoft and Peking University.
* https://gpt4all.io/models/ggml-stable-vicuna-13B.q4_2.bin (md5sum 6cb4ee297537c9133bddab9692879de0) A non-commercially licensable model based on Llama 13b and RLHF trained by Stable AI.
* https://gpt4all.io/models/ggml-mpt-7b-base.bin (md5sum 120c32a51d020066288df045ef5d52b9) A commercially licensable model base pre-trained by Mosaic ML.
* https://gpt4all.io/models/ggml-nous-gpt4-vicuna-13b.bin (md5sum d5eafd5b0bd0d615cfd5fd763f642dfe) A non-commercially licensable model based on Vicuna 13b, fine-tuned on ~180,000 instructions, trained by Nous Research.
* https://gpt4all.io/models/ggml-mpt-7b-instruct.bin (md5sum 1cfa4958f489f0a0d1ffdf6b37322809) A commercially licensable instruct model based on MPT and trained by Mosaic ML.
## Terminal Only Interface with no Qt dependency ## Terminal Only Interface with no Qt dependency

View File

@ -155,7 +155,7 @@ void ChatGPTWorker::request(const QString &apiKey,
m_ctx = promptCtx; m_ctx = promptCtx;
QUrl openaiUrl("https://api.openai.com/v1/chat/completions"); QUrl openaiUrl("https://api.openai.com/v1/chat/completions");
const QString authorization = QString("Bearer %1").arg(apiKey).trimmed(); const QString authorization = QString("Bearer %1").arg(apiKey);
QNetworkRequest request(openaiUrl); QNetworkRequest request(openaiUrl);
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json"); request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
request.setRawHeader("Authorization", authorization.toUtf8()); request.setRawHeader("Authorization", authorization.toUtf8());
@ -244,7 +244,7 @@ void ChatGPTWorker::handleReadyRead()
void ChatGPTWorker::handleErrorOccurred(QNetworkReply::NetworkError code) void ChatGPTWorker::handleErrorOccurred(QNetworkReply::NetworkError code)
{ {
QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender()); QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply || reply->error() == QNetworkReply::OperationCanceledError /*when we call abort on purpose*/) { if (!reply) {
emit finished(); emit finished();
return; return;
} }
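Two small hardening changes sit in this file: the API key is trimmed before it is placed in the Authorization header (a trailing newline in a pasted key otherwise breaks authentication), and an error callback fired by our own abort() is treated as a normal finish rather than a failure. A condensed Qt sketch of both, assuming nothing beyond the Qt Network classes already used above (the free-function names here are illustrative):

```cpp
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QUrl>

// Build the completions request the same way the worker does: JSON body, trimmed Bearer token.
QNetworkReply *postCompletions(QNetworkAccessManager &nam, const QString &apiKey, const QByteArray &body)
{
    QUrl openaiUrl("https://api.openai.com/v1/chat/completions");
    const QString authorization = QString("Bearer %1").arg(apiKey).trimmed();
    QNetworkRequest request(openaiUrl);
    request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
    request.setRawHeader("Authorization", authorization.toUtf8());
    return nam.post(request, body);
}

// In the error handler, a cancellation we triggered ourselves is not a user-visible failure.
void handleError(QNetworkReply *reply)
{
    if (!reply || reply->error() == QNetworkReply::OperationCanceledError)
        return; // finished or aborted on purpose; nothing to report
    // ... surface reply->errorString() to the UI ...
}
```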

View File

@ -46,8 +46,6 @@ public:
ChatGPT(); ChatGPT();
virtual ~ChatGPT(); virtual ~ChatGPT();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override; bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override; bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override; size_t requiredMem(const std::string &modelPath) override;

View File

@ -14,7 +14,6 @@
#define REPLIT_INTERNAL_STATE_VERSION 0 #define REPLIT_INTERNAL_STATE_VERSION 0
#define LLAMA_INTERNAL_STATE_VERSION 0 #define LLAMA_INTERNAL_STATE_VERSION 0
#define FALCON_INTERNAL_STATE_VERSION 0 #define FALCON_INTERNAL_STATE_VERSION 0
#define BERT_INTERNAL_STATE_VERSION 0
class LLModelStore { class LLModelStore {
public: public:
@ -241,11 +240,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
#if defined(Q_OS_MAC) && defined(__arm__) #if defined(Q_OS_MAC) && defined(__arm__)
if (m_forceMetal) if (m_forceMetal)
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "metal"); m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "metal");
else else
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "auto"); m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
#else #else
m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), "auto"); m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
#endif #endif
if (m_llModelInfo.model) { if (m_llModelInfo.model) {
@ -259,13 +258,12 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
m_llModelInfo = LLModelInfo(); m_llModelInfo = LLModelInfo();
emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename())); emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
} else { } else {
switch (m_llModelInfo.model->implementation().modelType()[0]) { switch (m_llModelInfo.model->implementation().modelType[0]) {
case 'L': m_llModelType = LLModelType::LLAMA_; break; case 'L': m_llModelType = LLModelType::LLAMA_; break;
case 'G': m_llModelType = LLModelType::GPTJ_; break; case 'G': m_llModelType = LLModelType::GPTJ_; break;
case 'M': m_llModelType = LLModelType::MPT_; break; case 'M': m_llModelType = LLModelType::MPT_; break;
case 'R': m_llModelType = LLModelType::REPLIT_; break; case 'R': m_llModelType = LLModelType::REPLIT_; break;
case 'F': m_llModelType = LLModelType::FALCON_; break; case 'F': m_llModelType = LLModelType::FALCON_; break;
case 'B': m_llModelType = LLModelType::BERT_; break;
default: default:
{ {
delete std::exchange(m_llModelInfo.model, nullptr); delete std::exchange(m_llModelInfo.model, nullptr);
@ -630,8 +628,8 @@ bool ChatLLM::handleNameRecalculate(bool isRecalc)
qDebug() << "name recalc" << m_llmThread.objectName() << isRecalc; qDebug() << "name recalc" << m_llmThread.objectName() << isRecalc;
#endif #endif
Q_UNUSED(isRecalc); Q_UNUSED(isRecalc);
qt_noop(); Q_UNREACHABLE();
return true; return false;
} }
bool ChatLLM::handleSystemPrompt(int32_t token) bool ChatLLM::handleSystemPrompt(int32_t token)
@ -671,8 +669,7 @@ bool ChatLLM::serialize(QDataStream &stream, int version)
case MPT_: stream << MPT_INTERNAL_STATE_VERSION; break; case MPT_: stream << MPT_INTERNAL_STATE_VERSION; break;
case GPTJ_: stream << GPTJ_INTERNAL_STATE_VERSION; break; case GPTJ_: stream << GPTJ_INTERNAL_STATE_VERSION; break;
case LLAMA_: stream << LLAMA_INTERNAL_STATE_VERSION; break; case LLAMA_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case FALCON_: stream << FALCON_INTERNAL_STATE_VERSION; break; case FALCON_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case BERT_: stream << BERT_INTERNAL_STATE_VERSION; break;
default: Q_UNREACHABLE(); default: Q_UNREACHABLE();
} }
} }
@ -791,18 +788,13 @@ void ChatLLM::processSystemPrompt()
if (!isModelLoaded() || m_processedSystemPrompt || m_isServer) if (!isModelLoaded() || m_processedSystemPrompt || m_isServer)
return; return;
const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
if (QString::fromStdString(systemPrompt).trimmed().isEmpty()) {
m_processedSystemPrompt = true;
return;
}
m_stopGenerating = false; m_stopGenerating = false;
auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1); auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);
auto responseFunc = std::bind(&ChatLLM::handleSystemResponse, this, std::placeholders::_1, auto responseFunc = std::bind(&ChatLLM::handleSystemResponse, this, std::placeholders::_1,
std::placeholders::_2); std::placeholders::_2);
auto recalcFunc = std::bind(&ChatLLM::handleSystemRecalculate, this, std::placeholders::_1); auto recalcFunc = std::bind(&ChatLLM::handleSystemRecalculate, this, std::placeholders::_1);
const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo); const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo); const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
const float top_p = MySettings::globalInstance()->modelTopP(m_modelInfo); const float top_p = MySettings::globalInstance()->modelTopP(m_modelInfo);
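The early return added to processSystemPrompt() pairs with the models.json change later in this diff, where models that ship without a system prompt now carry "systemPrompt": " " (a single space): the prompt is only run through the model when it is non-empty after trimming. A one-function sketch of that guard:

```cpp
#include <QString>
#include <string>

// Mirrors the check in processSystemPrompt(): a blank or whitespace-only system prompt is skipped,
// so models configured with " " (one space) never pay the cost of an extra prompt pass.
bool hasUsableSystemPrompt(const std::string &systemPrompt)
{
    return !QString::fromStdString(systemPrompt).trimmed().isEmpty();
}
```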

View File

@ -16,7 +16,6 @@ enum LLModelType {
CHATGPT_, CHATGPT_,
REPLIT_, REPLIT_,
FALCON_, FALCON_,
BERT_
}; };
struct LLModelInfo { struct LLModelInfo {

View File

@ -7,19 +7,16 @@ file(GLOB MYMPTLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NA
file(GLOB MYLLAMALIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama*) file(GLOB MYLLAMALIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama*)
file(GLOB MYREPLITLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libreplit*) file(GLOB MYREPLITLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libreplit*)
file(GLOB MYFALCONLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libfalcon*) file(GLOB MYFALCONLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libfalcon*)
file(GLOB MYBERTLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libbert*)
file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.*) file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.*)
file(COPY ${MYGPTJLIBS} file(COPY ${MYGPTJLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYMPTLIBS} file(COPY ${MYMPTLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLAMALIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYREPLITLIBS} file(COPY ${MYREPLITLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYFALCONLLIBS} file(COPY ${MYFALCONLLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYBERTLLIBS} file(COPY ${MYLLAMALIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLMODELLIBS} file(COPY ${MYLLMODELLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)

View File

@ -8,7 +8,6 @@
#include <QFile> #include <QFile>
#include <QProcess> #include <QProcess>
#include <QResource> #include <QResource>
#include <QSettings>
#include <fstream> #include <fstream>
class MyLLM: public LLM { }; class MyLLM: public LLM { };
@ -34,7 +33,7 @@ LLM::LLM()
if (directoryExists(frameworksDir)) if (directoryExists(frameworksDir))
llmodelSearchPaths += ";" + frameworksDir; llmodelSearchPaths += ";" + frameworksDir;
#endif #endif
LLModel::Implementation::setImplementationsSearchPath(llmodelSearchPaths.toStdString()); LLModel::setImplementationsSearchPath(llmodelSearchPaths.toStdString());
#if defined(__x86_64__) #if defined(__x86_64__)
#ifndef _MSC_VER #ifndef _MSC_VER
@ -49,13 +48,7 @@ LLM::LLM()
#endif #endif
m_compatHardware = minimal; m_compatHardware = minimal;
} emit compatHardwareChanged();
bool LLM::hasSettingsAccess() const
{
QSettings settings;
settings.sync();
return settings.status() == QSettings::NoError;
} }
bool LLM::checkForUpdates() const bool LLM::checkForUpdates() const
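The new hasSettingsAccess() probe backs the startup dialog added in main.qml further down: it forces a sync of the default QSettings object and reports whether the settings file could actually be read and written. A brief sketch of how the QSettings status codes map to user-facing causes (the helper name is illustrative):

```cpp
#include <QSettings>
#include <QString>

// Distinguish the failure modes QSettings can report after a sync().
QString describeSettingsStatus()
{
    QSettings settings;
    settings.sync(); // force a read/write round trip now
    switch (settings.status()) {
    case QSettings::NoError:     return "settings file is readable and writable";
    case QSettings::AccessError: return "settings file cannot be accessed (permissions or a locked file)";
    case QSettings::FormatError: return "settings file exists but is corrupt or malformed";
    }
    return "unknown settings status";
}
```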

View File

@ -6,11 +6,12 @@
class LLM : public QObject class LLM : public QObject
{ {
Q_OBJECT Q_OBJECT
Q_PROPERTY(bool compatHardware READ compatHardware NOTIFY compatHardwareChanged)
public: public:
static LLM *globalInstance(); static LLM *globalInstance();
Q_INVOKABLE bool hasSettingsAccess() const; bool compatHardware() const { return m_compatHardware; }
Q_INVOKABLE bool compatHardware() const { return m_compatHardware; }
Q_INVOKABLE bool checkForUpdates() const; Q_INVOKABLE bool checkForUpdates() const;
Q_INVOKABLE bool directoryExists(const QString &path) const; Q_INVOKABLE bool directoryExists(const QString &path) const;
@ -21,6 +22,7 @@ public:
Q_SIGNALS: Q_SIGNALS:
void chatListModelChanged(); void chatListModelChanged();
void modelListChanged(); void modelListChanged();
void compatHardwareChanged();
private: private:
bool m_compatHardware; bool m_compatHardware;

View File

@ -89,22 +89,14 @@ Window {
property bool hasShownModelDownload: false property bool hasShownModelDownload: false
property bool hasShownFirstStart: false property bool hasShownFirstStart: false
property bool hasShownSettingsAccess: false
function startupDialogs() { function startupDialogs() {
if (!LLM.compatHardware()) { if (!LLM.compatHardware) {
Network.sendNonCompatHardware(); Network.sendNonCompatHardware();
errorCompatHardware.open(); errorCompatHardware.open();
return; return;
} }
// check if we have access to settings and if not show an error
if (!hasShownSettingsAccess && !LLM.hasSettingsAccess()) {
errorSettingsAccess.open();
hasShownSettingsAccess = true;
return;
}
// check for first time start of this version // check for first time start of this version
if (!hasShownFirstStart && Download.isFirstStart()) { if (!hasShownFirstStart && Download.isFirstStart()) {
firstStartDialog.open(); firstStartDialog.open();
@ -143,20 +135,6 @@ Window {
+ qsTr("https://en.wikipedia.org/wiki/Advanced_Vector_Extensions</a>") + qsTr("https://en.wikipedia.org/wiki/Advanced_Vector_Extensions</a>")
} }
PopupDialog {
id: errorSettingsAccess
anchors.centerIn: parent
shouldTimeOut: false
shouldShowBusy: false
modal: true
text: qsTr("<h3>Encountered an error starting up:</h3><br>")
+ qsTr("<i>\"Inability to access settings file.\"</i>")
+ qsTr("<br><br>Unfortunately, something is preventing the program from accessing ")
+ qsTr("the settings file. This could be caused by incorrect permissions in the local ")
+ qsTr("app config directory where the settings file is located. ")
+ qsTr("Check out our <a href=\"https://discord.gg/4M2QFmTt2k\">discord channel</a> for help.")
}
StartupDialog { StartupDialog {
id: firstStartDialog id: firstStartDialog
anchors.centerIn: parent anchors.centerIn: parent

View File

@ -1,16 +1,18 @@
[ [
{ {
"order": "a", "order": "a",
"md5sum": "e8d47924f433bd561cb5244557147793", "md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Wizard v1.1", "name": "Hermes",
"filename": "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin", "filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "7323310848", "filesize": "8136777088",
"requires": "2.4.7",
"ramrequired": "16", "ramrequired": "16",
"parameters": "13 billion", "parameters": "13 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "LLaMA", "type": "LLaMA",
"systemPrompt": " ", "description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul" "url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
}, },
{ {
"order": "b", "order": "b",
@ -23,29 +25,12 @@
"parameters": "7 billion", "parameters": "7 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "Falcon", "type": "Falcon",
"systemPrompt": " ",
"description": "<strong>Best overall smaller model</strong><br><ul><li>Fast responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>", "description": "<strong>Best overall smaller model</strong><br><ul><li>Fast responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin", "url": "https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n" "promptTemplate": "### Instruction:\n%1\n### Response:\n"
}, },
{ {
"order": "c", "order": "c",
"md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Hermes",
"filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "8136777088",
"requires": "2.4.7",
"ramrequired": "16",
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Extremely good model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
},
{
"order": "e",
"md5sum": "81a09a0ddf89690372fc296ff7f625af", "md5sum": "81a09a0ddf89690372fc296ff7f625af",
"name": "Groovy", "name": "Groovy",
"filename": "ggml-gpt4all-j-v1.3-groovy.bin", "filename": "ggml-gpt4all-j-v1.3-groovy.bin",
@ -54,11 +39,10 @@
"parameters": "7 billion", "parameters": "7 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "GPT-J", "type": "GPT-J",
"systemPrompt": " ",
"description": "<strong>Creative model can be used for commercial purposes</strong><br><ul><li>Fast responses<li>Creative responses</li><li>Instruction based</li><li>Trained by Nomic AI<li>Licensed for commercial use</ul>" "description": "<strong>Creative model can be used for commercial purposes</strong><br><ul><li>Fast responses<li>Creative responses</li><li>Instruction based</li><li>Trained by Nomic AI<li>Licensed for commercial use</ul>"
}, },
{ {
"order": "f", "order": "e",
"md5sum": "11d9f060ca24575a2c303bdc39952486", "md5sum": "11d9f060ca24575a2c303bdc39952486",
"name": "Snoozy", "name": "Snoozy",
"filename": "GPT4All-13B-snoozy.ggmlv3.q4_0.bin", "filename": "GPT4All-13B-snoozy.ggmlv3.q4_0.bin",
@ -68,12 +52,11 @@
"parameters": "13 billion", "parameters": "13 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "LLaMA", "type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>", "description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/GPT4All-13B-snoozy-GGML/resolve/main/GPT4All-13B-snoozy.ggmlv3.q4_0.bin" "url": "https://huggingface.co/TheBloke/GPT4All-13B-snoozy-GGML/resolve/main/GPT4All-13B-snoozy.ggmlv3.q4_0.bin"
}, },
{ {
"order": "g", "order": "f",
"md5sum": "756249d3d6abe23bde3b1ae272628640", "md5sum": "756249d3d6abe23bde3b1ae272628640",
"name": "MPT Chat", "name": "MPT Chat",
"filename": "ggml-mpt-7b-chat.bin", "filename": "ggml-mpt-7b-chat.bin",
@ -88,9 +71,9 @@
"systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>" "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
}, },
{ {
"order": "h", "order": "g",
"md5sum": "e64e74375ce9d36a3d0af3db1523fd0a", "md5sum": "e64e74375ce9d36a3d0af3db1523fd0a",
"name": "Mini Orca", "name": "Orca",
"filename": "orca-mini-7b.ggmlv3.q4_0.bin", "filename": "orca-mini-7b.ggmlv3.q4_0.bin",
"filesize": "3791749248", "filesize": "3791749248",
"requires": "2.4.7", "requires": "2.4.7",
@ -104,9 +87,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n" "systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
}, },
{ {
"order": "i", "order": "h",
"md5sum": "6a087f7f4598fad0bb70e6cb4023645e", "md5sum": "6a087f7f4598fad0bb70e6cb4023645e",
"name": "Mini Orca (Small)", "name": "Orca (Small)",
"filename": "orca-mini-3b.ggmlv3.q4_0.bin", "filename": "orca-mini-3b.ggmlv3.q4_0.bin",
"filesize": "1928446208", "filesize": "1928446208",
"requires": "2.4.7", "requires": "2.4.7",
@ -120,9 +103,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n" "systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
}, },
{ {
"order": "j", "order": "i",
"md5sum": "959b7f65b2d12fd1e3ff99e7493c7a3a", "md5sum": "959b7f65b2d12fd1e3ff99e7493c7a3a",
"name": "Mini Orca (Large)", "name": "Orca (Large)",
"filename": "orca-mini-13b.ggmlv3.q4_0.bin", "filename": "orca-mini-13b.ggmlv3.q4_0.bin",
"filesize": "7323329152", "filesize": "7323329152",
"requires": "2.4.7", "requires": "2.4.7",
@ -136,7 +119,7 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n" "systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
}, },
{ {
"order": "k", "order": "j",
"md5sum": "29119f8fa11712704c6b22ac5ab792ea", "md5sum": "29119f8fa11712704c6b22ac5ab792ea",
"name": "Vicuna", "name": "Vicuna",
"filename": "ggml-vicuna-7b-1.1-q4_2.bin", "filename": "ggml-vicuna-7b-1.1-q4_2.bin",
@ -145,11 +128,10 @@
"parameters": "7 billion", "parameters": "7 billion",
"quant": "q4_2", "quant": "q4_2",
"type": "LLaMA", "type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>" "description": "<strong>Good small model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
}, },
{ {
"order": "l", "order": "k",
"md5sum": "95999b7b0699e2070af63bf5d34101a8", "md5sum": "95999b7b0699e2070af63bf5d34101a8",
"name": "Vicuna (large)", "name": "Vicuna (large)",
"filename": "ggml-vicuna-13b-1.1-q4_2.bin", "filename": "ggml-vicuna-13b-1.1-q4_2.bin",
@ -158,11 +140,10 @@
"parameters": "13 billion", "parameters": "13 billion",
"quant": "q4_2", "quant": "q4_2",
"type": "LLaMA", "type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good larger model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>" "description": "<strong>Good larger model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
}, },
{ {
"order": "m", "order": "l",
"md5sum": "99e6d129745a3f1fb1121abed747b05a", "md5sum": "99e6d129745a3f1fb1121abed747b05a",
"name": "Wizard", "name": "Wizard",
"filename": "ggml-wizardLM-7B.q4_2.bin", "filename": "ggml-wizardLM-7B.q4_2.bin",
@ -171,11 +152,10 @@
"parameters": "7 billion", "parameters": "7 billion",
"quant": "q4_2", "quant": "q4_2",
"type": "LLaMA", "type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by by Microsoft and Peking University</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>" "description": "<strong>Good small model - trained by by Microsoft and Peking University</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
}, },
{ {
"order": "n", "order": "m",
"md5sum": "6cb4ee297537c9133bddab9692879de0", "md5sum": "6cb4ee297537c9133bddab9692879de0",
"name": "Stable Vicuna", "name": "Stable Vicuna",
"filename": "ggml-stable-vicuna-13B.q4_2.bin", "filename": "ggml-stable-vicuna-13B.q4_2.bin",
@ -188,7 +168,7 @@
"systemPrompt": "## Assistant: I am StableVicuna, a large language model created by CarperAI. I am here to chat!\n\n" "systemPrompt": "## Assistant: I am StableVicuna, a large language model created by CarperAI. I am here to chat!\n\n"
}, },
{ {
"order": "o", "order": "n",
"md5sum": "1cfa4958f489f0a0d1ffdf6b37322809", "md5sum": "1cfa4958f489f0a0d1ffdf6b37322809",
"name": "MPT Instruct", "name": "MPT Instruct",
"filename": "ggml-mpt-7b-instruct.bin", "filename": "ggml-mpt-7b-instruct.bin",
@ -198,11 +178,10 @@
"parameters": "7 billion", "parameters": "7 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "MPT", "type": "MPT",
"systemPrompt": " ",
"description": "<strong>Mosaic's instruction model</strong><br><ul><li>Instruction based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>" "description": "<strong>Mosaic's instruction model</strong><br><ul><li>Instruction based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
}, },
{ {
"order": "p", "order": "o",
"md5sum": "120c32a51d020066288df045ef5d52b9", "md5sum": "120c32a51d020066288df045ef5d52b9",
"name": "MPT Base", "name": "MPT Base",
"filename": "ggml-mpt-7b-base.bin", "filename": "ggml-mpt-7b-base.bin",
@ -212,11 +191,10 @@
"parameters": "7 billion", "parameters": "7 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "MPT", "type": "MPT",
"systemPrompt": " ",
"description": "<strong>Trained for text completion with no assistant finetuning</strong><br><ul><li>Completion based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>" "description": "<strong>Trained for text completion with no assistant finetuning</strong><br><ul><li>Completion based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
}, },
{ {
"order": "q", "order": "p",
"md5sum": "d5eafd5b0bd0d615cfd5fd763f642dfe", "md5sum": "d5eafd5b0bd0d615cfd5fd763f642dfe",
"name": "Nous Vicuna", "name": "Nous Vicuna",
"filename": "ggml-nous-gpt4-vicuna-13b.bin", "filename": "ggml-nous-gpt4-vicuna-13b.bin",
@ -225,11 +203,10 @@
"parameters": "13 billion", "parameters": "13 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "LLaMA", "type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on ~180,000 instructions</strong><br><ul><li>Instruction based<li>Trained by Nous Research<li>Cannot be used commercially</ul>" "description": "<strong>Trained on ~180,000 instructions</strong><br><ul><li>Instruction based<li>Trained by Nous Research<li>Cannot be used commercially</ul>"
}, },
{ {
"order": "r", "order": "q",
"md5sum": "489d21fd48840dcb31e5f92f453f3a20", "md5sum": "489d21fd48840dcb31e5f92f453f3a20",
"name": "Wizard Uncensored", "name": "Wizard Uncensored",
"filename": "wizardLM-13B-Uncensored.ggmlv3.q4_0.bin", "filename": "wizardLM-13B-Uncensored.ggmlv3.q4_0.bin",
@ -239,12 +216,11 @@
"parameters": "13 billion", "parameters": "13 billion",
"quant": "q4_0", "quant": "q4_0",
"type": "LLaMA", "type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on uncensored assistant data and instruction data</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>", "description": "<strong>Trained on uncensored assistant data and instruction data</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/WizardLM-13B-Uncensored-GGML/resolve/main/wizardLM-13B-Uncensored.ggmlv3.q4_0.bin" "url": "https://huggingface.co/TheBloke/WizardLM-13B-Uncensored-GGML/resolve/main/wizardLM-13B-Uncensored.ggmlv3.q4_0.bin"
}, },
{ {
"order": "s", "order": "r",
"md5sum": "615890cb571fcaa0f70b2f8d15ef809e", "md5sum": "615890cb571fcaa0f70b2f8d15ef809e",
"disableGUI": "true", "disableGUI": "true",
"name": "Replit", "name": "Replit",
@ -255,23 +231,7 @@
"parameters": "3 billion", "parameters": "3 billion",
"quant": "f16", "quant": "f16",
"type": "Replit", "type": "Replit",
"systemPrompt": " ",
"description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use</ul>", "description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/ggml-replit-code-v1-3b/resolve/main/ggml-replit-code-v1-3b.bin" "url": "https://huggingface.co/nomic-ai/ggml-replit-code-v1-3b/resolve/main/ggml-replit-code-v1-3b.bin"
},
{
"order": "t",
"md5sum": "031bb5d5722c08d13e3e8eaf55c37391",
"disableGUI": "true",
"name": "Bert",
"filename": "ggml-all-MiniLM-L6-v2-f16.bin",
"filesize": "45521167",
"requires": "2.4.14",
"ramrequired": "1",
"parameters": "1 million",
"quant": "f16",
"type": "Bert",
"systemPrompt": " ",
"description": "<strong>Sbert</strong><br><ul><li>For embeddings"
} }
] ]
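The reshuffled list above is easier to follow field by field: every entry carries ordering and identity (order, md5sum, name, filename, filesize), gating (requires, the minimum app version; ramrequired), model metadata (parameters, quant, type), UI text (description), and optional url, promptTemplate, and systemPrompt (where a single space means "no system prompt"). A rough Qt sketch of reading those fields, assuming only the shape shown above (the struct is illustrative, not the app's ModelInfo):

```cpp
#include <QJsonArray>
#include <QJsonDocument>
#include <QJsonObject>
#include <QString>
#include <QVector>

struct ModelEntry { // illustrative stand-in for the fields used above
    QString name, filename, md5sum, requiresVersion, url, promptTemplate, systemPrompt;
    qint64 filesize = 0;
    int ramrequired = 0;
};

QVector<ModelEntry> parseModelsJson(const QByteArray &jsonData)
{
    QVector<ModelEntry> out;
    const QJsonArray array = QJsonDocument::fromJson(jsonData).array();
    for (const auto &value : array) {
        const QJsonObject obj = value.toObject();
        ModelEntry e;
        e.name            = obj["name"].toString();
        e.filename        = obj["filename"].toString();
        e.md5sum          = obj["md5sum"].toString();
        e.requiresVersion = obj["requires"].toString();     // minimum app version that can load it
        e.url             = obj["url"].toString();          // optional download override
        e.promptTemplate  = obj["promptTemplate"].toString();
        e.systemPrompt    = obj["systemPrompt"].toString(); // " " (one space) means: no system prompt
        e.filesize        = obj["filesize"].toString().toLongLong(); // stored as strings in the JSON
        e.ramrequired     = obj["ramrequired"].toString().toInt();
        out.append(e);
    }
    return out;
}
```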

View File

@ -416,40 +416,6 @@
* Akarshan Biswas * Akarshan Biswas
* Adam Treat (Nomic AI) * Adam Treat (Nomic AI)
* Community (beta testers, bug reporters) * Community (beta testers, bug reporters)
"
},
{
"version": "2.4.13",
"notes":
"
* Fix bug with prolonging shutdown with generation
* Fix bug with update model info on deleting chats
* Fix bug with preventing closing of model download dialog
* Always allow closing the model download dialog
* Fix numerous bugs with download of models.json and provide backup option
* Add json and c# highlighting
* Fix bug with chatgpt crashing
* Fix bug with chatgpt not working for some keys
* Fix bug with mixpanel opt outs not counting
* Fix problem with OOM errors causing crash and then repeating on next start
* Fix default thread setting and provide guardrails
* Fix tap handler in settings dialog for buttons
* Fix color of some text fields on macOS for settings dialog
* Fix problem with startup dialog not closing
* Provide error dialog for settings file not accessible
* Try and fix problems with avx-only detection
* Fix showing error in model downloads unnecessarily
* Prefer 7b models to load by default
* Add Wizard v1.1 to download list
* Rename Orca models to Mini Orca
* Don't use a system prompt by default unless the model was trained with one
",
"contributors":
"
* Lakshay Kansal (Nomic AI)
* Aaron Miller (Nomic AI)
* Adam Treat (Nomic AI)
* Community (beta testers, bug reporters)
" "
} }
] ]

View File

@ -161,6 +161,16 @@ int InstalledModels::count() const
return rowCount(); return rowCount();
} }
QString InstalledModels::firstId() const
{
if (rowCount() > 0) {
QModelIndex firstIndex = index(0, 0);
return sourceModel()->data(firstIndex, ModelList::IdRole).toString();
} else {
return QString();
}
}
DownloadableModels::DownloadableModels(QObject *parent) DownloadableModels::DownloadableModels(QObject *parent)
: QSortFilterProxyModel(parent) : QSortFilterProxyModel(parent)
, m_expanded(false) , m_expanded(false)
@ -212,7 +222,6 @@ ModelList::ModelList()
: QAbstractListModel(nullptr) : QAbstractListModel(nullptr)
, m_installedModels(new InstalledModels(this)) , m_installedModels(new InstalledModels(this))
, m_downloadableModels(new DownloadableModels(this)) , m_downloadableModels(new DownloadableModels(this))
, m_asyncModelRequestOngoing(false)
{ {
m_installedModels->setSourceModel(this); m_installedModels->setSourceModel(this);
m_downloadableModels->setSourceModel(this); m_downloadableModels->setSourceModel(this);
@ -288,9 +297,12 @@ ModelInfo ModelList::defaultModelInfo() const
settings.sync(); settings.sync();
// The user default model can be set by the user in the settings dialog. The "default" user // The user default model can be set by the user in the settings dialog. The "default" user
// default model is "Application default" which signals we should use the logic here. // default model is "Application default" which signals we should use the default model that was
// specified by the models.json file.
const QString userDefaultModelName = MySettings::globalInstance()->userDefaultModel(); const QString userDefaultModelName = MySettings::globalInstance()->userDefaultModel();
const bool hasUserDefaultName = !userDefaultModelName.isEmpty() && userDefaultModelName != "Application default"; const bool hasUserDefaultName = !userDefaultModelName.isEmpty() && userDefaultModelName != "Application default";
const QString defaultModelName = settings.value("defaultModel").toString();
const bool hasDefaultName = hasUserDefaultName ? false : !defaultModelName.isEmpty();
ModelInfo *defaultModel = nullptr; ModelInfo *defaultModel = nullptr;
for (ModelInfo *info : m_models) { for (ModelInfo *info : m_models) {
@ -298,10 +310,12 @@ ModelInfo ModelList::defaultModelInfo() const
continue; continue;
defaultModel = info; defaultModel = info;
const size_t ramrequired = defaultModel->ramrequired; // If we don't have either setting, then just use the first model that is installed
if (!hasUserDefaultName && !hasDefaultName)
break;
// If we don't have either setting, then just use the first installed model that requires less than 16GB // If we don't have a user specified default, but *do* have a default setting and match, then use it
if (!hasUserDefaultName && !info->isChatGPT && ramrequired > 0 && ramrequired < 16) if (!hasUserDefaultName && hasDefaultName && (defaultModel->id() == defaultModelName))
break; break;
// If we have a user specified default and match, then use it // If we have a user specified default and match, then use it
@ -821,7 +835,7 @@ void ModelList::updateModelsFromDirectory()
for (const QString &id : modelsById) { for (const QString &id : modelsById) {
updateData(id, FilenameRole, filename); updateData(id, FilenameRole, filename);
updateData(id, ChatGPTRole, filename.startsWith("chatgpt-")); updateData(id, ChatGPTRole, filename.startsWith("chatgpt-"));
updateData(id, DirpathRole, info.dir().absolutePath() + "/"); updateData(id, DirpathRole, path);
updateData(id, FilesizeRole, toFileSize(info.size())); updateData(id, FilesizeRole, toFileSize(info.size()));
} }
} }
@ -832,6 +846,14 @@ void ModelList::updateModelsFromDirectory()
processDirectory(exePath); processDirectory(exePath);
if (localPath != exePath) if (localPath != exePath)
processDirectory(localPath); processDirectory(localPath);
if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
} }
void ModelList::updateModelsFromJson() void ModelList::updateModelsFromJson()
@ -877,9 +899,6 @@ void ModelList::updateModelsFromJson()
void ModelList::updateModelsFromJsonAsync() void ModelList::updateModelsFromJsonAsync()
{ {
m_asyncModelRequestOngoing = true;
emit asyncModelRequestOngoingChanged();
#if defined(USE_LOCAL_MODELSJSON) #if defined(USE_LOCAL_MODELSJSON)
QUrl jsonUrl("file://" + QDir::homePath() + "/dev/large_language_models/gpt4all/gpt4all-chat/metadata/models.json"); QUrl jsonUrl("file://" + QDir::homePath() + "/dev/large_language_models/gpt4all/gpt4all-chat/metadata/models.json");
#else #else
@ -892,37 +911,17 @@ void ModelList::updateModelsFromJsonAsync()
QNetworkReply *jsonReply = m_networkManager.get(request); QNetworkReply *jsonReply = m_networkManager.get(request);
connect(qApp, &QCoreApplication::aboutToQuit, jsonReply, &QNetworkReply::abort); connect(qApp, &QCoreApplication::aboutToQuit, jsonReply, &QNetworkReply::abort);
connect(jsonReply, &QNetworkReply::finished, this, &ModelList::handleModelsJsonDownloadFinished); connect(jsonReply, &QNetworkReply::finished, this, &ModelList::handleModelsJsonDownloadFinished);
connect(jsonReply, &QNetworkReply::errorOccurred, this, &ModelList::handleModelsJsonDownloadErrorOccurred);
} }
void ModelList::handleModelsJsonDownloadFinished() void ModelList::handleModelsJsonDownloadFinished()
{ {
QNetworkReply *jsonReply = qobject_cast<QNetworkReply *>(sender()); QNetworkReply *jsonReply = qobject_cast<QNetworkReply *>(sender());
if (!jsonReply) { if (!jsonReply)
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
return; return;
}
QByteArray jsonData = jsonReply->readAll(); QByteArray jsonData = jsonReply->readAll();
jsonReply->deleteLater(); jsonReply->deleteLater();
parseModelsJsonFile(jsonData, true); parseModelsJsonFile(jsonData, true);
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
}
void ModelList::handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code)
{
// TODO: Show what error occurred in the GUI
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply)
return;
qWarning() << QString("ERROR: Modellist download failed with error code \"%1-%2\"")
.arg(code).arg(reply->errorString()).toStdString();
} }
void ModelList::handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors) void ModelList::handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors)
@ -1109,6 +1108,14 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
updateData(id, ModelList::QuantRole, "NA"); updateData(id, ModelList::QuantRole, "NA");
updateData(id, ModelList::TypeRole, "GPT"); updateData(id, ModelList::TypeRole, "GPT");
} }
if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
} }
void ModelList::updateModelsFromSettings() void ModelList::updateModelsFromSettings()
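Summing up the new selection rule in defaultModelInfo() above: an explicit user choice (anything other than "Application default") wins; otherwise the first installed, non-ChatGPT model that needs less than 16 GB of RAM is used, with the last installed model as a fallback. A condensed sketch of that rule over a simplified record (the struct is illustrative, and the user-match branch is paraphrased from the comment in the hunk):

```cpp
#include <QList>
#include <QString>

struct Candidate {       // illustrative stand-in for ModelInfo
    QString name;
    bool installed = false;
    bool isChatGPT = false;
    int ramrequired = 0; // in GB; 0 or less means unknown
};

// Pick the default chat model: honor an explicit user choice, otherwise prefer a small local model.
const Candidate *pickDefault(const QList<Candidate> &models, const QString &userDefaultName)
{
    const bool hasUserDefault = !userDefaultName.isEmpty() && userDefaultName != "Application default";
    const Candidate *picked = nullptr;
    for (const Candidate &m : models) {
        if (!m.installed)
            continue;
        picked = &m; // remember the latest installed model as a fallback
        // No explicit choice: stop at the first local model that fits in under 16 GB of RAM.
        if (!hasUserDefault && !m.isChatGPT && m.ramrequired > 0 && m.ramrequired < 16)
            break;
        // Explicit choice: stop when we reach it.
        if (hasUserDefault && m.name == userDefaultName)
            break;
    }
    return picked; // may be nullptr when nothing is installed
}
```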

View File

@ -127,6 +127,7 @@ class InstalledModels : public QSortFilterProxyModel
public: public:
explicit InstalledModels(QObject *parent); explicit InstalledModels(QObject *parent);
int count() const; int count() const;
QString firstId() const;
Q_SIGNALS: Q_SIGNALS:
void countChanged(); void countChanged();
@ -168,7 +169,6 @@ class ModelList : public QAbstractListModel
Q_PROPERTY(InstalledModels* installedModels READ installedModels NOTIFY installedModelsChanged) Q_PROPERTY(InstalledModels* installedModels READ installedModels NOTIFY installedModelsChanged)
Q_PROPERTY(DownloadableModels* downloadableModels READ downloadableModels NOTIFY downloadableModelsChanged) Q_PROPERTY(DownloadableModels* downloadableModels READ downloadableModels NOTIFY downloadableModelsChanged)
Q_PROPERTY(QList<QString> userDefaultModelList READ userDefaultModelList NOTIFY userDefaultModelListChanged) Q_PROPERTY(QList<QString> userDefaultModelList READ userDefaultModelList NOTIFY userDefaultModelListChanged)
Q_PROPERTY(bool asyncModelRequestOngoing READ asyncModelRequestOngoing NOTIFY asyncModelRequestOngoingChanged)
public: public:
static ModelList *globalInstance(); static ModelList *globalInstance();
@ -296,14 +296,12 @@ public:
} }
QString incompleteDownloadPath(const QString &modelFile); QString incompleteDownloadPath(const QString &modelFile);
bool asyncModelRequestOngoing() const { return m_asyncModelRequestOngoing; }
Q_SIGNALS: Q_SIGNALS:
void countChanged(); void countChanged();
void installedModelsChanged(); void installedModelsChanged();
void downloadableModelsChanged(); void downloadableModelsChanged();
void userDefaultModelListChanged(); void userDefaultModelListChanged();
void asyncModelRequestOngoingChanged();
private Q_SLOTS: private Q_SLOTS:
void updateModelsFromJson(); void updateModelsFromJson();
@ -312,7 +310,6 @@ private Q_SLOTS:
void updateModelsFromDirectory(); void updateModelsFromDirectory();
void updateDataForSettings(); void updateDataForSettings();
void handleModelsJsonDownloadFinished(); void handleModelsJsonDownloadFinished();
void handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code);
void handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors); void handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors);
private: private:
@ -331,7 +328,6 @@ private:
QList<ModelInfo*> m_models; QList<ModelInfo*> m_models;
QHash<QString, ModelInfo*> m_modelMap; QHash<QString, ModelInfo*> m_modelMap;
QFileSystemWatcher *m_watcher; QFileSystemWatcher *m_watcher;
bool m_asyncModelRequestOngoing;
private: private:
explicit ModelList(); explicit ModelList();

View File

@ -41,7 +41,7 @@ MyDialog {
} }
Label { Label {
visible: !ModelList.downloadableModels.count && !ModelList.asyncModelRequestOngoing visible: !ModelList.downloadableModels.count
Layout.fillWidth: true Layout.fillWidth: true
Layout.fillHeight: true Layout.fillHeight: true
horizontalAlignment: Qt.AlignHCenter horizontalAlignment: Qt.AlignHCenter
@ -50,15 +50,6 @@ MyDialog {
color: theme.mutedTextColor color: theme.mutedTextColor
} }
MyBusyIndicator {
visible: !ModelList.downloadableModels.count && ModelList.asyncModelRequestOngoing
running: ModelList.asyncModelRequestOngoing
Accessible.role: Accessible.Animation
Layout.alignment: Qt.AlignCenter
Accessible.name: qsTr("Busy indicator")
Accessible.description: qsTr("Displayed when the models request is ongoing")
}
ScrollView { ScrollView {
id: scrollView id: scrollView
ScrollBar.vertical.policy: ScrollBar.AlwaysOn ScrollBar.vertical.policy: ScrollBar.AlwaysOn

View File

@ -18,9 +18,6 @@ enum Language {
Go, Go,
Json, Json,
Csharp, Csharp,
Latex,
Html,
Php
}; };
static QColor keywordColor = "#2e95d3"; // blue static QColor keywordColor = "#2e95d3"; // blue
@ -36,11 +33,6 @@ static QColor commandColor = functionCallColor;
static QColor variableColor = numberColor; static QColor variableColor = numberColor;
static QColor keyColor = functionColor; static QColor keyColor = functionColor;
static QColor valueColor = stringColor; static QColor valueColor = stringColor;
static QColor parameterColor = stringColor;
static QColor attributeNameColor = numberColor;
static QColor attributeValueColor = stringColor;
static QColor specialCharacterColor = functionColor;
static QColor doctypeColor = commentColor;
static Language stringToLanguage(const QString &language) static Language stringToLanguage(const QString &language)
{ {
@ -70,12 +62,6 @@ static Language stringToLanguage(const QString &language)
return Go; return Go;
if (language == "json") if (language == "json")
return Json; return Json;
if (language == "latex")
return Latex;
if (language == "html")
return Html;
if (language == "php")
return Php;
return None; return None;
} }
@ -575,135 +561,6 @@ static QVector<HighlightingRule> bashHighlightingRules()
return highlightingRules; return highlightingRules;
} }
static QVector<HighlightingRule> latexHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat commandFormat;
commandFormat.setForeground(commandColor); // commandColor needs to be set to your liking
rule.pattern = QRegularExpression("\\\\[A-Za-z]+"); // Pattern for LaTeX commands
rule.format = commandFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor); // commentColor needs to be set to your liking
rule.pattern = QRegularExpression("%[^\n]*"); // Pattern for LaTeX comments
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> htmlHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat attributeNameFormat;
attributeNameFormat.setForeground(attributeNameColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*=");
rule.format = attributeNameFormat;
highlightingRules.append(rule);
QTextCharFormat attributeValueFormat;
attributeValueFormat.setForeground(attributeValueColor);
rule.pattern = QRegularExpression("\".*?\"|'.*?'");
rule.format = attributeValueFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("<!--.*?-->");
rule.format = commentFormat;
highlightingRules.append(rule);
QTextCharFormat specialCharacterFormat;
specialCharacterFormat.setForeground(specialCharacterColor);
rule.pattern = QRegularExpression("&[a-zA-Z0-9#]*;");
rule.format = specialCharacterFormat;
highlightingRules.append(rule);
QTextCharFormat doctypeFormat;
doctypeFormat.setForeground(doctypeColor);
rule.pattern = QRegularExpression("<!DOCTYPE.*?>");
rule.format = doctypeFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> phpHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat functionCallFormat;
functionCallFormat.setForeground(functionCallColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*(?=\\()");
rule.format = functionCallFormat;
highlightingRules.append(rule);
QTextCharFormat functionFormat;
functionFormat.setForeground(functionColor);
rule.pattern = QRegularExpression("\\bfunction\\s+(\\w+)\\b");
rule.format = functionFormat;
highlightingRules.append(rule);
QTextCharFormat numberFormat;
numberFormat.setForeground(numberColor);
rule.pattern = QRegularExpression("\\b[0-9]*\\.?[0-9]+\\b");
rule.format = numberFormat;
highlightingRules.append(rule);
QTextCharFormat keywordFormat;
keywordFormat.setForeground(keywordColor);
QStringList keywordPatterns = {
"\\bif\\b", "\\belse\\b", "\\belseif\\b", "\\bwhile\\b", "\\bfor\\b",
"\\bforeach\\b", "\\breturn\\b", "\\bprint\\b", "\\binclude\\b", "\\brequire\\b",
"\\binclude_once\\b", "\\brequire_once\\b", "\\btry\\b", "\\bcatch\\b",
"\\bfinally\\b", "\\bcontinue\\b", "\\bbreak\\b", "\\bclass\\b", "\\bfunction\\b",
"\\bnew\\b", "\\bthrow\\b", "\\barray\\b", "\\bpublic\\b", "\\bprivate\\b",
"\\bprotected\\b", "\\bstatic\\b", "\\bglobal\\b", "\\bisset\\b", "\\bunset\\b",
"\\bnull\\b", "\\btrue\\b", "\\bfalse\\b"
};
for (const QString &pattern : keywordPatterns) {
rule.pattern = QRegularExpression(pattern);
rule.format = keywordFormat;
highlightingRules.append(rule);
}
QTextCharFormat stringFormat;
stringFormat.setForeground(stringColor);
rule.pattern = QRegularExpression("\".*?\"");
rule.format = stringFormat;
highlightingRules.append(rule);
rule.pattern = QRegularExpression("\'.*?\'");
rule.format = stringFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("//[^\n]*");
rule.format = commentFormat;
highlightingRules.append(rule);
rule.pattern = QRegularExpression("/\\*.*?\\*/");
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> jsonHighlightingRules() static QVector<HighlightingRule> jsonHighlightingRules()
{ {
static QVector<HighlightingRule> highlightingRules; static QVector<HighlightingRule> highlightingRules;
@ -759,12 +616,6 @@ void SyntaxHighlighter::highlightBlock(const QString &text)
rules = javaHighlightingRules(); rules = javaHighlightingRules();
else if (block.userState() == Json) else if (block.userState() == Json)
rules = jsonHighlightingRules(); rules = jsonHighlightingRules();
else if (block.userState() == Latex)
rules = latexHighlightingRules();
else if (block.userState() == Html)
rules = htmlHighlightingRules();
else if (block.userState() == Php)
rules = phpHighlightingRules();
for (const HighlightingRule &rule : qAsConst(rules)) { for (const HighlightingRule &rule : qAsConst(rules)) {
QRegularExpressionMatchIterator matchIterator = rule.pattern.globalMatch(text); QRegularExpressionMatchIterator matchIterator = rule.pattern.globalMatch(text);
@ -970,10 +821,7 @@ void ResponseText::handleCodeBlocks()
|| firstWord == "java" || firstWord == "java"
|| firstWord == "go" || firstWord == "go"
|| firstWord == "golang" || firstWord == "golang"
|| firstWord == "json" || firstWord == "json") {
|| firstWord == "latex"
|| firstWord == "html"
|| firstWord == "php") {
codeLanguage = firstWord; codeLanguage = firstWord;
capturedText.remove(0, match.captured(0).length()); capturedText.remove(0, match.captured(0).length());
} }
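Each of the rule sets above follows the same pattern: a static QVector of rules built once, where every rule pairs a QRegularExpression with a QTextCharFormat, and highlightBlock() selects the vector by the block's language state. A minimal sketch of that pattern for a made-up language (the struct mirrors how the rules above are used; the colors are illustrative, not the app's palette):

```cpp
#include <QColor>
#include <QRegularExpression>
#include <QTextCharFormat>
#include <QVector>

struct HighlightingRule { // mirrors the rule shape used by the highlighter above
    QRegularExpression pattern;
    QTextCharFormat format;
};

static QVector<HighlightingRule> iniHighlightingRules() // hypothetical language, for illustration
{
    static QVector<HighlightingRule> rules;
    if (rules.isEmpty()) {
        HighlightingRule rule;

        QTextCharFormat sectionFormat;
        sectionFormat.setForeground(QColor("#2e95d3")); // illustrative keyword blue
        rule.pattern = QRegularExpression("^\\[[^\\]]+\\]"); // [section] headers
        rule.format = sectionFormat;
        rules.append(rule);

        QTextCharFormat commentFormat;
        commentFormat.setForeground(QColor("#808080")); // illustrative comment gray
        rule.pattern = QRegularExpression(";[^\n]*"); // ; end-of-line comments
        rule.format = commentFormat;
        rules.append(rule);
    }
    return rules;
}
```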

View File

@ -1,49 +0,0 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 1,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
}
}
}

View File

@ -1,48 +0,0 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 2,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear"
}
}
}

View File

@ -1,34 +0,0 @@
# model/tokenizer
model_name: "tiiuae/falcon-7b"
tokenizer_name: "tiiuae/falcon-7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-falcon"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 32
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: "/home/paperspace/gpt4all/ckpts/mpt/step_1000"
lora: false
warmup_steps: 500
num_epochs: 2
# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42

View File

@ -1,34 +0,0 @@
# model/tokenizer
model_name: "mosaicml/mpt-7b"
tokenizer_name: "mosaicml/mpt-7b"
gradient_checkpointing: false
save_name: "nomic-ai/mpt-finetuned-round2"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 8
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/mpt"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 2
# logging
wandb: false
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42

View File

@ -1,34 +0,0 @@
# model/tokenizer
model_name: "openlm-research/open_llama_7b"
tokenizer_name: "openlm-research/open_llama_7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-openllama"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-updated"
revision: null
max_length: 1024
batch_size: 32
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
log_every: 10
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 3
# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42

View File

@ -12,7 +12,7 @@ def tokenize_inputs(config, tokenizer, examples):
     # hacky backward compatible
     different_eos = tokenizer.eos_token != "</s>"
-    out = {"labels": [], "input_ids": [], "attention_mask": []}
+    out = {"labels": [], "input_ids": []}
     for prompt, response in zip(examples["prompt"], examples["response"]):
         if different_eos:
             if response.count("</s> \n") > 0:
@ -49,10 +49,9 @@ def tokenize_inputs(config, tokenizer, examples):
             print(response)
             raise
-        padded = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length, return_tensors="pt")
+        input_tokens = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length)["input_ids"]
         out["labels"].append(labels)
-        out["input_ids"].append(padded["input_ids"])
-        out["attention_mask"].append(padded["attention_mask"])
+        out["input_ids"].append(input_tokens)
     out = {k: torch.stack(v) if isinstance(v, list) else v for k, v in out.items()}
@ -73,7 +72,7 @@ def load_data(config, tokenizer):
         dataset = load_dataset("json", data_files=files, split="train")
     else:
-        dataset = load_dataset(dataset_path, split="train", revision=config["revision"] if "revision" in config else None)
+        dataset = load_dataset(dataset_path, split="train")
     dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
@ -84,23 +83,19 @@ def load_data(config, tokenizer):
     else:
         kwargs = {}
-    cols_to_keep = ["input_ids", "labels", "attention_mask"]
     # tokenize inputs and return labels and attention mask
     train_dataset = train_dataset.map(
         lambda ele: tokenize_inputs(config, tokenizer, ele),
         batched=True,
+        remove_columns=["source", "prompt"],
         **kwargs
     )
-    remove_cols = [col for col in train_dataset.column_names if col not in cols_to_keep]
-    train_dataset = train_dataset.remove_columns(remove_cols)
     val_dataset = val_dataset.map(
         lambda ele: tokenize_inputs(config, tokenizer, ele),
         batched=True,
+        remove_columns=["source", "prompt"],
         **kwargs
     )
-    remove_cols = [col for col in val_dataset.column_names if col not in cols_to_keep]
-    val_dataset = val_dataset.remove_columns(remove_cols)
     train_dataset = train_dataset.with_format("torch")
     val_dataset = val_dataset.with_format("torch")
@ -111,14 +106,12 @@ def load_data(config, tokenizer):
         train_dataset,
         collate_fn=DefaultDataCollator(),
         batch_size=config["batch_size"],
-        shuffle=True,
     )
     val_dataloader = DataLoader(
         val_dataset,
         collate_fn=DefaultDataCollator(),
         batch_size=config["batch_size"],
-        shuffle=True,
     )
     return train_dataloader, val_dataloader
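
The substantive change in tokenize_inputs above is whether tokenizer.pad keeps an attention mask next to the padded ids or returns only input_ids. A small sketch of both calls, using gpt2 purely as a stand-in tokenizer; any causal-LM tokenizer with a pad token behaves the same way:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example tokenizer only
tokenizer.pad_token = tokenizer.eos_token

batch_ids = tokenizer(["hello world", "short"])["input_ids"]

# One side of the diff: keep the attention mask alongside the padded ids.
padded = tokenizer.pad({"input_ids": batch_ids}, padding="max_length",
                       max_length=16, return_tensors="pt")
print(padded["input_ids"].shape, padded["attention_mask"].shape)  # (2, 16) each

# Other side: keep only the padded ids and drop the mask.
ids_only = tokenizer.pad({"input_ids": batch_ids}, padding="max_length",
                         max_length=16)["input_ids"]
print(len(ids_only), len(ids_only[0]))  # 2 16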

View File

@ -1,10 +1,10 @@
 accelerate
 datasets
-einops
 torchmetrics
 evaluate
 transformers>=4.28.0
 wandb
+pip
 peft
 nodelist-inflator
 deepspeed

View File

@ -1,5 +1,5 @@
 import os
-from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler
+from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler, LlamaForCausalLM
 import torch
 from torch.optim import AdamW
 from argparse import ArgumentParser
@ -42,7 +42,7 @@ def train(accelerator, config):
     accelerator.print(config)
     accelerator.print(f"Using {accelerator.num_processes} GPUs")
-    tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'], use_fast=False)
+    tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'])
     # if no pad token, set it to eos
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
@ -53,7 +53,6 @@ def train(accelerator, config):
     checkpoint = config["gradient_checkpointing"]
     model = AutoModelForCausalLM.from_pretrained(config["model_name"],
                                                  use_cache=False if checkpoint else True,
                                                  trust_remote_code=True)
@ -87,7 +86,7 @@ def train(accelerator, config):
     # decay to min_lr instead of 0
     lr_ratio = config["min_lr"] / config["lr"]
     accelerator.print(f"Len of train_dataloader: {len(train_dataloader)}")
-    total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * (config["num_epochs"])
+    total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * config["num_epochs"]
     # instead of decaying to zero, decay to ratio of min_lr / lr
     total_num_steps += int(total_num_steps * lr_ratio) + config["warmup_steps"]
     accelerator.print(f"Total training steps: {total_num_steps}")
@ -105,7 +104,7 @@ def train(accelerator, config):
         )
     else:
         scheduler = DummyScheduler(
-            optimizer, total_num_steps=total_num_steps, warmup_num_steps=config["warmup_steps"]
+            optimizer, total_num_steps=config["warmup_steps"], warmup_num_steps=config["warmup_steps"]
         )
     model, optimizer, train_dataloader, val_dataloader, scheduler = accelerator.prepare(
@ -118,34 +117,26 @@ def train(accelerator, config):
     if config["checkpoint"]:
         accelerator.load_state(config["checkpoint"])
         accelerator.print(f"Resumed from checkpoint: {config['checkpoint']}")
-        path = os.path.basename(config["checkpoint"])
+        path = os.path.basename(config["train_args"]["resume_from_checkpoint"])
         training_difference = os.path.splitext(path)[0]
         resume_step = int(training_difference.replace("step_", ""))
-        train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
+        accelerator.skip_first_batches(train_dataloader, resume_step)
         accelerator.print(f"Resuming from step {resume_step}")
-    else:
-        resume_step = 0
     # log gradients
     if accelerator.is_main_process and config["wandb"]:
         wandb.watch(model, log_freq=config["log_grads_every"], log="all")
-    accelerator.wait_for_everyone()
-    for epoch in range(0, config["num_epochs"]):
+    for epoch in range(config["num_epochs"]):
         train_loss = MeanMetric(nan_strategy="error").to(model.device)
         for step, batch in enumerate(tqdm(train_dataloader)):
-            curr_step = epoch * len(train_dataloader) + step
             model.train()
             outputs = model(**batch)
             loss = outputs.loss
             # gather loss before backprop in case of gradient accumulation
             loss_values = accelerator.gather_for_metrics({"loss": loss.detach().float()})
-            if config["wandb"]:
-                accelerator.log({"loss": torch.mean(loss_values["loss"]).item()}, step=curr_step)
             train_loss.update(loss_values["loss"])
             loss = loss / gradient_accumulation_steps
@ -153,8 +144,9 @@ def train(accelerator, config):
             # get gradient norm of all params
             # log LR in case something weird happens
-            if step > 0 and step % (config["log_lr_every"]) == 0:
+            if step > 0 and step % (config["eval_every"] // 10) == 0:
                 if config["wandb"]:
+                    curr_step = step + epoch * len(train_dataloader)
                     accelerator.log({"lr": scheduler.get_last_lr()[0]}, step=curr_step)
             if (step + 1) % gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
@ -164,6 +156,7 @@ def train(accelerator, config):
             if step > 0 and step % config["save_every"] == 0:
+                curr_step = step + epoch * len(train_dataloader)
                 accelerator.save_state(f"{config['output_dir']}/step_{curr_step}")
             if step > 0 and (step % config["eval_every"] == 0 or step == len(train_dataloader) - 1):
@ -177,6 +170,7 @@ def train(accelerator, config):
                 }
                 if config["wandb"]:
+                    curr_step = step + epoch * len(train_dataloader)
                     accelerator.log({**log_train, **log_val}, step=curr_step)
                 accelerator.print(f"Current LR: {scheduler.get_last_lr()[0]}")
@ -187,14 +181,8 @@ def train(accelerator, config):
         accelerator.print(f"Epoch {epoch} finished")
         accelerator.print(f"Pushing to HF hub")
-        accelerator.wait_for_everyone()
         unwrapped_model = accelerator.unwrap_model(model)
-        unwrapped_model.save_pretrained(
-            f"{config['output_dir']}/epoch_{epoch}",
-            is_main_process=accelerator.is_main_process,
-            save_function=accelerator.save,
-            state_dict=accelerator.get_state_dict(model),
-        )
         try:
             if accelerator.is_main_process:
                 unwrapped_model.push_to_hub(config["save_name"] + f"-epoch_{epoch}", private=True)
@ -203,17 +191,22 @@ def train(accelerator, config):
             accelerator.print(e)
             accelerator.print(f"Failed to push to hub")
-        if config["num_epochs"] > 1:
-            accelerator.wait_for_everyone()
-            unwrapped_model = accelerator.unwrap_model(model)
-            unwrapped_model.save_pretrained(
-                f"{config['output_dir']}/final",
-                is_main_process=accelerator.is_main_process,
-                save_function=accelerator.save,
-                state_dict=accelerator.get_state_dict(model),
-            )
+        unwrapped_model.save_pretrained(
+            f"{config['output_dir']}/epoch_{epoch}",
+            is_main_process=accelerator.is_main_process,
+            save_function=accelerator.save,
+            state_dict=accelerator.get_state_dict(model),
+        )
+    accelerator.wait_for_everyone()
+    unwrapped_model = accelerator.unwrap_model(model)
+    unwrapped_model.save_pretrained(
+        f"{config['output_dir']}/final",
+        is_main_process=accelerator.is_main_process,
+        save_function=accelerator.save,
+        state_dict=accelerator.get_state_dict(model),
+    )
     accelerator.end_training()
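
One detail in the resume logic diffed above: Accelerator.skip_first_batches returns a new dataloader, so the call only takes effect if its result is reassigned, as on the "-" side; invoking it without keeping the return value leaves the original dataloader unchanged. A minimal sketch with a toy dataloader and an illustrative checkpoint name:

import os
import torch
from torch.utils.data import DataLoader, TensorDataset
from accelerate import Accelerator

accelerator = Accelerator()
train_dataloader = accelerator.prepare(
    DataLoader(TensorDataset(torch.arange(100)), batch_size=4)
)

checkpoint = "ckpts/mpt/step_10"  # illustrative checkpoint directory name
# accelerator.load_state(checkpoint)  # would restore model/optimizer state here
resume_step = int(os.path.splitext(os.path.basename(checkpoint))[0].replace("step_", ""))

# skip_first_batches returns a new dataloader; reassign it so batches are actually skipped.
train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
print(len(list(train_dataloader)))  # 15 of the original 25 batches remain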