Compare commits

...

60 Commits

Author SHA1 Message Date
Jacob Nguyen
f3f25a9928 revmove noop 2023-07-15 22:58:44 -05:00
Jacob Nguyen
44e20688cf prevent rebuild 2023-07-15 22:00:28 -05:00
Jacob Nguyen
a82ce3b864 fix 2023-07-15 21:17:03 -05:00
Jacob Nguyen
5bf4462ac2 revert 2023-07-15 21:14:39 -05:00
Jacob Nguyen
cabb089f25 update ci 2023-07-15 21:12:30 -05:00
Jacob Nguyen
965b435406 fix pwd 2023-07-15 20:50:07 -05:00
Jacob Nguyen
127288180a fix pwd 2023-07-15 20:42:50 -05:00
Jacob Nguyen
a82204e514 fix ci 2023-07-15 20:21:07 -05:00
Jacob Nguyen
2aa1352628 fixed required workflows 2023-07-15 18:19:31 -05:00
Jacob Nguyen
2c36da14b3 fix 2023-07-15 17:29:44 -05:00
Jacob Nguyen
6675ccda4d fix again 2023-07-15 17:27:56 -05:00
Jacob Nguyen
9e903775ae bruh 2023-07-15 17:19:46 -05:00
Jacob Nguyen
020053203f update circle ci script 2023-07-15 17:18:54 -05:00
Jacob Nguyen
5ce7563afb fix circle ci 2023-07-15 14:38:00 -05:00
Jacob Nguyen
115719612a fix circle ci 2023-07-15 14:32:39 -05:00
Jacob Nguyen
1ca7e7e083 Merge branch 'main' into jacoobes-patch-1 2023-07-15 14:13:34 -05:00
Jacob Nguyen
477b13a5de basic embedding with sbert (not tested & cpp side only) 2023-07-15 14:10:28 -05:00
Andriy Mulyar
cfd70b69fc
Update gpt4all_python_embedding.md
Signed-off-by: Andriy Mulyar <andriy.mulyar@gmail.com>
2023-07-14 14:54:56 -04:00
Andriy Mulyar
306105e62f
Update gpt4all_python_embedding.md
Signed-off-by: Andriy Mulyar <andriy.mulyar@gmail.com>
2023-07-14 14:54:36 -04:00
Andriy Mulyar
89e277bb3c
Update gpt4all_python_embedding.md
Signed-off-by: Andriy Mulyar <andriy.mulyar@gmail.com>
2023-07-14 14:30:14 -04:00
Adam Treat
f543affa9a Add better docs and threading support to bert. 2023-07-14 14:14:22 -04:00
Lakshay Kansal
6c8669cad3 highlighting rules for html and php and latex 2023-07-14 11:36:01 -04:00
Adam Treat
0c0a4f2c22 Add the docs. 2023-07-14 10:48:18 -04:00
Adam Treat
6656f0f41e Fix the test to work and not do timings. 2023-07-14 09:48:57 -04:00
Adam Treat
bb2b82e1b9 Add docs and bump version since we changed python api again. 2023-07-14 09:48:57 -04:00
Aaron Miller
c77ab849c0 LLModel objects should hold a reference to the library
prevents llmodel lib from being gc'd before live model objects
2023-07-14 09:48:57 -04:00
Aaron Miller
1c4a244291 bump mem allocation a bit 2023-07-14 09:48:57 -04:00
Aaron Miller
936dcd2bfc use default n_threads 2023-07-14 09:48:57 -04:00
Aaron Miller
15f1fe5445 rename embedder 2023-07-14 09:48:57 -04:00
Adam Treat
ee4186d579 Fixup bert python bindings. 2023-07-14 09:48:57 -04:00
cosmic-snow
6200900677
Fix Windows MSVC arch detection (#1194)
- in llmodel.cpp to fix AVX-only handling

Signed-off-by: cosmic-snow <134004613+cosmic-snow@users.noreply.github.com>
2023-07-13 14:44:17 -04:00
Adam Treat
4963db8f43 Bump the version numbers for both python and c backend. 2023-07-13 14:21:46 -04:00
Adam Treat
0efdbfcffe Bert 2023-07-13 14:21:46 -04:00
Adam Treat
315a1f2aa2 Move it back as internal class. 2023-07-13 14:21:46 -04:00
Adam Treat
ae8eb297ac Add sbert backend. 2023-07-13 14:21:46 -04:00
Adam Treat
1f749d7633 Clean up backend code a bit and hide impl. details. 2023-07-13 14:21:46 -04:00
Adam Treat
33557b1f39 Move the implementation out of llmodel class. 2023-07-13 14:21:46 -04:00
Adam Treat
64b409e0b8 keep trying 2023-07-13 13:57:22 -04:00
Adam Treat
e59946f05d try again to unbreak circleci 2023-07-13 13:55:22 -04:00
Adam Treat
b72b409d40 try again to unbreak circlci 2023-07-13 13:52:55 -04:00
Adam Treat
59cae1132c Try and unbreak circleci. 2023-07-13 13:45:47 -04:00
Adam Treat
a0dae86a95 Add bert to models.json 2023-07-13 13:37:12 -04:00
AT
18ca8901f0
Update README.md
Signed-off-by: AT <manyoso@users.noreply.github.com>
2023-07-12 16:30:56 -04:00
cosmic-snow
00a945eaee Update gpt4all_faq.md
- Add information about AVX/AVX2.
- Update supported architectures.

Signed-off-by: cosmic-snow <134004613+cosmic-snow@users.noreply.github.com>
2023-07-12 15:19:26 -04:00
Zach Nussbaum
6c4f449b7a
fix: update train scripts and configs for other models (#1164)
* feat: falcon config

* feat: mpt config

* chore: gitignore

* refactor: step calculation

* fix: attention mask + shuffle on epoch end

* fix: return tensors

* fix: wait for everyone

* chore: config

* chore: ds config

* fix: remove ccols

* fix: logging and saving

* chore: add einops
2023-07-12 15:18:24 -04:00
Adam Treat
e8b19b8e82 Bump version to 2.4.14 and provide release notes. 2023-07-12 14:58:45 -04:00
Adam Treat
8eb0844277 Check if the trimmed version is empty. 2023-07-12 14:31:43 -04:00
Adam Treat
be395c12cc Make all system prompts empty by default if model does not include in training data. 2023-07-12 14:31:43 -04:00
Aaron Miller
6a8fa27c8d Correctly find models in subdirs of model dir
QDirIterator doesn't seem particular subdir aware, its path() returns
the iterated dir. This was the simplest way I found to get this right.
2023-07-12 14:18:40 -04:00
Adam Treat
8893db5896 Add wizard model and rename orca to be more specific. 2023-07-12 14:12:46 -04:00
Adam Treat
60627bd41f Prefer 7b models in order of default model load. 2023-07-12 12:50:18 -04:00
Aaron Miller
5df4f1bf8c codespell 2023-07-12 12:49:06 -04:00
Aaron Miller
10ca2c4475 center the spinner 2023-07-12 12:49:06 -04:00
Adam Treat
e9897518d1 Show busy if models.json download taking longer than expected. 2023-07-12 12:49:06 -04:00
Aaron Miller
432b7ebbd7 include windows.h just to be safe 2023-07-12 12:46:46 -04:00
Aaron Miller
95b8fb312e windows/msvc: use high level processor feature detection API
see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
2023-07-12 12:46:46 -04:00
Aaron Miller
ad0e7fd01f chatgpt: ensure no extra newline in header 2023-07-12 10:53:25 -04:00
Aaron Miller
f0faa23ad5
cmakelists: always export build commands (#1179)
friendly for using editors with clangd integration that don't also
manage the build themselves
2023-07-12 10:49:24 -04:00
Adam Treat
0d726b22b8 When we explicitly cancel an operation we shouldn't throw an error. 2023-07-12 10:34:10 -04:00
Adam Treat
13b2d47be5 Provide an error dialog if for any reason we can't access the settings file. 2023-07-12 08:50:21 -04:00
57 changed files with 7910 additions and 4339 deletions


@ -463,50 +463,47 @@ jobs:
docker:
- image: mcr.microsoft.com/dotnet/sdk:7.0-jammy # Ubuntu 22.04
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
ls -R runtimes
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
ls -R runtimes
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
build-csharp-windows:
executor:
@ -514,111 +511,99 @@ jobs:
size: large
shell: powershell.exe -ExecutionPolicy Bypass
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-win
- attach_workspace:
at: C:\Users\circleci\workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes\win-x64\native
cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet.exe restore Gpt4All
- save_cache:
paths:
- C:\Users\circleci\.nuget\packages
key: gpt4all-csharp-nuget-packages-win
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet.exe build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
$Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-win
- attach_workspace:
at: C:\Users\circleci\workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes\win-x64\native
cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet.exe restore Gpt4All
- save_cache:
paths:
- C:\Users\circleci\.nuget\packages
key: gpt4all-csharp-nuget-packages-win
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet.exe build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
$Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
build-csharp-macos:
macos:
xcode: "14.0.0"
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: Install dependencies
command: |
brew install --cask dotnet-sdk
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
build-nodejs-linux:
docker:
- image: circleci/node:erbium-bullseye-browsers-legacy
steps:
- when:
condition: << pipeline.parameters.run-ts-workflow >>
- checkout
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: Install dependencies
command: |
brew install --cask dotnet-sdk
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
store-and-upload-nupkgs:
docker:
- image: mcr.microsoft.com/dotnet/sdk:6.0-jammy # Ubuntu 22.04
@ -656,27 +641,27 @@ jobs:
node-version: "18.16"
- run: node --version
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
- run:
command: yarn run test
name: Run YARN tests
override-ci-command: yarn install
- run: cd gpt4all-bindings/typescript
- run:
command: |
# excluding llmodel. nodejs bindings dont need llmodel.dll
cd gpt4all-bindings/typescript
mkdir -p runtimes/win32-x64/native
cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
- run:
name: Publish to NPM
command: |
npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
npm publish
cd gpt4all-bindings/typescript
# excluding llmodel. nodejs bindings dont need llmodel.dll
mkdir -p runtimes/win32-x64/native
cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -Ra gpt4all-bindings/typescript/runtimes
# - run:
# name: Publish to NPM
# command: |
# npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
# npm publish
workflows:
version: 2
@ -756,6 +741,8 @@ workflows:
type: approval
- nuget-hold:
type: approval
- npm-hold:
type: approval
- build-bindings-backend-linux:
filters:
branches:
@ -781,6 +768,16 @@ workflows:
requires:
- hold
# NodeJs Jobs
- prepare-npm-pkg:
filters:
branches:
only:
requires:
- node/test
- npm-hold
# - build-bindings-backend-linux
# - build-bindings-backend-windows-msvc
# - build-bindings-backend-macos
# CSharp Jobs
- build-csharp-linux:
filters:
@ -809,4 +806,3 @@ workflows:
- build-csharp-windows
- build-csharp-linux
- build-csharp-macos

.gitignore (vendored): 3 changed lines

@ -1,3 +1,6 @@
*.arrow
squad_*
*sbert_embedded*
*.pkl
ckpts*
.deepspeed_env


@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.16)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
@ -19,7 +20,7 @@ endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
set(LLMODEL_VERSION_MAJOR 0)
set(LLMODEL_VERSION_MINOR 2)
set(LLMODEL_VERSION_MINOR 3)
set(LLMODEL_VERSION_PATCH 0)
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
@ -124,6 +125,10 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
add_library(mpt-${BUILD_VARIANT} SHARED
mpt.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(mpt ggml-230511)
add_library(bert-${BUILD_VARIANT} SHARED
bert.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(bert llama-mainline)
endif()
endforeach()

gpt4all-backend/bert.cpp (new file, 1066 changed lines): diff suppressed because it is too large.


@ -0,0 +1,44 @@
#ifndef BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#error This file is NOT meant to be included outside of bert.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#endif
#ifndef BERT_H
#define BERT_H
#include <string>
#include <functional>
#include <vector>
#include <memory>
#include "llmodel.h"
struct BertPrivate;
class Bert : public LLModel {
public:
Bert();
~Bert();
bool supportsEmbedding() const override { return true; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
size_t stateSize() const override;
size_t saveState(uint8_t *dest) const override;
size_t restoreState(const uint8_t *src) override;
void setThreadCount(int32_t n_threads) override;
int32_t threadCount() const override;
std::vector<float> embedding(const std::string &text) override;
private:
std::unique_ptr<BertPrivate> d_ptr;
protected:
std::vector<Token> tokenize(PromptContext &, const std::string&) const override;
Token sampleToken(PromptContext &ctx) const override;
std::string tokenToString(Token) const override;
bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
int32_t contextLength() const override;
const std::vector<Token>& endTokens() const override;
};
#endif // BERT_H


@ -16,6 +16,8 @@ public:
Falcon();
~Falcon();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -15,6 +15,8 @@ public:
GPTJ();
~GPTJ();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -15,6 +15,8 @@ public:
LLamaModel();
~LLamaModel();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -10,17 +10,19 @@
#include <cassert>
#include <cstdlib>
#include <sstream>
#ifdef _MSC_VER
#include <windows.h>
#include <processthreadsapi.h>
#endif
std::string s_implementations_search_path = ".";
static bool has_at_least_minimal_hardware() {
#ifdef __x86_64__
#if defined(__x86_64__) || defined(_M_X64)
#ifndef _MSC_VER
return __builtin_cpu_supports("avx");
#else
int cpuInfo[4];
__cpuid(cpuInfo, 1);
return cpuInfo[2] & (1 << 28);
return IsProcessorFeaturePresent(PF_AVX_INSTRUCTIONS_AVAILABLE);
#endif
#else
return true; // Don't know how to handle non-x86_64
@ -28,54 +30,53 @@ static bool has_at_least_minimal_hardware() {
}
static bool requires_avxonly() {
#ifdef __x86_64__
#if defined(__x86_64__) || defined(_M_X64)
#ifndef _MSC_VER
return !__builtin_cpu_supports("avx2");
#else
int cpuInfo[4];
__cpuidex(cpuInfo, 7, 0);
return !(cpuInfo[1] & (1 << 5));
return !IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE);
#endif
#else
return false; // Don't know how to handle non-x86_64
#endif
}
LLModel::Implementation::Implementation(Dlhandle &&dlhandle_) : dlhandle(new Dlhandle(std::move(dlhandle_))) {
auto get_model_type = dlhandle->get<const char *()>("get_model_type");
LLModel::Implementation::Implementation(Dlhandle &&dlhandle_)
: m_dlhandle(new Dlhandle(std::move(dlhandle_))) {
auto get_model_type = m_dlhandle->get<const char *()>("get_model_type");
assert(get_model_type);
modelType = get_model_type();
auto get_build_variant = dlhandle->get<const char *()>("get_build_variant");
m_modelType = get_model_type();
auto get_build_variant = m_dlhandle->get<const char *()>("get_build_variant");
assert(get_build_variant);
buildVariant = get_build_variant();
magicMatch = dlhandle->get<bool(std::ifstream&)>("magic_match");
assert(magicMatch);
construct_ = dlhandle->get<LLModel *()>("construct");
assert(construct_);
m_buildVariant = get_build_variant();
m_magicMatch = m_dlhandle->get<bool(std::ifstream&)>("magic_match");
assert(m_magicMatch);
m_construct = m_dlhandle->get<LLModel *()>("construct");
assert(m_construct);
}
LLModel::Implementation::Implementation(Implementation &&o)
: construct_(o.construct_)
, modelType(o.modelType)
, buildVariant(o.buildVariant)
, magicMatch(o.magicMatch)
, dlhandle(o.dlhandle) {
o.dlhandle = nullptr;
: m_magicMatch(o.m_magicMatch)
, m_construct(o.m_construct)
, m_modelType(o.m_modelType)
, m_buildVariant(o.m_buildVariant)
, m_dlhandle(o.m_dlhandle) {
o.m_dlhandle = nullptr;
}
LLModel::Implementation::~Implementation() {
if (dlhandle) delete dlhandle;
if (m_dlhandle) delete m_dlhandle;
}
bool LLModel::Implementation::isImplementation(const Dlhandle &dl) {
return dl.get<bool(uint32_t)>("is_g4a_backend_model_implementation");
}
const std::vector<LLModel::Implementation> &LLModel::implementationList() {
const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList() {
// NOTE: allocated on heap so we leak intentionally on exit so we have a chance to clean up the
// individual models without the cleanup of the static list interfering
static auto* libs = new std::vector<LLModel::Implementation>([] () {
std::vector<LLModel::Implementation> fres;
static auto* libs = new std::vector<Implementation>([] () {
std::vector<Implementation> fres;
auto search_in_directory = [&](const std::string& paths) {
std::stringstream ss(paths);
@ -107,17 +108,17 @@ const std::vector<LLModel::Implementation> &LLModel::implementationList() {
return *libs;
}
const LLModel::Implementation* LLModel::implementation(std::ifstream& f, const std::string& buildVariant) {
const LLModel::Implementation* LLModel::Implementation::implementation(std::ifstream& f, const std::string& buildVariant) {
for (const auto& i : implementationList()) {
f.seekg(0);
if (!i.magicMatch(f)) continue;
if (buildVariant != i.buildVariant) continue;
if (!i.m_magicMatch(f)) continue;
if (buildVariant != i.m_buildVariant) continue;
return &i;
}
return nullptr;
}
LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant) {
if (!has_at_least_minimal_hardware())
return nullptr;
@ -126,14 +127,15 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVaria
std::ifstream f(modelPath, std::ios::binary);
if (!f) return nullptr;
// Get correct implementation
const LLModel::Implementation* impl = nullptr;
const Implementation* impl = nullptr;
#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
if (buildVariant == "auto") {
size_t total_mem = getSystemTotalRAMInBytes();
impl = implementation(f, "metal");
if(impl) {
LLModel* metalimpl = impl->construct();
LLModel* metalimpl = impl->m_construct();
metalimpl->m_implementation = impl;
size_t req_mem = metalimpl->requiredMem(modelPath);
float req_to_total = (float) req_mem / (float) total_mem;
// on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
@ -160,14 +162,17 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVaria
if (!impl) return nullptr;
}
f.close();
// Construct and return llmodel implementation
return impl->construct();
auto fres = impl->m_construct();
fres->m_implementation = impl;
return fres;
}
void LLModel::setImplementationsSearchPath(const std::string& path) {
void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
s_implementations_search_path = path;
}
const std::string& LLModel::implementationsSearchPath() {
const std::string& LLModel::Implementation::implementationsSearchPath() {
return s_implementations_search_path;
}


@ -12,32 +12,34 @@
#define LLMODEL_MAX_PROMPT_BATCH 128
class Dlhandle;
class LLModel {
public:
using Token = int32_t;
class Implementation {
LLModel *(*construct_)();
public:
Implementation(Dlhandle&&);
Implementation(const Implementation&) = delete;
Implementation(Implementation&&);
~Implementation();
std::string_view modelType() const { return m_modelType; }
std::string_view buildVariant() const { return m_buildVariant; }
static bool isImplementation(const Dlhandle&);
static const std::vector<Implementation>& implementationList();
static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
static void setImplementationsSearchPath(const std::string& path);
static const std::string& implementationsSearchPath();
std::string_view modelType, buildVariant;
bool (*magicMatch)(std::ifstream& f);
Dlhandle *dlhandle;
private:
bool (*m_magicMatch)(std::ifstream& f);
LLModel *(*m_construct)();
// The only way an implementation should be constructed
LLModel *construct() const {
auto fres = construct_();
fres->m_implementation = this;
return fres;
}
private:
std::string_view m_modelType;
std::string_view m_buildVariant;
Dlhandle *m_dlhandle;
};
struct PromptContext {
@ -59,18 +61,25 @@ public:
explicit LLModel() {}
virtual ~LLModel() {}
virtual bool supportsEmbedding() const = 0;
virtual bool supportsCompletion() const = 0;
virtual bool loadModel(const std::string &modelPath) = 0;
virtual bool isModelLoaded() const = 0;
virtual size_t requiredMem(const std::string &modelPath) = 0;
virtual size_t stateSize() const { return 0; }
virtual size_t saveState(uint8_t */*dest*/) const { return 0; }
virtual size_t restoreState(const uint8_t */*src*/) { return 0; }
// This method requires the model to return true from supportsCompletion otherwise it will throw
// an error
virtual void prompt(const std::string &prompt,
std::function<bool(int32_t)> promptCallback,
std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &ctx);
virtual std::vector<float> embedding(const std::string &text);
virtual void setThreadCount(int32_t /*n_threads*/) {}
virtual int32_t threadCount() const { return 1; }
@ -78,13 +87,6 @@ public:
return *m_implementation;
}
static const std::vector<Implementation>& implementationList();
static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
static void setImplementationsSearchPath(const std::string& path);
static const std::string& implementationsSearchPath();
protected:
// These are pure virtual because subclasses need to implement as the default implementation of
// 'prompt' above calls these functions
@ -100,5 +102,9 @@ protected:
void recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate);
const Implementation *m_implementation = nullptr;
private:
friend class LLMImplementation;
};
#endif // LLMODEL_H


@ -29,7 +29,7 @@ llmodel_model llmodel_model_create2(const char *model_path, const char *build_va
int error_code = 0;
try {
wrapper->llModel = LLModel::construct(model_path, build_variant);
wrapper->llModel = LLModel::Implementation::construct(model_path, build_variant);
} catch (const std::exception& e) {
error_code = EINVAL;
last_error_message = e.what();
@ -166,6 +166,25 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
ctx->context_erase = wrapper->promptContext.contextErase;
}
float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size)
{
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
std::vector<float> embeddingVector = wrapper->llModel->embedding(text);
float *embedding = (float *)malloc(embeddingVector.size() * sizeof(float));
if(embedding == nullptr) {
*embedding_size = 0;
return nullptr;
}
std::copy(embeddingVector.begin(), embeddingVector.end(), embedding);
*embedding_size = embeddingVector.size();
return embedding;
}
void llmodel_free_embedding(float *ptr)
{
free(ptr);
}
void llmodel_setThreadCount(llmodel_model model, int32_t n_threads)
{
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
@ -180,10 +199,10 @@ int32_t llmodel_threadCount(llmodel_model model)
void llmodel_set_implementation_search_path(const char *path)
{
LLModel::setImplementationsSearchPath(path);
LLModel::Implementation::setImplementationsSearchPath(path);
}
const char *llmodel_get_implementation_search_path()
{
return LLModel::implementationsSearchPath().c_str();
return LLModel::Implementation::implementationsSearchPath().c_str();
}


@ -171,6 +171,23 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
llmodel_recalculate_callback recalculate_callback,
llmodel_prompt_context *ctx);
/**
* Generate an embedding using the model.
* @param model A pointer to the llmodel_model instance.
* @param text A string representing the text to generate an embedding for.
* @param embedding_size A pointer to a size_t type that will be set by the call indicating the length
* of the returned floating point array.
* @return A pointer to an array of floating point values passed to the calling method which then will
* be responsible for lifetime of this memory.
*/
float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size);
/**
* Frees the memory allocated by the llmodel_embedding function.
* @param ptr A pointer to the embedding as returned from llmodel_embedding.
*/
void llmodel_free_embedding(float *ptr);
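As a rough sketch of how a caller drives these two new functions (it mirrors the ctypes binding shown later in this diff; `llmodel` is assumed to be the already-loaded shared library and `model` a handle with a model loaded):

```py
import ctypes

llmodel.llmodel_embedding.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.POINTER(ctypes.c_size_t)]
llmodel.llmodel_embedding.restype = ctypes.POINTER(ctypes.c_float)
llmodel.llmodel_free_embedding.argtypes = [ctypes.POINTER(ctypes.c_float)]

size = ctypes.c_size_t()
ptr = llmodel.llmodel_embedding(model, b"The quick brown fox", ctypes.byref(size))
embedding = [ptr[i] for i in range(size.value)]  # copy the floats out first...
llmodel.llmodel_free_embedding(ptr)              # ...then free the buffer the caller now owns
```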
/**
* Set the number of threads to be used by the model.
* @param model A pointer to the llmodel_model instance.


@ -33,7 +33,14 @@ void LLModel::prompt(const std::string &prompt,
PromptContext &promptCtx)
{
if (!isModelLoaded()) {
std::cerr << implementation().modelType << " ERROR: prompt won't work with an unloaded model!\n";
std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
return;
}
if (!supportsCompletion()) {
std::string errorMessage = "ERROR: this model does not support text completion or chat!\n";
responseCallback(-1, errorMessage);
std::cerr << implementation().modelType() << errorMessage;
return;
}
@ -45,8 +52,8 @@ void LLModel::prompt(const std::string &prompt,
if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
std::cerr << implementation().modelType << " ERROR: The prompt is" << embd_inp.size() <<
"tokens and the context window is" << promptCtx.n_ctx << "!\n";
std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
" tokens and the context window is " << promptCtx.n_ctx << "!\n";
return;
}
@ -64,7 +71,7 @@ void LLModel::prompt(const std::string &prompt,
if (promptCtx.n_past + int32_t(batch.size()) > promptCtx.n_ctx) {
const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
// Erase the first percentage of context from the tokens...
std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
promptCtx.n_past = promptCtx.tokens.size();
recalculateContext(promptCtx, recalculateCallback);
@ -72,7 +79,7 @@ void LLModel::prompt(const std::string &prompt,
}
if (!evalTokens(promptCtx, batch)) {
std::cerr << implementation().modelType << " ERROR: Failed to process prompt\n";
std::cerr << implementation().modelType() << " ERROR: Failed to process prompt\n";
return;
}
@ -103,7 +110,7 @@ void LLModel::prompt(const std::string &prompt,
if (promptCtx.n_past + 1 > promptCtx.n_ctx) {
const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
// Erase the first percentage of context from the tokens...
std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
promptCtx.n_past = promptCtx.tokens.size();
recalculateContext(promptCtx, recalculateCallback);
@ -111,7 +118,7 @@ void LLModel::prompt(const std::string &prompt,
}
if (!evalTokens(promptCtx, { id })) {
std::cerr << implementation().modelType << " ERROR: Failed to predict next token\n";
std::cerr << implementation().modelType() << " ERROR: Failed to predict next token\n";
return;
}
@ -158,3 +165,12 @@ void LLModel::prompt(const std::string &prompt,
cachedTokens.clear();
}
}
std::vector<float> LLModel::embedding(const std::string &/*text*/)
{
if (!supportsCompletion()) {
std::string errorMessage = "ERROR: this model does not support generating embeddings!\n";
std::cerr << implementation().modelType() << errorMessage;
}
return std::vector<float>();
}


@ -15,6 +15,8 @@ public:
MPT();
~MPT();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -17,6 +17,8 @@ public:
Replit();
~Replit();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string & modelPath) override;


@ -0,0 +1,102 @@
import sys
import struct
import json
import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
if len(sys.argv) < 3:
print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
print(" ftype == 0 -> float32")
print(" ftype == 1 -> float16")
sys.exit(1)
# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"
with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
encoder = json.load(f)
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
with open(dir_model + "/vocab.txt", "r", encoding="utf-8") as f:
vocab = f.readlines()
# possible data types
# ftype == 0 -> float32
# ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]
ftype = 1
if len(sys.argv) > 2:
ftype = int(sys.argv[2])
if ftype < 0 or ftype > 1:
print("Invalid ftype: " + str(ftype))
sys.exit(1)
fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
tokenizer = AutoTokenizer.from_pretrained(dir_model)
model = AutoModel.from_pretrained(dir_model, low_cpu_mem_usage=True)
print (model)
print(tokenizer.encode('I believe the meaning of life is'))
list_vars = model.state_dict()
for name in list_vars.keys():
print(name, list_vars[name].shape, list_vars[name].dtype)
fout = open(fname_out, "wb")
print(hparams)
fout.write(struct.pack("i", 0x62657274)) # magic: ggml in hex
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", hparams["max_position_embeddings"]))
fout.write(struct.pack("i", hparams["hidden_size"]))
fout.write(struct.pack("i", hparams["intermediate_size"]))
fout.write(struct.pack("i", hparams["num_attention_heads"]))
fout.write(struct.pack("i", hparams["num_hidden_layers"]))
fout.write(struct.pack("i", ftype))
for i in range(hparams["vocab_size"]):
text = vocab[i][:-1] # strips newline at the end
#print(f"{i}:{text}")
data = bytes(text, 'utf-8')
fout.write(struct.pack("i", len(data)))
fout.write(data)
for name in list_vars.keys():
data = list_vars[name].squeeze().numpy()
if name in ['embeddings.position_ids', 'pooler.dense.weight', 'pooler.dense.bias']:
continue
print("Processing variable: " + name + " with shape: ", data.shape)
n_dims = len(data.shape);
# ftype == 0 -> float32, ftype == 1 -> float16
if ftype == 1 and name[-7:] == ".weight" and n_dims == 2:
print(" Converting to float16")
data = data.astype(np.float16)
l_type = 1
else:
l_type = 0
# header
str = name.encode('utf-8')
fout.write(struct.pack("iii", n_dims, len(str), l_type))
for i in range(n_dims):
fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
fout.write(str);
# data
data.tofile(fout)
fout.close()
print("Done. Output file: " + fname_out)
print("")


@ -2,11 +2,13 @@
## What models are supported by the GPT4All ecosystem?
Currently, there are three different model architectures that are supported:
Currently, there are five different model architectures that are supported:
1. GPTJ - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
2. LLAMA - Based off of the LLAMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
1. GPT-J - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
2. LLaMA - Based off of the LLaMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
3. MPT - Based off of Mosaic ML's MPT architecture with examples found [here](https://huggingface.co/mosaicml/mpt-7b)
4. Replit - Based off of Replit Inc.'s Replit architecture with examples found [here](https://huggingface.co/replit/replit-code-v1-3b)
5. Falcon - Based off of TII's Falcon architecture with examples found [here](https://huggingface.co/tiiuae/falcon-40b)
## Why so many different architectures? What differentiates them?
@ -25,6 +27,10 @@ The upstream [llama.cpp](https://github.com/ggerganov/llama.cpp) project has int
Fortunately, we have engineered a submoduling system allowing us to dynamically load different versions of the underlying library so that
GPT4All just works.
## What are the system requirements?
Your CPU needs to support [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) and you need enough RAM to load a model into memory.
## What about GPU inference?
In newer versions of llama.cpp, there has been some added support for NVIDIA GPU's for inference. We're investigating how to incorporate this into our downloadable installers.
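A quick way to check the AVX requirement mentioned above on Linux (a rough sketch; it just greps CPU flags and assumes `/proc/cpuinfo` is available):

```py
with open("/proc/cpuinfo") as f:
    flags = f.read()

if "avx2" in flags:
    print("AVX2 supported")
elif "avx" in flags:
    print("AVX supported (AVX-only builds will be used)")
else:
    print("No AVX support found; the prebuilt GPT4All backends require it")
```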


@ -1,8 +1,7 @@
# GPT4All Python API
# GPT4All Python Generation API
The `GPT4All` python package provides bindings to our C/C++ model backend libraries.
The source code and local build instructions can be found [here](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python).
## Quickstart
```bash
@ -109,5 +108,5 @@ with model.chat_session():
print(model.current_chat_session)
```
### API documentation
::: gpt4all.gpt4all.GPT4All
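For orientation, a minimal generation sketch (the model name is borrowed from the binding's test suite further down in this diff; any model from the GPT4All model list works the same way):

```py
from gpt4all import GPT4All

model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')  # downloaded automatically when allow_download is True
output = model.generate('The capital of France is ')
print(output)
```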


@ -0,0 +1,35 @@
# Embeddings
GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained [Sentence Transformer](https://www.sbert.net/). These embeddings are comparable in quality for many tasks with OpenAI.
## Quickstart
```bash
pip install gpt4all
```
### Generating embeddings
The embedding model will automatically be downloaded if not installed.
=== "Embed4All Example"
``` py
from gpt4all import GPT4All, Embed4All
text = 'The quick brown fox jumps over the lazy dog'
embedder = Embed4All()
output = embedder.embed(text)
print(output)
```
=== "Output"
```
[0.034696947783231735, -0.07192722707986832, 0.06923297047615051, ...]
```
### Speed of embedding generation
The following table lists the generation speed for text document captured on an Intel i913900HX CPU with DDR5 5600 running with 8 threads under stable load.
| Tokens | 128 | 512 | 2048 | 8129 | 16,384 |
| --------------- | ---- | ---- | ---- | ---- | ---- |
| Wall time (s) | .02 | .08 | .24 | .96 | 1.9 |
| Tokens / Second | 6508 | 6431 | 8622 | 8509 | 8369 |
### API documentation
::: gpt4all.gpt4all.Embed4All
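As a small follow-on sketch, the constructor's optional `n_threads` argument can be passed explicitly (the benchmark script later in this diff uses 8), and the bundled MiniLM-L6-v2 model yields 384-dimensional vectors, as asserted in the Python test:

```py
from gpt4all import Embed4All

embedder = Embed4All(n_threads=8)  # n_threads=None lets the backend pick automatically
vector = embedder.embed('The quick brown fox jumps over the lazy dog')
print(len(vector))  # 384 for the default ggml-all-MiniLM-L6-v2-f16 model
```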


@ -1,2 +1,2 @@
from .gpt4all import GPT4All # noqa
from .gpt4all import GPT4All, Embed4All # noqa
from .pyllmodel import LLModel # noqa


@ -15,6 +15,36 @@ from . import pyllmodel
# TODO: move to config
DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")
class Embed4All:
"""
Python class that handles embeddings for GPT4All.
"""
def __init__(
self,
n_threads: Optional[int] = None,
):
"""
Constructor
Args:
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
"""
self.gpt4all = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin', n_threads=n_threads)
def embed(
self,
text: str
) -> list[float]:
"""
Generate an embedding.
Args:
text: The text document to generate an embedding for.
Returns:
An embedding of your document of text.
"""
return self.gpt4all.model.generate_embedding(text)
class GPT4All:
"""
@ -39,7 +69,7 @@ class GPT4All:
model_type: Model architecture. This argument currently does not have any functionality and is just used as
descriptive identifier for user. Default is None.
allow_download: Allow API to download models from gpt4all.io. Default is True.
n_threads: number of CPU threads used by GPT4All. Default is None, than the number of threads are determined automatically.
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
"""
self.model_type = model_type
self.model = pyllmodel.LLModel()


@ -112,6 +112,19 @@ llmodel.llmodel_prompt.argtypes = [
llmodel.llmodel_prompt.restype = None
llmodel.llmodel_embedding.argtypes = [
ctypes.c_void_p,
ctypes.c_char_p,
ctypes.POINTER(ctypes.c_size_t),
]
llmodel.llmodel_embedding.restype = ctypes.POINTER(ctypes.c_float)
llmodel.llmodel_free_embedding.argtypes = [
ctypes.POINTER(ctypes.c_float)
]
llmodel.llmodel_free_embedding.restype = None
llmodel.llmodel_setThreadCount.argtypes = [ctypes.c_void_p, ctypes.c_int32]
llmodel.llmodel_setThreadCount.restype = None
@ -141,10 +154,11 @@ class LLModel:
self.model = None
self.model_name = None
self.context = None
self.llmodel_lib = llmodel
def __del__(self):
if self.model is not None:
llmodel.llmodel_model_destroy(self.model)
self.llmodel_lib.llmodel_model_destroy(self.model)
def memory_needed(self, model_path: str) -> int:
model_path_enc = model_path.encode("utf-8")
@ -233,6 +247,17 @@ class LLModel:
self.context.repeat_last_n = repeat_last_n
self.context.context_erase = context_erase
def generate_embedding(
self,
text: str
) -> list[float]:
embedding_size = ctypes.c_size_t()
c_text = ctypes.c_char_p(text.encode('utf-8'))
embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
llmodel.llmodel_free_embedding(embedding_ptr)
return list(embedding_array)
def prompt_model(
self,
prompt: str,


@ -0,0 +1,18 @@
import sys
from io import StringIO
from gpt4all import GPT4All, Embed4All
import time
def time_embedding(i, embedder):
text = 'foo bar ' * i
start_time = time.time()
output = embedder.embed(text)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Time report: {2 * i / elapsed_time} tokens/second with {2 * i} tokens taking {elapsed_time} seconds")
if __name__ == "__main__":
embedder = Embed4All(n_threads=8)
for i in [2**n for n in range(6, 14)]:
time_embedding(i, embedder)


@ -1,8 +1,8 @@
import sys
from io import StringIO
from gpt4all import GPT4All
from gpt4all import GPT4All, Embed4All
import time
def test_inference():
model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
@ -99,3 +99,11 @@ def test_inference_mpt():
output = model.generate(prompt)
assert isinstance(output, str)
assert len(output) > 0
def test_embedding():
text = 'The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox'
embedder = Embed4All()
output = embedder.embed(text)
#for i, value in enumerate(output):
#print(f'Value at index {i}: {value}')
assert len(output) == 384


@ -10,7 +10,9 @@ use_directory_urls: false
nav:
- 'index.md'
- 'Bindings':
- 'GPT4All in Python': 'gpt4all_python.md'
- 'GPT4All in Python':
- 'Generation': 'gpt4all_python.md'
- 'Embedding': 'gpt4all_python_embedding.md'
- 'GPT4ALL in NodeJs': 'gpt4all_typescript.md'
- 'GPT4All Chat Client': 'gpt4all_chat.md'
- 'gpt4all_cli.md'


@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,
setup(
name=package_name,
version="1.0.3",
version="1.0.6",
description="Python bindings for GPT4All",
author="Richard Guo",
author_email="richard@nomic.ai",


@ -53,7 +53,7 @@ const response = await createCompletion(ll, [
* (win) msvc version 143
* Can be obtained with visual studio 2022 build tools
### Build
### Build (from source)
```sh
git clone https://github.com/nomic-ai/gpt4all.git
@ -138,7 +138,7 @@ This package is in active development, and breaking changes may happen until the
* \[ ] createTokenStream, an async iterator that streams each token emitted from the model. Planning on following this [example](https://github.com/nodejs/node-addon-examples/tree/main/threadsafe-async-iterator)
* \[ ] proper unit testing (integrate with circle ci)
* \[ ] publish to npm under alpha tag `gpt4all@alpha`
* \[ ] have more people test on other platforms (mac tester needed)
* \[x] have more people test on other platforms (mac tester needed)
* \[x] switch to new pluggable backend
### Documentation


@ -53,7 +53,7 @@
'-fno-rtti',
],
'cflags_cc': [
'-std=c++20'
'-std=c++2a'
]
}]
]


@ -10,6 +10,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
InstanceMethod("raw_prompt", &NodeModelWrapper::Prompt),
InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
});
@ -91,6 +92,23 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
}
Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo& info) {
auto env = info.Env();
std::string text = info[0].As<Napi::String>().Utf8Value();
size_t embedding_size = 0;
float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
auto arr_size = sizeof(arr) / sizeof(float);
Napi::Float32Array js_array = Napi::Float32Array::New(info.Env(), arr_size);
for (size_t i = 0; i < arr_size; ++i) {
float element = *(arr + i);
js_array[i] = element;
}
llmodel_free_embedding(arr);
return js_array;
}
/**
* Generate a response using the model.


@ -23,6 +23,7 @@ public:
void SetThreadCount(const Napi::CallbackInfo& info);
Napi::Value getName(const Napi::CallbackInfo& info);
Napi::Value ThreadCount(const Napi::CallbackInfo& info);
Napi::Value GenerateEmbedding(const Napi::CallbackInfo& info);
/*
* The path that is used to search for the dynamic libraries
*/


@ -1,6 +1,6 @@
{
"name": "gpt4all",
"version": "2.0.0",
"version": "2.0.0rc",
"packageManager": "yarn@3.5.1",
"main": "src/gpt4all.js",
"repository": "nomic-ai/gpt4all",


@ -6,7 +6,7 @@ async function createPrebuilds(combinations) {
platform,
arch,
napi: true,
targets: ["18.15.0"]
targets: ["18.16.0"]
};
try {
await createPrebuild(opts);

File diff suppressed because it is too large.


@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.16)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -17,7 +18,7 @@ endif()
set(APP_VERSION_MAJOR 2)
set(APP_VERSION_MINOR 4)
set(APP_VERSION_PATCH 13)
set(APP_VERSION_PATCH 14)
set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
# Include the binary directory for the generated header file
@ -205,6 +206,8 @@ install(TARGETS replit-mainline-default DESTINATION lib COMPONENT ${COMPONENT_NA
if(APPLE)
install(TARGETS replit-mainline-metal DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
endif()
install(TARGETS bert-avxonly DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
install(TARGETS bert-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
set(CPACK_GENERATOR "IFW")
set(CPACK_VERBATIM_VARIABLES YES)


@ -51,19 +51,7 @@ One click installers for macOS, Linux, and Windows at https://gpt4all.io
If you've already checked out the source code and/or built the program make sure when you do a git fetch to get the latest changes and that you also do ```git submodule update --init --recursive``` to update the submodules.
## Manual download of models
* https://gpt4all.io/models/ggml-mpt-7b-chat.bin (default) (md5sum 756249d3d6abe23bde3b1ae272628640) Current best non-commercially licensable chat model based on MPT and trained by Mosaic ML.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin (default) (md5sum 81a09a0ddf89690372fc296ff7f625af) Current best commercially licensable model based on GPT-J and trained by Nomic AI on the latest curated GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin (md5sum 91f886b68fbce697e9a3cd501951e455) Current best non-commercially licensable model based on Llama 13b and trained by Nomic AI on the latest curated GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin (md5sum 879344aaa9d62fdccbda0be7a09e7976) A commercially licensable model based on GPT-J and trained by Nomic AI on the v2 GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.1-breezy.bin (md5sum 61d48a82cb188cceb14ebb8082bfec37) A commercially licensable model based on GPT-J and trained by Nomic AI on the v1 GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j.bin (md5sum 5b5a3f9b858d33b29b52b89692415595) A commercially licensable model based on GPT-J and trained by Nomic AI on the v0 GPT4All dataset.
* https://gpt4all.io/models/ggml-vicuna-7b-1.1-q4_2.bin (md5sum 29119f8fa11712704c6b22ac5ab792ea) An non-commercially licensable model based on Llama 7b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
* https://gpt4all.io/models/ggml-vicuna-13b-1.1-q4_2.bin (md5sum 95999b7b0699e2070af63bf5d34101a8) An non-commercially licensable model based on Llama 13b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
* https://gpt4all.io/models/ggml-wizardLM-7B.q4_2.bin (md5sum 99e6d129745a3f1fb1121abed747b05a) An non-commercially licensable model based on Llama 7b and trained by Microsoft and Peking University.
* https://gpt4all.io/models/ggml-stable-vicuna-13B.q4_2.bin (md5sum 6cb4ee297537c9133bddab9692879de0) An non-commercially licensable model based on Llama 13b and RLHF trained by Stable AI.
* https://gpt4all.io/models/ggml-mpt-7b-base.bin (md5sum 120c32a51d020066288df045ef5d52b9) A commercially licensable model base pre-trained by Mosaic ML.
* https://gpt4all.io/models/ggml-nous-gpt4-vicuna-13b.bin (md5sum d5eafd5b0bd0d615cfd5fd763f642dfe) A non-commercially licensable model based on Vicuna 13b, fine-tuned on ~180,000 instructions, trained by Nous Research.
* https://gpt4all.io/models/ggml-mpt-7b-instruct.bin (md5sum 1cfa4958f489f0a0d1ffdf6b37322809) A commercially licensable instruct model based on MPT and trained by Mosaic ML.
* You can find a 'Model Explorer' on the official website where you can manually download models that we support: https://gpt4all.io/index.html
## Terminal Only Interface with no Qt dependency


@ -155,7 +155,7 @@ void ChatGPTWorker::request(const QString &apiKey,
m_ctx = promptCtx;
QUrl openaiUrl("https://api.openai.com/v1/chat/completions");
const QString authorization = QString("Bearer %1").arg(apiKey);
const QString authorization = QString("Bearer %1").arg(apiKey).trimmed();
QNetworkRequest request(openaiUrl);
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
request.setRawHeader("Authorization", authorization.toUtf8());
@ -244,7 +244,7 @@ void ChatGPTWorker::handleReadyRead()
void ChatGPTWorker::handleErrorOccurred(QNetworkReply::NetworkError code)
{
QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply) {
if (!reply || reply->error() == QNetworkReply::OperationCanceledError /*when we call abort on purpose*/) {
emit finished();
return;
}


@ -46,6 +46,8 @@ public:
ChatGPT();
virtual ~ChatGPT();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -14,6 +14,7 @@
#define REPLIT_INTERNAL_STATE_VERSION 0
#define LLAMA_INTERNAL_STATE_VERSION 0
#define FALCON_INTERNAL_STATE_VERSION 0
#define BERT_INTERNAL_STATE_VERSION 0
class LLModelStore {
public:
@ -240,11 +241,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
#if defined(Q_OS_MAC) && defined(__arm__)
if (m_forceMetal)
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "metal");
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "metal");
else
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "auto");
#else
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), "auto");
#endif
if (m_llModelInfo.model) {
@ -258,12 +259,13 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
m_llModelInfo = LLModelInfo();
emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
} else {
switch (m_llModelInfo.model->implementation().modelType[0]) {
switch (m_llModelInfo.model->implementation().modelType()[0]) {
case 'L': m_llModelType = LLModelType::LLAMA_; break;
case 'G': m_llModelType = LLModelType::GPTJ_; break;
case 'M': m_llModelType = LLModelType::MPT_; break;
case 'R': m_llModelType = LLModelType::REPLIT_; break;
case 'F': m_llModelType = LLModelType::FALCON_; break;
case 'B': m_llModelType = LLModelType::BERT_; break;
default:
{
delete std::exchange(m_llModelInfo.model, nullptr);
@ -628,8 +630,8 @@ bool ChatLLM::handleNameRecalculate(bool isRecalc)
qDebug() << "name recalc" << m_llmThread.objectName() << isRecalc;
#endif
Q_UNUSED(isRecalc);
Q_UNREACHABLE();
return false;
qt_noop();
return true;
}
bool ChatLLM::handleSystemPrompt(int32_t token)
@ -669,7 +671,8 @@ bool ChatLLM::serialize(QDataStream &stream, int version)
case MPT_: stream << MPT_INTERNAL_STATE_VERSION; break;
case GPTJ_: stream << GPTJ_INTERNAL_STATE_VERSION; break;
case LLAMA_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case FALCON_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case FALCON_: stream << FALCON_INTERNAL_STATE_VERSION; break;
case BERT_: stream << BERT_INTERNAL_STATE_VERSION; break;
default: Q_UNREACHABLE();
}
}
@ -788,13 +791,18 @@ void ChatLLM::processSystemPrompt()
if (!isModelLoaded() || m_processedSystemPrompt || m_isServer)
return;
const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
if (QString::fromStdString(systemPrompt).trimmed().isEmpty()) {
m_processedSystemPrompt = true;
return;
}
m_stopGenerating = false;
auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);
auto responseFunc = std::bind(&ChatLLM::handleSystemResponse, this, std::placeholders::_1,
std::placeholders::_2);
auto recalcFunc = std::bind(&ChatLLM::handleSystemRecalculate, this, std::placeholders::_1);
const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
const float top_p = MySettings::globalInstance()->modelTopP(m_modelInfo);

View File

@ -16,6 +16,7 @@ enum LLModelType {
CHATGPT_,
REPLIT_,
FALCON_,
BERT_
};
struct LLModelInfo {

View File

@ -7,16 +7,19 @@ file(GLOB MYMPTLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NA
file(GLOB MYLLAMALIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama*)
file(GLOB MYREPLITLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libreplit*)
file(GLOB MYFALCONLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libfalcon*)
file(GLOB MYBERTLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libbert*)
file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.*)
file(COPY ${MYGPTJLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYMPTLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLAMALIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYREPLITLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYFALCONLLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLAMALIBS}
file(COPY ${MYBERTLLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLMODELLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)

View File

@ -8,6 +8,7 @@
#include <QFile>
#include <QProcess>
#include <QResource>
#include <QSettings>
#include <fstream>
class MyLLM: public LLM { };
@ -33,7 +34,7 @@ LLM::LLM()
if (directoryExists(frameworksDir))
llmodelSearchPaths += ";" + frameworksDir;
#endif
LLModel::setImplementationsSearchPath(llmodelSearchPaths.toStdString());
LLModel::Implementation::setImplementationsSearchPath(llmodelSearchPaths.toStdString());
#if defined(__x86_64__)
#ifndef _MSC_VER
@ -48,7 +49,13 @@ LLM::LLM()
#endif
m_compatHardware = minimal;
emit compatHardwareChanged();
}
bool LLM::hasSettingsAccess() const
{
QSettings settings;
settings.sync();
return settings.status() == QSettings::NoError;
}
bool LLM::checkForUpdates() const

View File

@ -6,12 +6,11 @@
class LLM : public QObject
{
Q_OBJECT
Q_PROPERTY(bool compatHardware READ compatHardware NOTIFY compatHardwareChanged)
public:
static LLM *globalInstance();
bool compatHardware() const { return m_compatHardware; }
Q_INVOKABLE bool hasSettingsAccess() const;
Q_INVOKABLE bool compatHardware() const { return m_compatHardware; }
Q_INVOKABLE bool checkForUpdates() const;
Q_INVOKABLE bool directoryExists(const QString &path) const;
@ -22,7 +21,6 @@ public:
Q_SIGNALS:
void chatListModelChanged();
void modelListChanged();
void compatHardwareChanged();
private:
bool m_compatHardware;

View File

@ -89,14 +89,22 @@ Window {
property bool hasShownModelDownload: false
property bool hasShownFirstStart: false
property bool hasShownSettingsAccess: false
function startupDialogs() {
if (!LLM.compatHardware) {
if (!LLM.compatHardware()) {
Network.sendNonCompatHardware();
errorCompatHardware.open();
return;
}
// check if we have access to settings and if not show an error
if (!hasShownSettingsAccess && !LLM.hasSettingsAccess()) {
errorSettingsAccess.open();
hasShownSettingsAccess = true;
return;
}
// check for first time start of this version
if (!hasShownFirstStart && Download.isFirstStart()) {
firstStartDialog.open();
@ -135,6 +143,20 @@ Window {
+ qsTr("https://en.wikipedia.org/wiki/Advanced_Vector_Extensions</a>")
}
PopupDialog {
id: errorSettingsAccess
anchors.centerIn: parent
shouldTimeOut: false
shouldShowBusy: false
modal: true
text: qsTr("<h3>Encountered an error starting up:</h3><br>")
+ qsTr("<i>\"Inability to access settings file.\"</i>")
+ qsTr("<br><br>Unfortunately, something is preventing the program from accessing ")
+ qsTr("the settings file. This could be caused by incorrect permissions in the local ")
+ qsTr("app config directory where the settings file is located. ")
+ qsTr("Check out our <a href=\"https://discord.gg/4M2QFmTt2k\">discord channel</a> for help.")
}
StartupDialog {
id: firstStartDialog
anchors.centerIn: parent

View File

@ -1,18 +1,16 @@
[
{
"order": "a",
"md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Hermes",
"filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "8136777088",
"requires": "2.4.7",
"md5sum": "e8d47924f433bd561cb5244557147793",
"name": "Wizard v1.1",
"filename": "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin",
"filesize": "7323310848",
"ramrequired": "16",
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
"systemPrompt": " ",
"description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul"
},
{
"order": "b",
@ -25,12 +23,29 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "Falcon",
"systemPrompt": " ",
"description": "<strong>Best overall smaller model</strong><br><ul><li>Fast responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
},
{
"order": "c",
"md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Hermes",
"filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "8136777088",
"requires": "2.4.7",
"ramrequired": "16",
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Extremely good model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
},
{
"order": "e",
"md5sum": "81a09a0ddf89690372fc296ff7f625af",
"name": "Groovy",
"filename": "ggml-gpt4all-j-v1.3-groovy.bin",
@ -39,10 +54,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "GPT-J",
"systemPrompt": " ",
"description": "<strong>Creative model can be used for commercial purposes</strong><br><ul><li>Fast responses<li>Creative responses</li><li>Instruction based</li><li>Trained by Nomic AI<li>Licensed for commercial use</ul>"
},
{
"order": "e",
"order": "f",
"md5sum": "11d9f060ca24575a2c303bdc39952486",
"name": "Snoozy",
"filename": "GPT4All-13B-snoozy.ggmlv3.q4_0.bin",
@ -52,11 +68,12 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/GPT4All-13B-snoozy-GGML/resolve/main/GPT4All-13B-snoozy.ggmlv3.q4_0.bin"
},
{
"order": "f",
"order": "g",
"md5sum": "756249d3d6abe23bde3b1ae272628640",
"name": "MPT Chat",
"filename": "ggml-mpt-7b-chat.bin",
@ -71,9 +88,9 @@
"systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
},
{
"order": "g",
"order": "h",
"md5sum": "e64e74375ce9d36a3d0af3db1523fd0a",
"name": "Orca",
"name": "Mini Orca",
"filename": "orca-mini-7b.ggmlv3.q4_0.bin",
"filesize": "3791749248",
"requires": "2.4.7",
@ -87,9 +104,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "h",
"order": "i",
"md5sum": "6a087f7f4598fad0bb70e6cb4023645e",
"name": "Orca (Small)",
"name": "Mini Orca (Small)",
"filename": "orca-mini-3b.ggmlv3.q4_0.bin",
"filesize": "1928446208",
"requires": "2.4.7",
@ -103,9 +120,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "i",
"order": "j",
"md5sum": "959b7f65b2d12fd1e3ff99e7493c7a3a",
"name": "Orca (Large)",
"name": "Mini Orca (Large)",
"filename": "orca-mini-13b.ggmlv3.q4_0.bin",
"filesize": "7323329152",
"requires": "2.4.7",
@ -119,7 +136,7 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "j",
"order": "k",
"md5sum": "29119f8fa11712704c6b22ac5ab792ea",
"name": "Vicuna",
"filename": "ggml-vicuna-7b-1.1-q4_2.bin",
@ -128,10 +145,11 @@
"parameters": "7 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "k",
"order": "l",
"md5sum": "95999b7b0699e2070af63bf5d34101a8",
"name": "Vicuna (large)",
"filename": "ggml-vicuna-13b-1.1-q4_2.bin",
@ -140,10 +158,11 @@
"parameters": "13 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good larger model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "l",
"order": "m",
"md5sum": "99e6d129745a3f1fb1121abed747b05a",
"name": "Wizard",
"filename": "ggml-wizardLM-7B.q4_2.bin",
@ -152,10 +171,11 @@
"parameters": "7 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by by Microsoft and Peking University</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "m",
"order": "n",
"md5sum": "6cb4ee297537c9133bddab9692879de0",
"name": "Stable Vicuna",
"filename": "ggml-stable-vicuna-13B.q4_2.bin",
@ -168,7 +188,7 @@
"systemPrompt": "## Assistant: I am StableVicuna, a large language model created by CarperAI. I am here to chat!\n\n"
},
{
"order": "n",
"order": "o",
"md5sum": "1cfa4958f489f0a0d1ffdf6b37322809",
"name": "MPT Instruct",
"filename": "ggml-mpt-7b-instruct.bin",
@ -178,10 +198,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "MPT",
"systemPrompt": " ",
"description": "<strong>Mosaic's instruction model</strong><br><ul><li>Instruction based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
},
{
"order": "o",
"order": "p",
"md5sum": "120c32a51d020066288df045ef5d52b9",
"name": "MPT Base",
"filename": "ggml-mpt-7b-base.bin",
@ -191,10 +212,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "MPT",
"systemPrompt": " ",
"description": "<strong>Trained for text completion with no assistant finetuning</strong><br><ul><li>Completion based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
},
{
"order": "p",
"order": "q",
"md5sum": "d5eafd5b0bd0d615cfd5fd763f642dfe",
"name": "Nous Vicuna",
"filename": "ggml-nous-gpt4-vicuna-13b.bin",
@ -203,10 +225,11 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on ~180,000 instructions</strong><br><ul><li>Instruction based<li>Trained by Nous Research<li>Cannot be used commercially</ul>"
},
{
"order": "q",
"order": "r",
"md5sum": "489d21fd48840dcb31e5f92f453f3a20",
"name": "Wizard Uncensored",
"filename": "wizardLM-13B-Uncensored.ggmlv3.q4_0.bin",
@ -216,11 +239,12 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on uncensored assistant data and instruction data</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/WizardLM-13B-Uncensored-GGML/resolve/main/wizardLM-13B-Uncensored.ggmlv3.q4_0.bin"
},
{
"order": "r",
"order": "s",
"md5sum": "615890cb571fcaa0f70b2f8d15ef809e",
"disableGUI": "true",
"name": "Replit",
@ -231,7 +255,23 @@
"parameters": "3 billion",
"quant": "f16",
"type": "Replit",
"systemPrompt": " ",
"description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/ggml-replit-code-v1-3b/resolve/main/ggml-replit-code-v1-3b.bin"
},
{
"order": "t",
"md5sum": "031bb5d5722c08d13e3e8eaf55c37391",
"disableGUI": "true",
"name": "Bert",
"filename": "ggml-all-MiniLM-L6-v2-f16.bin",
"filesize": "45521167",
"requires": "2.4.14",
"ramrequired": "1",
"parameters": "1 million",
"quant": "f16",
"type": "Bert",
"systemPrompt": " ",
"description": "<strong>Sbert</strong><br><ul><li>For embeddings"
}
]
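
The new Bert entry above ships an embedding-only sbert model rather than a chat model. Below is a minimal sketch of using it through the Python bindings' Embed4All wrapper; the exact constructor arguments are an assumption, and the default model it pulls is taken to be the ggml-all-MiniLM-L6-v2-f16.bin file added above.

```python
from gpt4all import Embed4All

# Minimal sketch, assuming the gpt4all Python package exposes Embed4All
# backed by the MiniLM sbert model registered in models.json above.
embedder = Embed4All()
text = "The quick brown fox jumps over the lazy dog"
embedding = embedder.embed(text)  # list of floats (sentence embedding)
print(len(embedding))
```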

View File

@ -416,6 +416,40 @@
* Akarshan Biswas
* Adam Treat (Nomic AI)
* Community (beta testers, bug reporters)
"
},
{
"version": "2.4.13",
"notes":
"
* Fix bug with prolonging shutdown with generation
* Fix bug with update model info on deleting chats
* Fix bug with preventing closing of model download dialog
* Always allow closing the model download dialog
* Fix numerous bugs with download of models.json and provide backup option
* Add json and c# highlighting
* Fix bug with chatgpt crashing
* Fix bug with chatgpt not working for some keys
* Fix bug with mixpanel opt outs not counting
* Fix problem with OOM errors causing crash and then repeating on next start
* Fix default thread setting and provide guardrails
* Fix tap handler in settings dialog for buttons
* Fix color of some text fields on macOS for settings dialog
* Fix problem with startup dialog not closing
* Provide error dialog for settings file not accessible
* Try and fix problems with avx-only detection
* Fix showing error in model downloads unnecessarily
* Prefer 7b models to load by default
* Add Wizard v1.1 to download list
* Rename Orca models to Mini Orca
* Don't use a system prompt unless model was trained with one by default
",
"contributors":
"
* Lakshay Kansal (Nomic AI)
* Aaron Miller (Nomic AI)
* Adam Treat (Nomic AI)
* Community (beta testers, bug reporters)
"
}
]

View File

@ -161,16 +161,6 @@ int InstalledModels::count() const
return rowCount();
}
QString InstalledModels::firstId() const
{
if (rowCount() > 0) {
QModelIndex firstIndex = index(0, 0);
return sourceModel()->data(firstIndex, ModelList::IdRole).toString();
} else {
return QString();
}
}
DownloadableModels::DownloadableModels(QObject *parent)
: QSortFilterProxyModel(parent)
, m_expanded(false)
@ -222,6 +212,7 @@ ModelList::ModelList()
: QAbstractListModel(nullptr)
, m_installedModels(new InstalledModels(this))
, m_downloadableModels(new DownloadableModels(this))
, m_asyncModelRequestOngoing(false)
{
m_installedModels->setSourceModel(this);
m_downloadableModels->setSourceModel(this);
@ -297,12 +288,9 @@ ModelInfo ModelList::defaultModelInfo() const
settings.sync();
// The user default model can be set by the user in the settings dialog. The "default" user
// default model is "Application default" which signals we should use the default model that was
// specified by the models.json file.
// default model is "Application default" which signals we should use the logic here.
const QString userDefaultModelName = MySettings::globalInstance()->userDefaultModel();
const bool hasUserDefaultName = !userDefaultModelName.isEmpty() && userDefaultModelName != "Application default";
const QString defaultModelName = settings.value("defaultModel").toString();
const bool hasDefaultName = hasUserDefaultName ? false : !defaultModelName.isEmpty();
ModelInfo *defaultModel = nullptr;
for (ModelInfo *info : m_models) {
@ -310,12 +298,10 @@ ModelInfo ModelList::defaultModelInfo() const
continue;
defaultModel = info;
// If we don't have either setting, then just use the first model that is installed
if (!hasUserDefaultName && !hasDefaultName)
break;
const size_t ramrequired = defaultModel->ramrequired;
// If we don't have a user specified default, but *do* have a default setting and match, then use it
if (!hasUserDefaultName && hasDefaultName && (defaultModel->id() == defaultModelName))
// If we don't have either setting, then just use the first model that requires less than 16GB that is installed
if (!hasUserDefaultName && !info->isChatGPT && ramrequired > 0 && ramrequired < 16)
break;
// If we have a user specified default and match, then use it
@ -835,7 +821,7 @@ void ModelList::updateModelsFromDirectory()
for (const QString &id : modelsById) {
updateData(id, FilenameRole, filename);
updateData(id, ChatGPTRole, filename.startsWith("chatgpt-"));
updateData(id, DirpathRole, path);
updateData(id, DirpathRole, info.dir().absolutePath() + "/");
updateData(id, FilesizeRole, toFileSize(info.size()));
}
}
@ -846,14 +832,6 @@ void ModelList::updateModelsFromDirectory()
processDirectory(exePath);
if (localPath != exePath)
processDirectory(localPath);
if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
}
void ModelList::updateModelsFromJson()
@ -899,6 +877,9 @@ void ModelList::updateModelsFromJson()
void ModelList::updateModelsFromJsonAsync()
{
m_asyncModelRequestOngoing = true;
emit asyncModelRequestOngoingChanged();
#if defined(USE_LOCAL_MODELSJSON)
QUrl jsonUrl("file://" + QDir::homePath() + "/dev/large_language_models/gpt4all/gpt4all-chat/metadata/models.json");
#else
@ -911,17 +892,37 @@ void ModelList::updateModelsFromJsonAsync()
QNetworkReply *jsonReply = m_networkManager.get(request);
connect(qApp, &QCoreApplication::aboutToQuit, jsonReply, &QNetworkReply::abort);
connect(jsonReply, &QNetworkReply::finished, this, &ModelList::handleModelsJsonDownloadFinished);
connect(jsonReply, &QNetworkReply::errorOccurred, this, &ModelList::handleModelsJsonDownloadErrorOccurred);
}
void ModelList::handleModelsJsonDownloadFinished()
{
QNetworkReply *jsonReply = qobject_cast<QNetworkReply *>(sender());
if (!jsonReply)
if (!jsonReply) {
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
return;
}
QByteArray jsonData = jsonReply->readAll();
jsonReply->deleteLater();
parseModelsJsonFile(jsonData, true);
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
}
void ModelList::handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code)
{
// TODO: Show what error occurred in the GUI
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply)
return;
qWarning() << QString("ERROR: Modellist download failed with error code \"%1-%2\"")
.arg(code).arg(reply->errorString()).toStdString();
}
void ModelList::handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors)
@ -1108,14 +1109,6 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
updateData(id, ModelList::QuantRole, "NA");
updateData(id, ModelList::TypeRole, "GPT");
}
if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
}
void ModelList::updateModelsFromSettings()

View File

@ -127,7 +127,6 @@ class InstalledModels : public QSortFilterProxyModel
public:
explicit InstalledModels(QObject *parent);
int count() const;
QString firstId() const;
Q_SIGNALS:
void countChanged();
@ -169,6 +168,7 @@ class ModelList : public QAbstractListModel
Q_PROPERTY(InstalledModels* installedModels READ installedModels NOTIFY installedModelsChanged)
Q_PROPERTY(DownloadableModels* downloadableModels READ downloadableModels NOTIFY downloadableModelsChanged)
Q_PROPERTY(QList<QString> userDefaultModelList READ userDefaultModelList NOTIFY userDefaultModelListChanged)
Q_PROPERTY(bool asyncModelRequestOngoing READ asyncModelRequestOngoing NOTIFY asyncModelRequestOngoingChanged)
public:
static ModelList *globalInstance();
@ -296,12 +296,14 @@ public:
}
QString incompleteDownloadPath(const QString &modelFile);
bool asyncModelRequestOngoing() const { return m_asyncModelRequestOngoing; }
Q_SIGNALS:
void countChanged();
void installedModelsChanged();
void downloadableModelsChanged();
void userDefaultModelListChanged();
void asyncModelRequestOngoingChanged();
private Q_SLOTS:
void updateModelsFromJson();
@ -310,6 +312,7 @@ private Q_SLOTS:
void updateModelsFromDirectory();
void updateDataForSettings();
void handleModelsJsonDownloadFinished();
void handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code);
void handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors);
private:
@ -328,6 +331,7 @@ private:
QList<ModelInfo*> m_models;
QHash<QString, ModelInfo*> m_modelMap;
QFileSystemWatcher *m_watcher;
bool m_asyncModelRequestOngoing;
private:
explicit ModelList();

View File

@ -41,7 +41,7 @@ MyDialog {
}
Label {
visible: !ModelList.downloadableModels.count
visible: !ModelList.downloadableModels.count && !ModelList.asyncModelRequestOngoing
Layout.fillWidth: true
Layout.fillHeight: true
horizontalAlignment: Qt.AlignHCenter
@ -50,6 +50,15 @@ MyDialog {
color: theme.mutedTextColor
}
MyBusyIndicator {
visible: !ModelList.downloadableModels.count && ModelList.asyncModelRequestOngoing
running: ModelList.asyncModelRequestOngoing
Accessible.role: Accessible.Animation
Layout.alignment: Qt.AlignCenter
Accessible.name: qsTr("Busy indicator")
Accessible.description: qsTr("Displayed when the models request is ongoing")
}
ScrollView {
id: scrollView
ScrollBar.vertical.policy: ScrollBar.AlwaysOn

View File

@ -18,6 +18,9 @@ enum Language {
Go,
Json,
Csharp,
Latex,
Html,
Php
};
static QColor keywordColor = "#2e95d3"; // blue
@ -33,6 +36,11 @@ static QColor commandColor = functionCallColor;
static QColor variableColor = numberColor;
static QColor keyColor = functionColor;
static QColor valueColor = stringColor;
static QColor parameterColor = stringColor;
static QColor attributeNameColor = numberColor;
static QColor attributeValueColor = stringColor;
static QColor specialCharacterColor = functionColor;
static QColor doctypeColor = commentColor;
static Language stringToLanguage(const QString &language)
{
@ -62,6 +70,12 @@ static Language stringToLanguage(const QString &language)
return Go;
if (language == "json")
return Json;
if (language == "latex")
return Latex;
if (language == "html")
return Html;
if (language == "php")
return Php;
return None;
}
@ -561,6 +575,135 @@ static QVector<HighlightingRule> bashHighlightingRules()
return highlightingRules;
}
static QVector<HighlightingRule> latexHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat commandFormat;
commandFormat.setForeground(commandColor); // commandColor needs to be set to your liking
rule.pattern = QRegularExpression("\\\\[A-Za-z]+"); // Pattern for LaTeX commands
rule.format = commandFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor); // commentColor needs to be set to your liking
rule.pattern = QRegularExpression("%[^\n]*"); // Pattern for LaTeX comments
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> htmlHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat attributeNameFormat;
attributeNameFormat.setForeground(attributeNameColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*=");
rule.format = attributeNameFormat;
highlightingRules.append(rule);
QTextCharFormat attributeValueFormat;
attributeValueFormat.setForeground(attributeValueColor);
rule.pattern = QRegularExpression("\".*?\"|'.*?'");
rule.format = attributeValueFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("<!--.*?-->");
rule.format = commentFormat;
highlightingRules.append(rule);
QTextCharFormat specialCharacterFormat;
specialCharacterFormat.setForeground(specialCharacterColor);
rule.pattern = QRegularExpression("&[a-zA-Z0-9#]*;");
rule.format = specialCharacterFormat;
highlightingRules.append(rule);
QTextCharFormat doctypeFormat;
doctypeFormat.setForeground(doctypeColor);
rule.pattern = QRegularExpression("<!DOCTYPE.*?>");
rule.format = doctypeFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> phpHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat functionCallFormat;
functionCallFormat.setForeground(functionCallColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*(?=\\()");
rule.format = functionCallFormat;
highlightingRules.append(rule);
QTextCharFormat functionFormat;
functionFormat.setForeground(functionColor);
rule.pattern = QRegularExpression("\\bfunction\\s+(\\w+)\\b");
rule.format = functionFormat;
highlightingRules.append(rule);
QTextCharFormat numberFormat;
numberFormat.setForeground(numberColor);
rule.pattern = QRegularExpression("\\b[0-9]*\\.?[0-9]+\\b");
rule.format = numberFormat;
highlightingRules.append(rule);
QTextCharFormat keywordFormat;
keywordFormat.setForeground(keywordColor);
QStringList keywordPatterns = {
"\\bif\\b", "\\belse\\b", "\\belseif\\b", "\\bwhile\\b", "\\bfor\\b",
"\\bforeach\\b", "\\breturn\\b", "\\bprint\\b", "\\binclude\\b", "\\brequire\\b",
"\\binclude_once\\b", "\\brequire_once\\b", "\\btry\\b", "\\bcatch\\b",
"\\bfinally\\b", "\\bcontinue\\b", "\\bbreak\\b", "\\bclass\\b", "\\bfunction\\b",
"\\bnew\\b", "\\bthrow\\b", "\\barray\\b", "\\bpublic\\b", "\\bprivate\\b",
"\\bprotected\\b", "\\bstatic\\b", "\\bglobal\\b", "\\bisset\\b", "\\bunset\\b",
"\\bnull\\b", "\\btrue\\b", "\\bfalse\\b"
};
for (const QString &pattern : keywordPatterns) {
rule.pattern = QRegularExpression(pattern);
rule.format = keywordFormat;
highlightingRules.append(rule);
}
QTextCharFormat stringFormat;
stringFormat.setForeground(stringColor);
rule.pattern = QRegularExpression("\".*?\"");
rule.format = stringFormat;
highlightingRules.append(rule);
rule.pattern = QRegularExpression("\'.*?\'");
rule.format = stringFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("//[^\n]*");
rule.format = commentFormat;
highlightingRules.append(rule);
rule.pattern = QRegularExpression("/\\*.*?\\*/");
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> jsonHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
@ -616,6 +759,12 @@ void SyntaxHighlighter::highlightBlock(const QString &text)
rules = javaHighlightingRules();
else if (block.userState() == Json)
rules = jsonHighlightingRules();
else if (block.userState() == Latex)
rules = latexHighlightingRules();
else if (block.userState() == Html)
rules = htmlHighlightingRules();
else if (block.userState() == Php)
rules = phpHighlightingRules();
for (const HighlightingRule &rule : qAsConst(rules)) {
QRegularExpressionMatchIterator matchIterator = rule.pattern.globalMatch(text);
@ -821,7 +970,10 @@ void ResponseText::handleCodeBlocks()
|| firstWord == "java"
|| firstWord == "go"
|| firstWord == "golang"
|| firstWord == "json") {
|| firstWord == "json"
|| firstWord == "latex"
|| firstWord == "html"
|| firstWord == "php") {
codeLanguage = firstWord;
capturedText.remove(0, match.captured(0).length());
}

View File

@ -0,0 +1,49 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 1,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
}
}
}

View File

@ -0,0 +1,48 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 2,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear"
}
}
}

View File

@ -0,0 +1,34 @@
# model/tokenizer
model_name: "tiiuae/falcon-7b"
tokenizer_name: "tiiuae/falcon-7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-falcon"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 32
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: "/home/paperspace/gpt4all/ckpts/mpt/step_1000"
lora: false
warmup_steps: 500
num_epochs: 2
# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42

View File

@ -0,0 +1,34 @@
# model/tokenizer
model_name: "mosaicml/mpt-7b"
tokenizer_name: "mosaicml/mpt-7b"
gradient_checkpointing: false
save_name: "nomic-ai/mpt-finetuned-round2"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 8
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/mpt"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 2
# logging
wandb: false
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42

View File

@ -0,0 +1,34 @@
# model/tokenizer
model_name: "openlm-research/open_llama_7b"
tokenizer_name: "openlm-research/open_llama_7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-openllama"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-updated"
revision: null
max_length: 1024
batch_size: 32
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
log_every: 10
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 3
# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42
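
The training configs above are plain YAML read into a single `config` dictionary by the training script. A minimal sketch of loading one of them; the file path is illustrative and PyYAML is assumed to be installed.

```python
import yaml

# Minimal sketch: load a training config like the ones above into a plain dict.
# The path is illustrative; point it at whichever config file you are using.
with open("configs/train/finetune_openllama.yaml") as f:
    config = yaml.safe_load(f)

# Keys below all appear in the configs shown above.
print(config["model_name"], config["lr"], config["num_epochs"])
```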

View File

@ -12,7 +12,7 @@ def tokenize_inputs(config, tokenizer, examples):
# hacky backward compatible
different_eos = tokenizer.eos_token != "</s>"
out = {"labels": [], "input_ids": []}
out = {"labels": [], "input_ids": [], "attention_mask": []}
for prompt, response in zip(examples["prompt"], examples["response"]):
if different_eos:
if response.count("</s> \n") > 0:
@ -49,9 +49,10 @@ def tokenize_inputs(config, tokenizer, examples):
print(response)
raise
input_tokens = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length)["input_ids"]
padded = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length, return_tensors="pt")
out["labels"].append(labels)
out["input_ids"].append(input_tokens)
out["input_ids"].append(padded["input_ids"])
out["attention_mask"].append(padded["attention_mask"])
out = {k: torch.stack(v) if isinstance(v, list) else v for k, v in out.items()}
@ -72,7 +73,7 @@ def load_data(config, tokenizer):
dataset = load_dataset("json", data_files=files, split="train")
else:
dataset = load_dataset(dataset_path, split="train")
dataset = load_dataset(dataset_path, split="train", revision=config["revision"] if "revision" in config else None)
dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
@ -83,19 +84,23 @@ def load_data(config, tokenizer):
else:
kwargs = {}
cols_to_keep = ["input_ids", "labels", "attention_mask"]
# tokenize inputs and return labels and attention mask
train_dataset = train_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True,
remove_columns=["source", "prompt"],
**kwargs
)
remove_cols = [col for col in train_dataset.column_names if col not in cols_to_keep]
train_dataset = train_dataset.remove_columns(remove_cols)
val_dataset = val_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True,
remove_columns=["source", "prompt"],
**kwargs
)
remove_cols = [col for col in val_dataset.column_names if col not in cols_to_keep]
val_dataset = val_dataset.remove_columns(remove_cols)
train_dataset = train_dataset.with_format("torch")
val_dataset = val_dataset.with_format("torch")
@ -106,12 +111,14 @@ def load_data(config, tokenizer):
train_dataset,
collate_fn=DefaultDataCollator(),
batch_size=config["batch_size"],
shuffle=True,
)
val_dataloader = DataLoader(
val_dataset,
collate_fn=DefaultDataCollator(),
batch_size=config["batch_size"],
shuffle=True,
)
return train_dataloader, val_dataloader
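
The tokenize_inputs change above pads each example to max_length with `return_tensors="pt"` and now keeps the attention mask alongside input_ids. A small standalone sketch of that padding call, assuming a Hugging Face tokenizer; the gpt2 checkpoint and max_length value are illustrative.

```python
from transformers import AutoTokenizer

# Illustrative tokenizer; gpt2 has no pad token by default, so reuse eos.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# One encoded example, then pad it to a fixed length as in the hunk above.
input_tokens = [tokenizer("hello world")["input_ids"]]
padded = tokenizer.pad(
    {"input_ids": input_tokens},
    padding="max_length",
    max_length=16,
    return_tensors="pt",
)

# Both tensors come back padded to (batch, max_length).
print(padded["input_ids"].shape, padded["attention_mask"].shape)
```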

View File

@ -1,10 +1,10 @@
accelerate
datasets
einops
torchmetrics
evaluate
transformers>=4.28.0
wandb
pip
peft
nodelist-inflator
deepspeed

View File

@ -1,5 +1,5 @@
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler, LlamaForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler
import torch
from torch.optim import AdamW
from argparse import ArgumentParser
@ -42,7 +42,7 @@ def train(accelerator, config):
accelerator.print(config)
accelerator.print(f"Using {accelerator.num_processes} GPUs")
tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'])
tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'], use_fast=False)
# if no pad token, set it to eos
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
@ -53,6 +53,7 @@ def train(accelerator, config):
checkpoint = config["gradient_checkpointing"]
model = AutoModelForCausalLM.from_pretrained(config["model_name"],
use_cache=False if checkpoint else True,
trust_remote_code=True)
@ -86,7 +87,7 @@ def train(accelerator, config):
# decay to min_lr instead of 0
lr_ratio = config["min_lr"] / config["lr"]
accelerator.print(f"Len of train_dataloader: {len(train_dataloader)}")
total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * config["num_epochs"]
total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * (config["num_epochs"])
# instead of decaying to zero, decay to ratio of min_lr / lr
total_num_steps += int(total_num_steps * lr_ratio) + config["warmup_steps"]
accelerator.print(f"Total training steps: {total_num_steps}")
@ -104,7 +105,7 @@ def train(accelerator, config):
)
else:
scheduler = DummyScheduler(
optimizer, total_num_steps=config["warmup_steps"], warmup_num_steps=config["warmup_steps"]
optimizer, total_num_steps=total_num_steps, warmup_num_steps=config["warmup_steps"]
)
model, optimizer, train_dataloader, val_dataloader, scheduler = accelerator.prepare(
@ -117,26 +118,34 @@ def train(accelerator, config):
if config["checkpoint"]:
accelerator.load_state(config["checkpoint"])
accelerator.print(f"Resumed from checkpoint: {config['checkpoint']}")
path = os.path.basename(config["train_args"]["resume_from_checkpoint"])
path = os.path.basename(config["checkpoint"])
training_difference = os.path.splitext(path)[0]
resume_step = int(training_difference.replace("step_", ""))
accelerator.skip_first_batches(train_dataloader, resume_step)
train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
accelerator.print(f"Resuming from step {resume_step}")
else:
resume_step = 0
# log gradients
if accelerator.is_main_process and config["wandb"]:
wandb.watch(model, log_freq=config["log_grads_every"], log="all")
for epoch in range(config["num_epochs"]):
accelerator.wait_for_everyone()
for epoch in range(0, config["num_epochs"]):
train_loss = MeanMetric(nan_strategy="error").to(model.device)
for step, batch in enumerate(tqdm(train_dataloader)):
curr_step = epoch * len(train_dataloader) + step
model.train()
outputs = model(**batch)
loss = outputs.loss
# gather loss before backprop in case of gradient accumulation
loss_values = accelerator.gather_for_metrics({"loss": loss.detach().float()})
if config["wandb"]:
accelerator.log({"loss": torch.mean(loss_values["loss"]).item()}, step=curr_step)
train_loss.update(loss_values["loss"])
loss = loss / gradient_accumulation_steps
@ -144,9 +153,8 @@ def train(accelerator, config):
# get gradient norm of all params
# log LR in case something weird happens
if step > 0 and step % (config["eval_every"] // 10) == 0:
if step > 0 and step % (config["log_lr_every"]) == 0:
if config["wandb"]:
curr_step = step + epoch * len(train_dataloader)
accelerator.log({"lr": scheduler.get_last_lr()[0]}, step=curr_step)
if (step + 1) % gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
@ -156,7 +164,6 @@ def train(accelerator, config):
if step > 0 and step % config["save_every"] == 0:
curr_step = step + epoch * len(train_dataloader)
accelerator.save_state(f"{config['output_dir']}/step_{curr_step}")
if step > 0 and (step % config["eval_every"] == 0 or step == len(train_dataloader) - 1):
@ -170,7 +177,6 @@ def train(accelerator, config):
}
if config["wandb"]:
curr_step = step + epoch * len(train_dataloader)
accelerator.log({**log_train, **log_val}, step=curr_step)
accelerator.print(f"Current LR: {scheduler.get_last_lr()[0]}")
@ -181,8 +187,14 @@ def train(accelerator, config):
accelerator.print(f"Epoch {epoch} finished")
accelerator.print(f"Pushing to HF hub")
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/epoch_{epoch}",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)
try:
if accelerator.is_main_process:
unwrapped_model.push_to_hub(config["save_name"] + f"-epoch_{epoch}", private=True)
@ -191,21 +203,16 @@ def train(accelerator, config):
accelerator.print(e)
accelerator.print(f"Failed to push to hub")
if config["num_epochs"] > 1:
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/epoch_{epoch}",
f"{config['output_dir']}/final",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/final",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)
accelerator.end_training()