Mirror of https://github.com/nomic-ai/gpt4all.git, synced 2025-08-13 00:05:57 -04:00

Compare commits: 60 commits, ceef5d0922 ... f3f25a9928

Commits (SHA1, newest first):
f3f25a9928, 44e20688cf, a82ce3b864, 5bf4462ac2, cabb089f25, 965b435406,
127288180a, a82204e514, 2aa1352628, 2c36da14b3, 6675ccda4d, 9e903775ae,
020053203f, 5ce7563afb, 115719612a, 1ca7e7e083, 477b13a5de, cfd70b69fc,
306105e62f, 89e277bb3c, f543affa9a, 6c8669cad3, 0c0a4f2c22, 6656f0f41e,
bb2b82e1b9, c77ab849c0, 1c4a244291, 936dcd2bfc, 15f1fe5445, ee4186d579,
6200900677, 4963db8f43, 0efdbfcffe, 315a1f2aa2, ae8eb297ac, 1f749d7633,
33557b1f39, 64b409e0b8, e59946f05d, b72b409d40, 59cae1132c, a0dae86a95,
18ca8901f0, 00a945eaee, 6c4f449b7a, e8b19b8e82, 8eb0844277, be395c12cc,
6a8fa27c8d, 8893db5896, 60627bd41f, 5df4f1bf8c, 10ca2c4475, e9897518d1,
432b7ebbd7, 95b8fb312e, ad0e7fd01f, f0faa23ad5, 0d726b22b8, 13b2d47be5
@@ -463,50 +463,47 @@ jobs:
docker:
- image: mcr.microsoft.com/dotnet/sdk:7.0-jammy # Ubuntu 22.04
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
ls -R runtimes
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
ls -R runtimes
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults

build-csharp-windows:
executor:
@@ -514,111 +511,99 @@ jobs:
size: large
shell: powershell.exe -ExecutionPolicy Bypass
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-win
- attach_workspace:
at: C:\Users\circleci\workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes\win-x64\native
cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet.exe restore Gpt4All
- save_cache:
paths:
- C:\Users\circleci\.nuget\packages
key: gpt4all-csharp-nuget-packages-win
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet.exe build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
$Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-win
- attach_workspace:
at: C:\Users\circleci\workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes\win-x64\native
cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet.exe restore Gpt4All
- save_cache:
paths:
- C:\Users\circleci\.nuget\packages
key: gpt4all-csharp-nuget-packages-win
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet.exe build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
$Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults

build-csharp-macos:
macos:
xcode: "14.0.0"
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: Install dependencies
command: |
brew install --cask dotnet-sdk
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
build-nodejs-linux:
docker:
- image: circleci/node:erbium-bullseye-browsers-legacy
steps:
- when:
condition: << pipeline.parameters.run-ts-workflow >>
- checkout
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: Install dependencies
command: |
brew install --cask dotnet-sdk
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults

store-and-upload-nupkgs:
docker:
- image: mcr.microsoft.com/dotnet/sdk:6.0-jammy # Ubuntu 22.04
@@ -656,27 +641,27 @@ jobs:
node-version: "18.16"
- run: node --version
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
- run:
command: yarn run test
name: Run YARN tests
override-ci-command: yarn install
- run: cd gpt4all-bindings/typescript
- run:
command: |
# excluding llmodel. nodejs bindings dont need llmodel.dll
cd gpt4all-bindings/typescript
mkdir -p runtimes/win32-x64/native
cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/

- run:
name: Publish to NPM
command: |
npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
npm publish
cd gpt4all-bindings/typescript
# excluding llmodel. nodejs bindings dont need llmodel.dll
mkdir -p runtimes/win32-x64/native
cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -Ra gpt4all-bindings/typescript/runtimes
# - run:
#   name: Publish to NPM
#   command: |
#     npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
#     npm publish

workflows:
version: 2
@@ -756,6 +741,8 @@ workflows:
type: approval
- nuget-hold:
type: approval
- npm-hold:
type: approval
- build-bindings-backend-linux:
filters:
branches:
@@ -781,6 +768,16 @@ workflows:
requires:
- hold
# NodeJs Jobs
- prepare-npm-pkg:
filters:
branches:
only:
requires:
- node/test
- npm-hold
# - build-bindings-backend-linux
# - build-bindings-backend-windows-msvc
# - build-bindings-backend-macos
# CSharp Jobs
- build-csharp-linux:
filters:
@@ -809,4 +806,3 @@ workflows:
- build-csharp-windows
- build-csharp-linux
- build-csharp-macos
.gitignore (vendored, 3 lines changed)
@@ -1,3 +1,6 @@
*.arrow
squad_*
*sbert_embedded*
*.pkl
ckpts*
.deepspeed_env
@@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.16)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
@@ -19,7 +20,7 @@ endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}")

set(LLMODEL_VERSION_MAJOR 0)
set(LLMODEL_VERSION_MINOR 2)
set(LLMODEL_VERSION_MINOR 3)
set(LLMODEL_VERSION_PATCH 0)
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
@@ -124,6 +125,10 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
add_library(mpt-${BUILD_VARIANT} SHARED
mpt.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(mpt ggml-230511)

add_library(bert-${BUILD_VARIANT} SHARED
bert.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(bert llama-mainline)
endif()
endforeach()
gpt4all-backend/bert.cpp (new file, 1066 lines): file diff suppressed because it is too large

gpt4all-backend/bert_impl.h (new file, 44 lines)
@@ -0,0 +1,44 @@
#ifndef BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#error This file is NOT meant to be included outside of bert.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#endif
#ifndef BERT_H
#define BERT_H

#include <string>
#include <functional>
#include <vector>
#include <memory>
#include "llmodel.h"

struct BertPrivate;
class Bert : public LLModel {
public:
    Bert();
    ~Bert();

    bool supportsEmbedding() const override { return true; }
    bool supportsCompletion() const override { return true; }
    bool loadModel(const std::string &modelPath) override;
    bool isModelLoaded() const override;
    size_t requiredMem(const std::string &modelPath) override;
    size_t stateSize() const override;
    size_t saveState(uint8_t *dest) const override;
    size_t restoreState(const uint8_t *src) override;
    void setThreadCount(int32_t n_threads) override;
    int32_t threadCount() const override;

    std::vector<float> embedding(const std::string &text) override;

private:
    std::unique_ptr<BertPrivate> d_ptr;

protected:
    std::vector<Token> tokenize(PromptContext &, const std::string&) const override;
    Token sampleToken(PromptContext &ctx) const override;
    std::string tokenToString(Token) const override;
    bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
    int32_t contextLength() const override;
    const std::vector<Token>& endTokens() const override;
};

#endif // BERT_H
@@ -16,6 +16,8 @@ public:
Falcon();
~Falcon();

bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
@@ -15,6 +15,8 @@ public:
GPTJ();
~GPTJ();

bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
@@ -15,6 +15,8 @@ public:
LLamaModel();
~LLamaModel();

bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
@@ -10,17 +10,19 @@
#include <cassert>
#include <cstdlib>
#include <sstream>
#ifdef _MSC_VER
#include <windows.h>
#include <processthreadsapi.h>
#endif

std::string s_implementations_search_path = ".";

static bool has_at_least_minimal_hardware() {
#ifdef __x86_64__
#if defined(__x86_64__) || defined(_M_X64)
#ifndef _MSC_VER
return __builtin_cpu_supports("avx");
#else
int cpuInfo[4];
__cpuid(cpuInfo, 1);
return cpuInfo[2] & (1 << 28);
return IsProcessorFeaturePresent(PF_AVX_INSTRUCTIONS_AVAILABLE);
#endif
#else
return true; // Don't know how to handle non-x86_64
@@ -28,54 +30,53 @@ static bool has_at_least_minimal_hardware() {
}

static bool requires_avxonly() {
#ifdef __x86_64__
#if defined(__x86_64__) || defined(_M_X64)
#ifndef _MSC_VER
return !__builtin_cpu_supports("avx2");
#else
int cpuInfo[4];
__cpuidex(cpuInfo, 7, 0);
return !(cpuInfo[1] & (1 << 5));
return !IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE);
#endif
#else
return false; // Don't know how to handle non-x86_64
#endif
}

LLModel::Implementation::Implementation(Dlhandle &&dlhandle_) : dlhandle(new Dlhandle(std::move(dlhandle_))) {
auto get_model_type = dlhandle->get<const char *()>("get_model_type");
LLModel::Implementation::Implementation(Dlhandle &&dlhandle_)
: m_dlhandle(new Dlhandle(std::move(dlhandle_))) {
auto get_model_type = m_dlhandle->get<const char *()>("get_model_type");
assert(get_model_type);
modelType = get_model_type();
auto get_build_variant = dlhandle->get<const char *()>("get_build_variant");
m_modelType = get_model_type();
auto get_build_variant = m_dlhandle->get<const char *()>("get_build_variant");
assert(get_build_variant);
buildVariant = get_build_variant();
magicMatch = dlhandle->get<bool(std::ifstream&)>("magic_match");
assert(magicMatch);
construct_ = dlhandle->get<LLModel *()>("construct");
assert(construct_);
m_buildVariant = get_build_variant();
m_magicMatch = m_dlhandle->get<bool(std::ifstream&)>("magic_match");
assert(m_magicMatch);
m_construct = m_dlhandle->get<LLModel *()>("construct");
assert(m_construct);
}

LLModel::Implementation::Implementation(Implementation &&o)
: construct_(o.construct_)
, modelType(o.modelType)
, buildVariant(o.buildVariant)
, magicMatch(o.magicMatch)
, dlhandle(o.dlhandle) {
o.dlhandle = nullptr;
: m_magicMatch(o.m_magicMatch)
, m_construct(o.m_construct)
, m_modelType(o.m_modelType)
, m_buildVariant(o.m_buildVariant)
, m_dlhandle(o.m_dlhandle) {
o.m_dlhandle = nullptr;
}

LLModel::Implementation::~Implementation() {
if (dlhandle) delete dlhandle;
if (m_dlhandle) delete m_dlhandle;
}

bool LLModel::Implementation::isImplementation(const Dlhandle &dl) {
return dl.get<bool(uint32_t)>("is_g4a_backend_model_implementation");
}

const std::vector<LLModel::Implementation> &LLModel::implementationList() {
const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList() {
// NOTE: allocated on heap so we leak intentionally on exit so we have a chance to clean up the
// individual models without the cleanup of the static list interfering
static auto* libs = new std::vector<LLModel::Implementation>([] () {
std::vector<LLModel::Implementation> fres;
static auto* libs = new std::vector<Implementation>([] () {
std::vector<Implementation> fres;

auto search_in_directory = [&](const std::string& paths) {
std::stringstream ss(paths);
@@ -107,17 +108,17 @@ const std::vector<LLModel::Implementation> &LLModel::implementationList() {
return *libs;
}

const LLModel::Implementation* LLModel::implementation(std::ifstream& f, const std::string& buildVariant) {
const LLModel::Implementation* LLModel::Implementation::implementation(std::ifstream& f, const std::string& buildVariant) {
for (const auto& i : implementationList()) {
f.seekg(0);
if (!i.magicMatch(f)) continue;
if (buildVariant != i.buildVariant) continue;
if (!i.m_magicMatch(f)) continue;
if (buildVariant != i.m_buildVariant) continue;
return &i;
}
return nullptr;
}

LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant) {

if (!has_at_least_minimal_hardware())
return nullptr;
@@ -126,14 +127,15 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
std::ifstream f(modelPath, std::ios::binary);
if (!f) return nullptr;
// Get correct implementation
const LLModel::Implementation* impl = nullptr;
const Implementation* impl = nullptr;

#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
if (buildVariant == "auto") {
size_t total_mem = getSystemTotalRAMInBytes();
impl = implementation(f, "metal");
if(impl) {
LLModel* metalimpl = impl->construct();
LLModel* metalimpl = impl->m_construct();
metalimpl->m_implementation = impl;
size_t req_mem = metalimpl->requiredMem(modelPath);
float req_to_total = (float) req_mem / (float) total_mem;
// on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
@@ -160,14 +162,17 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
if (!impl) return nullptr;
}
f.close();

// Construct and return llmodel implementation
return impl->construct();
auto fres = impl->m_construct();
fres->m_implementation = impl;
return fres;
}

void LLModel::setImplementationsSearchPath(const std::string& path) {
void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
s_implementations_search_path = path;
}

const std::string& LLModel::implementationsSearchPath() {
const std::string& LLModel::Implementation::implementationsSearchPath() {
return s_implementations_search_path;
}
@@ -12,32 +12,34 @@
#define LLMODEL_MAX_PROMPT_BATCH 128

class Dlhandle;

class LLModel {
public:
using Token = int32_t;

class Implementation {
LLModel *(*construct_)();

public:
Implementation(Dlhandle&&);
Implementation(const Implementation&) = delete;
Implementation(Implementation&&);
~Implementation();

std::string_view modelType() const { return m_modelType; }
std::string_view buildVariant() const { return m_buildVariant; }

static bool isImplementation(const Dlhandle&);
static const std::vector<Implementation>& implementationList();
static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
static void setImplementationsSearchPath(const std::string& path);
static const std::string& implementationsSearchPath();

std::string_view modelType, buildVariant;
bool (*magicMatch)(std::ifstream& f);
Dlhandle *dlhandle;
private:
bool (*m_magicMatch)(std::ifstream& f);
LLModel *(*m_construct)();

// The only way an implementation should be constructed
LLModel *construct() const {
auto fres = construct_();
fres->m_implementation = this;
return fres;
}
private:
std::string_view m_modelType;
std::string_view m_buildVariant;
Dlhandle *m_dlhandle;
};

struct PromptContext {
@@ -59,18 +61,25 @@ public:
explicit LLModel() {}
virtual ~LLModel() {}

virtual bool supportsEmbedding() const = 0;
virtual bool supportsCompletion() const = 0;
virtual bool loadModel(const std::string &modelPath) = 0;
virtual bool isModelLoaded() const = 0;
virtual size_t requiredMem(const std::string &modelPath) = 0;
virtual size_t stateSize() const { return 0; }
virtual size_t saveState(uint8_t */*dest*/) const { return 0; }
virtual size_t restoreState(const uint8_t */*src*/) { return 0; }

// This method requires the model to return true from supportsCompletion otherwise it will throw
// an error
virtual void prompt(const std::string &prompt,
std::function<bool(int32_t)> promptCallback,
std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &ctx);

virtual std::vector<float> embedding(const std::string &text);

virtual void setThreadCount(int32_t /*n_threads*/) {}
virtual int32_t threadCount() const { return 1; }

@@ -78,13 +87,6 @@ public:
return *m_implementation;
}

static const std::vector<Implementation>& implementationList();
static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");

static void setImplementationsSearchPath(const std::string& path);
static const std::string& implementationsSearchPath();

protected:
// These are pure virtual because subclasses need to implement as the default implementation of
// 'prompt' above calls these functions
@@ -100,5 +102,9 @@ protected:
void recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate);

const Implementation *m_implementation = nullptr;

private:
friend class LLMImplementation;
};

#endif // LLMODEL_H
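The hunks above move the factory and search-path helpers from LLModel onto the nested Implementation class. A minimal usage sketch, not part of this diff, shows how a caller would adapt; the model file name, the "./lib" search path, and disposing of the instance with delete are assumptions for illustration only, and only methods declared in the header above are used.

```cpp
// Hypothetical caller of the relocated API (assumes "ggml-model.bin" exists
// locally and the backend libraries live in ./lib).
#include <iostream>
#include <vector>
#include "llmodel.h"

int main() {
    // Search path and construction now live on LLModel::Implementation.
    LLModel::Implementation::setImplementationsSearchPath("./lib");
    LLModel *model = LLModel::Implementation::construct("ggml-model.bin"); // buildVariant defaults to "auto"
    if (!model || !model->loadModel("ggml-model.bin")) {
        std::cerr << "failed to load model\n";
        return 1;
    }
    // modelType() and buildVariant() are now accessors instead of public fields.
    std::cout << "loaded a " << model->implementation().modelType() << " model\n";
    if (model->supportsEmbedding()) {
        std::vector<float> vec = model->embedding("hello world");
        std::cout << "embedding has " << vec.size() << " dimensions\n";
    }
    delete model; // assumed: the caller owns the instance returned by construct()
    return 0;
}
```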
@@ -29,7 +29,7 @@ llmodel_model llmodel_model_create2(const char *model_path, const char *build_variant,
int error_code = 0;

try {
wrapper->llModel = LLModel::construct(model_path, build_variant);
wrapper->llModel = LLModel::Implementation::construct(model_path, build_variant);
} catch (const std::exception& e) {
error_code = EINVAL;
last_error_message = e.what();
@@ -166,6 +166,25 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
ctx->context_erase = wrapper->promptContext.contextErase;
}

float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size)
{
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
std::vector<float> embeddingVector = wrapper->llModel->embedding(text);
float *embedding = (float *)malloc(embeddingVector.size() * sizeof(float));
if(embedding == nullptr) {
*embedding_size = 0;
return nullptr;
}
std::copy(embeddingVector.begin(), embeddingVector.end(), embedding);
*embedding_size = embeddingVector.size();
return embedding;
}

void llmodel_free_embedding(float *ptr)
{
free(ptr);
}

void llmodel_setThreadCount(llmodel_model model, int32_t n_threads)
{
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
@@ -180,10 +199,10 @@ int32_t llmodel_threadCount(llmodel_model model)

void llmodel_set_implementation_search_path(const char *path)
{
LLModel::setImplementationsSearchPath(path);
LLModel::Implementation::setImplementationsSearchPath(path);
}

const char *llmodel_get_implementation_search_path()
{
return LLModel::implementationsSearchPath().c_str();
return LLModel::Implementation::implementationsSearchPath().c_str();
}
@@ -171,6 +171,23 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
llmodel_recalculate_callback recalculate_callback,
llmodel_prompt_context *ctx);

/**
 * Generate an embedding using the model.
 * @param model A pointer to the llmodel_model instance.
 * @param text A string representing the text to generate an embedding for.
 * @param embedding_size A pointer to a size_t type that will be set by the call indicating the length
 * of the returned floating point array.
 * @return A pointer to an array of floating point values passed to the calling method which then will
 * be responsible for lifetime of this memory.
 */
float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size);

/**
 * Frees the memory allocated by the llmodel_embedding function.
 * @param ptr A pointer to the embedding as returned from llmodel_embedding.
 */
void llmodel_free_embedding(float *ptr);

/**
 * Set the number of threads to be used by the model.
 * @param model A pointer to the llmodel_model instance.
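A minimal sketch, not part of this diff, of the calling convention the new embedding doc comments describe: the caller receives a malloc'd float array, reads embedding_size, and must release it with llmodel_free_embedding. The header file name and the pre-existing, already-loaded model handle are assumptions for illustration.

```cpp
// Hypothetical helper using only the two functions declared above.
#include <cstdio>
#include "llmodel_c.h" // assumed header name for the C API shown above

void print_embedding(llmodel_model model, const char *text) {
    size_t embedding_size = 0;
    float *embedding = llmodel_embedding(model, text, &embedding_size);
    if (embedding == nullptr || embedding_size == 0) {
        std::fprintf(stderr, "no embedding produced\n");
        return;
    }
    for (size_t i = 0; i < embedding_size; ++i)
        std::printf("%f ", embedding[i]);
    std::printf("\n");
    llmodel_free_embedding(embedding); // the caller is responsible for this memory
}
```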
@@ -33,7 +33,14 @@ void LLModel::prompt(const std::string &prompt,
PromptContext &promptCtx)
{
if (!isModelLoaded()) {
std::cerr << implementation().modelType << " ERROR: prompt won't work with an unloaded model!\n";
std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
return;
}

if (!supportsCompletion()) {
std::string errorMessage = "ERROR: this model does not support text completion or chat!\n";
responseCallback(-1, errorMessage);
std::cerr << implementation().modelType() << errorMessage;
return;
}

@@ -45,8 +52,8 @@ void LLModel::prompt(const std::string &prompt,

if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
std::cerr << implementation().modelType << " ERROR: The prompt is" << embd_inp.size() <<
"tokens and the context window is" << promptCtx.n_ctx << "!\n";
std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
" tokens and the context window is " << promptCtx.n_ctx << "!\n";
return;
}

@@ -64,7 +71,7 @@ void LLModel::prompt(const std::string &prompt,
if (promptCtx.n_past + int32_t(batch.size()) > promptCtx.n_ctx) {
const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
// Erase the first percentage of context from the tokens...
std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
promptCtx.n_past = promptCtx.tokens.size();
recalculateContext(promptCtx, recalculateCallback);
@@ -72,7 +79,7 @@ void LLModel::prompt(const std::string &prompt,
}

if (!evalTokens(promptCtx, batch)) {
std::cerr << implementation().modelType << " ERROR: Failed to process prompt\n";
std::cerr << implementation().modelType() << " ERROR: Failed to process prompt\n";
return;
}

@@ -103,7 +110,7 @@ void LLModel::prompt(const std::string &prompt,
if (promptCtx.n_past + 1 > promptCtx.n_ctx) {
const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
// Erase the first percentage of context from the tokens...
std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
promptCtx.n_past = promptCtx.tokens.size();
recalculateContext(promptCtx, recalculateCallback);
@@ -111,7 +118,7 @@ void LLModel::prompt(const std::string &prompt,
}

if (!evalTokens(promptCtx, { id })) {
std::cerr << implementation().modelType << " ERROR: Failed to predict next token\n";
std::cerr << implementation().modelType() << " ERROR: Failed to predict next token\n";
return;
}

@@ -158,3 +165,12 @@ void LLModel::prompt(const std::string &prompt,
cachedTokens.clear();
}
}

std::vector<float> LLModel::embedding(const std::string &/*text*/)
{
if (!supportsCompletion()) {
std::string errorMessage = "ERROR: this model does not support generating embeddings!\n";
std::cerr << implementation().modelType() << errorMessage;
}
return std::vector<float>();
}
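The base-class fallback added at the end of this hunk returns an empty vector (after logging) when a model type has no embedding support, so an empty result is the failure signal a caller can rely on. A small defensive sketch, not part of this diff and written against only the virtuals declared in the header above; the model reference is assumed to be already loaded:

```cpp
#include <string>
#include <vector>
#include "llmodel.h"

// Sketch: treat an empty result from the base-class fallback as "unsupported".
bool try_embed(LLModel &model, const std::string &text, std::vector<float> &out) {
    if (!model.supportsEmbedding())
        return false;              // e.g. the GPT-J, LLaMA, MPT, Replit and Falcon classes above
    out = model.embedding(text);   // Bert returns a populated vector; the fallback returns {}
    return !out.empty();
}
```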
@@ -15,6 +15,8 @@ public:
MPT();
~MPT();

bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
@@ -17,6 +17,8 @@ public:
Replit();
~Replit();

bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string & modelPath) override;
gpt4all-backend/scripts/convert_bert_hf_to_ggml.py (new file, 102 lines)
@@ -0,0 +1,102 @@
import sys
import struct
import json
import torch
import numpy as np

from transformers import AutoModel, AutoTokenizer

if len(sys.argv) < 3:
    print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
    print("  ftype == 0 -> float32")
    print("  ftype == 1 -> float16")
    sys.exit(1)

# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"

with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
    encoder = json.load(f)

with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

with open(dir_model + "/vocab.txt", "r", encoding="utf-8") as f:
    vocab = f.readlines()
# possible data types
#   ftype == 0 -> float32
#   ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]

ftype = 1
if len(sys.argv) > 2:
    ftype = int(sys.argv[2])
    if ftype < 0 or ftype > 1:
        print("Invalid ftype: " + str(ftype))
        sys.exit(1)
    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"

tokenizer = AutoTokenizer.from_pretrained(dir_model)
model = AutoModel.from_pretrained(dir_model, low_cpu_mem_usage=True)
print (model)

print(tokenizer.encode('I believe the meaning of life is'))

list_vars = model.state_dict()
for name in list_vars.keys():
    print(name, list_vars[name].shape, list_vars[name].dtype)

fout = open(fname_out, "wb")

print(hparams)

fout.write(struct.pack("i", 0x62657274)) # magic: ggml in hex
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", hparams["max_position_embeddings"]))
fout.write(struct.pack("i", hparams["hidden_size"]))
fout.write(struct.pack("i", hparams["intermediate_size"]))
fout.write(struct.pack("i", hparams["num_attention_heads"]))
fout.write(struct.pack("i", hparams["num_hidden_layers"]))
fout.write(struct.pack("i", ftype))

for i in range(hparams["vocab_size"]):
    text = vocab[i][:-1] # strips newline at the end
    #print(f"{i}:{text}")
    data = bytes(text, 'utf-8')
    fout.write(struct.pack("i", len(data)))
    fout.write(data)

for name in list_vars.keys():
    data = list_vars[name].squeeze().numpy()
    if name in ['embeddings.position_ids', 'pooler.dense.weight', 'pooler.dense.bias']:
        continue
    print("Processing variable: " + name + " with shape: ", data.shape)

    n_dims = len(data.shape);

    # ftype == 0 -> float32, ftype == 1 -> float16
    if ftype == 1 and name[-7:] == ".weight" and n_dims == 2:
        print("  Converting to float16")
        data = data.astype(np.float16)
        l_type = 1
    else:
        l_type = 0

    # header
    str = name.encode('utf-8')
    fout.write(struct.pack("iii", n_dims, len(str), l_type))
    for i in range(n_dims):
        fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
    fout.write(str);

    # data
    data.tofile(fout)

fout.close()

print("Done. Output file: " + fname_out)
print("")
@@ -2,11 +2,13 @@

## What models are supported by the GPT4All ecosystem?

Currently, there are three different model architectures that are supported:
Currently, there are five different model architectures that are supported:

1. GPTJ - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
2. LLAMA - Based off of the LLAMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
1. GPT-J - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
2. LLaMA - Based off of the LLaMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
3. MPT - Based off of Mosaic ML's MPT architecture with examples found [here](https://huggingface.co/mosaicml/mpt-7b)
4. Replit - Based off of Replit Inc.'s Replit architecture with examples found [here](https://huggingface.co/replit/replit-code-v1-3b)
5. Falcon - Based off of TII's Falcon architecture with examples found [here](https://huggingface.co/tiiuae/falcon-40b)

## Why so many different architectures? What differentiates them?

@@ -25,6 +27,10 @@ The upstream [llama.cpp](https://github.com/ggerganov/llama.cpp) project has int
Fortunately, we have engineered a submoduling system allowing us to dynamically load different versions of the underlying library so that
GPT4All just works.

## What are the system requirements?

Your CPU needs to support [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) and you need enough RAM to load a model into memory.

## What about GPU inference?

In newer versions of llama.cpp, there has been some added support for NVIDIA GPU's for inference. We're investigating how to incorporate this into our downloadable installers.
@@ -1,8 +1,7 @@
# GPT4All Python API
# GPT4All Python Generation API
The `GPT4All` python package provides bindings to our C/C++ model backend libraries.
The source code and local build instructions can be found [here](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python).

## Quickstart

```bash
@@ -109,5 +108,5 @@ with model.chat_session():
print(model.current_chat_session)
```

### API documentation
::: gpt4all.gpt4all.GPT4All
gpt4all-bindings/python/docs/gpt4all_python_embedding.md (new file, 35 lines)
@@ -0,0 +1,35 @@
# Embeddings
GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained [Sentence Transformer](https://www.sbert.net/). These embeddings are comparable in quality for many tasks with OpenAI.

## Quickstart

```bash
pip install gpt4all
```

### Generating embeddings
The embedding model will automatically be downloaded if not installed.

=== "Embed4All Example"
    ``` py
    from gpt4all import GPT4All, Embed4All
    text = 'The quick brown fox jumps over the lazy dog'
    embedder = Embed4All()
    output = embedder.embed(text)
    print(output)
    ```
=== "Output"
    ```
    [0.034696947783231735, -0.07192722707986832, 0.06923297047615051, ...]
    ```
### Speed of embedding generation
The following table lists the generation speed for text document captured on an Intel i913900HX CPU with DDR5 5600 running with 8 threads under stable load.

| Tokens          | 128  | 512  | 2048 | 8129 | 16,384 |
| --------------- | ---- | ---- | ---- | ---- | ------ |
| Wall time (s)   | .02  | .08  | .24  | .96  | 1.9    |
| Tokens / Second | 6508 | 6431 | 8622 | 8509 | 8369   |

### API documentation
::: gpt4all.gpt4all.Embed4All
@@ -1,2 +1,2 @@
from .gpt4all import GPT4All # noqa
from .gpt4all import GPT4All, Embed4All # noqa
from .pyllmodel import LLModel # noqa
@@ -15,6 +15,36 @@ from . import pyllmodel
# TODO: move to config
DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")

class Embed4All:
"""
Python class that handles embeddings for GPT4All.
"""
def __init__(
self,
n_threads: Optional[int] = None,
):
"""
Constructor

Args:
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
"""
self.gpt4all = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin', n_threads=n_threads)

def embed(
self,
text: str
) -> list[float]:
"""
Generate an embedding.

Args:
text: The text document to generate an embedding for.

Returns:
An embedding of your document of text.
"""
return self.gpt4all.model.generate_embedding(text)

class GPT4All:
"""
@@ -39,7 +69,7 @@ class GPT4All:
model_type: Model architecture. This argument currently does not have any functionality and is just used as
descriptive identifier for user. Default is None.
allow_download: Allow API to download models from gpt4all.io. Default is True.
n_threads: number of CPU threads used by GPT4All. Default is None, than the number of threads are determined automatically.
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
"""
self.model_type = model_type
self.model = pyllmodel.LLModel()
@@ -112,6 +112,19 @@ llmodel.llmodel_prompt.argtypes = [

llmodel.llmodel_prompt.restype = None

llmodel.llmodel_embedding.argtypes = [
ctypes.c_void_p,
ctypes.c_char_p,
ctypes.POINTER(ctypes.c_size_t),
]

llmodel.llmodel_embedding.restype = ctypes.POINTER(ctypes.c_float)

llmodel.llmodel_free_embedding.argtypes = [
ctypes.POINTER(ctypes.c_float)
]
llmodel.llmodel_free_embedding.restype = None

llmodel.llmodel_setThreadCount.argtypes = [ctypes.c_void_p, ctypes.c_int32]
llmodel.llmodel_setThreadCount.restype = None

@@ -141,10 +154,11 @@ class LLModel:
self.model = None
self.model_name = None
self.context = None
self.llmodel_lib = llmodel

def __del__(self):
if self.model is not None:
llmodel.llmodel_model_destroy(self.model)
self.llmodel_lib.llmodel_model_destroy(self.model)

def memory_needed(self, model_path: str) -> int:
model_path_enc = model_path.encode("utf-8")
@@ -233,6 +247,17 @@ class LLModel:
self.context.repeat_last_n = repeat_last_n
self.context.context_erase = context_erase

def generate_embedding(
self,
text: str
) -> list[float]:
embedding_size = ctypes.c_size_t()
c_text = ctypes.c_char_p(text.encode('utf-8'))
embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
llmodel.llmodel_free_embedding(embedding_ptr)
return list(embedding_array)

def prompt_model(
self,
prompt: str,
gpt4all-bindings/python/gpt4all/tests/test_embed_timings.py (new file, 18 lines)
@@ -0,0 +1,18 @@
import sys
from io import StringIO

from gpt4all import GPT4All, Embed4All
import time

def time_embedding(i, embedder):
    text = 'foo bar ' * i
    start_time = time.time()
    output = embedder.embed(text)
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Time report: {2 * i / elapsed_time} tokens/second with {2 * i} tokens taking {elapsed_time} seconds")

if __name__ == "__main__":
    embedder = Embed4All(n_threads=8)
    for i in [2**n for n in range(6, 14)]:
        time_embedding(i, embedder)
@@ -1,8 +1,8 @@
import sys
from io import StringIO

from gpt4all import GPT4All

from gpt4all import GPT4All, Embed4All
import time

def test_inference():
model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
@@ -99,3 +99,11 @@ def test_inference_mpt():
output = model.generate(prompt)
assert isinstance(output, str)
assert len(output) > 0

def test_embedding():
text = 'The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox'
embedder = Embed4All()
output = embedder.embed(text)
#for i, value in enumerate(output):
#print(f'Value at index {i}: {value}')
assert len(output) == 384
@@ -10,7 +10,9 @@ use_directory_urls: false
nav:
- 'index.md'
- 'Bindings':
- 'GPT4All in Python': 'gpt4all_python.md'
- 'GPT4All in Python':
- 'Generation': 'gpt4all_python.md'
- 'Embedding': 'gpt4all_python_embedding.md'
- 'GPT4ALL in NodeJs': 'gpt4all_typescript.md'
- 'GPT4All Chat Client': 'gpt4all_chat.md'
- 'gpt4all_cli.md'
@@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,

setup(
name=package_name,
version="1.0.3",
version="1.0.6",
description="Python bindings for GPT4All",
author="Richard Guo",
author_email="richard@nomic.ai",
@@ -53,7 +53,7 @@ const response = await createCompletion(ll, [
* (win) msvc version 143
* Can be obtained with visual studio 2022 build tools

### Build
### Build (from source)

```sh
git clone https://github.com/nomic-ai/gpt4all.git
@@ -138,7 +138,7 @@ This package is in active development, and breaking changes may happen until the
* \[ ] createTokenStream, an async iterator that streams each token emitted from the model. Planning on following this [example](https://github.com/nodejs/node-addon-examples/tree/main/threadsafe-async-iterator)
* \[ ] proper unit testing (integrate with circle ci)
* \[ ] publish to npm under alpha tag `gpt4all@alpha`
* \[ ] have more people test on other platforms (mac tester needed)
* \[x] have more people test on other platforms (mac tester needed)
* \[x] switch to new pluggable backend

### Documentation
@@ -53,7 +53,7 @@
'-fno-rtti',
],
'cflags_cc': [
'-std=c++20'
'-std=c++2a'
]
}]
]
@@ -10,6 +10,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
InstanceMethod("raw_prompt", &NodeModelWrapper::Prompt),
InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
});
@@ -91,6 +92,23 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
}

Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo& info) {
auto env = info.Env();
std::string text = info[0].As<Napi::String>().Utf8Value();
size_t embedding_size = 0;
float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
auto arr_size = sizeof(arr) / sizeof(float);
Napi::Float32Array js_array = Napi::Float32Array::New(info.Env(), arr_size);

for (size_t i = 0; i < arr_size; ++i) {
float element = *(arr + i);
js_array[i] = element;
}

llmodel_free_embedding(arr);

return js_array;
}

/**
* Generate a response using the model.
@@ -23,6 +23,7 @@ public:
void SetThreadCount(const Napi::CallbackInfo& info);
Napi::Value getName(const Napi::CallbackInfo& info);
Napi::Value ThreadCount(const Napi::CallbackInfo& info);
Napi::Value GenerateEmbedding(const Napi::CallbackInfo& info);
/*
* The path that is used to search for the dynamic libraries
*/
@@ -1,6 +1,6 @@
{
"name": "gpt4all",
"version": "2.0.0",
"version": "2.0.0rc",
"packageManager": "yarn@3.5.1",
"main": "src/gpt4all.js",
"repository": "nomic-ai/gpt4all",
@@ -6,7 +6,7 @@ async function createPrebuilds(combinations) {
platform,
arch,
napi: true,
targets: ["18.15.0"]
targets: ["18.16.0"]
};
try {
await createPrebuild(opts);

File diff suppressed because it is too large
@@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.16)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

@@ -17,7 +18,7 @@ endif()

set(APP_VERSION_MAJOR 2)
set(APP_VERSION_MINOR 4)
set(APP_VERSION_PATCH 13)
set(APP_VERSION_PATCH 14)
set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")

# Include the binary directory for the generated header file
@@ -205,6 +206,8 @@ install(TARGETS replit-mainline-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
if(APPLE)
install(TARGETS replit-mainline-metal DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
endif()
install(TARGETS bert-avxonly DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
install(TARGETS bert-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})

set(CPACK_GENERATOR "IFW")
set(CPACK_VERBATIM_VARIABLES YES)
@@ -51,19 +51,7 @@ One click installers for macOS, Linux, and Windows at https://gpt4all.io
If you've already checked out the source code and/or built the program make sure when you do a git fetch to get the latest changes and that you also do ```git submodule update --init --recursive``` to update the submodules.

## Manual download of models
* https://gpt4all.io/models/ggml-mpt-7b-chat.bin (default) (md5sum 756249d3d6abe23bde3b1ae272628640) Current best non-commercially licensable chat model based on MPT and trained by Mosaic ML.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin (default) (md5sum 81a09a0ddf89690372fc296ff7f625af) Current best commercially licensable model based on GPT-J and trained by Nomic AI on the latest curated GPT4All dataset.
|
||||
* https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin (md5sum 91f886b68fbce697e9a3cd501951e455) Current best non-commercially licensable model based on Llama 13b and trained by Nomic AI on the latest curated GPT4All dataset.
|
||||
* https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin (md5sum 879344aaa9d62fdccbda0be7a09e7976) A commercially licensable model based on GPT-J and trained by Nomic AI on the v2 GPT4All dataset.
|
||||
* https://gpt4all.io/models/ggml-gpt4all-j-v1.1-breezy.bin (md5sum 61d48a82cb188cceb14ebb8082bfec37) A commercially licensable model based on GPT-J and trained by Nomic AI on the v1 GPT4All dataset.
|
||||
* https://gpt4all.io/models/ggml-gpt4all-j.bin (md5sum 5b5a3f9b858d33b29b52b89692415595) A commercially licensable model based on GPT-J and trained by Nomic AI on the v0 GPT4All dataset.
|
||||
* https://gpt4all.io/models/ggml-vicuna-7b-1.1-q4_2.bin (md5sum 29119f8fa11712704c6b22ac5ab792ea) An non-commercially licensable model based on Llama 7b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
|
||||
* https://gpt4all.io/models/ggml-vicuna-13b-1.1-q4_2.bin (md5sum 95999b7b0699e2070af63bf5d34101a8) An non-commercially licensable model based on Llama 13b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
|
||||
* https://gpt4all.io/models/ggml-wizardLM-7B.q4_2.bin (md5sum 99e6d129745a3f1fb1121abed747b05a) An non-commercially licensable model based on Llama 7b and trained by Microsoft and Peking University.
|
||||
* https://gpt4all.io/models/ggml-stable-vicuna-13B.q4_2.bin (md5sum 6cb4ee297537c9133bddab9692879de0) An non-commercially licensable model based on Llama 13b and RLHF trained by Stable AI.
|
||||
* https://gpt4all.io/models/ggml-mpt-7b-base.bin (md5sum 120c32a51d020066288df045ef5d52b9) A commercially licensable model base pre-trained by Mosaic ML.
|
||||
* https://gpt4all.io/models/ggml-nous-gpt4-vicuna-13b.bin (md5sum d5eafd5b0bd0d615cfd5fd763f642dfe) A non-commercially licensable model based on Vicuna 13b, fine-tuned on ~180,000 instructions, trained by Nous Research.
|
||||
* https://gpt4all.io/models/ggml-mpt-7b-instruct.bin (md5sum 1cfa4958f489f0a0d1ffdf6b37322809) A commercially licensable instruct model based on MPT and trained by Mosaic ML.
|
||||
* You can find a 'Model Explorer' on the official website where you can manually download models that we support: https://gpt4all.io/index.html
|
||||
|
||||
## Terminal Only Interface with no Qt dependency
|
||||
|
||||
|
@ -155,7 +155,7 @@ void ChatGPTWorker::request(const QString &apiKey,
m_ctx = promptCtx;

QUrl openaiUrl("https://api.openai.com/v1/chat/completions");
const QString authorization = QString("Bearer %1").arg(apiKey);
const QString authorization = QString("Bearer %1").arg(apiKey).trimmed();
QNetworkRequest request(openaiUrl);
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
request.setRawHeader("Authorization", authorization.toUtf8());
@ -244,7 +244,7 @@ void ChatGPTWorker::handleReadyRead()
void ChatGPTWorker::handleErrorOccurred(QNetworkReply::NetworkError code)
{
QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply) {
if (!reply || reply->error() == QNetworkReply::OperationCanceledError /*when we call abort on purpose*/) {
emit finished();
return;
}

@ -46,6 +46,8 @@ public:
ChatGPT();
virtual ~ChatGPT();

bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
@ -14,6 +14,7 @@
#define REPLIT_INTERNAL_STATE_VERSION 0
#define LLAMA_INTERNAL_STATE_VERSION 0
#define FALCON_INTERNAL_STATE_VERSION 0
#define BERT_INTERNAL_STATE_VERSION 0

class LLModelStore {
public:
@ -240,11 +241,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)

#if defined(Q_OS_MAC) && defined(__arm__)
if (m_forceMetal)
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "metal");
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "metal");
else
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "auto");
#else
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), "auto");
#endif

if (m_llModelInfo.model) {
@ -258,12 +259,13 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
m_llModelInfo = LLModelInfo();
emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
} else {
switch (m_llModelInfo.model->implementation().modelType[0]) {
switch (m_llModelInfo.model->implementation().modelType()[0]) {
case 'L': m_llModelType = LLModelType::LLAMA_; break;
case 'G': m_llModelType = LLModelType::GPTJ_; break;
case 'M': m_llModelType = LLModelType::MPT_; break;
case 'R': m_llModelType = LLModelType::REPLIT_; break;
case 'F': m_llModelType = LLModelType::FALCON_; break;
case 'B': m_llModelType = LLModelType::BERT_; break;
default:
{
delete std::exchange(m_llModelInfo.model, nullptr);
@ -628,8 +630,8 @@ bool ChatLLM::handleNameRecalculate(bool isRecalc)
qDebug() << "name recalc" << m_llmThread.objectName() << isRecalc;
#endif
Q_UNUSED(isRecalc);
Q_UNREACHABLE();
return false;
qt_noop();
return true;
}

bool ChatLLM::handleSystemPrompt(int32_t token)
@ -669,7 +671,8 @@ bool ChatLLM::serialize(QDataStream &stream, int version)
case MPT_: stream << MPT_INTERNAL_STATE_VERSION; break;
case GPTJ_: stream << GPTJ_INTERNAL_STATE_VERSION; break;
case LLAMA_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case FALCON_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case FALCON_: stream << FALCON_INTERNAL_STATE_VERSION; break;
case BERT_: stream << BERT_INTERNAL_STATE_VERSION; break;
default: Q_UNREACHABLE();
}
}
@ -788,13 +791,18 @@ void ChatLLM::processSystemPrompt()
if (!isModelLoaded() || m_processedSystemPrompt || m_isServer)
return;

const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
if (QString::fromStdString(systemPrompt).trimmed().isEmpty()) {
m_processedSystemPrompt = true;
return;
}

m_stopGenerating = false;
auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);
auto responseFunc = std::bind(&ChatLLM::handleSystemResponse, this, std::placeholders::_1,
std::placeholders::_2);
auto recalcFunc = std::bind(&ChatLLM::handleSystemRecalculate, this, std::placeholders::_1);

const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
const float top_p = MySettings::globalInstance()->modelTopP(m_modelInfo);

@ -16,6 +16,7 @@ enum LLModelType {
CHATGPT_,
REPLIT_,
FALCON_,
BERT_
};

struct LLModelInfo {
@ -7,16 +7,19 @@ file(GLOB MYMPTLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NA
file(GLOB MYLLAMALIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama*)
file(GLOB MYREPLITLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libreplit*)
file(GLOB MYFALCONLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libfalcon*)
file(GLOB MYBERTLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libbert*)
file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.*)
file(COPY ${MYGPTJLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYMPTLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLAMALIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYREPLITLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYFALCONLLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLAMALIBS}
file(COPY ${MYBERTLLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLMODELLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
@ -8,6 +8,7 @@
#include <QFile>
#include <QProcess>
#include <QResource>
#include <QSettings>
#include <fstream>

class MyLLM: public LLM { };
@ -33,7 +34,7 @@ LLM::LLM()
if (directoryExists(frameworksDir))
llmodelSearchPaths += ";" + frameworksDir;
#endif
LLModel::setImplementationsSearchPath(llmodelSearchPaths.toStdString());
LLModel::Implementation::setImplementationsSearchPath(llmodelSearchPaths.toStdString());

#if defined(__x86_64__)
#ifndef _MSC_VER
@ -48,7 +49,13 @@ LLM::LLM()
#endif

m_compatHardware = minimal;
emit compatHardwareChanged();
}

bool LLM::hasSettingsAccess() const
{
QSettings settings;
settings.sync();
return settings.status() == QSettings::NoError;
}

bool LLM::checkForUpdates() const
@ -6,12 +6,11 @@
class LLM : public QObject
{
Q_OBJECT
Q_PROPERTY(bool compatHardware READ compatHardware NOTIFY compatHardwareChanged)

public:
static LLM *globalInstance();

bool compatHardware() const { return m_compatHardware; }
Q_INVOKABLE bool hasSettingsAccess() const;
Q_INVOKABLE bool compatHardware() const { return m_compatHardware; }

Q_INVOKABLE bool checkForUpdates() const;
Q_INVOKABLE bool directoryExists(const QString &path) const;
@ -22,7 +21,6 @@ public:
Q_SIGNALS:
void chatListModelChanged();
void modelListChanged();
void compatHardwareChanged();

private:
bool m_compatHardware;
@ -89,14 +89,22 @@ Window {

property bool hasShownModelDownload: false
property bool hasShownFirstStart: false
property bool hasShownSettingsAccess: false

function startupDialogs() {
if (!LLM.compatHardware) {
if (!LLM.compatHardware()) {
Network.sendNonCompatHardware();
errorCompatHardware.open();
return;
}

// check if we have access to settings and if not show an error
if (!hasShownSettingsAccess && !LLM.hasSettingsAccess()) {
errorSettingsAccess.open();
hasShownSettingsAccess = true;
return;
}

// check for first time start of this version
if (!hasShownFirstStart && Download.isFirstStart()) {
firstStartDialog.open();
@ -135,6 +143,20 @@ Window {
+ qsTr("https://en.wikipedia.org/wiki/Advanced_Vector_Extensions</a>")
}

PopupDialog {
id: errorSettingsAccess
anchors.centerIn: parent
shouldTimeOut: false
shouldShowBusy: false
modal: true
text: qsTr("<h3>Encountered an error starting up:</h3><br>")
+ qsTr("<i>\"Inability to access settings file.\"</i>")
+ qsTr("<br><br>Unfortunately, something is preventing the program from accessing ")
+ qsTr("the settings file. This could be caused by incorrect permissions in the local ")
+ qsTr("app config directory where the settings file is located. ")
+ qsTr("Check out our <a href=\"https://discord.gg/4M2QFmTt2k\">discord channel</a> for help.")
}

StartupDialog {
id: firstStartDialog
anchors.centerIn: parent
@ -1,18 +1,16 @@
[
{
"order": "a",
"md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Hermes",
"filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "8136777088",
"requires": "2.4.7",
"md5sum": "e8d47924f433bd561cb5244557147793",
"name": "Wizard v1.1",
"filename": "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin",
"filesize": "7323310848",
"ramrequired": "16",
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
"systemPrompt": " ",
"description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul>"
},
{
"order": "b",
@ -25,12 +23,29 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "Falcon",
"systemPrompt": " ",
"description": "<strong>Best overall smaller model</strong><br><ul><li>Fast responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
},
{
"order": "c",
"md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Hermes",
"filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "8136777088",
"requires": "2.4.7",
"ramrequired": "16",
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Extremely good model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
},
{
"order": "e",
"md5sum": "81a09a0ddf89690372fc296ff7f625af",
"name": "Groovy",
"filename": "ggml-gpt4all-j-v1.3-groovy.bin",
@ -39,10 +54,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "GPT-J",
"systemPrompt": " ",
"description": "<strong>Creative model can be used for commercial purposes</strong><br><ul><li>Fast responses<li>Creative responses</li><li>Instruction based</li><li>Trained by Nomic AI<li>Licensed for commercial use</ul>"
},
{
"order": "e",
"order": "f",
"md5sum": "11d9f060ca24575a2c303bdc39952486",
"name": "Snoozy",
"filename": "GPT4All-13B-snoozy.ggmlv3.q4_0.bin",
@ -52,11 +68,12 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/GPT4All-13B-snoozy-GGML/resolve/main/GPT4All-13B-snoozy.ggmlv3.q4_0.bin"
},
{
"order": "f",
"order": "g",
"md5sum": "756249d3d6abe23bde3b1ae272628640",
"name": "MPT Chat",
"filename": "ggml-mpt-7b-chat.bin",
@ -71,9 +88,9 @@
"systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
},
{
"order": "g",
"order": "h",
"md5sum": "e64e74375ce9d36a3d0af3db1523fd0a",
"name": "Orca",
"name": "Mini Orca",
"filename": "orca-mini-7b.ggmlv3.q4_0.bin",
"filesize": "3791749248",
"requires": "2.4.7",
@ -87,9 +104,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "h",
"order": "i",
"md5sum": "6a087f7f4598fad0bb70e6cb4023645e",
"name": "Orca (Small)",
"name": "Mini Orca (Small)",
"filename": "orca-mini-3b.ggmlv3.q4_0.bin",
"filesize": "1928446208",
"requires": "2.4.7",
@ -103,9 +120,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "i",
"order": "j",
"md5sum": "959b7f65b2d12fd1e3ff99e7493c7a3a",
"name": "Orca (Large)",
"name": "Mini Orca (Large)",
"filename": "orca-mini-13b.ggmlv3.q4_0.bin",
"filesize": "7323329152",
"requires": "2.4.7",
@ -119,7 +136,7 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "j",
"order": "k",
"md5sum": "29119f8fa11712704c6b22ac5ab792ea",
"name": "Vicuna",
"filename": "ggml-vicuna-7b-1.1-q4_2.bin",
@ -128,10 +145,11 @@
"parameters": "7 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "k",
"order": "l",
"md5sum": "95999b7b0699e2070af63bf5d34101a8",
"name": "Vicuna (large)",
"filename": "ggml-vicuna-13b-1.1-q4_2.bin",
@ -140,10 +158,11 @@
"parameters": "13 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good larger model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "l",
"order": "m",
"md5sum": "99e6d129745a3f1fb1121abed747b05a",
"name": "Wizard",
"filename": "ggml-wizardLM-7B.q4_2.bin",
@ -152,10 +171,11 @@
"parameters": "7 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by Microsoft and Peking University</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "m",
"order": "n",
"md5sum": "6cb4ee297537c9133bddab9692879de0",
"name": "Stable Vicuna",
"filename": "ggml-stable-vicuna-13B.q4_2.bin",
@ -168,7 +188,7 @@
"systemPrompt": "## Assistant: I am StableVicuna, a large language model created by CarperAI. I am here to chat!\n\n"
},
{
"order": "n",
"order": "o",
"md5sum": "1cfa4958f489f0a0d1ffdf6b37322809",
"name": "MPT Instruct",
"filename": "ggml-mpt-7b-instruct.bin",
@ -178,10 +198,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "MPT",
"systemPrompt": " ",
"description": "<strong>Mosaic's instruction model</strong><br><ul><li>Instruction based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
},
{
"order": "o",
"order": "p",
"md5sum": "120c32a51d020066288df045ef5d52b9",
"name": "MPT Base",
"filename": "ggml-mpt-7b-base.bin",
@ -191,10 +212,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "MPT",
"systemPrompt": " ",
"description": "<strong>Trained for text completion with no assistant finetuning</strong><br><ul><li>Completion based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
},
{
"order": "p",
"order": "q",
"md5sum": "d5eafd5b0bd0d615cfd5fd763f642dfe",
"name": "Nous Vicuna",
"filename": "ggml-nous-gpt4-vicuna-13b.bin",
@ -203,10 +225,11 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on ~180,000 instructions</strong><br><ul><li>Instruction based<li>Trained by Nous Research<li>Cannot be used commercially</ul>"
},
{
"order": "q",
"order": "r",
"md5sum": "489d21fd48840dcb31e5f92f453f3a20",
"name": "Wizard Uncensored",
"filename": "wizardLM-13B-Uncensored.ggmlv3.q4_0.bin",
@ -216,11 +239,12 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on uncensored assistant data and instruction data</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/WizardLM-13B-Uncensored-GGML/resolve/main/wizardLM-13B-Uncensored.ggmlv3.q4_0.bin"
},
{
"order": "r",
"order": "s",
"md5sum": "615890cb571fcaa0f70b2f8d15ef809e",
"disableGUI": "true",
"name": "Replit",
@ -231,7 +255,23 @@
"parameters": "3 billion",
"quant": "f16",
"type": "Replit",
"systemPrompt": " ",
"description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/ggml-replit-code-v1-3b/resolve/main/ggml-replit-code-v1-3b.bin"
},
{
"order": "t",
"md5sum": "031bb5d5722c08d13e3e8eaf55c37391",
"disableGUI": "true",
"name": "Bert",
"filename": "ggml-all-MiniLM-L6-v2-f16.bin",
"filesize": "45521167",
"requires": "2.4.14",
"ramrequired": "1",
"parameters": "1 million",
"quant": "f16",
"type": "Bert",
"systemPrompt": " ",
"description": "<strong>Sbert</strong><br><ul><li>For embeddings"
}
]
@ -416,6 +416,40 @@
* Akarshan Biswas
* Adam Treat (Nomic AI)
* Community (beta testers, bug reporters)
"
},
{
"version": "2.4.13",
"notes":
"
* Fix bug with prolonging shutdown with generation
* Fix bug with update model info on deleting chats
* Fix bug with preventing closing of model download dialog
* Always allow closing the model download dialog
* Fix numerous bugs with download of models.json and provide backup option
* Add json and c# highlighting
* Fix bug with chatgpt crashing
* Fix bug with chatgpt not working for some keys
* Fix bug with mixpanel opt outs not counting
* Fix problem with OOM errors causing crash and then repeating on next start
* Fix default thread setting and provide guardrails
* Fix tap handler in settings dialog for buttons
* Fix color of some text fields on macOS for settings dialog
* Fix problem with startup dialog not closing
* Provide error dialog for settings file not accessible
* Try and fix problems with avx-only detection
* Fix showing error in model downloads unnecessarily
* Prefer 7b models to load by default
* Add Wizard v1.1 to download list
* Rename Orca models to Mini Orca
* Don't use a system prompt unless model was trained with one by default
",
"contributors":
"
* Lakshay Kansal (Nomic AI)
* Aaron Miller (Nomic AI)
* Adam Treat (Nomic AI)
* Community (beta testers, bug reporters)
"
}
]
@ -161,16 +161,6 @@ int InstalledModels::count() const
return rowCount();
}

QString InstalledModels::firstId() const
{
if (rowCount() > 0) {
QModelIndex firstIndex = index(0, 0);
return sourceModel()->data(firstIndex, ModelList::IdRole).toString();
} else {
return QString();
}
}

DownloadableModels::DownloadableModels(QObject *parent)
: QSortFilterProxyModel(parent)
, m_expanded(false)
@ -222,6 +212,7 @@ ModelList::ModelList()
: QAbstractListModel(nullptr)
, m_installedModels(new InstalledModels(this))
, m_downloadableModels(new DownloadableModels(this))
, m_asyncModelRequestOngoing(false)
{
m_installedModels->setSourceModel(this);
m_downloadableModels->setSourceModel(this);
@ -297,12 +288,9 @@ ModelInfo ModelList::defaultModelInfo() const
settings.sync();

// The user default model can be set by the user in the settings dialog. The "default" user
// default model is "Application default" which signals we should use the default model that was
// specified by the models.json file.
// default model is "Application default" which signals we should use the logic here.
const QString userDefaultModelName = MySettings::globalInstance()->userDefaultModel();
const bool hasUserDefaultName = !userDefaultModelName.isEmpty() && userDefaultModelName != "Application default";
const QString defaultModelName = settings.value("defaultModel").toString();
const bool hasDefaultName = hasUserDefaultName ? false : !defaultModelName.isEmpty();

ModelInfo *defaultModel = nullptr;
for (ModelInfo *info : m_models) {
@ -310,12 +298,10 @@ ModelInfo ModelList::defaultModelInfo() const
continue;
defaultModel = info;

// If we don't have either setting, then just use the first model that is installed
if (!hasUserDefaultName && !hasDefaultName)
break;
const size_t ramrequired = defaultModel->ramrequired;

// If we don't have a user specified default, but *do* have a default setting and match, then use it
if (!hasUserDefaultName && hasDefaultName && (defaultModel->id() == defaultModelName))
// If we don't have either setting, then just use the first model that requires less than 16GB that is installed
if (!hasUserDefaultName && !info->isChatGPT && ramrequired > 0 && ramrequired < 16)
break;

// If we have a user specified default and match, then use it
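Note: the default-model selection above is split across two truncated hunks, so the intent can be hard to follow in diff form. A compact sketch of how the pieces appear to fit together after this change — the `installed` flag, the fallback handling and the copyable ModelInfo return are assumptions for illustration, while `id()`, `isChatGPT` and `ramrequired` are the fields actually used in the hunks:

// Sketch only, not the verbatim function: an explicit user default wins;
// otherwise prefer the first installed local (non-ChatGPT) model needing
// less than 16GB of RAM, mirroring the new condition in the hunk above.
ModelInfo pickDefaultModel(const QList<ModelInfo*> &models, const QString &userDefault)
{
    const bool hasUserDefault = !userDefault.isEmpty() && userDefault != "Application default";
    ModelInfo *fallback = nullptr;
    for (ModelInfo *info : models) {
        if (!info->installed)            // assumption: skip models that are not installed
            continue;
        if (!fallback)
            fallback = info;             // remember the first installed model as a last resort
        if (hasUserDefault && info->id() == userDefault)
            return *info;                // user-specified default matches
        if (!hasUserDefault && !info->isChatGPT && info->ramrequired > 0 && info->ramrequired < 16)
            return *info;                // small local model preferred by default
    }
    return fallback ? *fallback : ModelInfo();
}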
@ -835,7 +821,7 @@ void ModelList::updateModelsFromDirectory()
for (const QString &id : modelsById) {
updateData(id, FilenameRole, filename);
updateData(id, ChatGPTRole, filename.startsWith("chatgpt-"));
updateData(id, DirpathRole, path);
updateData(id, DirpathRole, info.dir().absolutePath() + "/");
updateData(id, FilesizeRole, toFileSize(info.size()));
}
}
@ -846,14 +832,6 @@ void ModelList::updateModelsFromDirectory()
processDirectory(exePath);
if (localPath != exePath)
processDirectory(localPath);

if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
}

void ModelList::updateModelsFromJson()
@ -899,6 +877,9 @@ void ModelList::updateModelsFromJson()

void ModelList::updateModelsFromJsonAsync()
{
m_asyncModelRequestOngoing = true;
emit asyncModelRequestOngoingChanged();

#if defined(USE_LOCAL_MODELSJSON)
QUrl jsonUrl("file://" + QDir::homePath() + "/dev/large_language_models/gpt4all/gpt4all-chat/metadata/models.json");
#else
@ -911,17 +892,37 @@ void ModelList::updateModelsFromJsonAsync()
QNetworkReply *jsonReply = m_networkManager.get(request);
connect(qApp, &QCoreApplication::aboutToQuit, jsonReply, &QNetworkReply::abort);
connect(jsonReply, &QNetworkReply::finished, this, &ModelList::handleModelsJsonDownloadFinished);
connect(jsonReply, &QNetworkReply::errorOccurred, this, &ModelList::handleModelsJsonDownloadErrorOccurred);
}

void ModelList::handleModelsJsonDownloadFinished()
{
QNetworkReply *jsonReply = qobject_cast<QNetworkReply *>(sender());
if (!jsonReply)
if (!jsonReply) {
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
return;
}

QByteArray jsonData = jsonReply->readAll();
jsonReply->deleteLater();
parseModelsJsonFile(jsonData, true);
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
}

void ModelList::handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code)
{
// TODO: Show what error occurred in the GUI
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();

QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply)
return;

qWarning() << QString("ERROR: Modellist download failed with error code \"%1-%2\"")
.arg(code).arg(reply->errorString()).toStdString();
}

void ModelList::handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors)
@ -1108,14 +1109,6 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
updateData(id, ModelList::QuantRole, "NA");
updateData(id, ModelList::TypeRole, "GPT");
}

if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
}

void ModelList::updateModelsFromSettings()
@ -127,7 +127,6 @@ class InstalledModels : public QSortFilterProxyModel
public:
explicit InstalledModels(QObject *parent);
int count() const;
QString firstId() const;

Q_SIGNALS:
void countChanged();
@ -169,6 +168,7 @@ class ModelList : public QAbstractListModel
Q_PROPERTY(InstalledModels* installedModels READ installedModels NOTIFY installedModelsChanged)
Q_PROPERTY(DownloadableModels* downloadableModels READ downloadableModels NOTIFY downloadableModelsChanged)
Q_PROPERTY(QList<QString> userDefaultModelList READ userDefaultModelList NOTIFY userDefaultModelListChanged)
Q_PROPERTY(bool asyncModelRequestOngoing READ asyncModelRequestOngoing NOTIFY asyncModelRequestOngoingChanged)

public:
static ModelList *globalInstance();
@ -296,12 +296,14 @@ public:
}

QString incompleteDownloadPath(const QString &modelFile);
bool asyncModelRequestOngoing() const { return m_asyncModelRequestOngoing; }

Q_SIGNALS:
void countChanged();
void installedModelsChanged();
void downloadableModelsChanged();
void userDefaultModelListChanged();
void asyncModelRequestOngoingChanged();

private Q_SLOTS:
void updateModelsFromJson();
@ -310,6 +312,7 @@ private Q_SLOTS:
void updateModelsFromDirectory();
void updateDataForSettings();
void handleModelsJsonDownloadFinished();
void handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code);
void handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors);

private:
@ -328,6 +331,7 @@ private:
QList<ModelInfo*> m_models;
QHash<QString, ModelInfo*> m_modelMap;
QFileSystemWatcher *m_watcher;
bool m_asyncModelRequestOngoing;

private:
explicit ModelList();
@ -41,7 +41,7 @@ MyDialog {
}

Label {
visible: !ModelList.downloadableModels.count
visible: !ModelList.downloadableModels.count && !ModelList.asyncModelRequestOngoing
Layout.fillWidth: true
Layout.fillHeight: true
horizontalAlignment: Qt.AlignHCenter
@ -50,6 +50,15 @@ MyDialog {
color: theme.mutedTextColor
}

MyBusyIndicator {
visible: !ModelList.downloadableModels.count && ModelList.asyncModelRequestOngoing
running: ModelList.asyncModelRequestOngoing
Accessible.role: Accessible.Animation
Layout.alignment: Qt.AlignCenter
Accessible.name: qsTr("Busy indicator")
Accessible.description: qsTr("Displayed when the models request is ongoing")
}

ScrollView {
id: scrollView
ScrollBar.vertical.policy: ScrollBar.AlwaysOn
@ -18,6 +18,9 @@ enum Language {
Go,
Json,
Csharp,
Latex,
Html,
Php
};

static QColor keywordColor = "#2e95d3"; // blue
@ -33,6 +36,11 @@ static QColor commandColor = functionCallColor;
static QColor variableColor = numberColor;
static QColor keyColor = functionColor;
static QColor valueColor = stringColor;
static QColor parameterColor = stringColor;
static QColor attributeNameColor = numberColor;
static QColor attributeValueColor = stringColor;
static QColor specialCharacterColor = functionColor;
static QColor doctypeColor = commentColor;

static Language stringToLanguage(const QString &language)
{
@ -62,6 +70,12 @@ static Language stringToLanguage(const QString &language)
return Go;
if (language == "json")
return Json;
if (language == "latex")
return Latex;
if (language == "html")
return Html;
if (language == "php")
return Php;
return None;
}
@ -561,6 +575,135 @@ static QVector<HighlightingRule> bashHighlightingRules()
return highlightingRules;
}

static QVector<HighlightingRule> latexHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {

HighlightingRule rule;

QTextCharFormat commandFormat;
commandFormat.setForeground(commandColor); // commandColor needs to be set to your liking
rule.pattern = QRegularExpression("\\\\[A-Za-z]+"); // Pattern for LaTeX commands
rule.format = commandFormat;
highlightingRules.append(rule);

QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor); // commentColor needs to be set to your liking
rule.pattern = QRegularExpression("%[^\n]*"); // Pattern for LaTeX comments
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}

static QVector<HighlightingRule> htmlHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {

HighlightingRule rule;

QTextCharFormat attributeNameFormat;
attributeNameFormat.setForeground(attributeNameColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*=");
rule.format = attributeNameFormat;
highlightingRules.append(rule);

QTextCharFormat attributeValueFormat;
attributeValueFormat.setForeground(attributeValueColor);
rule.pattern = QRegularExpression("\".*?\"|'.*?'");
rule.format = attributeValueFormat;
highlightingRules.append(rule);

QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("<!--.*?-->");
rule.format = commentFormat;
highlightingRules.append(rule);

QTextCharFormat specialCharacterFormat;
specialCharacterFormat.setForeground(specialCharacterColor);
rule.pattern = QRegularExpression("&[a-zA-Z0-9#]*;");
rule.format = specialCharacterFormat;
highlightingRules.append(rule);

QTextCharFormat doctypeFormat;
doctypeFormat.setForeground(doctypeColor);
rule.pattern = QRegularExpression("<!DOCTYPE.*?>");
rule.format = doctypeFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}

static QVector<HighlightingRule> phpHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {

HighlightingRule rule;

QTextCharFormat functionCallFormat;
functionCallFormat.setForeground(functionCallColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*(?=\\()");
rule.format = functionCallFormat;
highlightingRules.append(rule);

QTextCharFormat functionFormat;
functionFormat.setForeground(functionColor);
rule.pattern = QRegularExpression("\\bfunction\\s+(\\w+)\\b");
rule.format = functionFormat;
highlightingRules.append(rule);

QTextCharFormat numberFormat;
numberFormat.setForeground(numberColor);
rule.pattern = QRegularExpression("\\b[0-9]*\\.?[0-9]+\\b");
rule.format = numberFormat;
highlightingRules.append(rule);

QTextCharFormat keywordFormat;
keywordFormat.setForeground(keywordColor);
QStringList keywordPatterns = {
"\\bif\\b", "\\belse\\b", "\\belseif\\b", "\\bwhile\\b", "\\bfor\\b",
"\\bforeach\\b", "\\breturn\\b", "\\bprint\\b", "\\binclude\\b", "\\brequire\\b",
"\\binclude_once\\b", "\\brequire_once\\b", "\\btry\\b", "\\bcatch\\b",
"\\bfinally\\b", "\\bcontinue\\b", "\\bbreak\\b", "\\bclass\\b", "\\bfunction\\b",
"\\bnew\\b", "\\bthrow\\b", "\\barray\\b", "\\bpublic\\b", "\\bprivate\\b",
"\\bprotected\\b", "\\bstatic\\b", "\\bglobal\\b", "\\bisset\\b", "\\bunset\\b",
"\\bnull\\b", "\\btrue\\b", "\\bfalse\\b"
};

for (const QString &pattern : keywordPatterns) {
rule.pattern = QRegularExpression(pattern);
rule.format = keywordFormat;
highlightingRules.append(rule);
}

QTextCharFormat stringFormat;
stringFormat.setForeground(stringColor);
rule.pattern = QRegularExpression("\".*?\"");
rule.format = stringFormat;
highlightingRules.append(rule);

rule.pattern = QRegularExpression("\'.*?\'");
rule.format = stringFormat;
highlightingRules.append(rule);

QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("//[^\n]*");
rule.format = commentFormat;
highlightingRules.append(rule);

rule.pattern = QRegularExpression("/\\*.*?\\*/");
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}

static QVector<HighlightingRule> jsonHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
@ -616,6 +759,12 @@ void SyntaxHighlighter::highlightBlock(const QString &text)
rules = javaHighlightingRules();
else if (block.userState() == Json)
rules = jsonHighlightingRules();
else if (block.userState() == Latex)
rules = latexHighlightingRules();
else if (block.userState() == Html)
rules = htmlHighlightingRules();
else if (block.userState() == Php)
rules = phpHighlightingRules();

for (const HighlightingRule &rule : qAsConst(rules)) {
QRegularExpressionMatchIterator matchIterator = rule.pattern.globalMatch(text);
@ -821,7 +970,10 @@ void ResponseText::handleCodeBlocks()
|| firstWord == "java"
|| firstWord == "go"
|| firstWord == "golang"
|| firstWord == "json") {
|| firstWord == "json"
|| firstWord == "latex"
|| firstWord == "html"
|| firstWord == "php") {
codeLanguage = firstWord;
capturedText.remove(0, match.captured(0).length());
}
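Note: the hunks above all follow the same registration pattern, which is the full set of changes needed to teach the chat window a new fenced-code language. As a hedged illustration only — the `Yaml` language below is hypothetical and not part of this change — the same four touch points would look like this:

// Hypothetical example: sketches the pattern used by the LaTeX/HTML/PHP additions above.
// 1. Add a value to enum Language, e.g. Yaml
// 2. Map the fence tag in stringToLanguage():   if (language == "yaml") return Yaml;
// 3. Provide a rules function reusing the file's HighlightingRule/color definitions:
static QVector<HighlightingRule> yamlHighlightingRules()
{
    static QVector<HighlightingRule> highlightingRules;
    if (highlightingRules.isEmpty()) {
        HighlightingRule rule;

        QTextCharFormat keyFormat;
        keyFormat.setForeground(keyColor);
        rule.pattern = QRegularExpression("^\\s*[A-Za-z0-9_-]+(?=\\s*:)"); // keys before a colon
        rule.format = keyFormat;
        highlightingRules.append(rule);

        QTextCharFormat commentFormat;
        commentFormat.setForeground(commentColor);
        rule.pattern = QRegularExpression("#[^\n]*"); // YAML comments
        rule.format = commentFormat;
        highlightingRules.append(rule);
    }
    return highlightingRules;
}
// 4. Branch on it in SyntaxHighlighter::highlightBlock():
//      else if (block.userState() == Yaml) rules = yamlHighlightingRules();
//    and accept the tag in ResponseText::handleCodeBlocks():   || firstWord == "yaml"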
49 gpt4all-training/configs/deepspeed/ds_config_mpt.json Normal file
@ -0,0 +1,49 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 1,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
}
}
}
48 gpt4all-training/configs/deepspeed/ds_config_pythia.json Normal file
@ -0,0 +1,48 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 2,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear"
}
}
}
34 gpt4all-training/configs/train/finetune_falcon.yaml Normal file
@ -0,0 +1,34 @@
# model/tokenizer
model_name: "tiiuae/falcon-7b"
tokenizer_name: "tiiuae/falcon-7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-falcon"

# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 32

# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: "/home/paperspace/gpt4all/ckpts/mpt/step_1000"
lora: false
warmup_steps: 500
num_epochs: 2

# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42
34 gpt4all-training/configs/train/finetune_mpt.yaml Normal file
@ -0,0 +1,34 @@
# model/tokenizer
model_name: "mosaicml/mpt-7b"
tokenizer_name: "mosaicml/mpt-7b"
gradient_checkpointing: false
save_name: "nomic-ai/mpt-finetuned-round2"

# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 8

# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/mpt"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 2

# logging
wandb: false
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42
34 gpt4all-training/configs/train/finetune_openllama.yaml Normal file
@ -0,0 +1,34 @@
# model/tokenizer
model_name: "openlm-research/open_llama_7b"
tokenizer_name: "openlm-research/open_llama_7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-openllama"

# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-updated"
revision: null
max_length: 1024
batch_size: 32

# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
log_every: 10
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 3

# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42
@ -12,7 +12,7 @@ def tokenize_inputs(config, tokenizer, examples):

# hacky backward compatible
different_eos = tokenizer.eos_token != "</s>"
out = {"labels": [], "input_ids": []}
out = {"labels": [], "input_ids": [], "attention_mask": []}
for prompt, response in zip(examples["prompt"], examples["response"]):
if different_eos:
if response.count("</s> \n") > 0:
@ -49,9 +49,10 @@ def tokenize_inputs(config, tokenizer, examples):
print(response)
raise

input_tokens = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length)["input_ids"]
padded = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length, return_tensors="pt")
out["labels"].append(labels)
out["input_ids"].append(input_tokens)
out["input_ids"].append(padded["input_ids"])
out["attention_mask"].append(padded["attention_mask"])

out = {k: torch.stack(v) if isinstance(v, list) else v for k, v in out.items()}

@ -72,7 +73,7 @@ def load_data(config, tokenizer):
dataset = load_dataset("json", data_files=files, split="train")

else:
dataset = load_dataset(dataset_path, split="train")
dataset = load_dataset(dataset_path, split="train", revision=config["revision"] if "revision" in config else None)

dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])

@ -83,19 +84,23 @@ def load_data(config, tokenizer):
else:
kwargs = {}

cols_to_keep = ["input_ids", "labels", "attention_mask"]
# tokenize inputs and return labels and attention mask
train_dataset = train_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True,
remove_columns=["source", "prompt"],
**kwargs
)
remove_cols = [col for col in train_dataset.column_names if col not in cols_to_keep]
train_dataset = train_dataset.remove_columns(remove_cols)

val_dataset = val_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True,
remove_columns=["source", "prompt"],
**kwargs
)
remove_cols = [col for col in val_dataset.column_names if col not in cols_to_keep]
val_dataset = val_dataset.remove_columns(remove_cols)

train_dataset = train_dataset.with_format("torch")
val_dataset = val_dataset.with_format("torch")
@ -106,12 +111,14 @@ def load_data(config, tokenizer):
train_dataset,
collate_fn=DefaultDataCollator(),
batch_size=config["batch_size"],
shuffle=True,
)

val_dataloader = DataLoader(
val_dataset,
collate_fn=DefaultDataCollator(),
batch_size=config["batch_size"],
shuffle=True,
)

return train_dataloader, val_dataloader
@ -1,10 +1,10 @@
accelerate
datasets
einops
torchmetrics
evaluate
transformers>=4.28.0
wandb
pip
peft
nodelist-inflator
deepspeed
@ -1,5 +1,5 @@
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler, LlamaForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler
import torch
from torch.optim import AdamW
from argparse import ArgumentParser
@ -42,7 +42,7 @@ def train(accelerator, config):
accelerator.print(config)
accelerator.print(f"Using {accelerator.num_processes} GPUs")

tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'])
tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'], use_fast=False)
# if no pad token, set it to eos
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
@ -53,6 +53,7 @@ def train(accelerator, config):


checkpoint = config["gradient_checkpointing"]

model = AutoModelForCausalLM.from_pretrained(config["model_name"],
use_cache=False if checkpoint else True,
trust_remote_code=True)
@ -86,7 +87,7 @@ def train(accelerator, config):
# decay to min_lr instead of 0
lr_ratio = config["min_lr"] / config["lr"]
accelerator.print(f"Len of train_dataloader: {len(train_dataloader)}")
total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * config["num_epochs"]
total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * (config["num_epochs"])
# instead of decaying to zero, decay to ratio of min_lr / lr
total_num_steps += int(total_num_steps * lr_ratio) + config["warmup_steps"]
accelerator.print(f"Total training steps: {total_num_steps}")
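Note: to make the step arithmetic above concrete with assumed numbers — say len(train_dataloader) = 10,000 batches, gradient_accumulation_steps = 4, num_epochs = 2, warmup_steps = 500, and min_lr = 0 so lr_ratio = 0 — the code computes total_num_steps = (10000 / 4) * 2 = 5000, then 5000 + int(5000 * 0) + 500 = 5500. The hunk that follows passes this total to DummyScheduler as total_num_steps (instead of config["warmup_steps"]), so the decay phase now spans the whole planned run rather than ending right after warmup.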
@ -104,7 +105,7 @@ def train(accelerator, config):
)
else:
scheduler = DummyScheduler(
optimizer, total_num_steps=config["warmup_steps"], warmup_num_steps=config["warmup_steps"]
optimizer, total_num_steps=total_num_steps, warmup_num_steps=config["warmup_steps"]
)

model, optimizer, train_dataloader, val_dataloader, scheduler = accelerator.prepare(
@ -117,26 +118,34 @@ def train(accelerator, config):
if config["checkpoint"]:
accelerator.load_state(config["checkpoint"])
accelerator.print(f"Resumed from checkpoint: {config['checkpoint']}")
path = os.path.basename(config["train_args"]["resume_from_checkpoint"])
path = os.path.basename(config["checkpoint"])
training_difference = os.path.splitext(path)[0]
resume_step = int(training_difference.replace("step_", ""))
accelerator.skip_first_batches(train_dataloader, resume_step)
train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
accelerator.print(f"Resuming from step {resume_step}")
else:
resume_step = 0


# log gradients
if accelerator.is_main_process and config["wandb"]:
wandb.watch(model, log_freq=config["log_grads_every"], log="all")

for epoch in range(config["num_epochs"]):

accelerator.wait_for_everyone()

for epoch in range(0, config["num_epochs"]):
train_loss = MeanMetric(nan_strategy="error").to(model.device)
for step, batch in enumerate(tqdm(train_dataloader)):
curr_step = epoch * len(train_dataloader) + step
model.train()
outputs = model(**batch)
loss = outputs.loss

# gather loss before backprop in case of gradient accumulation
loss_values = accelerator.gather_for_metrics({"loss": loss.detach().float()})
if config["wandb"]:
accelerator.log({"loss": torch.mean(loss_values["loss"]).item()}, step=curr_step)
train_loss.update(loss_values["loss"])

loss = loss / gradient_accumulation_steps
@ -144,9 +153,8 @@ def train(accelerator, config):
# get gradient norm of all params

# log LR in case something weird happens
if step > 0 and step % (config["eval_every"] // 10) == 0:
if step > 0 and step % (config["log_lr_every"]) == 0:
if config["wandb"]:
curr_step = step + epoch * len(train_dataloader)
accelerator.log({"lr": scheduler.get_last_lr()[0]}, step=curr_step)

if (step + 1) % gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
@ -156,7 +164,6 @@ def train(accelerator, config):


if step > 0 and step % config["save_every"] == 0:
curr_step = step + epoch * len(train_dataloader)
accelerator.save_state(f"{config['output_dir']}/step_{curr_step}")

if step > 0 and (step % config["eval_every"] == 0 or step == len(train_dataloader) - 1):
@ -170,7 +177,6 @@ def train(accelerator, config):
}

if config["wandb"]:
curr_step = step + epoch * len(train_dataloader)
accelerator.log({**log_train, **log_val}, step=curr_step)

accelerator.print(f"Current LR: {scheduler.get_last_lr()[0]}")
@ -181,8 +187,14 @@ def train(accelerator, config):

accelerator.print(f"Epoch {epoch} finished")
accelerator.print(f"Pushing to HF hub")
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)

unwrapped_model.save_pretrained(
f"{config['output_dir']}/epoch_{epoch}",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)
try:
if accelerator.is_main_process:
unwrapped_model.push_to_hub(config["save_name"] + f"-epoch_{epoch}", private=True)
@ -191,21 +203,16 @@ def train(accelerator, config):
accelerator.print(e)
accelerator.print(f"Failed to push to hub")


if config["num_epochs"] > 1:
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/epoch_{epoch}",
f"{config['output_dir']}/final",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)

accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/final",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)

accelerator.end_training()