Compare commits

...

60 Commits

Author SHA1 Message Date
Jacob Nguyen
f3f25a9928 revmove noop 2023-07-15 22:58:44 -05:00
Jacob Nguyen
44e20688cf prevent rebuild 2023-07-15 22:00:28 -05:00
Jacob Nguyen
a82ce3b864 fix 2023-07-15 21:17:03 -05:00
Jacob Nguyen
5bf4462ac2 revert 2023-07-15 21:14:39 -05:00
Jacob Nguyen
cabb089f25 update ci 2023-07-15 21:12:30 -05:00
Jacob Nguyen
965b435406 fix pwd 2023-07-15 20:50:07 -05:00
Jacob Nguyen
127288180a fix pwd 2023-07-15 20:42:50 -05:00
Jacob Nguyen
a82204e514 fix ci 2023-07-15 20:21:07 -05:00
Jacob Nguyen
2aa1352628 fixed required workflows 2023-07-15 18:19:31 -05:00
Jacob Nguyen
2c36da14b3 fix 2023-07-15 17:29:44 -05:00
Jacob Nguyen
6675ccda4d fix again 2023-07-15 17:27:56 -05:00
Jacob Nguyen
9e903775ae bruh 2023-07-15 17:19:46 -05:00
Jacob Nguyen
020053203f update circle ci script 2023-07-15 17:18:54 -05:00
Jacob Nguyen
5ce7563afb fix circle ci 2023-07-15 14:38:00 -05:00
Jacob Nguyen
115719612a fix circle ci 2023-07-15 14:32:39 -05:00
Jacob Nguyen
1ca7e7e083 Merge branch 'main' into jacoobes-patch-1 2023-07-15 14:13:34 -05:00
Jacob Nguyen
477b13a5de basic embedding with sbert (not tested & cpp side only) 2023-07-15 14:10:28 -05:00
Andriy Mulyar
cfd70b69fc
Update gpt4all_python_embedding.md
Signed-off-by: Andriy Mulyar <andriy.mulyar@gmail.com>
2023-07-14 14:54:56 -04:00
Andriy Mulyar
306105e62f
Update gpt4all_python_embedding.md
Signed-off-by: Andriy Mulyar <andriy.mulyar@gmail.com>
2023-07-14 14:54:36 -04:00
Andriy Mulyar
89e277bb3c
Update gpt4all_python_embedding.md
Signed-off-by: Andriy Mulyar <andriy.mulyar@gmail.com>
2023-07-14 14:30:14 -04:00
Adam Treat
f543affa9a Add better docs and threading support to bert. 2023-07-14 14:14:22 -04:00
Lakshay Kansal
6c8669cad3 highlighting rules for html and php and latex 2023-07-14 11:36:01 -04:00
Adam Treat
0c0a4f2c22 Add the docs. 2023-07-14 10:48:18 -04:00
Adam Treat
6656f0f41e Fix the test to work and not do timings. 2023-07-14 09:48:57 -04:00
Adam Treat
bb2b82e1b9 Add docs and bump version since we changed python api again. 2023-07-14 09:48:57 -04:00
Aaron Miller
c77ab849c0 LLModel objects should hold a reference to the library
prevents llmodel lib from being gc'd before live model objects
2023-07-14 09:48:57 -04:00
Aaron Miller
1c4a244291 bump mem allocation a bit 2023-07-14 09:48:57 -04:00
Aaron Miller
936dcd2bfc use default n_threads 2023-07-14 09:48:57 -04:00
Aaron Miller
15f1fe5445 rename embedder 2023-07-14 09:48:57 -04:00
Adam Treat
ee4186d579 Fixup bert python bindings. 2023-07-14 09:48:57 -04:00
cosmic-snow
6200900677
Fix Windows MSVC arch detection (#1194)
- in llmodel.cpp to fix AVX-only handling

Signed-off-by: cosmic-snow <134004613+cosmic-snow@users.noreply.github.com>
2023-07-13 14:44:17 -04:00
Adam Treat
4963db8f43 Bump the version numbers for both python and c backend. 2023-07-13 14:21:46 -04:00
Adam Treat
0efdbfcffe Bert 2023-07-13 14:21:46 -04:00
Adam Treat
315a1f2aa2 Move it back as internal class. 2023-07-13 14:21:46 -04:00
Adam Treat
ae8eb297ac Add sbert backend. 2023-07-13 14:21:46 -04:00
Adam Treat
1f749d7633 Clean up backend code a bit and hide impl. details. 2023-07-13 14:21:46 -04:00
Adam Treat
33557b1f39 Move the implementation out of llmodel class. 2023-07-13 14:21:46 -04:00
Adam Treat
64b409e0b8 keep trying 2023-07-13 13:57:22 -04:00
Adam Treat
e59946f05d try again to unbreak circleci 2023-07-13 13:55:22 -04:00
Adam Treat
b72b409d40 try again to unbreak circlci 2023-07-13 13:52:55 -04:00
Adam Treat
59cae1132c Try and unbreak circleci. 2023-07-13 13:45:47 -04:00
Adam Treat
a0dae86a95 Add bert to models.json 2023-07-13 13:37:12 -04:00
AT
18ca8901f0
Update README.md
Signed-off-by: AT <manyoso@users.noreply.github.com>
2023-07-12 16:30:56 -04:00
cosmic-snow
00a945eaee Update gpt4all_faq.md
- Add information about AVX/AVX2.
- Update supported architectures.

Signed-off-by: cosmic-snow <134004613+cosmic-snow@users.noreply.github.com>
2023-07-12 15:19:26 -04:00
Zach Nussbaum
6c4f449b7a
fix: update train scripts and configs for other models (#1164)
* feat: falcon config

* feat: mpt config

* chore: gitignore

* refactor: step calculation

* fix: attention mask + shuffle on epoch end

* fix: return tensors

* fix: wait for everyone

* chore: config

* chore: ds config

* fix: remove ccols

* fix: logging and saving

* chore: add einops
2023-07-12 15:18:24 -04:00
Adam Treat
e8b19b8e82 Bump version to 2.4.14 and provide release notes. 2023-07-12 14:58:45 -04:00
Adam Treat
8eb0844277 Check if the trimmed version is empty. 2023-07-12 14:31:43 -04:00
Adam Treat
be395c12cc Make all system prompts empty by default if model does not include in training data. 2023-07-12 14:31:43 -04:00
Aaron Miller
6a8fa27c8d Correctly find models in subdirs of model dir
QDirIterator doesn't seem particular subdir aware, its path() returns
the iterated dir. This was the simplest way I found to get this right.
2023-07-12 14:18:40 -04:00
Adam Treat
8893db5896 Add wizard model and rename orca to be more specific. 2023-07-12 14:12:46 -04:00
Adam Treat
60627bd41f Prefer 7b models in order of default model load. 2023-07-12 12:50:18 -04:00
Aaron Miller
5df4f1bf8c codespell 2023-07-12 12:49:06 -04:00
Aaron Miller
10ca2c4475 center the spinner 2023-07-12 12:49:06 -04:00
Adam Treat
e9897518d1 Show busy if models.json download taking longer than expected. 2023-07-12 12:49:06 -04:00
Aaron Miller
432b7ebbd7 include windows.h just to be safe 2023-07-12 12:46:46 -04:00
Aaron Miller
95b8fb312e windows/msvc: use high level processor feature detection API
see https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
2023-07-12 12:46:46 -04:00
Aaron Miller
ad0e7fd01f chatgpt: ensure no extra newline in header 2023-07-12 10:53:25 -04:00
Aaron Miller
f0faa23ad5
cmakelists: always export build commands (#1179)
friendly for using editors with clangd integration that don't also
manage the build themselves
2023-07-12 10:49:24 -04:00
Adam Treat
0d726b22b8 When we explicitly cancel an operation we shouldn't throw an error. 2023-07-12 10:34:10 -04:00
Adam Treat
13b2d47be5 Provide an error dialog if for any reason we can't access the settings file. 2023-07-12 08:50:21 -04:00
57 changed files with 7910 additions and 4339 deletions


@ -463,50 +463,47 @@ jobs:
docker:
- image: mcr.microsoft.com/dotnet/sdk:7.0-jammy # Ubuntu 22.04
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
ls -R runtimes
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/
ls -R runtimes
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
build-csharp-windows:
executor:
@ -514,111 +511,99 @@ jobs:
size: large
shell: powershell.exe -ExecutionPolicy Bypass
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-win
- attach_workspace:
at: C:\Users\circleci\workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes\win-x64\native
cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet.exe restore Gpt4All
- save_cache:
paths:
- C:\Users\circleci\.nuget\packages
key: gpt4all-csharp-nuget-packages-win
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet.exe build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
$Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-win
- attach_workspace:
at: C:\Users\circleci\workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes\win-x64\native
cp C:\Users\circleci\workspace\runtimes\win-x64\*.dll runtimes\win-x64\native\
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet.exe restore Gpt4All
- save_cache:
paths:
- C:\Users\circleci\.nuget\packages
key: gpt4all-csharp-nuget-packages-win
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet.exe build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet.exe test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
$Env:Path += ";$Env:USERPROFILE\.dotnet\tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
build-csharp-macos:
macos:
xcode: "14.0.0"
steps:
- when:
condition: << pipeline.parameters.run-csharp-workflow >>
steps:
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: Install dependencies
command: |
brew install --cask dotnet-sdk
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
build-nodejs-linux:
docker:
- image: circleci/node:erbium-bullseye-browsers-legacy
steps:
- when:
condition: << pipeline.parameters.run-ts-workflow >>
- checkout
- checkout
- restore_cache:
keys:
- gpt4all-csharp-nuget-packages-nix
- run:
name: Install dependencies
command: |
brew install --cask dotnet-sdk
- attach_workspace:
at: /tmp/workspace
- run:
name: "Prepare Native Libs"
command: |
cd gpt4all-bindings/csharp
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -R runtimes
- run:
name: "Install project dependencies"
command: |
cd gpt4all-bindings/csharp
dotnet restore Gpt4All
- save_cache:
paths:
- ~/.nuget/packages
key: gpt4all-csharp-nuget-packages-nix
- run:
name: Build C# Project
command: |
cd gpt4all-bindings/csharp
dotnet build Gpt4All --configuration Release --nologo
- run:
name: "Run C# Tests"
command: |
cd gpt4all-bindings/csharp
dotnet test Gpt4All.Tests -v n -c Release --filter "SKIP_ON_CI!=True" --logger "trx"
- run:
name: Test results
command: |
cd gpt4all-bindings/csharp/Gpt4All.Tests
dotnet tool install -g trx2junit
export PATH="$PATH:$HOME/.dotnet/tools"
trx2junit TestResults/*.trx
- store_test_results:
path: gpt4all-bindings/csharp/Gpt4All.Tests/TestResults
store-and-upload-nupkgs:
docker:
- image: mcr.microsoft.com/dotnet/sdk:6.0-jammy # Ubuntu 22.04
@ -656,27 +641,27 @@ jobs:
node-version: "18.16"
- run: node --version
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
- run:
command: yarn run test
name: Run YARN tests
override-ci-command: yarn install
- run: cd gpt4all-bindings/typescript
- run:
command: |
# excluding llmodel. nodejs bindings dont need llmodel.dll
cd gpt4all-bindings/typescript
mkdir -p runtimes/win32-x64/native
cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
- run:
name: Publish to NPM
command: |
npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
npm publish
cd gpt4all-bindings/typescript
# excluding llmodel. nodejs bindings dont need llmodel.dll
mkdir -p runtimes/win32-x64/native
cp /tmp/workspace/runtimes/win-x64/*-*.dll runtimes/win-x64/native/
mkdir -p runtimes/linux-x64/native
cp /tmp/workspace/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
mkdir -p runtimes/osx/native
cp /tmp/workspace/runtimes/osx-x64/*-*.dylib runtimes/osx/native/
cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/
ls -Ra gpt4all-bindings/typescript/runtimes
# - run:
# name: Publish to NPM
# command: |
# npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
# npm publish
workflows:
version: 2
@ -756,6 +741,8 @@ workflows:
type: approval
- nuget-hold:
type: approval
- npm-hold:
type: approval
- build-bindings-backend-linux:
filters:
branches:
@ -781,6 +768,16 @@ workflows:
requires:
- hold
# NodeJs Jobs
- prepare-npm-pkg:
filters:
branches:
only:
requires:
- node/test
- npm-hold
# - build-bindings-backend-linux
# - build-bindings-backend-windows-msvc
# - build-bindings-backend-macos
# CSharp Jobs
- build-csharp-linux:
filters:
@ -809,4 +806,3 @@ workflows:
- build-csharp-windows
- build-csharp-linux
- build-csharp-macos

.gitignore (vendored): 3 changed lines

@ -1,3 +1,6 @@
*.arrow
squad_*
*sbert_embedded*
*.pkl
ckpts*
.deepspeed_env


@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.16)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
@ -19,7 +20,7 @@ endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
set(LLMODEL_VERSION_MAJOR 0)
set(LLMODEL_VERSION_MINOR 2)
set(LLMODEL_VERSION_MINOR 3)
set(LLMODEL_VERSION_PATCH 0)
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
@ -124,6 +125,10 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
add_library(mpt-${BUILD_VARIANT} SHARED
mpt.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(mpt ggml-230511)
add_library(bert-${BUILD_VARIANT} SHARED
bert.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(bert llama-mainline)
endif()
endforeach()

gpt4all-backend/bert.cpp (new file, 1066 changed lines): diff suppressed because it is too large.


@ -0,0 +1,44 @@
#ifndef BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#error This file is NOT meant to be included outside of bert.cpp. Doing so is DANGEROUS. Be sure to know what you are doing before proceeding to #define BERT_H_I_KNOW_WHAT_I_AM_DOING_WHEN_INCLUDING_THIS_FILE
#endif
#ifndef BERT_H
#define BERT_H
#include <string>
#include <functional>
#include <vector>
#include <memory>
#include "llmodel.h"
struct BertPrivate;
class Bert : public LLModel {
public:
Bert();
~Bert();
bool supportsEmbedding() const override { return true; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;
size_t stateSize() const override;
size_t saveState(uint8_t *dest) const override;
size_t restoreState(const uint8_t *src) override;
void setThreadCount(int32_t n_threads) override;
int32_t threadCount() const override;
std::vector<float> embedding(const std::string &text) override;
private:
std::unique_ptr<BertPrivate> d_ptr;
protected:
std::vector<Token> tokenize(PromptContext &, const std::string&) const override;
Token sampleToken(PromptContext &ctx) const override;
std::string tokenToString(Token) const override;
bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
int32_t contextLength() const override;
const std::vector<Token>& endTokens() const override;
};
#endif // BERT_H


@ -16,6 +16,8 @@ public:
Falcon();
~Falcon();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -15,6 +15,8 @@ public:
GPTJ();
~GPTJ();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -15,6 +15,8 @@ public:
LLamaModel();
~LLamaModel();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -10,17 +10,19 @@
#include <cassert>
#include <cstdlib>
#include <sstream>
#ifdef _MSC_VER
#include <windows.h>
#include <processthreadsapi.h>
#endif
std::string s_implementations_search_path = ".";
static bool has_at_least_minimal_hardware() {
#ifdef __x86_64__
#if defined(__x86_64__) || defined(_M_X64)
#ifndef _MSC_VER
return __builtin_cpu_supports("avx");
#else
int cpuInfo[4];
__cpuid(cpuInfo, 1);
return cpuInfo[2] & (1 << 28);
return IsProcessorFeaturePresent(PF_AVX_INSTRUCTIONS_AVAILABLE);
#endif
#else
return true; // Don't know how to handle non-x86_64
@ -28,54 +30,53 @@ static bool has_at_least_minimal_hardware() {
}
static bool requires_avxonly() {
#ifdef __x86_64__
#if defined(__x86_64__) || defined(_M_X64)
#ifndef _MSC_VER
return !__builtin_cpu_supports("avx2");
#else
int cpuInfo[4];
__cpuidex(cpuInfo, 7, 0);
return !(cpuInfo[1] & (1 << 5));
return !IsProcessorFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE);
#endif
#else
return false; // Don't know how to handle non-x86_64
#endif
}
LLModel::Implementation::Implementation(Dlhandle &&dlhandle_) : dlhandle(new Dlhandle(std::move(dlhandle_))) {
auto get_model_type = dlhandle->get<const char *()>("get_model_type");
LLModel::Implementation::Implementation(Dlhandle &&dlhandle_)
: m_dlhandle(new Dlhandle(std::move(dlhandle_))) {
auto get_model_type = m_dlhandle->get<const char *()>("get_model_type");
assert(get_model_type);
modelType = get_model_type();
auto get_build_variant = dlhandle->get<const char *()>("get_build_variant");
m_modelType = get_model_type();
auto get_build_variant = m_dlhandle->get<const char *()>("get_build_variant");
assert(get_build_variant);
buildVariant = get_build_variant();
magicMatch = dlhandle->get<bool(std::ifstream&)>("magic_match");
assert(magicMatch);
construct_ = dlhandle->get<LLModel *()>("construct");
assert(construct_);
m_buildVariant = get_build_variant();
m_magicMatch = m_dlhandle->get<bool(std::ifstream&)>("magic_match");
assert(m_magicMatch);
m_construct = m_dlhandle->get<LLModel *()>("construct");
assert(m_construct);
}
LLModel::Implementation::Implementation(Implementation &&o)
: construct_(o.construct_)
, modelType(o.modelType)
, buildVariant(o.buildVariant)
, magicMatch(o.magicMatch)
, dlhandle(o.dlhandle) {
o.dlhandle = nullptr;
: m_magicMatch(o.m_magicMatch)
, m_construct(o.m_construct)
, m_modelType(o.m_modelType)
, m_buildVariant(o.m_buildVariant)
, m_dlhandle(o.m_dlhandle) {
o.m_dlhandle = nullptr;
}
LLModel::Implementation::~Implementation() {
if (dlhandle) delete dlhandle;
if (m_dlhandle) delete m_dlhandle;
}
bool LLModel::Implementation::isImplementation(const Dlhandle &dl) {
return dl.get<bool(uint32_t)>("is_g4a_backend_model_implementation");
}
const std::vector<LLModel::Implementation> &LLModel::implementationList() {
const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList() {
// NOTE: allocated on heap so we leak intentionally on exit so we have a chance to clean up the
// individual models without the cleanup of the static list interfering
static auto* libs = new std::vector<LLModel::Implementation>([] () {
std::vector<LLModel::Implementation> fres;
static auto* libs = new std::vector<Implementation>([] () {
std::vector<Implementation> fres;
auto search_in_directory = [&](const std::string& paths) {
std::stringstream ss(paths);
@ -107,17 +108,17 @@ const std::vector<LLModel::Implementation> &LLModel::implementationList() {
return *libs;
}
const LLModel::Implementation* LLModel::implementation(std::ifstream& f, const std::string& buildVariant) {
const LLModel::Implementation* LLModel::Implementation::implementation(std::ifstream& f, const std::string& buildVariant) {
for (const auto& i : implementationList()) {
f.seekg(0);
if (!i.magicMatch(f)) continue;
if (buildVariant != i.buildVariant) continue;
if (!i.m_magicMatch(f)) continue;
if (buildVariant != i.m_buildVariant) continue;
return &i;
}
return nullptr;
}
LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant) {
LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant) {
if (!has_at_least_minimal_hardware())
return nullptr;
@ -126,14 +127,15 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVaria
std::ifstream f(modelPath, std::ios::binary);
if (!f) return nullptr;
// Get correct implementation
const LLModel::Implementation* impl = nullptr;
const Implementation* impl = nullptr;
#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
if (buildVariant == "auto") {
size_t total_mem = getSystemTotalRAMInBytes();
impl = implementation(f, "metal");
if(impl) {
LLModel* metalimpl = impl->construct();
LLModel* metalimpl = impl->m_construct();
metalimpl->m_implementation = impl;
size_t req_mem = metalimpl->requiredMem(modelPath);
float req_to_total = (float) req_mem / (float) total_mem;
// on a 16GB M2 Mac a 13B q4_0 (0.52) works for me but a 13B q4_K_M (0.55) does not
@ -160,14 +162,17 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVaria
if (!impl) return nullptr;
}
f.close();
// Construct and return llmodel implementation
return impl->construct();
auto fres = impl->m_construct();
fres->m_implementation = impl;
return fres;
}
void LLModel::setImplementationsSearchPath(const std::string& path) {
void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) {
s_implementations_search_path = path;
}
const std::string& LLModel::implementationsSearchPath() {
const std::string& LLModel::Implementation::implementationsSearchPath() {
return s_implementations_search_path;
}


@ -12,32 +12,34 @@
#define LLMODEL_MAX_PROMPT_BATCH 128
class Dlhandle;
class LLModel {
public:
using Token = int32_t;
class Implementation {
LLModel *(*construct_)();
public:
Implementation(Dlhandle&&);
Implementation(const Implementation&) = delete;
Implementation(Implementation&&);
~Implementation();
std::string_view modelType() const { return m_modelType; }
std::string_view buildVariant() const { return m_buildVariant; }
static bool isImplementation(const Dlhandle&);
static const std::vector<Implementation>& implementationList();
static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
static void setImplementationsSearchPath(const std::string& path);
static const std::string& implementationsSearchPath();
std::string_view modelType, buildVariant;
bool (*magicMatch)(std::ifstream& f);
Dlhandle *dlhandle;
private:
bool (*m_magicMatch)(std::ifstream& f);
LLModel *(*m_construct)();
// The only way an implementation should be constructed
LLModel *construct() const {
auto fres = construct_();
fres->m_implementation = this;
return fres;
}
private:
std::string_view m_modelType;
std::string_view m_buildVariant;
Dlhandle *m_dlhandle;
};
struct PromptContext {
@ -59,18 +61,25 @@ public:
explicit LLModel() {}
virtual ~LLModel() {}
virtual bool supportsEmbedding() const = 0;
virtual bool supportsCompletion() const = 0;
virtual bool loadModel(const std::string &modelPath) = 0;
virtual bool isModelLoaded() const = 0;
virtual size_t requiredMem(const std::string &modelPath) = 0;
virtual size_t stateSize() const { return 0; }
virtual size_t saveState(uint8_t */*dest*/) const { return 0; }
virtual size_t restoreState(const uint8_t */*src*/) { return 0; }
// This method requires the model to return true from supportsCompletion otherwise it will throw
// an error
virtual void prompt(const std::string &prompt,
std::function<bool(int32_t)> promptCallback,
std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &ctx);
virtual std::vector<float> embedding(const std::string &text);
virtual void setThreadCount(int32_t /*n_threads*/) {}
virtual int32_t threadCount() const { return 1; }
@ -78,13 +87,6 @@ public:
return *m_implementation;
}
static const std::vector<Implementation>& implementationList();
static const Implementation *implementation(std::ifstream& f, const std::string& buildVariant);
static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto");
static void setImplementationsSearchPath(const std::string& path);
static const std::string& implementationsSearchPath();
protected:
// These are pure virtual because subclasses need to implement as the default implementation of
// 'prompt' above calls these functions
@ -100,5 +102,9 @@ protected:
void recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate);
const Implementation *m_implementation = nullptr;
private:
friend class LLMImplementation;
};
#endif // LLMODEL_H


@ -29,7 +29,7 @@ llmodel_model llmodel_model_create2(const char *model_path, const char *build_va
int error_code = 0;
try {
wrapper->llModel = LLModel::construct(model_path, build_variant);
wrapper->llModel = LLModel::Implementation::construct(model_path, build_variant);
} catch (const std::exception& e) {
error_code = EINVAL;
last_error_message = e.what();
@ -166,6 +166,25 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
ctx->context_erase = wrapper->promptContext.contextErase;
}
float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size)
{
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
std::vector<float> embeddingVector = wrapper->llModel->embedding(text);
float *embedding = (float *)malloc(embeddingVector.size() * sizeof(float));
if(embedding == nullptr) {
*embedding_size = 0;
return nullptr;
}
std::copy(embeddingVector.begin(), embeddingVector.end(), embedding);
*embedding_size = embeddingVector.size();
return embedding;
}
void llmodel_free_embedding(float *ptr)
{
free(ptr);
}
void llmodel_setThreadCount(llmodel_model model, int32_t n_threads)
{
LLModelWrapper *wrapper = reinterpret_cast<LLModelWrapper*>(model);
@ -180,10 +199,10 @@ int32_t llmodel_threadCount(llmodel_model model)
void llmodel_set_implementation_search_path(const char *path)
{
LLModel::setImplementationsSearchPath(path);
LLModel::Implementation::setImplementationsSearchPath(path);
}
const char *llmodel_get_implementation_search_path()
{
return LLModel::implementationsSearchPath().c_str();
return LLModel::Implementation::implementationsSearchPath().c_str();
}


@ -171,6 +171,23 @@ void llmodel_prompt(llmodel_model model, const char *prompt,
llmodel_recalculate_callback recalculate_callback,
llmodel_prompt_context *ctx);
/**
* Generate an embedding using the model.
* @param model A pointer to the llmodel_model instance.
* @param text A string representing the text to generate an embedding for.
* @param embedding_size A pointer to a size_t type that will be set by the call indicating the length
* of the returned floating point array.
* @return A pointer to an array of floating point values passed to the calling method which then will
* be responsible for lifetime of this memory.
*/
float *llmodel_embedding(llmodel_model model, const char *text, size_t *embedding_size);
/**
* Frees the memory allocated by the llmodel_embedding function.
* @param ptr A pointer to the embedding as returned from llmodel_embedding.
*/
void llmodel_free_embedding(float *ptr);
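As a rough sketch of how a caller drives these two new functions (it mirrors the ctypes binding shown later in this diff; `llmodel` is assumed to be the already-loaded shared library and `model` a handle with a model loaded):

```py
import ctypes

llmodel.llmodel_embedding.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.POINTER(ctypes.c_size_t)]
llmodel.llmodel_embedding.restype = ctypes.POINTER(ctypes.c_float)
llmodel.llmodel_free_embedding.argtypes = [ctypes.POINTER(ctypes.c_float)]

size = ctypes.c_size_t()
ptr = llmodel.llmodel_embedding(model, b"The quick brown fox", ctypes.byref(size))
embedding = [ptr[i] for i in range(size.value)]  # copy the floats out first...
llmodel.llmodel_free_embedding(ptr)              # ...then free the buffer the caller now owns
```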
/**
* Set the number of threads to be used by the model.
* @param model A pointer to the llmodel_model instance.


@ -33,7 +33,14 @@ void LLModel::prompt(const std::string &prompt,
PromptContext &promptCtx)
{
if (!isModelLoaded()) {
std::cerr << implementation().modelType << " ERROR: prompt won't work with an unloaded model!\n";
std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
return;
}
if (!supportsCompletion()) {
std::string errorMessage = "ERROR: this model does not support text completion or chat!\n";
responseCallback(-1, errorMessage);
std::cerr << implementation().modelType() << errorMessage;
return;
}
@ -45,8 +52,8 @@ void LLModel::prompt(const std::string &prompt,
if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
std::cerr << implementation().modelType << " ERROR: The prompt is" << embd_inp.size() <<
"tokens and the context window is" << promptCtx.n_ctx << "!\n";
std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
" tokens and the context window is " << promptCtx.n_ctx << "!\n";
return;
}
@ -64,7 +71,7 @@ void LLModel::prompt(const std::string &prompt,
if (promptCtx.n_past + int32_t(batch.size()) > promptCtx.n_ctx) {
const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
// Erase the first percentage of context from the tokens...
std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
promptCtx.n_past = promptCtx.tokens.size();
recalculateContext(promptCtx, recalculateCallback);
@ -72,7 +79,7 @@ void LLModel::prompt(const std::string &prompt,
}
if (!evalTokens(promptCtx, batch)) {
std::cerr << implementation().modelType << " ERROR: Failed to process prompt\n";
std::cerr << implementation().modelType() << " ERROR: Failed to process prompt\n";
return;
}
@ -103,7 +110,7 @@ void LLModel::prompt(const std::string &prompt,
if (promptCtx.n_past + 1 > promptCtx.n_ctx) {
const int32_t erasePoint = promptCtx.n_ctx * promptCtx.contextErase;
// Erase the first percentage of context from the tokens...
std::cerr << implementation().modelType << ": reached the end of the context window so resizing\n";
std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
promptCtx.tokens.erase(promptCtx.tokens.begin(), promptCtx.tokens.begin() + erasePoint);
promptCtx.n_past = promptCtx.tokens.size();
recalculateContext(promptCtx, recalculateCallback);
@ -111,7 +118,7 @@ void LLModel::prompt(const std::string &prompt,
}
if (!evalTokens(promptCtx, { id })) {
std::cerr << implementation().modelType << " ERROR: Failed to predict next token\n";
std::cerr << implementation().modelType() << " ERROR: Failed to predict next token\n";
return;
}
@ -158,3 +165,12 @@ void LLModel::prompt(const std::string &prompt,
cachedTokens.clear();
}
}
std::vector<float> LLModel::embedding(const std::string &/*text*/)
{
if (!supportsCompletion()) {
std::string errorMessage = "ERROR: this model does not support generating embeddings!\n";
std::cerr << implementation().modelType() << errorMessage;
}
return std::vector<float>();
}


@ -15,6 +15,8 @@ public:
MPT();
~MPT();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -17,6 +17,8 @@ public:
Replit();
~Replit();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string & modelPath) override;


@ -0,0 +1,102 @@
import sys
import struct
import json
import torch
import numpy as np
from transformers import AutoModel, AutoTokenizer
if len(sys.argv) < 3:
print("Usage: convert-h5-to-ggml.py dir-model [use-f32]\n")
print(" ftype == 0 -> float32")
print(" ftype == 1 -> float16")
sys.exit(1)
# output in the same directory as the model
dir_model = sys.argv[1]
fname_out = sys.argv[1] + "/ggml-model.bin"
with open(dir_model + "/tokenizer.json", "r", encoding="utf-8") as f:
encoder = json.load(f)
with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
hparams = json.load(f)
with open(dir_model + "/vocab.txt", "r", encoding="utf-8") as f:
vocab = f.readlines()
# possible data types
# ftype == 0 -> float32
# ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]
ftype = 1
if len(sys.argv) > 2:
ftype = int(sys.argv[2])
if ftype < 0 or ftype > 1:
print("Invalid ftype: " + str(ftype))
sys.exit(1)
fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
tokenizer = AutoTokenizer.from_pretrained(dir_model)
model = AutoModel.from_pretrained(dir_model, low_cpu_mem_usage=True)
print (model)
print(tokenizer.encode('I believe the meaning of life is'))
list_vars = model.state_dict()
for name in list_vars.keys():
print(name, list_vars[name].shape, list_vars[name].dtype)
fout = open(fname_out, "wb")
print(hparams)
fout.write(struct.pack("i", 0x62657274)) # magic: ggml in hex
fout.write(struct.pack("i", hparams["vocab_size"]))
fout.write(struct.pack("i", hparams["max_position_embeddings"]))
fout.write(struct.pack("i", hparams["hidden_size"]))
fout.write(struct.pack("i", hparams["intermediate_size"]))
fout.write(struct.pack("i", hparams["num_attention_heads"]))
fout.write(struct.pack("i", hparams["num_hidden_layers"]))
fout.write(struct.pack("i", ftype))
for i in range(hparams["vocab_size"]):
text = vocab[i][:-1] # strips newline at the end
#print(f"{i}:{text}")
data = bytes(text, 'utf-8')
fout.write(struct.pack("i", len(data)))
fout.write(data)
for name in list_vars.keys():
data = list_vars[name].squeeze().numpy()
if name in ['embeddings.position_ids', 'pooler.dense.weight', 'pooler.dense.bias']:
continue
print("Processing variable: " + name + " with shape: ", data.shape)
n_dims = len(data.shape);
# ftype == 0 -> float32, ftype == 1 -> float16
if ftype == 1 and name[-7:] == ".weight" and n_dims == 2:
print(" Converting to float16")
data = data.astype(np.float16)
l_type = 1
else:
l_type = 0
# header
str = name.encode('utf-8')
fout.write(struct.pack("iii", n_dims, len(str), l_type))
for i in range(n_dims):
fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
fout.write(str);
# data
data.tofile(fout)
fout.close()
print("Done. Output file: " + fname_out)
print("")


@ -2,11 +2,13 @@
## What models are supported by the GPT4All ecosystem?
Currently, there are three different model architectures that are supported:
Currently, there are five different model architectures that are supported:
1. GPTJ - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
2. LLAMA - Based off of the LLAMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
1. GPT-J - Based off of the GPT-J architecture with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
2. LLaMA - Based off of the LLaMA architecture with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
3. MPT - Based off of Mosaic ML's MPT architecture with examples found [here](https://huggingface.co/mosaicml/mpt-7b)
4. Replit - Based off of Replit Inc.'s Replit architecture with examples found [here](https://huggingface.co/replit/replit-code-v1-3b)
5. Falcon - Based off of TII's Falcon architecture with examples found [here](https://huggingface.co/tiiuae/falcon-40b)
## Why so many different architectures? What differentiates them?
@ -25,6 +27,10 @@ The upstream [llama.cpp](https://github.com/ggerganov/llama.cpp) project has int
Fortunately, we have engineered a submoduling system allowing us to dynamically load different versions of the underlying library so that
GPT4All just works.
## What are the system requirements?
Your CPU needs to support [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) and you need enough RAM to load a model into memory.
## What about GPU inference?
In newer versions of llama.cpp, there has been some added support for NVIDIA GPU's for inference. We're investigating how to incorporate this into our downloadable installers.
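A quick way to check the AVX requirement mentioned above on Linux (a rough sketch; it just greps CPU flags and assumes `/proc/cpuinfo` is available):

```py
with open("/proc/cpuinfo") as f:
    flags = f.read()

if "avx2" in flags:
    print("AVX2 supported")
elif "avx" in flags:
    print("AVX supported (AVX-only builds will be used)")
else:
    print("No AVX support found; the prebuilt GPT4All backends require it")
```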


@ -1,8 +1,7 @@
# GPT4All Python API
# GPT4All Python Generation API
The `GPT4All` python package provides bindings to our C/C++ model backend libraries.
The source code and local build instructions can be found [here](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python).
## Quickstart
```bash
@ -109,5 +108,5 @@ with model.chat_session():
print(model.current_chat_session)
```
### API documentation
::: gpt4all.gpt4all.GPT4All
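For orientation, a minimal generation sketch (the model name is borrowed from the binding's test suite further down in this diff; any model from the GPT4All model list works the same way):

```py
from gpt4all import GPT4All

model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')  # downloaded automatically when allow_download is True
output = model.generate('The capital of France is ')
print(output)
```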


@ -0,0 +1,35 @@
# Embeddings
GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained [Sentence Transformer](https://www.sbert.net/). These embeddings are comparable in quality for many tasks with OpenAI.
## Quickstart
```bash
pip install gpt4all
```
### Generating embeddings
The embedding model will automatically be downloaded if not installed.
=== "Embed4All Example"
``` py
from gpt4all import GPT4All, Embed4All
text = 'The quick brown fox jumps over the lazy dog'
embedder = Embed4All()
output = embedder.embed(text)
print(output)
```
=== "Output"
```
[0.034696947783231735, -0.07192722707986832, 0.06923297047615051, ...]
```
### Speed of embedding generation
The following table lists the generation speed for text document captured on an Intel i913900HX CPU with DDR5 5600 running with 8 threads under stable load.
| Tokens | 128 | 512 | 2048 | 8129 | 16,384 |
| --------------- | ---- | ---- | ---- | ---- | ---- |
| Wall time (s) | .02 | .08 | .24 | .96 | 1.9 |
| Tokens / Second | 6508 | 6431 | 8622 | 8509 | 8369 |
### API documentation
::: gpt4all.gpt4all.Embed4All
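As a small follow-on sketch, the constructor's optional `n_threads` argument can be passed explicitly (the benchmark script later in this diff uses 8), and the bundled MiniLM-L6-v2 model yields 384-dimensional vectors, as asserted in the Python test:

```py
from gpt4all import Embed4All

embedder = Embed4All(n_threads=8)  # n_threads=None lets the backend pick automatically
vector = embedder.embed('The quick brown fox jumps over the lazy dog')
print(len(vector))  # 384 for the default ggml-all-MiniLM-L6-v2-f16 model
```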


@ -1,2 +1,2 @@
from .gpt4all import GPT4All # noqa
from .gpt4all import GPT4All, Embed4All # noqa
from .pyllmodel import LLModel # noqa


@ -15,6 +15,36 @@ from . import pyllmodel
# TODO: move to config
DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")
class Embed4All:
"""
Python class that handles embeddings for GPT4All.
"""
def __init__(
self,
n_threads: Optional[int] = None,
):
"""
Constructor
Args:
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
"""
self.gpt4all = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin', n_threads=n_threads)
def embed(
self,
text: str
) -> list[float]:
"""
Generate an embedding.
Args:
text: The text document to generate an embedding for.
Returns:
An embedding of your document of text.
"""
return self.gpt4all.model.generate_embedding(text)
class GPT4All:
"""
@ -39,7 +69,7 @@ class GPT4All:
model_type: Model architecture. This argument currently does not have any functionality and is just used as
descriptive identifier for user. Default is None.
allow_download: Allow API to download models from gpt4all.io. Default is True.
n_threads: number of CPU threads used by GPT4All. Default is None, than the number of threads are determined automatically.
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
"""
self.model_type = model_type
self.model = pyllmodel.LLModel()


@ -112,6 +112,19 @@ llmodel.llmodel_prompt.argtypes = [
llmodel.llmodel_prompt.restype = None
llmodel.llmodel_embedding.argtypes = [
ctypes.c_void_p,
ctypes.c_char_p,
ctypes.POINTER(ctypes.c_size_t),
]
llmodel.llmodel_embedding.restype = ctypes.POINTER(ctypes.c_float)
llmodel.llmodel_free_embedding.argtypes = [
ctypes.POINTER(ctypes.c_float)
]
llmodel.llmodel_free_embedding.restype = None
llmodel.llmodel_setThreadCount.argtypes = [ctypes.c_void_p, ctypes.c_int32]
llmodel.llmodel_setThreadCount.restype = None
@ -141,10 +154,11 @@ class LLModel:
self.model = None
self.model_name = None
self.context = None
self.llmodel_lib = llmodel
def __del__(self):
if self.model is not None:
llmodel.llmodel_model_destroy(self.model)
self.llmodel_lib.llmodel_model_destroy(self.model)
def memory_needed(self, model_path: str) -> int:
model_path_enc = model_path.encode("utf-8")
@ -233,6 +247,17 @@ class LLModel:
self.context.repeat_last_n = repeat_last_n
self.context.context_erase = context_erase
def generate_embedding(
self,
text: str
) -> list[float]:
embedding_size = ctypes.c_size_t()
c_text = ctypes.c_char_p(text.encode('utf-8'))
embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
llmodel.llmodel_free_embedding(embedding_ptr)
return list(embedding_array)
def prompt_model(
self,
prompt: str,


@ -0,0 +1,18 @@
import sys
from io import StringIO
from gpt4all import GPT4All, Embed4All
import time
def time_embedding(i, embedder):
text = 'foo bar ' * i
start_time = time.time()
output = embedder.embed(text)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Time report: {2 * i / elapsed_time} tokens/second with {2 * i} tokens taking {elapsed_time} seconds")
if __name__ == "__main__":
embedder = Embed4All(n_threads=8)
for i in [2**n for n in range(6, 14)]:
time_embedding(i, embedder)


@ -1,8 +1,8 @@
import sys
from io import StringIO
from gpt4all import GPT4All
from gpt4all import GPT4All, Embed4All
import time
def test_inference():
model = GPT4All(model_name='orca-mini-3b.ggmlv3.q4_0.bin')
@ -99,3 +99,11 @@ def test_inference_mpt():
output = model.generate(prompt)
assert isinstance(output, str)
assert len(output) > 0
def test_embedding():
text = 'The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox'
embedder = Embed4All()
output = embedder.embed(text)
#for i, value in enumerate(output):
#print(f'Value at index {i}: {value}')
assert len(output) == 384


@ -10,7 +10,9 @@ use_directory_urls: false
nav:
- 'index.md'
- 'Bindings':
- 'GPT4All in Python': 'gpt4all_python.md'
- 'GPT4All in Python':
- 'Generation': 'gpt4all_python.md'
- 'Embedding': 'gpt4all_python_embedding.md'
- 'GPT4ALL in NodeJs': 'gpt4all_typescript.md'
- 'GPT4All Chat Client': 'gpt4all_chat.md'
- 'gpt4all_cli.md'


@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,
setup(
name=package_name,
version="1.0.3",
version="1.0.6",
description="Python bindings for GPT4All",
author="Richard Guo",
author_email="richard@nomic.ai",


@ -53,7 +53,7 @@ const response = await createCompletion(ll, [
* (win) msvc version 143
* Can be obtained with visual studio 2022 build tools
### Build
### Build (from source)
```sh
git clone https://github.com/nomic-ai/gpt4all.git
@ -138,7 +138,7 @@ This package is in active development, and breaking changes may happen until the
* \[ ] createTokenStream, an async iterator that streams each token emitted from the model. Planning on following this [example](https://github.com/nodejs/node-addon-examples/tree/main/threadsafe-async-iterator)
* \[ ] proper unit testing (integrate with circle ci)
* \[ ] publish to npm under alpha tag `gpt4all@alpha`
* \[ ] have more people test on other platforms (mac tester needed)
* \[x] have more people test on other platforms (mac tester needed)
* \[x] switch to new pluggable backend
### Documentation


@ -53,7 +53,7 @@
'-fno-rtti',
],
'cflags_cc': [
'-std=c++20'
'-std=c++2a'
]
}]
]


@ -10,6 +10,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
InstanceMethod("raw_prompt", &NodeModelWrapper::Prompt),
InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
});
@ -91,6 +92,23 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
}
Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo& info) {
auto env = info.Env();
std::string text = info[0].As<Napi::String>().Utf8Value();
size_t embedding_size = 0;
float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
auto arr_size = sizeof(arr) / sizeof(float);
Napi::Float32Array js_array = Napi::Float32Array::New(info.Env(), arr_size);
for (size_t i = 0; i < arr_size; ++i) {
float element = *(arr + i);
js_array[i] = element;
}
llmodel_free_embedding(arr);
return js_array;
}
/**
* Generate a response using the model.


@ -23,6 +23,7 @@ public:
void SetThreadCount(const Napi::CallbackInfo& info);
Napi::Value getName(const Napi::CallbackInfo& info);
Napi::Value ThreadCount(const Napi::CallbackInfo& info);
Napi::Value GenerateEmbedding(const Napi::CallbackInfo& info);
/*
* The path that is used to search for the dynamic libraries
*/


@ -1,6 +1,6 @@
{
"name": "gpt4all",
"version": "2.0.0",
"version": "2.0.0rc",
"packageManager": "yarn@3.5.1",
"main": "src/gpt4all.js",
"repository": "nomic-ai/gpt4all",


@ -6,7 +6,7 @@ async function createPrebuilds(combinations) {
platform,
arch,
napi: true,
targets: ["18.15.0"]
targets: ["18.16.0"]
};
try {
await createPrebuild(opts);

File diff suppressed because it is too large.


@ -1,5 +1,6 @@
cmake_minimum_required(VERSION 3.16)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -17,7 +18,7 @@ endif()
set(APP_VERSION_MAJOR 2)
set(APP_VERSION_MINOR 4)
set(APP_VERSION_PATCH 13)
set(APP_VERSION_PATCH 14)
set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
# Include the binary directory for the generated header file
@ -205,6 +206,8 @@ install(TARGETS replit-mainline-default DESTINATION lib COMPONENT ${COMPONENT_NA
if(APPLE)
install(TARGETS replit-mainline-metal DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
endif()
install(TARGETS bert-avxonly DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
install(TARGETS bert-default DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN})
set(CPACK_GENERATOR "IFW")
set(CPACK_VERBATIM_VARIABLES YES)


@ -51,19 +51,7 @@ One click installers for macOS, Linux, and Windows at https://gpt4all.io
If you've already checked out the source code and/or built the program make sure when you do a git fetch to get the latest changes and that you also do ```git submodule update --init --recursive``` to update the submodules.
## Manual download of models
* https://gpt4all.io/models/ggml-mpt-7b-chat.bin (default) (md5sum 756249d3d6abe23bde3b1ae272628640) Current best non-commercially licensable chat model based on MPT and trained by Mosaic ML.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin (default) (md5sum 81a09a0ddf89690372fc296ff7f625af) Current best commercially licensable model based on GPT-J and trained by Nomic AI on the latest curated GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin (md5sum 91f886b68fbce697e9a3cd501951e455) Current best non-commercially licensable model based on Llama 13b and trained by Nomic AI on the latest curated GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin (md5sum 879344aaa9d62fdccbda0be7a09e7976) A commercially licensable model based on GPT-J and trained by Nomic AI on the v2 GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j-v1.1-breezy.bin (md5sum 61d48a82cb188cceb14ebb8082bfec37) A commercially licensable model based on GPT-J and trained by Nomic AI on the v1 GPT4All dataset.
* https://gpt4all.io/models/ggml-gpt4all-j.bin (md5sum 5b5a3f9b858d33b29b52b89692415595) A commercially licensable model based on GPT-J and trained by Nomic AI on the v0 GPT4All dataset.
* https://gpt4all.io/models/ggml-vicuna-7b-1.1-q4_2.bin (md5sum 29119f8fa11712704c6b22ac5ab792ea) An non-commercially licensable model based on Llama 7b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
* https://gpt4all.io/models/ggml-vicuna-13b-1.1-q4_2.bin (md5sum 95999b7b0699e2070af63bf5d34101a8) An non-commercially licensable model based on Llama 13b and trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego.
* https://gpt4all.io/models/ggml-wizardLM-7B.q4_2.bin (md5sum 99e6d129745a3f1fb1121abed747b05a) An non-commercially licensable model based on Llama 7b and trained by Microsoft and Peking University.
* https://gpt4all.io/models/ggml-stable-vicuna-13B.q4_2.bin (md5sum 6cb4ee297537c9133bddab9692879de0) An non-commercially licensable model based on Llama 13b and RLHF trained by Stable AI.
* https://gpt4all.io/models/ggml-mpt-7b-base.bin (md5sum 120c32a51d020066288df045ef5d52b9) A commercially licensable model base pre-trained by Mosaic ML.
* https://gpt4all.io/models/ggml-nous-gpt4-vicuna-13b.bin (md5sum d5eafd5b0bd0d615cfd5fd763f642dfe) A non-commercially licensable model based on Vicuna 13b, fine-tuned on ~180,000 instructions, trained by Nous Research.
* https://gpt4all.io/models/ggml-mpt-7b-instruct.bin (md5sum 1cfa4958f489f0a0d1ffdf6b37322809) A commercially licensable instruct model based on MPT and trained by Mosaic ML.
* You can find a 'Model Explorer' on the official website where you can manually download models that we support: https://gpt4all.io/index.html
## Terminal Only Interface with no Qt dependency


@ -155,7 +155,7 @@ void ChatGPTWorker::request(const QString &apiKey,
m_ctx = promptCtx;
QUrl openaiUrl("https://api.openai.com/v1/chat/completions");
const QString authorization = QString("Bearer %1").arg(apiKey);
const QString authorization = QString("Bearer %1").arg(apiKey).trimmed();
QNetworkRequest request(openaiUrl);
request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json");
request.setRawHeader("Authorization", authorization.toUtf8());
@ -244,7 +244,7 @@ void ChatGPTWorker::handleReadyRead()
void ChatGPTWorker::handleErrorOccurred(QNetworkReply::NetworkError code)
{
QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply) {
if (!reply || reply->error() == QNetworkReply::OperationCanceledError /*when we call abort on purpose*/) {
emit finished();
return;
}


@ -46,6 +46,8 @@ public:
ChatGPT();
virtual ~ChatGPT();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath) override;


@ -14,6 +14,7 @@
#define REPLIT_INTERNAL_STATE_VERSION 0
#define LLAMA_INTERNAL_STATE_VERSION 0
#define FALCON_INTERNAL_STATE_VERSION 0
#define BERT_INTERNAL_STATE_VERSION 0
class LLModelStore {
public:
@ -240,11 +241,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
#if defined(Q_OS_MAC) && defined(__arm__)
if (m_forceMetal)
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "metal");
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "metal");
else
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
m_llModelInfo.model = LLMImplementation::construct(filePath.toStdString(), "auto");
#else
m_llModelInfo.model = LLModel::construct(filePath.toStdString(), "auto");
m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), "auto");
#endif
if (m_llModelInfo.model) {
@ -258,12 +259,13 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
m_llModelInfo = LLModelInfo();
emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
} else {
switch (m_llModelInfo.model->implementation().modelType[0]) {
switch (m_llModelInfo.model->implementation().modelType()[0]) {
case 'L': m_llModelType = LLModelType::LLAMA_; break;
case 'G': m_llModelType = LLModelType::GPTJ_; break;
case 'M': m_llModelType = LLModelType::MPT_; break;
case 'R': m_llModelType = LLModelType::REPLIT_; break;
case 'F': m_llModelType = LLModelType::FALCON_; break;
case 'B': m_llModelType = LLModelType::BERT_; break;
default:
{
delete std::exchange(m_llModelInfo.model, nullptr);
@ -628,8 +630,8 @@ bool ChatLLM::handleNameRecalculate(bool isRecalc)
qDebug() << "name recalc" << m_llmThread.objectName() << isRecalc;
#endif
Q_UNUSED(isRecalc);
Q_UNREACHABLE();
return false;
qt_noop();
return true;
}
bool ChatLLM::handleSystemPrompt(int32_t token)
@ -669,7 +671,8 @@ bool ChatLLM::serialize(QDataStream &stream, int version)
case MPT_: stream << MPT_INTERNAL_STATE_VERSION; break;
case GPTJ_: stream << GPTJ_INTERNAL_STATE_VERSION; break;
case LLAMA_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case FALCON_: stream << LLAMA_INTERNAL_STATE_VERSION; break;
case FALCON_: stream << FALCON_INTERNAL_STATE_VERSION; break;
case BERT_: stream << BERT_INTERNAL_STATE_VERSION; break;
default: Q_UNREACHABLE();
}
}
@ -788,13 +791,18 @@ void ChatLLM::processSystemPrompt()
if (!isModelLoaded() || m_processedSystemPrompt || m_isServer)
return;
const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
if (QString::fromStdString(systemPrompt).trimmed().isEmpty()) {
m_processedSystemPrompt = true;
return;
}
m_stopGenerating = false;
auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);
auto responseFunc = std::bind(&ChatLLM::handleSystemResponse, this, std::placeholders::_1,
std::placeholders::_2);
auto recalcFunc = std::bind(&ChatLLM::handleSystemRecalculate, this, std::placeholders::_1);
const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
const float top_p = MySettings::globalInstance()->modelTopP(m_modelInfo);

View File

@ -16,6 +16,7 @@ enum LLModelType {
CHATGPT_,
REPLIT_,
FALCON_,
BERT_
};
struct LLModelInfo {

View File

@ -7,16 +7,19 @@ file(GLOB MYMPTLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NA
file(GLOB MYLLAMALIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama*)
file(GLOB MYREPLITLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libreplit*)
file(GLOB MYFALCONLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libfalcon*)
file(GLOB MYBERTLLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libbert*)
file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.*)
file(COPY ${MYGPTJLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYMPTLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLAMALIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYREPLITLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYFALCONLLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLAMALIBS}
file(COPY ${MYBERTLLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLMODELLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)

View File

@ -8,6 +8,7 @@
#include <QFile>
#include <QProcess>
#include <QResource>
#include <QSettings>
#include <fstream>
class MyLLM: public LLM { };
@ -33,7 +34,7 @@ LLM::LLM()
if (directoryExists(frameworksDir))
llmodelSearchPaths += ";" + frameworksDir;
#endif
LLModel::setImplementationsSearchPath(llmodelSearchPaths.toStdString());
LLModel::Implementation::setImplementationsSearchPath(llmodelSearchPaths.toStdString());
#if defined(__x86_64__)
#ifndef _MSC_VER
@ -48,7 +49,13 @@ LLM::LLM()
#endif
m_compatHardware = minimal;
emit compatHardwareChanged();
}
bool LLM::hasSettingsAccess() const
{
QSettings settings;
settings.sync();
return settings.status() == QSettings::NoError;
}
bool LLM::checkForUpdates() const

View File

@ -6,12 +6,11 @@
class LLM : public QObject
{
Q_OBJECT
Q_PROPERTY(bool compatHardware READ compatHardware NOTIFY compatHardwareChanged)
public:
static LLM *globalInstance();
bool compatHardware() const { return m_compatHardware; }
Q_INVOKABLE bool hasSettingsAccess() const;
Q_INVOKABLE bool compatHardware() const { return m_compatHardware; }
Q_INVOKABLE bool checkForUpdates() const;
Q_INVOKABLE bool directoryExists(const QString &path) const;
@ -22,7 +21,6 @@ public:
Q_SIGNALS:
void chatListModelChanged();
void modelListChanged();
void compatHardwareChanged();
private:
bool m_compatHardware;

View File

@ -89,14 +89,22 @@ Window {
property bool hasShownModelDownload: false
property bool hasShownFirstStart: false
property bool hasShownSettingsAccess: false
function startupDialogs() {
if (!LLM.compatHardware) {
if (!LLM.compatHardware()) {
Network.sendNonCompatHardware();
errorCompatHardware.open();
return;
}
// check if we have access to settings and if not show an error
if (!hasShownSettingsAccess && !LLM.hasSettingsAccess()) {
errorSettingsAccess.open();
hasShownSettingsAccess = true;
return;
}
// check for first time start of this version
if (!hasShownFirstStart && Download.isFirstStart()) {
firstStartDialog.open();
@ -135,6 +143,20 @@ Window {
+ qsTr("https://en.wikipedia.org/wiki/Advanced_Vector_Extensions</a>")
}
PopupDialog {
id: errorSettingsAccess
anchors.centerIn: parent
shouldTimeOut: false
shouldShowBusy: false
modal: true
text: qsTr("<h3>Encountered an error starting up:</h3><br>")
+ qsTr("<i>\"Inability to access settings file.\"</i>")
+ qsTr("<br><br>Unfortunately, something is preventing the program from accessing ")
+ qsTr("the settings file. This could be caused by incorrect permissions in the local ")
+ qsTr("app config directory where the settings file is located. ")
+ qsTr("Check out our <a href=\"https://discord.gg/4M2QFmTt2k\">discord channel</a> for help.")
}
StartupDialog {
id: firstStartDialog
anchors.centerIn: parent

View File

@ -1,18 +1,16 @@
[
{
"order": "a",
"md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Hermes",
"filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "8136777088",
"requires": "2.4.7",
"md5sum": "e8d47924f433bd561cb5244557147793",
"name": "Wizard v1.1",
"filename": "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin",
"filesize": "7323310848",
"ramrequired": "16",
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
"systemPrompt": " ",
"description": "<strong>Best overall model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul"
},
{
"order": "b",
@ -25,12 +23,29 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "Falcon",
"systemPrompt": " ",
"description": "<strong>Best overall smaller model</strong><br><ul><li>Fast responses</li><li>Instruction based</li><li>Trained by TII<li>Finetuned by Nomic AI<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
},
{
"order": "c",
"md5sum": "4acc146dd43eb02845c233c29289c7c5",
"name": "Hermes",
"filename": "nous-hermes-13b.ggmlv3.q4_0.bin",
"filesize": "8136777088",
"requires": "2.4.7",
"ramrequired": "16",
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Extremely good model</strong><br><ul><li>Instruction based<li>Gives long responses<li>Curated with 300,000 uncensored instructions<li>Trained by Nous Research<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_0.bin",
"promptTemplate": "### Instruction:\n%1\n### Response:\n"
},
{
"order": "e",
"md5sum": "81a09a0ddf89690372fc296ff7f625af",
"name": "Groovy",
"filename": "ggml-gpt4all-j-v1.3-groovy.bin",
@ -39,10 +54,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "GPT-J",
"systemPrompt": " ",
"description": "<strong>Creative model can be used for commercial purposes</strong><br><ul><li>Fast responses<li>Creative responses</li><li>Instruction based</li><li>Trained by Nomic AI<li>Licensed for commercial use</ul>"
},
{
"order": "e",
"order": "f",
"md5sum": "11d9f060ca24575a2c303bdc39952486",
"name": "Snoozy",
"filename": "GPT4All-13B-snoozy.ggmlv3.q4_0.bin",
@ -52,11 +68,12 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Very good overall model</strong><br><ul><li>Instruction based<li>Based on the same dataset as Groovy<li>Slower than Groovy, with higher quality responses<li>Trained by Nomic AI<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/GPT4All-13B-snoozy-GGML/resolve/main/GPT4All-13B-snoozy.ggmlv3.q4_0.bin"
},
{
"order": "f",
"order": "g",
"md5sum": "756249d3d6abe23bde3b1ae272628640",
"name": "MPT Chat",
"filename": "ggml-mpt-7b-chat.bin",
@ -71,9 +88,9 @@
"systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
},
{
"order": "g",
"order": "h",
"md5sum": "e64e74375ce9d36a3d0af3db1523fd0a",
"name": "Orca",
"name": "Mini Orca",
"filename": "orca-mini-7b.ggmlv3.q4_0.bin",
"filesize": "3791749248",
"requires": "2.4.7",
@ -87,9 +104,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "h",
"order": "i",
"md5sum": "6a087f7f4598fad0bb70e6cb4023645e",
"name": "Orca (Small)",
"name": "Mini Orca (Small)",
"filename": "orca-mini-3b.ggmlv3.q4_0.bin",
"filesize": "1928446208",
"requires": "2.4.7",
@ -103,9 +120,9 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "i",
"order": "j",
"md5sum": "959b7f65b2d12fd1e3ff99e7493c7a3a",
"name": "Orca (Large)",
"name": "Mini Orca (Large)",
"filename": "orca-mini-13b.ggmlv3.q4_0.bin",
"filesize": "7323329152",
"requires": "2.4.7",
@ -119,7 +136,7 @@
"systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
},
{
"order": "j",
"order": "k",
"md5sum": "29119f8fa11712704c6b22ac5ab792ea",
"name": "Vicuna",
"filename": "ggml-vicuna-7b-1.1-q4_2.bin",
@ -128,10 +145,11 @@
"parameters": "7 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "k",
"order": "l",
"md5sum": "95999b7b0699e2070af63bf5d34101a8",
"name": "Vicuna (large)",
"filename": "ggml-vicuna-13b-1.1-q4_2.bin",
@ -140,10 +158,11 @@
"parameters": "13 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good larger model - trained by teams from UC Berkeley, CMU, Stanford, MBZUAI, and UC San Diego</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "l",
"order": "m",
"md5sum": "99e6d129745a3f1fb1121abed747b05a",
"name": "Wizard",
"filename": "ggml-wizardLM-7B.q4_2.bin",
@ -152,10 +171,11 @@
"parameters": "7 billion",
"quant": "q4_2",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Good small model - trained by by Microsoft and Peking University</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>"
},
{
"order": "m",
"order": "n",
"md5sum": "6cb4ee297537c9133bddab9692879de0",
"name": "Stable Vicuna",
"filename": "ggml-stable-vicuna-13B.q4_2.bin",
@ -168,7 +188,7 @@
"systemPrompt": "## Assistant: I am StableVicuna, a large language model created by CarperAI. I am here to chat!\n\n"
},
{
"order": "n",
"order": "o",
"md5sum": "1cfa4958f489f0a0d1ffdf6b37322809",
"name": "MPT Instruct",
"filename": "ggml-mpt-7b-instruct.bin",
@ -178,10 +198,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "MPT",
"systemPrompt": " ",
"description": "<strong>Mosaic's instruction model</strong><br><ul><li>Instruction based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
},
{
"order": "o",
"order": "p",
"md5sum": "120c32a51d020066288df045ef5d52b9",
"name": "MPT Base",
"filename": "ggml-mpt-7b-base.bin",
@ -191,10 +212,11 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "MPT",
"systemPrompt": " ",
"description": "<strong>Trained for text completion with no assistant finetuning</strong><br><ul><li>Completion based<li>Trained by Mosaic ML<li>Licensed for commercial use</ul>"
},
{
"order": "p",
"order": "q",
"md5sum": "d5eafd5b0bd0d615cfd5fd763f642dfe",
"name": "Nous Vicuna",
"filename": "ggml-nous-gpt4-vicuna-13b.bin",
@ -203,10 +225,11 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on ~180,000 instructions</strong><br><ul><li>Instruction based<li>Trained by Nous Research<li>Cannot be used commercially</ul>"
},
{
"order": "q",
"order": "r",
"md5sum": "489d21fd48840dcb31e5f92f453f3a20",
"name": "Wizard Uncensored",
"filename": "wizardLM-13B-Uncensored.ggmlv3.q4_0.bin",
@ -216,11 +239,12 @@
"parameters": "13 billion",
"quant": "q4_0",
"type": "LLaMA",
"systemPrompt": " ",
"description": "<strong>Trained on uncensored assistant data and instruction data</strong><br><ul><li>Instruction based<li>Cannot be used commercially</ul>",
"url": "https://huggingface.co/TheBloke/WizardLM-13B-Uncensored-GGML/resolve/main/wizardLM-13B-Uncensored.ggmlv3.q4_0.bin"
},
{
"order": "r",
"order": "s",
"md5sum": "615890cb571fcaa0f70b2f8d15ef809e",
"disableGUI": "true",
"name": "Replit",
@ -231,7 +255,23 @@
"parameters": "3 billion",
"quant": "f16",
"type": "Replit",
"systemPrompt": " ",
"description": "<strong>Trained on subset of the Stack</strong><br><ul><li>Code completion based<li>Licensed for commercial use</ul>",
"url": "https://huggingface.co/nomic-ai/ggml-replit-code-v1-3b/resolve/main/ggml-replit-code-v1-3b.bin"
},
{
"order": "t",
"md5sum": "031bb5d5722c08d13e3e8eaf55c37391",
"disableGUI": "true",
"name": "Bert",
"filename": "ggml-all-MiniLM-L6-v2-f16.bin",
"filesize": "45521167",
"requires": "2.4.14",
"ramrequired": "1",
"parameters": "1 million",
"quant": "f16",
"type": "Bert",
"systemPrompt": " ",
"description": "<strong>Sbert</strong><br><ul><li>For embeddings"
}
]
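
The new Bert entry above ships an embedding-only sbert model rather than a chat model. Below is a minimal sketch of using it through the Python bindings' Embed4All wrapper; the exact constructor arguments are an assumption, and the default model it pulls is taken to be the ggml-all-MiniLM-L6-v2-f16.bin file added above.

```python
from gpt4all import Embed4All

# Minimal sketch, assuming the gpt4all Python package exposes Embed4All
# backed by the MiniLM sbert model registered in models.json above.
embedder = Embed4All()
text = "The quick brown fox jumps over the lazy dog"
embedding = embedder.embed(text)  # list of floats (sentence embedding)
print(len(embedding))
```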

View File

@ -416,6 +416,40 @@
* Akarshan Biswas
* Adam Treat (Nomic AI)
* Community (beta testers, bug reporters)
"
},
{
"version": "2.4.13",
"notes":
"
* Fix bug with prolonging shutdown with generation
* Fix bug with update model info on deleting chats
* Fix bug with preventing closing of model download dialog
* Always allow closing the model download dialog
* Fix numerous bugs with download of models.json and provide backup option
* Add json and c# highlighting
* Fix bug with chatgpt crashing
* Fix bug with chatgpt not working for some keys
* Fix bug with mixpanel opt outs not counting
* Fix problem with OOM errors causing crash and then repeating on next start
* Fix default thread setting and provide guardrails
* Fix tap handler in settings dialog for buttons
* Fix color of some text fields on macOS for settings dialog
* Fix problem with startup dialog not closing
* Provide error dialog for settings file not accessible
* Try and fix problems with avx-only detection
* Fix showing error in model downloads unnecessarily
* Prefer 7b models to load by default
* Add Wizard v1.1 to download list
* Rename Orca models to Mini Orca
* Don't use a system prompt unless model was trained with one by default
",
"contributors":
"
* Lakshay Kansal (Nomic AI)
* Aaron Miller (Nomic AI)
* Adam Treat (Nomic AI)
* Community (beta testers, bug reporters)
"
}
]

View File

@ -161,16 +161,6 @@ int InstalledModels::count() const
return rowCount();
}
QString InstalledModels::firstId() const
{
if (rowCount() > 0) {
QModelIndex firstIndex = index(0, 0);
return sourceModel()->data(firstIndex, ModelList::IdRole).toString();
} else {
return QString();
}
}
DownloadableModels::DownloadableModels(QObject *parent)
: QSortFilterProxyModel(parent)
, m_expanded(false)
@ -222,6 +212,7 @@ ModelList::ModelList()
: QAbstractListModel(nullptr)
, m_installedModels(new InstalledModels(this))
, m_downloadableModels(new DownloadableModels(this))
, m_asyncModelRequestOngoing(false)
{
m_installedModels->setSourceModel(this);
m_downloadableModels->setSourceModel(this);
@ -297,12 +288,9 @@ ModelInfo ModelList::defaultModelInfo() const
settings.sync();
// The user default model can be set by the user in the settings dialog. The "default" user
// default model is "Application default" which signals we should use the default model that was
// specified by the models.json file.
// default model is "Application default" which signals we should use the logic here.
const QString userDefaultModelName = MySettings::globalInstance()->userDefaultModel();
const bool hasUserDefaultName = !userDefaultModelName.isEmpty() && userDefaultModelName != "Application default";
const QString defaultModelName = settings.value("defaultModel").toString();
const bool hasDefaultName = hasUserDefaultName ? false : !defaultModelName.isEmpty();
ModelInfo *defaultModel = nullptr;
for (ModelInfo *info : m_models) {
@ -310,12 +298,10 @@ ModelInfo ModelList::defaultModelInfo() const
continue;
defaultModel = info;
// If we don't have either setting, then just use the first model that is installed
if (!hasUserDefaultName && !hasDefaultName)
break;
const size_t ramrequired = defaultModel->ramrequired;
// If we don't have a user specified default, but *do* have a default setting and match, then use it
if (!hasUserDefaultName && hasDefaultName && (defaultModel->id() == defaultModelName))
// If we don't have either setting, then just use the first model that requires less than 16GB that is installed
if (!hasUserDefaultName && !info->isChatGPT && ramrequired > 0 && ramrequired < 16)
break;
// If we have a user specified default and match, then use it
@ -835,7 +821,7 @@ void ModelList::updateModelsFromDirectory()
for (const QString &id : modelsById) {
updateData(id, FilenameRole, filename);
updateData(id, ChatGPTRole, filename.startsWith("chatgpt-"));
updateData(id, DirpathRole, path);
updateData(id, DirpathRole, info.dir().absolutePath() + "/");
updateData(id, FilesizeRole, toFileSize(info.size()));
}
}
@ -846,14 +832,6 @@ void ModelList::updateModelsFromDirectory()
processDirectory(exePath);
if (localPath != exePath)
processDirectory(localPath);
if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
}
void ModelList::updateModelsFromJson()
@ -899,6 +877,9 @@ void ModelList::updateModelsFromJson()
void ModelList::updateModelsFromJsonAsync()
{
m_asyncModelRequestOngoing = true;
emit asyncModelRequestOngoingChanged();
#if defined(USE_LOCAL_MODELSJSON)
QUrl jsonUrl("file://" + QDir::homePath() + "/dev/large_language_models/gpt4all/gpt4all-chat/metadata/models.json");
#else
@ -911,17 +892,37 @@ void ModelList::updateModelsFromJsonAsync()
QNetworkReply *jsonReply = m_networkManager.get(request);
connect(qApp, &QCoreApplication::aboutToQuit, jsonReply, &QNetworkReply::abort);
connect(jsonReply, &QNetworkReply::finished, this, &ModelList::handleModelsJsonDownloadFinished);
connect(jsonReply, &QNetworkReply::errorOccurred, this, &ModelList::handleModelsJsonDownloadErrorOccurred);
}
void ModelList::handleModelsJsonDownloadFinished()
{
QNetworkReply *jsonReply = qobject_cast<QNetworkReply *>(sender());
if (!jsonReply)
if (!jsonReply) {
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
return;
}
QByteArray jsonData = jsonReply->readAll();
jsonReply->deleteLater();
parseModelsJsonFile(jsonData, true);
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
}
void ModelList::handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code)
{
// TODO: Show what error occurred in the GUI
m_asyncModelRequestOngoing = false;
emit asyncModelRequestOngoingChanged();
QNetworkReply *reply = qobject_cast<QNetworkReply *>(sender());
if (!reply)
return;
qWarning() << QString("ERROR: Modellist download failed with error code \"%1-%2\"")
.arg(code).arg(reply->errorString()).toStdString();
}
void ModelList::handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors)
@ -1108,14 +1109,6 @@ void ModelList::parseModelsJsonFile(const QByteArray &jsonData, bool save)
updateData(id, ModelList::QuantRole, "NA");
updateData(id, ModelList::TypeRole, "GPT");
}
if (installedModels()->count()) {
const QString firstModel =
installedModels()->firstId();
QSettings settings;
settings.setValue("defaultModel", firstModel);
settings.sync();
}
}
void ModelList::updateModelsFromSettings()

View File

@ -127,7 +127,6 @@ class InstalledModels : public QSortFilterProxyModel
public:
explicit InstalledModels(QObject *parent);
int count() const;
QString firstId() const;
Q_SIGNALS:
void countChanged();
@ -169,6 +168,7 @@ class ModelList : public QAbstractListModel
Q_PROPERTY(InstalledModels* installedModels READ installedModels NOTIFY installedModelsChanged)
Q_PROPERTY(DownloadableModels* downloadableModels READ downloadableModels NOTIFY downloadableModelsChanged)
Q_PROPERTY(QList<QString> userDefaultModelList READ userDefaultModelList NOTIFY userDefaultModelListChanged)
Q_PROPERTY(bool asyncModelRequestOngoing READ asyncModelRequestOngoing NOTIFY asyncModelRequestOngoingChanged)
public:
static ModelList *globalInstance();
@ -296,12 +296,14 @@ public:
}
QString incompleteDownloadPath(const QString &modelFile);
bool asyncModelRequestOngoing() const { return m_asyncModelRequestOngoing; }
Q_SIGNALS:
void countChanged();
void installedModelsChanged();
void downloadableModelsChanged();
void userDefaultModelListChanged();
void asyncModelRequestOngoingChanged();
private Q_SLOTS:
void updateModelsFromJson();
@ -310,6 +312,7 @@ private Q_SLOTS:
void updateModelsFromDirectory();
void updateDataForSettings();
void handleModelsJsonDownloadFinished();
void handleModelsJsonDownloadErrorOccurred(QNetworkReply::NetworkError code);
void handleSslErrors(QNetworkReply *reply, const QList<QSslError> &errors);
private:
@ -328,6 +331,7 @@ private:
QList<ModelInfo*> m_models;
QHash<QString, ModelInfo*> m_modelMap;
QFileSystemWatcher *m_watcher;
bool m_asyncModelRequestOngoing;
private:
explicit ModelList();

View File

@ -41,7 +41,7 @@ MyDialog {
}
Label {
visible: !ModelList.downloadableModels.count
visible: !ModelList.downloadableModels.count && !ModelList.asyncModelRequestOngoing
Layout.fillWidth: true
Layout.fillHeight: true
horizontalAlignment: Qt.AlignHCenter
@ -50,6 +50,15 @@ MyDialog {
color: theme.mutedTextColor
}
MyBusyIndicator {
visible: !ModelList.downloadableModels.count && ModelList.asyncModelRequestOngoing
running: ModelList.asyncModelRequestOngoing
Accessible.role: Accessible.Animation
Layout.alignment: Qt.AlignCenter
Accessible.name: qsTr("Busy indicator")
Accessible.description: qsTr("Displayed when the models request is ongoing")
}
ScrollView {
id: scrollView
ScrollBar.vertical.policy: ScrollBar.AlwaysOn

View File

@ -18,6 +18,9 @@ enum Language {
Go,
Json,
Csharp,
Latex,
Html,
Php
};
static QColor keywordColor = "#2e95d3"; // blue
@ -33,6 +36,11 @@ static QColor commandColor = functionCallColor;
static QColor variableColor = numberColor;
static QColor keyColor = functionColor;
static QColor valueColor = stringColor;
static QColor parameterColor = stringColor;
static QColor attributeNameColor = numberColor;
static QColor attributeValueColor = stringColor;
static QColor specialCharacterColor = functionColor;
static QColor doctypeColor = commentColor;
static Language stringToLanguage(const QString &language)
{
@ -62,6 +70,12 @@ static Language stringToLanguage(const QString &language)
return Go;
if (language == "json")
return Json;
if (language == "latex")
return Latex;
if (language == "html")
return Html;
if (language == "php")
return Php;
return None;
}
@ -561,6 +575,135 @@ static QVector<HighlightingRule> bashHighlightingRules()
return highlightingRules;
}
static QVector<HighlightingRule> latexHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat commandFormat;
commandFormat.setForeground(commandColor); // commandColor needs to be set to your liking
rule.pattern = QRegularExpression("\\\\[A-Za-z]+"); // Pattern for LaTeX commands
rule.format = commandFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor); // commentColor needs to be set to your liking
rule.pattern = QRegularExpression("%[^\n]*"); // Pattern for LaTeX comments
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> htmlHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat attributeNameFormat;
attributeNameFormat.setForeground(attributeNameColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*=");
rule.format = attributeNameFormat;
highlightingRules.append(rule);
QTextCharFormat attributeValueFormat;
attributeValueFormat.setForeground(attributeValueColor);
rule.pattern = QRegularExpression("\".*?\"|'.*?'");
rule.format = attributeValueFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("<!--.*?-->");
rule.format = commentFormat;
highlightingRules.append(rule);
QTextCharFormat specialCharacterFormat;
specialCharacterFormat.setForeground(specialCharacterColor);
rule.pattern = QRegularExpression("&[a-zA-Z0-9#]*;");
rule.format = specialCharacterFormat;
highlightingRules.append(rule);
QTextCharFormat doctypeFormat;
doctypeFormat.setForeground(doctypeColor);
rule.pattern = QRegularExpression("<!DOCTYPE.*?>");
rule.format = doctypeFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> phpHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
if (highlightingRules.isEmpty()) {
HighlightingRule rule;
QTextCharFormat functionCallFormat;
functionCallFormat.setForeground(functionCallColor);
rule.pattern = QRegularExpression("\\b(\\w+)\\s*(?=\\()");
rule.format = functionCallFormat;
highlightingRules.append(rule);
QTextCharFormat functionFormat;
functionFormat.setForeground(functionColor);
rule.pattern = QRegularExpression("\\bfunction\\s+(\\w+)\\b");
rule.format = functionFormat;
highlightingRules.append(rule);
QTextCharFormat numberFormat;
numberFormat.setForeground(numberColor);
rule.pattern = QRegularExpression("\\b[0-9]*\\.?[0-9]+\\b");
rule.format = numberFormat;
highlightingRules.append(rule);
QTextCharFormat keywordFormat;
keywordFormat.setForeground(keywordColor);
QStringList keywordPatterns = {
"\\bif\\b", "\\belse\\b", "\\belseif\\b", "\\bwhile\\b", "\\bfor\\b",
"\\bforeach\\b", "\\breturn\\b", "\\bprint\\b", "\\binclude\\b", "\\brequire\\b",
"\\binclude_once\\b", "\\brequire_once\\b", "\\btry\\b", "\\bcatch\\b",
"\\bfinally\\b", "\\bcontinue\\b", "\\bbreak\\b", "\\bclass\\b", "\\bfunction\\b",
"\\bnew\\b", "\\bthrow\\b", "\\barray\\b", "\\bpublic\\b", "\\bprivate\\b",
"\\bprotected\\b", "\\bstatic\\b", "\\bglobal\\b", "\\bisset\\b", "\\bunset\\b",
"\\bnull\\b", "\\btrue\\b", "\\bfalse\\b"
};
for (const QString &pattern : keywordPatterns) {
rule.pattern = QRegularExpression(pattern);
rule.format = keywordFormat;
highlightingRules.append(rule);
}
QTextCharFormat stringFormat;
stringFormat.setForeground(stringColor);
rule.pattern = QRegularExpression("\".*?\"");
rule.format = stringFormat;
highlightingRules.append(rule);
rule.pattern = QRegularExpression("\'.*?\'");
rule.format = stringFormat;
highlightingRules.append(rule);
QTextCharFormat commentFormat;
commentFormat.setForeground(commentColor);
rule.pattern = QRegularExpression("//[^\n]*");
rule.format = commentFormat;
highlightingRules.append(rule);
rule.pattern = QRegularExpression("/\\*.*?\\*/");
rule.format = commentFormat;
highlightingRules.append(rule);
}
return highlightingRules;
}
static QVector<HighlightingRule> jsonHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
@ -616,6 +759,12 @@ void SyntaxHighlighter::highlightBlock(const QString &text)
rules = javaHighlightingRules();
else if (block.userState() == Json)
rules = jsonHighlightingRules();
else if (block.userState() == Latex)
rules = latexHighlightingRules();
else if (block.userState() == Html)
rules = htmlHighlightingRules();
else if (block.userState() == Php)
rules = phpHighlightingRules();
for (const HighlightingRule &rule : qAsConst(rules)) {
QRegularExpressionMatchIterator matchIterator = rule.pattern.globalMatch(text);
@ -821,7 +970,10 @@ void ResponseText::handleCodeBlocks()
|| firstWord == "java"
|| firstWord == "go"
|| firstWord == "golang"
|| firstWord == "json") {
|| firstWord == "json"
|| firstWord == "latex"
|| firstWord == "html"
|| firstWord == "php") {
codeLanguage = firstWord;
capturedText.remove(0, match.captured(0).length());
}

View File

@ -0,0 +1,49 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 1,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear",
"total_num_steps": "auto"
}
}
}

View File

@ -0,0 +1,48 @@
{
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"train_micro_batch_size_per_gpu": "auto",
"fp16": {
"enabled": "auto",
"min_loss_scale": 1,
"loss_scale_window": 1000,
"hysteresis": 2,
"initial_scale_power": 32
},
"bf16": {
"enabled": "auto"
},
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 2,
"offload_param": {
"device": "none"
},
"offload_optimizer": {
"device": "none"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": "auto",
"warmup_num_steps": "auto",
"warmup_type": "linear"
}
}
}

View File

@ -0,0 +1,34 @@
# model/tokenizer
model_name: "tiiuae/falcon-7b"
tokenizer_name: "tiiuae/falcon-7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-falcon"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 32
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: "/home/paperspace/gpt4all/ckpts/mpt/step_1000"
lora: false
warmup_steps: 500
num_epochs: 2
# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42

View File

@ -0,0 +1,34 @@
# model/tokenizer
model_name: "mosaicml/mpt-7b"
tokenizer_name: "mosaicml/mpt-7b"
gradient_checkpointing: false
save_name: "nomic-ai/mpt-finetuned-round2"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-j-prompt-generations"
revision: "v1.3-groovy"
max_length: 1024
batch_size: 8
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
eval_steps: 105
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/mpt"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 2
# logging
wandb: false
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42

View File

@ -0,0 +1,34 @@
# model/tokenizer
model_name: "openlm-research/open_llama_7b"
tokenizer_name: "openlm-research/open_llama_7b"
gradient_checkpointing: true
save_name: "nomic-ai/gpt4all-openllama"
# dataset
streaming: false
num_proc: 64
dataset_path: "nomic-ai/gpt4all-updated"
revision: null
max_length: 1024
batch_size: 32
# train dynamics
lr: 2.0e-5
min_lr: 0
weight_decay: 0.0
eval_every: 500
log_every: 10
save_every: 1000
log_grads_every: 500
output_dir: "ckpts/falcon"
checkpoint: null
lora: false
warmup_steps: 500
num_epochs: 3
# logging
wandb: true
wandb_entity: "gpt4all"
wandb_project_name: "gpt4all"
seed: 42
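
The training configs above are plain YAML read into a single `config` dictionary by the training script. A minimal sketch of loading one of them; the file path is illustrative and PyYAML is assumed to be installed.

```python
import yaml

# Minimal sketch: load a training config like the ones above into a plain dict.
# The path is illustrative; point it at whichever config file you are using.
with open("configs/train/finetune_openllama.yaml") as f:
    config = yaml.safe_load(f)

# Keys below all appear in the configs shown above.
print(config["model_name"], config["lr"], config["num_epochs"])
```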

View File

@ -12,7 +12,7 @@ def tokenize_inputs(config, tokenizer, examples):
# hacky backward compatible
different_eos = tokenizer.eos_token != "</s>"
out = {"labels": [], "input_ids": []}
out = {"labels": [], "input_ids": [], "attention_mask": []}
for prompt, response in zip(examples["prompt"], examples["response"]):
if different_eos:
if response.count("</s> \n") > 0:
@ -49,9 +49,10 @@ def tokenize_inputs(config, tokenizer, examples):
print(response)
raise
input_tokens = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length)["input_ids"]
padded = tokenizer.pad({"input_ids": input_tokens}, padding="max_length", max_length=max_length, return_tensors="pt")
out["labels"].append(labels)
out["input_ids"].append(input_tokens)
out["input_ids"].append(padded["input_ids"])
out["attention_mask"].append(padded["attention_mask"])
out = {k: torch.stack(v) if isinstance(v, list) else v for k, v in out.items()}
@ -72,7 +73,7 @@ def load_data(config, tokenizer):
dataset = load_dataset("json", data_files=files, split="train")
else:
dataset = load_dataset(dataset_path, split="train")
dataset = load_dataset(dataset_path, split="train", revision=config["revision"] if "revision" in config else None)
dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
@ -83,19 +84,23 @@ def load_data(config, tokenizer):
else:
kwargs = {}
cols_to_keep = ["input_ids", "labels", "attention_mask"]
# tokenize inputs and return labels and attention mask
train_dataset = train_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True,
remove_columns=["source", "prompt"],
**kwargs
)
remove_cols = [col for col in train_dataset.column_names if col not in cols_to_keep]
train_dataset = train_dataset.remove_columns(remove_cols)
val_dataset = val_dataset.map(
lambda ele: tokenize_inputs(config, tokenizer, ele),
batched=True,
remove_columns=["source", "prompt"],
**kwargs
)
remove_cols = [col for col in val_dataset.column_names if col not in cols_to_keep]
val_dataset = val_dataset.remove_columns(remove_cols)
train_dataset = train_dataset.with_format("torch")
val_dataset = val_dataset.with_format("torch")
@ -106,12 +111,14 @@ def load_data(config, tokenizer):
train_dataset,
collate_fn=DefaultDataCollator(),
batch_size=config["batch_size"],
shuffle=True,
)
val_dataloader = DataLoader(
val_dataset,
collate_fn=DefaultDataCollator(),
batch_size=config["batch_size"],
shuffle=True,
)
return train_dataloader, val_dataloader
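
The tokenize_inputs change above pads each example to max_length with `return_tensors="pt"` and now keeps the attention mask alongside input_ids. A small standalone sketch of that padding call, assuming a Hugging Face tokenizer; the gpt2 checkpoint and max_length value are illustrative.

```python
from transformers import AutoTokenizer

# Illustrative tokenizer; gpt2 has no pad token by default, so reuse eos.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# One encoded example, then pad it to a fixed length as in the hunk above.
input_tokens = [tokenizer("hello world")["input_ids"]]
padded = tokenizer.pad(
    {"input_ids": input_tokens},
    padding="max_length",
    max_length=16,
    return_tensors="pt",
)

# Both tensors come back padded to (batch, max_length).
print(padded["input_ids"].shape, padded["attention_mask"].shape)
```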

View File

@ -1,10 +1,10 @@
accelerate
datasets
einops
torchmetrics
evaluate
transformers>=4.28.0
wandb
pip
peft
nodelist-inflator
deepspeed

View File

@ -1,5 +1,5 @@
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler, LlamaForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler
import torch
from torch.optim import AdamW
from argparse import ArgumentParser
@ -42,7 +42,7 @@ def train(accelerator, config):
accelerator.print(config)
accelerator.print(f"Using {accelerator.num_processes} GPUs")
tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'])
tokenizer = AutoTokenizer.from_pretrained(config['tokenizer_name'], model_max_length=config['max_length'], use_fast=False)
# if no pad token, set it to eos
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
@ -53,6 +53,7 @@ def train(accelerator, config):
checkpoint = config["gradient_checkpointing"]
model = AutoModelForCausalLM.from_pretrained(config["model_name"],
use_cache=False if checkpoint else True,
trust_remote_code=True)
@ -86,7 +87,7 @@ def train(accelerator, config):
# decay to min_lr instead of 0
lr_ratio = config["min_lr"] / config["lr"]
accelerator.print(f"Len of train_dataloader: {len(train_dataloader)}")
total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * config["num_epochs"]
total_num_steps = (len(train_dataloader) / gradient_accumulation_steps) * (config["num_epochs"])
# instead of decaying to zero, decay to ratio of min_lr / lr
total_num_steps += int(total_num_steps * lr_ratio) + config["warmup_steps"]
accelerator.print(f"Total training steps: {total_num_steps}")
@ -104,7 +105,7 @@ def train(accelerator, config):
)
else:
scheduler = DummyScheduler(
optimizer, total_num_steps=config["warmup_steps"], warmup_num_steps=config["warmup_steps"]
optimizer, total_num_steps=total_num_steps, warmup_num_steps=config["warmup_steps"]
)
model, optimizer, train_dataloader, val_dataloader, scheduler = accelerator.prepare(
@ -117,26 +118,34 @@ def train(accelerator, config):
if config["checkpoint"]:
accelerator.load_state(config["checkpoint"])
accelerator.print(f"Resumed from checkpoint: {config['checkpoint']}")
path = os.path.basename(config["train_args"]["resume_from_checkpoint"])
path = os.path.basename(config["checkpoint"])
training_difference = os.path.splitext(path)[0]
resume_step = int(training_difference.replace("step_", ""))
accelerator.skip_first_batches(train_dataloader, resume_step)
train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
accelerator.print(f"Resuming from step {resume_step}")
else:
resume_step = 0
# log gradients
if accelerator.is_main_process and config["wandb"]:
wandb.watch(model, log_freq=config["log_grads_every"], log="all")
for epoch in range(config["num_epochs"]):
accelerator.wait_for_everyone()
for epoch in range(0, config["num_epochs"]):
train_loss = MeanMetric(nan_strategy="error").to(model.device)
for step, batch in enumerate(tqdm(train_dataloader)):
curr_step = epoch * len(train_dataloader) + step
model.train()
outputs = model(**batch)
loss = outputs.loss
# gather loss before backprop in case of gradient accumulation
loss_values = accelerator.gather_for_metrics({"loss": loss.detach().float()})
if config["wandb"]:
accelerator.log({"loss": torch.mean(loss_values["loss"]).item()}, step=curr_step)
train_loss.update(loss_values["loss"])
loss = loss / gradient_accumulation_steps
@ -144,9 +153,8 @@ def train(accelerator, config):
# get gradient norm of all params
# log LR in case something weird happens
if step > 0 and step % (config["eval_every"] // 10) == 0:
if step > 0 and step % (config["log_lr_every"]) == 0:
if config["wandb"]:
curr_step = step + epoch * len(train_dataloader)
accelerator.log({"lr": scheduler.get_last_lr()[0]}, step=curr_step)
if (step + 1) % gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
@ -156,7 +164,6 @@ def train(accelerator, config):
if step > 0 and step % config["save_every"] == 0:
curr_step = step + epoch * len(train_dataloader)
accelerator.save_state(f"{config['output_dir']}/step_{curr_step}")
if step > 0 and (step % config["eval_every"] == 0 or step == len(train_dataloader) - 1):
@ -170,7 +177,6 @@ def train(accelerator, config):
}
if config["wandb"]:
curr_step = step + epoch * len(train_dataloader)
accelerator.log({**log_train, **log_val}, step=curr_step)
accelerator.print(f"Current LR: {scheduler.get_last_lr()[0]}")
@ -181,8 +187,14 @@ def train(accelerator, config):
accelerator.print(f"Epoch {epoch} finished")
accelerator.print(f"Pushing to HF hub")
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/epoch_{epoch}",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)
try:
if accelerator.is_main_process:
unwrapped_model.push_to_hub(config["save_name"] + f"-epoch_{epoch}", private=True)
@ -191,21 +203,16 @@ def train(accelerator, config):
accelerator.print(e)
accelerator.print(f"Failed to push to hub")
if config["num_epochs"] > 1:
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/epoch_{epoch}",
f"{config['output_dir']}/final",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(
f"{config['output_dir']}/final",
is_main_process=accelerator.is_main_process,
save_function=accelerator.save,
state_dict=accelerator.get_state_dict(model),
)
accelerator.end_training()