Compare commits


55 Commits

Author SHA1 Message Date
Jacob Nguyen  3dde1d977c  fix typings and vulkan build works on win  2023-09-16 13:59:16 -05:00
Jacob Nguyen  af28bd0579  Merge branch 'main' into feat(ts)/gpu  2023-09-16 13:45:25 -05:00
Adam Treat  1e5d52fd3e  Release notes for v2.4.19 and bump the version.  2023-09-16 13:44:08 -05:00
Adam Treat  6ab97c4487  Fix for crashes on systems where vulkan is not installed properly.  2023-09-16 13:44:08 -05:00
Adam Treat  7f46228bf5  Release notes for v2.4.18 and bump the version.  2023-09-16 13:44:08 -05:00
Adam Treat  ce51f82cb3  Actually bump the version.  2023-09-16 13:44:08 -05:00
Adam Treat  d713c4c655  Send actual and requested device info for those who have opt-in.  2023-09-16 13:44:08 -05:00
Adam Treat  3c9acadcf3  Link against ggml in bin so we can get the available devices without loading a model.  2023-09-16 13:44:08 -05:00
Adam Treat  45706602e2  Bump the Python version to python-v1.0.12 to restrict the quants that vulkan recognizes.  2023-09-16 13:44:08 -05:00
Adam Treat  81bdcc7c91  Release notes for v2.4.17 and bump the version.  2023-09-16 13:44:07 -05:00
Adam Treat  635b40d832  Fallback to CPU more robustly.  2023-09-16 13:44:07 -05:00
Adam Treat  b63c162c25  Release notes for v2.4.16 and bump the version.  2023-09-16 13:44:07 -05:00
Adam Treat  780da62cc0  Bump to new llama with new bugfix.  2023-09-16 13:44:07 -05:00
Adam Treat  6eb6f23929  Only show GPU when we're actually using it.  2023-09-16 13:44:07 -05:00
Adam Treat  3c5b5f05ef  Report the actual device we're using.  2023-09-16 13:44:07 -05:00
Adam Treat  11e459ecef  Sync to a newer version of llama.cpp with bugfix for vulkan.  2023-09-16 13:44:07 -05:00
Adam Treat  0a19cef006  Fix a bug where we're not properly falling back to CPU.  2023-09-16 13:44:07 -05:00
Adam Treat  ce9f64e8bc  Add version 2.4.15 and bump the version number.  2023-09-16 13:44:07 -05:00
Adam Treat  2a913ca301  Update the submodule.  2023-09-16 13:44:07 -05:00
Aaron Miller  299dabe7cd  init at most one vulkan device, submodule update (fixes issues w/ multiple of the same gpu)  2023-09-16 13:44:07 -05:00
Adam Treat  ea41e60745  Fix up the name and formatting.  2023-09-16 13:44:07 -05:00
Adam Treat  74b48005a5  Show the device we're currently using.  2023-09-16 13:44:07 -05:00
Adam Treat  246ba226f2  When device is Auto (the default) then we will only consider discrete GPU's otherwise fallback to CPU.  2023-09-16 13:44:07 -05:00
Adam Treat  be830350b0  Bring the vulkan backend to the GUI.  2023-09-16 13:44:07 -05:00
Aaron Miller  e5b0d2de51  vulkan python bindings on windows fixes  2023-09-16 13:44:06 -05:00
Adam Treat  71e2000552  Don't link against libvulkan.  2023-09-16 13:44:06 -05:00
Aaron Miller  73ff1c417b  bump python version (library linking fix)  2023-09-16 13:44:06 -05:00
Aaron Miller  463d9cb258  remove extra dynamic linker deps when building with vulkan  2023-09-16 13:44:06 -05:00
Jacob Nguyen  e86c63750d  Update llama.cpp.cmake (Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>)  2023-09-16 11:42:56 -07:00
Adam Treat  f47e698193  Release notes for v2.4.19 and bump the version.  2023-09-16 12:35:08 -04:00
Adam Treat  84905aa281  Fix for crashes on systems where vulkan is not installed properly.  2023-09-16 12:19:46 -04:00
Adam Treat  ecf014f03b  Release notes for v2.4.18 and bump the version.  2023-09-16 10:21:50 -04:00
Adam Treat  e6e724d2dc  Actually bump the version.  2023-09-16 10:07:20 -04:00
Adam Treat  06a833e652  Send actual and requested device info for those who have opt-in.  2023-09-16 09:42:22 -04:00
Adam Treat  045f6e6cdc  Link against ggml in bin so we can get the available devices without loading a model.  2023-09-15 14:45:25 -04:00
Adam Treat  0f046cf905  Bump the Python version to python-v1.0.12 to restrict the quants that vulkan recognizes.  2023-09-15 09:12:20 -04:00
Adam Treat  655372dbfa  Release notes for v2.4.17 and bump the version.  2023-09-14 17:11:04 -04:00
Adam Treat  aa33419c6e  Fallback to CPU more robustly.  2023-09-14 16:53:11 -04:00
Adam Treat  79843c269e  Release notes for v2.4.16 and bump the version.  2023-09-14 11:24:25 -04:00
Adam Treat  9013a089bd  Bump to new llama with new bugfix.  2023-09-14 10:02:11 -04:00
Adam Treat  3076e0bf26  Only show GPU when we're actually using it.  2023-09-14 09:59:19 -04:00
Adam Treat  1fa67a585c  Report the actual device we're using.  2023-09-14 08:25:37 -04:00
Adam Treat  cf4eb530ce  Sync to a newer version of llama.cpp with bugfix for vulkan.  2023-09-13 21:01:44 -04:00
Adam Treat  21a3244645  Fix a bug where we're not properly falling back to CPU.  2023-09-13 19:30:27 -04:00
Adam Treat  0458c9b4e6  Add version 2.4.15 and bump the version number.  2023-09-13 17:55:50 -04:00
Adam Treat  4b9a345aee  Update the submodule.  2023-09-13 17:05:46 -04:00
Aaron Miller  6f038c136b  init at most one vulkan device, submodule update (fixes issues w/ multiple of the same gpu)  2023-09-13 12:49:53 -07:00
Adam Treat  86e862df7e  Fix up the name and formatting.  2023-09-13 15:48:55 -04:00
Adam Treat  358ff2a477  Show the device we're currently using.  2023-09-13 15:24:33 -04:00
Adam Treat  891ddafc33  When device is Auto (the default) then we will only consider discrete GPU's otherwise fallback to CPU.  2023-09-13 11:59:36 -04:00
Adam Treat  8f99dca70f  Bring the vulkan backend to the GUI.  2023-09-13 11:26:10 -04:00
Aaron Miller  f0735efa7d  vulkan python bindings on windows fixes  2023-09-12 14:16:02 -07:00
Adam Treat  c953b321b7  Don't link against libvulkan.  2023-09-12 14:26:56 -04:00
Aaron Miller  0ad1472b62  bump python version (library linking fix)  2023-09-11 09:42:06 -07:00
Aaron Miller  c4d23512e4  remove extra dynamic linker deps when building with vulkan  2023-09-11 08:44:39 -07:00
24 changed files with 338 additions and 49 deletions


@@ -312,7 +312,7 @@ jobs:
mkdir build
cd build
$env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF
cmake --build . --parallel
- run:
name: Build wheel


@@ -134,6 +134,8 @@ add_library(llmodel
llmodel_c.h llmodel_c.cpp
dlhandle.h
)
target_link_libraries(llmodel PRIVATE ggml-mainline-default)
target_compile_definitions(llmodel PRIVATE GGML_BUILD_VARIANT="default")
target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
set_target_properties(llmodel PROPERTIES

@@ -1 +1 @@
Subproject commit ced231980e0f88b9c7b454c456256c71c4f3cb75
Subproject commit 0631ea363c14335969095976bbe17bf20503bc6d


@@ -154,6 +154,7 @@ if (LLAMA_OPENBLAS)
endif()
if (LLAMA_KOMPUTE)
add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
find_package(Vulkan COMPONENTS glslc REQUIRED)
find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
if (NOT glslc_executable)
@@ -184,19 +185,35 @@ if (LLAMA_KOMPUTE)
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
add_custom_command(
OUTPUT ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${spv_file} >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
DEPENDS ${spv_file} xxd
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
)
if(CMAKE_GENERATOR MATCHES "Visual Studio")
add_custom_command(
OUTPUT ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}/xxd -i ${spv_file} >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
DEPENDS ${spv_file} xxd
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}/xxd"
)
else()
add_custom_command(
OUTPUT ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${spv_file} >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
DEPENDS ${spv_file} xxd
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
)
endif()
endforeach()
endfunction()


@@ -168,6 +168,10 @@ bool LLamaModel::loadModel(const std::string &modelPath)
d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
if (!d_ptr->ctx) {
#ifdef GGML_USE_KOMPUTE
// Explicitly free the device so next load it doesn't use it
ggml_vk_free_device();
#endif
std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
return false;
}
@@ -194,7 +198,7 @@ int32_t LLamaModel::threadCount() const {
LLamaModel::~LLamaModel()
{
if(d_ptr->ctx) {
if (d_ptr->ctx) {
llama_free(d_ptr->ctx);
}
}
@@ -337,6 +341,16 @@ bool LLamaModel::hasGPUDevice()
#endif
}
bool LLamaModel::usingGPUDevice()
{
#if defined(GGML_USE_KOMPUTE)
return ggml_vk_using_vulkan();
#elif defined(GGML_USE_METAL)
return true;
#endif
return false;
}
#if defined(_WIN32)
#define DLL_EXPORT __declspec(dllexport)
#else


@@ -30,6 +30,7 @@ public:
bool initializeGPUDevice(const GPUDevice &device) override;
bool initializeGPUDevice(int device) override;
bool hasGPUDevice() override;
bool usingGPUDevice() override;
private:
LLamaPrivate *d_ptr;


@@ -100,6 +100,8 @@ public:
virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
virtual bool initializeGPUDevice(int /*device*/) { return false; }
virtual bool hasGPUDevice() { return false; }
virtual bool usingGPUDevice() { return false; }
static std::vector<GPUDevice> availableGPUDevices();
protected:
// These are pure virtual because subclasses need to implement as the default implementation of


@@ -4,6 +4,10 @@
#include <iostream>
#include <unordered_set>
#ifdef GGML_USE_KOMPUTE
#include "ggml-vulkan.h"
#endif
void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
size_t i = 0;
promptCtx.n_past = 0;
@@ -174,3 +178,26 @@ std::vector<float> LLModel::embedding(const std::string &/*text*/)
}
return std::vector<float>();
}
std::vector<LLModel::GPUDevice> LLModel::availableGPUDevices()
{
#if defined(GGML_USE_KOMPUTE)
std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(0);
std::vector<LLModel::GPUDevice> devices;
for(const auto& vkDevice : vkDevices) {
LLModel::GPUDevice device;
device.index = vkDevice.index;
device.type = vkDevice.type;
device.heapSize = vkDevice.heapSize;
device.name = vkDevice.name;
device.vendor = vkDevice.vendor;
devices.push_back(device);
}
return devices;
#else
return std::vector<LLModel::GPUDevice>();
#endif
}
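The hunk above adds the backend-level GPU enumeration API that the chat client and bindings build on. As a minimal sketch (not part of this change set), the intended call sequence mirrors the ChatLLM::loadModel() hunk further down: query the devices that can hold the model, initialize one, load, and then check usingGPUDevice() to detect a post-load CPU fallback. The helper name below is hypothetical; the LLModel calls are the ones shown in these diffs.

```cpp
// Illustrative sketch only -- assumes the gpt4all-backend LLModel interface
// shown in the surrounding hunks (requiredMem, availableGPUDevices,
// initializeGPUDevice, loadModel, usingGPUDevice); include llmodel.h to build.
#include <iostream>
#include <string>
#include <vector>

bool loadPreferringDiscreteGpu(LLModel *model, const std::string &modelPath)
{
    const size_t required = model->requiredMem(modelPath);
    std::vector<LLModel::GPUDevice> devices = model->availableGPUDevices(required);

    // "Auto" behavior from the ChatLLM hunk: pick the first discrete GPU
    // (type == 2); otherwise stay on the CPU backend.
    if (!devices.empty() && devices.front().type == 2 /* discrete gpu */)
        model->initializeGPUDevice(devices.front());

    if (!model->loadModel(modelPath))
        return false;

    // The backend may still fall back during load, e.g. when the quantization
    // is not yet supported on Vulkan, so report the device actually in use.
    if (!model->usingGPUDevice())
        std::cerr << "model loaded, but running on CPU" << std::endl;
    return true;
}
```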


@@ -163,7 +163,7 @@ struct mpt_hparams {
int32_t n_embd = 0; //max_seq_len
int32_t n_head = 0; // n_heads
int32_t n_layer = 0; //n_layers
int32_t ftype = 0;
int32_t ftype = 0;
};
struct replit_layer {
@@ -220,7 +220,7 @@ static bool kv_cache_init(
params.mem_size = cache.buf.size;
params.mem_buffer = cache.buf.addr;
params.no_alloc = false;
cache.ctx = ggml_init(params);
if (!cache.ctx) {
fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
@@ -503,7 +503,7 @@ bool replit_model_load(const std::string & fname, std::istream &fin, replit_mode
}
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "data", data_ptr, data_size, max_size));
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "kv", ggml_get_mem_buffer(model.kv_self.ctx),
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "kv", ggml_get_mem_buffer(model.kv_self.ctx),
ggml_get_mem_size(model.kv_self.ctx), 0));
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "eval", model.eval_buf.addr, model.eval_buf.size, 0));
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "scr0", model.scr0_buf.addr, model.scr0_buf.size, 0));
@@ -975,6 +975,14 @@ const std::vector<LLModel::Token> &Replit::endTokens() const
return fres;
}
bool Replit::usingGPUDevice()
{
#if defined(GGML_USE_METAL)
return true;
#endif
return false;
}
#if defined(_WIN32)
#define DLL_EXPORT __declspec(dllexport)
#else


@@ -27,6 +27,7 @@ public:
size_t restoreState(const uint8_t *src) override;
void setThreadCount(int32_t n_threads) override;
int32_t threadCount() const override;
bool usingGPUDevice() override;
private:
ReplitPrivate *d_ptr;


@@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,
setup(
name=package_name,
version="1.0.9",
version="1.0.12",
description="Python bindings for GPT4All",
author="Nomic and the Open Source Community",
author_email="support@nomic.ai",


@@ -58,6 +58,7 @@ const fltArray = createEmbedding(model, "Pain is inevitable, suffering optional"
* (win) msvc version 143
* Can be obtained with visual studio 2022 build tools
* python 3
* Vulkan SDK. Should be installable via `pkg manager of choice` or [here](https://vulkan.lunarg.com/#new_tab)
### Build (from source)
@@ -73,15 +74,12 @@ cd gpt4all-bindings/typescript
```sh
yarn
```
* llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory
```sh
git submodule update --init --depth 1 --recursive
```
**AS OF NEW BACKEND** to build the backend,
```sh
yarn build:backend
```


@@ -162,7 +162,7 @@ declare class LLModel {
* GPUs that are usable for this LLModel
* @returns
*/
availableGpus() : GpuDevice[]
listGpu() : GpuDevice[]
}
/**
* an object that contains gpu data on this machine.
@@ -223,7 +223,7 @@ declare function loadModel(
declare function loadModel(
modelName: string,
options?: EmbeddingOptions | InferenceOptions
options?: EmbeddingModelOptions | InferenceModelOptions
): Promise<InferenceModel | EmbeddingModel>;
/**
@@ -440,7 +440,7 @@ declare const DEFAULT_MODEL_CONFIG: ModelConfig;
/**
* Default prompt context.
*/
declare const DEFAULT_PROMT_CONTEXT: LLModelPromptContext;
declare const DEFAULT_PROMPT_CONTEXT: LLModelPromptContext;
/**
* Default model list url.
@@ -541,7 +541,7 @@ export {
DEFAULT_DIRECTORY,
DEFAULT_LIBRARIES_DIRECTORY,
DEFAULT_MODEL_CONFIG,
DEFAULT_PROMT_CONTEXT,
DEFAULT_PROMPT_CONTEXT,
DEFAULT_MODEL_LIST_URL,
downloadModel,
retrieveModel,


@@ -18,7 +18,7 @@ endif()
set(APP_VERSION_MAJOR 2)
set(APP_VERSION_MINOR 4)
set(APP_VERSION_PATCH 15)
set(APP_VERSION_PATCH 20)
set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
# Include the binary directory for the generated header file


@@ -56,6 +56,7 @@ void Chat::connectLLM()
connect(m_llmodel, &ChatLLM::recalcChanged, this, &Chat::handleRecalculating, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::reportDevice, this, &Chat::handleDeviceChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection);
@@ -345,6 +346,12 @@ void Chat::handleTokenSpeedChanged(const QString &tokenSpeed)
emit tokenSpeedChanged();
}
void Chat::handleDeviceChanged(const QString &device)
{
m_device = device;
emit deviceChanged();
}
void Chat::handleDatabaseResultsChanged(const QList<ResultInfo> &results)
{
m_databaseResults = results;


@@ -25,6 +25,7 @@ class Chat : public QObject
Q_PROPERTY(QList<QString> collectionList READ collectionList NOTIFY collectionListChanged)
Q_PROPERTY(QString modelLoadingError READ modelLoadingError NOTIFY modelLoadingErrorChanged)
Q_PROPERTY(QString tokenSpeed READ tokenSpeed NOTIFY tokenSpeedChanged);
Q_PROPERTY(QString device READ device NOTIFY deviceChanged);
QML_ELEMENT
QML_UNCREATABLE("Only creatable from c++!")
@@ -88,6 +89,7 @@ public:
QString modelLoadingError() const { return m_modelLoadingError; }
QString tokenSpeed() const { return m_tokenSpeed; }
QString device() const { return m_device; }
public Q_SLOTS:
void serverNewPromptResponsePair(const QString &prompt);
@@ -115,6 +117,7 @@ Q_SIGNALS:
void isServerChanged();
void collectionListChanged(const QList<QString> &collectionList);
void tokenSpeedChanged();
void deviceChanged();
private Q_SLOTS:
void handleResponseChanged(const QString &response);
@@ -125,6 +128,7 @@ private Q_SLOTS:
void handleRecalculating();
void handleModelLoadingError(const QString &error);
void handleTokenSpeedChanged(const QString &tokenSpeed);
void handleDeviceChanged(const QString &device);
void handleDatabaseResultsChanged(const QList<ResultInfo> &results);
void handleModelInfoChanged(const ModelInfo &modelInfo);
void handleModelInstalled();
@@ -137,6 +141,7 @@ private:
ModelInfo m_modelInfo;
QString m_modelLoadingError;
QString m_tokenSpeed;
QString m_device;
QString m_response;
QList<QString> m_collections;
ChatModel *m_chatModel;


@@ -81,6 +81,7 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer)
connect(parent, &Chat::idChanged, this, &ChatLLM::handleChatIdChanged);
connect(&m_llmThread, &QThread::started, this, &ChatLLM::handleThreadStarted);
connect(MySettings::globalInstance(), &MySettings::forceMetalChanged, this, &ChatLLM::handleForceMetalChanged);
connect(MySettings::globalInstance(), &MySettings::deviceChanged, this, &ChatLLM::handleDeviceChanged);
// The following are blocking operations and will block the llm thread
connect(this, &ChatLLM::requestRetrieveFromDB, LocalDocs::globalInstance()->database(), &Database::retrieveFromDB,
@@ -124,6 +125,16 @@ void ChatLLM::handleForceMetalChanged(bool forceMetal)
#endif
}
void ChatLLM::handleDeviceChanged()
{
if (isModelLoaded() && m_shouldBeLoaded) {
m_reloadingToChangeVariant = true;
unloadModel();
reloadModel();
m_reloadingToChangeVariant = false;
}
}
bool ChatLLM::loadDefaultModel()
{
ModelInfo defaultModel = ModelList::globalInstance()->defaultModelInfo();
@@ -250,16 +261,52 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
#endif
if (m_llModelInfo.model) {
// Update the settings that a model is being loaded and update the device list
MySettings::globalInstance()->setAttemptModelLoad(filePath);
// Pick the best match for the device
QString actualDevice = m_llModelInfo.model->implementation().buildVariant() == "metal" ? "Metal" : "CPU";
const QString requestedDevice = MySettings::globalInstance()->device();
if (requestedDevice != "CPU") {
const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString());
std::vector<LLModel::GPUDevice> availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
if (!availableDevices.empty() && requestedDevice == "Auto" && availableDevices.front().type == 2 /*a discrete gpu*/) {
m_llModelInfo.model->initializeGPUDevice(availableDevices.front());
actualDevice = QString::fromStdString(availableDevices.front().name);
} else {
for (LLModel::GPUDevice &d : availableDevices) {
if (QString::fromStdString(d.name) == requestedDevice) {
m_llModelInfo.model->initializeGPUDevice(d);
actualDevice = QString::fromStdString(d.name);
break;
}
}
}
}
// Report which device we're actually using
emit reportDevice(actualDevice);
bool success = m_llModelInfo.model->loadModel(filePath.toStdString());
if (!success && actualDevice != "CPU") {
emit reportDevice("CPU");
success = m_llModelInfo.model->loadModel(filePath.toStdString());
}
MySettings::globalInstance()->setAttemptModelLoad(QString());
if (!success) {
delete std::exchange(m_llModelInfo.model, nullptr);
delete m_llModelInfo.model;
m_llModelInfo.model = nullptr;
if (!m_isServer)
LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
m_llModelInfo = LLModelInfo();
emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
} else {
// We might have had to fallback to CPU after load if the model is not possible to accelerate
// for instance if the quantization method is not supported on Vulkan yet
if (actualDevice != "CPU" && !m_llModelInfo.model->usingGPUDevice())
emit reportDevice("CPU");
switch (m_llModelInfo.model->implementation().modelType()[0]) {
case 'L': m_llModelType = LLModelType::LLAMA_; break;
case 'G': m_llModelType = LLModelType::GPTJ_; break;
@@ -270,7 +317,8 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
case 'S': m_llModelType = LLModelType::STARCODER_; break;
default:
{
delete std::exchange(m_llModelInfo.model, nullptr);
delete m_llModelInfo.model;
m_llModelInfo.model = nullptr;
if (!m_isServer)
LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
m_llModelInfo = LLModelInfo();


@@ -111,6 +111,7 @@ public Q_SLOTS:
void handleShouldBeLoadedChanged();
void handleThreadStarted();
void handleForceMetalChanged(bool forceMetal);
void handleDeviceChanged();
void processSystemPrompt();
Q_SIGNALS:
@@ -128,6 +129,7 @@ Q_SIGNALS:
void shouldBeLoadedChanged();
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
void reportSpeed(const QString &speed);
void reportDevice(const QString &device);
void databaseResultsChanged(const QList<ResultInfo>&);
void modelInfoChanged(const ModelInfo &modelInfo);


@@ -1006,13 +1006,14 @@ Window {
}
Text {
id: speed
id: device
anchors.bottom: textInputView.top
anchors.bottomMargin: 20
anchors.right: parent.right
anchors.rightMargin: 30
color: theme.mutedTextColor
text: currentChat.tokenSpeed
visible: currentChat.tokenSpeed !== ""
text: qsTr("Speed: ") + currentChat.tokenSpeed + "<br>" + qsTr("Device: ") + currentChat.device
font.pixelSize: theme.fontSizeLarge
}


@@ -464,6 +464,71 @@
"
* Lakshay Kansal (Nomic AI)
* Adam Treat (Nomic AI)
"
},
{
"version": "2.4.15",
"notes":
"
* Add Vulkan GPU backend which allows inference on AMD, Intel and NVIDIA GPUs
* Add ability to switch font sizes
* Various bug fixes
",
"contributors":
"
* Adam Treat (Nomic AI)
* Aaron Miller (Nomic AI)
* Nils Sauer (Nomic AI)
* Lakshay Kansal (Nomic AI)
"
},
{
"version": "2.4.16",
"notes":
"
* Bugfix for properly falling back to CPU when GPU can't be used
* Report the actual device we're using
* Fix context bugs for GPU accelerated models
",
"contributors":
"
* Adam Treat (Nomic AI)
* Aaron Miller (Nomic AI)
"
},
{
"version": "2.4.17",
"notes":
"
* Bugfix for properly falling back to CPU when GPU is out of memory
",
"contributors":
"
* Adam Treat (Nomic AI)
* Aaron Miller (Nomic AI)
"
},
{
"version": "2.4.18",
"notes":
"
* Bugfix for devices to show up in the settings combobox on application start and not just on model load
* Send information on requested device and actual device on model load to help assess which model/gpu/os combos are working
",
"contributors":
"
* Adam Treat (Nomic AI)
"
},
{
"version": "2.4.19",
"notes":
"
* Fix a crasher on systems with corrupted vulkan drivers or corrupted vulkan dlls
",
"contributors":
"
* Adam Treat (Nomic AI)
"
}
]


@@ -1,5 +1,6 @@
#include "mysettings.h"
#include "modellist.h"
#include "../gpt4all-backend/llmodel.h"
#include <QDir>
#include <QFile>
@@ -23,6 +24,7 @@ static bool default_localDocsShowReferences = true;
static QString default_networkAttribution = "";
static bool default_networkIsActive = false;
static bool default_networkUsageStatsActive = false;
static QString default_device = "Auto";
static QString defaultLocalModelsPath()
{
@@ -62,6 +64,24 @@ MySettings::MySettings()
: QObject{nullptr}
{
QSettings::setDefaultFormat(QSettings::IniFormat);
std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();
QVector<QString> deviceList{ "Auto" };
for (LLModel::GPUDevice &d : devices)
deviceList << QString::fromStdString(d.name);
deviceList << "CPU";
setDeviceList(deviceList);
}
Q_INVOKABLE QVector<QString> MySettings::deviceList() const
{
return m_deviceList;
}
void MySettings::setDeviceList(const QVector<QString> &deviceList)
{
m_deviceList = deviceList;
emit deviceListChanged();
}
void MySettings::restoreModelDefaults(const ModelInfo &model)
@@ -79,6 +99,9 @@ void MySettings::restoreModelDefaults(const ModelInfo &model)
void MySettings::restoreApplicationDefaults()
{
setChatTheme(default_chatTheme);
setFontSize(default_fontSize);
setDevice(default_device);
setThreadCount(default_threadCount);
setSaveChats(default_saveChats);
setSaveChatGPTChats(default_saveChatGPTChats);
@@ -485,7 +508,7 @@ QString MySettings::chatTheme() const
void MySettings::setChatTheme(const QString &u)
{
if(chatTheme() == u)
if (chatTheme() == u)
return;
QSettings setting;
@@ -503,7 +526,7 @@ QString MySettings::fontSize() const
void MySettings::setFontSize(const QString &u)
{
if(fontSize() == u)
if (fontSize() == u)
return;
QSettings setting;
@@ -512,6 +535,24 @@ void MySettings::setFontSize(const QString &u)
emit fontSizeChanged();
}
QString MySettings::device() const
{
QSettings setting;
setting.sync();
return setting.value("device", default_device).toString();
}
void MySettings::setDevice(const QString &u)
{
if (device() == u)
return;
QSettings setting;
setting.setValue("device", u);
setting.sync();
emit deviceChanged();
}
bool MySettings::forceMetal() const
{
return m_forceMetal;


@@ -25,6 +25,8 @@ class MySettings : public QObject
Q_PROPERTY(QString networkAttribution READ networkAttribution WRITE setNetworkAttribution NOTIFY networkAttributionChanged)
Q_PROPERTY(bool networkIsActive READ networkIsActive WRITE setNetworkIsActive NOTIFY networkIsActiveChanged)
Q_PROPERTY(bool networkUsageStatsActive READ networkUsageStatsActive WRITE setNetworkUsageStatsActive NOTIFY networkUsageStatsActiveChanged)
Q_PROPERTY(QString device READ device WRITE setDevice NOTIFY deviceChanged)
Q_PROPERTY(QVector<QString> deviceList READ deviceList NOTIFY deviceListChanged)
public:
static MySettings *globalInstance();
@@ -78,6 +80,8 @@ public:
void setFontSize(const QString &u);
bool forceMetal() const;
void setForceMetal(bool b);
QString device() const;
void setDevice(const QString &u);
// Release/Download settings
QString lastVersionStarted() const;
@@ -102,6 +106,9 @@ public:
QString attemptModelLoad() const;
void setAttemptModelLoad(const QString &modelFile);
QVector<QString> deviceList() const;
void setDeviceList(const QVector<QString> &deviceList);
Q_SIGNALS:
void nameChanged(const ModelInfo &model);
void filenameChanged(const ModelInfo &model);
@@ -131,9 +138,12 @@ Q_SIGNALS:
void networkIsActiveChanged();
void networkUsageStatsActiveChanged();
void attemptModelLoadChanged();
void deviceChanged();
void deviceListChanged();
private:
bool m_forceMetal;
QVector<QString> m_deviceList;
private:
explicit MySettings();


@@ -393,6 +393,8 @@ void Network::sendMixpanelEvent(const QString &ev, const QVector<KeyValue> &valu
properties.insert("name", QCoreApplication::applicationName() + " v"
+ QCoreApplication::applicationVersion());
properties.insert("model", ChatListModel::globalInstance()->currentChat()->modelInfo().filename());
properties.insert("requestedDevice", MySettings::globalInstance()->device());
properties.insert("actualDevice", ChatListModel::globalInstance()->currentChat()->device());
// Some additional startup information
if (ev == "startup") {


@@ -89,17 +89,55 @@ MySettingsTab {
}
}
Label {
id: defaultModelLabel
text: qsTr("Default model:")
id: deviceLabel
text: qsTr("Device:")
color: theme.textColor
font.pixelSize: theme.fontSizeLarge
Layout.row: 3
Layout.column: 0
}
MyComboBox {
id: comboBox
id: deviceBox
Layout.row: 3
Layout.column: 1
Layout.columnSpan: 1
Layout.minimumWidth: 350
Layout.fillWidth: false
model: MySettings.deviceList
Accessible.role: Accessible.ComboBox
Accessible.name: qsTr("ComboBox for displaying/picking the device")
Accessible.description: qsTr("Use this for picking the device of the chat client")
function updateModel() {
deviceBox.currentIndex = deviceBox.indexOfValue(MySettings.device);
}
Component.onCompleted: {
deviceBox.updateModel()
}
Connections {
target: MySettings
function onDeviceChanged() {
deviceBox.updateModel()
}
function onDeviceListChanged() {
deviceBox.updateModel()
}
}
onActivated: {
MySettings.device = deviceBox.currentText
}
}
Label {
id: defaultModelLabel
text: qsTr("Default model:")
color: theme.textColor
font.pixelSize: theme.fontSizeLarge
Layout.row: 4
Layout.column: 0
}
MyComboBox {
id: comboBox
Layout.row: 4
Layout.column: 1
Layout.columnSpan: 2
Layout.minimumWidth: 350
Layout.fillWidth: true
@@ -128,7 +166,7 @@
text: qsTr("Download path:")
color: theme.textColor
font.pixelSize: theme.fontSizeLarge
Layout.row: 4
Layout.row: 5
Layout.column: 0
}
MyDirectoryField {
@@ -136,7 +174,7 @@
text: MySettings.modelPath
font.pixelSize: theme.fontSizeLarge
implicitWidth: 300
Layout.row: 4
Layout.row: 5
Layout.column: 1
Layout.fillWidth: true
ToolTip.text: qsTr("Path where model files will be downloaded to")
@@ -153,7 +191,7 @@
}
}
MyButton {
Layout.row: 4
Layout.row: 5
Layout.column: 2
text: qsTr("Browse")
Accessible.description: qsTr("Opens a folder picker dialog to choose where to save model files")
@@ -168,7 +206,7 @@
text: qsTr("CPU Threads:")
color: theme.textColor
font.pixelSize: theme.fontSizeLarge
Layout.row: 5
Layout.row: 6
Layout.column: 0
}
MyTextField {
@@ -177,7 +215,7 @@
font.pixelSize: theme.fontSizeLarge
ToolTip.text: qsTr("Amount of processing threads to use bounded by 1 and number of logical processors")
ToolTip.visible: hovered
Layout.row: 5
Layout.row: 6
Layout.column: 1
validator: IntValidator {
bottom: 1
@@ -200,12 +238,12 @@
text: qsTr("Save chats to disk:")
color: theme.textColor
font.pixelSize: theme.fontSizeLarge
Layout.row: 6
Layout.row: 7
Layout.column: 0
}
MyCheckBox {
id: saveChatsBox
Layout.row: 6
Layout.row: 7
Layout.column: 1
checked: MySettings.saveChats
onClicked: {
@@ -220,12 +258,12 @@
text: qsTr("Save ChatGPT chats to disk:")
color: theme.textColor
font.pixelSize: theme.fontSizeLarge
Layout.row: 7
Layout.row: 8
Layout.column: 0
}
MyCheckBox {
id: saveChatGPTChatsBox
Layout.row: 7
Layout.row: 8
Layout.column: 1
checked: MySettings.saveChatGPTChats
onClicked: {
@@ -237,12 +275,12 @@
text: qsTr("Enable API server:")
color: theme.textColor
font.pixelSize: theme.fontSizeLarge
Layout.row: 8
Layout.row: 9
Layout.column: 0
}
MyCheckBox {
id: serverChatBox
Layout.row: 8
Layout.row: 9
Layout.column: 1
checked: MySettings.serverChat
onClicked: {
@@ -252,7 +290,7 @@
ToolTip.visible: hovered
}
Rectangle {
Layout.row: 9
Layout.row: 10
Layout.column: 0
Layout.columnSpan: 3
Layout.fillWidth: true