Mirror of https://github.com/nomic-ai/gpt4all.git
Synced 2025-07-18 00:03:40 -04:00

Compare commits: 55 commits, efde701399 ... 3dde1d977c
Commits (SHA1):
3dde1d977c
af28bd0579
1e5d52fd3e
6ab97c4487
7f46228bf5
ce51f82cb3
d713c4c655
3c9acadcf3
45706602e2
81bdcc7c91
635b40d832
b63c162c25
780da62cc0
6eb6f23929
3c5b5f05ef
11e459ecef
0a19cef006
ce9f64e8bc
2a913ca301
299dabe7cd
ea41e60745
74b48005a5
246ba226f2
be830350b0
e5b0d2de51
71e2000552
73ff1c417b
463d9cb258
e86c63750d
f47e698193
84905aa281
ecf014f03b
e6e724d2dc
06a833e652
045f6e6cdc
0f046cf905
655372dbfa
aa33419c6e
79843c269e
9013a089bd
3076e0bf26
1fa67a585c
cf4eb530ce
21a3244645
0458c9b4e6
4b9a345aee
6f038c136b
86e862df7e
358ff2a477
891ddafc33
8f99dca70f
f0735efa7d
c953b321b7
0ad1472b62
c4d23512e4
```diff
@@ -312,7 +312,7 @@ jobs:
           mkdir build
           cd build
           $env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
-          cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
+          cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF
           cmake --build . --parallel
       - run:
           name: Build wheel
```

```diff
@@ -134,6 +134,8 @@ add_library(llmodel
     llmodel_c.h llmodel_c.cpp
     dlhandle.h
 )
+target_link_libraries(llmodel PRIVATE ggml-mainline-default)
+target_compile_definitions(llmodel PRIVATE GGML_BUILD_VARIANT="default")
 target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
 
 set_target_properties(llmodel PROPERTIES
```

```diff
@@ -1 +1 @@
-Subproject commit ced231980e0f88b9c7b454c456256c71c4f3cb75
+Subproject commit 0631ea363c14335969095976bbe17bf20503bc6d
```

```diff
@@ -154,6 +154,7 @@ if (LLAMA_OPENBLAS)
     endif()
 
 if (LLAMA_KOMPUTE)
+    add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
     find_package(Vulkan COMPONENTS glslc REQUIRED)
     find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
     if (NOT glslc_executable)
@@ -184,6 +185,21 @@ if (LLAMA_KOMPUTE)
         string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
         set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
         message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
+        if(CMAKE_GENERATOR MATCHES "Visual Studio")
+            add_custom_command(
+                OUTPUT ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}/xxd -i ${spv_file} >> ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
+                COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
+                DEPENDS ${spv_file} xxd
+                COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}/xxd"
+            )
+        else()
             add_custom_command(
                 OUTPUT ${OUTPUT_HEADER_FILE}
                 COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
@@ -197,6 +213,7 @@ if (LLAMA_KOMPUTE)
                 DEPENDS ${spv_file} xxd
                 COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
             )
+        endif()
     endforeach()
 endfunction()
 
```

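For reference, each of these custom commands stitches a compiled SPIR-V shader into a C++ header that gets baked into the library. A sketch of the generated file, assuming `xxd -i`'s usual symbol naming (the `op_scale` name and guard here are illustrative; the real identifiers derive from the shader's file name):

```cpp
/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/
#ifndef SHADEROP_SCALE_H
#define SHADEROP_SCALE_H
namespace kp {
namespace shader_data {
// xxd -i emits the raw SPIR-V bytes plus a length variable;
// 0x07230203 (little-endian here) is the SPIR-V magic number
unsigned char op_scale_comp_spv[] = {
    0x03, 0x02, 0x23, 0x07, /* ...remaining SPIR-V words truncated... */
};
unsigned int op_scale_comp_spv_len = 4; // real output carries the full byte count
}}
#endif // define SHADEROP_SCALE_H
```
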
```diff
@@ -168,6 +168,10 @@ bool LLamaModel::loadModel(const std::string &modelPath)
 
     d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
     if (!d_ptr->ctx) {
+#ifdef GGML_USE_KOMPUTE
+        // Explicitly free the device so next load it doesn't use it
+        ggml_vk_free_device();
+#endif
         std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
         return false;
     }
@@ -194,7 +198,7 @@ int32_t LLamaModel::threadCount() const {
 
 LLamaModel::~LLamaModel()
 {
-    if(d_ptr->ctx) {
+    if (d_ptr->ctx) {
         llama_free(d_ptr->ctx);
     }
 }
@@ -337,6 +341,16 @@ bool LLamaModel::hasGPUDevice()
 #endif
 }
 
+bool LLamaModel::usingGPUDevice()
+{
+#if defined(GGML_USE_KOMPUTE)
+    return ggml_vk_using_vulkan();
+#elif defined(GGML_USE_METAL)
+    return true;
+#endif
+    return false;
+}
+
 #if defined(_WIN32)
 #define DLL_EXPORT __declspec(dllexport)
 #else
```

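The `ggml_vk_free_device()` call is what makes the CPU fallback elsewhere in this changeset work: without it, a failed Vulkan load would leave the device claimed by the dead context. A minimal sketch of the resulting retry pattern (hypothetical caller; the header name and device index 0 are assumptions):

```cpp
#include <string>
#include "llamamodel_impl.h" // assumed header exposing LLamaModel

// Try a GPU-backed load once; on failure the backend has already released
// the Vulkan device, so the second loadModel() proceeds without it.
static bool loadWithFallback(LLamaModel &model, const std::string &path) {
    model.initializeGPUDevice(0);      // first enumerated GPU (assumption)
    if (model.loadModel(path))
        return true;                   // loaded with GPU offload
    return model.loadModel(path);      // retry lands on CPU
}
```
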
```diff
@@ -30,6 +30,7 @@ public:
     bool initializeGPUDevice(const GPUDevice &device) override;
     bool initializeGPUDevice(int device) override;
     bool hasGPUDevice() override;
+    bool usingGPUDevice() override;
 
 private:
     LLamaPrivate *d_ptr;
```

```diff
@@ -100,6 +100,8 @@ public:
     virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
+    virtual bool usingGPUDevice() { return false; }
+    static std::vector<GPUDevice> availableGPUDevices();
 
 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
```

```diff
@@ -4,6 +4,10 @@
 #include <iostream>
 #include <unordered_set>
 
+#ifdef GGML_USE_KOMPUTE
+#include "ggml-vulkan.h"
+#endif
+
 void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate) {
     size_t i = 0;
     promptCtx.n_past = 0;
@@ -174,3 +178,26 @@ std::vector<float> LLModel::embedding(const std::string &/*text*/)
     }
     return std::vector<float>();
 }
+
+std::vector<LLModel::GPUDevice> LLModel::availableGPUDevices()
+{
+#if defined(GGML_USE_KOMPUTE)
+    std::vector<ggml_vk_device> vkDevices = ggml_vk_available_devices(0);
+
+    std::vector<LLModel::GPUDevice> devices;
+    for(const auto& vkDevice : vkDevices) {
+        LLModel::GPUDevice device;
+        device.index = vkDevice.index;
+        device.type = vkDevice.type;
+        device.heapSize = vkDevice.heapSize;
+        device.name = vkDevice.name;
+        device.vendor = vkDevice.vendor;
+
+        devices.push_back(device);
+    }
+
+    return devices;
+#else
+    return std::vector<LLModel::GPUDevice>();
+#endif
+}
```

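This static entry point is what lets the settings UI further down enumerate GPUs before any model has been loaded. A minimal standalone consumer, using only the `GPUDevice` fields visible above (the list comes back empty unless the backend was built with `GGML_USE_KOMPUTE` and a working Vulkan driver is present):

```cpp
#include <iostream>
#include "llmodel.h"

int main() {
    for (const LLModel::GPUDevice &d : LLModel::availableGPUDevices()) {
        // type 2 is a discrete GPU, per the "Auto" check in the chatllm.cpp hunk below
        std::cout << d.index << ": " << d.vendor << ' ' << d.name
                  << " (heap " << d.heapSize << " bytes, type " << d.type << ")\n";
    }
    return 0;
}
```
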
```diff
@@ -975,6 +975,14 @@ const std::vector<LLModel::Token> &Replit::endTokens() const
     return fres;
 }
 
+bool Replit::usingGPUDevice()
+{
+#if defined(GGML_USE_METAL)
+    return true;
+#endif
+    return false;
+}
+
 #if defined(_WIN32)
 #define DLL_EXPORT __declspec(dllexport)
 #else
```

```diff
@@ -27,6 +27,7 @@ public:
     size_t restoreState(const uint8_t *src) override;
     void setThreadCount(int32_t n_threads) override;
     int32_t threadCount() const override;
+    bool usingGPUDevice() override;
 
 private:
     ReplitPrivate *d_ptr;
```

```diff
@@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,
 
 setup(
     name=package_name,
-    version="1.0.9",
+    version="1.0.12",
    description="Python bindings for GPT4All",
     author="Nomic and the Open Source Community",
     author_email="support@nomic.ai",
```

````diff
@@ -58,6 +58,7 @@ const fltArray = createEmbedding(model, "Pain is inevitable, suffering optional"
 * (win) msvc version 143
   * Can be obtained with visual studio 2022 build tools
 * python 3
+* Vulkan SDK. Should be installable via `pkg manager of choice` or [here](https://vulkan.lunarg.com/#new_tab)
 
 ### Build (from source)
 
@@ -73,15 +74,12 @@ cd gpt4all-bindings/typescript
 ```sh
 yarn
 ```
 
 * llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory
 
 ```sh
 git submodule update --init --depth 1 --recursive
 ```
 
 **AS OF NEW BACKEND** to build the backend,
 
 ```sh
 yarn build:backend
 ```
````

8 gpt4all-bindings/typescript/src/gpt4all.d.ts (vendored)

```diff
@@ -162,7 +162,7 @@ declare class LLModel {
     * GPUs that are usable for this LLModel
     * @returns
     */
-    availableGpus() : GpuDevice[]
+    listGpu() : GpuDevice[]
 }
 /**
  * an object that contains gpu data on this machine.
@@ -223,7 +223,7 @@ declare function loadModel(
 
 declare function loadModel(
     modelName: string,
-    options?: EmbeddingOptions | InferenceOptions
+    options?: EmbeddingModelOptions | InferenceModelOptions
 ): Promise<InferenceModel | EmbeddingModel>;
 
 /**
@@ -440,7 +440,7 @@ declare const DEFAULT_MODEL_CONFIG: ModelConfig;
 /**
  * Default prompt context.
  */
-declare const DEFAULT_PROMT_CONTEXT: LLModelPromptContext;
+declare const DEFAULT_PROMPT_CONTEXT: LLModelPromptContext;
 
 /**
  * Default model list url.
@@ -541,7 +541,7 @@ export {
     DEFAULT_DIRECTORY,
     DEFAULT_LIBRARIES_DIRECTORY,
     DEFAULT_MODEL_CONFIG,
-    DEFAULT_PROMT_CONTEXT,
+    DEFAULT_PROMPT_CONTEXT,
     DEFAULT_MODEL_LIST_URL,
     downloadModel,
     retrieveModel,
```

```diff
@@ -18,7 +18,7 @@ endif()
 
 set(APP_VERSION_MAJOR 2)
 set(APP_VERSION_MINOR 4)
-set(APP_VERSION_PATCH 15)
+set(APP_VERSION_PATCH 20)
 set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
 
 # Include the binary directory for the generated header file
```

```diff
@@ -56,6 +56,7 @@ void Chat::connectLLM()
     connect(m_llmodel, &ChatLLM::recalcChanged, this, &Chat::handleRecalculating, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &ChatLLM::reportDevice, this, &Chat::handleDeviceChanged, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection);
 
@@ -345,6 +346,12 @@ void Chat::handleTokenSpeedChanged(const QString &tokenSpeed)
     emit tokenSpeedChanged();
 }
 
+void Chat::handleDeviceChanged(const QString &device)
+{
+    m_device = device;
+    emit deviceChanged();
+}
+
 void Chat::handleDatabaseResultsChanged(const QList<ResultInfo> &results)
 {
     m_databaseResults = results;
```

```diff
@@ -25,6 +25,7 @@ class Chat : public QObject
     Q_PROPERTY(QList<QString> collectionList READ collectionList NOTIFY collectionListChanged)
     Q_PROPERTY(QString modelLoadingError READ modelLoadingError NOTIFY modelLoadingErrorChanged)
     Q_PROPERTY(QString tokenSpeed READ tokenSpeed NOTIFY tokenSpeedChanged);
+    Q_PROPERTY(QString device READ device NOTIFY deviceChanged);
     QML_ELEMENT
     QML_UNCREATABLE("Only creatable from c++!")
 
@@ -88,6 +89,7 @@ public:
     QString modelLoadingError() const { return m_modelLoadingError; }
 
     QString tokenSpeed() const { return m_tokenSpeed; }
+    QString device() const { return m_device; }
 
 public Q_SLOTS:
     void serverNewPromptResponsePair(const QString &prompt);
@@ -115,6 +117,7 @@ Q_SIGNALS:
     void isServerChanged();
     void collectionListChanged(const QList<QString> &collectionList);
     void tokenSpeedChanged();
+    void deviceChanged();
 
 private Q_SLOTS:
     void handleResponseChanged(const QString &response);
@@ -125,6 +128,7 @@ private Q_SLOTS:
     void handleRecalculating();
     void handleModelLoadingError(const QString &error);
     void handleTokenSpeedChanged(const QString &tokenSpeed);
+    void handleDeviceChanged(const QString &device);
     void handleDatabaseResultsChanged(const QList<ResultInfo> &results);
     void handleModelInfoChanged(const ModelInfo &modelInfo);
     void handleModelInstalled();
@@ -137,6 +141,7 @@ private:
     ModelInfo m_modelInfo;
     QString m_modelLoadingError;
     QString m_tokenSpeed;
+    QString m_device;
     QString m_response;
     QList<QString> m_collections;
     ChatModel *m_chatModel;
```

```diff
@@ -81,6 +81,7 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer)
     connect(parent, &Chat::idChanged, this, &ChatLLM::handleChatIdChanged);
     connect(&m_llmThread, &QThread::started, this, &ChatLLM::handleThreadStarted);
     connect(MySettings::globalInstance(), &MySettings::forceMetalChanged, this, &ChatLLM::handleForceMetalChanged);
+    connect(MySettings::globalInstance(), &MySettings::deviceChanged, this, &ChatLLM::handleDeviceChanged);
 
     // The following are blocking operations and will block the llm thread
     connect(this, &ChatLLM::requestRetrieveFromDB, LocalDocs::globalInstance()->database(), &Database::retrieveFromDB,
@@ -124,6 +125,16 @@ void ChatLLM::handleForceMetalChanged(bool forceMetal)
 #endif
 }
 
+void ChatLLM::handleDeviceChanged()
+{
+    if (isModelLoaded() && m_shouldBeLoaded) {
+        m_reloadingToChangeVariant = true;
+        unloadModel();
+        reloadModel();
+        m_reloadingToChangeVariant = false;
+    }
+}
+
 bool ChatLLM::loadDefaultModel()
 {
     ModelInfo defaultModel = ModelList::globalInstance()->defaultModelInfo();
@@ -250,16 +261,52 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #endif
 
     if (m_llModelInfo.model) {
+        // Update the settings that a model is being loaded and update the device list
         MySettings::globalInstance()->setAttemptModelLoad(filePath);
+
+        // Pick the best match for the device
+        QString actualDevice = m_llModelInfo.model->implementation().buildVariant() == "metal" ? "Metal" : "CPU";
+        const QString requestedDevice = MySettings::globalInstance()->device();
+        if (requestedDevice != "CPU") {
+            const size_t requiredMemory = m_llModelInfo.model->requiredMem(filePath.toStdString());
+            std::vector<LLModel::GPUDevice> availableDevices = m_llModelInfo.model->availableGPUDevices(requiredMemory);
+            if (!availableDevices.empty() && requestedDevice == "Auto" && availableDevices.front().type == 2 /*a discrete gpu*/) {
+                m_llModelInfo.model->initializeGPUDevice(availableDevices.front());
+                actualDevice = QString::fromStdString(availableDevices.front().name);
+            } else {
+                for (LLModel::GPUDevice &d : availableDevices) {
+                    if (QString::fromStdString(d.name) == requestedDevice) {
+                        m_llModelInfo.model->initializeGPUDevice(d);
+                        actualDevice = QString::fromStdString(d.name);
+                        break;
+                    }
+                }
+            }
+        }
+
+        // Report which device we're actually using
+        emit reportDevice(actualDevice);
+
         bool success = m_llModelInfo.model->loadModel(filePath.toStdString());
+        if (!success && actualDevice != "CPU") {
+            emit reportDevice("CPU");
+            success = m_llModelInfo.model->loadModel(filePath.toStdString());
+        }
+
         MySettings::globalInstance()->setAttemptModelLoad(QString());
         if (!success) {
-            delete std::exchange(m_llModelInfo.model, nullptr);
+            delete m_llModelInfo.model;
+            m_llModelInfo.model = nullptr;
             if (!m_isServer)
                 LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
             m_llModelInfo = LLModelInfo();
             emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
         } else {
+            // We might have had to fallback to CPU after load if the model is not possible to accelerate
+            // for instance if the quantization method is not supported on Vulkan yet
+            if (actualDevice != "CPU" && !m_llModelInfo.model->usingGPUDevice())
+                emit reportDevice("CPU");
+
             switch (m_llModelInfo.model->implementation().modelType()[0]) {
             case 'L': m_llModelType = LLModelType::LLAMA_; break;
             case 'G': m_llModelType = LLModelType::GPTJ_; break;
@@ -270,7 +317,8 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             case 'S': m_llModelType = LLModelType::STARCODER_; break;
             default:
                 {
-                    delete std::exchange(m_llModelInfo.model, nullptr);
+                    delete m_llModelInfo.model;
+                    m_llModelInfo.model = nullptr;
                     if (!m_isServer)
                         LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
                     m_llModelInfo = LLModelInfo();
```

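Distilled from the hunk above, the selection policy is: "Auto" takes the first reported device when it is a discrete GPU, an explicit name is matched against the enumerated list, and any load failure triggers a second `loadModel()` on CPU. A simplified, Qt-free sketch of the same flow (hypothetical helper; the real code also passes the model's required memory when enumerating devices):

```cpp
#include <string>
#include <vector>
#include "llmodel.h"

// Returns the device the model actually ended up on.
static std::string pickDeviceAndLoad(LLModel *model, const std::string &path,
                                     const std::string &requested) {
    std::string actual = "CPU";
    if (requested != "CPU") {
        std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();
        if (!devices.empty() && requested == "Auto" && devices.front().type == 2 /*discrete gpu*/) {
            model->initializeGPUDevice(devices.front());
            actual = devices.front().name;
        } else {
            for (const LLModel::GPUDevice &d : devices) {
                if (d.name == requested) {
                    model->initializeGPUDevice(d);
                    actual = d.name;
                    break;
                }
            }
        }
    }
    bool success = model->loadModel(path);
    if (!success && actual != "CPU") {
        actual = "CPU";                    // GPU load failed: retry on CPU
        success = model->loadModel(path);
    }
    // The backend can also fall back silently, e.g. a quantization that
    // Vulkan doesn't support yet; usingGPUDevice() catches that case.
    if (success && actual != "CPU" && !model->usingGPUDevice())
        actual = "CPU";
    return actual;
}
```
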
```diff
@@ -111,6 +111,7 @@ public Q_SLOTS:
     void handleShouldBeLoadedChanged();
     void handleThreadStarted();
     void handleForceMetalChanged(bool forceMetal);
+    void handleDeviceChanged();
     void processSystemPrompt();
 
 Q_SIGNALS:
@@ -128,6 +129,7 @@ Q_SIGNALS:
     void shouldBeLoadedChanged();
     void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
     void reportSpeed(const QString &speed);
+    void reportDevice(const QString &device);
     void databaseResultsChanged(const QList<ResultInfo>&);
     void modelInfoChanged(const ModelInfo &modelInfo);
 
```

```diff
@@ -1006,13 +1006,14 @@ Window {
         }
 
         Text {
-            id: speed
+            id: device
             anchors.bottom: textInputView.top
             anchors.bottomMargin: 20
             anchors.right: parent.right
             anchors.rightMargin: 30
             color: theme.mutedTextColor
-            text: currentChat.tokenSpeed
-            visible: currentChat.tokenSpeed !== ""
+            text: qsTr("Speed: ") + currentChat.tokenSpeed + "<br>" + qsTr("Device: ") + currentChat.device
             font.pixelSize: theme.fontSizeLarge
         }
 
```

```diff
@@ -464,6 +464,71 @@
         "
         * Lakshay Kansal (Nomic AI)
         * Adam Treat (Nomic AI)
         "
     },
+    {
+        "version": "2.4.15",
+        "notes":
+        "
+        * Add Vulkan GPU backend which allows inference on AMD, Intel and NVIDIA GPUs
+        * Add ability to switch font sizes
+        * Various bug fixes
+        ",
+        "contributors":
+        "
+        * Adam Treat (Nomic AI)
+        * Aaron Miller (Nomic AI)
+        * Nils Sauer (Nomic AI)
+        * Lakshay Kansal (Nomic AI)
+        "
+    },
+    {
+        "version": "2.4.16",
+        "notes":
+        "
+        * Bugfix for properly falling back to CPU when GPU can't be used
+        * Report the actual device we're using
+        * Fix context bugs for GPU accelerated models
+        ",
+        "contributors":
+        "
+        * Adam Treat (Nomic AI)
+        * Aaron Miller (Nomic AI)
+        "
+    },
+    {
+        "version": "2.4.17",
+        "notes":
+        "
+        * Bugfix for properly falling back to CPU when GPU is out of memory
+        ",
+        "contributors":
+        "
+        * Adam Treat (Nomic AI)
+        * Aaron Miller (Nomic AI)
+        "
+    },
+    {
+        "version": "2.4.18",
+        "notes":
+        "
+        * Bugfix for devices to show up in the settings combobox on application start and not just on model load
+        * Send information on requested device and actual device on model load to help assess which model/gpu/os combos are working
+        ",
+        "contributors":
+        "
+        * Adam Treat (Nomic AI)
+        "
+    },
+    {
+        "version": "2.4.19",
+        "notes":
+        "
+        * Fix a crasher on systems with corrupted vulkan drivers or corrupted vulkan dlls
+        ",
+        "contributors":
+        "
+        * Adam Treat (Nomic AI)
+        "
+    }
 ]
```

```diff
@@ -1,5 +1,6 @@
 #include "mysettings.h"
 #include "modellist.h"
+#include "../gpt4all-backend/llmodel.h"
 
 #include <QDir>
 #include <QFile>
@@ -23,6 +24,7 @@ static bool default_localDocsShowReferences = true;
 static QString default_networkAttribution = "";
 static bool default_networkIsActive = false;
 static bool default_networkUsageStatsActive = false;
+static QString default_device = "Auto";
 
 static QString defaultLocalModelsPath()
 {
@@ -62,6 +64,24 @@ MySettings::MySettings()
     : QObject{nullptr}
 {
     QSettings::setDefaultFormat(QSettings::IniFormat);
+
+    std::vector<LLModel::GPUDevice> devices = LLModel::availableGPUDevices();
+    QVector<QString> deviceList{ "Auto" };
+    for (LLModel::GPUDevice &d : devices)
+        deviceList << QString::fromStdString(d.name);
+    deviceList << "CPU";
+    setDeviceList(deviceList);
+}
+
+Q_INVOKABLE QVector<QString> MySettings::deviceList() const
+{
+    return m_deviceList;
+}
+
+void MySettings::setDeviceList(const QVector<QString> &deviceList)
+{
+    m_deviceList = deviceList;
+    emit deviceListChanged();
 }
 
 void MySettings::restoreModelDefaults(const ModelInfo &model)
@@ -79,6 +99,9 @@ void MySettings::restoreModelDefaults(const ModelInfo &model)
 
 void MySettings::restoreApplicationDefaults()
 {
+    setChatTheme(default_chatTheme);
+    setFontSize(default_fontSize);
+    setDevice(default_device);
     setThreadCount(default_threadCount);
     setSaveChats(default_saveChats);
     setSaveChatGPTChats(default_saveChatGPTChats);
@@ -485,7 +508,7 @@ QString MySettings::chatTheme() const
 
 void MySettings::setChatTheme(const QString &u)
 {
-    if(chatTheme() == u)
+    if (chatTheme() == u)
         return;
 
     QSettings setting;
@@ -503,7 +526,7 @@ QString MySettings::fontSize() const
 
 void MySettings::setFontSize(const QString &u)
 {
-    if(fontSize() == u)
+    if (fontSize() == u)
         return;
 
     QSettings setting;
@@ -512,6 +535,24 @@ void MySettings::setFontSize(const QString &u)
     emit fontSizeChanged();
 }
 
+QString MySettings::device() const
+{
+    QSettings setting;
+    setting.sync();
+    return setting.value("device", default_device).toString();
+}
+
+void MySettings::setDevice(const QString &u)
+{
+    if (device() == u)
+        return;
+
+    QSettings setting;
+    setting.setValue("device", u);
+    setting.sync();
+    emit deviceChanged();
+}
+
 bool MySettings::forceMetal() const
 {
     return m_forceMetal;
```

```diff
@@ -25,6 +25,8 @@ class MySettings : public QObject
     Q_PROPERTY(QString networkAttribution READ networkAttribution WRITE setNetworkAttribution NOTIFY networkAttributionChanged)
     Q_PROPERTY(bool networkIsActive READ networkIsActive WRITE setNetworkIsActive NOTIFY networkIsActiveChanged)
     Q_PROPERTY(bool networkUsageStatsActive READ networkUsageStatsActive WRITE setNetworkUsageStatsActive NOTIFY networkUsageStatsActiveChanged)
+    Q_PROPERTY(QString device READ device WRITE setDevice NOTIFY deviceChanged)
+    Q_PROPERTY(QVector<QString> deviceList READ deviceList NOTIFY deviceListChanged)
 
 public:
     static MySettings *globalInstance();
@@ -78,6 +80,8 @@ public:
     void setFontSize(const QString &u);
     bool forceMetal() const;
     void setForceMetal(bool b);
+    QString device() const;
+    void setDevice(const QString &u);
 
     // Release/Download settings
     QString lastVersionStarted() const;
@@ -102,6 +106,9 @@ public:
     QString attemptModelLoad() const;
     void setAttemptModelLoad(const QString &modelFile);
 
+    QVector<QString> deviceList() const;
+    void setDeviceList(const QVector<QString> &deviceList);
+
 Q_SIGNALS:
     void nameChanged(const ModelInfo &model);
     void filenameChanged(const ModelInfo &model);
@@ -131,9 +138,12 @@ Q_SIGNALS:
     void networkIsActiveChanged();
     void networkUsageStatsActiveChanged();
    void attemptModelLoadChanged();
+    void deviceChanged();
+    void deviceListChanged();
 
 private:
     bool m_forceMetal;
+    QVector<QString> m_deviceList;
 
 private:
     explicit MySettings();
```

```diff
@@ -393,6 +393,8 @@ void Network::sendMixpanelEvent(const QString &ev, const QVector<KeyValue> &valu
     properties.insert("name", QCoreApplication::applicationName() + " v"
         + QCoreApplication::applicationVersion());
     properties.insert("model", ChatListModel::globalInstance()->currentChat()->modelInfo().filename());
+    properties.insert("requestedDevice", MySettings::globalInstance()->device());
+    properties.insert("actualDevice", ChatListModel::globalInstance()->currentChat()->device());
 
     // Some additional startup information
     if (ev == "startup") {
```

```diff
@@ -89,17 +89,55 @@ MySettingsTab {
         }
     }
     Label {
-        id: defaultModelLabel
-        text: qsTr("Default model:")
+        id: deviceLabel
+        text: qsTr("Device:")
         color: theme.textColor
         font.pixelSize: theme.fontSizeLarge
         Layout.row: 3
         Layout.column: 0
     }
     MyComboBox {
-        id: comboBox
+        id: deviceBox
         Layout.row: 3
         Layout.column: 1
+        Layout.columnSpan: 1
+        Layout.minimumWidth: 350
+        Layout.fillWidth: false
+        model: MySettings.deviceList
+        Accessible.role: Accessible.ComboBox
+        Accessible.name: qsTr("ComboBox for displaying/picking the device")
+        Accessible.description: qsTr("Use this for picking the device of the chat client")
+        function updateModel() {
+            deviceBox.currentIndex = deviceBox.indexOfValue(MySettings.device);
+        }
+        Component.onCompleted: {
+            deviceBox.updateModel()
+        }
+        Connections {
+            target: MySettings
+            function onDeviceChanged() {
+                deviceBox.updateModel()
+            }
+            function onDeviceListChanged() {
+                deviceBox.updateModel()
+            }
+        }
+        onActivated: {
+            MySettings.device = deviceBox.currentText
+        }
+    }
+    Label {
+        id: defaultModelLabel
+        text: qsTr("Default model:")
+        color: theme.textColor
+        font.pixelSize: theme.fontSizeLarge
+        Layout.row: 4
+        Layout.column: 0
+    }
+    MyComboBox {
+        id: comboBox
+        Layout.row: 4
+        Layout.column: 1
         Layout.columnSpan: 2
         Layout.minimumWidth: 350
         Layout.fillWidth: true
@@ -128,7 +166,7 @@ MySettingsTab {
         text: qsTr("Download path:")
         color: theme.textColor
         font.pixelSize: theme.fontSizeLarge
-        Layout.row: 4
+        Layout.row: 5
         Layout.column: 0
     }
     MyDirectoryField {
@@ -136,7 +174,7 @@ MySettingsTab {
         text: MySettings.modelPath
         font.pixelSize: theme.fontSizeLarge
         implicitWidth: 300
-        Layout.row: 4
+        Layout.row: 5
         Layout.column: 1
         Layout.fillWidth: true
         ToolTip.text: qsTr("Path where model files will be downloaded to")
@@ -153,7 +191,7 @@ MySettingsTab {
         }
     }
     MyButton {
-        Layout.row: 4
+        Layout.row: 5
         Layout.column: 2
         text: qsTr("Browse")
         Accessible.description: qsTr("Opens a folder picker dialog to choose where to save model files")
@@ -168,7 +206,7 @@ MySettingsTab {
         text: qsTr("CPU Threads:")
         color: theme.textColor
         font.pixelSize: theme.fontSizeLarge
-        Layout.row: 5
+        Layout.row: 6
         Layout.column: 0
     }
     MyTextField {
@@ -177,7 +215,7 @@ MySettingsTab {
         font.pixelSize: theme.fontSizeLarge
         ToolTip.text: qsTr("Amount of processing threads to use bounded by 1 and number of logical processors")
         ToolTip.visible: hovered
-        Layout.row: 5
+        Layout.row: 6
        Layout.column: 1
         validator: IntValidator {
             bottom: 1
@@ -200,12 +238,12 @@ MySettingsTab {
         text: qsTr("Save chats to disk:")
         color: theme.textColor
         font.pixelSize: theme.fontSizeLarge
-        Layout.row: 6
+        Layout.row: 7
         Layout.column: 0
     }
     MyCheckBox {
         id: saveChatsBox
-        Layout.row: 6
+        Layout.row: 7
         Layout.column: 1
         checked: MySettings.saveChats
         onClicked: {
@@ -220,12 +258,12 @@ MySettingsTab {
         text: qsTr("Save ChatGPT chats to disk:")
         color: theme.textColor
         font.pixelSize: theme.fontSizeLarge
-        Layout.row: 7
+        Layout.row: 8
         Layout.column: 0
     }
     MyCheckBox {
         id: saveChatGPTChatsBox
-        Layout.row: 7
+        Layout.row: 8
         Layout.column: 1
         checked: MySettings.saveChatGPTChats
         onClicked: {
@@ -237,12 +275,12 @@ MySettingsTab {
         text: qsTr("Enable API server:")
         color: theme.textColor
         font.pixelSize: theme.fontSizeLarge
-        Layout.row: 8
+        Layout.row: 9
         Layout.column: 0
     }
     MyCheckBox {
         id: serverChatBox
-        Layout.row: 8
+        Layout.row: 9
         Layout.column: 1
         checked: MySettings.serverChat
         onClicked: {
@@ -252,7 +290,7 @@ MySettingsTab {
         ToolTip.visible: hovered
     }
     Rectangle {
-        Layout.row: 9
+        Layout.row: 10
         Layout.column: 0
         Layout.columnSpan: 3
         Layout.fillWidth: true
```