Compare commits

...

5 Commits

AT, 96cee4f9ac (2024-01-03 14:06:08 -05:00)
    Explicitly clear the kv cache each time we eval tokens to match n_past. (#1808)

ThiloteE, 2d566710e5 (2024-01-03 11:13:07 -06:00)
    Address review

ThiloteE, a0f7d7ae0e (2024-01-03 11:13:07 -06:00)
    Fix for "LLModel ERROR: Could not find CPU LLaMA implementation" v2

ThiloteE, 38d81c14d0 (2024-01-03 11:13:07 -06:00)
    Fixes https://github.com/nomic-ai/gpt4all/issues/1760 LLModel ERROR: Could not find CPU LLaMA implementation.
    Inspired by the Microsoft docs for LoadLibraryExA
    (https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexa):
    when using LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR, the lpFileName parameter must specify a fully
    qualified path, and it must use backslashes (\), not forward slashes (/).

Gerhard Stein, 3e99b90c0b (2024-01-03 08:41:40 -06:00)
    Some cleanups
6 changed files with 15 additions and 17 deletions

View File

@@ -53,6 +53,8 @@ public:
     }
 };
 #else
+#include <algorithm>
+#include <filesystem>
 #include <string>
 #include <exception>
 #include <stdexcept>
@@ -75,7 +77,9 @@ public:
     Dlhandle() : chandle(nullptr) {}
     Dlhandle(const std::string& fpath) {
-        chandle = LoadLibraryExA(fpath.c_str(), NULL, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
+        std::string afpath = std::filesystem::absolute(fpath).string();
+        std::replace(afpath.begin(), afpath.end(), '/', '\\');
+        chandle = LoadLibraryExA(afpath.c_str(), NULL, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
         if (!chandle) {
             throw Exception("dlopen(\""+fpath+"\"): Error");
         }
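Taken on its own, the fix is small enough to restate as a self-contained sketch; the load_plugin wrapper below is hypothetical (not from this codebase) and assumes Windows plus C++17 <filesystem>:

// Hypothetical wrapper showing the path normalization LoadLibraryExA needs
// when LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR is in the flags (Windows-only, C++17).
#include <windows.h>
#include <algorithm>
#include <filesystem>
#include <string>

HMODULE load_plugin(const std::string &fpath) {
    // The flag requires a fully qualified path, so resolve relative paths first...
    std::string afpath = std::filesystem::absolute(fpath).string();
    // ...and it requires backslashes; forward slashes make the call fail.
    std::replace(afpath.begin(), afpath.end(), '/', '\\');
    return LoadLibraryExA(afpath.c_str(), NULL,
                          LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
}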

View File

@@ -298,6 +298,8 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
 bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
 {
+    llama_kv_cache_seq_rm(d_ptr->ctx, 0, ctx.n_past, -1);
+
     llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
     batch.n_tokens = tokens.size();
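The added line guards against stale cache entries when n_past is rewound (for example, after the prompt is truncated or recalculated): anything cached at positions >= n_past is dropped before decoding, so the KV cache always matches the logical context length. A minimal sketch of the same pattern against the llama.cpp C API of that era; the batch-filling details are illustrative, not the project's exact implementation:

// Sketch (llama.cpp C API, circa Jan 2024): keep the KV cache in sync with n_past.
#include <llama.h>
#include <cstdint>
#include <vector>

bool eval_tokens(llama_context *ctx, int32_t n_past, const std::vector<llama_token> &tokens) {
    // Drop cached entries of sequence 0 in [n_past, end); p1 = -1 means "to the
    // end". Without this, a rewound n_past would leave stale entries that the
    // next decode silently builds on.
    llama_kv_cache_seq_rm(ctx, 0, n_past, -1);

    llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
    batch.n_tokens = tokens.size();
    for (int32_t i = 0; i < batch.n_tokens; ++i) {
        batch.token[i]     = tokens[i];
        batch.pos[i]       = n_past + i;   // positions continue from n_past
        batch.n_seq_id[i]  = 1;
        batch.seq_id[i][0] = 0;
        batch.logits[i]    = false;
    }
    batch.logits[batch.n_tokens - 1] = true; // only need logits for the last token

    bool ok = llama_decode(ctx, batch) == 0;
    llama_batch_free(batch);
    return ok;
}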

View File

@@ -10,14 +10,9 @@ Chat::Chat(QObject *parent)
     , m_id(Network::globalInstance()->generateUniqueId())
     , m_name(tr("New Chat"))
     , m_chatModel(new ChatModel(this))
-    , m_responseInProgress(false)
     , m_responseState(Chat::ResponseStopped)
     , m_creationDate(QDateTime::currentSecsSinceEpoch())
     , m_llmodel(new ChatLLM(this))
-    , m_isServer(false)
-    , m_shouldDeleteLater(false)
-    , m_isModelLoaded(false)
-    , m_shouldLoadModelWhenInstalled(false)
     , m_collectionModel(new LocalDocsCollectionsModel(this))
 {
     connectLLM();

View File

@@ -155,15 +155,15 @@ private:
     QString m_response;
     QList<QString> m_collections;
     ChatModel *m_chatModel;
-    bool m_responseInProgress;
+    bool m_responseInProgress = false;
     ResponseState m_responseState;
     qint64 m_creationDate;
     ChatLLM *m_llmodel;
     QList<ResultInfo> m_databaseResults;
-    bool m_isServer;
-    bool m_shouldDeleteLater;
-    bool m_isModelLoaded;
-    bool m_shouldLoadModelWhenInstalled;
+    bool m_isServer = false;
+    bool m_shouldDeleteLater = false;
+    bool m_isModelLoaded = false;
+    bool m_shouldLoadModelWhenInstalled = false;
     LocalDocsCollectionsModel *m_collectionModel;
 };
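The two diffs above (and the two below) apply the same cleanup: constructor-initializer-list entries are replaced with C++11 in-class default member initializers, so the default value sits next to the declaration and every constructor, present or future, picks it up automatically. A minimal illustration with a hypothetical Counter class (not from this codebase):

// In-class default member initializers: defaults apply to all constructors.
#include <cassert>

struct Counter {
    int  value   = 0;     // default lives next to the declaration
    bool enabled = true;  // no constructor can forget to initialize it

    Counter() = default;                    // value == 0, enabled == true
    explicit Counter(int v) : value(v) {}   // enabled still defaults to true
};

int main() {
    assert(Counter().value == 0);
    assert(Counter(5).enabled);
    return 0;
}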

View File

@@ -16,9 +16,6 @@ ChatListModel *ChatListModel::globalInstance()
 ChatListModel::ChatListModel()
     : QAbstractListModel(nullptr)
-    , m_newChat(nullptr)
-    , m_serverChat(nullptr)
-    , m_currentChat(nullptr)
 {
     addChat();

View File

@@ -239,9 +239,9 @@ private Q_SLOTS:
     }
 private:
-    Chat* m_newChat;
-    Chat* m_serverChat;
-    Chat* m_currentChat;
+    Chat* m_newChat = nullptr;
+    Chat* m_serverChat = nullptr;
+    Chat* m_currentChat = nullptr;
     QList<Chat*> m_chats;
 private: