Compare commits


5 Commits

Author  SHA1  Message  Date

AT  96cee4f9ac  Explicitly clear the kv cache each time we eval tokens to match n_past. (#1808)  2024-01-03 14:06:08 -05:00

ThiloteE  2d566710e5  Address review  2024-01-03 11:13:07 -06:00

ThiloteE  a0f7d7ae0e  Fix for "LLModel ERROR: Could not find CPU LLaMA implementation" v2  2024-01-03 11:13:07 -06:00

ThiloteE  38d81c14d0  Fixes https://github.com/nomic-ai/gpt4all/issues/1760 LLModel ERROR: Could not find CPU LLaMA implementation.
    Inspired by Microsoft docs for LoadLibraryExA (https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexa).
    When using LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR, the lpFileName parameter must specify a fully qualified path, also it needs to be backslashes (\), not forward slashes (/).
    2024-01-03 11:13:07 -06:00

Gerhard Stein  3e99b90c0b  Some cleanps  2024-01-03 08:41:40 -06:00
6 changed files with 15 additions and 17 deletions

View File

@@ -53,6 +53,8 @@ public:
     }
 };
 #else
+#include <algorithm>
+#include <filesystem>
 #include <string>
 #include <exception>
 #include <stdexcept>
@@ -75,7 +77,9 @@ public:
     Dlhandle() : chandle(nullptr) {}
     Dlhandle(const std::string& fpath) {
-        chandle = LoadLibraryExA(fpath.c_str(), NULL, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
+        std::string afpath = std::filesystem::absolute(fpath).string();
+        std::replace(afpath.begin(), afpath.end(), '/', '\\');
+        chandle = LoadLibraryExA(afpath.c_str(), NULL, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
         if (!chandle) {
             throw Exception("dlopen(\""+fpath+"\"): Error");
         }
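Note: the hunk above follows the LoadLibraryExA documentation cited in commit 38d81c14d0: the LOAD_LIBRARY_SEARCH_* flags only work as intended when lpFileName is a fully qualified path, and the separators must be backslashes. A minimal standalone sketch of the same pattern (the load_plugin helper and the plugin path are illustrative, not names from this repo):

    // Sketch: normalize a possibly-relative, forward-slash path before calling
    // LoadLibraryExA with LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR (Windows only).
    #include <windows.h>
    #include <algorithm>
    #include <filesystem>
    #include <string>
    #include <cstdio>

    static HMODULE load_plugin(const std::string &fpath) {
        // Fully qualify the path and convert '/' to '\\' so the search flags apply.
        std::string afpath = std::filesystem::absolute(fpath).string();
        std::replace(afpath.begin(), afpath.end(), '/', '\\');
        return LoadLibraryExA(afpath.c_str(), NULL,
                              LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR);
    }

    int main() {
        // "plugins/example.dll" is a placeholder path used only for illustration.
        HMODULE h = load_plugin("plugins/example.dll");
        if (!h) {
            std::printf("LoadLibraryExA failed, error %lu\n", GetLastError());
            return 1;
        }
        FreeLibrary(h);
        return 0;
    }

With a relative, forward-slash path the search flags are not honored, which appears to be the failure reported in issue 1760.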

View File

@@ -298,6 +298,8 @@ LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
 bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
 {
+    llama_kv_cache_seq_rm(d_ptr->ctx, 0, ctx.n_past, -1);
+
     llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
     batch.n_tokens = tokens.size();
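The reasoning behind commit 96cee4f9ac: ctx.n_past is how many tokens the caller considers already evaluated, so any KV-cache entries at or beyond that position are stale (for example after the context is rewound) and should be dropped before the new batch is decoded. A rough sketch of that pattern against the llama.cpp C API (ctx, n_past, and the batch-filling loop are a simplified stand-in for the surrounding evalTokens code, not a copy of it):

    // Sketch: decode `tokens` starting at position n_past, first removing any
    // cached entries for sequence 0 at positions >= n_past.
    #include <vector>
    #include "llama.h"

    static bool eval_tokens(llama_context *ctx, const std::vector<llama_token> &tokens, int n_past) {
        // p0 = n_past, p1 = -1 means "from n_past to the end of the sequence".
        llama_kv_cache_seq_rm(ctx, 0, n_past, -1);

        llama_batch batch = llama_batch_init((int32_t)tokens.size(), 0, 1);
        batch.n_tokens = (int32_t)tokens.size();
        for (int32_t i = 0; i < batch.n_tokens; ++i) {
            batch.token[i]     = tokens[i];
            batch.pos[i]       = n_past + i;                 // positions continue from n_past
            batch.n_seq_id[i]  = 1;
            batch.seq_id[i][0] = 0;
            batch.logits[i]    = (i == batch.n_tokens - 1);  // only the last token needs logits
        }

        bool ok = llama_decode(ctx, batch) == 0;
        llama_batch_free(batch);
        return ok;
    }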

View File

@@ -10,14 +10,9 @@ Chat::Chat(QObject *parent)
     , m_id(Network::globalInstance()->generateUniqueId())
     , m_name(tr("New Chat"))
     , m_chatModel(new ChatModel(this))
-    , m_responseInProgress(false)
     , m_responseState(Chat::ResponseStopped)
     , m_creationDate(QDateTime::currentSecsSinceEpoch())
     , m_llmodel(new ChatLLM(this))
-    , m_isServer(false)
-    , m_shouldDeleteLater(false)
-    , m_isModelLoaded(false)
-    , m_shouldLoadModelWhenInstalled(false)
     , m_collectionModel(new LocalDocsCollectionsModel(this))
 {
     connectLLM();

View File

@@ -155,15 +155,15 @@ private:
     QString m_response;
     QList<QString> m_collections;
     ChatModel *m_chatModel;
-    bool m_responseInProgress;
+    bool m_responseInProgress = false;
     ResponseState m_responseState;
     qint64 m_creationDate;
     ChatLLM *m_llmodel;
     QList<ResultInfo> m_databaseResults;
-    bool m_isServer;
-    bool m_shouldDeleteLater;
-    bool m_isModelLoaded;
-    bool m_shouldLoadModelWhenInstalled;
+    bool m_isServer = false;
+    bool m_shouldDeleteLater = false;
+    bool m_isModelLoaded = false;
+    bool m_shouldLoadModelWhenInstalled = false;
     LocalDocsCollectionsModel *m_collectionModel;
 };
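This hunk and the three around it (chat.cpp above, chatlistmodel.cpp and chatlistmodel.h below) are the same mechanical cleanup from commit 3e99b90c0b: members whose initial value is a plain constant move out of the constructor initializer list and into C++11 default member initializers in the header. A generic sketch of the pattern, using an illustrative Widget class rather than code from this repo:

    // Sketch: constant defaults become in-class default member initializers;
    // the constructor initializer list keeps only values computed at run time.
    #include <chrono>
    #include <cstdint>
    #include <string>

    class Widget {
    public:
        explicit Widget(std::string name)
            : m_name(std::move(name))   // passed-in value: stays in the list
            , m_creationDate(std::chrono::duration_cast<std::chrono::seconds>(
                  std::chrono::system_clock::now().time_since_epoch()).count())
        {
        }

    private:
        std::string m_name;
        int64_t m_creationDate;
        // Constant defaults live next to the declarations, so every constructor
        // (present or future) picks them up without restating them.
        bool m_responseInProgress = false;
        bool m_isServer = false;
        bool m_isModelLoaded = false;
        Widget *m_parent = nullptr;
    };

    int main() {
        Widget w("example");
        (void)w;
        return 0;
    }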

View File

@@ -16,9 +16,6 @@ ChatListModel *ChatListModel::globalInstance()
 ChatListModel::ChatListModel()
     : QAbstractListModel(nullptr)
-    , m_newChat(nullptr)
-    , m_serverChat(nullptr)
-    , m_currentChat(nullptr)
 {
     addChat();

View File

@@ -239,9 +239,9 @@ private Q_SLOTS:
     }
 private:
-    Chat* m_newChat;
-    Chat* m_serverChat;
-    Chat* m_currentChat;
+    Chat* m_newChat = nullptr;
+    Chat* m_serverChat = nullptr;
+    Chat* m_currentChat = nullptr;
     QList<Chat*> m_chats;
 private: