Compare commits

...

8 Commits

Author SHA1 Message Date
Aaron Miller
10f9b49313 update mini-orca 3b to gguf2, license
Signed-off-by: Aaron Miller <apage43@ninjawhale.com>
2023-10-12 14:57:07 -04:00
Aaron Miller
2490977f89 q6k, q4_1 mat*mat
2023-10-12 14:56:54 -04:00
niansa/tuxifan
a35f1ab784 Updated chat wishlist (#1351)
2023-10-12 14:01:44 -04:00
cebtenzzre
4d4275d1b8 python: replace deprecated pkg_resources with importlib (#1505)
2023-10-12 13:35:27 -04:00
Alex Soto
3c45a555e9 Improves Java API signatures maintaining back compatibility
2023-10-12 07:53:12 -04:00
Aaron Miller
f39df0906e fix embed4all filename
https://discordapp.com/channels/1076964370942267462/1093558720690143283/1161778216462192692

Signed-off-by: Aaron Miller <apage43@ninjawhale.com>
2023-10-12 07:52:56 -04:00
umarmnaq
005c092943 Update README.md
Signed-off-by: umarmnaq <102142660+umarmnaq@users.noreply.github.com>
2023-10-12 07:52:36 -04:00
Adam Treat
908aec27fe Always save chats to disk, but save them as text by default. This also changes
the UI behavior to always open a 'New Chat' and setting it as current instead
of setting a restored chat as current. This improves usability by not requiring
the user to wait if they want to immediately start chatting.
2023-10-12 07:52:11 -04:00
19 changed files with 378 additions and 192 deletions

View File

@@ -92,6 +92,17 @@ Example tags: `backend`, `bindings`, `python-bindings`, `documentation`, etc.
 <a href="https://s3.amazonaws.com/static.nomic.ai/gpt4all/2023_GPT4All_Technical_Report.pdf">:green_book: Technical Report 1: GPT4All</a>
 </p>
+## Star History
+
+<a href="https://star-history.com/#nomic-ai/gpt4all&Date">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=nomic-ai/gpt4all&type=Date&theme=dark" />
+    <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=nomic-ai/gpt4all&type=Date" />
+    <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=nomic-ai/gpt4all&type=Date" />
+  </picture>
+</a>
+
 ## Citation
 If you utilize this repository, models or data in a downstream project, please consider citing it with:

@@ -1 +1 @@
-Subproject commit 3742085b0429cbe0ede49bcb9f891e4a5e25a724
+Subproject commit 500689ad356a81a471a7fb68cc70f7aee5a5f56e

View File

@@ -242,6 +242,8 @@ if (LLAMA_KOMPUTE)
         kompute/op_mul_mat_mat_f16.comp
         kompute/op_mul_mat_mat_q8_0.comp
         kompute/op_mul_mat_mat_q4_0.comp
+        kompute/op_mul_mat_mat_q4_1.comp
+        kompute/op_mul_mat_mat_q6_k.comp
         kompute/op_mul_mat_f16.comp
         kompute/op_mul_mat_q8_0.comp
         kompute/op_mul_mat_q4_0.comp
@@ -276,6 +278,8 @@ if (LLAMA_KOMPUTE)
         shaderop_mul_mat_mat_f16.h
         shaderop_mul_mat_mat_q8_0.h
         shaderop_mul_mat_mat_q4_0.h
+        shaderop_mul_mat_mat_q4_1.h
+        shaderop_mul_mat_mat_q6_k.h
         shaderop_mul_mat_f16.h
         shaderop_mul_mat_q8_0.h
         shaderop_mul_mat_q4_0.h

View File

@@ -4,13 +4,13 @@ The GPT4All CLI is a self-contained script based on the `gpt4all` and `typer` packages
 REPL to communicate with a language model similar to the chat GUI application, but more basic.
 """
+import importlib.metadata
 import io
-import pkg_resources # should be present as a dependency of gpt4all
 import sys
-import typer

 from collections import namedtuple
 from typing_extensions import Annotated

+import typer
+
 from gpt4all import GPT4All
@@ -79,7 +79,7 @@ def repl(
     use_new_loop = False
     try:
-        version = pkg_resources.Environment()['gpt4all'][0].version
+        version = importlib.metadata.version('gpt4all')
         version_major = int(version.split('.')[0])
         if version_major >= 1:
             use_new_loop = True
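
Note on the hunk above: `pkg_resources` (from setuptools) is deprecated, and `importlib.metadata` is its standard-library replacement (Python 3.8+). A minimal, self-contained sketch of the same version check, shown for illustration only; the fallback value is an assumption, not part of the CLI:

```python
import importlib.metadata

# Look up the installed version of the gpt4all package without importing setuptools.
try:
    version = importlib.metadata.version("gpt4all")
except importlib.metadata.PackageNotFoundError:
    version = "0"  # assumed fallback for illustration: package not installed

# Mirror the CLI's major-version gate that selects the newer REPL loop.
use_new_loop = int(version.split(".")[0]) >= 1
print(version, use_new_loop)
```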

View File

@@ -8,9 +8,8 @@ import java.io.ByteArrayOutputStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collectors;
 public class LLModel implements AutoCloseable {
@@ -306,6 +305,197 @@ public class LLModel implements AutoCloseable {
         };
     }
+    /**
+     * The array of messages for the conversation.
+     */
+    public static class Messages {
+
+        private final List<PromptMessage> messages = new ArrayList<>();
+
+        public Messages(PromptMessage...messages) {
+            this.messages.addAll(Arrays.asList(messages));
+        }
+
+        public Messages(List<PromptMessage> messages) {
+            this.messages.addAll(messages);
+        }
+
+        public Messages addPromptMessage(PromptMessage promptMessage) {
+            this.messages.add(promptMessage);
+            return this;
+        }
+
+        List<PromptMessage> toList() {
+            return Collections.unmodifiableList(this.messages);
+        }
+
+        List<Map<String, String>> toListMap() {
+            return messages.stream()
+                    .map(PromptMessage::toMap).collect(Collectors.toList());
+        }
+    }
+
+    /**
+     * A message in the conversation, identical to OpenAI's chat message.
+     */
+    public static class PromptMessage {
+
+        private static final String ROLE = "role";
+        private static final String CONTENT = "content";
+
+        private final Map<String, String> message = new HashMap<>();
+
+        public PromptMessage() {
+        }
+
+        public PromptMessage(Role role, String content) {
+            addRole(role);
+            addContent(content);
+        }
+
+        public PromptMessage addRole(Role role) {
+            return this.addParameter(ROLE, role.type());
+        }
+
+        public PromptMessage addContent(String content) {
+            return this.addParameter(CONTENT, content);
+        }
+
+        public PromptMessage addParameter(String key, String value) {
+            this.message.put(key, value);
+            return this;
+        }
+
+        public String content() {
+            return this.parameter(CONTENT);
+        }
+
+        public Role role() {
+            String role = this.parameter(ROLE);
+            return Role.from(role);
+        }
+
+        public String parameter(String key) {
+            return this.message.get(key);
+        }
+
+        Map<String, String> toMap() {
+            return Collections.unmodifiableMap(this.message);
+        }
+    }
+
+    public enum Role {
+
+        SYSTEM("system"), ASSISTANT("assistant"), USER("user");
+
+        private final String type;
+
+        String type() {
+            return this.type;
+        }
+
+        static Role from(String type) {
+            if (type == null) {
+                return null;
+            }
+
+            switch (type) {
+                case "system": return SYSTEM;
+                case "assistant": return ASSISTANT;
+                case "user": return USER;
+                default: throw new IllegalArgumentException(
+                        String.format("You passed %s type but only %s are supported",
+                                type, Arrays.toString(Role.values())
+                        )
+                );
+            }
+        }
+
+        Role(String type) {
+            this.type = type;
+        }
+
+        @Override
+        public String toString() {
+            return type();
+        }
+    }
+
+    /**
+     * The result of the completion, similar to OpenAI's format.
+     */
+    public static class CompletionReturn {
+        private String model;
+        private Usage usage;
+        private Choices choices;
+
+        public CompletionReturn(String model, Usage usage, Choices choices) {
+            this.model = model;
+            this.usage = usage;
+            this.choices = choices;
+        }
+
+        public Choices choices() {
+            return choices;
+        }
+
+        public String model() {
+            return model;
+        }
+
+        public Usage usage() {
+            return usage;
+        }
+    }
+
+    /**
+     * The generated completions.
+     */
+    public static class Choices {
+
+        private final List<CompletionChoice> choices = new ArrayList<>();
+
+        public Choices(List<CompletionChoice> choices) {
+            this.choices.addAll(choices);
+        }
+
+        public Choices(CompletionChoice...completionChoices){
+            this.choices.addAll(Arrays.asList(completionChoices));
+        }
+
+        public Choices addCompletionChoice(CompletionChoice completionChoice) {
+            this.choices.add(completionChoice);
+            return this;
+        }
+
+        public CompletionChoice first() {
+            return this.choices.get(0);
+        }
+
+        public int totalChoices() {
+            return this.choices.size();
+        }
+
+        public CompletionChoice get(int index) {
+            return this.choices.get(index);
+        }
+
+        public List<CompletionChoice> choices() {
+            return Collections.unmodifiableList(choices);
+        }
+    }
+
+    /**
+     * A completion choice, similar to OpenAI's format.
+     */
+    public static class CompletionChoice extends PromptMessage {
+        public CompletionChoice(Role role, String content) {
+            super(role, content);
+        }
+    }
+
     public static class ChatCompletionResponse {
         public String model;
@@ -323,6 +513,41 @@ public class LLModel implements AutoCloseable {
         // Getters and setters
     }
+    public CompletionReturn chatCompletionResponse(Messages messages,
+                                                   GenerationConfig generationConfig) {
+        return chatCompletion(messages, generationConfig, false, false);
+    }
+
+    /**
+     * chatCompletion formats the existing chat conversation into a template to be
+     * easier to process for chat UIs. It is not absolutely necessary as generate method
+     * may be directly used to make generations with gpt models.
+     *
+     * @param messages object to create theMessages to send to GPT model
+     * @param generationConfig How to decode/process the generation.
+     * @param streamToStdOut Send tokens as they are calculated Standard output.
+     * @param outputFullPromptToStdOut Should full prompt built out of messages be sent to Standard output.
+     * @return CompletionReturn contains stats and generated Text.
+     */
+    public CompletionReturn chatCompletion(Messages messages,
+                                           GenerationConfig generationConfig, boolean streamToStdOut,
+                                           boolean outputFullPromptToStdOut) {
+
+        String fullPrompt = buildPrompt(messages.toListMap());
+
+        if(outputFullPromptToStdOut)
+            System.out.print(fullPrompt);
+
+        String generatedText = generate(fullPrompt, generationConfig, streamToStdOut);
+
+        final CompletionChoice promptMessage = new CompletionChoice(Role.ASSISTANT, generatedText);
+        final Choices choices = new Choices(promptMessage);
+
+        final Usage usage = getUsage(fullPrompt, generatedText);
+
+        return new CompletionReturn(this.modelName, usage, choices);
+    }
+
     public ChatCompletionResponse chatCompletion(List<Map<String, String>> messages,
                                                  GenerationConfig generationConfig) {
         return chatCompletion(messages, generationConfig, false, false);
@@ -352,19 +577,23 @@ public class LLModel implements AutoCloseable {
         ChatCompletionResponse response = new ChatCompletionResponse();
         response.model = this.modelName;
-        Usage usage = new Usage();
-        usage.promptTokens = fullPrompt.length();
-        usage.completionTokens = generatedText.length();
-        usage.totalTokens = fullPrompt.length() + generatedText.length();
-        response.usage = usage;
+        response.usage = getUsage(fullPrompt, generatedText);
         Map<String, String> message = new HashMap<>();
         message.put("role", "assistant");
         message.put("content", generatedText);
         response.choices = List.of(message);
         return response;
+    }
+
+    private Usage getUsage(String fullPrompt, String generatedText) {
+        Usage usage = new Usage();
+        usage.promptTokens = fullPrompt.length();
+        usage.completionTokens = generatedText.length();
+        usage.totalTokens = fullPrompt.length() + generatedText.length();
+        return usage;
     }
     protected static String buildPrompt(List<Map<String, String>> messages) {

View File

@@ -28,6 +28,33 @@ import static org.mockito.Mockito.*;
 @ExtendWith(MockitoExtension.class)
 public class BasicTests {
+    @Test
+    public void simplePromptWithObject(){
+
+        LLModel model = Mockito.spy(new LLModel());
+
+        LLModel.GenerationConfig config =
+                LLModel.config()
+                        .withNPredict(20)
+                        .build();
+
+        // The generate method will return "4"
+        doReturn("4").when( model ).generate(anyString(), eq(config), eq(true));
+
+        LLModel.PromptMessage promptMessage1 = new LLModel.PromptMessage(LLModel.Role.SYSTEM, "You are a helpful assistant");
+        LLModel.PromptMessage promptMessage2 = new LLModel.PromptMessage(LLModel.Role.USER, "Add 2+2");
+
+        LLModel.Messages messages = new LLModel.Messages(promptMessage1, promptMessage2);
+
+        LLModel.CompletionReturn response = model.chatCompletion(
+                messages, config, true, true);
+
+        assertTrue( response.choices().first().content().contains("4") );
+
+        // Verifies the prompt and response are certain length.
+        assertEquals( 224 , response.usage().totalTokens );
+    }
+
     @Test
     public void simplePrompt(){

View File

@@ -39,7 +39,7 @@ class Embed4All:
         Args:
             n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
         """
-        self.gpt4all = GPT4All(model_name or 'ggml-all-MiniLM-L6-v2-f16.gguf', n_threads=n_threads, **kwargs)
+        self.gpt4all = GPT4All(model_name or 'all-MiniLM-L6-v2-f16.gguf', n_threads=n_threads, **kwargs)

     def embed(self, text: str) -> List[float]:
         """

View File

@@ -1,23 +1,27 @@
+import atexit
 import ctypes
+import importlib.resources
 import logging
 import os
 import platform
-from queue import Queue
 import re
 import subprocess
 import sys
 import threading
+from contextlib import ExitStack
+from queue import Queue
 from typing import Callable, Iterable, List

-import pkg_resources

 logger: logging.Logger = logging.getLogger(__name__)

-# TODO: provide a config file to make this more robust
-LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\")
-MODEL_LIB_PATH = str(pkg_resources.resource_filename("gpt4all", LLMODEL_PATH)).replace("\\", "\\\\")
+file_manager = ExitStack()
+atexit.register(file_manager.close)  # clean up files on exit
+
+# TODO: provide a config file to make this more robust
+MODEL_LIB_PATH = file_manager.enter_context(importlib.resources.as_file(
+    importlib.resources.files("gpt4all") / "llmodel_DO_NOT_MODIFY" / "build",
+))

 def load_llmodel_library():
     system = platform.system()
@@ -36,9 +40,7 @@ def load_llmodel_library():
     llmodel_file = "libllmodel" + "." + c_lib_ext
-    llmodel_dir = str(pkg_resources.resource_filename("gpt4all", os.path.join(LLMODEL_PATH, llmodel_file))).replace(
-        "\\", "\\\\"
-    )
+    llmodel_dir = str(MODEL_LIB_PATH / llmodel_file).replace("\\", r"\\")
     llmodel_lib = ctypes.CDLL(llmodel_dir)
@@ -131,7 +133,7 @@ llmodel.llmodel_set_implementation_search_path.restype = None
 llmodel.llmodel_threadCount.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_threadCount.restype = ctypes.c_int32
-llmodel.llmodel_set_implementation_search_path(MODEL_LIB_PATH.encode("utf-8"))
+llmodel.llmodel_set_implementation_search_path(str(MODEL_LIB_PATH).replace("\\", r"\\").encode("utf-8"))
 llmodel.llmodel_available_gpu_devices.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.POINTER(ctypes.c_int32)]
 llmodel.llmodel_available_gpu_devices.restype = ctypes.POINTER(LLModelGPUDevice)
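
In the hunk above, `importlib.resources.files()`/`as_file()` (standard library, Python 3.9+) replace the deprecated `pkg_resources.resource_filename()`, and an `ExitStack` registered with `atexit` keeps any temporarily extracted resources alive until the process exits. A small illustrative sketch of that pattern, not the bindings' exact code:

```python
import atexit
import importlib.resources
from contextlib import ExitStack

# One process-wide stack of cleanup callbacks, closed automatically on interpreter exit.
file_manager = ExitStack()
atexit.register(file_manager.close)

# as_file() yields a real filesystem path even when the package ships inside a zip;
# enter_context() defers its cleanup to file_manager instead of a local `with` block.
lib_dir = file_manager.enter_context(
    importlib.resources.as_file(
        importlib.resources.files("gpt4all") / "llmodel_DO_NOT_MODIFY" / "build"
    )
)

print(lib_dir)  # a pathlib.Path pointing at the bundled native-library directory
```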

View File

@@ -32,13 +32,8 @@ One click installers for macOS, Linux, and Windows at https://gpt4all.io
 * Multi-chat - a list of current and past chats and the ability to save/delete/export and switch between
 * Text to speech - have the AI response with voice
 * Speech to text - give the prompt with your voice
-* Python bindings
-* Typescript bindings
 * Plugin support for langchain other developer tools
-* Save your prompt/responses to disk
-* Upload prompt/response manually/automatically to nomic.ai to aid future training runs
-* Syntax highlighting support for programming languages, etc.
-* REST API with a built-in webserver in the chat gui itself with a headless operation mode as well
+* chat gui headless operation mode
 * Advanced settings for changing temperature, topk, etc. (DONE)
 * * Improve the accessibility of the installer for screen reader users
 * YOUR IDEA HERE

View File

@@ -385,7 +385,11 @@ bool Chat::serialize(QDataStream &stream, int version) const
     stream << m_modelInfo.filename();
     if (version > 2)
         stream << m_collections;
-    if (!m_llmodel->serialize(stream, version, true /*serializeKV*/))
+
+    const bool serializeKV = MySettings::globalInstance()->saveChatsContext();
+    if (version > 5)
+        stream << serializeKV;
+    if (!m_llmodel->serialize(stream, version, serializeKV))
         return false;
     if (!m_chatModel->serialize(stream, version))
         return false;
@@ -413,7 +417,6 @@ bool Chat::deserialize(QDataStream &stream, int version)
     if (!m_modelInfo.id().isEmpty())
         emit modelInfoChanged();
-    bool deserializeKV = true; // make this a setting
     bool discardKV = m_modelInfo.id().isEmpty();
     // Prior to version 2 gptj models had a bug that fixed the kv_cache to F32 instead of F16 so
@@ -425,6 +428,11 @@ bool Chat::deserialize(QDataStream &stream, int version)
         stream >> m_collections;
         emit collectionListChanged(m_collections);
     }
+
+    bool deserializeKV = true;
+    if (version > 5)
+        stream >> deserializeKV;
+
     m_llmodel->setModelInfo(m_modelInfo);
     if (!m_llmodel->deserialize(stream, version, deserializeKV, discardKV))
         return false;
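
The serialization change above follows a common backward-compatible pattern: bump the format version, have new writers always emit the new field, and have readers assume the old behavior unless the recorded version is new enough. A language-neutral sketch of that pattern (Python and `struct` are used purely for illustration; the application itself uses Qt's QDataStream):

```python
import struct

FORMAT_VERSION = 6  # matches the bumped CHAT_FORMAT_VERSION in this change

def serialize(save_context: bool) -> bytes:
    # New writers always record the version, then the flag introduced in v6.
    return struct.pack("<i?", FORMAT_VERSION, save_context)

def deserialize(data: bytes) -> bool:
    (version,) = struct.unpack_from("<i", data, 0)
    save_context = True  # files older than v6 did not store the flag
    if version > 5:
        (save_context,) = struct.unpack_from("<?", data, 4)
    return save_context

assert deserialize(serialize(False)) is False
```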

View File

@@ -54,6 +54,8 @@ public:
     }
     ChatModel *chatModel() { return m_chatModel; }
+    bool isNewChat() const { return m_name == tr("New Chat") && !m_chatModel->count(); }
+
     Q_INVOKABLE void reset();
     Q_INVOKABLE void processSystemPrompt();
     Q_INVOKABLE bool isModelLoaded() const;

View File

@@ -5,7 +5,7 @@
 #include <QDataStream>
 #define CHAT_FORMAT_MAGIC 0xF5D553CC
-#define CHAT_FORMAT_VERSION 5
+#define CHAT_FORMAT_VERSION 6
 class MyChatListModel: public ChatListModel { };
 Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance)
@@ -17,11 +17,10 @@ ChatListModel *ChatListModel::globalInstance()
 ChatListModel::ChatListModel()
     : QAbstractListModel(nullptr)
     , m_newChat(nullptr)
-    , m_dummyChat(nullptr)
     , m_serverChat(nullptr)
     , m_currentChat(nullptr)
 {
-    addDummyChat();
+    addChat();
     ChatsRestoreThread *thread = new ChatsRestoreThread;
     connect(thread, &ChatsRestoreThread::chatRestored, this, &ChatListModel::restoreChat);
@@ -59,10 +58,7 @@ void ChatListModel::saveChats()
     for (Chat *chat : m_chats) {
         if (chat == m_serverChat)
             continue;
-        const bool isChatGPT = chat->modelInfo().isChatGPT;
-        if (!isChatGPT && !MySettings::globalInstance()->saveChats())
-            continue;
-        if (isChatGPT && !MySettings::globalInstance()->saveChatGPTChats())
+        if (chat->isNewChat())
             continue;
         toSave.append(chat);
     }
@@ -197,47 +193,47 @@ void ChatsRestoreThread::run()
         });
         for (FileInfo &f : files) {
             QFile file(f.file);
             bool success = file.open(QIODevice::ReadOnly);
             if (!success) {
                 qWarning() << "ERROR: Couldn't restore chat from file:" << file.fileName();
                 continue;
             }
             QDataStream in(&file);
             qint32 version = 0;
             if (!f.oldFile) {
                 // Read and check the header
                 quint32 magic;
                 in >> magic;
                 if (magic != CHAT_FORMAT_MAGIC) {
                     qWarning() << "ERROR: Chat file has bad magic:" << file.fileName();
                     continue;
                 }
                 // Read the version
                 in >> version;
                 if (version < 1) {
                     qWarning() << "ERROR: Chat file has non supported version:" << file.fileName();
                     continue;
                 }
                 if (version <= 1)
                     in.setVersion(QDataStream::Qt_6_2);
             }
             qDebug() << "deserializing chat" << f.file;
             Chat *chat = new Chat;
             chat->moveToThread(qApp->thread());
             if (!chat->deserialize(in, version)) {
                 qWarning() << "ERROR: Couldn't deserialize chat from file:" << file.fileName();
             } else {
                 emit chatRestored(chat);
             }
             if (f.oldFile)
                 file.remove(); // No longer storing in this directory
             file.close();
         }
         qint64 elapsedTime = timer.elapsed();
@@ -249,35 +245,13 @@ void ChatListModel::restoreChat(Chat *chat)
     chat->setParent(this);
     connect(chat, &Chat::nameChanged, this, &ChatListModel::nameChanged);
-    if (m_dummyChat) {
-        beginResetModel();
-        m_chats = QList<Chat*>({chat});
-        setCurrentChat(chat);
-        delete m_dummyChat;
-        m_dummyChat = nullptr;
-        endResetModel();
-    } else {
-        beginInsertRows(QModelIndex(), m_chats.size(), m_chats.size());
-        m_chats.append(chat);
-        endInsertRows();
-    }
+    beginInsertRows(QModelIndex(), m_chats.size(), m_chats.size());
+    m_chats.append(chat);
+    endInsertRows();
 }
 void ChatListModel::chatsRestoredFinished()
 {
-    if (m_dummyChat) {
-        beginResetModel();
-        Chat *dummy = m_dummyChat;
-        m_dummyChat = nullptr;
-        m_chats.clear();
-        addChat();
-        delete dummy;
-        endResetModel();
-    }
-
-    if (m_chats.isEmpty())
-        addChat();
-
     addServerChat();
 }

View File

@@ -84,7 +84,7 @@ public:
     Q_INVOKABLE void addChat()
     {
         // Don't add a new chat if we already have one
-        if (m_newChat || m_dummyChat)
+        if (m_newChat)
             return;
         // Create a new chat pointer and connect it to determine when it is populated
@@ -101,18 +101,6 @@ public:
         setCurrentChat(m_newChat);
     }
-    Q_INVOKABLE void addDummyChat()
-    {
-        // Create a new dummy chat pointer and don't connect it
-        m_dummyChat = new Chat(this);
-        beginInsertRows(QModelIndex(), 0, 0);
-        m_chats.prepend(m_dummyChat);
-        endInsertRows();
-        emit countChanged();
-        m_currentChat = m_dummyChat;
-        emit currentChatChanged();
-    }
-
     Q_INVOKABLE void addServerChat()
     {
         // Create a new dummy chat pointer and don't connect it
@@ -252,7 +240,6 @@ private Q_SLOTS:
 private:
     Chat* m_newChat;
-    Chat* m_dummyChat;
     Chat* m_serverChat;
     Chat* m_currentChat;
     QList<Chat*> m_chats;

View File

@@ -110,17 +110,17 @@
     },
     {
         "order": "i",
-        "md5sum": "aae346fe095e60139ca39b3fda4ac7ae",
+        "md5sum": "0e769317b90ac30d6e09486d61fefa26",
         "name": "Mini Orca (Small)",
-        "filename": "orca-mini-3b.q4_0.gguf",
-        "filesize": "1928648352",
+        "filename": "orca-mini-3b-gguf2-q4_0.gguf",
+        "filesize": "1979946720",
         "requires": "2.5.0",
         "ramrequired": "4",
         "parameters": "3 billion",
         "quant": "q4_0",
         "type": "OpenLLaMa",
-        "description": "<strong>Small version of new model with novel dataset</strong><br><ul><li>Instruction based<li>Explain tuned datasets<li>Orca Research Paper dataset construction approaches<li>Licensed for commercial use</ul>",
-        "url": "https://gpt4all.io/models/gguf/orca-mini-3b.q4_0.gguf",
+        "description": "<strong>Small version of new model with novel dataset</strong><br><ul><li>Instruction based<li>Explain tuned datasets<li>Orca Research Paper dataset construction approaches<li>Cannot be used commercially</ul>",
+        "url": "https://gpt4all.io/models/gguf/orca-mini-3b-gguf2-q4_0.gguf",
         "promptTemplate": "### User:\n%1\n### Response:\n",
         "systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
     },
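
For context, the `md5sum`, `filesize`, and `filename` fields in this manifest entry are what a client can use to validate a downloaded model file. A small illustrative check (the local path is hypothetical; this is not the downloader's actual code):

```python
import hashlib

# Values copied from the manifest entry above.
entry = {
    "md5sum": "0e769317b90ac30d6e09486d61fefa26",
    "filename": "orca-mini-3b-gguf2-q4_0.gguf",
    "filesize": "1979946720",
}

def matches_manifest(path: str, entry: dict) -> bool:
    """Return True if the file's size and MD5 digest match the manifest entry."""
    md5 = hashlib.md5()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            md5.update(chunk)
            size += len(chunk)
    return size == int(entry["filesize"]) and md5.hexdigest() == entry["md5sum"]

# Example (assumes the file was downloaded next to this script):
# print(matches_manifest(entry["filename"], entry))
```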

View File

@@ -10,8 +10,7 @@
 #include <QUrl>
 static int default_threadCount = std::min(4, (int32_t) std::thread::hardware_concurrency());
-static bool default_saveChats = false;
-static bool default_saveChatGPTChats = true;
+static bool default_saveChatsContext = false;
 static bool default_serverChat = false;
 static QString default_userDefaultModel = "Application default";
 static bool default_forceMetal = false;
@@ -103,8 +102,7 @@ void MySettings::restoreApplicationDefaults()
     setFontSize(default_fontSize);
     setDevice(default_device);
     setThreadCount(default_threadCount);
-    setSaveChats(default_saveChats);
-    setSaveChatGPTChats(default_saveChatGPTChats);
+    setSaveChatsContext(default_saveChatsContext);
     setServerChat(default_serverChat);
     setModelPath(defaultLocalModelsPath());
     setUserDefaultModel(default_userDefaultModel);
@@ -397,40 +395,22 @@ void MySettings::setThreadCount(int c)
     emit threadCountChanged();
 }
-bool MySettings::saveChats() const
+bool MySettings::saveChatsContext() const
 {
     QSettings setting;
     setting.sync();
-    return setting.value("saveChats", default_saveChats).toBool();
+    return setting.value("saveChatsContext", default_saveChatsContext).toBool();
 }
-void MySettings::setSaveChats(bool b)
+void MySettings::setSaveChatsContext(bool b)
 {
-    if (saveChats() == b)
+    if (saveChatsContext() == b)
         return;
     QSettings setting;
-    setting.setValue("saveChats", b);
+    setting.setValue("saveChatsContext", b);
     setting.sync();
-    emit saveChatsChanged();
-}
-
-bool MySettings::saveChatGPTChats() const
-{
-    QSettings setting;
-    setting.sync();
-    return setting.value("saveChatGPTChats", default_saveChatGPTChats).toBool();
-}
-
-void MySettings::setSaveChatGPTChats(bool b)
-{
-    if (saveChatGPTChats() == b)
-        return;
-    QSettings setting;
-    setting.setValue("saveChatGPTChats", b);
-    setting.sync();
-    emit saveChatGPTChatsChanged();
+    emit saveChatsContextChanged();
 }
 bool MySettings::serverChat() const

View File

@@ -10,8 +10,7 @@ class MySettings : public QObject
 {
     Q_OBJECT
     Q_PROPERTY(int threadCount READ threadCount WRITE setThreadCount NOTIFY threadCountChanged)
-    Q_PROPERTY(bool saveChats READ saveChats WRITE setSaveChats NOTIFY saveChatsChanged)
-    Q_PROPERTY(bool saveChatGPTChats READ saveChatGPTChats WRITE setSaveChatGPTChats NOTIFY saveChatGPTChatsChanged)
+    Q_PROPERTY(bool saveChatsContext READ saveChatsContext WRITE setSaveChatsContext NOTIFY saveChatsContextChanged)
     Q_PROPERTY(bool serverChat READ serverChat WRITE setServerChat NOTIFY serverChatChanged)
     Q_PROPERTY(QString modelPath READ modelPath WRITE setModelPath NOTIFY modelPathChanged)
     Q_PROPERTY(QString userDefaultModel READ userDefaultModel WRITE setUserDefaultModel NOTIFY userDefaultModelChanged)
@@ -64,10 +63,8 @@ public:
     // Application settings
     int threadCount() const;
     void setThreadCount(int c);
-    bool saveChats() const;
-    void setSaveChats(bool b);
-    bool saveChatGPTChats() const;
-    void setSaveChatGPTChats(bool b);
+    bool saveChatsContext() const;
+    void setSaveChatsContext(bool b);
     bool serverChat() const;
     void setServerChat(bool b);
     QString modelPath() const;
@@ -122,8 +119,7 @@ Q_SIGNALS:
     void promptTemplateChanged(const ModelInfo &model);
     void systemPromptChanged(const ModelInfo &model);
     void threadCountChanged();
-    void saveChatsChanged();
-    void saveChatGPTChatsChanged();
+    void saveChatsContextChanged();
     void serverChatChanged();
     void modelPathChanged();
     void userDefaultModelChanged();

View File

@@ -317,16 +317,6 @@ void Network::sendNetworkToggled(bool isActive)
     sendMixpanelEvent("network_toggled", QVector<KeyValue>{kv});
 }
-void Network::sendSaveChatsToggled(bool isActive)
-{
-    if (!MySettings::globalInstance()->networkUsageStatsActive())
-        return;
-    KeyValue kv;
-    kv.key = QString("isActive");
-    kv.value = QJsonValue(isActive);
-    sendMixpanelEvent("savechats_toggled", QVector<KeyValue>{kv});
-}
-
 void Network::sendNewChat(int count)
 {
     if (!MySettings::globalInstance()->networkUsageStatsActive())

View File

@@ -38,7 +38,6 @@ public Q_SLOTS:
     void sendDownloadFinished(const QString &model, bool success);
     Q_INVOKABLE void sendSettingsDialog();
     Q_INVOKABLE void sendNetworkToggled(bool active);
-    Q_INVOKABLE void sendSaveChatsToggled(bool active);
     Q_INVOKABLE void sendNewChat(int count);
     Q_INVOKABLE void sendRemoveChat();
     Q_INVOKABLE void sendRenameChat();

View File

@@ -234,53 +234,35 @@ MySettingsTab {
             Accessible.description: ToolTip.text
         }
         Label {
-            id: saveChatsLabel
-            text: qsTr("Save chats to disk:")
+            id: saveChatsContextLabel
+            text: qsTr("Save chats context to disk:")
             color: theme.textColor
             font.pixelSize: theme.fontSizeLarge
             Layout.row: 7
             Layout.column: 0
         }
         MyCheckBox {
-            id: saveChatsBox
+            id: saveChatsContextBox
             Layout.row: 7
             Layout.column: 1
-            checked: MySettings.saveChats
+            checked: MySettings.saveChatsContext
             onClicked: {
-                Network.sendSaveChatsToggled(saveChatsBox.checked);
-                MySettings.saveChats = !MySettings.saveChats
+                MySettings.saveChatsContext = !MySettings.saveChatsContext
             }
             ToolTip.text: qsTr("WARNING: Saving chats to disk can be ~2GB per chat")
             ToolTip.visible: hovered
         }
-        Label {
-            id: saveChatGPTChatsLabel
-            text: qsTr("Save ChatGPT chats to disk:")
-            color: theme.textColor
-            font.pixelSize: theme.fontSizeLarge
-            Layout.row: 8
-            Layout.column: 0
-        }
-        MyCheckBox {
-            id: saveChatGPTChatsBox
-            Layout.row: 8
-            Layout.column: 1
-            checked: MySettings.saveChatGPTChats
-            onClicked: {
-                MySettings.saveChatGPTChats = !MySettings.saveChatGPTChats
-            }
-        }
         Label {
             id: serverChatLabel
             text: qsTr("Enable API server:")
             color: theme.textColor
             font.pixelSize: theme.fontSizeLarge
-            Layout.row: 9
+            Layout.row: 8
             Layout.column: 0
         }
         MyCheckBox {
             id: serverChatBox
-            Layout.row: 9
+            Layout.row: 8
             Layout.column: 1
             checked: MySettings.serverChat
             onClicked: {
@@ -290,7 +272,7 @@ MySettingsTab {
             ToolTip.visible: hovered
         }
         Rectangle {
-            Layout.row: 10
+            Layout.row: 9
             Layout.column: 0
             Layout.columnSpan: 3
             Layout.fillWidth: true