Compare commits

main...python-v2.8.0

No commits in common. "main" and "python-v2.8.0" have entirely different histories.

192 changed files with 17030 additions and 27291 deletions


@ -1,17 +1,13 @@
version: 2.1 version: 2.1
setup: true setup: true
orbs: orbs:
path-filtering: circleci/path-filtering@1.3.0 path-filtering: circleci/path-filtering@0.0.1
workflows: workflows:
version: 2.1 version: 2.1
generate-config: generate-config:
jobs: jobs:
- path-filtering/filter: - path-filtering/filter:
filters:
tags:
only:
- /.*/
base-revision: main base-revision: main
config-path: .circleci/continue_config.yml config-path: .circleci/continue_config.yml
mapping: | mapping: |
@ -20,3 +16,4 @@ workflows:
gpt4all-bindings/python/.* run-python-workflow true gpt4all-bindings/python/.* run-python-workflow true
gpt4all-bindings/typescript/.* run-ts-workflow true gpt4all-bindings/typescript/.* run-ts-workflow true
gpt4all-chat/.* run-chat-workflow true gpt4all-chat/.* run-chat-workflow true
.* run-default-workflow true

File diff suppressed because it is too large.


@ -1,3 +1,3 @@
[codespell] [codespell]
ignore-words-list = blong, afterall, assistent, crasher, requestor ignore-words-list = blong, afterall, som, assistent, crasher
skip = ./.git,./gpt4all-chat/translations,*.pdf,*.svg,*.lock skip = .git,*.pdf,*.svg,*.lock,*.ts

.gitignore vendored (2 changed lines)

@ -181,8 +181,6 @@ CMakeLists.txt.user
gpt4all-chat/models/* gpt4all-chat/models/*
build_* build_*
build-* build-*
cmake-build-*
/gpt4all-chat/tests/python/config.py
# IntelliJ # IntelliJ
.idea/ .idea/

.gitmodules vendored (22 changed lines)

@ -1,25 +1,7 @@
[submodule "llama.cpp-mainline"] [submodule "llama.cpp-mainline"]
path = gpt4all-backend/deps/llama.cpp-mainline path = gpt4all-backend/llama.cpp-mainline
url = https://github.com/nomic-ai/llama.cpp.git url = https://github.com/nomic-ai/llama.cpp.git
branch = master branch = master
[submodule "gpt4all-chat/usearch"] [submodule "gpt4all-chat/usearch"]
path = gpt4all-chat/deps/usearch path = gpt4all-chat/usearch
url = https://github.com/nomic-ai/usearch.git url = https://github.com/nomic-ai/usearch.git
[submodule "gpt4all-chat/deps/SingleApplication"]
path = gpt4all-chat/deps/SingleApplication
url = https://github.com/nomic-ai/SingleApplication.git
[submodule "gpt4all-chat/deps/fmt"]
path = gpt4all-chat/deps/fmt
url = https://github.com/fmtlib/fmt.git
[submodule "gpt4all-chat/deps/DuckX"]
path = gpt4all-chat/deps/DuckX
url = https://github.com/nomic-ai/DuckX.git
[submodule "gpt4all-chat/deps/QXlsx"]
path = gpt4all-chat/deps/QXlsx
url = https://github.com/nomic-ai/QXlsx.git
[submodule "gpt4all-chat/deps/minja"]
path = gpt4all-chat/deps/minja
url = https://github.com/nomic-ai/minja.git
[submodule "gpt4all-chat/deps/json"]
path = gpt4all-chat/deps/json
url = https://github.com/nlohmann/json.git


@ -51,6 +51,11 @@ Thiago Ramos ([@thiagojramos](https://github.com/thiagojramos))<br/>
E-mail: thiagojramos@outlook.com<br/> E-mail: thiagojramos@outlook.com<br/>
- pt\_BR translation - pt\_BR translation
Victor Emanuel ([@SINAPSA-IC](https://github.com/SINAPSA-IC))<br/>
E-mail: contact@sinapsaro.ro<br/>
Discord: `@sinapsa_ic_56124_99632`
- ro\_RO translation
不知火 Shiranui ([@supersonictw](https://github.com/supersonictw))<br/> 不知火 Shiranui ([@supersonictw](https://github.com/supersonictw))<br/>
E-mail: supersonic@livemail.tw<br/> E-mail: supersonic@livemail.tw<br/>
Discord: `@supersonictw` Discord: `@supersonictw`
@ -72,6 +77,6 @@ Discord: `@Tim453`
- Flatpak - Flatpak
Jack ([@wuodoo](https://github.com/wuodoo))<br/> Jack ([@wuodoo](https://github.com/wuodoo))<br/>
E-mail: 2296103047@qq.com<br/> E-mail: 2296103047@qq.com><br/>
Discord: `@mikage` Discord: `@mikage`
- zh\_CN translation - zh\_CN translation

README.md (105 changed lines)

@ -1,77 +1,48 @@
<h1 align="center">GPT4All</h1> <h1 align="center">GPT4All</h1>
<p align="center"> <p align="center">GPT4All runs large language models (LLMs) privately on everyday desktops & laptops. <br> <br> No API calls or GPUs required - you can just download the application and <a href="https://docs.gpt4all.io/gpt4all_desktop/quickstart.html#quickstart">get started</a>
Now with support for DeepSeek R1 Distillations
</p>
<p align="center">
<a href="https://www.nomic.ai/gpt4all">Website</a> &bull; <a href="https://docs.gpt4all.io">Documentation</a> &bull; <a href="https://discord.gg/mGZE39AS3e">Discord</a> &bull; <a href="https://www.youtube.com/watch?v=gQcZDXRVJok">YouTube Tutorial</a>
</p>
<p align="center">
GPT4All runs large language models (LLMs) privately on everyday desktops & laptops.
</p>
<p align="center">
No API calls or GPUs required - you can just download the application and <a href="https://docs.gpt4all.io/gpt4all_desktop/quickstart.html#quickstart">get started</a>.
</p>
<p align="center">
Read about what's new in <a href="https://www.nomic.ai/blog/tag/gpt4all">our blog</a>.
</p>
<p align="center">
<a href="https://nomic.ai/gpt4all/#newsletter-form">Subscribe to the newsletter</a>
</p>
https://github.com/nomic-ai/gpt4all/assets/70534565/513a0f15-4964-4109-89e4-4f9a9011f311 https://github.com/nomic-ai/gpt4all/assets/70534565/513a0f15-4964-4109-89e4-4f9a9011f311
<p align="center">
<a href="https://gpt4all.io/installers/gpt4all-installer-win64.exe">
<img src="gpt4all-bindings/python/docs/assets/windows.png" width="80" height="80"><br>
Download for Windows
</a>
</p>
<p align="center">
<a href="https://gpt4all.io/installers/gpt4all-installer-darwin.dmg">
<img src="gpt4all-bindings/python/docs/assets/mac.png" width="85" height="100"><br>
Download for MacOS
</a>
</p>
<p align="center">
<a href="https://gpt4all.io/installers/gpt4all-installer-linux.run">
<img src="gpt4all-bindings/python/docs/assets/ubuntu.svg" width="120" height="120"><br>
Download for Ubuntu
</a>
</p>
<p align="center">
<a href='https://flathub.org/apps/io.gpt4all.gpt4all'>
<img width='240' alt='Get it on Flathub' src='https://flathub.org/api/badge?locale=en'><br>
Get it on Flathub (community maintained)
</a>
</p>
<p align="center">
<a href="https://gpt4all.io">Website</a> &bull; <a href="https://docs.gpt4all.io">Documentation</a> &bull; <a href="https://discord.gg/mGZE39AS3e">Discord</a>
</p>
<p align="center">
<a href="https://forms.nomic.ai/gpt4all-release-notes-signup">Subscribe to the newsletter</a>
</p>
<p align="center"> <p align="center">
GPT4All is made possible by our compute partner <a href="https://www.paperspace.com/">Paperspace</a>. GPT4All is made possible by our compute partner <a href="https://www.paperspace.com/">Paperspace</a>.
</p> </p>
<p align="center">
## Download Links <a href="https://www.phorm.ai/query?projectId=755eecd3-24ad-49cc-abf4-0ab84caacf63"><img src="https://img.shields.io/badge/Phorm-Ask_AI-%23F2777A.svg" alt="phorm.ai"></a>
<p>
&mdash; <a href="https://gpt4all.io/installers/gpt4all-installer-win64.exe">
<img src="gpt4all-bindings/python/docs/assets/windows.png" style="height: 1em; width: auto" /> Windows Installer
</a> &mdash;
</p>
<p>
&mdash; <a href="https://gpt4all.io/installers/gpt4all-installer-win64-arm.exe">
<img src="gpt4all-bindings/python/docs/assets/windows.png" style="height: 1em; width: auto" /> Windows ARM Installer
</a> &mdash;
</p>
<p>
&mdash; <a href="https://gpt4all.io/installers/gpt4all-installer-darwin.dmg">
<img src="gpt4all-bindings/python/docs/assets/mac.png" style="height: 1em; width: auto" /> macOS Installer
</a> &mdash;
</p>
<p>
&mdash; <a href="https://gpt4all.io/installers/gpt4all-installer-linux.run">
<img src="gpt4all-bindings/python/docs/assets/ubuntu.svg" style="height: 1em; width: auto" /> Ubuntu Installer
</a> &mdash;
</p>
<p>
The Windows and Linux builds require Intel Core i3 2nd Gen / AMD Bulldozer, or better.
</p>
<p>
The Windows ARM build supports Qualcomm Snapdragon and Microsoft SQ1/SQ2 processors.
</p>
<p>
The Linux build is x86-64 only (no ARM).
</p>
<p>
The macOS build requires Monterey 12.6 or newer. Best results with Apple Silicon M-series processors.
</p>
See the full [System Requirements](gpt4all-chat/system_requirements.md) for more details.
<br/>
<br/>
<p>
<a href='https://flathub.org/apps/io.gpt4all.gpt4all'>
<img style="height: 2em; width: auto" alt='Get it on Flathub' src='https://flathub.org/api/badge'><br/>
Flathub (community maintained)
</a>
</p> </p>
## Install GPT4All Python ## Install GPT4All Python
@ -104,7 +75,7 @@ with model.chat_session():
- Improved user workflow for LocalDocs - Improved user workflow for LocalDocs
- Expanded access to more model architectures - Expanded access to more model architectures
- **October 19th, 2023**: GGUF Support Launches with Support for: - **October 19th, 2023**: GGUF Support Launches with Support for:
- Mistral 7b base model, an updated model gallery on our website, several new local code models including Rift Coder v1.5 - Mistral 7b base model, an updated model gallery on [gpt4all.io](https://gpt4all.io), several new local code models including Rift Coder v1.5
- [Nomic Vulkan](https://blog.nomic.ai/posts/gpt4all-gpu-inference-with-vulkan) support for Q4\_0 and Q4\_1 quantizations in GGUF. - [Nomic Vulkan](https://blog.nomic.ai/posts/gpt4all-gpu-inference-with-vulkan) support for Q4\_0 and Q4\_1 quantizations in GGUF.
- Offline build support for running old versions of the GPT4All Local LLM Chat Client. - Offline build support for running old versions of the GPT4All Local LLM Chat Client.
- **September 18th, 2023**: [Nomic Vulkan](https://blog.nomic.ai/posts/gpt4all-gpu-inference-with-vulkan) launches supporting local LLM inference on NVIDIA and AMD GPUs. - **September 18th, 2023**: [Nomic Vulkan](https://blog.nomic.ai/posts/gpt4all-gpu-inference-with-vulkan) launches supporting local LLM inference on NVIDIA and AMD GPUs.
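
The "Install GPT4All Python" steps referenced above are mostly elided by this hunk (only the `with model.chat_session():` context line survives), so here is a minimal sketch of that documented flow; the model filename is only an illustrative example and is downloaded on first use:

```python
# pip install gpt4all
from gpt4all import GPT4All

# The filename below is just an example catalog entry, not something pinned by this diff.
model = GPT4All("Meta-Llama-3-8B-Instruct.Q4_0.gguf")

# chat_session() keeps the conversation history in the model's context window
# for the duration of the with-block.
with model.chat_session():
    print(model.generate("How can I run an LLM locally on a laptop?", max_tokens=256))
```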


@ -1,41 +0,0 @@
function(gpt4all_add_warning_options target)
if (MSVC)
return()
endif()
target_compile_options("${target}" PRIVATE
# base options
-Wall
-Wextra
# extra options
-Wcast-align
-Wextra-semi
-Wformat=2
-Wmissing-include-dirs
-Wsuggest-override
-Wvla
# errors
-Werror=format-security
-Werror=init-self
-Werror=pointer-arith
-Werror=undef
# disabled warnings
-Wno-sign-compare
-Wno-unused-parameter
)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
target_compile_options("${target}" PRIVATE
-Wduplicated-branches
-Wduplicated-cond
-Wlogical-op
-Wno-reorder
-Wno-null-dereference
)
elseif (CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
target_compile_options("${target}" PRIVATE
-Wunreachable-code-break
-Wunreachable-code-return
-Werror=pointer-integer-compare
-Wno-reorder-ctor
)
endif()
endfunction()


@ -1,7 +1,4 @@
cmake_minimum_required(VERSION 3.23) # for FILE_SET cmake_minimum_required(VERSION 3.21) # for PROJECT_IS_TOP_LEVEL
include(../common/common.cmake)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@ -36,7 +33,7 @@ set(LLMODEL_VERSION_PATCH 0)
set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}") set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C) project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
set(BUILD_SHARED_LIBS ON) set(BUILD_SHARED_LIBS ON)
@ -50,7 +47,7 @@ else()
message(STATUS "Interprocedural optimization support detected") message(STATUS "Interprocedural optimization support detected")
endif() endif()
set(DIRECTORY deps/llama.cpp-mainline) set(DIRECTORY llama.cpp-mainline)
include(llama.cpp.cmake) include(llama.cpp.cmake)
set(BUILD_VARIANTS) set(BUILD_VARIANTS)
@ -66,23 +63,9 @@ if (LLMODEL_VULKAN)
list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly) list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
endif() endif()
if (LLMODEL_CUDA) if (LLMODEL_CUDA)
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES if (DEFINED CMAKE_CUDA_ARCHITECTURES)
set(GGML_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
# Defaults must be set before enable_language(CUDA).
# Keep this in sync with the arch list in ggml/src/CMakeLists.txt (plus 5.0 for non-F16 branch).
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
else()
set(CMAKE_CUDA_ARCHITECTURES "50;52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
endif()
endif() endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
include(CheckLanguage) include(CheckLanguage)
check_language(CUDA) check_language(CUDA)
@ -97,6 +80,8 @@ if (LLMODEL_ROCM)
list(APPEND BUILD_VARIANTS rocm rocm-avxonly) list(APPEND BUILD_VARIANTS rocm rocm-avxonly)
endif() endif()
set(CMAKE_VERBOSE_MAKEFILE ON)
# Go through each build variant # Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS) foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Determine flags # Determine flags
@ -129,10 +114,6 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Include GGML # Include GGML
include_ggml(-mainline-${BUILD_VARIANT}) include_ggml(-mainline-${BUILD_VARIANT})
if (BUILD_VARIANT MATCHES metal)
set(GGML_METALLIB "${GGML_METALLIB}" PARENT_SCOPE)
endif()
# Function for preparing individual implementations # Function for preparing individual implementations
function(prepare_target TARGET_NAME BASE_LIB) function(prepare_target TARGET_NAME BASE_LIB)
set(TARGET_NAME ${TARGET_NAME}-${BUILD_VARIANT}) set(TARGET_NAME ${TARGET_NAME}-${BUILD_VARIANT})
@ -151,13 +132,9 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Add each individual implementations # Add each individual implementations
add_library(llamamodel-mainline-${BUILD_VARIANT} SHARED add_library(llamamodel-mainline-${BUILD_VARIANT} SHARED
src/llamamodel.cpp src/llmodel_shared.cpp) llamamodel.cpp llmodel_shared.cpp)
gpt4all_add_warning_options(llamamodel-mainline-${BUILD_VARIANT})
target_compile_definitions(llamamodel-mainline-${BUILD_VARIANT} PRIVATE target_compile_definitions(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
LLAMA_VERSIONS=>=3 LLAMA_DATE=999999) LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
target_include_directories(llamamodel-mainline-${BUILD_VARIANT} PRIVATE
src include/gpt4all-backend
)
prepare_target(llamamodel-mainline llama-mainline) prepare_target(llamamodel-mainline llama-mainline)
if (NOT PROJECT_IS_TOP_LEVEL AND BUILD_VARIANT STREQUAL cuda) if (NOT PROJECT_IS_TOP_LEVEL AND BUILD_VARIANT STREQUAL cuda)
@ -166,20 +143,11 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
endforeach() endforeach()
add_library(llmodel add_library(llmodel
src/dlhandle.cpp llmodel.h llmodel.cpp llmodel_shared.cpp
src/llmodel.cpp llmodel_c.h llmodel_c.cpp
src/llmodel_c.cpp dlhandle.cpp
src/llmodel_shared.cpp
)
gpt4all_add_warning_options(llmodel)
target_sources(llmodel PUBLIC
FILE_SET public_headers TYPE HEADERS BASE_DIRS include
FILES include/gpt4all-backend/llmodel.h
include/gpt4all-backend/llmodel_c.h
include/gpt4all-backend/sysinfo.h
) )
target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}") target_compile_definitions(llmodel PRIVATE LIB_FILE_EXT="${CMAKE_SHARED_LIBRARY_SUFFIX}")
target_include_directories(llmodel PRIVATE src include/gpt4all-backend)
set_target_properties(llmodel PROPERTIES set_target_properties(llmodel PROPERTIES
VERSION ${PROJECT_VERSION} VERSION ${PROJECT_VERSION}


@ -27,7 +27,7 @@ Unfortunately, no for three reasons:
# What is being done to make them more compatible? # What is being done to make them more compatible?
A few things. Number one, we are maintaining compatibility with our current model zoo by way of the submodule pinning. However, we are also exploring how we can update to newer versions of llama.cpp without breaking our current models. This might involve an additional magic header check or it could possibly involve keeping the currently pinned submodule and also adding a new submodule with later changes and differentiating them with namespaces or some other manner. Investigations continue. A few things. Number one, we are maintaining compatibility with our current model zoo by way of the submodule pinning. However, we are also exploring how we can update to newer versions of llama.cpp without breaking our current models. This might involve an additional magic header check or it could possibly involve keeping the currently pinned submodule and also adding a new submodule with later changes and differienting them with namespaces or some other manner. Investigations continue.
# What about GPU inference? # What about GPU inference?

@ -1 +0,0 @@
Subproject commit 11f734c3b0334dbae4823b4a7467764e447fc6d6

@ -0,0 +1 @@
Subproject commit add387854ea73d83770a62282089dea666fa266f


@ -378,7 +378,19 @@ function(include_ggml SUFFIX)
find_package(CUDAToolkit REQUIRED) find_package(CUDAToolkit REQUIRED)
set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE) set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
# architectures are set in gpt4all-backend/CMakeLists.txt if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
set(GGML_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
else()
set(GGML_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
#set(GGML_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
endif()
endif()
message(STATUS "Using CUDA architectures: ${GGML_CUDA_ARCHITECTURES}")
set(GGML_HEADERS_CUDA ${DIRECTORY}/ggml/include/ggml-cuda.h) set(GGML_HEADERS_CUDA ${DIRECTORY}/ggml/include/ggml-cuda.h)
file(GLOB GGML_HEADERS_CUDA "${DIRECTORY}/ggml/src/ggml-cuda/*.cuh") file(GLOB GGML_HEADERS_CUDA "${DIRECTORY}/ggml/src/ggml-cuda/*.cuh")
@ -811,8 +823,7 @@ function(include_ggml SUFFIX)
list(APPEND XC_FLAGS -std=${GGML_METAL_STD}) list(APPEND XC_FLAGS -std=${GGML_METAL_STD})
endif() endif()
set(GGML_METALLIB "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib") set(GGML_METALLIB ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib)
set(GGML_METALLIB "${GGML_METALLIB}" PARENT_SCOPE)
add_custom_command( add_custom_command(
OUTPUT ${GGML_METALLIB} OUTPUT ${GGML_METALLIB}
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
@ -823,6 +834,7 @@ function(include_ggml SUFFIX)
DEPENDS ${DIRECTORY}/ggml/src/ggml-metal.metal ${DIRECTORY}/ggml/src/ggml-common.h DEPENDS ${DIRECTORY}/ggml/src/ggml-metal.metal ${DIRECTORY}/ggml/src/ggml-common.h
COMMENT "Compiling Metal kernels" COMMENT "Compiling Metal kernels"
) )
set_source_files_properties(${GGML_METALLIB} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES GENERATED ON)
add_custom_target( add_custom_target(
ggml-metal ALL ggml-metal ALL
@ -978,13 +990,10 @@ function(include_ggml SUFFIX)
add_library(llama${SUFFIX} STATIC add_library(llama${SUFFIX} STATIC
${DIRECTORY}/include/llama.h ${DIRECTORY}/include/llama.h
${DIRECTORY}/src/llama-grammar.cpp
${DIRECTORY}/src/llama-sampling.cpp
${DIRECTORY}/src/llama-vocab.cpp
${DIRECTORY}/src/llama.cpp ${DIRECTORY}/src/llama.cpp
${DIRECTORY}/src/unicode-data.cpp
${DIRECTORY}/src/unicode.cpp
${DIRECTORY}/src/unicode.h ${DIRECTORY}/src/unicode.h
${DIRECTORY}/src/unicode.cpp
${DIRECTORY}/src/unicode-data.cpp
) )
target_include_directories(llama${SUFFIX} PUBLIC ${DIRECTORY}/include ${DIRECTORY}/ggml/include) target_include_directories(llama${SUFFIX} PUBLIC ${DIRECTORY}/include ${DIRECTORY}/ggml/include)
@ -1009,6 +1018,9 @@ function(include_ggml SUFFIX)
C_STANDARD 11 C_STANDARD 11
C_STANDARD_REQUIRED true C_STANDARD_REQUIRED true
) )
if (GGML_CUDA_ARCHITECTURES)
set_property(TARGET ggml${SUFFIX} llama${SUFFIX} PROPERTY CUDA_ARCHITECTURES "${GGML_CUDA_ARCHITECTURES}")
endif()
target_compile_options(ggml${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}") target_compile_options(ggml${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}")
target_compile_options(llama${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}") target_compile_options(llama${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}")


@ -2,7 +2,6 @@
#include "llamamodel_impl.h" #include "llamamodel_impl.h"
#include "llmodel.h" #include "llmodel.h"
#include "utils.h"
#include <ggml.h> #include <ggml.h>
#include <llama.h> #include <llama.h>
@ -53,8 +52,6 @@ static const std::vector<const char *> KNOWN_ARCHES {
"gpt2", "gpt2",
// "gptj", -- no inference code // "gptj", -- no inference code
"gptneox", "gptneox",
"granite",
"granitemoe",
"mpt", "mpt",
"baichuan", "baichuan",
"starcoder", "starcoder",
@ -82,7 +79,6 @@ static const std::vector<const char *> KNOWN_ARCHES {
"command-r", "command-r",
// "dbrx", -- 16x12B parameters // "dbrx", -- 16x12B parameters
"olmo", "olmo",
"olmoe",
"openelm", "openelm",
// "arctic", -- 10B+128x3.66B parameters // "arctic", -- 10B+128x3.66B parameters
"deepseek2", "deepseek2",
@ -107,34 +103,26 @@ static bool llama_verbose()
return var && *var; return var && *var;
} }
static void llama_log_callback(ggml_log_level level, const char *text, void *userdata, bool warn) static void llama_log_callback(enum ggml_log_level level, const char *text, void *userdata)
{ {
(void)userdata; (void)userdata;
if (llama_verbose() || level <= GGML_LOG_LEVEL_ERROR) {
static ggml_log_level lastlevel = GGML_LOG_LEVEL_NONE; fputs(text, stderr);
if (!llama_verbose()) {
auto efflevel = level == GGML_LOG_LEVEL_CONT ? lastlevel : level;
lastlevel = efflevel;
switch (efflevel) {
case GGML_LOG_LEVEL_CONT:
UNREACHABLE();
break;
case GGML_LOG_LEVEL_WARN:
if (warn) break;
[[fallthrough]];
case GGML_LOG_LEVEL_NONE: // not used?
case GGML_LOG_LEVEL_INFO:
case GGML_LOG_LEVEL_DEBUG:
return; // suppress
case GGML_LOG_LEVEL_ERROR:
;
}
} }
fputs(text, stderr);
} }
#ifdef GGML_USE_CUDA
static void cuda_log_callback(enum ggml_log_level level, const char *text, void *userdata)
{
(void)userdata;
if (llama_verbose() || level <= GGML_LOG_LEVEL_WARN) {
fputs(text, stderr);
}
}
#endif
struct gpt_params { struct gpt_params {
int32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt int32_t n_keep = 0; // number of tokens to keep from initial prompt
// sampling parameters // sampling parameters
@ -149,6 +137,36 @@ struct gpt_params {
bool use_mlock = false; // use mlock to keep model in memory bool use_mlock = false; // use mlock to keep model in memory
}; };
static int llama_sample_top_p_top_k(
llama_context *ctx,
const llama_token *last_n_tokens_data,
int last_n_tokens_size,
int top_k,
float top_p,
float min_p,
float temp,
float repeat_penalty) {
auto logits = llama_get_logits_ith(ctx, -1);
auto n_vocab = llama_n_vocab(llama_get_model(ctx));
// Populate initial list of all candidates
std::vector<llama_token_data> candidates;
candidates.reserve(n_vocab);
for (int token_id = 0; token_id < n_vocab; token_id++) {
candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
}
llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
// Sample repeat penalty
llama_sample_repetition_penalties(nullptr, &candidates_p, last_n_tokens_data, last_n_tokens_size, repeat_penalty, 0.0f, 0.0f);
// Temperature sampling
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
llama_sample_tail_free(ctx, &candidates_p, 1.0f, 1);
llama_sample_typical(ctx, &candidates_p, 1.0f, 1);
llama_sample_top_p(ctx, &candidates_p, top_p, 1);
llama_sample_min_p(ctx, &candidates_p, min_p, 1);
llama_sample_temp(ctx, &candidates_p, temp);
return llama_sample_token(ctx, &candidates_p);
}
const char *get_arch_name(gguf_context *ctx_gguf) const char *get_arch_name(gguf_context *ctx_gguf)
{ {
const int kid = gguf_find_key(ctx_gguf, "general.architecture"); const int kid = gguf_find_key(ctx_gguf, "general.architecture");
@ -205,7 +223,7 @@ static int32_t get_arch_key_u32(std::string const &modelPath, std::string const
if (keyidx != -1) { if (keyidx != -1) {
value = gguf_get_val_u32(ctx, keyidx); value = gguf_get_val_u32(ctx, keyidx);
} else { } else {
std::cerr << __func__ << ": " << key << " not found in " << modelPath << "\n"; std::cerr << __func__ << ": " << key << "not found in " << modelPath << "\n";
} }
} }
@ -215,27 +233,21 @@ cleanup:
} }
struct LLamaPrivate { struct LLamaPrivate {
bool modelLoaded = false; const std::string modelPath;
int device = -1; bool modelLoaded = false;
std::string deviceName; int device = -1;
int64_t n_threads = 0; std::string deviceName;
std::vector<LLModel::Token> end_tokens; llama_model *model = nullptr;
const char *backend_name = nullptr; llama_context *ctx = nullptr;
std::vector<LLModel::Token> inputTokens; llama_model_params model_params;
llama_context_params ctx_params;
llama_model *model = nullptr; int64_t n_threads = 0;
llama_context *ctx = nullptr; std::vector<LLModel::Token> end_tokens;
llama_model_params model_params; const char *backend_name = nullptr;
llama_context_params ctx_params;
llama_sampler *sampler_chain;
}; };
LLamaModel::LLamaModel() LLamaModel::LLamaModel()
: d_ptr(std::make_unique<LLamaPrivate>()) : d_ptr(new LLamaPrivate) {}
{
auto sparams = llama_sampler_chain_default_params();
d_ptr->sampler_chain = llama_sampler_chain_init(sparams);
}
// default hparams (LLaMA 7B) // default hparams (LLaMA 7B)
struct llama_file_hparams { struct llama_file_hparams {
@ -424,9 +436,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
} }
} }
d_ptr->ctx_params.n_ctx = n_ctx; d_ptr->ctx_params.n_ctx = n_ctx;
d_ptr->ctx_params.type_k = params.kv_type; d_ptr->ctx_params.seed = params.seed;
d_ptr->ctx_params.type_v = params.kv_type; d_ptr->ctx_params.type_k = params.kv_type;
d_ptr->ctx_params.type_v = params.kv_type;
// The new batch API provides space for n_vocab*n_tokens logits. Tell llama.cpp early // The new batch API provides space for n_vocab*n_tokens logits. Tell llama.cpp early
// that we want this many logits so the state serializes consistently. // that we want this many logits so the state serializes consistently.
@ -492,7 +505,6 @@ LLamaModel::~LLamaModel()
llama_free(d_ptr->ctx); llama_free(d_ptr->ctx);
} }
llama_free_model(d_ptr->model); llama_free_model(d_ptr->model);
llama_sampler_free(d_ptr->sampler_chain);
} }
bool LLamaModel::isModelLoaded() const bool LLamaModel::isModelLoaded() const
@ -502,41 +514,38 @@ bool LLamaModel::isModelLoaded() const
size_t LLamaModel::stateSize() const size_t LLamaModel::stateSize() const
{ {
return llama_state_get_size(d_ptr->ctx); return llama_get_state_size(d_ptr->ctx);
} }
size_t LLamaModel::saveState(std::span<uint8_t> stateOut, std::vector<Token> &inputTokensOut) const size_t LLamaModel::saveState(uint8_t *dest) const
{ {
size_t bytesWritten = llama_state_get_data(d_ptr->ctx, stateOut.data(), stateOut.size()); return llama_copy_state_data(d_ptr->ctx, dest);
if (bytesWritten)
inputTokensOut.assign(d_ptr->inputTokens.begin(), d_ptr->inputTokens.end());
return bytesWritten;
} }
size_t LLamaModel::restoreState(std::span<const uint8_t> state, std::span<const Token> inputTokens) size_t LLamaModel::restoreState(const uint8_t *src)
{ {
size_t bytesRead = llama_state_set_data(d_ptr->ctx, state.data(), state.size()); // const_cast is required, see: https://github.com/ggerganov/llama.cpp/pull/1540
if (bytesRead) return llama_set_state_data(d_ptr->ctx, const_cast<uint8_t*>(src));
d_ptr->inputTokens.assign(inputTokens.begin(), inputTokens.end());
return bytesRead;
} }
std::vector<LLModel::Token> LLamaModel::tokenize(std::string_view str) const std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, const std::string &str, bool special)
{ {
bool atStart = m_tokenize_last_token == -1;
bool insertSpace = atStart || (
llama_token_get_attr(d_ptr->model, m_tokenize_last_token)
& (LLAMA_TOKEN_ATTR_CONTROL | LLAMA_TOKEN_ATTR_USER_DEFINED | LLAMA_TOKEN_ATTR_UNKNOWN)
);
std::vector<LLModel::Token> fres(str.length() + 4); std::vector<LLModel::Token> fres(str.length() + 4);
int32_t fres_len = llama_tokenize( int32_t fres_len = llama_tokenize_gpt4all(
d_ptr->model, str.data(), str.length(), fres.data(), fres.size(), /*add_special*/ true, /*parse_special*/ true d_ptr->model, str.c_str(), str.length(), fres.data(), fres.size(), /*add_special*/ atStart,
/*parse_special*/ special, /*insert_space*/ insertSpace
); );
fres.resize(fres_len); fres.resize(fres_len);
if (fres_len)
m_tokenize_last_token = fres.back();
return fres; return fres;
} }
bool LLamaModel::isSpecialToken(Token id) const
{
return llama_token_get_attr(d_ptr->model, id)
& (LLAMA_TOKEN_ATTR_CONTROL | LLAMA_TOKEN_ATTR_USER_DEFINED | LLAMA_TOKEN_ATTR_UNKNOWN);
}
std::string LLamaModel::tokenToString(Token id) const std::string LLamaModel::tokenToString(Token id) const
{ {
std::vector<char> result(8, 0); std::vector<char> result(8, 0);
@ -553,58 +562,18 @@ std::string LLamaModel::tokenToString(Token id) const
return std::string(result.data(), result.size()); return std::string(result.data(), result.size());
} }
void LLamaModel::initSampler(const PromptContext &promptCtx) LLModel::Token LLamaModel::sampleToken(PromptContext &promptCtx) const
{ {
auto *model = d_ptr->model; const size_t n_prev_toks = std::min((size_t) promptCtx.repeat_last_n, promptCtx.tokens.size());
auto *chain = d_ptr->sampler_chain; return llama_sample_top_p_top_k(d_ptr->ctx,
promptCtx.tokens.data() + promptCtx.tokens.size() - n_prev_toks,
// clear sampler chain n_prev_toks, promptCtx.top_k, promptCtx.top_p, promptCtx.min_p, promptCtx.temp,
for (int i = llama_sampler_chain_n(chain) - 1; i >= 0; i--) { promptCtx.repeat_penalty);
auto *smpl = llama_sampler_chain_remove(chain, i);
llama_sampler_free(smpl);
}
// build new chain
llama_sampler_chain_add(chain,
llama_sampler_init_penalties(
llama_n_vocab(model),
llama_token_eos(model),
llama_token_nl(model),
promptCtx.repeat_last_n,
promptCtx.repeat_penalty,
// TODO(jared): consider making the below configurable
/*penalty_freq*/ 0.0f,
/*penalty_present*/ 0.0f,
/*penalize_nl*/ true,
/*ignore_eos*/ false
)
);
if (promptCtx.temp == 0.0f) {
llama_sampler_chain_add(chain, llama_sampler_init_greedy());
} else {
struct llama_sampler *samplers[] = {
llama_sampler_init_top_k(promptCtx.top_k),
llama_sampler_init_top_p(promptCtx.top_p, 1),
llama_sampler_init_min_p(promptCtx.min_p, 1),
llama_sampler_init_temp(promptCtx.temp),
llama_sampler_init_softmax(),
llama_sampler_init_dist(LLAMA_DEFAULT_SEED),
};
for (auto *smpl : samplers)
llama_sampler_chain_add(chain, smpl);
}
} }
LLModel::Token LLamaModel::sampleToken() const bool LLamaModel::evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const
{ {
return llama_sampler_sample(d_ptr->sampler_chain, d_ptr->ctx, -1); llama_kv_cache_seq_rm(d_ptr->ctx, 0, ctx.n_past, -1);
}
bool LLamaModel::evalTokens(int32_t nPast, std::span<const Token> tokens) const
{
assert(!tokens.empty());
llama_kv_cache_seq_rm(d_ptr->ctx, 0, nPast, -1);
llama_batch batch = llama_batch_init(tokens.size(), 0, 1); llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
@ -612,7 +581,7 @@ bool LLamaModel::evalTokens(int32_t nPast, std::span<const Token> tokens) const
for (int32_t i = 0; i < batch.n_tokens; i++) { for (int32_t i = 0; i < batch.n_tokens; i++) {
batch.token [i] = tokens[i]; batch.token [i] = tokens[i];
batch.pos [i] = nPast + i; batch.pos [i] = ctx.n_past + i;
batch.n_seq_id[i] = 1; batch.n_seq_id[i] = 1;
batch.seq_id [i][0] = 0; batch.seq_id [i][0] = 0;
batch.logits [i] = false; batch.logits [i] = false;
@ -626,86 +595,11 @@ bool LLamaModel::evalTokens(int32_t nPast, std::span<const Token> tokens) const
return res == 0; return res == 0;
} }
void LLamaModel::shiftContext(const PromptContext &promptCtx, int32_t *nPast)
{
// infinite text generation via context shifting
// erase up to n_ctx*contextErase tokens
int n_keep = shouldAddBOS();
int n_past = *nPast;
int n_discard = std::min(n_past - n_keep, int(contextLength() * promptCtx.contextErase));
assert(n_discard > 0);
if (n_discard <= 0)
return;
std::cerr << "Llama: context full, swapping: n_past = " << n_past << ", n_keep = " << n_keep
<< ", n_discard = " << n_discard << "\n";
// erase the first n_discard tokens from the context
llama_kv_cache_seq_rm (d_ptr->ctx, 0, n_keep, n_keep + n_discard);
llama_kv_cache_seq_add(d_ptr->ctx, 0, n_keep + n_discard, n_past, -n_discard);
auto &inp = d_ptr->inputTokens;
inp.erase(inp.begin() + n_keep, inp.begin() + n_keep + n_discard);
*nPast = inp.size();
}
int32_t LLamaModel::contextLength() const int32_t LLamaModel::contextLength() const
{ {
return llama_n_ctx(d_ptr->ctx); return llama_n_ctx(d_ptr->ctx);
} }
auto LLamaModel::specialTokens() -> std::unordered_map<std::string, std::string> const
{
if (!d_ptr->model)
throw std::logic_error("model not loaded");
std::unordered_map<std::string, std::string> tokens;
if (auto id = llama_token_bos(d_ptr->model); id != LLAMA_TOKEN_NULL)
tokens.emplace("bos_token", tokenToString(id));
if (auto id = llama_token_eos(d_ptr->model); id != LLAMA_TOKEN_NULL)
tokens.emplace("eos_token", tokenToString(id));
return tokens;
}
int32_t LLamaModel::inputLength() const
{
return d_ptr->inputTokens.size();
}
int32_t LLamaModel::computeModelInputPosition(std::span<const Token> input) const
{
// find common prefix
auto cacheIt = d_ptr->inputTokens.begin();
auto inputIt = input.begin();
while (cacheIt < d_ptr->inputTokens.end() && inputIt < input.end() && *cacheIt == *inputIt) {
++cacheIt; ++inputIt;
}
// tell the caller to ignore the tokens between [begin, inputIt)
return inputIt - input.begin();
}
void LLamaModel::setModelInputPosition(int32_t pos)
{
auto &inp = d_ptr->inputTokens;
assert(pos >= 0);
assert(pos <= inp.size());
// truncate token cache to end at the new n_past
if (pos < inp.size())
inp.resize(pos);
}
void LLamaModel::appendInputToken(Token tok)
{
d_ptr->inputTokens.push_back(tok);
}
auto LLamaModel::inputTokens() const -> std::span<const Token>
{
return d_ptr->inputTokens;
}
const std::vector<LLModel::Token> &LLamaModel::endTokens() const const std::vector<LLModel::Token> &LLamaModel::endTokens() const
{ {
return d_ptr->end_tokens; return d_ptr->end_tokens;
@ -726,37 +620,6 @@ int32_t LLamaModel::layerCount(std::string const &modelPath) const
return get_arch_key_u32(modelPath, "block_count"); return get_arch_key_u32(modelPath, "block_count");
} }
// TODO(jared): reduce redundant code and operations by combining all metadata getters for unloaded
// models into a class that keeps the model file open
auto LLamaModel::chatTemplate(const char *modelPath) const -> std::expected<std::string, std::string>
{
auto *ctx = load_gguf(modelPath);
if (!ctx)
return std::unexpected("failed to open model file");
std::expected<std::string, std::string> result;
enum gguf_type ktype;
const int kid = gguf_find_key(ctx, "tokenizer.chat_template");
if (kid == -1) {
result = std::unexpected("key not found");
goto cleanup;
}
ktype = gguf_get_kv_type(ctx, kid);
if (ktype != GGUF_TYPE_STRING) {
result = std::unexpected(
"expected key type STRING (" + std::to_string(GGUF_TYPE_STRING) + "), got " + std::to_string(ktype)
);
goto cleanup;
}
result = gguf_get_val_str(ctx, kid);
cleanup:
gguf_free(ctx);
return result;
}
#ifdef GGML_USE_VULKAN #ifdef GGML_USE_VULKAN
static const char *getVulkanVendorName(uint32_t vendorID) static const char *getVulkanVendorName(uint32_t vendorID)
{ {
@ -1329,9 +1192,9 @@ DLL_EXPORT bool is_arch_supported(const char *arch)
DLL_EXPORT LLModel *construct() DLL_EXPORT LLModel *construct()
{ {
llama_log_set([](auto l, auto t, auto u) { llama_log_callback(l, t, u, false); }, nullptr); llama_log_set(llama_log_callback, nullptr);
#ifdef GGML_USE_CUDA #ifdef GGML_USE_CUDA
ggml_backend_cuda_log_set_callback([](auto l, auto t, auto u) { llama_log_callback(l, t, u, true); }, nullptr); ggml_backend_cuda_log_set_callback(cuda_log_callback, nullptr);
#endif #endif
return new LLamaModel; return new LLamaModel;
} }


@ -6,12 +6,10 @@
#include "llmodel.h" #include "llmodel.h"
#include <functional>
#include <memory> #include <memory>
#include <span>
#include <string> #include <string>
#include <string_view>
#include <vector> #include <vector>
#include <unordered_map>
struct LLamaPrivate; struct LLamaPrivate;
struct EmbModelSpec; struct EmbModelSpec;
@ -29,8 +27,8 @@ public:
bool isModelLoaded() const override; bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override; size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
size_t stateSize() const override; size_t stateSize() const override;
size_t saveState(std::span<uint8_t> stateOut, std::vector<Token> &inputTokensOut) const override; size_t saveState(uint8_t *dest) const override;
size_t restoreState(std::span<const uint8_t> state, std::span<const Token> inputTokens) override; size_t restoreState(const uint8_t *src) override;
void setThreadCount(int32_t n_threads) override; void setThreadCount(int32_t n_threads) override;
int32_t threadCount() const override; int32_t threadCount() const override;
std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0) const override; std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0) const override;
@ -49,36 +47,25 @@ public:
void embed(const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality = -1, void embed(const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality = -1,
size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false) override; size_t *tokenCount = nullptr, bool doMean = true, bool atlas = false) override;
int32_t contextLength() const override;
auto specialTokens() -> std::unordered_map<std::string, std::string> const override;
protected:
std::vector<Token> tokenize(std::string_view str) const override;
bool isSpecialToken(Token id) const override;
std::string tokenToString(Token id) const override;
void initSampler(const PromptContext &ctx) override;
Token sampleToken() const override;
bool evalTokens(int32_t nPast, std::span<const Token> tokens) const override;
void shiftContext(const PromptContext &promptCtx, int32_t *nPast) override;
int32_t inputLength() const override;
int32_t computeModelInputPosition(std::span<const Token> input) const override;
void setModelInputPosition(int32_t pos) override;
void appendInputToken(Token tok) override;
std::span<const Token> inputTokens() const override;
const std::vector<Token> &endTokens() const override;
bool shouldAddBOS() const override;
int32_t maxContextLength(std::string const &modelPath) const override;
int32_t layerCount(std::string const &modelPath) const override;
auto chatTemplate(const char *modelPath) const -> std::expected<std::string, std::string> override;
void embedInternal(const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb,
const EmbModelSpec *spec);
private: private:
std::unique_ptr<LLamaPrivate> d_ptr; std::unique_ptr<LLamaPrivate> d_ptr;
bool m_supportsEmbedding = false; bool m_supportsEmbedding = false;
bool m_supportsCompletion = false; bool m_supportsCompletion = false;
protected:
std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override;
std::string tokenToString(Token id) const override;
Token sampleToken(PromptContext &ctx) const override;
bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override;
int32_t contextLength() const override;
const std::vector<Token> &endTokens() const override;
bool shouldAddBOS() const override;
int32_t maxContextLength(std::string const &modelPath) const override;
int32_t layerCount(std::string const &modelPath) const override;
void embedInternal(const std::vector<std::string> &texts, float *embeddings, std::string prefix, int dimensionality,
size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb,
const EmbModelSpec *spec);
}; };
#endif // LLAMAMODEL_H #endif // LLAMAMODEL_H


@ -140,14 +140,9 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
std::string path; std::string path;
// Split the paths string by the delimiter and process each path. // Split the paths string by the delimiter and process each path.
while (std::getline(ss, path, ';')) { while (std::getline(ss, path, ';')) {
fs::directory_iterator iter; std::u8string u8_path(path.begin(), path.end());
try {
iter = fs::directory_iterator(std::u8string(path.begin(), path.end()));
} catch (const fs::filesystem_error &) {
continue; // skip nonexistent path
}
// Iterate over all libraries // Iterate over all libraries
for (const auto &f : iter) { for (const auto &f : fs::directory_iterator(u8_path)) {
const fs::path &p = f.path(); const fs::path &p = f.path();
if (p.extension() != LIB_FILE_EXT) continue; if (p.extension() != LIB_FILE_EXT) continue;
@ -331,12 +326,6 @@ bool LLModel::Implementation::isEmbeddingModel(const std::string &modelPath)
return llama && llama->isEmbeddingModel(modelPath); return llama && llama->isEmbeddingModel(modelPath);
} }
auto LLModel::Implementation::chatTemplate(const char *modelPath) -> std::expected<std::string, std::string>
{
auto *llama = constructGlobalLlama();
return llama ? llama->chatTemplate(modelPath) : std::unexpected("backend not available");
}
void LLModel::Implementation::setImplementationsSearchPath(const std::string& path) void LLModel::Implementation::setImplementationsSearchPath(const std::string& path)
{ {
s_implementations_search_path = path; s_implementations_search_path = path;


@ -5,10 +5,8 @@
#include <cassert> #include <cassert>
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <expected>
#include <functional> #include <functional>
#include <optional> #include <optional>
#include <span>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <string_view> #include <string_view>
@ -25,10 +23,6 @@ using namespace std::string_literals;
class LLModel { class LLModel {
public: public:
using Token = int32_t; using Token = int32_t;
using PromptCallback = std::function<bool(std::span<const Token> batch, bool cached)>;
using ResponseCallback = std::function<bool(Token token, std::string_view piece)>;
using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
using ProgressCallback = std::function<bool(float progress)>;
class BadArchError: public std::runtime_error { class BadArchError: public std::runtime_error {
public: public:
@ -106,7 +100,6 @@ public:
static int32_t maxContextLength(const std::string &modelPath); static int32_t maxContextLength(const std::string &modelPath);
static int32_t layerCount(const std::string &modelPath); static int32_t layerCount(const std::string &modelPath);
static bool isEmbeddingModel(const std::string &modelPath); static bool isEmbeddingModel(const std::string &modelPath);
static auto chatTemplate(const char *modelPath) -> std::expected<std::string, std::string>;
static void setImplementationsSearchPath(const std::string &path); static void setImplementationsSearchPath(const std::string &path);
static const std::string &implementationsSearchPath(); static const std::string &implementationsSearchPath();
static bool hasSupportedCPU(); static bool hasSupportedCPU();
@ -130,6 +123,9 @@ public:
}; };
struct PromptContext { struct PromptContext {
std::vector<int32_t> tokens; // current tokens in the context window
int32_t n_past = 0; // number of tokens in past conversation
int32_t n_ctx = 0; // number of tokens possible in context window
int32_t n_predict = 200; int32_t n_predict = 200;
int32_t top_k = 40; int32_t top_k = 40;
float top_p = 0.9f; float top_p = 0.9f;
@ -138,31 +134,37 @@ public:
int32_t n_batch = 9; int32_t n_batch = 9;
float repeat_penalty = 1.10f; float repeat_penalty = 1.10f;
int32_t repeat_last_n = 64; // last n tokens to penalize int32_t repeat_last_n = 64; // last n tokens to penalize
float contextErase = 0.5f; // percent of context to erase if we exceed the context window float contextErase = 0.75f; // percent of context to erase if we exceed the context window
}; };
using ProgressCallback = std::function<bool(float progress)>;
explicit LLModel() {} explicit LLModel() {}
virtual ~LLModel() {} virtual ~LLModel() {}
virtual bool supportsEmbedding() const = 0; virtual bool supportsEmbedding() const = 0;
virtual bool supportsCompletion() const = 0; virtual bool supportsCompletion() const = 0;
virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0; virtual bool loadModel(const std::string &modelPath, int n_ctx, int ngl) = 0;
virtual bool isModelBlacklisted(const std::string &modelPath) const { (void)modelPath; return false; } virtual bool isModelBlacklisted(const std::string &modelPath) const { (void)modelPath; return false; };
virtual bool isEmbeddingModel(const std::string &modelPath) const { (void)modelPath; return false; } virtual bool isEmbeddingModel(const std::string &modelPath) const { (void)modelPath; return false; }
virtual bool isModelLoaded() const = 0; virtual bool isModelLoaded() const = 0;
virtual size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) = 0; virtual size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) = 0;
virtual size_t stateSize() const = 0; virtual size_t stateSize() const { return 0; }
virtual size_t saveState(std::span<uint8_t> stateOut, std::vector<Token> &inputTokensOut) const = 0; virtual size_t saveState(uint8_t *dest) const { (void)dest; return 0; }
virtual size_t restoreState(std::span<const uint8_t> state, std::span<const Token> inputTokens) = 0; virtual size_t restoreState(const uint8_t *src) { (void)src; return 0; }
// This method requires the model to return true from supportsCompletion otherwise it will throw // This method requires the model to return true from supportsCompletion otherwise it will throw
// an error // an error
virtual void prompt(std::string_view prompt, virtual void prompt(const std::string &prompt,
const PromptCallback &promptCallback, const std::string &promptTemplate,
const ResponseCallback &responseCallback, std::function<bool(int32_t)> promptCallback,
const PromptContext &ctx); std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &ctx,
bool special = false,
std::string *fakeReply = nullptr);
virtual int32_t countPromptTokens(std::string_view prompt) const; using EmbedCancelCallback = bool(unsigned *batchSizes, unsigned nBatch, const char *backend);
virtual size_t embeddingSize() const { virtual size_t embeddingSize() const {
throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings"); throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings");
@ -207,24 +209,14 @@ public:
void setProgressCallback(ProgressCallback callback) { m_progressCallback = callback; } void setProgressCallback(ProgressCallback callback) { m_progressCallback = callback; }
virtual int32_t contextLength() const = 0;
virtual auto specialTokens() -> std::unordered_map<std::string, std::string> const = 0;
protected: protected:
// These are pure virtual because subclasses need to implement as the default implementation of // These are pure virtual because subclasses need to implement as the default implementation of
// 'prompt' above calls these functions // 'prompt' above calls these functions
virtual std::vector<Token> tokenize(std::string_view str) const = 0; virtual std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special = false) = 0;
virtual bool isSpecialToken(Token id) const = 0;
virtual std::string tokenToString(Token id) const = 0; virtual std::string tokenToString(Token id) const = 0;
virtual void initSampler(const PromptContext &ctx) = 0; virtual Token sampleToken(PromptContext &ctx) const = 0;
virtual Token sampleToken() const = 0; virtual bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const = 0;
virtual bool evalTokens(int32_t nPast, std::span<const Token> tokens) const = 0; virtual int32_t contextLength() const = 0;
virtual void shiftContext(const PromptContext &promptCtx, int32_t *nPast) = 0;
virtual int32_t inputLength() const = 0;
virtual int32_t computeModelInputPosition(std::span<const Token> input) const = 0;
virtual void setModelInputPosition(int32_t pos) = 0;
virtual void appendInputToken(Token tok) = 0;
virtual std::span<const Token> inputTokens() const = 0;
virtual const std::vector<Token> &endTokens() const = 0; virtual const std::vector<Token> &endTokens() const = 0;
virtual bool shouldAddBOS() const = 0; virtual bool shouldAddBOS() const = 0;
@ -240,11 +232,9 @@ protected:
return -1; return -1;
} }
virtual auto chatTemplate(const char *modelPath) const -> std::expected<std::string, std::string> // This is a helper function called from the default implementation of 'prompt' but it can be
{ // shared by all base classes so it isn't virtual
(void)modelPath; void recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate);
return std::unexpected("not implemented");
}
const Implementation *m_implementation = nullptr; const Implementation *m_implementation = nullptr;
@ -257,15 +247,16 @@ protected:
return true; return true;
} }
// prefill context with prompt bool decodePrompt(std::function<bool(int32_t)> promptCallback,
auto decodePrompt(const PromptCallback &promptCallback, std::function<bool(int32_t, const std::string&)> responseCallback,
const PromptContext &promptCtx, std::function<bool(bool)> recalculateCallback,
std::vector<Token> embd_inp) PromptContext &promptCtx,
-> std::optional<int32_t>; std::vector<Token> embd_inp);
// generate a response void generateResponse(std::function<bool(int32_t, const std::string&)> responseCallback,
void generateResponse(const ResponseCallback &responseCallback, std::function<bool(bool)> recalculateCallback,
const PromptContext &promptCtx, PromptContext &promptCtx);
int32_t nPast);
Token m_tokenize_last_token = -1; // not serialized
friend class LLMImplementation; friend class LLMImplementation;
}; };


@ -7,20 +7,16 @@
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <exception> #include <exception>
#include <functional>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <optional> #include <optional>
#include <string> #include <string>
#include <string_view>
#include <vector> #include <vector>
#include <span>
namespace ranges = std::ranges;
static_assert(sizeof(token_t) == sizeof(LLModel::Token));
struct LLModelWrapper { struct LLModelWrapper {
LLModel *llModel = nullptr; LLModel *llModel = nullptr;
LLModel::PromptContext promptContext;
~LLModelWrapper() { delete llModel; } ~LLModelWrapper() { delete llModel; }
}; };
@ -88,80 +84,77 @@ bool llmodel_isModelLoaded(llmodel_model model)
return wrapper->llModel->isModelLoaded(); return wrapper->llModel->isModelLoaded();
} }
uint64_t llmodel_state_get_size(llmodel_model model) uint64_t llmodel_get_state_size(llmodel_model model)
{ {
auto *wrapper = static_cast<LLModelWrapper *>(model); auto *wrapper = static_cast<LLModelWrapper *>(model);
return wrapper->llModel->stateSize(); return wrapper->llModel->stateSize();
} }
uint64_t llmodel_state_get_data(llmodel_model model, uint8_t *state_out, uint64_t state_size, uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest)
token_t **input_tokens_out, uint64_t *n_input_tokens)
{ {
auto *wrapper = static_cast<LLModelWrapper *>(model); auto *wrapper = static_cast<LLModelWrapper *>(model);
std::vector<LLModel::Token> inputTokens; return wrapper->llModel->saveState(dest);
auto bytesWritten = wrapper->llModel->saveState({state_out, size_t(state_size)}, inputTokens);
if (bytesWritten) {
auto *buf = new LLModel::Token[inputTokens.size()];
ranges::copy(inputTokens, buf);
*input_tokens_out = buf;
*n_input_tokens = uint64_t(inputTokens.size());
} else {
*input_tokens_out = nullptr;
*n_input_tokens = 0;
}
return bytesWritten;
} }
void llmodel_state_free_input_tokens(LLModel::Token *input_tokens) uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src)
{
delete[] input_tokens;
}
uint64_t llmodel_state_set_data(llmodel_model model, const uint8_t *state, uint64_t state_size,
const token_t *input_tokens, uint64_t n_input_tokens)
{ {
auto *wrapper = static_cast<LLModelWrapper *>(model); auto *wrapper = static_cast<LLModelWrapper *>(model);
return wrapper->llModel->restoreState({state, size_t(state_size)}, {input_tokens, size_t(n_input_tokens)}); return wrapper->llModel->restoreState(src);
} }
bool llmodel_prompt(llmodel_model model, void llmodel_prompt(llmodel_model model, const char *prompt,
const char *prompt, const char *prompt_template,
llmodel_prompt_callback prompt_callback, llmodel_prompt_callback prompt_callback,
llmodel_response_callback response_callback, llmodel_response_callback response_callback,
llmodel_prompt_context *ctx, llmodel_recalculate_callback recalculate_callback,
const char **error) llmodel_prompt_context *ctx,
bool special,
const char *fake_reply)
{ {
auto *wrapper = static_cast<LLModelWrapper *>(model); auto *wrapper = static_cast<LLModelWrapper *>(model);
auto response_func = [response_callback](int32_t token_id, const std::string &response) {
return response_callback(token_id, response.c_str());
};
// Copy the C prompt context // Copy the C prompt context
LLModel::PromptContext promptContext { wrapper->promptContext.n_past = ctx->n_past;
.n_predict = ctx->n_predict, wrapper->promptContext.n_ctx = ctx->n_ctx;
.top_k = ctx->top_k, wrapper->promptContext.n_predict = ctx->n_predict;
.top_p = ctx->top_p, wrapper->promptContext.top_k = ctx->top_k;
.min_p = ctx->min_p, wrapper->promptContext.top_p = ctx->top_p;
.temp = ctx->temp, wrapper->promptContext.min_p = ctx->min_p;
.n_batch = ctx->n_batch, wrapper->promptContext.temp = ctx->temp;
.repeat_penalty = ctx->repeat_penalty, wrapper->promptContext.n_batch = ctx->n_batch;
.repeat_last_n = ctx->repeat_last_n, wrapper->promptContext.repeat_penalty = ctx->repeat_penalty;
.contextErase = ctx->context_erase, wrapper->promptContext.repeat_last_n = ctx->repeat_last_n;
}; wrapper->promptContext.contextErase = ctx->context_erase;
auto prompt_func = [prompt_callback](std::span<const LLModel::Token> token_ids, bool cached) { std::string fake_reply_str;
return prompt_callback(token_ids.data(), token_ids.size(), cached); if (fake_reply) { fake_reply_str = fake_reply; }
}; auto *fake_reply_p = fake_reply ? &fake_reply_str : nullptr;
auto response_func = [response_callback](LLModel::Token token_id, std::string_view piece) {
return response_callback(token_id, piece.data());
};
// Call the C++ prompt method // Call the C++ prompt method
try { wrapper->llModel->prompt(prompt, prompt_template, prompt_callback, response_func, recalculate_callback,
wrapper->llModel->prompt(prompt, prompt_func, response_func, promptContext); wrapper->promptContext, special, fake_reply_p);
} catch (std::exception const &e) {
llmodel_set_error(error, e.what());
return false;
}
return true; // Update the C context by giving access to the wrappers raw pointers to std::vector data
// which involves no copies
ctx->tokens = wrapper->promptContext.tokens.data();
ctx->tokens_size = wrapper->promptContext.tokens.size();
// Update the rest of the C prompt context
ctx->n_past = wrapper->promptContext.n_past;
ctx->n_ctx = wrapper->promptContext.n_ctx;
ctx->n_predict = wrapper->promptContext.n_predict;
ctx->top_k = wrapper->promptContext.top_k;
ctx->top_p = wrapper->promptContext.top_p;
ctx->min_p = wrapper->promptContext.min_p;
ctx->temp = wrapper->promptContext.temp;
ctx->n_batch = wrapper->promptContext.n_batch;
ctx->repeat_penalty = wrapper->promptContext.repeat_penalty;
ctx->repeat_last_n = wrapper->promptContext.repeat_last_n;
ctx->context_erase = wrapper->promptContext.contextErase;
} }
float *llmodel_embed( float *llmodel_embed(
@ -300,21 +293,3 @@ const char *llmodel_model_gpu_device_name(llmodel_model model)
const auto *wrapper = static_cast<LLModelWrapper *>(model); const auto *wrapper = static_cast<LLModelWrapper *>(model);
return wrapper->llModel->gpuDeviceName(); return wrapper->llModel->gpuDeviceName();
} }
int32_t llmodel_count_prompt_tokens(llmodel_model model, const char *prompt, const char **error)
{
auto *wrapper = static_cast<const LLModelWrapper *>(model);
try {
return wrapper->llModel->countPromptTokens(prompt);
} catch (const std::exception& e) {
llmodel_set_error(error, e.what());
return -1;
}
}
void llmodel_model_foreach_special_token(llmodel_model model, llmodel_special_token_callback callback)
{
auto *wrapper = static_cast<const LLModelWrapper *>(model);
for (auto &[name, token] : wrapper->llModel->specialTokens())
callback(name.c_str(), token.c_str());
}

View File

@ -23,11 +23,6 @@ extern "C" {
*/ */
typedef void *llmodel_model; typedef void *llmodel_model;
/**
* A token.
*/
typedef int32_t token_t;
/** /**
* llmodel_prompt_context structure for holding the prompt context. * llmodel_prompt_context structure for holding the prompt context.
* NOTE: The implementation takes care of all the memory handling of the raw logits pointer and the * NOTE: The implementation takes care of all the memory handling of the raw logits pointer and the
@ -35,15 +30,19 @@ typedef int32_t token_t;
* behavior. * behavior.
*/ */
struct llmodel_prompt_context { struct llmodel_prompt_context {
int32_t *tokens; // current tokens in the context window
size_t tokens_size; // the size of the raw tokens vector
int32_t n_past; // number of tokens in past conversation
int32_t n_ctx; // number of tokens possible in context window
int32_t n_predict; // number of tokens to predict int32_t n_predict; // number of tokens to predict
int32_t top_k; // top k logits to sample from int32_t top_k; // top k logits to sample from
float top_p; // nucleus sampling probability threshold float top_p; // nucleus sampling probability threshold
float min_p; // Min P sampling float min_p; // Min P sampling
float temp; // temperature to adjust model's output distribution float temp; // temperature to adjust model's output distribution
int32_t n_batch; // number of predictions to generate in parallel int32_t n_batch; // number of predictions to generate in parallel
float repeat_penalty; // penalty factor for repeated tokens float repeat_penalty; // penalty factor for repeated tokens
int32_t repeat_last_n; // last n tokens to penalize int32_t repeat_last_n; // last n tokens to penalize
float context_erase; // percent of context to erase if we exceed the context window float context_erase; // percent of context to erase if we exceed the context window
}; };
struct llmodel_gpu_device { struct llmodel_gpu_device {
@ -62,12 +61,10 @@ typedef struct llmodel_gpu_device llmodel_gpu_device;
/** /**
* Callback type for prompt processing. * Callback type for prompt processing.
* @param token_ids An array of token ids of the prompt. * @param token_id The token id of the prompt.
* @param n_token_ids The number of tokens in the array.
* @param cached Whether the tokens were already in cache.
* @return a bool indicating whether the model should keep processing. * @return a bool indicating whether the model should keep processing.
*/ */
typedef bool (*llmodel_prompt_callback)(const token_t *token_ids, size_t n_token_ids, bool cached); typedef bool (*llmodel_prompt_callback)(int32_t token_id);
/** /**
* Callback type for response. * Callback type for response.
@ -75,7 +72,14 @@ typedef bool (*llmodel_prompt_callback)(const token_t *token_ids, size_t n_token
* @param response The response string. NOTE: a token_id of -1 indicates the string is an error string. * @param response The response string. NOTE: a token_id of -1 indicates the string is an error string.
* @return a bool indicating whether the model should keep generating. * @return a bool indicating whether the model should keep generating.
*/ */
typedef bool (*llmodel_response_callback)(token_t token_id, const char *response); typedef bool (*llmodel_response_callback)(int32_t token_id, const char *response);
/**
* Callback type for recalculation of context.
* @param whether the model is recalculating the context.
* @return a bool indicating whether the model should keep generating.
*/
typedef bool (*llmodel_recalculate_callback)(bool is_recalculating);
/** /**
* Embedding cancellation callback for use with llmodel_embed. * Embedding cancellation callback for use with llmodel_embed.
@ -86,8 +90,6 @@ typedef bool (*llmodel_response_callback)(token_t token_id, const char *response
*/ */
typedef bool (*llmodel_emb_cancel_callback)(unsigned *batch_sizes, unsigned n_batch, const char *backend); typedef bool (*llmodel_emb_cancel_callback)(unsigned *batch_sizes, unsigned n_batch, const char *backend);
typedef void (*llmodel_special_token_callback)(const char *name, const char *token);
/** /**
* Create a llmodel instance. * Create a llmodel instance.
* Recognises correct model type from file at model_path * Recognises correct model type from file at model_path
@ -146,57 +148,46 @@ bool llmodel_isModelLoaded(llmodel_model model);
* @param model A pointer to the llmodel_model instance. * @param model A pointer to the llmodel_model instance.
* @return the size in bytes of the internal state of the model * @return the size in bytes of the internal state of the model
*/ */
uint64_t llmodel_state_get_size(llmodel_model model); uint64_t llmodel_get_state_size(llmodel_model model);
/** /**
* Saves the internal state of the model. * Saves the internal state of the model to the specified destination address.
* NOTE: This state data is specific to the type of model you have created. * NOTE: This state data is specific to the type of model you have created.
* @param model A pointer to the llmodel_model instance. * @param model A pointer to the llmodel_model instance.
* @param state Where to store the state. This must be a buffer of at least llmodel_state_get_size() bytes. * @param dest A pointer to the destination.
* @param state_size The size of the destination for the state. * @return the number of bytes copied
* @param input_tokens_out Where to store the address of the token cache state. This is dynamically allocated and must
* be freed with llmodel_state_free_input_tokens.
* @param n_input_tokens Where to store the size of the token cache state.
* @return The number of bytes copied. On error, zero is returned, the token cache is set to NULL, and the token cache
* size is set to zero.
*/ */
uint64_t llmodel_state_get_data(llmodel_model model, uint8_t *state_out, uint64_t state_size, uint64_t llmodel_save_state_data(llmodel_model model, uint8_t *dest);
token_t **input_tokens_out, uint64_t *n_input_tokens);
/**
* Frees the temporary token cache buffer created by a call to llmodel_state_get_data().
* @param input_tokens The token cache buffer.
*/
void llmodel_state_free_input_tokens(token_t *input_tokens);
/** /**
* Restores the internal state of the model using data from the specified address. * Restores the internal state of the model using data from the specified address.
* NOTE: This state data is specific to the type of model you have created. * NOTE: This state data is specific to the type of model you have created.
* @param model A pointer to the llmodel_model instance. * @param model A pointer to the llmodel_model instance.
* @param state A pointer to the state data. * @param src A pointer to the src.
* @param state_size The size of the state data. * @return the number of bytes read
* @param input_tokens The token cache associated with the saved state.
* @param n_input_tokens The number of tokens in input_tokens.
* @return The number of bytes read, or zero on error.
*/ */
uint64_t llmodel_state_set_data(llmodel_model model, const uint8_t *state, uint64_t state_size, uint64_t llmodel_restore_state_data(llmodel_model model, const uint8_t *src);
const token_t *input_tokens, uint64_t n_input_tokens);
/** /**
* Generate a response using the model. * Generate a response using the model.
* @param model A pointer to the llmodel_model instance. * @param model A pointer to the llmodel_model instance.
* @param prompt A string representing the input prompt. * @param prompt A string representing the input prompt.
* @param prompt_template A string representing the input prompt template.
* @param prompt_callback A callback function for handling the processing of prompt. * @param prompt_callback A callback function for handling the processing of prompt.
* @param response_callback A callback function for handling the generated response. * @param response_callback A callback function for handling the generated response.
* @param recalculate_callback A callback function for handling recalculation requests.
* @param special True if special tokens in the prompt should be processed, false otherwise.
* @param fake_reply A string to insert into context as the model's reply, or NULL to generate one.
* @param ctx A pointer to the llmodel_prompt_context structure. * @param ctx A pointer to the llmodel_prompt_context structure.
* @param error A pointer to a string; will only be set on error.
*/ */
bool llmodel_prompt(llmodel_model model, void llmodel_prompt(llmodel_model model, const char *prompt,
const char *prompt, const char *prompt_template,
llmodel_prompt_callback prompt_callback, llmodel_prompt_callback prompt_callback,
llmodel_response_callback response_callback, llmodel_response_callback response_callback,
llmodel_prompt_context *ctx, llmodel_recalculate_callback recalculate_callback,
const char **error); llmodel_prompt_context *ctx,
bool special,
const char *fake_reply);
/** /**
* Generate an embedding using the model. * Generate an embedding using the model.
@ -308,10 +299,6 @@ const char *llmodel_model_backend_name(llmodel_model model);
*/ */
const char *llmodel_model_gpu_device_name(llmodel_model model); const char *llmodel_model_gpu_device_name(llmodel_model model);
int32_t llmodel_count_prompt_tokens(llmodel_model model, const char *prompt, const char **error);
void llmodel_model_foreach_special_token(llmodel_model model, llmodel_special_token_callback callback);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -0,0 +1,322 @@
#include "llmodel.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>
// TODO(cebtenzzre): replace this with llama_kv_cache_seq_shift for llamamodel (GPT-J needs this as-is)
// FIXME(jared): if recalculate returns false, we leave n_past<tokens.size() and do not tell the caller to stop
// FIXME(jared): if we get here during chat name or follow-up generation, bad things will happen when we try to restore
// the old prompt context afterwards
void LLModel::recalculateContext(PromptContext &promptCtx, std::function<bool(bool)> recalculate)
{
int n_keep = shouldAddBOS();
const int32_t n_discard = (promptCtx.n_ctx - n_keep) * promptCtx.contextErase;
// Erase the first percentage of context from the tokens
std::cerr << implementation().modelType() << ": reached the end of the context window so resizing\n";
promptCtx.tokens.erase(promptCtx.tokens.begin() + n_keep, promptCtx.tokens.begin() + n_keep + n_discard);
size_t i = n_keep;
promptCtx.n_past = n_keep;
while (i < promptCtx.tokens.size()) {
size_t batch_end = std::min(i + promptCtx.n_batch, promptCtx.tokens.size());
std::vector<int32_t> batch(promptCtx.tokens.begin() + i, promptCtx.tokens.begin() + batch_end);
assert(promptCtx.n_past + int32_t(batch.size()) <= promptCtx.n_ctx);
if (!evalTokens(promptCtx, batch)) {
std::cerr << "LLModel ERROR: Failed to process prompt\n";
goto stop_generating;
}
promptCtx.n_past += batch.size();
if (!recalculate(true))
goto stop_generating;
i = batch_end;
}
assert(promptCtx.n_past == int32_t(promptCtx.tokens.size()));
stop_generating:
recalculate(false);
}
static bool parsePromptTemplate(const std::string &tmpl, std::vector<std::smatch> &placeholders, std::string &err)
{
static const std::regex placeholderRegex(R"(%[1-2](?![0-9]))");
auto it = std::sregex_iterator(tmpl.begin(), tmpl.end(), placeholderRegex);
placeholders.clear();
placeholders.insert(placeholders.end(), it, std::sregex_iterator());
if (placeholders.size() > 2) {
err = "ERROR: expected at most two placeholders, got " + std::to_string(placeholders.size());
return false;
}
if (placeholders.size() >= 1 && placeholders[0].str() != "%1") {
err = "ERROR: first placeholder must be %1, got " + placeholders[0].str();
return false;
}
if (placeholders.size() >= 2 && placeholders[1].str() != "%2") {
err = "ERROR: second placeholder must be %2, got " + placeholders[1].str();
return false;
}
return true;
}
void LLModel::prompt(const std::string &prompt,
const std::string &promptTemplate,
std::function<bool(int32_t)> promptCallback,
std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &promptCtx,
bool special,
std::string *fakeReply)
{
if (!isModelLoaded()) {
std::cerr << implementation().modelType() << " ERROR: prompt won't work with an unloaded model!\n";
return;
}
if (!supportsCompletion()) {
std::string errorMessage = "ERROR: this model does not support text completion or chat!";
responseCallback(-1, errorMessage);
std::cerr << implementation().modelType() << " " << errorMessage << "\n";
return;
}
// make sure token cache matches decode offset
if (promptCtx.tokens.size() < promptCtx.n_past) {
std::ostringstream ss;
ss << "expected n_past to be at most " << promptCtx.tokens.size() << ", got " << promptCtx.n_past;
throw std::out_of_range(ss.str());
}
if (promptCtx.n_past < promptCtx.tokens.size())
promptCtx.tokens.resize(promptCtx.n_past);
m_tokenize_last_token = promptCtx.tokens.empty() ? -1 : promptCtx.tokens.back(); // not serialized
// parse the prompt template
std::vector<std::smatch> placeholders;
{
std::string err;
if (!parsePromptTemplate(promptTemplate, placeholders, err)) {
responseCallback(-1, err);
std::cerr << err << "\n";
return;
}
}
auto old_n_past = promptCtx.n_past; // prepare to fake n_past for tokenize
// tokenize the user prompt
std::vector<Token> embd_inp;
if (placeholders.empty()) {
// this is unusual, but well-defined
std::cerr << __func__ << ": prompt template has no placeholder\n";
embd_inp = tokenize(promptCtx, promptTemplate, true);
} else {
// template: beginning of user prompt
const auto &phUser = placeholders[0];
std::string userPrefix(phUser.prefix());
if (!userPrefix.empty()) {
embd_inp = tokenize(promptCtx, userPrefix, true);
promptCtx.n_past += embd_inp.size();
}
// user input (shouldn't have special token processing)
auto tokens = tokenize(promptCtx, prompt, special);
embd_inp.insert(embd_inp.end(), tokens.begin(), tokens.end());
promptCtx.n_past += tokens.size();
// template: end of user prompt + start of assistant prompt
size_t start = phUser.position() + phUser.length();
size_t end = placeholders.size() >= 2 ? placeholders[1].position() : promptTemplate.length();
auto userToAsst = promptTemplate.substr(start, end - start);
if (!userToAsst.empty()) {
tokens = tokenize(promptCtx, userToAsst, true);
embd_inp.insert(embd_inp.end(), tokens.begin(), tokens.end());
promptCtx.n_past += tokens.size();
}
}
promptCtx.n_past = old_n_past; // restore n_past so decodePrompt can increment it
// decode the user prompt
if (!decodePrompt(promptCallback, responseCallback, recalculateCallback, promptCtx, embd_inp))
return; // error
// decode the assistant's reply, either generated or spoofed
if (fakeReply == nullptr) {
generateResponse(responseCallback, recalculateCallback, promptCtx);
} else {
embd_inp = tokenize(promptCtx, *fakeReply, false);
if (!decodePrompt(promptCallback, responseCallback, recalculateCallback, promptCtx, embd_inp))
return; // error
}
// decode the rest of the prompt template
// template: end of assistant prompt
std::string asstSuffix;
if (placeholders.size() >= 2) {
size_t start = placeholders[1].position() + placeholders[1].length();
asstSuffix = promptTemplate.substr(start);
} else {
asstSuffix = "\n\n"; // default to a blank line, good for e.g. Alpaca
}
if (!asstSuffix.empty()) {
embd_inp = tokenize(promptCtx, asstSuffix, true);
decodePrompt(promptCallback, responseCallback, recalculateCallback, promptCtx, embd_inp);
}
}
// returns false on error
bool LLModel::decodePrompt(std::function<bool(int32_t)> promptCallback,
std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &promptCtx,
std::vector<Token> embd_inp) {
// save the context size
promptCtx.n_ctx = contextLength();
if ((int) embd_inp.size() > promptCtx.n_ctx - 4) {
responseCallback(-1, "ERROR: The prompt size exceeds the context window size and cannot be processed.");
std::cerr << implementation().modelType() << " ERROR: The prompt is " << embd_inp.size() <<
" tokens and the context window is " << promptCtx.n_ctx << "!\n";
return false;
}
promptCtx.n_predict = std::min(promptCtx.n_predict, promptCtx.n_ctx - (int) embd_inp.size());
promptCtx.n_past = std::min(promptCtx.n_past, promptCtx.n_ctx);
promptCtx.n_batch = std::min(promptCtx.n_batch, LLMODEL_MAX_PROMPT_BATCH);
// process the prompt in batches
size_t i = 0;
while (i < embd_inp.size()) {
size_t batch_end = std::min(i + promptCtx.n_batch, embd_inp.size());
std::vector<Token> batch(embd_inp.begin() + i, embd_inp.begin() + batch_end);
// Check if the context has run out...
if (promptCtx.n_past + int32_t(batch.size()) > promptCtx.n_ctx) {
recalculateContext(promptCtx, recalculateCallback);
assert(promptCtx.n_past + int32_t(batch.size()) <= promptCtx.n_ctx);
}
if (!evalTokens(promptCtx, batch)) {
std::cerr << implementation().modelType() << " ERROR: Failed to process prompt\n";
return false;
}
size_t tokens = batch_end - i;
for (size_t t = 0; t < tokens; ++t) {
promptCtx.tokens.push_back(batch.at(t));
promptCtx.n_past += 1;
if (!promptCallback(batch.at(t)))
return false;
}
i = batch_end;
}
return true;
}
void LLModel::generateResponse(std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &promptCtx) {
std::string cachedResponse;
std::vector<Token> cachedTokens;
std::unordered_set<std::string> reversePrompts
= { "### Instruction", "### Prompt", "### Response", "### Human", "### Assistant", "### Context" };
// predict next tokens
for (int i = 0; i < promptCtx.n_predict; i++) {
// sample next token
auto id = sampleToken(promptCtx);
// Check if the context has run out...
if (promptCtx.n_past + 1 > promptCtx.n_ctx) {
recalculateContext(promptCtx, recalculateCallback);
assert(promptCtx.n_past + 1 <= promptCtx.n_ctx);
}
if (!evalTokens(promptCtx, { id })) {
std::cerr << implementation().modelType() << " ERROR: Failed to predict next token\n";
return;
}
// display text
for (const auto token : endTokens()) {
if (id == token) return;
}
const std::string str = tokenToString(id);
// Check if the provided str is part of our reverse prompts
bool foundPartialReversePrompt = false;
const std::string completed = cachedResponse + std::string(str);
if (reversePrompts.find(completed) != reversePrompts.end())
return;
// Check if it partially matches our reverse prompts and if so, cache
for (const auto& s : reversePrompts) {
if (s.compare(0, completed.size(), completed) == 0) {
foundPartialReversePrompt = true;
cachedResponse = completed;
break;
}
}
// Regardless the token gets added to our cache
cachedTokens.push_back(id);
// Continue if we have found a partial match
if (foundPartialReversePrompt)
continue;
// Empty the cache
for (auto t : cachedTokens) {
promptCtx.tokens.push_back(t);
promptCtx.n_past += 1;
//TODO: Conversion to std::string can be avoided here...
if (!responseCallback(t, std::string(tokenToString(t))))
return;
}
cachedTokens.clear();
}
}
void LLModel::embed(
const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix, int dimensionality,
size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb
) {
(void)texts;
(void)embeddings;
(void)prefix;
(void)dimensionality;
(void)tokenCount;
(void)doMean;
(void)atlas;
(void)cancelCb;
throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings");
}
void LLModel::embed(
const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality, size_t *tokenCount,
bool doMean, bool atlas
) {
(void)texts;
(void)embeddings;
(void)isRetrieval;
(void)dimensionality;
(void)tokenCount;
(void)doMean;
(void)atlas;
throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings");
}

View File

@ -0,0 +1,49 @@
#pragma once
#include <ggml.h>
#include <cstddef>
#include <cstdint>
#include <vector>
struct llm_buffer {
uint8_t * addr = NULL;
size_t size = 0;
void resize(size_t size) {
delete[] addr;
addr = new uint8_t[size];
this->size = size;
}
~llm_buffer() {
delete[] addr;
}
};
struct llm_kv_cache {
struct ggml_tensor * k;
struct ggml_tensor * v;
struct ggml_context * ctx = NULL;
llm_buffer buf;
int n; // number of tokens currently in the cache
~llm_kv_cache() {
if (ctx) {
ggml_free(ctx);
}
}
};
inline void ggml_graph_compute_g4a(llm_buffer& buf, ggml_cgraph * graph, int n_threads)
{
struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
if (plan.work_size > 0) {
buf.resize(plan.work_size);
plan.work_data = buf.addr;
}
ggml_graph_compute(graph, &plan);
}

View File

@ -1,298 +0,0 @@
#include "llmodel.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <optional>
#include <ranges>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
namespace ranges = std::ranges;
namespace views = std::ranges::views;
void LLModel::prompt(
std::string_view prompt,
const PromptCallback &promptCallback,
const ResponseCallback &responseCallback,
const PromptContext &promptCtx
) {
if (!isModelLoaded())
throw std::invalid_argument("Attempted to prompt an unloaded model.");
if (!supportsCompletion())
throw std::invalid_argument("Not a text completion model.");
if (!promptCtx.n_batch)
throw std::invalid_argument("Batch size cannot be zero.");
if (!promptCtx.n_predict)
return; // nothing requested
auto embd_inp = tokenize(prompt);
if (embd_inp.empty())
throw std::invalid_argument("Prompt tokenized to zero tokens.");
if (auto res = decodePrompt(promptCallback, promptCtx, std::move(embd_inp)))
generateResponse(responseCallback, promptCtx, /*n_past*/ *res);
}
int32_t LLModel::countPromptTokens(std::string_view prompt) const
{
if (!isModelLoaded())
throw std::invalid_argument("Attempted to tokenize with an unloaded model.");
return int32_t(tokenize(prompt).size());
}
auto LLModel::decodePrompt(
const PromptCallback &promptCallback,
const PromptContext &promptCtx,
std::vector<Token> embd_inp
) -> std::optional<int32_t>
{
assert(!embd_inp.empty());
int32_t nCtx = contextLength();
int32_t n_batch = std::min(promptCtx.n_batch, LLMODEL_MAX_PROMPT_BATCH);
// Find the greatest n_past where the beginning of embd_inp matches the end of the token cache, starting at the
// requested n_past.
// This is used to skip unnecessary work when the prompt shares a common prefix with the previous result.
int32_t nPast = computeModelInputPosition(embd_inp);
// always decode up to a full batch before generating, even if cached
nPast -= std::min(n_batch, nPast);
// TODO(jared): generalize this to find the smallest new_embd_inp.size() - nPast given the cache
if (!nPast && int32_t(embd_inp.size()) > nCtx) {
// no cache hit -> shift the input before even processing
int32_t nKeep = shouldAddBOS();
auto newLength = int32_t(nCtx * (1.f - promptCtx.contextErase));
int32_t nDiscard = int32_t(embd_inp.size()) - std::max(1, std::min(nCtx, newLength));
// execute the callback even for skipped tokens. this misrepresents the position of BOS but we don't care
auto discardedTokens = embd_inp | views::drop(nKeep) | views::take(nDiscard);
if (!promptCallback(discardedTokens, true))
return std::nullopt;
// erase nDiscard tokens
embd_inp.erase(discardedTokens.begin(), discardedTokens.end());
assert(int32_t(embd_inp.size()) <= nCtx);
// check the cache again, just in case
nPast = computeModelInputPosition(embd_inp);
nPast -= std::min(n_batch, nPast);
}
setModelInputPosition(nPast);
// execute the callback even for skipped tokens
if (!promptCallback(embd_inp | views::take(nPast), true))
return std::nullopt;
// process the prompt in batches
for (int32_t i = nPast; i < embd_inp.size();) {
auto batch_end = std::min(i + n_batch, int32_t(embd_inp.size()));
std::span batch(embd_inp.begin() + i, embd_inp.begin() + batch_end);
// Check if the context has run out...
if (nPast + int32_t(batch.size()) > nCtx) {
shiftContext(promptCtx, &nPast);
assert(nPast + int32_t(batch.size()) <= nCtx);
}
// FIXME(Adam): We should find a way to bubble these strings to the UI level to allow for translation
if (!evalTokens(nPast, batch))
throw std::runtime_error("An internal error was encountered during prompt processing.");
for (auto &tok : batch) {
appendInputToken(tok);
nPast++;
if (!promptCallback({ &tok, 1 }, false))
return std::nullopt;
}
i = batch_end;
}
return nPast;
}
/*
* If string s overlaps with the string key such that some prefix of the key is at the end
* of the string, return the position in s where the first match starts. Otherwise, return
* std::string::npos. Examples:
* s = "bfo", key = "foo" -> 1
* s = "fooa", key = "foo" -> npos
*/
static std::string::size_type stringsOverlap(const std::string &s, const std::string &key)
{
if (s.empty() || key.empty())
throw std::invalid_argument("arguments to stringsOverlap must not be empty");
for (int start = std::max(0, int(s.size()) - int(key.size())); start < s.size(); start++) {
if (s.compare(start, s.size(), key, 0, s.size() - start) == 0)
return start;
}
return std::string::npos;
}
void LLModel::generateResponse(
const ResponseCallback &responseCallback,
const PromptContext &promptCtx,
int32_t nPast
) {
static const char *stopSequences[] {
"### System", "### Instruction", "### Human", "### User", "### Response", "### Assistant", "### Context",
"<|im_start|>", "<|im_end|>", "<|endoftext|>",
};
initSampler(promptCtx);
std::string cachedResponse;
std::vector<Token> cachedTokens;
int n_predicted = 0;
// Predict next tokens
for (bool stop = false; !stop;) {
// Sample next token
std::optional<Token> new_tok = sampleToken();
std::string new_piece = tokenToString(new_tok.value());
cachedTokens.push_back(new_tok.value());
cachedResponse += new_piece;
auto accept = [this, &promptCtx, &new_tok, &nPast] {
// Shift context if out of space
if (nPast >= contextLength()) {
shiftContext(promptCtx, &nPast);
assert(nPast < contextLength());
}
// Accept the token
Token tok = std::exchange(new_tok, std::nullopt).value();
if (!evalTokens(nPast, { &tok, 1 }))
throw std::runtime_error("An internal error was encountered during response generation.");
appendInputToken(tok);
nPast++;
};
// Check for EOS
auto lengthLimit = std::string::npos;
for (const auto token : endTokens()) {
if (new_tok == token) {
stop = true;
lengthLimit = cachedResponse.size() - new_piece.size();
}
}
if (lengthLimit != std::string::npos) {
// EOS matched
} else if (!isSpecialToken(new_tok.value())) {
// Check if the response contains a stop sequence
for (const auto &p : stopSequences) {
auto match = cachedResponse.find(p);
if (match != std::string::npos) stop = true;
lengthLimit = std::min(lengthLimit, match);
if (match == 0) break;
}
// Check if the response matches the start of a stop sequence
if (lengthLimit == std::string::npos) {
for (const auto &p : stopSequences) {
auto match = stringsOverlap(cachedResponse, p);
lengthLimit = std::min(lengthLimit, match);
if (match == 0) break;
}
}
} else if (ranges::find(stopSequences, new_piece) < std::end(stopSequences)) {
// Special tokens must exactly match a stop sequence
stop = true;
lengthLimit = cachedResponse.size() - new_piece.size();
}
// Empty the cache, up to the length limit
std::string::size_type responseLength = 0;
while (!cachedTokens.empty()) {
Token tok = cachedTokens.front();
std::string piece = tokenToString(tok);
// Stop if the piece (or part of it) does not fit within the length limit
if (responseLength + (stop ? 1 : piece.size()) > lengthLimit)
break;
// Remove token from cache
assert(cachedResponse.starts_with(piece));
cachedTokens.erase(cachedTokens.begin(), cachedTokens.begin() + 1);
cachedResponse.erase(cachedResponse.begin(), cachedResponse.begin() + piece.size());
// Accept the token, if needed (not cached)
if (cachedTokens.empty() && new_tok)
accept();
// Send the token
if (!responseCallback(tok, piece) || ++n_predicted >= promptCtx.n_predict) {
stop = true;
break;
}
// FIXME(jared): we could avoid printing partial stop sequences if we didn't have to
// output token IDs and could cache a partial token for the next prompt call
responseLength += piece.size();
}
assert(cachedTokens.empty() == cachedResponse.empty());
// Accept the token, if needed (in cache)
if (new_tok) {
assert(!cachedTokens.empty() && cachedTokens.back() == new_tok);
if (stop) {
cachedTokens.pop_back();
} else {
accept();
}
}
}
if (inputLength() < cachedTokens.size()) {
/* This is theoretically possible if the longest stop sequence is greater than
* n_ctx * contextErase tokens. */
throw std::runtime_error("shifted too much context, can't go back");
}
#ifndef NDEBUG
auto inp = inputTokens();
auto discard_start = inp.end() - cachedTokens.size();
assert(std::equal(discard_start, inp.end(), cachedTokens.begin()));
#endif
}
void LLModel::embed(
const std::vector<std::string> &texts, float *embeddings, std::optional<std::string> prefix, int dimensionality,
size_t *tokenCount, bool doMean, bool atlas, EmbedCancelCallback *cancelCb
) {
(void)texts;
(void)embeddings;
(void)prefix;
(void)dimensionality;
(void)tokenCount;
(void)doMean;
(void)atlas;
(void)cancelCb;
throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings");
}
void LLModel::embed(
const std::vector<std::string> &texts, float *embeddings, bool isRetrieval, int dimensionality, size_t *tokenCount,
bool doMean, bool atlas
) {
(void)texts;
(void)embeddings;
(void)isRetrieval;
(void)dimensionality;
(void)tokenCount;
(void)doMean;
(void)atlas;
throw std::logic_error(std::string(implementation().modelType()) + " does not support embeddings");
}

View File

@ -1,17 +0,0 @@
#pragma once
#include <cassert>
#ifdef NDEBUG
# ifdef __has_builtin
# if __has_builtin(__builtin_unreachable)
# define UNREACHABLE() __builtin_unreachable()
# else
# define UNREACHABLE() do {} while (0)
# endif
# else
# define UNREACHABLE() do {} while (0)
# endif
#else
# define UNREACHABLE() assert(!"Unreachable statement was reached")
#endif

339
gpt4all-backend/utils.cpp Normal file
View File

@ -0,0 +1,339 @@
#include "utils.h"
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <regex>
#include <utility>
void replace(std::string & str, const std::string & needle, const std::string & replacement)
{
size_t pos = 0;
while ((pos = str.find(needle, pos)) != std::string::npos) {
str.replace(pos, needle.length(), replacement);
pos += replacement.length();
}
}
std::map<std::string, int32_t> json_parse(const std::string & fname)
{
std::map<std::string, int32_t> result;
// read file into string
std::string json;
{
std::ifstream ifs(fname);
if (!ifs) {
fprintf(stderr, "Failed to open %s\n", fname.c_str());
exit(1);
}
json = std::string((std::istreambuf_iterator<char>(ifs)),
(std::istreambuf_iterator<char>()));
}
if (json[0] != '{') {
return result;
}
// parse json
{
bool has_key = false;
bool in_token = false;
std::string str_key = "";
std::string str_val = "";
int n = json.size();
for (int i = 1; i < n; ++i) {
if (!in_token) {
if (json[i] == ' ') continue;
if (json[i] == '"') {
in_token = true;
continue;
}
} else {
if (json[i] == '\\' && i+1 < n) {
if (has_key == false) {
str_key += json[i];
} else {
str_val += json[i];
}
++i;
} else if (json[i] == '"') {
if (has_key == false) {
has_key = true;
++i;
while (json[i] == ' ') ++i;
++i; // :
while (json[i] == ' ') ++i;
if (json[i] != '\"') {
while (json[i] != ',' && json[i] != '}') {
str_val += json[i++];
}
has_key = false;
} else {
in_token = true;
continue;
}
} else {
has_key = false;
}
::replace(str_key, "\\u0120", " " ); // \u0120 -> space
::replace(str_key, "\\u010a", "\n"); // \u010a -> new line
::replace(str_key, "\\\"", "\""); // \\\" -> "
try {
result[str_key] = std::stoi(str_val);
} catch (...) {
//fprintf(stderr, "%s: ignoring key '%s' with value '%s'\n", fname.c_str(), str_key.c_str(), str_val.c_str());
}
str_key = "";
str_val = "";
in_token = false;
continue;
}
if (has_key == false) {
str_key += json[i];
} else {
str_val += json[i];
}
}
}
}
return result;
}
std::vector<gpt_vocab::id> gpt_tokenize_inner(const gpt_vocab & vocab, const std::string & text)
{
std::vector<std::string> words;
// first split the text into words
{
std::string str = text;
std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
std::regex re(pat);
std::smatch m;
while (std::regex_search(str, m, re)) {
for (auto x : m) {
words.push_back(x);
}
str = m.suffix();
}
}
// find the longest tokens that form the words:
std::vector<gpt_vocab::id> tokens;
for (const auto & word : words) {
if (word.size() == 0) continue;
int i = 0;
int n = word.size();
while (i < n) {
int j = n;
while (j > i) {
auto it = vocab.token_to_id.find(word.substr(i, j-i));
if (it != vocab.token_to_id.end()) {
tokens.push_back(it->second);
i = j;
break;
}
--j;
}
if (i == n) {
break;
}
if (j == i) {
auto sub = word.substr(i, 1);
if (vocab.token_to_id.find(sub) != vocab.token_to_id.end()) {
tokens.push_back(vocab.token_to_id.at(sub));
} else {
fprintf(stderr, "%s: unknown token '%s'\n", __func__, sub.data());
}
++i;
}
}
}
return tokens;
}
std::string regex_escape(const std::string &s)
{
static const std::regex metacharacters(R"([\.\^\$\-\+\(\)\[\]\{\}\|\?\*])");
return std::regex_replace(s, metacharacters, "\\$&");
}
std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text)
{
// Generate the subpattern from the special_tokens vector if it's not empty
if (!vocab.special_tokens.empty()) {
std::vector<gpt_vocab::id> out;
std::vector<std::string> chunks;
std::string str = text;
std::string special_tokens_subpattern;
for (const auto &token : vocab.special_tokens) {
if (!special_tokens_subpattern.empty()) {
special_tokens_subpattern += "|";
}
special_tokens_subpattern += regex_escape(token);
}
std::regex re(special_tokens_subpattern);
std::smatch m;
while (std::regex_search(str, m, re)) {
auto tok = vocab.token_to_id.find(m.str());
if (tok != vocab.token_to_id.end()) {
auto tokid = tok->second;
auto pfxtoks = gpt_tokenize_inner(vocab, m.prefix());
out.insert(out.end(), pfxtoks.begin(), pfxtoks.end());
out.push_back(tokid);
str = m.suffix();
}
}
if (!str.empty()) {
auto tokrest = gpt_tokenize_inner(vocab, str);
out.insert(out.end(), tokrest.begin(), tokrest.end());
}
return out;
} else {
return gpt_tokenize_inner(vocab, text);
}
}
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab)
{
printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
vocab.token_to_id = ::json_parse(fname);
for (const auto & kv : vocab.token_to_id) {
vocab.id_to_token[kv.second] = kv.first;
}
printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size());
// print the vocabulary
//for (auto kv : vocab.token_to_id) {
// printf("'%s' -> %d\n", kv.first.data(), kv.second);
//}
return true;
}
gpt_vocab::id gpt_sample_top_k_top_p(
const size_t actualVocabSize,
const int32_t * last_n_tokens_data,
int last_n_tokens_size,
const std::vector<float> logits,
int top_k,
double top_p,
double temp,
float repeat_penalty,
std::mt19937 & rng) {
int n_logits = actualVocabSize;
const auto last_n_tokens = std::vector<int32_t>(last_n_tokens_data, last_n_tokens_data + last_n_tokens_size);
const auto * plogits = logits.data();
if (temp <= 0) {
// select the token with the highest logit directly
float max_logit = plogits[0];
gpt_vocab::id max_id = 0;
for (int i = 1; i < n_logits; ++i) {
if (plogits[i] > max_logit) {
max_logit = plogits[i];
max_id = i;
}
}
return max_id;
}
std::vector<std::pair<double, gpt_vocab::id>> logits_id;
logits_id.reserve(n_logits);
{
const float scale = 1.0f/temp;
for (int i = 0; i < n_logits; ++i) {
// repetition penalty from ctrl paper (https://arxiv.org/abs/1909.05858)
// credit https://github.com/facebookresearch/llama/compare/main...shawwn:llama:main
if (std::find(last_n_tokens.begin(), last_n_tokens.end(), i) != last_n_tokens.end()) {
// if score < 0 then repetition penalty has to be multiplied to reduce the previous token probability
if (plogits[i] < 0.0f) {
logits_id.push_back(std::make_pair(plogits[i]*scale*repeat_penalty, i));
} else {
logits_id.push_back(std::make_pair(plogits[i]*scale/repeat_penalty, i));
}
} else {
logits_id.push_back(std::make_pair(plogits[i]*scale, i));
}
}
}
// find the top K tokens
std::partial_sort(
logits_id.begin(),
logits_id.begin() + top_k, logits_id.end(),
[](const std::pair<double, gpt_vocab::id> & a, const std::pair<double, gpt_vocab::id> & b) {
return a.first > b.first;
});
logits_id.resize(top_k);
double maxl = -INFINITY;
for (const auto & kv : logits_id) {
maxl = std::max(maxl, kv.first);
}
// compute probs for the top K tokens
std::vector<double> probs;
probs.reserve(logits_id.size());
double sum = 0.0;
for (const auto & kv : logits_id) {
double p = exp(kv.first - maxl);
probs.push_back(p);
sum += p;
}
// normalize the probs
for (auto & p : probs) {
p /= sum;
}
if (top_p < 1.0f) {
double cumsum = 0.0f;
for (int i = 0; i < top_k; i++) {
cumsum += probs[i];
if (cumsum >= top_p) {
top_k = i + 1;
probs.resize(top_k);
logits_id.resize(top_k);
break;
}
}
cumsum = 1.0/cumsum;
for (int i = 0; i < (int) probs.size(); i++) {
probs[i] *= cumsum;
}
}
//printf("\n");
//for (int i = 0; i < (int) probs.size(); i++) {
// printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
//}
//exit(0);
std::discrete_distribution<> dist(probs.begin(), probs.end());
int idx = dist(rng);
return logits_id[idx].second;
}

101
gpt4all-backend/utils.h Normal file
View File

@ -0,0 +1,101 @@
// Various helper functions and utilities
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <map>
#include <random>
#include <string>
#include <thread>
#include <vector>
//
// General purpose inline functions
//
constexpr inline unsigned long long operator ""_MiB(unsigned long long bytes)
{
return bytes*1024*1024;
}
//
// CLI argument parsing
//
struct gpt_params {
int32_t seed = -1; // RNG seed
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_predict = 200; // new tokens to predict
// sampling parameters
int32_t top_k = 40;
float top_p = 0.9f;
float temp = 0.9f;
int32_t n_batch = 8; // batch size for prompt processing
std::string model = "models/gpt-2-117M/ggml-model.bin"; // model path
std::string prompt;
};
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
std::string gpt_random_prompt(std::mt19937 & rng);
//
// Vocab utils
//
struct gpt_vocab {
using id = int32_t;
using token = std::string;
std::map<token, id> token_to_id;
std::map<id, token> id_to_token;
std::vector<std::string> special_tokens;
void add_special_token(const std::string &token) {
special_tokens.push_back(token);
}
};
void replace(std::string & str, const std::string & needle, const std::string & replacement);
// poor-man's JSON parsing
std::map<std::string, int32_t> json_parse(const std::string & fname);
// split text into tokens
//
// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
//
// Regex (Python):
// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
//
// Regex (C++):
// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
//
std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);
// load the tokens from encoder.json
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
// sample next token given probabilities for each embedding
//
// - consider only the top K tokens
// - from them, consider only the top tokens with cumulative probability > P
//
// TODO: not sure if this implementation is correct
//
gpt_vocab::id gpt_sample_top_k_top_p(
const size_t actualVocabSize,
const int32_t * last_n_tokens_data,
int last_n_tokens_size,
const std::vector<float> logits,
int top_k,
double top_p,
double temp,
float repeat_penalty,
std::mt19937 & rng);

View File

@ -113,7 +113,10 @@ def _old_loop(gpt4all_instance):
full_response = gpt4all_instance.chat_completion( full_response = gpt4all_instance.chat_completion(
MESSAGES, MESSAGES,
# preferential kwargs for chat ux # preferential kwargs for chat ux
logits_size=0,
tokens_size=0,
n_past=0, n_past=0,
n_ctx=0,
n_predict=200, n_predict=200,
top_k=40, top_k=40,
top_p=0.9, top_p=0.9,

View File

@ -4,41 +4,6 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased]
### Added
- Warn on Windows if the Microsoft Visual C++ runtime libraries are not found ([#2920](https://github.com/nomic-ai/gpt4all/pull/2920))
- Basic cache for faster prefill when the input shares a prefix with previous context ([#3073](https://github.com/nomic-ai/gpt4all/pull/3073))
- Add ability to modify or replace the history of an active chat session ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
### Changed
- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
- Fix CalledProcessError on Intel Macs since v2.8.0 ([#3045](https://github.com/nomic-ai/gpt4all/pull/3045))
- Use Jinja for chat templates instead of per-message QString.arg-style templates ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
## [2.8.2] - 2024-08-14
### Fixed
- Fixed incompatibility with Python 3.8 since v2.7.0 and Python <=3.11 since v2.8.1 ([#2871](https://github.com/nomic-ai/gpt4all/pull/2871))
## [2.8.1] - 2024-08-13
### Added
- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
### Changed
- Search for pip-installed CUDA 11 as well as CUDA 12 ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
- Stop shipping CUBINs to reduce wheel size ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
- Use llama\_kv\_cache ops to shift context faster ([#2781](https://github.com/nomic-ai/gpt4all/pull/2781))
- Don't stop generating at end of context ([#2781](https://github.com/nomic-ai/gpt4all/pull/2781))
### Fixed
- Make reverse prompt detection work more reliably and prevent it from breaking output ([#2781](https://github.com/nomic-ai/gpt4all/pull/2781))
- Explicitly target macOS 12.6 in CI to fix Metal compatibility on older macOS ([#2849](https://github.com/nomic-ai/gpt4all/pull/2849))
- Do not initialize Vulkan driver when only using CPU ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
- Fix a segfault on exit when using CPU mode on Linux with NVIDIA and EGL ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
## [2.8.0] - 2024-08-05 ## [2.8.0] - 2024-08-05
### Added ### Added
@ -51,7 +16,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- Detect use of a Python interpreter under Rosetta for a clearer error message ([#2793](https://github.com/nomic-ai/gpt4all/pull/2793)) - Detect use of a Python interpreter under Rosetta for a clearer error message ([#2793](https://github.com/nomic-ai/gpt4all/pull/2793))
### Changed ### Changed
- Build against CUDA 11.8 instead of CUDA 12 for better compatibility with older drivers ([#2639](https://github.com/nomic-ai/gpt4all/pull/2639))
- Update llama.cpp to commit 87e397d00 from July 19th ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694)) - Update llama.cpp to commit 87e397d00 from July 19th ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
### Removed ### Removed
@ -69,7 +33,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- Restore leading space removal logic that was incorrectly removed in [#2694](https://github.com/nomic-ai/gpt4all/pull/2694) - Restore leading space removal logic that was incorrectly removed in [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)
- CUDA: Cherry-pick llama.cpp DMMV cols requirement fix that caused a crash with long conversations since [#2694](https://github.com/nomic-ai/gpt4all/pull/2694) - CUDA: Cherry-pick llama.cpp DMMV cols requirement fix that caused a crash with long conversations since [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)
[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.2...HEAD
[2.8.2]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.1...python-v2.8.2
[2.8.1]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.0...python-v2.8.1
[2.8.0]: https://github.com/nomic-ai/gpt4all/compare/python-v2.7.0...python-v2.8.0 [2.8.0]: https://github.com/nomic-ai/gpt4all/compare/python-v2.7.0...python-v2.8.0

Binary file not shown. (Before: 30 KiB)

Binary file not shown. (Before: 66 KiB)

Binary file not shown. (Before: 272 KiB)

Binary file not shown. (Before: 448 KiB)

View File

@ -1,86 +0,0 @@
# GPT4All API Server
GPT4All provides a local API server that allows you to run LLMs over an HTTP API.
## Key Features
- **Local Execution**: Run models on your own hardware for privacy and offline use.
- **LocalDocs Integration**: Run the API with relevant text snippets provided to your LLM from a [LocalDocs collection](../gpt4all_desktop/localdocs.md).
- **OpenAI API Compatibility**: Use existing OpenAI-compatible clients and tools with your local models.
## Activating the API Server
1. Open the GPT4All Chat Desktop Application.
2. Go to `Settings` > `Application` and scroll down to `Advanced`.
3. Check the box for the `"Enable Local API Server"` setting.
4. The server listens on port 4891 by default. You can choose another port number in the `"API Server Port"` setting.
## Connecting to the API Server
The base URL used for the API server is `http://localhost:4891/v1` (or `http://localhost:<PORT_NUM>/v1` if you are using a different port number).
The server only accepts HTTP connections (not HTTPS) and only listens on the IPv4 localhost address (127.0.0.1), not on the IPv6 localhost address (`::1`).
## Examples
!!! note "Example GPT4All API calls"
=== "cURL"
```bash
curl -X POST http://localhost:4891/v1/chat/completions -d '{
"model": "Phi-3 Mini Instruct",
"messages": [{"role":"user","content":"Who is Lionel Messi?"}],
"max_tokens": 50,
"temperature": 0.28
}'
```
=== "PowerShell"
```powershell
Invoke-WebRequest -URI http://localhost:4891/v1/chat/completions -Method POST -ContentType application/json -Body '{
"model": "Phi-3 Mini Instruct",
"messages": [{"role":"user","content":"Who is Lionel Messi?"}],
"max_tokens": 50,
"temperature": 0.28
}'
```
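
Any OpenAI-compatible client can also be pointed at the local server. Below is a minimal sketch using the `openai` Python package (an assumption, not an officially documented example); the API key is a placeholder required by the client, and the model name assumes "Phi-3 Mini Instruct" is installed in GPT4All.

```python
# Minimal sketch: call the local GPT4All API server with the openai package.
# Assumes `pip install openai` and that "Phi-3 Mini Instruct" is downloaded in GPT4All.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4891/v1", api_key="not-needed")  # key is a placeholder

response = client.chat.completions.create(
    model="Phi-3 Mini Instruct",
    messages=[{"role": "user", "content": "Who is Lionel Messi?"}],
    max_tokens=50,
    temperature=0.28,
)
print(response.choices[0].message.content)
```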
## API Endpoints
| Method | Path | Description |
|--------|------|-------------|
| GET | `/v1/models` | List available models |
| GET | `/v1/models/<name>` | Get details of a specific model |
| POST | `/v1/completions` | Generate text completions |
| POST | `/v1/chat/completions` | Generate chat completions |
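
For example, listing the available models is a plain GET request. The sketch below uses the `requests` package and assumes the server mirrors the OpenAI-style `data`/`id` response fields.

```python
# Sketch: list the models served by the local API server.
import requests

models = requests.get("http://localhost:4891/v1/models").json()
for m in models.get("data", []):  # `data`/`id` fields are assumed to follow the OpenAI schema
    print(m.get("id"))
```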
## LocalDocs Integration
You can use LocalDocs with the API server:
1. Open the Chats view in the GPT4All application.
2. Scroll to the bottom of the chat history sidebar.
3. Select the server chat (it has a different background color).
4. Activate LocalDocs collections in the right sidebar.
(Note: LocalDocs can currently only be activated through the GPT4All UI, not via the API itself).
Now, your API calls to your local LLM will have relevant references from your LocalDocs collection retrieved and placed in the input message for the LLM to respond to.
The references retrieved for your API call can be accessed in the API response object at
`response["choices"][0]["references"]`
The data included in the `references` are:
- `text`: the actual text content from the snippet that was extracted from the reference document
- `author`: the author of the reference document (if available)
- `date`: the date of creation of the reference document (if available)
- `page`: the page number the snippet is from (only available for PDF documents for now)
- `title`: the title of the reference document (if available)
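
As a rough illustration (a sketch, not an official example), the references can be read from Python with the `requests` package. The prompt text is made up, and it assumes a LocalDocs collection is already active on the server chat.

```python
# Sketch: read LocalDocs references returned alongside a chat completion.
import requests

resp = requests.post(
    "http://localhost:4891/v1/chat/completions",
    json={
        "model": "Phi-3 Mini Instruct",
        "messages": [{"role": "user", "content": "What do my documents say about onboarding?"}],
        "max_tokens": 100,
    },
).json()

for ref in resp["choices"][0].get("references", []):
    # author, date, page, and title may be missing depending on the source document
    print(ref.get("title"), ref.get("page"), ref["text"][:80])
```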

View File

@ -1,206 +0,0 @@
## What are chat templates?
Natively, large language models only know how to complete plain text and do not know the difference between their input and their output. In order to support a chat with a person, LLMs are designed to use a template to convert the conversation to plain text using a specific format.
For a given model, it is important to use an appropriate chat template, as each model is designed to work best with a specific format. The chat templates included with the built-in models should be sufficient for most purposes.
There are two reasons you would want to alter the chat template:
- You are sideloading a model and there is no chat template available,
- You would like to have greater control over the input to the LLM than a system message provides.
## What is a system message?
A system message is a message that controls the responses from the LLM in a way that affects the entire conversation. System messages can be short, such as "Speak like a pirate.", or they can be long and contain a lot of context for the LLM to keep in mind.
Not all models are designed to use a system message, so system messages work better with some models than with others.
## How do I customize the chat template or system message?
To customize the chat template or system message, go to Settings > Model. Make sure to select the correct model at the top. If you clone a model, you can use a different chat template or system message from the base model, enabling you to use different settings for each conversation.
These settings take effect immediately. After changing them, you can click "Redo last response" in the chat view, and the response will take the new settings into account.
## Do I need to write a chat template?
You typically do not need to write your own chat template. The exception is models that are not in the official model list and do not come with a chat template built-in. These will show a "Clear" option above the chat template field in the Model Settings page instead of a "Reset" option. See the section on [finding] or [creating] a chat template.
[finding]: #how-do-i-find-a-chat-template
[creating]: #advanced-how-do-chat-templates-work
## What changed in GPT4All v3.5?
GPT4All v3.5 overhauled the chat template system. There are three crucial differences:
- The chat template now formats an entire conversation instead of a single pair of messages,
- The chat template now uses Jinja syntax instead of `%1` and `%2` placeholders,
- And the system message should no longer contain control tokens or trailing whitespace.
If any of your chat templates or system messages were added or altered from the defaults before upgrading to GPT4All v3.5 or newer, they will no longer work. See below for how to solve common errors you may see after upgrading.
## Error/Warning: System message is not plain text.
This is easy to fix. Go to the model's settings and look at the system prompt. There are three things to look for:
- Control tokens such as `<|im_start|>`, `<|start_header_id|>`, or `<|system|>`
- A prefix such as `### System` or `SYSTEM:`
- Trailing whitespace, such as a space character or blank line.
If you see any of these things, remove them. For example, this legacy system prompt:
```
<|start_header_id|>system<|end_header_id|>
You are a helpful assistant.<|eot_id|>
```
Should become this:
```
You are a helpful assistant.
```
If you do not see anything that needs to be changed, you can dismiss the error by making a minor modification to the message and then changing it back.
If you see a warning, your system message does not appear to be plain text. If you believe this warning is incorrect, it can be safely ignored. If in doubt, ask on the [Discord].
[Discord]: https://discord.gg/mGZE39AS3e
## Error: Legacy system prompt needs to be updated in Settings.
This is the same as [above][above-1], but appears on the chat page.
[above-1]: #errorwarning-system-message-is-not-plain-text
## Error/Warning: Chat template is not in Jinja format.
This is the result of attempting to use an old-style template (possibly from a previous version) in GPT4All 3.5+.
Go to the Model Settings page and select the affected model. If you see a "Reset" button, and you have not intentionally modified the prompt template, you can click "Reset". Otherwise, this is what you can do:
1. Back up your chat template by copying it safely to a text file and saving it. In the next step, it will be removed from GPT4All.
2. Click "Reset" or "Clear".
3. If you clicked "Clear", the chat template is now gone. Follow the steps to [find][finding] or [create][creating] a basic chat template for your model.
4. Customize the chat template to suit your needs. For help, read the section about [creating] a chat template.
## Error: Legacy prompt template needs to be updated in Settings.
This is the same as [above][above-2], but appears on the chat page.
[above-2]: #errorwarning-chat-template-is-not-in-jinja-format
## The chat template has a syntax error.
If there is a syntax error while editing the chat template, the details will be displayed in an error message above the input box. This could be because the chat template is not actually in Jinja format (see [above][above-2]).
Otherwise, you have either typed something incorrectly, or the model comes with a template that is incompatible with GPT4All. See [the below section][creating] on creating chat templates and make sure that everything is correct. When in doubt, ask on the [Discord].
## Error: No chat template configured.
This may appear for models that are not from the official model list and do not include a chat template. Older versions of GPT4All picked a poor default in this case. You will get much better results if you follow the steps to [find][finding] or [create][creating] a chat template for your model.
## Error: The chat template cannot be blank.
If the button above the chat template on the Model Settings page says "Clear", see [above][above-3]. If you see "Reset", click that button to restore a reasonable default. Also see the section on [syntax errors][chat-syntax-error].
[above-3]: #error-no-chat-template-configured
[chat-syntax-error]: #the-chat-template-has-a-syntax-error
## How do I find a chat template?
When in doubt, you can always ask the [Discord] community for help. Below are the instructions to find one on your own.
The authoritative source for a model's chat template is the HuggingFace repo that the original (non-GGUF) model came from. First, you should find this page. If you just have a model file, you can try a Google search for the model's name. If you know the page you downloaded the GGUF model from, its README usually links to the original non-GGUF model.
Once you have located the original model, there are two methods you can use to extract its chat template. Pick whichever one you are most comfortable with.
### Using the CLI (all models)
1. Install `jq` using your preferred package manager - e.g. Chocolatey (Windows), Homebrew (macOS), or apt (Ubuntu).
2. Download `tokenizer_config.json` from the model's "Files and versions" tab.
3. Open a command prompt in the directory to which you downloaded `tokenizer_config.json`.
4. Run `jq -r ".chat_template" tokenizer_config.json`. This shows the chat template in a human-readable form. You can copy this and paste it into the settings page.
5. (Optional) You can save the output to a text file like this: `jq -r ".chat_template" tokenizer_config.json >chat_template.txt`
If the output is "null", the model does not provide a chat template. See the [below instructions][creating] on creating a chat template.
### Python (open models)
1. Install `transformers` using your preferred Python package manager, e.g. `pip install transformers`. Make sure it is at least version 4.43.0.
2. Copy the ID of the HuggingFace model, using the clipboard icon next to the name. For example, if the URL is `https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B`, the ID is `NousResearch/Hermes-2-Pro-Llama-3-8B`.
3. Open a Python interpreter (`python`) and run the following commands. Change the model ID in the example to the one you copied.
```
>>> from transformers import AutoTokenizer
>>> tokenizer = AutoTokenizer.from_pretrained('NousResearch/Hermes-2-Pro-Llama-3-8B')
>>> print(tokenizer.get_chat_template())
```
You can copy the output and paste it into the settings page.
4. (Optional) You can save the output to a text file like this:
```
>>> open('chat_template.txt', 'w').write(tokenizer.get_chat_template())
```
If you get a ValueError exception, this model does not provide a chat template. See the [below instructions][creating] on creating a chat template.
### Python (gated models)
Some models, such as Llama and Mistral, do not allow public access to their chat template. You must either use the CLI method above or follow these instructions to use Python:
1. For these steps, you must have git and git-lfs installed.
2. You must have a HuggingFace account and be logged in.
3. You must already have access to the gated model. Otherwise, request access.
4. You must have an SSH key configured for git access to HuggingFace.
5. `git clone` the model's HuggingFace repo using the SSH clone URL. There is no need to download the entire model, which is very large. A good way to do this on Linux is:
```console
$ GIT_LFS_SKIP_SMUDGE=1 git clone git@hf.co:meta-llama/Llama-3.1-8B-Instruct.git
$ cd Llama-3.1-8B-Instruct
$ git lfs pull -I "tokenizer.*"
```
6. Follow the above instructions for open models, but replace the model ID with the path to the directory containing `tokenizer_config.json`:
```
>>> tokenizer = AutoTokenizer.from_pretrained('.')
```
## Advanced: How do chat templates work?
The chat template is applied to the entire conversation you see in the chat window. The template loops over the list of messages, each containing `role` and `content` fields. `role` is either `user`, `assistant`, or `system`.
GPT4All also supports the special variables `bos_token`, `eos_token`, and `add_generation_prompt`. See the [HuggingFace docs] for what those do.
[HuggingFace docs]: https://huggingface.co/docs/transformers/v4.46.3/en/chat_templating#special-variables
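
To make this concrete, here is a minimal sketch in Python using the `jinja2` package: a hypothetical ChatML-style template is rendered over a short message list, with placeholder values for the special variables. The template and token strings are illustrative only and do not belong to any particular model.

```python
from jinja2.sandbox import ImmutableSandboxedEnvironment

# A hypothetical ChatML-style template: loop over the messages, wrap each one in
# role markers, and optionally open an assistant turn at the end.
CHATML_TEMPLATE = r"""
{%- for message in messages %}
{{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
"""

env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
template = env.from_string(CHATML_TEMPLATE)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user",   "content": "What is the capital of France?"},
]

# bos_token and eos_token are placeholders; a real model defines its own values.
print(template.render(messages=messages, add_generation_prompt=True,
                      bos_token="<s>", eos_token="</s>"))
```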
## Advanced: How do I make a chat template?
The best way to create a chat template is to start by using an existing one as a reference. Then, modify it to use the format documented for the given model. Its README page may explicitly give an example of its template, or it may mention the name of a well-known standard template such as ChatML, Alpaca, or Vicuna. GPT4All does not yet include presets for these templates, so they will have to be found in other models or taken from the community.
For more information, see the very helpful [HuggingFace guide]. Some of it is not applicable, such as the information about tool calling and RAG; GPT4All implements those features differently.
Some models use a prompt template that does not intuitively map to a multi-turn chat, because it is more intended for single instructions. The [FastChat] implementation of these templates is a useful reference for the correct way to extend them to multiple messages.
[HuggingFace guide]: https://huggingface.co/docs/transformers/v4.46.3/en/chat_templating#advanced-template-writing-tips
[FastChat]: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
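
If the original model is openly accessible, one informal way to check a hand-written template is to render the model's own template with `transformers` for a short conversation and compare the output with what your template produces. This is only a sketch; it reuses the Hermes model ID from the earlier example as a stand-in.

```python
from transformers import AutoTokenizer

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B")

# Render the model's built-in template as text, with the generation prompt appended,
# and compare it by eye with the output of your hand-written template.
reference = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(reference)
```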
## Advanced: What are GPT4All v1 templates?
GPT4All supports its own template syntax, which is nonstandard but provides complete control over the way LocalDocs sources and file attachments are inserted into the conversation. These templates begin with `{# gpt4all v1 #}` and look similar to the example below.
For standard templates, GPT4All combines the user message, sources, and attachments into the `content` field. For GPT4All v1 templates, this is not done, so they must be used directly in the template for those features to work correctly.
```jinja
{# gpt4all v1 #}
{%- for message in messages %}
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}
{%- if message['role'] == 'user' %}
{%- for source in message['sources'] %}
{%- if loop.first %}
{{- '### Context:\n' }}
{%- endif %}
{{- 'Collection: ' + source['collection'] + '\n' +
'Path: ' + source['path'] + '\n' +
'Excerpt: ' + source['text'] + '\n\n' }}
{%- endfor %}
{%- endif %}
{%- for attachment in message['prompt_attachments'] %}
{{- attachment['processed_content'] + '\n\n' }}
{%- endfor %}
{{- message['content'] | trim }}
{{- '<|eot_id|>' }}
{%- endfor %}
{%- if add_generation_prompt %}
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
{%- endif %}
```

View File

@ -1,85 +0,0 @@
# Using GPT4All to Privately Chat with your Microsoft Excel Spreadsheets
Local and Private AI Chat with your Microsoft Excel Spreadsheets
Microsoft Excel allows you to create, manage, and analyze data in spreadsheet format. By attaching your spreadsheets directly to GPT4All, you can privately chat with the AI to query and explore the data, enabling you to summarize, generate reports, and glean insights from your files—all within your conversation.
<div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden;">
<iframe src="../../assets/gpt4all_xlsx_attachment.mp4" style="position: absolute; top: 0; left: 0; width: 100%; height: 100%; border:0;" allowfullscreen title="YouTube Video"></iframe>
</div>
## Attach Microsoft Excel to your GPT4All Conversation
!!! note "Attach Microsoft Excel to your GPT4All Conversation"
1. **Install and Open GPT4All**:
- Go to [nomic.ai/gpt4all](https://nomic.ai/gpt4all) to install GPT4All for your operating system.
- Navigate to the Chats view within GPT4All.
<table>
<tr>
<td>
<!-- Screenshot of Chat view -->
<img width="1348" alt="Chat view" src="../../assets/chat_window.png">
</td>
</tr>
</table>
2. **Example Spreadsheet**:
<table>
<tr>
<td>
<!-- Screenshot of Spreadsheet view -->
<img width="1348" alt="Spreadsheet view" src="../../assets/disney_spreadsheet.png">
</td>
</tr>
</table>
3. **Attach to your GPT4All conversation**
<table>
<tr>
<td>
<!-- Screenshot of Attach view -->
<img width="1348" alt="Attach view" src="../../assets/attach_spreadsheet.png">
</td>
</tr>
</table>
4. **Have GPT4All Summarize and Generate a Report**
<table>
<tr>
<td>
<!-- Screenshot of Attach view -->
<img width="1348" alt="Attach view" src="../../assets/spreadsheet_chat.png">
</td>
</tr>
</table>
## How It Works
GPT4All parses your attached Excel spreadsheet into Markdown, a format understandable to LLMs, and adds the Markdown text to the context for your LLM chat. You can view the code that converts `.xlsx` to Markdown [here](https://github.com/nomic-ai/gpt4all/blob/main/gpt4all-chat/src/xlsxtomd.cpp) in the GPT4All GitHub repo.
For example, the above spreadsheet titled `disney_income_stmt.xlsx` would be formatted the following way:
```markdown
## disney_income_stmt
|Walt Disney Co.|||||||
|---|---|---|---|---|---|---|
|Consolidated Income Statement|||||||
||||||||
|US$ in millions|||||||
|12 months ended:|2023-09-30 00:00:00|2022-10-01 00:00:00|2021-10-02 00:00:00|2020-10-03 00:00:00|2019-09-28 00:00:00|2018-09-29 00:00:00|
|Services|79562|74200|61768|59265|60542|50869|
...
...
...
```
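
The conversion in GPT4All itself is implemented in C++ (linked above), but the idea is straightforward to sketch in Python. The snippet below is illustrative only; it assumes the `openpyxl` package and a local `disney_income_stmt.xlsx`, and turns the first worksheet into a Markdown table in the same spirit.

```python
from openpyxl import load_workbook

def sheet_to_markdown(path: str) -> str:
    # Read cell values only (data_only=True resolves formulas to cached results).
    ws = load_workbook(path, read_only=True, data_only=True).worksheets[0]
    lines = [f"## {ws.title}", ""]
    for i, row in enumerate(ws.iter_rows(values_only=True)):
        cells = ["" if v is None else str(v) for v in row]
        lines.append("|" + "|".join(cells) + "|")
        if i == 0:
            # Markdown tables need a separator row after the header.
            lines.append("|" + "|".join("---" for _ in cells) + "|")
    return "\n".join(lines)

print(sheet_to_markdown("disney_income_stmt.xlsx"))
```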
## Limitations
It is important to double-check the claims LLMs make about the spreadsheets you provide. LLMs can make mistakes about the data they are presented with, particularly smaller models (~8B parameters) that fit within the memory of consumer hardware.

View File

@ -4,8 +4,6 @@ The GPT4All Desktop Application allows you to download and run large language mo
With GPT4All, you can chat with models, turn your local files into information sources for models [(LocalDocs)](localdocs.md), or browse models available online to download onto your device. With GPT4All, you can chat with models, turn your local files into information sources for models [(LocalDocs)](localdocs.md), or browse models available online to download onto your device.
[Official Video Tutorial](https://www.youtube.com/watch?v=gQcZDXRVJok)
## Quickstart ## Quickstart
!!! note "Quickstart" !!! note "Quickstart"

View File

@ -8,11 +8,10 @@
| --- | --- | --- | | --- | --- | --- |
| **Theme** | Color theme for the application. Options are `Light`, `Dark`, and `LegacyDark` | `Light` | | **Theme** | Color theme for the application. Options are `Light`, `Dark`, and `LegacyDark` | `Light` |
| **Font Size** | Font size setting for text throughout the application. Options are Small, Medium, and Large | Small | | **Font Size** | Font size setting for text throughout the application. Options are Small, Medium, and Large | Small |
| **Language and Locale** | The language and locale of that language you wish to use | System Locale |
| **Device** | Device that will run your models. Options are `Auto` (GPT4All chooses), `Metal` (Apple Silicon M1+), `CPU`, and `GPU` | `Auto` | | **Device** | Device that will run your models. Options are `Auto` (GPT4All chooses), `Metal` (Apple Silicon M1+), `CPU`, and `GPU` | `Auto` |
| **Default Model** | Choose your preferred LLM to load by default on startup| Auto | | **Default Model** | Choose your preferred LLM to load by default on startup| Auto |
| **Suggestion Mode** | Generate suggested follow up questions at the end of responses | When chatting with LocalDocs |
| **Download Path** | Select a destination on your device to save downloaded models | Windows: `C:\Users\{username}\AppData\Local\nomic.ai\GPT4All`<br><br>Mac: `/Users/{username}/Library/Application Support/nomic.ai/GPT4All/`<br><br>Linux: `/home/{username}/.local/share/nomic.ai/GPT4All` | | **Download Path** | Select a destination on your device to save downloaded models | Windows: `C:\Users\{username}\AppData\Local\nomic.ai\GPT4All`<br><br>Mac: `/Users/{username}/Library/Application Support/nomic.ai/GPT4All/`<br><br>Linux: `/home/{username}/.local/share/nomic.ai/GPT4All` |
| **Enable Datalake** | Opt-in to sharing interactions with GPT4All community (**anonymous** and **optional**) | Off | | **Enable Datalake** | Opt-in to sharing interactions with GPT4All community (**anonymous** and **optional**) | Off |
!!! note "Advanced Application Settings" !!! note "Advanced Application Settings"
@ -20,7 +19,7 @@
| Setting | Description | Default Value | | Setting | Description | Default Value |
| --- | --- | --- | | --- | --- | --- |
| **CPU Threads** | Number of concurrently running CPU threads (more can speed up responses) | 4 | | **CPU Threads** | Number of concurrently running CPU threads (more can speed up responses) | 4 |
| **Enable System Tray** | The application will minimize to the system tray / taskbar when the window is closed | Off | | **Save Chat Context** | Save chat context to disk to pick up exactly where a model left off. | Off |
| **Enable Local Server** | Allow any application on your device to use GPT4All via an OpenAI-compatible GPT4All API | Off | | **Enable Local Server** | Allow any application on your device to use GPT4All via an OpenAI-compatible GPT4All API | Off |
| **API Server Port** | Local HTTP port for the local API server | 4891 | | **API Server Port** | Local HTTP port for the local API server | 4891 |
@ -31,11 +30,8 @@
| Setting | Description | Default Value | | Setting | Description | Default Value |
| --- | --- | --- | | --- | --- | --- |
| **Name** | Unique name of this model / character| set by model uploader | | **Name** | Unique name of this model / character| set by model uploader |
| **Model File** | Filename (.gguf) of the model | set by model uploader | | **System Prompt** | General instructions for the chats this model will be used for | set by model uploader |
| **System Message** | General instructions for the chats this model will be used for | set by model uploader | | **Prompt Template** | Format of user <-> assistant interactions for the chats this model will be used for | set by model uploader |
| **Chat Template** | Format of user <-> assistant interactions for the chats this model will be used for | set by model uploader |
| **Chat Name Prompt** | Prompt used to automatically generate chat names | Describe the above conversation in seven words or less. |
| **Suggested FollowUp Prompt** | Prompt used to automatically generate follow up questions after a chat response | Suggest three very short factual follow-up questions that have not been answered yet or cannot be found inspired by the previous conversation and excerpts. |
### Clone ### Clone

View File

@ -4,7 +4,7 @@
It is possible you are trying to load a model from HuggingFace whose weights are not compatible with our [backend](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings). It is possible you are trying to load a model from HuggingFace whose weights are not compatible with our [backend](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings).
Try downloading one of the officially supported models listed on the main models page in the application. If the problem persists, please share your experience on our [Discord](https://discord.com/channels/1076964370942267462). Try downloading one of the officially supported models mentioned our [website](https://gpt4all.io/). If the problem persists, please share your experience on our [Discord](https://discord.com/channels/1076964370942267462).
## Bad Responses ## Bad Responses

View File

@ -3,13 +3,14 @@ from __future__ import annotations
import ctypes import ctypes
import os import os
import platform import platform
import re
import subprocess import subprocess
import sys import sys
import textwrap import textwrap
import threading import threading
from enum import Enum from enum import Enum
from queue import Queue from queue import Queue
from typing import TYPE_CHECKING, Any, Callable, Generic, Iterable, Iterator, Literal, NoReturn, TypeVar, overload from typing import TYPE_CHECKING, Any, Callable, Generic, Iterable, Literal, NoReturn, TypeVar, overload
if sys.version_info >= (3, 9): if sys.version_info >= (3, 9):
import importlib.resources as importlib_resources import importlib.resources as importlib_resources
@ -23,75 +24,40 @@ else:
from typing import TypedDict from typing import TypedDict
if TYPE_CHECKING: if TYPE_CHECKING:
from typing_extensions import ParamSpec, TypeAlias from typing_extensions import TypeAlias
T = TypeVar("T")
P = ParamSpec("P")
EmbeddingsType = TypeVar('EmbeddingsType', bound='list[Any]') EmbeddingsType = TypeVar('EmbeddingsType', bound='list[Any]')
cuda_found: bool = False
# TODO(jared): use operator.call after we drop python 3.10 support
def _operator_call(obj: Callable[P, T], /, *args: P.args, **kwargs: P.kwargs) -> T:
return obj(*args, **kwargs)
# Detect Rosetta 2 # Detect Rosetta 2
@_operator_call if platform.system() == "Darwin" and platform.processor() == "i386":
def check_rosetta() -> None: if subprocess.run(
if platform.system() == "Darwin" and platform.processor() == "i386": "sysctl -n sysctl.proc_translated".split(), check=True, capture_output=True, text=True,
p = subprocess.run("sysctl -n sysctl.proc_translated".split(), capture_output=True, text=True) ).stdout.strip() == "1":
if p.returncode == 0 and p.stdout.strip() == "1": raise RuntimeError(textwrap.dedent("""\
raise RuntimeError(textwrap.dedent("""\ Running GPT4All under Rosetta is not supported due to CPU feature requirements.
Running GPT4All under Rosetta is not supported due to CPU feature requirements. Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
Please install GPT4All in an environment that uses a native ARM64 Python interpreter. """))
""").strip())
# Find CUDA libraries from the official packages
# Check for C++ runtime libraries cuda_found = False
if platform.system() == "Windows": if platform.system() in ('Linux', 'Windows'):
try: try:
ctypes.CDLL("msvcp140.dll") from nvidia import cuda_runtime, cublas
ctypes.CDLL("vcruntime140.dll") except ImportError:
ctypes.CDLL("vcruntime140_1.dll") pass # CUDA is optional
except OSError as e: else:
print(textwrap.dedent(f"""\ if platform.system() == 'Linux':
{e!r} cudalib = 'lib/libcudart.so.12'
The Microsoft Visual C++ runtime libraries were not found. Please install them from cublaslib = 'lib/libcublas.so.12'
https://aka.ms/vs/17/release/vc_redist.x64.exe
"""), file=sys.stderr)
@_operator_call
def find_cuda() -> None:
global cuda_found
def _load_cuda(rtver: str, blasver: str) -> None:
if platform.system() == "Linux":
cudalib = f"lib/libcudart.so.{rtver}"
cublaslib = f"lib/libcublas.so.{blasver}"
else: # Windows else: # Windows
cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll" cudalib = r'bin\cudart64_12.dll'
cublaslib = fr"bin\cublas64_{blasver}.dll" cublaslib = r'bin\cublas64_12.dll'
# preload the CUDA libs so the backend can find them # preload the CUDA libs so the backend can find them
ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL) ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL) ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
cuda_found = True
# Find CUDA libraries from the official packages
if platform.system() in ("Linux", "Windows"):
try:
from nvidia import cuda_runtime, cublas
except ImportError:
pass # CUDA is optional
else:
for rtver, blasver in [("12", "12"), ("11.0", "11")]:
try:
_load_cuda(rtver, blasver)
cuda_found = True
except OSError: # dlopen() does not give specific error codes
pass # try the next one
# TODO: provide a config file to make this more robust # TODO: provide a config file to make this more robust
@ -118,18 +84,21 @@ llmodel = load_llmodel_library()
class LLModelPromptContext(ctypes.Structure): class LLModelPromptContext(ctypes.Structure):
_fields_ = [ _fields_ = [
("n_predict", ctypes.c_int32), ("tokens", ctypes.POINTER(ctypes.c_int32)),
("top_k", ctypes.c_int32), ("tokens_size", ctypes.c_size_t),
("top_p", ctypes.c_float), ("n_past", ctypes.c_int32),
("min_p", ctypes.c_float), ("n_ctx", ctypes.c_int32),
("temp", ctypes.c_float), ("n_predict", ctypes.c_int32),
("n_batch", ctypes.c_int32), ("top_k", ctypes.c_int32),
("top_p", ctypes.c_float),
("min_p", ctypes.c_float),
("temp", ctypes.c_float),
("n_batch", ctypes.c_int32),
("repeat_penalty", ctypes.c_float), ("repeat_penalty", ctypes.c_float),
("repeat_last_n", ctypes.c_int32), ("repeat_last_n", ctypes.c_int32),
("context_erase", ctypes.c_float), ("context_erase", ctypes.c_float),
] ]
class LLModelGPUDevice(ctypes.Structure): class LLModelGPUDevice(ctypes.Structure):
_fields_ = [ _fields_ = [
("backend", ctypes.c_char_p), ("backend", ctypes.c_char_p),
@ -140,7 +109,6 @@ class LLModelGPUDevice(ctypes.Structure):
("vendor", ctypes.c_char_p), ("vendor", ctypes.c_char_p),
] ]
# Define C function signatures using ctypes # Define C function signatures using ctypes
llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p] llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p]
llmodel.llmodel_model_create.restype = ctypes.c_void_p llmodel.llmodel_model_create.restype = ctypes.c_void_p
@ -158,21 +126,24 @@ llmodel.llmodel_required_mem.restype = ctypes.c_size_t
llmodel.llmodel_isModelLoaded.argtypes = [ctypes.c_void_p] llmodel.llmodel_isModelLoaded.argtypes = [ctypes.c_void_p]
llmodel.llmodel_isModelLoaded.restype = ctypes.c_bool llmodel.llmodel_isModelLoaded.restype = ctypes.c_bool
PromptCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.POINTER(ctypes.c_int32), ctypes.c_size_t, ctypes.c_bool) PromptCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_int32)
ResponseCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_int32, ctypes.c_char_p) ResponseCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_int32, ctypes.c_char_p)
EmbCancelCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.POINTER(ctypes.c_uint), ctypes.c_uint, ctypes.c_char_p) RecalculateCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_bool)
SpecialTokenCallback = ctypes.CFUNCTYPE(None, ctypes.c_char_p, ctypes.c_char_p) EmbCancelCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.POINTER(ctypes.c_uint), ctypes.c_uint, ctypes.c_char_p)
llmodel.llmodel_prompt.argtypes = [ llmodel.llmodel_prompt.argtypes = [
ctypes.c_void_p, ctypes.c_void_p,
ctypes.c_char_p, ctypes.c_char_p,
ctypes.c_char_p,
PromptCallback, PromptCallback,
ResponseCallback, ResponseCallback,
RecalculateCallback,
ctypes.POINTER(LLModelPromptContext), ctypes.POINTER(LLModelPromptContext),
ctypes.POINTER(ctypes.c_char_p), ctypes.c_bool,
ctypes.c_char_p,
] ]
llmodel.llmodel_prompt.restype = ctypes.c_bool llmodel.llmodel_prompt.restype = None
llmodel.llmodel_embed.argtypes = [ llmodel.llmodel_embed.argtypes = [
ctypes.c_void_p, ctypes.c_void_p,
@ -221,12 +192,6 @@ llmodel.llmodel_model_backend_name.restype = ctypes.c_char_p
llmodel.llmodel_model_gpu_device_name.argtypes = [ctypes.c_void_p] llmodel.llmodel_model_gpu_device_name.argtypes = [ctypes.c_void_p]
llmodel.llmodel_model_gpu_device_name.restype = ctypes.c_char_p llmodel.llmodel_model_gpu_device_name.restype = ctypes.c_char_p
llmodel.llmodel_count_prompt_tokens.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_char_p)]
llmodel.llmodel_count_prompt_tokens.restype = ctypes.c_int32
llmodel.llmodel_model_foreach_special_token.argtypes = [ctypes.c_void_p, SpecialTokenCallback]
llmodel.llmodel_model_foreach_special_token.restype = None
ResponseCallbackType = Callable[[int, str], bool] ResponseCallbackType = Callable[[int, str], bool]
RawResponseCallbackType = Callable[[int, bytes], bool] RawResponseCallbackType = Callable[[int, bytes], bool]
EmbCancelCallbackType: TypeAlias = 'Callable[[list[int], str], bool]' EmbCancelCallbackType: TypeAlias = 'Callable[[list[int], str], bool]'
@ -271,6 +236,7 @@ class LLModel:
self.model_path = model_path.encode() self.model_path = model_path.encode()
self.n_ctx = n_ctx self.n_ctx = n_ctx
self.ngl = ngl self.ngl = ngl
self.context: LLModelPromptContext | None = None
self.buffer = bytearray() self.buffer = bytearray()
self.buff_expecting_cont_bytes: int = 0 self.buff_expecting_cont_bytes: int = 0
@ -290,10 +256,6 @@ class LLModel:
raise RuntimeError(f"Unable to instantiate model: {errmsg}") raise RuntimeError(f"Unable to instantiate model: {errmsg}")
self.model: ctypes.c_void_p | None = model self.model: ctypes.c_void_p | None = model
self.special_tokens_map: dict[str, str] = {}
llmodel.llmodel_model_foreach_special_token(
self.model, lambda n, t: self.special_tokens_map.__setitem__(n.decode(), t.decode()),
)
def __del__(self, llmodel=llmodel): def __del__(self, llmodel=llmodel):
if hasattr(self, 'model'): if hasattr(self, 'model'):
@ -320,19 +282,6 @@ class LLModel:
dev = llmodel.llmodel_model_gpu_device_name(self.model) dev = llmodel.llmodel_model_gpu_device_name(self.model)
return None if dev is None else dev.decode() return None if dev is None else dev.decode()
def count_prompt_tokens(self, prompt: str) -> int:
if self.model is None:
self._raise_closed()
err = ctypes.c_char_p()
n_tok = llmodel.llmodel_count_prompt_tokens(self.model, prompt, ctypes.byref(err))
if n_tok < 0:
s = err.value
errmsg = 'null' if s is None else s.decode()
raise RuntimeError(f'Unable to count prompt tokens: {errmsg}')
return n_tok
llmodel.llmodel_count_prompt_tokens.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
@staticmethod @staticmethod
def list_gpus(mem_required: int = 0) -> list[str]: def list_gpus(mem_required: int = 0) -> list[str]:
""" """
@ -396,6 +345,50 @@ class LLModel:
raise Exception("Model not loaded") raise Exception("Model not loaded")
return llmodel.llmodel_threadCount(self.model) return llmodel.llmodel_threadCount(self.model)
def _set_context(
self,
n_predict: int = 4096,
top_k: int = 40,
top_p: float = 0.9,
min_p: float = 0.0,
temp: float = 0.1,
n_batch: int = 8,
repeat_penalty: float = 1.2,
repeat_last_n: int = 10,
context_erase: float = 0.75,
reset_context: bool = False,
):
if self.context is None:
context = LLModelPromptContext(
tokens_size=0,
n_past=0,
n_ctx=0,
n_predict=n_predict,
top_k=top_k,
top_p=top_p,
min_p=min_p,
temp=temp,
n_batch=n_batch,
repeat_penalty=repeat_penalty,
repeat_last_n=repeat_last_n,
context_erase=context_erase,
)
self.context = context
else:
context = self.context
if reset_context:
self.context.n_past = 0
self.context.n_predict = n_predict
self.context.top_k = top_k
self.context.top_p = top_p
self.context.min_p = min_p
self.context.temp = temp
self.context.n_batch = n_batch
self.context.repeat_penalty = repeat_penalty
self.context.repeat_last_n = repeat_last_n
self.context.context_erase = context_erase
@overload @overload
def generate_embeddings( def generate_embeddings(
self, text: str, prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool, self, text: str, prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
@ -465,18 +458,20 @@ class LLModel:
def prompt_model( def prompt_model(
self, self,
prompt : str, prompt: str,
callback : ResponseCallbackType, prompt_template: str,
n_predict : int = 4096, callback: ResponseCallbackType,
top_k : int = 40, n_predict: int = 4096,
top_p : float = 0.9, top_k: int = 40,
min_p : float = 0.0, top_p: float = 0.9,
temp : float = 0.1, min_p: float = 0.0,
n_batch : int = 8, temp: float = 0.1,
repeat_penalty : float = 1.2, n_batch: int = 8,
repeat_last_n : int = 10, repeat_penalty: float = 1.2,
context_erase : float = 0.75, repeat_last_n: int = 10,
reset_context : bool = False, context_erase: float = 0.75,
reset_context: bool = False,
special: bool = False,
): ):
""" """
Generate response from model from a prompt. Generate response from model from a prompt.
@ -499,38 +494,35 @@ class LLModel:
self.buffer.clear() self.buffer.clear()
self.buff_expecting_cont_bytes = 0 self.buff_expecting_cont_bytes = 0
context = LLModelPromptContext( self._set_context(
n_predict = n_predict, n_predict=n_predict,
top_k = top_k, top_k=top_k,
top_p = top_p, top_p=top_p,
min_p = min_p, min_p=min_p,
temp = temp, temp=temp,
n_batch = n_batch, n_batch=n_batch,
repeat_penalty = repeat_penalty, repeat_penalty=repeat_penalty,
repeat_last_n = repeat_last_n, repeat_last_n=repeat_last_n,
context_erase = context_erase, context_erase=context_erase,
reset_context=reset_context,
) )
error_msg: bytes | None = None llmodel.llmodel_prompt(
def error_callback(msg: bytes) -> None:
nonlocal error_msg
error_msg = msg
err = ctypes.c_char_p()
if not llmodel.llmodel_prompt(
self.model, self.model,
ctypes.c_char_p(prompt.encode()), ctypes.c_char_p(prompt.encode()),
ctypes.c_char_p(prompt_template.encode()),
PromptCallback(self._prompt_callback), PromptCallback(self._prompt_callback),
ResponseCallback(self._callback_decoder(callback)), ResponseCallback(self._callback_decoder(callback)),
context, RecalculateCallback(self._recalculate_callback),
ctypes.byref(err), self.context,
): special,
s = err.value ctypes.c_char_p(),
raise RuntimeError(f"prompt error: {'null' if s is None else s.decode()}") )
def prompt_model_streaming( def prompt_model_streaming(
self, prompt: str, callback: ResponseCallbackType = empty_response_callback, **kwargs: Any, self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs
) -> Iterator[str]: ) -> Iterable[str]:
if self.model is None: if self.model is None:
self._raise_closed() self._raise_closed()
@ -549,15 +541,15 @@ class LLModel:
return _generator_callback return _generator_callback
def run_llmodel_prompt(prompt: str, callback: ResponseCallbackType, **kwargs): def run_llmodel_prompt(prompt: str, prompt_template: str, callback: ResponseCallbackType, **kwargs):
self.prompt_model(prompt, callback, **kwargs) self.prompt_model(prompt, prompt_template, callback, **kwargs)
output_queue.put(Sentinel.TERMINATING_SYMBOL) output_queue.put(Sentinel.TERMINATING_SYMBOL)
# Kick off llmodel_prompt in separate thread so we can return generator # Kick off llmodel_prompt in separate thread so we can return generator
# immediately # immediately
thread = threading.Thread( thread = threading.Thread(
target=run_llmodel_prompt, target=run_llmodel_prompt,
args=(prompt, _generator_callback_wrapper(callback)), args=(prompt, prompt_template, _generator_callback_wrapper(callback)),
kwargs=kwargs, kwargs=kwargs,
) )
thread.start() thread.start()
@ -612,5 +604,10 @@ class LLModel:
# Empty prompt callback # Empty prompt callback
@staticmethod @staticmethod
def _prompt_callback(token_ids: ctypes._Pointer[ctypes.c_int32], n_token_ids: int, cached: bool) -> bool: def _prompt_callback(token_id: int) -> bool:
return True return True
# Empty recalculate callback
@staticmethod
def _recalculate_callback(is_recalculating: bool) -> bool:
return is_recalculating

View File

@ -4,66 +4,38 @@ Python only API for running all GPT4All models.
from __future__ import annotations from __future__ import annotations
import hashlib import hashlib
import json
import os import os
import platform import platform
import re import re
import sys import sys
import time
import warnings import warnings
from contextlib import contextmanager from contextlib import contextmanager
from datetime import datetime
from pathlib import Path from pathlib import Path
from types import TracebackType from types import TracebackType
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, NamedTuple, NoReturn, Protocol, TypedDict, overload from typing import TYPE_CHECKING, Any, Iterable, Literal, Protocol, overload
import jinja2
import requests import requests
from jinja2.sandbox import ImmutableSandboxedEnvironment
from requests.exceptions import ChunkedEncodingError from requests.exceptions import ChunkedEncodingError
from tqdm import tqdm from tqdm import tqdm
from urllib3.exceptions import IncompleteRead, ProtocolError from urllib3.exceptions import IncompleteRead, ProtocolError
from ._pyllmodel import (CancellationError as CancellationError, EmbCancelCallbackType, EmbedResult as EmbedResult, from ._pyllmodel import (CancellationError as CancellationError, EmbCancelCallbackType, EmbedResult as EmbedResult,
LLModel, ResponseCallbackType, _operator_call, empty_response_callback) LLModel, ResponseCallbackType, empty_response_callback)
if TYPE_CHECKING: if TYPE_CHECKING:
from typing_extensions import Self, TypeAlias from typing_extensions import Self, TypeAlias
if sys.platform == "darwin": if sys.platform == 'darwin':
import fcntl import fcntl
# TODO: move to config # TODO: move to config
DEFAULT_MODEL_DIRECTORY = Path.home() / ".cache" / "gpt4all" DEFAULT_MODEL_DIRECTORY = Path.home() / ".cache" / "gpt4all"
ConfigType: TypeAlias = "dict[str, Any]" DEFAULT_PROMPT_TEMPLATE = "### Human:\n{0}\n\n### Assistant:\n"
# Environment setup adapted from HF transformers ConfigType: TypeAlias = 'dict[str, Any]'
@_operator_call MessageType: TypeAlias = 'dict[str, str]'
def _jinja_env() -> ImmutableSandboxedEnvironment:
def raise_exception(message: str) -> NoReturn:
raise jinja2.exceptions.TemplateError(message)
def tojson(obj: Any, indent: int | None = None) -> str:
return json.dumps(obj, ensure_ascii=False, indent=indent)
def strftime_now(fmt: str) -> str:
return datetime.now().strftime(fmt)
env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
env.filters["tojson" ] = tojson
env.globals["raise_exception"] = raise_exception
env.globals["strftime_now" ] = strftime_now
return env
class MessageType(TypedDict):
role: str
content: str
class ChatSession(NamedTuple):
template: jinja2.Template
history: list[MessageType]
class Embed4All: class Embed4All:
@ -83,7 +55,7 @@ class Embed4All:
kwargs: Remaining keyword arguments are passed to the `GPT4All` constructor. kwargs: Remaining keyword arguments are passed to the `GPT4All` constructor.
""" """
if model_name is None: if model_name is None:
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf" model_name = 'all-MiniLM-L6-v2.gguf2.f16.gguf'
self.gpt4all = GPT4All(model_name, n_threads=n_threads, device=device, **kwargs) self.gpt4all = GPT4All(model_name, n_threads=n_threads, device=device, **kwargs)
def __enter__(self) -> Self: def __enter__(self) -> Self:
@ -174,18 +146,18 @@ class Embed4All:
dimensionality = -1 dimensionality = -1
else: else:
if dimensionality <= 0: if dimensionality <= 0:
raise ValueError(f"Dimensionality must be None or a positive integer, got {dimensionality}") raise ValueError(f'Dimensionality must be None or a positive integer, got {dimensionality}')
if dimensionality < self.MIN_DIMENSIONALITY: if dimensionality < self.MIN_DIMENSIONALITY:
warnings.warn( warnings.warn(
f"Dimensionality {dimensionality} is less than the suggested minimum of {self.MIN_DIMENSIONALITY}." f'Dimensionality {dimensionality} is less than the suggested minimum of {self.MIN_DIMENSIONALITY}.'
" Performance may be degraded." ' Performance may be degraded.'
) )
try: try:
do_mean = {"mean": True, "truncate": False}[long_text_mode] do_mean = {"mean": True, "truncate": False}[long_text_mode]
except KeyError: except KeyError:
raise ValueError(f"Long text mode must be one of 'mean' or 'truncate', got {long_text_mode!r}") raise ValueError(f"Long text mode must be one of 'mean' or 'truncate', got {long_text_mode!r}")
result = self.gpt4all.model.generate_embeddings(text, prefix, dimensionality, do_mean, atlas, cancel_cb) result = self.gpt4all.model.generate_embeddings(text, prefix, dimensionality, do_mean, atlas, cancel_cb)
return result if return_dict else result["embeddings"] return result if return_dict else result['embeddings']
class GPT4All: class GPT4All:
@ -233,30 +205,31 @@ class GPT4All:
""" """
self.model_type = model_type self.model_type = model_type
self._chat_session: ChatSession | None = None self._history: list[MessageType] | None = None
self._current_prompt_template: str = "{0}"
device_init = None device_init = None
if sys.platform == "darwin": if sys.platform == 'darwin':
if device is None: if device is None:
backend = "auto" # "auto" is effectively "metal" due to currently non-functional fallback backend = 'auto' # 'auto' is effectively 'metal' due to currently non-functional fallback
elif device == "cpu": elif device == 'cpu':
backend = "cpu" backend = 'cpu'
else: else:
if platform.machine() != "arm64" or device != "gpu": if platform.machine() != 'arm64' or device != 'gpu':
raise ValueError(f"Unknown device for this platform: {device}") raise ValueError(f'Unknown device for this platform: {device}')
backend = "metal" backend = 'metal'
else: else:
backend = "kompute" backend = 'kompute'
if device is None or device == "cpu": if device is None or device == 'cpu':
pass # use kompute with no device pass # use kompute with no device
elif device in ("cuda", "kompute"): elif device in ('cuda', 'kompute'):
backend = device backend = device
device_init = "gpu" device_init = 'gpu'
elif device.startswith("cuda:"): elif device.startswith('cuda:'):
backend = "cuda" backend = 'cuda'
device_init = _remove_prefix(device, "cuda:") device_init = device.removeprefix('cuda:')
else: else:
device_init = _remove_prefix(device, "kompute:") device_init = device.removeprefix('kompute:')
# Retrieve model and download if allowed # Retrieve model and download if allowed
self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose) self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)
@ -292,13 +265,7 @@ class GPT4All:
@property @property
def current_chat_session(self) -> list[MessageType] | None: def current_chat_session(self) -> list[MessageType] | None:
return None if self._chat_session is None else self._chat_session.history return None if self._history is None else list(self._history)
@current_chat_session.setter
def current_chat_session(self, history: list[MessageType]) -> None:
if self._chat_session is None:
raise ValueError("current_chat_session may only be set when there is an active chat session")
self._chat_session.history[:] = history
@staticmethod @staticmethod
def list_models() -> list[ConfigType]: def list_models() -> list[ConfigType]:
@ -310,7 +277,7 @@ class GPT4All:
""" """
resp = requests.get("https://gpt4all.io/models/models3.json") resp = requests.get("https://gpt4all.io/models/models3.json")
if resp.status_code != 200: if resp.status_code != 200:
raise ValueError(f"Request failed: HTTP {resp.status_code} {resp.reason}") raise ValueError(f'Request failed: HTTP {resp.status_code} {resp.reason}')
return resp.json() return resp.json()
@classmethod @classmethod
@ -340,9 +307,15 @@ class GPT4All:
# get the config for the model # get the config for the model
config: ConfigType = {} config: ConfigType = {}
if allow_download: if allow_download:
models = cls.list_models() available_models = cls.list_models()
if (model := next((m for m in models if m["filename"] == model_filename), None)) is not None:
config.update(model) for m in available_models:
if model_filename == m["filename"]:
tmpl = m.get("promptTemplate", DEFAULT_PROMPT_TEMPLATE)
# change to Python-style formatting
m["promptTemplate"] = tmpl.replace("%1", "{0}", 1).replace("%2", "{1}", 1)
config.update(m)
break
# Validate download directory # Validate download directory
if model_path is None: if model_path is None:
@ -384,7 +357,7 @@ class GPT4All:
expected_md5: str | None = None, expected_md5: str | None = None,
) -> str | os.PathLike[str]: ) -> str | os.PathLike[str]:
""" """
Download model from gpt4all.io. Download model from https://gpt4all.io.
Args: Args:
model_filename: Filename of model (with .gguf extension). model_filename: Filename of model (with .gguf extension).
@ -406,13 +379,13 @@ class GPT4All:
headers = {} headers = {}
if offset: if offset:
print(f"\nDownload interrupted, resuming from byte position {offset}", file=sys.stderr) print(f"\nDownload interrupted, resuming from byte position {offset}", file=sys.stderr)
headers["Range"] = f"bytes={offset}-" # resume incomplete response headers['Range'] = f'bytes={offset}-' # resume incomplete response
headers["Accept-Encoding"] = "identity" # Content-Encoding changes meaning of ranges headers["Accept-Encoding"] = "identity" # Content-Encoding changes meaning of ranges
response = requests.get(url, stream=True, headers=headers) response = requests.get(url, stream=True, headers=headers)
if response.status_code not in (200, 206): if response.status_code not in (200, 206):
raise ValueError(f"Request failed: HTTP {response.status_code} {response.reason}") raise ValueError(f'Request failed: HTTP {response.status_code} {response.reason}')
if offset and (response.status_code != 206 or str(offset) not in response.headers.get("Content-Range", "")): if offset and (response.status_code != 206 or str(offset) not in response.headers.get('Content-Range', '')):
raise ValueError("Connection was interrupted and server does not support range requests") raise ValueError('Connection was interrupted and server does not support range requests')
if (enc := response.headers.get("Content-Encoding")) is not None: if (enc := response.headers.get("Content-Encoding")) is not None:
raise ValueError(f"Expected identity Content-Encoding, got {enc}") raise ValueError(f"Expected identity Content-Encoding, got {enc}")
return response return response
@ -511,19 +484,19 @@ class GPT4All:
def generate( def generate(
self, self,
prompt : str, prompt: str,
*, *,
max_tokens : int = 200, max_tokens: int = 200,
temp : float = 0.7, temp: float = 0.7,
top_k : int = 40, top_k: int = 40,
top_p : float = 0.4, top_p: float = 0.4,
min_p : float = 0.0, min_p: float = 0.0,
repeat_penalty : float = 1.18, repeat_penalty: float = 1.18,
repeat_last_n : int = 64, repeat_last_n: int = 64,
n_batch : int = 8, n_batch: int = 8,
n_predict : int | None = None, n_predict: int | None = None,
streaming : bool = False, streaming: bool = False,
callback : ResponseCallbackType = empty_response_callback, callback: ResponseCallbackType = empty_response_callback,
) -> Any: ) -> Any:
""" """
Generate outputs from any GPT4All model. Generate outputs from any GPT4All model.
@ -548,94 +521,122 @@ class GPT4All:
# Preparing the model request # Preparing the model request
generate_kwargs: dict[str, Any] = dict( generate_kwargs: dict[str, Any] = dict(
temp = temp, temp=temp,
top_k = top_k, top_k=top_k,
top_p = top_p, top_p=top_p,
min_p = min_p, min_p=min_p,
repeat_penalty = repeat_penalty, repeat_penalty=repeat_penalty,
repeat_last_n = repeat_last_n, repeat_last_n=repeat_last_n,
n_batch = n_batch, n_batch=n_batch,
n_predict = n_predict if n_predict is not None else max_tokens, n_predict=n_predict if n_predict is not None else max_tokens,
) )
# Prepare the callback, process the model response if self._history is not None:
full_response = "" # check if there is only one message, i.e. system prompt:
reset = len(self._history) == 1
self._history.append({"role": "user", "content": prompt})
def _callback_wrapper(token_id: int, response: str) -> bool: fct_func = self._format_chat_prompt_template.__func__ # type: ignore[attr-defined]
nonlocal full_response if fct_func is GPT4All._format_chat_prompt_template:
full_response += response if reset:
return callback(token_id, response) # ingest system prompt
# use "%1%2" and not "%1" to avoid implicit whitespace
last_msg_rendered = prompt self.model.prompt_model(self._history[0]["content"], "%1%2",
if self._chat_session is not None: empty_response_callback,
session = self._chat_session n_batch=n_batch, n_predict=0, reset_context=True, special=True)
def render(messages: list[MessageType]) -> str: prompt_template = self._current_prompt_template.format("%1", "%2")
return session.template.render( else:
messages=messages, warnings.warn(
add_generation_prompt=True, "_format_chat_prompt_template is deprecated. Please use a chat session with a prompt template.",
**self.model.special_tokens_map, DeprecationWarning,
) )
session.history.append(MessageType(role="user", content=prompt)) # special tokens won't be processed
prompt = render(session.history) prompt = self._format_chat_prompt_template(
if len(session.history) > 1: self._history[-1:],
last_msg_rendered = render(session.history[-1:]) self._history[0]["content"] if reset else "",
)
prompt_template = "%1"
generate_kwargs["reset_context"] = reset
else:
prompt_template = "%1"
generate_kwargs["reset_context"] = True
# Check request length # Prepare the callback, process the model response
last_msg_len = self.model.count_prompt_tokens(last_msg_rendered) output_collector: list[MessageType]
if last_msg_len > (limit := self.model.n_ctx - 4): output_collector = [
raise ValueError(f"Your message was too long and could not be processed ({last_msg_len} > {limit}).") {"content": ""}
] # placeholder for the self._history if chat session is not activated
if self._history is not None:
self._history.append({"role": "assistant", "content": ""})
output_collector = self._history
def _callback_wrapper(
callback: ResponseCallbackType,
output_collector: list[MessageType],
) -> ResponseCallbackType:
def _callback(token_id: int, response: str) -> bool:
nonlocal callback, output_collector
output_collector[-1]["content"] += response
return callback(token_id, response)
return _callback
# Send the request to the model # Send the request to the model
if streaming: if streaming:
def stream() -> Iterator[str]: return self.model.prompt_model_streaming(
yield from self.model.prompt_model_streaming(prompt, _callback_wrapper, **generate_kwargs) prompt,
if self._chat_session is not None: prompt_template,
self._chat_session.history.append(MessageType(role="assistant", content=full_response)) _callback_wrapper(callback, output_collector),
return stream() **generate_kwargs,
)
self.model.prompt_model(prompt, _callback_wrapper, **generate_kwargs) self.model.prompt_model(
if self._chat_session is not None: prompt,
self._chat_session.history.append(MessageType(role="assistant", content=full_response)) prompt_template,
return full_response _callback_wrapper(callback, output_collector),
**generate_kwargs,
)
return output_collector[-1]["content"]
@contextmanager @contextmanager
def chat_session( def chat_session(
self, self,
system_message: str | Literal[False] | None = None, system_prompt: str | None = None,
chat_template: str | None = None, prompt_template: str | None = None,
): ):
""" """
Context manager to hold an inference optimized chat session with a GPT4All model. Context manager to hold an inference optimized chat session with a GPT4All model.
Args: Args:
system_message: An initial instruction for the model, None to use the model default, or False to disable. Defaults to None. system_prompt: An initial instruction for the model.
chat_template: Jinja template for the conversation, or None to use the model default. Defaults to None. prompt_template: Template for the prompts with {0} being replaced by the user message.
""" """
if system_message is None: if system_prompt is None:
system_message = self.config.get("systemMessage", False) system_prompt = self.config.get("systemPrompt", "")
if chat_template is None: if prompt_template is None:
if "name" not in self.config: if (tmpl := self.config.get("promptTemplate")) is None:
raise ValueError("For sideloaded models or with allow_download=False, you must specify a chat template.") warnings.warn("Use of a sideloaded model or allow_download=False without specifying a prompt template "
if "chatTemplate" not in self.config: "is deprecated. Defaulting to Alpaca.", DeprecationWarning)
raise NotImplementedError("This model appears to have a built-in chat template, but loading it is not " tmpl = DEFAULT_PROMPT_TEMPLATE
"currently implemented. Please pass a template to chat_session() directly.") prompt_template = tmpl
if (tmpl := self.config["chatTemplate"]) is None:
raise ValueError(f"The model {self.config['name']!r} does not support chat.")
chat_template = tmpl
history = [] if re.search(r"%1(?![0-9])", prompt_template):
if system_message is not False: raise ValueError("Prompt template containing a literal '%1' is not supported. For a prompt "
history.append(MessageType(role="system", content=system_message)) "placeholder, please use '{0}' instead.")
self._chat_session = ChatSession(
template=_jinja_env.from_string(chat_template), self._history = [{"role": "system", "content": system_prompt}]
history=history, self._current_prompt_template = prompt_template
)
try: try:
yield self yield self
finally: finally:
self._chat_session = None self._history = None
self._current_prompt_template = "{0}"
@staticmethod @staticmethod
def list_gpus() -> list[str]: def list_gpus() -> list[str]:
@ -647,6 +648,43 @@ class GPT4All:
""" """
return LLModel.list_gpus() return LLModel.list_gpus()
def _format_chat_prompt_template(
self,
messages: list[MessageType],
default_prompt_header: str = "",
default_prompt_footer: str = "",
) -> str:
"""
Helper method for building a prompt from list of messages using the self._current_prompt_template as a template for each message.
Warning:
This function was deprecated in version 2.3.0, and will be removed in a future release.
Args:
messages: List of dictionaries. Each dictionary should have a "role" key
with value of "system", "assistant", or "user" and a "content" key with a
string value. Messages are organized such that "system" messages are at top of prompt,
and "user" and "assistant" messages are displayed in order. Assistant messages get formatted as
"Response: {content}".
Returns:
Formatted prompt.
"""
full_prompt = default_prompt_header + "\n\n" if default_prompt_header != "" else ""
for message in messages:
if message["role"] == "user":
user_message = self._current_prompt_template.format(message["content"])
full_prompt += user_message
if message["role"] == "assistant":
assistant_message = message["content"] + "\n"
full_prompt += assistant_message
full_prompt += "\n\n" + default_prompt_footer if default_prompt_footer != "" else ""
return full_prompt
def append_extension_if_missing(model_name): def append_extension_if_missing(model_name):
if not model_name.endswith((".bin", ".gguf")): if not model_name.endswith((".bin", ".gguf")):
@ -659,7 +697,7 @@ class _HasFileno(Protocol):
def _fsync(fd: int | _HasFileno) -> None: def _fsync(fd: int | _HasFileno) -> None:
if sys.platform == "darwin": if sys.platform == 'darwin':
# Apple's fsync does not flush the drive write cache # Apple's fsync does not flush the drive write cache
try: try:
fcntl.fcntl(fd, fcntl.F_FULLFSYNC) fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
@ -668,7 +706,3 @@ def _fsync(fd: int | _HasFileno) -> None:
else: else:
return return
os.fsync(fd) os.fsync(fd)
def _remove_prefix(s: str, prefix: str) -> str:
return s[len(prefix):] if s.startswith(prefix) else s

View File

@ -14,14 +14,10 @@ nav:
- 'Models' : 'gpt4all_desktop/models.md' - 'Models' : 'gpt4all_desktop/models.md'
- 'LocalDocs' : 'gpt4all_desktop/localdocs.md' - 'LocalDocs' : 'gpt4all_desktop/localdocs.md'
- 'Settings' : 'gpt4all_desktop/settings.md' - 'Settings' : 'gpt4all_desktop/settings.md'
- 'Chat Templates' : 'gpt4all_desktop/chat_templates.md'
- 'Cookbook': - 'Cookbook':
- 'Local AI Chat with Microsoft Excel': 'gpt4all_desktop/cookbook/use-local-ai-models-to-privately-chat-with-microsoft-excel.md'
- 'Local AI Chat with your Google Drive': 'gpt4all_desktop/cookbook/use-local-ai-models-to-privately-chat-with-google-drive.md' - 'Local AI Chat with your Google Drive': 'gpt4all_desktop/cookbook/use-local-ai-models-to-privately-chat-with-google-drive.md'
- 'Local AI Chat with your Obsidian Vault': 'gpt4all_desktop/cookbook/use-local-ai-models-to-privately-chat-with-Obsidian.md' - 'Local AI Chat with your Obsidian Vault': 'gpt4all_desktop/cookbook/use-local-ai-models-to-privately-chat-with-Obsidian.md'
- 'Local AI Chat with your OneDrive': 'gpt4all_desktop/cookbook/use-local-ai-models-to-privately-chat-with-One-Drive.md' - 'Local AI Chat with your OneDrive': 'gpt4all_desktop/cookbook/use-local-ai-models-to-privately-chat-with-One-Drive.md'
- 'API Server':
- 'gpt4all_api_server/home.md'
- 'Python SDK': - 'Python SDK':
- 'gpt4all_python/home.md' - 'gpt4all_python/home.md'
- 'Monitoring': 'gpt4all_python/monitoring.md' - 'Monitoring': 'gpt4all_python/monitoring.md'

View File

@ -68,17 +68,16 @@ def get_long_description():
setup( setup(
name=package_name, name=package_name,
version="2.8.3.dev0", version="2.8.0",
description="Python bindings for GPT4All", description="Python bindings for GPT4All",
long_description=get_long_description(), long_description=get_long_description(),
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
author="Nomic and the Open Source Community", author="Nomic and the Open Source Community",
author_email="support@nomic.ai", author_email="support@nomic.ai",
url="https://www.nomic.ai/gpt4all", url="https://gpt4all.io/",
project_urls={ project_urls={
"Documentation": "https://docs.gpt4all.io/gpt4all_python.html", "Documentation": "https://docs.gpt4all.io/gpt4all_python.html",
"Source code": "https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python", "Source code": "https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python",
"Changelog": "https://github.com/nomic-ai/gpt4all/blob/main/gpt4all-bindings/python/CHANGELOG.md",
}, },
classifiers = [ classifiers = [
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
@ -88,16 +87,15 @@ setup(
python_requires='>=3.8', python_requires='>=3.8',
packages=find_packages(), packages=find_packages(),
install_requires=[ install_requires=[
'importlib_resources; python_version < "3.9"',
'jinja2~=3.1',
'requests', 'requests',
'tqdm', 'tqdm',
'importlib_resources; python_version < "3.9"',
'typing-extensions>=4.3.0; python_version >= "3.9" and python_version < "3.11"', 'typing-extensions>=4.3.0; python_version >= "3.9" and python_version < "3.11"',
], ],
extras_require={ extras_require={
'cuda': [ 'cuda': [
'nvidia-cuda-runtime-cu11', 'nvidia-cuda-runtime-cu12',
'nvidia-cublas-cu11', 'nvidia-cublas-cu12',
], ],
'all': [ 'all': [
'gpt4all[cuda]; platform_system == "Windows" or platform_system == "Linux"', 'gpt4all[cuda]; platform_system == "Windows" or platform_system == "Linux"',

View File

@ -1,5 +0,0 @@
# vim: set syntax=dosini:
[flake8]
exclude = .*,__pycache__
max-line-length = 120
extend-ignore = B001,C408,D,DAR,E221,E303,E722,E741,E800,N801,N806,P101,S101,S324,S404,S406,S410,S603,WPS100,WPS110,WPS111,WPS113,WPS114,WPS115,WPS120,WPS2,WPS300,WPS301,WPS304,WPS305,WPS306,WPS309,WPS316,WPS317,WPS318,WPS319,WPS322,WPS323,WPS326,WPS329,WPS330,WPS332,WPS336,WPS337,WPS347,WPS360,WPS361,WPS407,WPS414,WPS420,WPS421,WPS429,WPS430,WPS431,WPS432,WPS433,WPS437,WPS440,WPS440,WPS441,WPS442,WPS457,WPS458,WPS460,WPS462,WPS463,WPS473,WPS501,WPS504,WPS505,WPS508,WPS509,WPS510,WPS515,WPS516,WPS519,WPS520,WPS529,WPS531,WPS602,WPS604,WPS605,WPS608,WPS609,WPS613,WPS615

View File

@ -1,335 +0,0 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [3.10.0] - 2025-02-24
### Added
- Whitelist Granite (non-MoE) model architecture (by [@ThiloteE](https://github.com/ThiloteE) in [#3487](https://github.com/nomic-ai/gpt4all/pull/3487))
- Add support for CUDA compute 5.0 GPUs such as the GTX 750 ([#3499](https://github.com/nomic-ai/gpt4all/pull/3499))
- Add a Remote Providers tab to the Add Model page ([#3506](https://github.com/nomic-ai/gpt4all/pull/3506))
### Changed
- Substitute prettier default templates for OLMoE 7B 0924/0125 and Granite 3.1 3B/8B (by [@ThiloteE](https://github.com/ThiloteE) in [#3471](https://github.com/nomic-ai/gpt4all/pull/3471))
- Build with LLVM Clang 19 on macOS and Ubuntu ([#3500](https://github.com/nomic-ai/gpt4all/pull/3500))
### Fixed
- Fix several potential crashes ([#3465](https://github.com/nomic-ai/gpt4all/pull/3465))
- Fix visual spacing issues with deepseek models ([#3470](https://github.com/nomic-ai/gpt4all/pull/3470))
- Add missing strings to Italian translation (by [@Harvester62](https://github.com/Harvester62) in [#3496](https://github.com/nomic-ai/gpt4all/pull/3496))
- Update Simplified Chinese translation (by [@Junior2Ran](https://github.com/Junior2Ran) in [#3467](https://github.com/nomic-ai/pull/3467))
## [3.9.0] - 2025-02-04
### Added
- Whitelist OLMoE and Granite MoE model architectures (no Vulkan) (by [@ThiloteE](https://github.com/ThiloteE) in [#3449](https://github.com/nomic-ai/gpt4all/pull/3449))
### Fixed
- Fix "index N is not a prompt" when using LocalDocs with reasoning ([#3451](https://github.com/nomic-ai/gpt4all/pull/3451))
- Work around rendering artifacts on Snapdragon SoCs with Windows ([#3450](https://github.com/nomic-ai/gpt4all/pull/3450))
- Prevent DeepSeek-R1 reasoning from appearing in chat names and follow-up questions ([#3458](https://github.com/nomic-ai/gpt4all/pull/3458))
- Fix LocalDocs crash on Windows ARM when reading PDFs ([#3460](https://github.com/nomic-ai/gpt4all/pull/3460))
- Fix UI freeze when chat template is `{#` ([#3446](https://github.com/nomic-ai/gpt4all/pull/3446))
## [3.8.0] - 2025-01-30
### Added
- Support DeepSeek-R1 Qwen models ([#3431](https://github.com/nomic-ai/gpt4all/pull/3431))
- Support for think tags in the GUI ([#3440](https://github.com/nomic-ai/gpt4all/pull/3440))
- Support specifying SHA256 hash in models3.json instead of MD5 ([#3437](https://github.com/nomic-ai/gpt4all/pull/3437))
### Changed
- Use minja instead of Jinja2Cpp for significantly improved template compatibility ([#3433](https://github.com/nomic-ai/gpt4all/pull/3433))
### Fixed
- Fix regression while using localdocs with server API ([#3410](https://github.com/nomic-ai/gpt4all/pull/3410))
- Don't show system messages in server chat view ([#3411](https://github.com/nomic-ai/gpt4all/pull/3411))
- Fix `codesign --verify` failure on macOS ([#3413](https://github.com/nomic-ai/gpt4all/pull/3413))
- Code Interpreter: Fix console.log not accepting a single string after v3.7.0 ([#3426](https://github.com/nomic-ai/gpt4all/pull/3426))
- Fix Phi 3.1 Mini 128K Instruct template (by [@ThiloteE](https://github.com/ThiloteE) in [#3412](https://github.com/nomic-ai/gpt4all/pull/3412))
- Don't block the gui thread for reasoning ([#3435](https://github.com/nomic-ai/gpt4all/pull/3435))
- Fix corruption of unicode in output of reasoning models ([#3443](https://github.com/nomic-ai/gpt4all/pull/3443))
## [3.7.0] - 2025-01-21
### Added
- Add support for the Windows ARM64 target platform (CPU-only) ([#3385](https://github.com/nomic-ai/gpt4all/pull/3385))
### Changed
- Update from Qt 6.5.1 to 6.8.1 ([#3386](https://github.com/nomic-ai/gpt4all/pull/3386))
### Fixed
- Fix the timeout error in code interpreter ([#3369](https://github.com/nomic-ai/gpt4all/pull/3369))
- Fix code interpreter console.log not accepting multiple arguments ([#3371](https://github.com/nomic-ai/gpt4all/pull/3371))
- Remove 'X is defined' checks from templates for better compatibility ([#3372](https://github.com/nomic-ai/gpt4all/pull/3372))
- Jinja2Cpp: Add 'if' requirement for 'else' parsing to fix crash ([#3373](https://github.com/nomic-ai/gpt4all/pull/3373))
- Save chats on quit, even if the window isn't closed first ([#3387](https://github.com/nomic-ai/gpt4all/pull/3387))
- Add chat template replacements for five new models and fix EM German Mistral ([#3393](https://github.com/nomic-ai/gpt4all/pull/3393))
- Fix crash when entering `{{ a["foo"(` as chat template ([#3394](https://github.com/nomic-ai/gpt4all/pull/3394))
- Sign the maintenance tool on macOS to prevent crash on Sequoia ([#3391](https://github.com/nomic-ai/gpt4all/pull/3391))
- Jinja2Cpp: Fix operator precedence in 'not X is defined' ([#3402](https://github.com/nomic-ai/gpt4all/pull/3402))
## [3.6.1] - 2024-12-20
### Fixed
- Fix the stop generation button no longer working in v3.6.0 ([#3336](https://github.com/nomic-ai/gpt4all/pull/3336))
- Fix the copy entire conversation button no longer working in v3.6.0 ([#3336](https://github.com/nomic-ai/gpt4all/pull/3336))
## [3.6.0] - 2024-12-19
### Added
- Automatically substitute chat templates that are not compatible with Jinja2Cpp in GGUFs ([#3327](https://github.com/nomic-ai/gpt4all/pull/3327))
- Built-in javascript code interpreter tool plus model ([#3173](https://github.com/nomic-ai/gpt4all/pull/3173))
### Fixed
- Fix remote model template to allow for XML in messages ([#3318](https://github.com/nomic-ai/gpt4all/pull/3318))
- Fix Jinja2Cpp bug that broke system message detection in chat templates ([#3325](https://github.com/nomic-ai/gpt4all/pull/3325))
- Fix LocalDocs sources displaying in unconsolidated form after v3.5.0 ([#3328](https://github.com/nomic-ai/gpt4all/pull/3328))
## [3.5.3] - 2024-12-16
### Fixed
- Fix LocalDocs not using information from sources in v3.5.2 ([#3302](https://github.com/nomic-ai/gpt4all/pull/3302))
## [3.5.2] - 2024-12-13
### Added
- Create separate download pages for built-in and HuggingFace models ([#3269](https://github.com/nomic-ai/gpt4all/pull/3269))
### Fixed
- Fix API server ignoring assistant messages in history after v3.5.0 ([#3256](https://github.com/nomic-ai/gpt4all/pull/3256))
- Fix API server replying with incorrect token counts and stop reason after v3.5.0 ([#3256](https://github.com/nomic-ai/gpt4all/pull/3256))
- Fix API server remembering previous, unrelated conversations after v3.5.0 ([#3256](https://github.com/nomic-ai/gpt4all/pull/3256))
- Fix mishandling of default chat template and system message of cloned models in v3.5.0 ([#3262](https://github.com/nomic-ai/gpt4all/pull/3262))
- Fix untranslated text on the startup dialog ([#3293](https://github.com/nomic-ai/gpt4all/pull/3293))
## [3.5.1] - 2024-12-10
### Fixed
- Fix an incorrect value for currentResponse ([#3245](https://github.com/nomic-ai/gpt4all/pull/3245))
- Fix the default model button so it works again after 3.5.0 ([#3246](https://github.com/nomic-ai/gpt4all/pull/3246))
- Fix chat templates for Nous Hermes 2 Mistral, Mistral OpenOrca, Qwen 2, and remote models ([#3250](https://github.com/nomic-ai/gpt4all/pull/3250))
- Fix chat templates for Llama 3.2 models ([#3251](https://github.com/nomic-ai/gpt4all/pull/3251))
## [3.5.0] - 2024-12-09
### Changed
- Update Italian translation (by [@Harvester62](https://github.com/Harvester62) in [#3236](https://github.com/nomic-ai/gpt4all/pull/3236))
- Update Romanian translation (by [@SINAPSA-IC](https://github.com/SINAPSA-IC) in [#3232](https://github.com/nomic-ai/gpt4all/pull/3232))
### Fixed
- Fix a few more problems with the Jinja changes ([#3239](https://github.com/nomic-ai/gpt4all/pull/3239))
## [3.5.0-rc2] - 2024-12-06
### Changed
- Fade messages out with an animation when they are removed from the chat view ([#3227](https://github.com/nomic-ai/gpt4all/pull/3227))
- Tweak wording of edit/redo confirmation dialogs ([#3228](https://github.com/nomic-ai/gpt4all/pull/3228))
- Make edit/redo buttons disabled instead of invisible when they are temporarily unavailable ([#3228](https://github.com/nomic-ai/gpt4all/pull/3228))
## [3.5.0-rc1] - 2024-12-04
### Added
- Add ability to attach text, markdown, and rst files to chat ([#3135](https://github.com/nomic-ai/gpt4all/pull/3135))
- Add feature to minimize to system tray (by [@bgallois](https://github.com/bgallois) in [#3109](https://github.com/nomic-ai/gpt4all/pull/3109))
- Basic cache for faster prefill when the input shares a prefix with previous context ([#3073](https://github.com/nomic-ai/gpt4all/pull/3073))
- Add ability to edit prompts and regenerate any response ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
### Changed
- Implement Qt 6.8 compatibility ([#3121](https://github.com/nomic-ai/gpt4all/pull/3121))
- Use Jinja for chat templates instead of per-message QString.arg-style templates ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
- API server: Use system message(s) from client instead of settings ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
- API server: Accept messages in any order supported by the model instead of requiring user/assistant pairs ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
- Remote models: Pass system message with "system" role instead of joining with user message ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
### Removed
- Remove option to save binary model state to disk ([#3147](https://github.com/nomic-ai/gpt4all/pull/3147))
### Fixed
- Fix bug in GUI when localdocs encounters binary data ([#3137](https://github.com/nomic-ai/gpt4all/pull/3137))
- Fix LocalDocs bugs that prevented some docx files from fully chunking ([#3140](https://github.com/nomic-ai/gpt4all/pull/3140))
- Fix missing softmax that was causing crashes and effectively infinite temperature since 3.4.0 ([#3202](https://github.com/nomic-ai/gpt4all/pull/3202))
## [3.4.2] - 2024-10-16
### Fixed
- Limit bm25 retrieval to only specified collections ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
- Fix bug removing documents because of a wrong case sensitive file suffix check ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
- Fix bug with hybrid localdocs search where database would get out of sync ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
- Fix GUI bug where the localdocs embedding device appears blank ([#3083](https://github.com/nomic-ai/gpt4all/pull/3083))
- Prevent LocalDocs from not making progress in certain cases ([#3094](https://github.com/nomic-ai/gpt4all/pull/3094))
## [3.4.1] - 2024-10-11
### Fixed
- Improve the Italian translation ([#3048](https://github.com/nomic-ai/gpt4all/pull/3048))
- Fix models.json cache location ([#3052](https://github.com/nomic-ai/gpt4all/pull/3052))
- Fix LocalDocs regressions caused by docx change ([#3079](https://github.com/nomic-ai/gpt4all/pull/3079))
- Fix Go code being highlighted as Java ([#3080](https://github.com/nomic-ai/gpt4all/pull/3080))
## [3.4.0] - 2024-10-08
### Added
- Add bm25 hybrid search to localdocs ([#2969](https://github.com/nomic-ai/gpt4all/pull/2969))
- LocalDocs support for .docx files ([#2986](https://github.com/nomic-ai/gpt4all/pull/2986))
- Add support for attaching Excel spreadsheet to chat ([#3007](https://github.com/nomic-ai/gpt4all/pull/3007), [#3028](https://github.com/nomic-ai/gpt4all/pull/3028))
### Changed
- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
- Simplify chatmodel to get rid of unnecessary field and bump chat version ([#3016](https://github.com/nomic-ai/gpt4all/pull/3016))
- Allow ChatLLM to have direct access to ChatModel for restoring state from text ([#3018](https://github.com/nomic-ai/gpt4all/pull/3018))
- Improvements to XLSX conversion and UI fix ([#3022](https://github.com/nomic-ai/gpt4all/pull/3022))
### Fixed
- Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
- Fix the local server rejecting min\_p/top\_p less than 1 ([#2996](https://github.com/nomic-ai/gpt4all/pull/2996))
- Fix "regenerate" always forgetting the most recent message ([#3011](https://github.com/nomic-ai/gpt4all/pull/3011))
- Fix loaded chats forgetting context when there is a system prompt ([#3015](https://github.com/nomic-ai/gpt4all/pull/3015))
- Make it possible to downgrade and keep some chats, and avoid crash for some model types ([#3030](https://github.com/nomic-ai/gpt4all/pull/3030))
- Fix scroll position being reset in model view, and attempt a better fix for the clone issue ([#3042](https://github.com/nomic-ai/gpt4all/pull/3042))
## [3.3.1] - 2024-09-27 ([v3.3.y](https://github.com/nomic-ai/gpt4all/tree/v3.3.y))
### Fixed
- Fix a crash when attempting to continue a chat loaded from disk ([#2995](https://github.com/nomic-ai/gpt4all/pull/2995))
- Fix the local server rejecting min\_p/top\_p less than 1 ([#2996](https://github.com/nomic-ai/gpt4all/pull/2996))
## [3.3.0] - 2024-09-20
### Added
- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
- Use configured system prompt in server mode and ignore system messages ([#2921](https://github.com/nomic-ai/gpt4all/pull/2921), [#2924](https://github.com/nomic-ai/gpt4all/pull/2924))
- Add more system information to anonymous usage stats ([#2939](https://github.com/nomic-ai/gpt4all/pull/2939))
- Check for unsupported Ubuntu and macOS versions at install time ([#2940](https://github.com/nomic-ai/gpt4all/pull/2940))
### Changed
- The offline update button now directs users to the offline installer releases page. (by [@3Simplex](https://github.com/3Simplex) in [#2888](https://github.com/nomic-ai/gpt4all/pull/2888))
- Change the website link on the home page to point to the new URL ([#2915](https://github.com/nomic-ai/gpt4all/pull/2915))
- Smaller default window size, dynamic minimum size, and scaling tweaks ([#2904](https://github.com/nomic-ai/gpt4all/pull/2904))
- Only allow a single instance of the program to be run at a time ([#2923](https://github.com/nomic-ai/gpt4all/pull/2923))
### Fixed
- Bring back "Auto" option for Embeddings Device as "Application default," which went missing in v3.1.0 ([#2873](https://github.com/nomic-ai/gpt4all/pull/2873))
- Correct a few strings in the Italian translation (by [@Harvester62](https://github.com/Harvester62) in [#2872](https://github.com/nomic-ai/gpt4all/pull/2872) and [#2909](https://github.com/nomic-ai/gpt4all/pull/2909))
- Correct typos in Traditional Chinese translation (by [@supersonictw](https://github.com/supersonictw) in [#2852](https://github.com/nomic-ai/gpt4all/pull/2852))
- Set the window icon on Linux ([#2880](https://github.com/nomic-ai/gpt4all/pull/2880))
- Corrections to the Romanian translation (by [@SINAPSA-IC](https://github.com/SINAPSA-IC) in [#2890](https://github.com/nomic-ai/gpt4all/pull/2890))
- Fix singular/plural forms of LocalDocs "x Sources" (by [@cosmic-snow](https://github.com/cosmic-snow) in [#2885](https://github.com/nomic-ai/gpt4all/pull/2885))
- Fix a typo in Model Settings (by [@3Simplex](https://github.com/3Simplex) in [#2916](https://github.com/nomic-ai/gpt4all/pull/2916))
- Fix the antenna icon tooltip when using the local server ([#2922](https://github.com/nomic-ai/gpt4all/pull/2922))
- Fix a few issues with locating files and handling errors when loading remote models on startup ([#2875](https://github.com/nomic-ai/gpt4all/pull/2875))
- Significantly improve API server request parsing and response correctness ([#2929](https://github.com/nomic-ai/gpt4all/pull/2929))
- Remove unnecessary dependency on Qt WaylandCompositor module ([#2949](https://github.com/nomic-ai/gpt4all/pull/2949))
- Update translations ([#2970](https://github.com/nomic-ai/gpt4all/pull/2970))
- Fix macOS installer and remove extra installed copy of Nomic Embed ([#2973](https://github.com/nomic-ai/gpt4all/pull/2973))
## [3.2.1] - 2024-08-13
### Fixed
- Do not initialize Vulkan driver when only using CPU ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
- Fix a potential crash on exit when using only CPU on Linux with NVIDIA (does not affect X11) ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
- Fix default CUDA architecture list after [#2802](https://github.com/nomic-ai/gpt4all/pull/2802) ([#2855](https://github.com/nomic-ai/gpt4all/pull/2855))
## [3.2.0] - 2024-08-12
### Added
- Add Qwen2-1.5B-Instruct to models3.json (by [@ThiloteE](https://github.com/ThiloteE) in [#2759](https://github.com/nomic-ai/gpt4all/pull/2759))
- Enable translation feature for seven languages: English, Spanish, Italian, Portuguese, Chinese Simplified, Chinese Traditional, Romanian ([#2830](https://github.com/nomic-ai/gpt4all/pull/2830))
### Changed
- Add missing entries to Italian translation (by [@Harvester62](https://github.com/Harvester62) in [#2783](https://github.com/nomic-ai/gpt4all/pull/2783))
- Use llama\_kv\_cache ops to shift context faster ([#2781](https://github.com/nomic-ai/gpt4all/pull/2781))
- Don't stop generating at end of context ([#2781](https://github.com/nomic-ai/gpt4all/pull/2781))
### Fixed
- Case-insensitive LocalDocs source icon detection (by [@cosmic-snow](https://github.com/cosmic-snow) in [#2761](https://github.com/nomic-ai/gpt4all/pull/2761))
- Fix comparison of pre- and post-release versions for update check and models3.json ([#2762](https://github.com/nomic-ai/gpt4all/pull/2762), [#2772](https://github.com/nomic-ai/gpt4all/pull/2772))
- Fix several backend issues ([#2778](https://github.com/nomic-ai/gpt4all/pull/2778))
- Restore leading space removal logic that was incorrectly removed in [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)
- CUDA: Cherry-pick llama.cpp DMMV cols requirement fix that caused a crash with long conversations since [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)
- Make reverse prompt detection work more reliably and prevent it from breaking output ([#2781](https://github.com/nomic-ai/gpt4all/pull/2781))
- Disallow context shift for chat name and follow-up generation to prevent bugs ([#2781](https://github.com/nomic-ai/gpt4all/pull/2781))
- Explicitly target macOS 12.6 in CI to fix Metal compatibility on older macOS ([#2846](https://github.com/nomic-ai/gpt4all/pull/2846))
## [3.1.1] - 2024-07-27
### Added
- Add Llama 3.1 8B Instruct to models3.json (by [@3Simplex](https://github.com/3Simplex) in [#2731](https://github.com/nomic-ai/gpt4all/pull/2731) and [#2732](https://github.com/nomic-ai/gpt4all/pull/2732))
- Portuguese (BR) translation (by [thiagojramos](https://github.com/thiagojramos) in [#2733](https://github.com/nomic-ai/gpt4all/pull/2733))
- Support adding arbitrary OpenAI-compatible models by URL (by [@supersonictw](https://github.com/supersonictw) in [#2683](https://github.com/nomic-ai/gpt4all/pull/2683))
- Support Llama 3.1 RoPE scaling ([#2758](https://github.com/nomic-ai/gpt4all/pull/2758))
### Changed
- Add missing entries to Chinese (Simplified) translation (by [wuodoo](https://github.com/wuodoo) in [#2716](https://github.com/nomic-ai/gpt4all/pull/2716) and [#2749](https://github.com/nomic-ai/gpt4all/pull/2749))
- Update translation files and add missing paths to CMakeLists.txt ([#2735](https://github.com/nomic-ai/gpt4all/pull/2735))
## [3.1.0] - 2024-07-24
### Added
- Generate suggested follow-up questions ([#2634](https://github.com/nomic-ai/gpt4all/pull/2634), [#2723](https://github.com/nomic-ai/gpt4all/pull/2723))
- Also add options for the chat name and follow-up question prompt templates
- Scaffolding for translations ([#2612](https://github.com/nomic-ai/gpt4all/pull/2612))
- Spanish (MX) translation (by [@jstayco](https://github.com/jstayco) in [#2654](https://github.com/nomic-ai/gpt4all/pull/2654))
- Chinese (Simplified) translation by mikage ([#2657](https://github.com/nomic-ai/gpt4all/pull/2657))
- Dynamic changes of language and locale at runtime ([#2659](https://github.com/nomic-ai/gpt4all/pull/2659), [#2677](https://github.com/nomic-ai/gpt4all/pull/2677))
- Romanian translation by [@SINAPSA\_IC](https://github.com/SINAPSA_IC) ([#2662](https://github.com/nomic-ai/gpt4all/pull/2662))
- Chinese (Traditional) translation (by [@supersonictw](https://github.com/supersonictw) in [#2661](https://github.com/nomic-ai/gpt4all/pull/2661))
- Italian translation (by [@Harvester62](https://github.com/Harvester62) in [#2700](https://github.com/nomic-ai/gpt4all/pull/2700))
### Changed
- Customize combo boxes and context menus to fit the new style ([#2535](https://github.com/nomic-ai/gpt4all/pull/2535))
- Improve view bar scaling and Model Settings layout ([#2520](https://github.com/nomic-ai/gpt4all/pull/2520))
- Make the logo spin while the model is generating ([#2557](https://github.com/nomic-ai/gpt4all/pull/2557))
- Server: Reply to wrong GET/POST method with HTTP 405 instead of 404 (by [@cosmic-snow](https://github.com/cosmic-snow) in [#2615](https://github.com/nomic-ai/gpt4all/pull/2615))
- Update theme for menus (by [@3Simplex](https://github.com/3Simplex) in [#2578](https://github.com/nomic-ai/gpt4all/pull/2578))
- Move the "stop" button to the message box ([#2561](https://github.com/nomic-ai/gpt4all/pull/2561))
- Build with CUDA 11.8 for better compatibility ([#2639](https://github.com/nomic-ai/gpt4all/pull/2639))
- Make links in latest news section clickable ([#2643](https://github.com/nomic-ai/gpt4all/pull/2643))
- Support translation of settings choices ([#2667](https://github.com/nomic-ai/gpt4all/pull/2667), [#2690](https://github.com/nomic-ai/gpt4all/pull/2690))
- Improve LocalDocs view's error message (by @cosmic-snow in [#2679](https://github.com/nomic-ai/gpt4all/pull/2679))
- Ignore case of LocalDocs file extensions ([#2642](https://github.com/nomic-ai/gpt4all/pull/2642), [#2684](https://github.com/nomic-ai/gpt4all/pull/2684))
- Update llama.cpp to commit 87e397d00 from July 19th ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694), [#2702](https://github.com/nomic-ai/gpt4all/pull/2702))
- Add support for GPT-NeoX, Gemma 2, OpenELM, ChatGLM, and Jais architectures (all with Vulkan support)
- Add support for DeepSeek-V2 architecture (no Vulkan support)
- Enable Vulkan support for StarCoder2, XVERSE, Command R, and OLMo
- Show scrollbar in chat collections list as needed (by [@cosmic-snow](https://github.com/cosmic-snow) in [#2691](https://github.com/nomic-ai/gpt4all/pull/2691))
### Removed
- Remove support for GPT-J models ([#2676](https://github.com/nomic-ai/gpt4all/pull/2676), [#2693](https://github.com/nomic-ai/gpt4all/pull/2693))
### Fixed
- Fix placement of thumbs-down and datalake opt-in dialogs ([#2540](https://github.com/nomic-ai/gpt4all/pull/2540))
- Select the correct folder with the Linux fallback folder dialog ([#2541](https://github.com/nomic-ai/gpt4all/pull/2541))
- Fix clone button sometimes producing blank model info ([#2545](https://github.com/nomic-ai/gpt4all/pull/2545))
- Fix jerky chat view scrolling ([#2555](https://github.com/nomic-ai/gpt4all/pull/2555))
- Fix "reload" showing for chats with missing models ([#2520](https://github.com/nomic-ai/gpt4all/pull/2520)
- Fix property binding loop warning ([#2601](https://github.com/nomic-ai/gpt4all/pull/2601))
- Fix UI hang with certain chat view content ([#2543](https://github.com/nomic-ai/gpt4all/pull/2543))
- Fix crash when Kompute falls back to CPU ([#2640](https://github.com/nomic-ai/gpt4all/pull/2640))
- Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
- Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
[3.10.0]: https://github.com/nomic-ai/gpt4all/compare/v3.9.0...v3.10.0
[3.9.0]: https://github.com/nomic-ai/gpt4all/compare/v3.8.0...v3.9.0
[3.8.0]: https://github.com/nomic-ai/gpt4all/compare/v3.7.0...v3.8.0
[3.7.0]: https://github.com/nomic-ai/gpt4all/compare/v3.6.1...v3.7.0
[3.6.1]: https://github.com/nomic-ai/gpt4all/compare/v3.6.0...v3.6.1
[3.6.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.3...v3.6.0
[3.5.3]: https://github.com/nomic-ai/gpt4all/compare/v3.5.2...v3.5.3
[3.5.2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.1...v3.5.2
[3.5.1]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0...v3.5.1
[3.5.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc2...v3.5.0
[3.5.0-rc2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc1...v3.5.0-rc2
[3.5.0-rc1]: https://github.com/nomic-ai/gpt4all/compare/v3.4.2...v3.5.0-rc1
[3.4.2]: https://github.com/nomic-ai/gpt4all/compare/v3.4.1...v3.4.2
[3.4.1]: https://github.com/nomic-ai/gpt4all/compare/v3.4.0...v3.4.1
[3.4.0]: https://github.com/nomic-ai/gpt4all/compare/v3.3.0...v3.4.0
[3.3.1]: https://github.com/nomic-ai/gpt4all/compare/v3.3.0...v3.3.1
[3.3.0]: https://github.com/nomic-ai/gpt4all/compare/v3.2.1...v3.3.0
[3.2.1]: https://github.com/nomic-ai/gpt4all/compare/v3.2.0...v3.2.1
[3.2.0]: https://github.com/nomic-ai/gpt4all/compare/v3.1.1...v3.2.0
[3.1.1]: https://github.com/nomic-ai/gpt4all/compare/v3.1.0...v3.1.1
[3.1.0]: https://github.com/nomic-ai/gpt4all/compare/v3.0.0...v3.1.0

View File

@ -1,18 +1,8 @@
cmake_minimum_required(VERSION 3.25) # for try_compile SOURCE_FROM_VAR cmake_minimum_required(VERSION 3.16)
include(../common/common.cmake) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
set(APP_VERSION_MAJOR 3) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(APP_VERSION_MINOR 10)
set(APP_VERSION_PATCH 1)
set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
set(APP_VERSION "${APP_VERSION_BASE}-dev0")
project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install CACHE PATH "..." FORCE)
endif()
if(APPLE) if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF) option(BUILD_UNIVERSAL "Build a Universal binary on macOS" OFF)
@ -26,88 +16,38 @@ if(APPLE)
endif() endif()
endif() endif()
find_package(Python3 3.12 QUIET COMPONENTS Interpreter) set(APP_VERSION_MAJOR 3)
set(APP_VERSION_MINOR 1)
option(GPT4ALL_TEST "Build the tests" ${Python3_FOUND}) set(APP_VERSION_PATCH 2)
option(GPT4ALL_LOCALHOST "Build installer for localhost repo" OFF) set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
option(GPT4ALL_OFFLINE_INSTALLER "Build an offline installer" OFF) set(APP_VERSION "${APP_VERSION_BASE}-dev0")
option(GPT4ALL_SIGN_INSTALL "Sign installed binaries and installers (requires signing identities)" OFF)
option(GPT4ALL_GEN_CPACK_CONFIG "Generate the CPack config.xml in the package step and nothing else." OFF)
set(GPT4ALL_USE_QTPDF "AUTO" CACHE STRING "Whether to Use QtPDF for LocalDocs. If OFF or not available on this platform, PDFium is used.")
set_property(CACHE GPT4ALL_USE_QTPDF PROPERTY STRINGS AUTO ON OFF)
set(GPT4ALL_FORCE_D3D12 "AUTO" CACHE STRING "Whether to use Direct3D 12 as the Qt scene graph backend. Defaults to ON on Windows ARM.")
set_property(CACHE GPT4ALL_FORCE_D3D12 PROPERTY STRINGS AUTO ON OFF)
include(cmake/cpack_config.cmake)
if (GPT4ALL_GEN_CPACK_CONFIG)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/cpack-steal-config.cmake.in"
"${CMAKE_BINARY_DIR}/cmake/cpack-steal-config.cmake" @ONLY)
set(CPACK_POST_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/cpack-steal-config.cmake)
include(CPack)
include(CPackIFW)
return()
endif()
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if (MSVC)
# Enable accurate __cplusplus macro
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/Zc:__cplusplus>)
endif()
# conftests
function(check_cpp_feature FEATURE_NAME MIN_VALUE)
message(CHECK_START "Checking for ${FEATURE_NAME} >= ${MIN_VALUE}")
string(CONCAT SRC
"#include <version>\n"
"#if !defined(${FEATURE_NAME}) || ${FEATURE_NAME} < ${MIN_VALUE}\n"
"# error \"${FEATURE_NAME} is not defined or less than ${MIN_VALUE}\"\n"
"#endif\n"
"int main() { return 0; }\n"
)
try_compile(HAS_FEATURE SOURCE_FROM_VAR "test_${FEATURE_NAME}.cpp" SRC)
if (NOT HAS_FEATURE)
message(CHECK_FAIL "fail")
message(FATAL_ERROR
"The C++ compiler\n \"${CMAKE_CXX_COMPILER}\"\n"
"is too old to support ${FEATURE_NAME} >= ${MIN_VALUE}.\n"
"Please specify a newer compiler via -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER."
)
endif()
message(CHECK_PASS "pass")
endfunction()
# check for monadic operations in std::optional (e.g. transform)
check_cpp_feature("__cpp_lib_optional" "202110L")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules")
# Include the binary directory for the generated header file # Include the binary directory for the generated header file
include_directories("${CMAKE_CURRENT_BINARY_DIR}") include_directories("${CMAKE_CURRENT_BINARY_DIR}")
project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
set(CMAKE_AUTOMOC ON) set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON) set(CMAKE_AUTORCC ON)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON) option(GPT4ALL_TRANSLATIONS OFF "Build with translations")
set(GPT4ALL_QT_COMPONENTS Core HttpServer LinguistTools Quick QuickDialogs2 Sql Svg) option(GPT4ALL_LOCALHOST OFF "Build installer for localhost repo")
set(GPT4ALL_USING_QTPDF OFF) option(GPT4ALL_OFFLINE_INSTALLER "Build an offline installer" OFF)
if (CMAKE_SYSTEM_NAME MATCHES Windows AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") option(GPT4ALL_SIGN_INSTALL "Sign installed binaries and installers (requires signing identities)" OFF)
# QtPDF is not available.
if (GPT4ALL_USE_QTPDF STREQUAL "ON")
message(FATAL_ERROR "QtPDF is not available on Windows ARM64.")
endif()
elseif (GPT4ALL_USE_QTPDF MATCHES "^(ON|AUTO)$")
set(GPT4ALL_USING_QTPDF ON)
list(APPEND GPT4ALL_QT_COMPONENTS Pdf)
endif()
find_package(Qt6 6.8 COMPONENTS ${GPT4ALL_QT_COMPONENTS} REQUIRED)
if (QT_KNOWN_POLICY_QTP0004) # Generate a header file with the version number
qt_policy(SET QTP0004 NEW) # generate extra qmldir files on Qt 6.8+ configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/config.h"
)
if(LINUX)
find_package(Qt6 6.4 COMPONENTS Core Quick WaylandCompositor QuickDialogs2 Svg HttpServer Sql Pdf LinguistTools REQUIRED)
else()
find_package(Qt6 6.4 COMPONENTS Core Quick QuickDialogs2 Svg HttpServer Sql Pdf LinguistTools REQUIRED)
endif() endif()
# Get the Qt6Core target properties # Get the Qt6Core target properties
@ -124,62 +64,15 @@ get_filename_component(Qt6_ROOT_DIR "${Qt6_ROOT_DIR}/.." ABSOLUTE)
message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}") message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}")
message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}") message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(GPT4ALL_CONFIG_FORCE_D3D12 -1)
if (NOT CMAKE_SYSTEM_NAME MATCHES Windows OR Qt6_VERSION VERSION_LESS "6.6")
# Direct3D 12 is not available.
if (GPT4ALL_FORCE_D3D12 STREQUAL "ON")
message(FATAL_ERROR "Cannot use Direct3D 12 on this platform.")
endif()
elseif (GPT4ALL_FORCE_D3D12 MATCHES "^(ON|AUTO)$")
if (GPT4ALL_FORCE_D3D12 STREQUAL "ON" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$")
set(GPT4ALL_CONFIG_FORCE_D3D12 1)
endif()
endif()
# Generate a header file for configuration
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/config.h"
)
add_subdirectory(deps)
add_subdirectory(../gpt4all-backend llmodel) add_subdirectory(../gpt4all-backend llmodel)
if (GPT4ALL_TEST)
enable_testing()
# Llama-3.2-1B model
set(TEST_MODEL "Llama-3.2-1B-Instruct-Q4_0.gguf")
set(TEST_MODEL_MD5 "48ff0243978606fdba19d899b77802fc")
set(TEST_MODEL_PATH "${CMAKE_BINARY_DIR}/resources/${TEST_MODEL}")
set(TEST_MODEL_URL "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/${TEST_MODEL}")
# Create a custom command to download the file if it does not exist or if the checksum does not match
add_custom_command(
OUTPUT "${TEST_MODEL_PATH}"
COMMAND ${CMAKE_COMMAND} -E echo "Downloading test model from ${TEST_MODEL_URL} ..."
COMMAND ${CMAKE_COMMAND} -DURL="${TEST_MODEL_URL}" -DOUTPUT_PATH="${TEST_MODEL_PATH}" -DEXPECTED_MD5="${TEST_MODEL_MD5}" -P "${CMAKE_SOURCE_DIR}/cmake/download_model.cmake"
DEPENDS "${CMAKE_SOURCE_DIR}/cmake/download_model.cmake"
)
# Define a custom target that depends on the downloaded model
add_custom_target(download_test_model
DEPENDS "${TEST_MODEL_PATH}"
)
add_subdirectory(tests)
# The 'check' target makes sure the tests and their dependencies are up-to-date before running them
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure DEPENDS download_test_model chat gpt4all_tests)
endif()
set(CHAT_EXE_RESOURCES) set(CHAT_EXE_RESOURCES)
# Metal shader library # Metal shader library
if (APPLE) if (APPLE)
list(APPEND CHAT_EXE_RESOURCES "${GGML_METALLIB}") list(APPEND CHAT_EXE_RESOURCES "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib")
endif() endif()
# App icon # App icon
@ -193,6 +86,8 @@ elseif (APPLE)
# And the following tells CMake where to find and install the file itself. # And the following tells CMake where to find and install the file itself.
set(APP_ICON_RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns") set(APP_ICON_RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns")
set_source_files_properties(${APP_ICON_RESOURCE} PROPERTIES
MACOSX_PACKAGE_LOCATION "Resources")
list(APPEND CHAT_EXE_RESOURCES "${APP_ICON_RESOURCE}") list(APPEND CHAT_EXE_RESOURCES "${APP_ICON_RESOURCE}")
endif() endif()
@ -212,49 +107,26 @@ if (APPLE)
list(APPEND CHAT_EXE_RESOURCES "${LOCAL_EMBEDDING_MODEL_PATH}") list(APPEND CHAT_EXE_RESOURCES "${LOCAL_EMBEDDING_MODEL_PATH}")
endif() endif()
if (DEFINED GGML_METALLIB)
set_source_files_properties("${GGML_METALLIB}" PROPERTIES GENERATED ON)
endif()
if (APPLE)
set_source_files_properties(${CHAT_EXE_RESOURCES} PROPERTIES MACOSX_PACKAGE_LOCATION Resources)
endif()
set(MACOS_SOURCES)
if (APPLE)
find_library(COCOA_LIBRARY Cocoa)
list(APPEND MACOS_SOURCES src/macosdock.mm src/macosdock.h)
endif()
qt_add_executable(chat qt_add_executable(chat
src/main.cpp main.cpp
src/chat.cpp src/chat.h chat.h chat.cpp
src/chatapi.cpp src/chatapi.h chatllm.h chatllm.cpp
src/chatlistmodel.cpp src/chatlistmodel.h chatmodel.h chatlistmodel.h chatlistmodel.cpp
src/chatllm.cpp src/chatllm.h chatapi.h chatapi.cpp
src/chatmodel.h src/chatmodel.cpp chatviewtextprocessor.h chatviewtextprocessor.cpp
src/chatviewtextprocessor.cpp src/chatviewtextprocessor.h database.h database.cpp
src/codeinterpreter.cpp src/codeinterpreter.h download.h download.cpp
src/database.cpp src/database.h embllm.cpp embllm.h
src/download.cpp src/download.h localdocs.h localdocs.cpp localdocsmodel.h localdocsmodel.cpp
src/embllm.cpp src/embllm.h llm.h llm.cpp
src/jinja_helpers.cpp src/jinja_helpers.h modellist.h modellist.cpp
src/jinja_replacements.cpp src/jinja_replacements.h mysettings.h mysettings.cpp
src/llm.cpp src/llm.h network.h network.cpp
src/localdocs.cpp src/localdocs.h server.h server.cpp
src/localdocsmodel.cpp src/localdocsmodel.h logger.h logger.cpp
src/logger.cpp src/logger.h ${APP_ICON_RESOURCE}
src/modellist.cpp src/modellist.h
src/mysettings.cpp src/mysettings.h
src/network.cpp src/network.h
src/server.cpp src/server.h
src/tool.cpp src/tool.h
src/toolcallparser.cpp src/toolcallparser.h
src/toolmodel.cpp src/toolmodel.h
src/xlsxtomd.cpp src/xlsxtomd.h
${CHAT_EXE_RESOURCES} ${CHAT_EXE_RESOURCES}
${MACOS_SOURCES}
) )
gpt4all_add_warning_options(chat)
qt_add_qml_module(chat qt_add_qml_module(chat
URI gpt4all URI gpt4all
@ -264,15 +136,8 @@ qt_add_qml_module(chat
main.qml main.qml
qml/AddCollectionView.qml qml/AddCollectionView.qml
qml/AddModelView.qml qml/AddModelView.qml
qml/AddGPT4AllModelView.qml
qml/AddHFModelView.qml
qml/AddRemoteModelView.qml
qml/ApplicationSettings.qml qml/ApplicationSettings.qml
qml/ChatDrawer.qml qml/ChatDrawer.qml
qml/ChatCollapsibleItem.qml
qml/ChatItemView.qml
qml/ChatMessageButton.qml
qml/ChatTextItem.qml
qml/ChatView.qml qml/ChatView.qml
qml/CollectionsDrawer.qml qml/CollectionsDrawer.qml
qml/HomeView.qml qml/HomeView.qml
@ -285,21 +150,17 @@ qt_add_qml_module(chat
qml/PopupDialog.qml qml/PopupDialog.qml
qml/SettingsView.qml qml/SettingsView.qml
qml/StartupDialog.qml qml/StartupDialog.qml
qml/ConfirmationDialog.qml qml/SwitchModelDialog.qml
qml/Theme.qml qml/Theme.qml
qml/ThumbsDownDialog.qml qml/ThumbsDownDialog.qml
qml/Toast.qml qml/Toast.qml
qml/ToastManager.qml qml/ToastManager.qml
qml/MyBusyIndicator.qml qml/MyBusyIndicator.qml
qml/MyButton.qml qml/MyButton.qml
qml/MyTabButton.qml
qml/MyCheckBox.qml qml/MyCheckBox.qml
qml/MyComboBox.qml qml/MyComboBox.qml
qml/MyDialog.qml qml/MyDialog.qml
qml/MyDirectoryField.qml qml/MyDirectoryField.qml
qml/MyFileDialog.qml
qml/MyFileIcon.qml
qml/MyFolderDialog.qml
qml/MyFancyLink.qml qml/MyFancyLink.qml
qml/MyMenu.qml qml/MyMenu.qml
qml/MyMenuItem.qml qml/MyMenuItem.qml
@ -315,7 +176,6 @@ qt_add_qml_module(chat
qml/MyTextField.qml qml/MyTextField.qml
qml/MyToolButton.qml qml/MyToolButton.qml
qml/MyWelcomeButton.qml qml/MyWelcomeButton.qml
qml/RemoteModelCard.qml
RESOURCES RESOURCES
icons/antenna_1.svg icons/antenna_1.svg
icons/antenna_2.svg icons/antenna_2.svg
@ -333,12 +193,9 @@ qt_add_qml_module(chat
icons/edit.svg icons/edit.svg
icons/eject.svg icons/eject.svg
icons/email.svg icons/email.svg
icons/file-doc.svg
icons/file-docx.svg
icons/file-md.svg icons/file-md.svg
icons/file-pdf.svg icons/file-pdf.svg
icons/file-txt.svg icons/file-txt.svg
icons/file-xls.svg
icons/file.svg icons/file.svg
icons/github.svg icons/github.svg
icons/globe.svg icons/globe.svg
@ -346,7 +203,6 @@ qt_add_qml_module(chat
icons/gpt4all-48.png icons/gpt4all-48.png
icons/gpt4all.svg icons/gpt4all.svg
icons/gpt4all_transparent.svg icons/gpt4all_transparent.svg
icons/groq.svg
icons/home.svg icons/home.svg
icons/image.svg icons/image.svg
icons/info.svg icons/info.svg
@ -354,14 +210,10 @@ qt_add_qml_module(chat
icons/left_panel_open.svg icons/left_panel_open.svg
icons/local-docs.svg icons/local-docs.svg
icons/models.svg icons/models.svg
icons/mistral.svg
icons/network.svg icons/network.svg
icons/nomic_logo.svg icons/nomic_logo.svg
icons/notes.svg icons/notes.svg
icons/paperclip.svg
icons/plus.svg icons/plus.svg
icons/plus_circle.svg
icons/openai.svg
icons/recycle.svg icons/recycle.svg
icons/regenerate.svg icons/regenerate.svg
icons/search.svg icons/search.svg
@ -374,20 +226,21 @@ qt_add_qml_module(chat
icons/trash.svg icons/trash.svg
icons/twitter.svg icons/twitter.svg
icons/up_down.svg icons/up_down.svg
icons/webpage.svg
icons/you.svg icons/you.svg
) )
qt_add_translations(chat if (GPT4ALL_TRANSLATIONS)
TS_FILES qt_add_translations(chat
${CMAKE_SOURCE_DIR}/translations/gpt4all_en_US.ts TS_FILES
${CMAKE_SOURCE_DIR}/translations/gpt4all_es_MX.ts ${CMAKE_SOURCE_DIR}/translations/gpt4all_en.ts
${CMAKE_SOURCE_DIR}/translations/gpt4all_zh_CN.ts ${CMAKE_SOURCE_DIR}/translations/gpt4all_es_MX.ts
${CMAKE_SOURCE_DIR}/translations/gpt4all_zh_TW.ts ${CMAKE_SOURCE_DIR}/translations/gpt4all_zh_CN.ts
${CMAKE_SOURCE_DIR}/translations/gpt4all_ro_RO.ts ${CMAKE_SOURCE_DIR}/translations/gpt4all_zh_TW.ts
${CMAKE_SOURCE_DIR}/translations/gpt4all_it_IT.ts ${CMAKE_SOURCE_DIR}/translations/gpt4all_ro_RO.ts
${CMAKE_SOURCE_DIR}/translations/gpt4all_pt_BR.ts ${CMAKE_SOURCE_DIR}/translations/gpt4all_it_IT.ts
) ${CMAKE_SOURCE_DIR}/translations/gpt4all_pt_BR.ts
)
endif()
set_target_properties(chat PROPERTIES set_target_properties(chat PROPERTIES
WIN32_EXECUTABLE TRUE WIN32_EXECUTABLE TRUE
@ -406,20 +259,19 @@ if (APPLE)
MACOSX_BUNDLE_GUI_IDENTIFIER gpt4all MACOSX_BUNDLE_GUI_IDENTIFIER gpt4all
MACOSX_BUNDLE_BUNDLE_VERSION ${PROJECT_VERSION} MACOSX_BUNDLE_BUNDLE_VERSION ${PROJECT_VERSION}
MACOSX_BUNDLE_SHORT_VERSION_STRING ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR} MACOSX_BUNDLE_SHORT_VERSION_STRING ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}
RESOURCE "${CHAT_EXE_RESOURCES}"
OUTPUT_NAME gpt4all OUTPUT_NAME gpt4all
) )
add_dependencies(chat ggml-metal) add_dependencies(chat ggml-metal)
endif()
if (APPLE AND GPT4ALL_SIGN_INSTALL) if(NOT MAC_SIGNING_IDENTITY)
if (NOT MAC_SIGNING_IDENTITY) if(NOT DEFINED ENV{MAC_SIGNING_CERT_NAME} AND GPT4ALL_SIGN_INSTALL)
if (NOT DEFINED ENV{MAC_SIGNING_CERT_NAME})
REPORT_MISSING_SIGNING_CONTEXT() REPORT_MISSING_SIGNING_CONTEXT()
endif() endif()
set(MAC_SIGNING_IDENTITY $ENV{MAC_SIGNING_CERT_NAME}) set(MAC_SIGNING_IDENTITY $ENV{MAC_SIGNING_CERT_NAME})
endif() endif()
if (NOT MAC_SIGNING_TID) if(NOT MAC_SIGNING_TID)
if (NOT DEFINED ENV{MAC_NOTARIZATION_TID}) if(NOT DEFINED ENV{MAC_NOTARIZATION_TID} AND GPT4ALL_SIGN_INSTALL)
REPORT_MISSING_SIGNING_CONTEXT() REPORT_MISSING_SIGNING_CONTEXT()
endif() endif()
set(MAC_SIGNING_TID $ENV{MAC_NOTARIZATION_TID}) set(MAC_SIGNING_TID $ENV{MAC_NOTARIZATION_TID})
@ -438,47 +290,37 @@ endif()
target_compile_definitions(chat target_compile_definitions(chat
PRIVATE $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:QT_QML_DEBUG>) PRIVATE $<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:QT_QML_DEBUG>)
target_include_directories(chat PRIVATE src)
# usearch uses the identifier 'slots' which conflicts with Qt's 'slots' keyword # usearch uses the identifier 'slots' which conflicts with Qt's 'slots' keyword
target_compile_definitions(chat PRIVATE QT_NO_SIGNALS_SLOTS_KEYWORDS) target_compile_definitions(chat PRIVATE QT_NO_SIGNALS_SLOTS_KEYWORDS)
target_include_directories(chat PRIVATE deps/usearch/include target_include_directories(chat PRIVATE usearch/include
deps/usearch/fp16/include) usearch/fp16/include)
target_link_libraries(chat if(LINUX)
PRIVATE Qt6::Core Qt6::HttpServer Qt6::Quick Qt6::Sql Qt6::Svg) target_link_libraries(chat
if (GPT4ALL_USING_QTPDF) PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf Qt6::WaylandCompositor)
target_compile_definitions(chat PRIVATE GPT4ALL_USE_QTPDF)
target_link_libraries(chat PRIVATE Qt6::Pdf)
else() else()
# Link PDFium target_link_libraries(chat
target_link_libraries(chat PRIVATE pdfium) PRIVATE Qt6::Quick Qt6::Svg Qt6::HttpServer Qt6::Sql Qt6::Pdf)
endif() endif()
target_link_libraries(chat target_link_libraries(chat
PRIVATE llmodel SingleApplication fmt::fmt duckx::duckx QXlsx) PRIVATE llmodel)
target_include_directories(chat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/deps/json/include)
target_include_directories(chat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/deps/json/include/nlohmann)
target_include_directories(chat PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/deps/minja/include)
if (APPLE)
target_link_libraries(chat PRIVATE ${COCOA_LIBRARY})
endif()
# -- install -- # -- install --
if (APPLE) set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
set(GPT4ALL_LIB_DEST bin/gpt4all.app/Contents/Frameworks)
else() if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(GPT4ALL_LIB_DEST lib) set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install CACHE PATH "..." FORCE)
endif() endif()
install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN}) install(TARGETS chat DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN})
install( install(
TARGETS llmodel TARGETS llmodel
LIBRARY DESTINATION ${GPT4ALL_LIB_DEST} COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib
RUNTIME DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN} # .dll RUNTIME DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN} # .dll
) )
# We should probably iterate through the list of the cmake for backend, but these need to be installed # We should probably iterate through the list of the cmake for backend, but these need to be installed
@ -501,8 +343,8 @@ endif()
install( install(
TARGETS ${MODEL_IMPL_TARGETS} TARGETS ${MODEL_IMPL_TARGETS}
LIBRARY DESTINATION ${GPT4ALL_LIB_DEST} COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib
RUNTIME DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .dll RUNTIME DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .dll
) )
if(APPLE AND GPT4ALL_SIGN_INSTALL) if(APPLE AND GPT4ALL_SIGN_INSTALL)
@ -531,7 +373,7 @@ if (LLMODEL_CUDA)
TARGETS llamamodel-mainline-cuda TARGETS llamamodel-mainline-cuda
llamamodel-mainline-cuda-avxonly llamamodel-mainline-cuda-avxonly
RUNTIME_DEPENDENCY_SET llama-cuda-deps RUNTIME_DEPENDENCY_SET llama-cuda-deps
LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so LIBRARY DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .so/.dylib
RUNTIME DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .dll RUNTIME DESTINATION lib COMPONENT ${COMPONENT_NAME_MAIN} # .dll
) )
if (WIN32) if (WIN32)
@ -545,38 +387,65 @@ if (LLMODEL_CUDA)
endif() endif()
endif() endif()
if (NOT GPT4ALL_USING_QTPDF)
# Install PDFium
if (WIN32)
install(FILES ${PDFium_LIBRARY} DESTINATION bin COMPONENT ${COMPONENT_NAME_MAIN}) # .dll
else()
install(FILES ${PDFium_LIBRARY} DESTINATION ${GPT4ALL_LIB_DEST} COMPONENT ${COMPONENT_NAME_MAIN}) # .so/.dylib
endif()
endif()
if (NOT APPLE) if (NOT APPLE)
install(FILES "${LOCAL_EMBEDDING_MODEL_PATH}" install(FILES "${CMAKE_BINARY_DIR}/resources/${LOCAL_EMBEDDING_MODEL}"
DESTINATION resources DESTINATION resources
COMPONENT ${COMPONENT_NAME_MAIN}) COMPONENT ${COMPONENT_NAME_MAIN})
endif() endif()
if (CMAKE_SYSTEM_NAME MATCHES Linux) set(CPACK_GENERATOR "IFW")
set(CPACK_VERBATIM_VARIABLES YES)
set(CPACK_IFW_VERBOSE ON)
if(${CMAKE_SYSTEM_NAME} MATCHES Linux)
find_program(LINUXDEPLOYQT linuxdeployqt HINTS "$ENV{HOME}/dev/linuxdeployqt/build/tools/linuxdeployqt" "$ENV{HOME}/project/linuxdeployqt/bin") find_program(LINUXDEPLOYQT linuxdeployqt HINTS "$ENV{HOME}/dev/linuxdeployqt/build/tools/linuxdeployqt" "$ENV{HOME}/project/linuxdeployqt/bin")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/deploy-qt-linux.cmake.in" configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/deploy-qt-linux.cmake.in"
"${CMAKE_BINARY_DIR}/cmake/deploy-qt-linux.cmake" @ONLY) "${CMAKE_BINARY_DIR}/cmake/deploy-qt-linux.cmake" @ONLY)
set(CPACK_PRE_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/deploy-qt-linux.cmake) set(CPACK_PRE_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/deploy-qt-linux.cmake)
elseif (CMAKE_SYSTEM_NAME MATCHES Windows) set(CPACK_IFW_ROOT "~/Qt/Tools/QtInstallerFramework/4.6")
find_program(WINDEPLOYQT windeployqt) set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-linux")
set(CPACK_IFW_TARGET_DIRECTORY "@HomeDir@/${COMPONENT_NAME_MAIN}")
elseif(${CMAKE_SYSTEM_NAME} MATCHES Windows)
find_program(WINDEPLOYQT windeployqt HINTS ${_qt_bin_dir})
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/deploy-qt-windows.cmake.in" configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/deploy-qt-windows.cmake.in"
"${CMAKE_BINARY_DIR}/cmake/deploy-qt-windows.cmake" @ONLY) "${CMAKE_BINARY_DIR}/cmake/deploy-qt-windows.cmake" @ONLY)
set(CPACK_PRE_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/deploy-qt-windows.cmake) set(CPACK_PRE_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/deploy-qt-windows.cmake)
elseif (CMAKE_SYSTEM_NAME MATCHES Darwin) set(CPACK_IFW_ROOT "C:/Qt/Tools/QtInstallerFramework/4.6")
find_program(MACDEPLOYQT macdeployqt) set(CPACK_IFW_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.ico")
set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-win64")
set(CPACK_IFW_TARGET_DIRECTORY "@HomeDir@\\${COMPONENT_NAME_MAIN}")
elseif(${CMAKE_SYSTEM_NAME} MATCHES Darwin)
find_program(MACDEPLOYQT macdeployqt HINTS ${_qt_bin_dir})
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/deploy-qt-mac.cmake.in" configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/deploy-qt-mac.cmake.in"
"${CMAKE_BINARY_DIR}/cmake/deploy-qt-mac.cmake" @ONLY) "${CMAKE_BINARY_DIR}/cmake/deploy-qt-mac.cmake" @ONLY)
set(CPACK_PRE_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/deploy-qt-mac.cmake) set(CPACK_PRE_BUILD_SCRIPTS ${CMAKE_BINARY_DIR}/cmake/deploy-qt-mac.cmake)
set(CPACK_IFW_ROOT "~/Qt/Tools/QtInstallerFramework/4.6")
set(CPACK_IFW_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns")
set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-darwin")
set(CPACK_IFW_TARGET_DIRECTORY "@ApplicationsDir@/${COMPONENT_NAME_MAIN}")
set(CPACK_BUNDLE_NAME ${COMPONENT_NAME_MAIN})
set(CPACK_BUNDLE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns")
endif() endif()
set(CPACK_PACKAGE_INSTALL_DIRECTORY ${COMPONENT_NAME_MAIN})
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
SET(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_PACKAGE_HOMEPAGE_URL "https://gpt4all.io")
set(CPACK_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png")
set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE)
set(CPACK_RESOURCE_FILE_README ${CMAKE_CURRENT_SOURCE_DIR}/README.md)
set(CPACK_PACKAGE_EXECUTABLES "GPT4All")
set(CPACK_CREATE_DESKTOP_LINKS "GPT4All")
set(CPACK_IFW_PACKAGE_NAME "GPT4All")
set(CPACK_IFW_PACKAGE_TITLE "GPT4All Installer")
set(CPACK_IFW_PACKAGE_PUBLISHER "Nomic, Inc.")
set(CPACK_IFW_PRODUCT_URL "https://gpt4all.io")
set(CPACK_IFW_PACKAGE_WIZARD_STYLE "Aero")
set(CPACK_IFW_PACKAGE_LOGO "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png")
set(CPACK_IFW_PACKAGE_WINDOW_ICON "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-32.png")
set(CPACK_IFW_PACKAGE_WIZARD_SHOW_PAGE_LIST OFF)
include(InstallRequiredSystemLibraries) include(InstallRequiredSystemLibraries)
include(CPack) include(CPack)
include(CPackIFW) include(CPackIFW)
@ -588,35 +457,20 @@ endif()
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} ESSENTIAL FORCED_INSTALLATION) cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} ESSENTIAL FORCED_INSTALLATION)
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} VERSION ${APP_VERSION}) cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} VERSION ${APP_VERSION})
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} LICENSES "MIT LICENSE" ${CPACK_RESOURCE_FILE_LICENSE}) cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} LICENSES "MIT LICENSE" ${CPACK_RESOURCE_FILE_LICENSE})
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/installer_gpt4all_component.qs") cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/installerscript.qs")
cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} REPLACES "gpt4all-chat") #Was used in very earliest prototypes cpack_ifw_configure_component(${COMPONENT_NAME_MAIN} REPLACES "gpt4all-chat") #Was used in very earliest prototypes
if (APPLE AND GPT4ALL_SIGN_INSTALL)
if (GPT4ALL_OFFLINE_INSTALLER)
cpack_add_component(maintenancetool HIDDEN)
else()
cpack_add_component(maintenancetool HIDDEN DOWNLOADED)
endif()
cpack_ifw_configure_component(maintenancetool ESSENTIAL FORCED_INSTALLATION)
cpack_ifw_configure_component(maintenancetool VERSION ${APP_VERSION})
cpack_ifw_configure_component(maintenancetool SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/installer_maintenancetool_component.qs")
endif()
if (GPT4ALL_LOCALHOST) if (GPT4ALL_LOCALHOST)
cpack_ifw_add_repository("GPT4AllRepository" URL "http://localhost/repository") cpack_ifw_add_repository("GPT4AllRepository" URL "http://localhost/repository")
elseif (GPT4ALL_OFFLINE_INSTALLER) elseif(GPT4ALL_OFFLINE_INSTALLER)
add_compile_definitions(GPT4ALL_OFFLINE_INSTALLER) add_compile_definitions(GPT4ALL_OFFLINE_INSTALLER)
else() else()
if (CMAKE_SYSTEM_NAME MATCHES Linux) if(${CMAKE_SYSTEM_NAME} MATCHES Linux)
cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/linux/repository") cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/linux/repository")
elseif (CMAKE_SYSTEM_NAME MATCHES Windows) elseif(${CMAKE_SYSTEM_NAME} MATCHES Windows)
# To sign the target on windows have to create a batch script and use it as a custom target and then use CPACK_IFW_EXTRA_TARGETS to set this extra target #To sign the target on windows have to create a batch script and use it as a custom target and then use CPACK_IFW_EXTRA_TARGETS to set this extra target
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$") cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows/repository")
cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows/repository") elseif(${CMAKE_SYSTEM_NAME} MATCHES Darwin)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$") cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/mac/repository")
cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/windows_arm/repository") endif()
endif()
elseif (CMAKE_SYSTEM_NAME MATCHES Darwin)
cpack_ifw_add_repository("GPT4AllRepository" URL "https://gpt4all.io/installer_repos/mac/repository")
endif()
endif() endif()
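The packaging options above (`GPT4ALL_OFFLINE_INSTALLER`, `GPT4ALL_SIGN_INSTALL`, the IFW CPack generator) are driven from the configure and package steps on the command line. A rough, hedged sketch of one possible invocation follows; the build directory name and the chosen options are illustrative, not prescribed by the project:

```
# Configure the chat app with the offline-installer option enabled
cmake -S gpt4all-chat -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGPT4ALL_OFFLINE_INSTALLER=ON

# Build the app and the llmodel backend pulled in via add_subdirectory
cmake --build build

# Produce the Qt Installer Framework package from the CPack config generated above
cpack --config build/CPackConfig.cmake
```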

45
gpt4all-chat/README.md Normal file
View File

@ -0,0 +1,45 @@
# gpt4all-chat
Cross-platform Qt-based GUI for GPT4All versions with GPT-J as the base
model. NOTE: The model seen in the screenshot is actually a preview of a
new training run for GPT4All based on GPT-J. The GPT4All project is busy
at work getting ready to release this model, including installers for all
three major OSes. In the meantime, you can try this UI out with the original
GPT-J model by following the build instructions below.
![image](https://user-images.githubusercontent.com/50458173/231464085-da9edff6-a593-410e-8f38-7513f75c8aab.png)
## Install
One click installers for macOS, Linux, and Windows at https://gpt4all.io
## Features
* Cross-platform (Linux, Windows, macOS)
* A UI designed to look and feel like the chat interfaces you've come to expect
* Check for updates so you can always stay current with the latest models
* Easy to install with precompiled binaries available for all three major desktop platforms
* Multi-model - ability to load more than one model and switch between them
* Multi-chat - a list of current and past chats and the ability to save/delete/export and switch between them
* Supports models that are supported by llama.cpp
* Model downloader in GUI featuring many popular open source models
* Settings dialog to change temp, top_p, min_p, top_k, threads, etc.
* Copy your conversation to clipboard
* RAG via LocalDocs feature
* Check for updates to get the very latest GUI
## Building and running
* Follow the visual instructions on the [build_and_run](build_and_run.md) page
## Getting the latest
If you've already checked out the source code and/or built the program, make sure to do a `git fetch` to get the latest changes, and also run `git submodule update --init --recursive` to update the submodules. (If you ever run into trouble, deinitializing via `git submodule deinit -f .` and then initializing again via `git submodule update --init --recursive` fixes most issues.)
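Putting that together, an update of an existing checkout might look roughly like the following sketch (the `git pull` step is implied rather than spelled out above):

```
# Get the latest changes from the remote and update your branch
git fetch
git pull

# Bring the submodules in line with the new checkout
git submodule update --init --recursive

# If the submodules ever get into a bad state, re-initialize them
git submodule deinit -f .
git submodule update --init --recursive
```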
## Contributing
* Pull requests welcome. See the feature wish list for ideas :)
## License
The source code of this chat interface is currently under a MIT license.

View File

@ -12,21 +12,21 @@ On Windows and Linux, building GPT4All with full GPU support requires the [Vulka
## Note for Linux users ## Note for Linux users
Linux users may install Qt via their distro's official packages instead of using the Qt installer. You need at least Qt 6.5, with support for QPdf and the Qt HTTP Server. You may build from the CLI using CMake and Ninja, or with Qt Creator as described later in this document. Linux users may install Qt via their distro's official packages instead of using the Qt installer. You need at least Qt 6.5, with support for QPdf and the Qt HTTP Server. It should be straightforward to build with just cmake and make, but you may continue to follow these instructions to build with Qt Creator.
On Arch Linux, this looks like: On Arch Linux, this looks like:
``` ```
sudo pacman -S --needed cmake gcc ninja qt6-5compat qt6-base qt6-declarative qt6-httpserver qt6-svg qtcreator sudo pacman -S --needed base-devel qt6-base qt6-declarative qt6-wayland qt6-svg qt6-httpserver qt6-webengine qt6-5compat qt6-shadertools qtcreator cmake ninja
``` ```
On Ubuntu 23.04, this looks like: On Ubuntu 23.04, this looks like:
``` ```
sudo apt install cmake g++ libgl-dev libqt6core5compat6 ninja-build qml6-module-qt5compat-graphicaleffects qt6-base-private-dev qt6-declarative-dev qt6-httpserver-dev qt6-svg-dev qtcreator sudo apt install build-essential qt6-base-dev qt6-declarative-dev qt6-wayland-dev qt6-svg-dev qt6-httpserver-dev qt6-webengine-dev libqt6core5compat6 qml6-module-qt5compat-graphicaleffects libqt6shadertools6 qtcreator cmake ninja-build
``` ```
On Fedora 39, this looks like: On Fedora 39, this looks like:
``` ```
sudo dnf install cmake gcc-c++ ninja-build qt-creator qt5-qtgraphicaleffects qt6-qt5compat qt6-qtbase-private-devel qt6-qtdeclarative-devel qt6-qthttpserver-devel qt6-qtsvg-devel sudo dnf install make gcc gcc-c++ qt6-qtbase-devel qt6-qtdeclarative-devel qt6-qtwayland-devel qt6-qtsvg-devel qt6-qthttpserver-devel qt6-qtwebengine-devel qt6-qt5compat qt5-qtgraphicaleffects qt6-qtshadertools qt-creator cmake ninja-build
``` ```
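For the CMake-and-Ninja route mentioned above, a rough sketch follows. It assumes you run it from the repository root, that the `build` directory name is arbitrary, and that Qt is found automatically (add `-DCMAKE_PREFIX_PATH=/path/to/Qt/6.x/gcc_64` if it is not, e.g. when using the Qt online installer rather than distro packages):

```
# Configure the chat UI
cmake -S gpt4all-chat -B build -G Ninja -DCMAKE_BUILD_TYPE=Release

# Build the app together with the llmodel backend
cmake --build build

# The executable is placed under build/bin (built from the 'chat' target)
./build/bin/chat
```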
## Download Qt ## Download Qt
@ -49,7 +49,10 @@ Under this release (e.g. Qt 6.5.0), select the target platform:
- On Windows, it is called "MSVC 2019 64-bit" (for 64-bit x86 CPUs). MinGW has not been tested. - On Windows, it is called "MSVC 2019 64-bit" (for 64-bit x86 CPUs). MinGW has not been tested.
Under this release, select the following additional components: Under this release, select the following additional components:
- Qt Quick 3D
- Qt Wayland Compositor (for Linux only)
- Qt 5 Compatibility Module - Qt 5 Compatibility Module
- Qt Shader Tools
- Additional Libraries: - Additional Libraries:
- Qt HTTP Server - Qt HTTP Server
- Qt PDF - Qt PDF

View File

@ -1,32 +1,24 @@
#include "chat.h" #include "chat.h"
#include "chatlistmodel.h" #include "chatlistmodel.h"
#include "mysettings.h"
#include "network.h" #include "network.h"
#include "server.h" #include "server.h"
#include "tool.h"
#include "toolcallparser.h"
#include "toolmodel.h"
#include <QByteArray>
#include <QDataStream> #include <QDataStream>
#include <QDateTime>
#include <QDebug> #include <QDebug>
#include <QFile>
#include <QFileInfo>
#include <QIODevice>
#include <QLatin1String> #include <QLatin1String>
#include <QMap> #include <QMap>
#include <QRegularExpression>
#include <QString> #include <QString>
#include <QStringList>
#include <QTextStream>
#include <Qt> #include <Qt>
#include <QtAssert> #include <QtGlobal>
#include <QtLogging> #include <QtLogging>
#include <optional>
#include <utility> #include <utility>
using namespace ToolEnums;
Chat::Chat(QObject *parent) Chat::Chat(QObject *parent)
: QObject(parent) : QObject(parent)
, m_id(Network::globalInstance()->generateUniqueId()) , m_id(Network::globalInstance()->generateUniqueId())
@ -40,7 +32,7 @@ Chat::Chat(QObject *parent)
connectLLM(); connectLLM();
} }
Chat::Chat(server_tag_t, QObject *parent) Chat::Chat(bool isServer, QObject *parent)
: QObject(parent) : QObject(parent)
, m_id(Network::globalInstance()->generateUniqueId()) , m_id(Network::globalInstance()->generateUniqueId())
, m_name(tr("Server Chat")) , m_name(tr("Server Chat"))
@ -70,23 +62,26 @@ void Chat::connectLLM()
connect(m_llmodel, &ChatLLM::responseStopped, this, &Chat::responseStopped, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::responseStopped, this, &Chat::responseStopped, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::modelLoadingError, this, &Chat::handleModelLoadingError, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::modelLoadingError, this, &Chat::handleModelLoadingError, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::modelLoadingWarning, this, &Chat::modelLoadingWarning, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::modelLoadingWarning, this, &Chat::modelLoadingWarning, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::recalcChanged, this, &Chat::handleRecalculating, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::generatedQuestionFinished, this, &Chat::generatedQuestionFinished, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::generatedQuestionFinished, this, &Chat::generatedQuestionFinished, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::loadedModelInfoChanged, this, &Chat::loadedModelInfoChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::loadedModelInfoChanged, this, &Chat::loadedModelInfoChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelChanged, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection);
connect(m_llmodel, &ChatLLM::trySwitchContextOfLoadedModelCompleted, this, &Chat::handleTrySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection); connect(m_llmodel, &ChatLLM::trySwitchContextOfLoadedModelCompleted, this, &Chat::handleTrySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection);
connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection); connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection); connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection);
connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection); connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection);
connect(this, &Chat::loadModelRequested, m_llmodel, &ChatLLM::loadModel, Qt::QueuedConnection);
connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection); connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection);
connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection); connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection);
connect(this, &Chat::resetResponseRequested, m_llmodel, &ChatLLM::resetResponse, Qt::QueuedConnection);
connect(this, &Chat::resetContextRequested, m_llmodel, &ChatLLM::resetContext, Qt::QueuedConnection);
connect(this, &Chat::processSystemPromptRequested, m_llmodel, &ChatLLM::processSystemPrompt, Qt::QueuedConnection);
connect(this, &Chat::collectionListChanged, m_collectionModel, &LocalDocsCollectionsModel::setCollections); connect(this, &Chat::collectionListChanged, m_collectionModel, &LocalDocsCollectionsModel::setCollections);
connect(ModelList::globalInstance(), &ModelList::modelInfoChanged, this, &Chat::handleModelInfoChanged);
} }
void Chat::reset() void Chat::reset()
@ -94,15 +89,25 @@ void Chat::reset()
stopGenerating(); stopGenerating();
// Erase our current on disk representation as we're completely resetting the chat along with id // Erase our current on disk representation as we're completely resetting the chat along with id
ChatListModel::globalInstance()->removeChatFile(this); ChatListModel::globalInstance()->removeChatFile(this);
emit resetContextRequested();
m_id = Network::globalInstance()->generateUniqueId(); m_id = Network::globalInstance()->generateUniqueId();
emit idChanged(m_id); emit idChanged(m_id);
// NOTE: We deliberately do no reset the name or creation date to indicate that this was originally // NOTE: We deliberately do no reset the name or creation date to indicate that this was originally
// an older chat that was reset for another purpose. Resetting this data will lead to the chat // an older chat that was reset for another purpose. Resetting this data will lead to the chat
// name label changing back to 'New Chat' and showing up in the chat model list as a 'New Chat' // name label changing back to 'New Chat' and showing up in the chat model list as a 'New Chat'
// further down in the list. This might surprise the user. In the future, we might get rid of // further down in the list. This might surprise the user. In the future, we might get rid of
// the "reset context" button in the UI. // the "reset context" button in the UI. Right now, by changing the model in the combobox dropdown
// we effectively do a reset context. We *have* to do this right now when switching between different
// types of models. The only way to get rid of that would be a very long recalculate where we rebuild
// the context if we switch between different types of models. Probably the right way to fix this
// is to allow switching models but throwing up a dialog warning users if we switch between types
// of models that a long recalculation will ensue.
m_chatModel->clear(); m_chatModel->clear();
m_needsSave = true; }
void Chat::processSystemPrompt()
{
emit processSystemPromptRequested();
} }
void Chat::resetResponseState() void Chat::resetResponseState()
@ -120,88 +125,45 @@ void Chat::resetResponseState()
emit responseStateChanged(); emit responseStateChanged();
} }
void Chat::newPromptResponsePair(const QString &prompt, const QList<QUrl> &attachedUrls) void Chat::prompt(const QString &prompt)
{
QStringList attachedContexts;
QList<PromptAttachment> attachments;
for (const QUrl &url : attachedUrls) {
Q_ASSERT(url.isLocalFile());
const QString localFilePath = url.toLocalFile();
const QFileInfo info(localFilePath);
Q_ASSERT(
info.suffix().toLower() == "xlsx" ||
info.suffix().toLower() == "txt" ||
info.suffix().toLower() == "md" ||
info.suffix().toLower() == "rst"
);
PromptAttachment attached;
attached.url = url;
QFile file(localFilePath);
if (file.open(QIODevice::ReadOnly)) {
attached.content = file.readAll();
file.close();
} else {
qWarning() << "ERROR: Failed to open the attachment:" << localFilePath;
continue;
}
attachments << attached;
attachedContexts << attached.processedContent();
}
QString promptPlusAttached = prompt;
if (!attachedContexts.isEmpty())
promptPlusAttached = attachedContexts.join("\n\n") + "\n\n" + prompt;
resetResponseState();
if (int count = m_chatModel->count())
m_chatModel->updateCurrentResponse(count - 1, false);
m_chatModel->appendPrompt(prompt, attachments);
m_chatModel->appendResponse();
emit promptRequested(m_collections);
m_needsSave = true;
}
void Chat::regenerateResponse(int index)
{ {
resetResponseState(); resetResponseState();
emit regenerateResponseRequested(index); emit promptRequested(m_collections, prompt);
m_needsSave = true;
} }
QVariant Chat::popPrompt(int index) void Chat::regenerateResponse()
{ {
auto content = m_llmodel->popPrompt(index); const int index = m_chatModel->count() - 1;
m_needsSave = true; m_chatModel->updateSources(index, QList<ResultInfo>());
if (content) return *content; emit regenerateResponseRequested();
return QVariant::fromValue(nullptr);
} }
void Chat::stopGenerating() void Chat::stopGenerating()
{ {
// In future if we have more than one tool we'll have to keep track of which tools are possibly
// running, but for now we only have one
Tool *toolInstance = ToolModel::globalInstance()->get(ToolCallConstants::CodeInterpreterFunction);
Q_ASSERT(toolInstance);
toolInstance->interrupt();
m_llmodel->stopGenerating(); m_llmodel->stopGenerating();
} }
QString Chat::response() const
{
return m_response;
}
Chat::ResponseState Chat::responseState() const Chat::ResponseState Chat::responseState() const
{ {
return m_responseState; return m_responseState;
} }
void Chat::handleResponseChanged() void Chat::handleResponseChanged(const QString &response)
{ {
if (m_responseState != Chat::ResponseGeneration) { if (m_responseState != Chat::ResponseGeneration) {
m_responseState = Chat::ResponseGeneration; m_responseState = Chat::ResponseGeneration;
emit responseStateChanged(); emit responseStateChanged();
} }
m_response = response;
const int index = m_chatModel->count() - 1;
m_chatModel->updateValue(index, this->response());
emit responseChanged();
} }
void Chat::handleModelLoadingPercentageChanged(float loadingPercentage) void Chat::handleModelLoadingPercentageChanged(float loadingPercentage)
@ -228,7 +190,7 @@ void Chat::handleModelLoadingPercentageChanged(float loadingPercentage)
void Chat::promptProcessing() void Chat::promptProcessing()
{ {
m_responseState = !databaseResults().isEmpty() ? Chat::LocalDocsProcessing : Chat::PromptProcessing; m_responseState = !databaseResults().isEmpty() ? Chat::LocalDocsProcessing : Chat::PromptProcessing;
emit responseStateChanged(); emit responseStateChanged();
} }
void Chat::generatingQuestions() void Chat::generatingQuestions()
@ -241,79 +203,20 @@ void Chat::responseStopped(qint64 promptResponseMs)
{ {
m_tokenSpeed = QString(); m_tokenSpeed = QString();
emit tokenSpeedChanged(); emit tokenSpeedChanged();
emit responseChanged();
m_responseInProgress = false; m_responseInProgress = false;
m_responseState = Chat::ResponseStopped; m_responseState = Chat::ResponseStopped;
emit responseInProgressChanged(); emit responseInProgressChanged();
emit responseStateChanged(); emit responseStateChanged();
if (m_generatedName.isEmpty())
emit generateNameRequested();
const QString possibleToolcall = m_chatModel->possibleToolcall(); Network::globalInstance()->trackChatEvent("response_complete", {
Network::globalInstance()->trackChatEvent("response_stopped", {
{"first", m_firstResponse}, {"first", m_firstResponse},
{"message_count", chatModel()->count()}, {"message_count", chatModel()->count()},
{"$duration", promptResponseMs / 1000.}, {"$duration", promptResponseMs / 1000.},
}); });
ToolCallParser parser;
parser.update(possibleToolcall.toUtf8());
if (parser.state() == ToolEnums::ParseState::Complete && parser.startTag() != ToolCallConstants::ThinkStartTag)
processToolCall(parser.toolCall());
else
responseComplete();
}
void Chat::processToolCall(const QString &toolCall)
{
m_responseState = Chat::ToolCallGeneration;
emit responseStateChanged();
// Regex to remove the formatting around the code
static const QRegularExpression regex("^\\s*```javascript\\s*|\\s*```\\s*$");
QString code = toolCall;
code.remove(regex);
code = code.trimmed();
// Right now the code interpreter is the only available tool
Tool *toolInstance = ToolModel::globalInstance()->get(ToolCallConstants::CodeInterpreterFunction);
Q_ASSERT(toolInstance);
connect(toolInstance, &Tool::runComplete, this, &Chat::toolCallComplete, Qt::SingleShotConnection);
// The param is the code
const ToolParam param = { "code", ToolEnums::ParamType::String, code };
m_responseInProgress = true;
emit responseInProgressChanged();
toolInstance->run({param});
}
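The static regex above only strips an optional ```javascript fence that the model tends to wrap around its generated code before the text is handed to the code interpreter. A minimal standalone sketch of that cleanup step, with an invented tool-call string:

// Sketch: stripping the Markdown fence from a model-generated tool call.
// The input string is invented; the regex mirrors the one used above.
#include <QDebug>
#include <QRegularExpression>
#include <QString>

int main()
{
    const QString toolCall = QStringLiteral("```javascript\nconsole.log(2 + 2);\n```");
    static const QRegularExpression fence("^\\s*```javascript\\s*|\\s*```\\s*$");
    QString code = toolCall;
    code.remove(fence);      // removes the opening and closing fences
    code = code.trimmed();   // -> "console.log(2 + 2);"
    qDebug().noquote() << code;
    return 0;
}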
void Chat::toolCallComplete(const ToolCallInfo &info)
{
// Update the current response with meta information about toolcall and re-parent
m_chatModel->updateToolCall(info);
++m_consecutiveToolCalls;
m_responseInProgress = false;
emit responseInProgressChanged();
// We limit the number of consecutive toolcalls otherwise we get into a potentially endless loop
if (m_consecutiveToolCalls < 3 || info.error == ToolEnums::Error::NoError) {
resetResponseState();
emit promptRequested(m_collections); // triggers a new response
return;
}
responseComplete();
}
void Chat::responseComplete()
{
if (m_generatedName.isEmpty())
emit generateNameRequested();
m_responseState = Chat::ResponseStopped;
emit responseStateChanged();
m_consecutiveToolCalls = 0;
m_firstResponse = false; m_firstResponse = false;
} }
@ -324,16 +227,36 @@ ModelInfo Chat::modelInfo() const
void Chat::setModelInfo(const ModelInfo &modelInfo) void Chat::setModelInfo(const ModelInfo &modelInfo)
{ {
if (m_modelInfo != modelInfo) { if (m_modelInfo == modelInfo && isModelLoaded())
m_modelInfo = modelInfo;
m_needsSave = true;
} else if (isModelLoaded())
return; return;
m_modelInfo = modelInfo;
emit modelInfoChanged(); emit modelInfoChanged();
emit modelChangeRequested(modelInfo); emit modelChangeRequested(modelInfo);
} }
void Chat::newPromptResponsePair(const QString &prompt)
{
resetResponseState();
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
m_chatModel->appendPrompt("Prompt: ", prompt);
m_chatModel->appendResponse("Response: ", prompt);
emit resetResponseRequested();
}
void Chat::serverNewPromptResponsePair(const QString &prompt)
{
resetResponseState();
m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
m_chatModel->appendPrompt("Prompt: ", prompt);
m_chatModel->appendResponse("Response: ", prompt);
}
bool Chat::isRecalc() const
{
return m_llmodel->isRecalc();
}
void Chat::unloadAndDeleteLater() void Chat::unloadAndDeleteLater()
{ {
if (!isModelLoaded()) { if (!isModelLoaded()) {
@ -383,17 +306,24 @@ void Chat::trySwitchContextOfLoadedModel()
void Chat::generatedNameChanged(const QString &name) void Chat::generatedNameChanged(const QString &name)
{ {
m_generatedName = name; // Only use the first three words maximum and remove newlines and extra spaces
m_name = name; m_generatedName = name.simplified();
QStringList words = m_generatedName.split(' ', Qt::SkipEmptyParts);
int wordCount = qMin(7, words.size());
m_name = words.mid(0, wordCount).join(' ');
emit nameChanged(); emit nameChanged();
m_needsSave = true;
} }
void Chat::generatedQuestionFinished(const QString &question) void Chat::generatedQuestionFinished(const QString &question)
{ {
m_generatedQuestions << question; m_generatedQuestions << question;
emit generatedQuestionsChanged(); emit generatedQuestionsChanged();
m_needsSave = true; }
void Chat::handleRecalculating()
{
Network::globalInstance()->trackChatEvent("recalc_context", { {"length", m_chatModel->count()} });
emit recalcChanged();
} }
void Chat::handleModelLoadingError(const QString &error) void Chat::handleModelLoadingError(const QString &error)
@ -430,26 +360,17 @@ QString Chat::fallbackReason() const
void Chat::handleDatabaseResultsChanged(const QList<ResultInfo> &results) void Chat::handleDatabaseResultsChanged(const QList<ResultInfo> &results)
{ {
m_databaseResults = results; m_databaseResults = results;
m_needsSave = true; const int index = m_chatModel->count() - 1;
m_chatModel->updateSources(index, m_databaseResults);
} }
// we need to notify listeners of the modelInfo property when its properties are updated,
// since it's a gadget and can't do that on its own
void Chat::handleModelInfoChanged(const ModelInfo &modelInfo) void Chat::handleModelInfoChanged(const ModelInfo &modelInfo)
{
if (!m_modelInfo.id().isNull() && modelInfo.id() == m_modelInfo.id())
emit modelInfoChanged();
}
// react if a new model is loaded
void Chat::handleModelChanged(const ModelInfo &modelInfo)
{ {
if (m_modelInfo == modelInfo) if (m_modelInfo == modelInfo)
return; return;
m_modelInfo = modelInfo; m_modelInfo = modelInfo;
emit modelInfoChanged(); emit modelInfoChanged();
m_needsSave = true;
} }
void Chat::handleTrySwitchContextOfLoadedModelCompleted(int value) void Chat::handleTrySwitchContextOfLoadedModelCompleted(int value)
@ -464,14 +385,17 @@ bool Chat::serialize(QDataStream &stream, int version) const
stream << m_id; stream << m_id;
stream << m_name; stream << m_name;
stream << m_userName; stream << m_userName;
if (version >= 5) if (version > 4)
stream << m_modelInfo.id(); stream << m_modelInfo.id();
else else
stream << m_modelInfo.filename(); stream << m_modelInfo.filename();
if (version >= 3) if (version > 2)
stream << m_collections; stream << m_collections;
if (!m_llmodel->serialize(stream, version)) const bool serializeKV = MySettings::globalInstance()->saveChatsContext();
if (version > 5)
stream << serializeKV;
if (!m_llmodel->serialize(stream, version, serializeKV))
return false; return false;
if (!m_chatModel->serialize(stream, version)) if (!m_chatModel->serialize(stream, version))
return false; return false;
@ -490,7 +414,7 @@ bool Chat::deserialize(QDataStream &stream, int version)
QString modelId; QString modelId;
stream >> modelId; stream >> modelId;
if (version >= 5) { if (version > 4) {
if (ModelList::globalInstance()->contains(modelId)) if (ModelList::globalInstance()->contains(modelId))
m_modelInfo = ModelList::globalInstance()->modelInfo(modelId); m_modelInfo = ModelList::globalInstance()->modelInfo(modelId);
} else { } else {
@ -500,23 +424,27 @@ bool Chat::deserialize(QDataStream &stream, int version)
if (!m_modelInfo.id().isEmpty()) if (!m_modelInfo.id().isEmpty())
emit modelInfoChanged(); emit modelInfoChanged();
if (version >= 3) { bool discardKV = m_modelInfo.id().isEmpty();
if (version > 2) {
stream >> m_collections; stream >> m_collections;
emit collectionListChanged(m_collections); emit collectionListChanged(m_collections);
} }
bool deserializeKV = true;
if (version > 5)
stream >> deserializeKV;
m_llmodel->setModelInfo(m_modelInfo); m_llmodel->setModelInfo(m_modelInfo);
if (!m_llmodel->deserialize(stream, version)) if (!m_llmodel->deserialize(stream, version, deserializeKV, discardKV))
return false; return false;
if (!m_chatModel->deserialize(stream, version)) if (!m_chatModel->deserialize(stream, version))
return false; return false;
emit chatModelChanged(); m_llmodel->setStateFromText(m_chatModel->text());
if (stream.status() != QDataStream::Ok)
return false;
m_needsSave = false; emit chatModelChanged();
return true; return stream.status() == QDataStream::Ok;
} }
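Read together, Chat::serialize() and Chat::deserialize() fix the field order of one chat record inside a .chat file. The following is a rough sketch, not the application's actual loader, of reading the leading fields with the same version checks as above; the trailing ChatLLM and ChatModel payloads are elided:

#include <QDataStream>
#include <QString>
#include <QStringList>

// Sketch only: mirrors the field order written by Chat::serialize().
static bool readChatRecordHeader(QDataStream &stream, int version)
{
    QString id, name, userName, modelId;
    QStringList collections;
    stream >> id >> name >> userName;
    stream >> modelId;             // ModelInfo::id() for version >= 5, filename() before that
    if (version >= 3)
        stream >> collections;     // enabled LocalDocs collections
    // ...followed by the serialized ChatLLM state and the ChatModel messages...
    return stream.status() == QDataStream::Ok;
}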
QList<QString> Chat::collectionList() const QList<QString> Chat::collectionList() const
@ -536,7 +464,6 @@ void Chat::addCollection(const QString &collection)
m_collections.append(collection); m_collections.append(collection);
emit collectionListChanged(m_collections); emit collectionListChanged(m_collections);
m_needsSave = true;
} }
void Chat::removeCollection(const QString &collection) void Chat::removeCollection(const QString &collection)
@ -546,5 +473,4 @@ void Chat::removeCollection(const QString &collection)
m_collections.removeAll(collection); m_collections.removeAll(collection);
emit collectionListChanged(m_collections); emit collectionListChanged(m_collections);
m_needsSave = true;
} }


@ -3,26 +3,18 @@
#include "chatllm.h" #include "chatllm.h"
#include "chatmodel.h" #include "chatmodel.h"
#include "database.h" #include "database.h" // IWYU pragma: keep
#include "localdocsmodel.h" #include "localdocsmodel.h" // IWYU pragma: keep
#include "modellist.h" #include "modellist.h"
#include "tool.h"
#include <QDateTime>
#include <QList> #include <QList>
#include <QObject> #include <QObject>
#include <QQmlEngine> // IWYU pragma: keep #include <QQmlEngine>
#include <QString> #include <QString>
#include <QStringList> // IWYU pragma: keep #include <QtGlobal>
#include <QUrl>
#include <QVariant>
#include <QtTypes>
// IWYU pragma: no_forward_declare LocalDocsCollectionsModel
// IWYU pragma: no_forward_declare ToolCallInfo
class QDataStream; class QDataStream;
class Chat : public QObject class Chat : public QObject
{ {
Q_OBJECT Q_OBJECT
@ -32,13 +24,15 @@ class Chat : public QObject
Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged) Q_PROPERTY(bool isModelLoaded READ isModelLoaded NOTIFY isModelLoadedChanged)
Q_PROPERTY(bool isCurrentlyLoading READ isCurrentlyLoading NOTIFY isCurrentlyLoadingChanged) Q_PROPERTY(bool isCurrentlyLoading READ isCurrentlyLoading NOTIFY isCurrentlyLoadingChanged)
Q_PROPERTY(float modelLoadingPercentage READ modelLoadingPercentage NOTIFY modelLoadingPercentageChanged) Q_PROPERTY(float modelLoadingPercentage READ modelLoadingPercentage NOTIFY modelLoadingPercentageChanged)
Q_PROPERTY(QString response READ response NOTIFY responseChanged)
Q_PROPERTY(ModelInfo modelInfo READ modelInfo WRITE setModelInfo NOTIFY modelInfoChanged) Q_PROPERTY(ModelInfo modelInfo READ modelInfo WRITE setModelInfo NOTIFY modelInfoChanged)
Q_PROPERTY(bool responseInProgress READ responseInProgress NOTIFY responseInProgressChanged) Q_PROPERTY(bool responseInProgress READ responseInProgress NOTIFY responseInProgressChanged)
Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
Q_PROPERTY(bool isServer READ isServer NOTIFY isServerChanged) Q_PROPERTY(bool isServer READ isServer NOTIFY isServerChanged)
Q_PROPERTY(ResponseState responseState READ responseState NOTIFY responseStateChanged) Q_PROPERTY(ResponseState responseState READ responseState NOTIFY responseStateChanged)
Q_PROPERTY(QList<QString> collectionList READ collectionList NOTIFY collectionListChanged) Q_PROPERTY(QList<QString> collectionList READ collectionList NOTIFY collectionListChanged)
Q_PROPERTY(QString modelLoadingError READ modelLoadingError NOTIFY modelLoadingErrorChanged) Q_PROPERTY(QString modelLoadingError READ modelLoadingError NOTIFY modelLoadingErrorChanged)
Q_PROPERTY(QString tokenSpeed READ tokenSpeed NOTIFY tokenSpeedChanged) Q_PROPERTY(QString tokenSpeed READ tokenSpeed NOTIFY tokenSpeedChanged);
Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged) Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged)
Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged) Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)
Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged) Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)
@ -50,23 +44,18 @@ class Chat : public QObject
QML_UNCREATABLE("Only creatable from c++!") QML_UNCREATABLE("Only creatable from c++!")
public: public:
// tag for constructing a server chat
struct server_tag_t { explicit server_tag_t() = default; };
static inline constexpr server_tag_t server_tag = server_tag_t();
enum ResponseState { enum ResponseState {
ResponseStopped, ResponseStopped,
LocalDocsRetrieval, LocalDocsRetrieval,
LocalDocsProcessing, LocalDocsProcessing,
PromptProcessing, PromptProcessing,
GeneratingQuestions, GeneratingQuestions,
ResponseGeneration, ResponseGeneration
ToolCallGeneration
}; };
Q_ENUM(ResponseState) Q_ENUM(ResponseState)
explicit Chat(QObject *parent = nullptr); explicit Chat(QObject *parent = nullptr);
explicit Chat(server_tag_t, QObject *parent = nullptr); explicit Chat(bool isServer, QObject *parent = nullptr);
virtual ~Chat(); virtual ~Chat();
void destroy() { m_llmodel->destroy(); } void destroy() { m_llmodel->destroy(); }
void connectLLM(); void connectLLM();
@ -77,27 +66,29 @@ public:
{ {
m_userName = name; m_userName = name;
emit nameChanged(); emit nameChanged();
m_needsSave = true;
} }
ChatModel *chatModel() { return m_chatModel; } ChatModel *chatModel() { return m_chatModel; }
bool isNewChat() const { return m_name == tr("New Chat") && !m_chatModel->count(); } bool isNewChat() const { return m_name == tr("New Chat") && !m_chatModel->count(); }
Q_INVOKABLE void reset(); Q_INVOKABLE void reset();
Q_INVOKABLE void processSystemPrompt();
bool isModelLoaded() const { return m_modelLoadingPercentage == 1.0f; } bool isModelLoaded() const { return m_modelLoadingPercentage == 1.0f; }
bool isCurrentlyLoading() const { return m_modelLoadingPercentage > 0.0f && m_modelLoadingPercentage < 1.0f; } bool isCurrentlyLoading() const { return m_modelLoadingPercentage > 0.0f && m_modelLoadingPercentage < 1.0f; }
float modelLoadingPercentage() const { return m_modelLoadingPercentage; } float modelLoadingPercentage() const { return m_modelLoadingPercentage; }
Q_INVOKABLE void newPromptResponsePair(const QString &prompt, const QList<QUrl> &attachedUrls = {}); Q_INVOKABLE void prompt(const QString &prompt);
Q_INVOKABLE void regenerateResponse(int index); Q_INVOKABLE void regenerateResponse();
Q_INVOKABLE QVariant popPrompt(int index);
Q_INVOKABLE void stopGenerating(); Q_INVOKABLE void stopGenerating();
Q_INVOKABLE void newPromptResponsePair(const QString &prompt);
QList<ResultInfo> databaseResults() const { return m_databaseResults; } QList<ResultInfo> databaseResults() const { return m_databaseResults; }
QString response() const;
bool responseInProgress() const { return m_responseInProgress; } bool responseInProgress() const { return m_responseInProgress; }
ResponseState responseState() const; ResponseState responseState() const;
ModelInfo modelInfo() const; ModelInfo modelInfo() const;
void setModelInfo(const ModelInfo &modelInfo); void setModelInfo(const ModelInfo &modelInfo);
bool isRecalc() const;
Q_INVOKABLE void unloadModel(); Q_INVOKABLE void unloadModel();
Q_INVOKABLE void reloadModel(); Q_INVOKABLE void reloadModel();
@ -118,6 +109,7 @@ public:
Q_INVOKABLE bool hasCollection(const QString &collection) const; Q_INVOKABLE bool hasCollection(const QString &collection) const;
Q_INVOKABLE void addCollection(const QString &collection); Q_INVOKABLE void addCollection(const QString &collection);
Q_INVOKABLE void removeCollection(const QString &collection); Q_INVOKABLE void removeCollection(const QString &collection);
void resetResponseState();
QString modelLoadingError() const { return m_modelLoadingError; } QString modelLoadingError() const { return m_modelLoadingError; }
@ -131,11 +123,8 @@ public:
QList<QString> generatedQuestions() const { return m_generatedQuestions; } QList<QString> generatedQuestions() const { return m_generatedQuestions; }
bool needsSave() const { return m_needsSave; }
void setNeedsSave(bool n) { m_needsSave = n; }
public Q_SLOTS: public Q_SLOTS:
void resetResponseState(); void serverNewPromptResponsePair(const QString &prompt);
Q_SIGNALS: Q_SIGNALS:
void idChanged(const QString &id); void idChanged(const QString &id);
@ -145,15 +134,19 @@ Q_SIGNALS:
void isCurrentlyLoadingChanged(); void isCurrentlyLoadingChanged();
void modelLoadingPercentageChanged(); void modelLoadingPercentageChanged();
void modelLoadingWarning(const QString &warning); void modelLoadingWarning(const QString &warning);
void responseChanged();
void responseInProgressChanged(); void responseInProgressChanged();
void responseStateChanged(); void responseStateChanged();
void promptRequested(const QStringList &enabledCollections); void promptRequested(const QList<QString> &collectionList, const QString &prompt);
void regenerateResponseRequested(int index); void regenerateResponseRequested();
void resetResponseRequested(); void resetResponseRequested();
void resetContextRequested(); void resetContextRequested();
void processSystemPromptRequested();
void modelChangeRequested(const ModelInfo &modelInfo); void modelChangeRequested(const ModelInfo &modelInfo);
void modelInfoChanged(); void modelInfoChanged();
void recalcChanged();
void loadDefaultModelRequested(); void loadDefaultModelRequested();
void loadModelRequested(const ModelInfo &modelInfo);
void generateNameRequested(); void generateNameRequested();
void modelLoadingErrorChanged(); void modelLoadingErrorChanged();
void isServerChanged(); void isServerChanged();
@ -167,21 +160,18 @@ Q_SIGNALS:
void generatedQuestionsChanged(); void generatedQuestionsChanged();
private Q_SLOTS: private Q_SLOTS:
void handleResponseChanged(); void handleResponseChanged(const QString &response);
void handleModelLoadingPercentageChanged(float); void handleModelLoadingPercentageChanged(float);
void promptProcessing(); void promptProcessing();
void generatingQuestions(); void generatingQuestions();
void responseStopped(qint64 promptResponseMs); void responseStopped(qint64 promptResponseMs);
void processToolCall(const QString &toolCall);
void toolCallComplete(const ToolCallInfo &info);
void responseComplete();
void generatedNameChanged(const QString &name); void generatedNameChanged(const QString &name);
void generatedQuestionFinished(const QString &question); void generatedQuestionFinished(const QString &question);
void handleRecalculating();
void handleModelLoadingError(const QString &error); void handleModelLoadingError(const QString &error);
void handleTokenSpeedChanged(const QString &tokenSpeed); void handleTokenSpeedChanged(const QString &tokenSpeed);
void handleDatabaseResultsChanged(const QList<ResultInfo> &results); void handleDatabaseResultsChanged(const QList<ResultInfo> &results);
void handleModelInfoChanged(const ModelInfo &modelInfo); void handleModelInfoChanged(const ModelInfo &modelInfo);
void handleModelChanged(const ModelInfo &modelInfo);
void handleTrySwitchContextOfLoadedModelCompleted(int value); void handleTrySwitchContextOfLoadedModelCompleted(int value);
private: private:
@ -194,6 +184,7 @@ private:
QString m_tokenSpeed; QString m_tokenSpeed;
QString m_device; QString m_device;
QString m_fallbackReason; QString m_fallbackReason;
QString m_response;
QList<QString> m_collections; QList<QString> m_collections;
QList<QString> m_generatedQuestions; QList<QString> m_generatedQuestions;
ChatModel *m_chatModel; ChatModel *m_chatModel;
@ -209,11 +200,6 @@ private:
bool m_firstResponse = true; bool m_firstResponse = true;
int m_trySwitchContextInProgress = 0; int m_trySwitchContextInProgress = 0;
bool m_isCurrentlyLoading = false; bool m_isCurrentlyLoading = false;
// True if we need to serialize the chat to disk, because of one of two reasons:
// - The chat was freshly created during this launch.
// - The chat was changed after loading it from disk.
bool m_needsSave = true;
int m_consecutiveToolCalls = 0;
}; };
#endif // CHAT_H #endif // CHAT_H


@ -1,40 +1,29 @@
#include "chatapi.h" #include "chatapi.h"
#include "utils.h" #include "../gpt4all-backend/llmodel.h"
#include <fmt/format.h>
#include <QAnyStringView>
#include <QCoreApplication> #include <QCoreApplication>
#include <QDebug>
#include <QGuiApplication> #include <QGuiApplication>
#include <QDebug>
#include <QJsonArray> #include <QJsonArray>
#include <QJsonDocument> #include <QJsonDocument>
#include <QJsonObject> #include <QJsonObject>
#include <QJsonValue> #include <QJsonValue>
#include <QLatin1String>
#include <QNetworkAccessManager> #include <QNetworkAccessManager>
#include <QNetworkRequest> #include <QNetworkRequest>
#include <QStringView>
#include <QThread> #include <QThread>
#include <QUrl> #include <QUrl>
#include <QUtf8StringView> // IWYU pragma: keep
#include <QVariant> #include <QVariant>
#include <QXmlStreamReader>
#include <Qt> #include <Qt>
#include <QtAssert> #include <QtGlobal>
#include <QtLogging> #include <QtLogging>
#include <expected>
#include <functional>
#include <iostream> #include <iostream>
#include <utility>
using namespace Qt::Literals::StringLiterals; using namespace Qt::Literals::StringLiterals;
//#define DEBUG //#define DEBUG
ChatAPI::ChatAPI() ChatAPI::ChatAPI()
: QObject(nullptr) : QObject(nullptr)
, m_modelName("gpt-3.5-turbo") , m_modelName("gpt-3.5-turbo")
@ -62,6 +51,7 @@ bool ChatAPI::loadModel(const std::string &modelPath, int n_ctx, int ngl)
void ChatAPI::setThreadCount(int32_t n_threads) void ChatAPI::setThreadCount(int32_t n_threads)
{ {
Q_UNUSED(n_threads); Q_UNUSED(n_threads);
qt_noop();
} }
int32_t ChatAPI::threadCount() const int32_t ChatAPI::threadCount() const
@ -78,119 +68,89 @@ bool ChatAPI::isModelLoaded() const
return true; return true;
} }
static auto parsePrompt(QXmlStreamReader &xml) -> std::expected<QJsonArray, QString> // All three of the state virtual functions are handled custom inside of chatllm save/restore
size_t ChatAPI::stateSize() const
{ {
QJsonArray messages; return 0;
auto xmlError = [&xml] {
return std::unexpected(u"%1:%2: %3"_s.arg(xml.lineNumber()).arg(xml.columnNumber()).arg(xml.errorString()));
};
if (xml.hasError())
return xmlError();
if (xml.atEnd())
return messages;
// skip header
bool foundElement = false;
do {
switch (xml.readNext()) {
using enum QXmlStreamReader::TokenType;
case Invalid:
return xmlError();
case EndDocument:
return messages;
default:
foundElement = true;
case StartDocument:
case Comment:
case DTD:
case ProcessingInstruction:
;
}
} while (!foundElement);
// document body loop
bool foundRoot = false;
for (;;) {
switch (xml.tokenType()) {
using enum QXmlStreamReader::TokenType;
case StartElement:
{
auto name = xml.name();
if (!foundRoot) {
if (name != "chat"_L1)
return std::unexpected(u"unexpected tag: %1"_s.arg(name));
foundRoot = true;
} else {
if (name != "user"_L1 && name != "assistant"_L1 && name != "system"_L1)
return std::unexpected(u"unknown role: %1"_s.arg(name));
auto content = xml.readElementText();
if (xml.tokenType() != EndElement)
return xmlError();
messages << makeJsonObject({
{ "role"_L1, name.toString().trimmed() },
{ "content"_L1, content },
});
}
break;
}
case Characters:
if (!xml.isWhitespace())
return std::unexpected(u"unexpected text: %1"_s.arg(xml.text()));
case Comment:
case ProcessingInstruction:
case EndElement:
break;
case EndDocument:
return messages;
case Invalid:
return xmlError();
default:
return std::unexpected(u"unexpected token: %1"_s.arg(xml.tokenString()));
}
xml.readNext();
}
} }
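parsePrompt() expects the prompt handed to an API model to be a small XML document: a <chat> root whose children are <system>, <user>, and <assistant> elements, which it flattens into the OpenAI-style messages array. An invented illustration of that mapping:

// Illustration only: the kind of input parsePrompt() accepts, and the array it yields.
// The conversation text is made up.
static const char *examplePrompt = R"(
<chat>
  <system>You are a helpful assistant.</system>
  <user>What is the capital of France?</user>
  <assistant>Paris.</assistant>
  <user>And of Italy?</user>
</chat>)";

// Resulting QJsonArray, element by element:
//   { "role": "system",    "content": "You are a helpful assistant." }
//   { "role": "user",      "content": "What is the capital of France?" }
//   { "role": "assistant", "content": "Paris." }
//   { "role": "user",      "content": "And of Italy?" }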
void ChatAPI::prompt( size_t ChatAPI::saveState(uint8_t *dest) const
std::string_view prompt, {
const PromptCallback &promptCallback, Q_UNUSED(dest);
const ResponseCallback &responseCallback, return 0;
const PromptContext &promptCtx }
) {
Q_UNUSED(promptCallback)
if (!isModelLoaded()) size_t ChatAPI::restoreState(const uint8_t *src)
throw std::invalid_argument("Attempted to prompt an unloaded model."); {
if (!promptCtx.n_predict) Q_UNUSED(src);
return; // nothing requested return 0;
}
void ChatAPI::prompt(const std::string &prompt,
const std::string &promptTemplate,
std::function<bool(int32_t)> promptCallback,
std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &promptCtx,
bool special,
std::string *fakeReply) {
Q_UNUSED(promptCallback);
Q_UNUSED(recalculateCallback);
Q_UNUSED(special);
if (!isModelLoaded()) {
std::cerr << "ChatAPI ERROR: prompt won't work with an unloaded model!\n";
return;
}
if (!promptCtx.n_past) { m_queuedPrompts.clear(); }
Q_ASSERT(promptCtx.n_past <= m_context.size());
m_context.resize(promptCtx.n_past);
// FIXME(cebtenzzre): We're assuming people don't try to use %2 with ChatGPT. What would that even mean?
m_queuedPrompts << QString::fromStdString(promptTemplate).arg(QString::fromStdString(prompt));
if (!promptCtx.n_predict && !fakeReply) {
return; // response explicitly suppressed, queue prompt for later
}
QString formattedPrompt = m_queuedPrompts.join("");
m_queuedPrompts.clear();
if (fakeReply) {
promptCtx.n_past += 1;
m_context.append(formattedPrompt);
m_context.append(QString::fromStdString(*fakeReply));
return;
}
// FIXME: We don't set the max_tokens on purpose because in order to do so safely without encountering // FIXME: We don't set the max_tokens on purpose because in order to do so safely without encountering
// an error we need to be able to count the tokens in our prompt. The only way to do this is to use // an error we need to be able to count the tokens in our prompt. The only way to do this is to use
// the OpenAI tiktoken library or to implement our own tokenization function that matches precisely // the OpenAI tiktokken library or to implement our own tokenization function that matches precisely
// the tokenization used by the OpenAI model we're calling. OpenAI has not introduced any means of // the tokenization used by the OpenAI model we're calling. OpenAI has not introduced any means of
// using the REST API to count tokens in a prompt. // using the REST API to count tokens in a prompt.
auto root = makeJsonObject({ QJsonObject root;
{ "model"_L1, m_modelName }, root.insert("model", m_modelName);
{ "stream"_L1, true }, root.insert("stream", true);
{ "temperature"_L1, promptCtx.temp }, root.insert("temperature", promptCtx.temp);
{ "top_p"_L1, promptCtx.top_p }, root.insert("top_p", promptCtx.top_p);
});
// conversation history // conversation history
{ QJsonArray messages;
QUtf8StringView promptUtf8(prompt); for (int i = 0; i < m_context.count(); ++i) {
QXmlStreamReader xml(promptUtf8); QJsonObject message;
auto messages = parsePrompt(xml); message.insert("role", i % 2 == 0 ? "user" : "assistant");
if (!messages) { message.insert("content", m_context.at(i));
auto error = fmt::format("Failed to parse API model prompt: {}", messages.error()); messages.append(message);
qDebug().noquote() << "ChatAPI ERROR:" << error << "Prompt:\n\n" << promptUtf8 << '\n';
throw std::invalid_argument(error);
}
root.insert("messages"_L1, *messages);
} }
QJsonObject promptObject;
promptObject.insert("role", "user");
promptObject.insert("content", formattedPrompt);
messages.append(promptObject);
root.insert("messages", messages);
QJsonDocument doc(root); QJsonDocument doc(root);
#if defined(DEBUG) #if defined(DEBUG)
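The JSON body assembled here follows the standard OpenAI chat-completions shape, with max_tokens deliberately left out for the reason given in the comment above. Roughly what a request looks like for a single user turn (all values and text are illustrative, not captured from a real run):

// Illustrative request body produced by the code above (values are made up):
static const char *exampleRequestBody = R"({
  "model": "gpt-3.5-turbo",
  "stream": true,
  "temperature": 0.7,
  "top_p": 0.4,
  "messages": [
    { "role": "user", "content": "What is the capital of France?" }
  ]
})";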
@ -207,9 +167,12 @@ void ChatAPI::prompt(
connect(&worker, &ChatAPIWorker::finished, &workerThread, &QThread::quit, Qt::DirectConnection); connect(&worker, &ChatAPIWorker::finished, &workerThread, &QThread::quit, Qt::DirectConnection);
connect(this, &ChatAPI::request, &worker, &ChatAPIWorker::request, Qt::QueuedConnection); connect(this, &ChatAPI::request, &worker, &ChatAPIWorker::request, Qt::QueuedConnection);
workerThread.start(); workerThread.start();
emit request(m_apiKey, doc.toJson(QJsonDocument::Compact)); emit request(m_apiKey, &promptCtx, doc.toJson(QJsonDocument::Compact));
workerThread.wait(); workerThread.wait();
promptCtx.n_past += 1;
m_context.append(formattedPrompt);
m_context.append(worker.currentResponse());
m_responseCallback = nullptr; m_responseCallback = nullptr;
#if defined(DEBUG) #if defined(DEBUG)
@ -227,8 +190,12 @@ bool ChatAPI::callResponse(int32_t token, const std::string& string)
return m_responseCallback(token, string); return m_responseCallback(token, string);
} }
void ChatAPIWorker::request(const QString &apiKey, const QByteArray &array) void ChatAPIWorker::request(const QString &apiKey,
LLModel::PromptContext *promptCtx,
const QByteArray &array)
{ {
m_ctx = promptCtx;
QUrl apiUrl(m_chat->url()); QUrl apiUrl(m_chat->url());
const QString authorization = u"Bearer %1"_s.arg(apiKey).trimmed(); const QString authorization = u"Bearer %1"_s.arg(apiKey).trimmed();
QNetworkRequest request(apiUrl); QNetworkRequest request(apiUrl);
@ -335,6 +302,7 @@ void ChatAPIWorker::handleReadyRead()
const QJsonObject choice = choices.first().toObject(); const QJsonObject choice = choices.first().toObject();
const QJsonObject delta = choice.value("delta").toObject(); const QJsonObject delta = choice.value("delta").toObject();
const QString content = delta.value("content").toString(); const QString content = delta.value("content").toString();
Q_ASSERT(m_ctx);
m_currentResponse += content; m_currentResponse += content;
if (!m_chat->callResponse(0, content.toStdString())) { if (!m_chat->callResponse(0, content.toStdString())) {
reply->abort(); reply->abort();

144
gpt4all-chat/chatapi.h Normal file
View File

@ -0,0 +1,144 @@
#ifndef CHATAPI_H
#define CHATAPI_H
#include "../gpt4all-backend/llmodel.h"
#include <QByteArray>
#include <QNetworkReply>
#include <QObject>
#include <QString>
#include <QStringList>
#include <QList>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <functional>
#include <string>
#include <vector>
class QNetworkAccessManager;
class ChatAPI;
class ChatAPIWorker : public QObject {
Q_OBJECT
public:
ChatAPIWorker(ChatAPI *chatAPI)
: QObject(nullptr)
, m_ctx(nullptr)
, m_networkManager(nullptr)
, m_chat(chatAPI) {}
virtual ~ChatAPIWorker() {}
QString currentResponse() const { return m_currentResponse; }
void request(const QString &apiKey,
LLModel::PromptContext *promptCtx,
const QByteArray &array);
Q_SIGNALS:
void finished();
private Q_SLOTS:
void handleFinished();
void handleReadyRead();
void handleErrorOccurred(QNetworkReply::NetworkError code);
private:
ChatAPI *m_chat;
LLModel::PromptContext *m_ctx;
QNetworkAccessManager *m_networkManager;
QString m_currentResponse;
};
class ChatAPI : public QObject, public LLModel {
Q_OBJECT
public:
ChatAPI();
virtual ~ChatAPI();
bool supportsEmbedding() const override { return false; }
bool supportsCompletion() const override { return true; }
bool loadModel(const std::string &modelPath, int n_ctx, int ngl) override;
bool isModelLoaded() const override;
size_t requiredMem(const std::string &modelPath, int n_ctx, int ngl) override;
size_t stateSize() const override;
size_t saveState(uint8_t *dest) const override;
size_t restoreState(const uint8_t *src) override;
void prompt(const std::string &prompt,
const std::string &promptTemplate,
std::function<bool(int32_t)> promptCallback,
std::function<bool(int32_t, const std::string&)> responseCallback,
std::function<bool(bool)> recalculateCallback,
PromptContext &ctx,
bool special,
std::string *fakeReply) override;
void setThreadCount(int32_t n_threads) override;
int32_t threadCount() const override;
void setModelName(const QString &modelName) { m_modelName = modelName; }
void setAPIKey(const QString &apiKey) { m_apiKey = apiKey; }
void setRequestURL(const QString &requestURL) { m_requestURL = requestURL; }
QString url() const { return m_requestURL; }
QList<QString> context() const { return m_context; }
void setContext(const QList<QString> &context) { m_context = context; }
bool callResponse(int32_t token, const std::string &string);
Q_SIGNALS:
void request(const QString &apiKey,
LLModel::PromptContext *ctx,
const QByteArray &array);
protected:
// We have to implement these as they are pure virtual in base class, but we don't actually use
// them as they are only called from the default implementation of 'prompt' which we override and
// completely replace
std::vector<Token> tokenize(PromptContext &ctx, const std::string &str, bool special) override {
(void)ctx;
(void)str;
(void)special;
throw std::logic_error("not implemented");
}
std::string tokenToString(Token id) const override {
(void)id;
throw std::logic_error("not implemented");
}
Token sampleToken(PromptContext &ctx) const override {
(void)ctx;
throw std::logic_error("not implemented");
}
bool evalTokens(PromptContext &ctx, const std::vector<int32_t> &tokens) const override {
(void)ctx;
(void)tokens;
throw std::logic_error("not implemented");
}
int32_t contextLength() const override {
throw std::logic_error("not implemented");
}
const std::vector<Token> &endTokens() const override {
throw std::logic_error("not implemented");
}
bool shouldAddBOS() const override {
throw std::logic_error("not implemented");
}
private:
std::function<bool(int32_t, const std::string&)> m_responseCallback;
QString m_modelName;
QString m_apiKey;
QString m_requestURL;
QList<QString> m_context;
QStringList m_queuedPrompts;
};
#endif // CHATAPI_H
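As a rough usage sketch, a caller is expected to configure a ChatAPI instance through the setters declared above before prompting it. The helper below is hypothetical, the endpoint and key are placeholders, and the real wiring lives in chatllm.cpp:

#include <QList>
#include <QString>

// Hypothetical helper: shows the intended use of the setters above, nothing more.
static ChatAPI *makeConfiguredApi(const QList<QString> &previousTurns)
{
    auto *api = new ChatAPI;
    api->setModelName(QStringLiteral("gpt-3.5-turbo"));
    api->setAPIKey(QStringLiteral("sk-..."));                                         // placeholder key
    api->setRequestURL(QStringLiteral("https://api.openai.com/v1/chat/completions")); // placeholder endpoint
    api->setContext(previousTurns);  // alternating user/assistant turns from the chat so far
    return api;
}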


@ -1,27 +1,25 @@
#include "chatlistmodel.h" #include "chatlistmodel.h"
#include "database.h" // IWYU pragma: keep
#include "mysettings.h" #include "mysettings.h"
#include <QCoreApplication>
#include <QDataStream> #include <QDataStream>
#include <QDir> #include <QDir>
#include <QElapsedTimer> #include <QElapsedTimer>
#include <QEvent>
#include <QFile> #include <QFile>
#include <QFileInfo> #include <QFileInfo>
#include <QGlobalStatic> #include <QGlobalStatic>
#include <QGuiApplication> #include <QGuiApplication>
#include <QIODevice> #include <QIODevice>
#include <QSettings> #include <QSettings>
#include <QStringList> // IWYU pragma: keep #include <QString>
#include <QStringList>
#include <Qt> #include <Qt>
#include <QtTypes>
#include <algorithm> #include <algorithm>
#define CHAT_FORMAT_MAGIC 0xF5D553CC
static constexpr quint32 CHAT_FORMAT_MAGIC = 0xF5D553CC; #define CHAT_FORMAT_VERSION 9
static constexpr qint32 CHAT_FORMAT_VERSION = 12;
class MyChatListModel: public ChatListModel { }; class MyChatListModel: public ChatListModel { };
Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance) Q_GLOBAL_STATIC(MyChatListModel, chatListModelInstance)
@ -53,12 +51,6 @@ void ChatListModel::loadChats()
connect(thread, &ChatsRestoreThread::finished, thread, &QObject::deleteLater); connect(thread, &ChatsRestoreThread::finished, thread, &QObject::deleteLater);
thread->start(); thread->start();
m_chatSaver = std::make_unique<ChatSaver>();
connect(this, &ChatListModel::requestSaveChats, m_chatSaver.get(), &ChatSaver::saveChats, Qt::QueuedConnection);
connect(m_chatSaver.get(), &ChatSaver::saveChatsFinished, this, &ChatListModel::saveChatsFinished, Qt::QueuedConnection);
// save chats on application quit
connect(QCoreApplication::instance(), &QCoreApplication::aboutToQuit, this, &ChatListModel::saveChatsSync);
connect(MySettings::globalInstance(), &MySettings::serverChatChanged, this, &ChatListModel::handleServerEnabledChanged); connect(MySettings::globalInstance(), &MySettings::serverChatChanged, this, &ChatListModel::handleServerEnabledChanged);
} }
@ -81,59 +73,33 @@ ChatSaver::ChatSaver()
m_thread.start(); m_thread.start();
} }
ChatSaver::~ChatSaver()
{
m_thread.quit();
m_thread.wait();
}
QVector<Chat *> ChatListModel::getChatsToSave() const
{
QVector<Chat *> toSave;
for (auto *chat : m_chats)
if (chat != m_serverChat && !chat->isNewChat())
toSave << chat;
return toSave;
}
void ChatListModel::saveChats() void ChatListModel::saveChats()
{ {
auto toSave = getChatsToSave(); QVector<Chat*> toSave;
for (Chat *chat : m_chats) {
if (chat == m_serverChat)
continue;
if (chat->isNewChat())
continue;
toSave.append(chat);
}
if (toSave.isEmpty()) { if (toSave.isEmpty()) {
emit saveChatsFinished(); emit saveChatsFinished();
return; return;
} }
ChatSaver *saver = new ChatSaver;
connect(this, &ChatListModel::requestSaveChats, saver, &ChatSaver::saveChats, Qt::QueuedConnection);
connect(saver, &ChatSaver::saveChatsFinished, this, &ChatListModel::saveChatsFinished, Qt::QueuedConnection);
emit requestSaveChats(toSave); emit requestSaveChats(toSave);
} }
void ChatListModel::saveChatsForQuit()
{
saveChats();
m_startedFinalSave = true;
}
void ChatListModel::saveChatsSync()
{
auto toSave = getChatsToSave();
if (!m_startedFinalSave && !toSave.isEmpty())
m_chatSaver->saveChats(toSave);
}
void ChatSaver::saveChats(const QVector<Chat *> &chats) void ChatSaver::saveChats(const QVector<Chat *> &chats)
{ {
// we can be called from the main thread instead of a worker thread at quit time, so take a lock
QMutexLocker locker(&m_mutex);
QElapsedTimer timer; QElapsedTimer timer;
timer.start(); timer.start();
const QString savePath = MySettings::globalInstance()->modelPath(); const QString savePath = MySettings::globalInstance()->modelPath();
qsizetype nSavedChats = 0;
for (Chat *chat : chats) { for (Chat *chat : chats) {
if (!chat->needsSave())
continue;
++nSavedChats;
QString fileName = "gpt4all-" + chat->id() + ".chat"; QString fileName = "gpt4all-" + chat->id() + ".chat";
QString filePath = savePath + "/" + fileName; QString filePath = savePath + "/" + fileName;
QFile originalFile(filePath); QFile originalFile(filePath);
@ -146,8 +112,8 @@ void ChatSaver::saveChats(const QVector<Chat *> &chats)
} }
QDataStream out(&tempFile); QDataStream out(&tempFile);
out << CHAT_FORMAT_MAGIC; out << (quint32)CHAT_FORMAT_MAGIC;
out << CHAT_FORMAT_VERSION; out << (qint32)CHAT_FORMAT_VERSION;
out.setVersion(QDataStream::Qt_6_2); out.setVersion(QDataStream::Qt_6_2);
qDebug() << "serializing chat" << fileName; qDebug() << "serializing chat" << fileName;
@ -157,14 +123,13 @@ void ChatSaver::saveChats(const QVector<Chat *> &chats)
continue; continue;
} }
chat->setNeedsSave(false);
if (originalFile.exists()) if (originalFile.exists())
originalFile.remove(); originalFile.remove();
tempFile.rename(filePath); tempFile.rename(filePath);
} }
qint64 elapsedTime = timer.elapsed(); qint64 elapsedTime = timer.elapsed();
qDebug() << "serializing chats took" << elapsedTime << "ms, saved" << nSavedChats << "/" << chats.size() << "chats"; qDebug() << "serializing chats took:" << elapsedTime << "ms";
emit saveChatsFinished(); emit saveChatsFinished();
} }
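Every gpt4all-*.chat file therefore begins with the same small header before the per-chat payload: the magic number, the format version, and a QDataStream pinned to Qt_6_2 for everything that follows. A sketch of writing just that header with the constants defined above (this helper itself is not part of the codebase):

#include <QDataStream>
#include <QFile>

// Sketch: the header every chat file starts with, per ChatSaver::saveChats().
// Assumes the file is already open for writing.
static bool writeChatFileHeader(QFile &file)
{
    QDataStream out(&file);
    out << quint32(0xF5D553CC);          // CHAT_FORMAT_MAGIC
    out << qint32(12);                   // CHAT_FORMAT_VERSION on the newer branch (9 on the older one)
    out.setVersion(QDataStream::Qt_6_2); // encoding for the payload that follows
    return out.status() == QDataStream::Ok;
}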
@ -229,16 +194,11 @@ void ChatsRestoreThread::run()
qint32 version; qint32 version;
in >> version; in >> version;
if (version < 1) { if (version < 1) {
qWarning() << "WARNING: Chat file version" << version << "is not supported:" << file.fileName(); qWarning() << "ERROR: Chat file has non supported version:" << file.fileName();
continue;
}
if (version > CHAT_FORMAT_VERSION) {
qWarning().nospace() << "WARNING: Chat file is from a future version (have " << version << " want "
<< CHAT_FORMAT_VERSION << "): " << file.fileName();
continue; continue;
} }
if (version < 2) if (version <= 1)
in.setVersion(QDataStream::Qt_6_2); in.setVersion(QDataStream::Qt_6_2);
FileInfo info; FileInfo info;
@ -279,21 +239,18 @@ void ChatsRestoreThread::run()
continue; continue;
} }
if (version < 2) if (version <= 1)
in.setVersion(QDataStream::Qt_6_2); in.setVersion(QDataStream::Qt_6_2);
} }
qDebug() << "deserializing chat" << f.file; qDebug() << "deserializing chat" << f.file;
auto chat = std::make_unique<Chat>(); Chat *chat = new Chat;
chat->moveToThread(qGuiApp->thread()); chat->moveToThread(qGuiApp->thread());
bool ok = chat->deserialize(in, version); if (!chat->deserialize(in, version)) {
if (!ok) {
qWarning() << "ERROR: Couldn't deserialize chat from file:" << file.fileName(); qWarning() << "ERROR: Couldn't deserialize chat from file:" << file.fileName();
} else if (!in.atEnd()) {
qWarning().nospace() << "error loading chat from " << file.fileName() << ": extra data at end of file";
} else { } else {
emit chatRestored(chat.release()); emit chatRestored(chat);
} }
if (f.oldFile) if (f.oldFile)
file.remove(); // No longer storing in this directory file.remove(); // No longer storing in this directory


@ -7,23 +7,16 @@
#include <QAbstractListModel> #include <QAbstractListModel>
#include <QByteArray> #include <QByteArray>
#include <QDate>
#include <QDebug> #include <QDebug>
#include <QHash> #include <QHash>
#include <QList> #include <QList>
#include <QMutex>
#include <QObject> #include <QObject>
#include <QString>
#include <QThread> #include <QThread>
#include <QVariant> #include <QVariant>
#include <QVector> // IWYU pragma: keep #include <QVector>
#include <Qt> #include <Qt>
#include <QtAssert> #include <QtGlobal>
#include <QtLogging> #include <QtLogging>
#include <QtPreprocessorSupport>
#include <memory>
class ChatsRestoreThread : public QThread class ChatsRestoreThread : public QThread
{ {
@ -40,7 +33,7 @@ class ChatSaver : public QObject
Q_OBJECT Q_OBJECT
public: public:
explicit ChatSaver(); explicit ChatSaver();
~ChatSaver() override; void stop();
Q_SIGNALS: Q_SIGNALS:
void saveChatsFinished(); void saveChatsFinished();
@ -50,7 +43,6 @@ public Q_SLOTS:
private: private:
QThread m_thread; QThread m_thread;
QMutex m_mutex;
}; };
class ChatListModel : public QAbstractListModel class ChatListModel : public QAbstractListModel
@ -155,7 +147,7 @@ public:
if (m_serverChat) if (m_serverChat)
return; return;
m_serverChat = new Chat(Chat::server_tag, this); m_serverChat = new Chat(true /*isServer*/, this);
beginInsertRows(QModelIndex(), m_chats.size(), m_chats.size()); beginInsertRows(QModelIndex(), m_chats.size(), m_chats.size());
m_chats.append(m_serverChat); m_chats.append(m_serverChat);
endInsertRows(); endInsertRows();
@ -237,7 +229,6 @@ public:
void removeChatFile(Chat *chat) const; void removeChatFile(Chat *chat) const;
Q_INVOKABLE void saveChats(); Q_INVOKABLE void saveChats();
Q_INVOKABLE void saveChatsForQuit();
void restoreChat(Chat *chat); void restoreChat(Chat *chat);
void chatsRestoredFinished(); void chatsRestoredFinished();
@ -247,6 +238,7 @@ public Q_SLOTS:
Q_SIGNALS: Q_SIGNALS:
void countChanged(); void countChanged();
void currentChatChanged(); void currentChatChanged();
void chatsSavedFinished();
void requestSaveChats(const QVector<Chat*> &); void requestSaveChats(const QVector<Chat*> &);
void saveChatsFinished(); void saveChatsFinished();
@ -254,9 +246,6 @@ protected:
bool eventFilter(QObject *obj, QEvent *ev) override; bool eventFilter(QObject *obj, QEvent *ev) override;
private Q_SLOTS: private Q_SLOTS:
// Used with QCoreApplication::aboutToQuit. Does not require an event loop.
void saveChatsSync();
void newChatCountChanged() void newChatCountChanged()
{ {
Q_ASSERT(m_newChat && m_newChat->chatModel()->count()); Q_ASSERT(m_newChat && m_newChat->chatModel()->count());
@ -287,16 +276,11 @@ private Q_SLOTS:
} }
} }
private:
QVector<Chat *> getChatsToSave() const;
private: private:
Chat* m_newChat = nullptr; Chat* m_newChat = nullptr;
Chat* m_serverChat = nullptr; Chat* m_serverChat = nullptr;
Chat* m_currentChat = nullptr; Chat* m_currentChat = nullptr;
QList<Chat*> m_chats; QList<Chat*> m_chats;
std::unique_ptr<ChatSaver> m_chatSaver;
bool m_startedFinalSave = false;
private: private:
explicit ChatListModel(); explicit ChatListModel();

gpt4all-chat/chatllm.cpp (new file, 1366 lines): diff suppressed because it is too large.


@ -1,94 +1,42 @@
#ifndef CHATLLM_H #ifndef CHATLLM_H
#define CHATLLM_H #define CHATLLM_H
#include "chatmodel.h" #include "database.h" // IWYU pragma: keep
#include "database.h"
#include "modellist.h" #include "modellist.h"
#include <gpt4all-backend/llmodel.h> #include "../gpt4all-backend/llmodel.h"
#include <QByteArray> #include <QByteArray>
#include <QElapsedTimer> #include <QElapsedTimer>
#include <QFileInfo> #include <QFileInfo>
#include <QList> #include <QList>
#include <QObject> #include <QObject>
#include <QPointer> #include <QPair>
#include <QString> #include <QString>
#include <QStringList> // IWYU pragma: keep
#include <QThread> #include <QThread>
#include <QVariantMap> // IWYU pragma: keep #include <QVariantMap>
#include <QtNumeric> #include <QVector>
#include <QtGlobal>
#include <atomic> #include <atomic>
#include <cstdint>
#include <memory> #include <memory>
#include <optional> #include <optional>
#include <span>
#include <string> #include <string>
#include <string_view>
#include <variant>
#include <vector>
using namespace Qt::Literals::StringLiterals; using namespace Qt::Literals::StringLiterals;
class ChatLLM;
class QDataStream; class QDataStream;
// NOTE: values serialized to disk, do not change or reuse // NOTE: values serialized to disk, do not change or reuse
enum class LLModelTypeV0 { // chat versions 2-5 enum LLModelType {
MPT = 0, GPTJ_ = 0, // no longer used
GPTJ = 1, LLAMA_ = 1,
LLAMA = 2, API_ = 2,
CHATGPT = 3, BERT_ = 3, // no longer used
REPLIT = 4,
FALCON = 5,
BERT = 6, // not used
STARCODER = 7,
};
enum class LLModelTypeV1 { // since chat version 6 (v2.5.0)
GPTJ = 0, // not for new chats
LLAMA = 1,
API = 2,
BERT = 3, // not used
// none of the below are used in new chats
REPLIT = 4,
FALCON = 5,
MPT = 6,
STARCODER = 7,
NONE = -1, // no state
}; };
inline LLModelTypeV1 parseLLModelTypeV1(int type) class ChatLLM;
{
switch (LLModelTypeV1(type)) {
case LLModelTypeV1::GPTJ:
case LLModelTypeV1::LLAMA:
case LLModelTypeV1::API:
// case LLModelTypeV1::BERT: -- not used
case LLModelTypeV1::REPLIT:
case LLModelTypeV1::FALCON:
case LLModelTypeV1::MPT:
case LLModelTypeV1::STARCODER:
return LLModelTypeV1(type);
default:
return LLModelTypeV1::NONE;
}
}
inline LLModelTypeV1 parseLLModelTypeV0(int v0)
{
switch (LLModelTypeV0(v0)) {
case LLModelTypeV0::MPT: return LLModelTypeV1::MPT;
case LLModelTypeV0::GPTJ: return LLModelTypeV1::GPTJ;
case LLModelTypeV0::LLAMA: return LLModelTypeV1::LLAMA;
case LLModelTypeV0::CHATGPT: return LLModelTypeV1::API;
case LLModelTypeV0::REPLIT: return LLModelTypeV1::REPLIT;
case LLModelTypeV0::FALCON: return LLModelTypeV1::FALCON;
// case LLModelTypeV0::BERT: -- not used
case LLModelTypeV0::STARCODER: return LLModelTypeV1::STARCODER;
default: return LLModelTypeV1::NONE;
}
}
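These two helpers exist because the integer stored on disk changed meaning at chat format version 6: older chats carry an LLModelTypeV0 value, newer ones an LLModelTypeV1, and anything unrecognized collapses to NONE. A small sketch of how a loader would choose the decoder (the function name is made up):

// Hypothetical helper: pick the right decoder for the on-disk model-type integer,
// following the comments above (V0 for chat versions 2-5, V1 from version 6 onward).
static LLModelTypeV1 modelTypeForChatVersion(int rawType, int chatVersion)
{
    return chatVersion >= 6 ? parseLLModelTypeV1(rawType)
                            : parseLLModelTypeV0(rawType);
}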
struct LLModelInfo { struct LLModelInfo {
std::unique_ptr<LLModel> model; std::unique_ptr<LLModel> model;
@ -145,6 +93,7 @@ class Chat;
class ChatLLM : public QObject class ChatLLM : public QObject
{ {
Q_OBJECT Q_OBJECT
Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged) Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged)
Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged) Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)
Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged) Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)
@ -152,14 +101,12 @@ public:
ChatLLM(Chat *parent, bool isServer = false); ChatLLM(Chat *parent, bool isServer = false);
virtual ~ChatLLM(); virtual ~ChatLLM();
static void destroyStore();
static std::optional<std::string> checkJinjaTemplateError(const std::string &source);
void destroy(); void destroy();
static void destroyStore();
bool isModelLoaded() const; bool isModelLoaded() const;
void regenerateResponse(int index); void regenerateResponse();
// used to implement edit functionality void resetResponse();
std::optional<QString> popPrompt(int index); void resetContext();
void stopGenerating() { m_stopGenerating = true; } void stopGenerating() { m_stopGenerating = true; }
@ -169,9 +116,13 @@ public:
void setForceUnloadModel(bool b) { m_forceUnloadModel = b; } void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
void setMarkedForDeletion(bool b) { m_markedForDeletion = b; } void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }
QString response() const;
ModelInfo modelInfo() const; ModelInfo modelInfo() const;
void setModelInfo(const ModelInfo &info); void setModelInfo(const ModelInfo &info);
bool isRecalc() const { return m_isRecalc; }
void acquireModel(); void acquireModel();
void resetModel(); void resetModel();
@ -196,11 +147,14 @@ public:
return m_llModelInfo.fallbackReason.value_or(u""_s); return m_llModelInfo.fallbackReason.value_or(u""_s);
} }
bool serialize(QDataStream &stream, int version); QString generatedName() const { return QString::fromStdString(m_nameResponse); }
bool deserialize(QDataStream &stream, int version);
bool serialize(QDataStream &stream, int version, bool serializeKV);
bool deserialize(QDataStream &stream, int version, bool deserializeKV, bool discardKV);
void setStateFromText(const QVector<QPair<QString, QString>> &stateFromText) { m_stateFromText = stateFromText; }
public Q_SLOTS: public Q_SLOTS:
void prompt(const QStringList &enabledCollections); bool prompt(const QList<QString> &collectionList, const QString &prompt);
bool loadDefaultModel(); bool loadDefaultModel();
void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo); void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
bool loadModel(const ModelInfo &modelInfo); bool loadModel(const ModelInfo &modelInfo);
@ -208,19 +162,22 @@ public Q_SLOTS:
void unloadModel(); void unloadModel();
void reloadModel(); void reloadModel();
void generateName(); void generateName();
void generateQuestions(qint64 elapsed);
void handleChatIdChanged(const QString &id); void handleChatIdChanged(const QString &id);
void handleShouldBeLoadedChanged(); void handleShouldBeLoadedChanged();
void handleThreadStarted(); void handleThreadStarted();
void handleForceMetalChanged(bool forceMetal); void handleForceMetalChanged(bool forceMetal);
void handleDeviceChanged(); void handleDeviceChanged();
void processSystemPrompt();
void processRestoreStateFromText();
Q_SIGNALS: Q_SIGNALS:
void recalcChanged();
void loadedModelInfoChanged(); void loadedModelInfoChanged();
void modelLoadingPercentageChanged(float); void modelLoadingPercentageChanged(float);
void modelLoadingError(const QString &error); void modelLoadingError(const QString &error);
void modelLoadingWarning(const QString &warning); void modelLoadingWarning(const QString &warning);
void responseChanged(); void responseChanged(const QString &response);
void responseFailed();
void promptProcessing(); void promptProcessing();
void generatingQuestions(); void generatingQuestions();
void responseStopped(qint64 promptResponseMs); void responseStopped(qint64 promptResponseMs);
@ -239,53 +196,59 @@ Q_SIGNALS:
void modelInfoChanged(const ModelInfo &modelInfo); void modelInfoChanged(const ModelInfo &modelInfo);
protected: protected:
struct PromptResult { bool promptInternal(const QList<QString> &collectionList, const QString &prompt, const QString &promptTemplate,
QByteArray response; // raw UTF-8 int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch, float repeat_penalty,
int promptTokens; // note: counts *entire* history, even if cached int32_t repeat_penalty_tokens);
int responseTokens; bool handlePrompt(int32_t token);
}; bool handleResponse(int32_t token, const std::string &response);
bool handleRecalculate(bool isRecalc);
bool handleNamePrompt(int32_t token);
bool handleNameResponse(int32_t token, const std::string &response);
bool handleNameRecalculate(bool isRecalc);
bool handleSystemPrompt(int32_t token);
bool handleSystemResponse(int32_t token, const std::string &response);
bool handleSystemRecalculate(bool isRecalc);
bool handleRestoreStateFromTextPrompt(int32_t token);
bool handleRestoreStateFromTextResponse(int32_t token, const std::string &response);
bool handleRestoreStateFromTextRecalculate(bool isRecalc);
bool handleQuestionPrompt(int32_t token);
bool handleQuestionResponse(int32_t token, const std::string &response);
bool handleQuestionRecalculate(bool isRecalc);
void saveState();
void restoreState();
struct ChatPromptResult : PromptResult { protected:
QList<ResultInfo> databaseResults; LLModel::PromptContext m_ctx;
}; quint32 m_promptTokens;
quint32 m_promptResponseTokens;
ChatPromptResult promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
qsizetype startOffset = 0);
// passing a string_view directly skips templating and uses the raw string
PromptResult promptInternal(const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
const LLModel::PromptContext &ctx,
bool usedLocalDocs);
private: private:
bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps); bool loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps);
std::vector<MessageItem> forkConversation(const QString &prompt) const; std::string m_response;
std::string m_nameResponse;
// Applies the Jinja template. Query mode returns only the last message without special tokens. QString m_questionResponse;
// Returns a (# of messages, rendered prompt) pair.
std::string applyJinjaTemplate(std::span<const MessageItem> items) const;
void generateQuestions(qint64 elapsed);
protected:
QPointer<ChatModel> m_chatModel;
private:
const Chat *m_chat;
LLModelInfo m_llModelInfo; LLModelInfo m_llModelInfo;
LLModelTypeV1 m_llModelType = LLModelTypeV1::NONE; LLModelType m_llModelType;
ModelInfo m_modelInfo; ModelInfo m_modelInfo;
TokenTimer *m_timer; TokenTimer *m_timer;
QByteArray m_state;
QThread m_llmThread; QThread m_llmThread;
std::atomic<bool> m_stopGenerating; std::atomic<bool> m_stopGenerating;
std::atomic<bool> m_shouldBeLoaded; std::atomic<bool> m_shouldBeLoaded;
std::atomic<bool> m_isRecalc;
std::atomic<bool> m_forceUnloadModel; std::atomic<bool> m_forceUnloadModel;
std::atomic<bool> m_markedForDeletion; std::atomic<bool> m_markedForDeletion;
bool m_isServer; bool m_isServer;
bool m_forceMetal; bool m_forceMetal;
bool m_reloadingToChangeVariant; bool m_reloadingToChangeVariant;
friend class ChatViewResponseHandler; bool m_processedSystemPrompt;
friend class SimpleResponseHandler; bool m_restoreStateFromText;
// m_pristineLoadedState is set if saveSate is unnecessary, either because:
// - an unload was queued during LLModel::restoreState()
// - the chat will be restored from text and hasn't been interacted with yet
bool m_pristineLoadedState = false;
QVector<QPair<QString, QString>> m_stateFromText;
}; };
#endif // CHATLLM_H #endif // CHATLLM_H
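As noted above, promptInternal accepts either a span of chat messages or a raw std::string_view, and the string_view form bypasses the Jinja template entirely, presumably so text that is already fully formatted can reuse the same generation path. A self-contained sketch of that dispatch pattern follows; Message and renderTemplate are stand-ins, not the application's types.

#include <span>
#include <string>
#include <string_view>
#include <variant>

struct Message { std::string role, content; };

static std::string renderTemplate(std::span<const Message> msgs)
{
    std::string out;
    for (const auto &m : msgs)
        out += "<|" + m.role + "|>\n" + m.content + "\n"; // stand-in for the Jinja chat template
    return out;
}

static std::string buildPrompt(const std::variant<std::span<const Message>, std::string_view> &prompt)
{
    // Structured messages are rendered through the template; a raw string is used verbatim.
    if (const auto *msgs = std::get_if<std::span<const Message>>(&prompt))
        return renderTemplate(*msgs);
    return std::string(std::get<std::string_view>(prompt));
}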

gpt4all-chat/chatmodel.h (new file, 474 lines)
@ -0,0 +1,474 @@
#ifndef CHATMODEL_H
#define CHATMODEL_H
#include "database.h"
#include <QAbstractListModel>
#include <QByteArray>
#include <QDataStream>
#include <QHash>
#include <QList>
#include <QObject>
#include <QPair>
#include <QString>
#include <QVariant>
#include <QVector>
#include <Qt>
#include <QtGlobal>
struct ChatItem
{
Q_GADGET
Q_PROPERTY(int id MEMBER id)
Q_PROPERTY(QString name MEMBER name)
Q_PROPERTY(QString value MEMBER value)
Q_PROPERTY(QString prompt MEMBER prompt)
Q_PROPERTY(QString newResponse MEMBER newResponse)
Q_PROPERTY(bool currentResponse MEMBER currentResponse)
Q_PROPERTY(bool stopped MEMBER stopped)
Q_PROPERTY(bool thumbsUpState MEMBER thumbsUpState)
Q_PROPERTY(bool thumbsDownState MEMBER thumbsDownState)
Q_PROPERTY(QList<ResultInfo> sources MEMBER sources)
Q_PROPERTY(QList<ResultInfo> consolidatedSources MEMBER consolidatedSources)
public:
// TODO: Maybe we should include the model name here as well as timestamp?
int id = 0;
QString name;
QString value;
QString prompt;
QString newResponse;
QList<ResultInfo> sources;
QList<ResultInfo> consolidatedSources;
bool currentResponse = false;
bool stopped = false;
bool thumbsUpState = false;
bool thumbsDownState = false;
};
Q_DECLARE_METATYPE(ChatItem)
class ChatModel : public QAbstractListModel
{
Q_OBJECT
Q_PROPERTY(int count READ count NOTIFY countChanged)
public:
explicit ChatModel(QObject *parent = nullptr) : QAbstractListModel(parent) {}
enum Roles {
IdRole = Qt::UserRole + 1,
NameRole,
ValueRole,
PromptRole,
NewResponseRole,
CurrentResponseRole,
StoppedRole,
ThumbsUpStateRole,
ThumbsDownStateRole,
SourcesRole,
ConsolidatedSourcesRole
};
int rowCount(const QModelIndex &parent = QModelIndex()) const override
{
Q_UNUSED(parent)
return m_chatItems.size();
}
QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const override
{
if (!index.isValid() || index.row() < 0 || index.row() >= m_chatItems.size())
return QVariant();
const ChatItem &item = m_chatItems.at(index.row());
switch (role) {
case IdRole:
return item.id;
case NameRole:
return item.name;
case ValueRole:
return item.value;
case PromptRole:
return item.prompt;
case NewResponseRole:
return item.newResponse;
case CurrentResponseRole:
return item.currentResponse;
case StoppedRole:
return item.stopped;
case ThumbsUpStateRole:
return item.thumbsUpState;
case ThumbsDownStateRole:
return item.thumbsDownState;
case SourcesRole:
return QVariant::fromValue(item.sources);
case ConsolidatedSourcesRole:
return QVariant::fromValue(item.consolidatedSources);
}
return QVariant();
}
QHash<int, QByteArray> roleNames() const override
{
QHash<int, QByteArray> roles;
roles[IdRole] = "id";
roles[NameRole] = "name";
roles[ValueRole] = "value";
roles[PromptRole] = "prompt";
roles[NewResponseRole] = "newResponse";
roles[CurrentResponseRole] = "currentResponse";
roles[StoppedRole] = "stopped";
roles[ThumbsUpStateRole] = "thumbsUpState";
roles[ThumbsDownStateRole] = "thumbsDownState";
roles[SourcesRole] = "sources";
roles[ConsolidatedSourcesRole] = "consolidatedSources";
return roles;
}
void appendPrompt(const QString &name, const QString &value)
{
ChatItem item;
item.name = name;
item.value = value;
beginInsertRows(QModelIndex(), m_chatItems.size(), m_chatItems.size());
m_chatItems.append(item);
endInsertRows();
emit countChanged();
}
void appendResponse(const QString &name, const QString &prompt)
{
ChatItem item;
item.id = m_chatItems.count(); // This is only relevant for responses
item.name = name;
item.prompt = prompt;
item.currentResponse = true;
beginInsertRows(QModelIndex(), m_chatItems.size(), m_chatItems.size());
m_chatItems.append(item);
endInsertRows();
emit countChanged();
}
Q_INVOKABLE void clear()
{
if (m_chatItems.isEmpty()) return;
beginResetModel();
m_chatItems.clear();
endResetModel();
emit countChanged();
}
Q_INVOKABLE ChatItem get(int index)
{
if (index < 0 || index >= m_chatItems.size()) return ChatItem();
return m_chatItems.at(index);
}
Q_INVOKABLE void updateCurrentResponse(int index, bool b)
{
if (index < 0 || index >= m_chatItems.size()) return;
ChatItem &item = m_chatItems[index];
if (item.currentResponse != b) {
item.currentResponse = b;
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {CurrentResponseRole});
}
}
Q_INVOKABLE void updateStopped(int index, bool b)
{
if (index < 0 || index >= m_chatItems.size()) return;
ChatItem &item = m_chatItems[index];
if (item.stopped != b) {
item.stopped = b;
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {StoppedRole});
}
}
Q_INVOKABLE void updateValue(int index, const QString &value)
{
if (index < 0 || index >= m_chatItems.size()) return;
ChatItem &item = m_chatItems[index];
if (item.value != value) {
item.value = value;
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {ValueRole});
emit valueChanged(index, value);
}
}
QList<ResultInfo> consolidateSources(const QList<ResultInfo> &sources) {
QMap<QString, ResultInfo> groupedData;
for (const ResultInfo &info : sources) {
if (groupedData.contains(info.file)) {
groupedData[info.file].text += "\n---\n" + info.text;
} else {
groupedData[info.file] = info;
}
}
QList<ResultInfo> consolidatedSources = groupedData.values();
return consolidatedSources;
}
Q_INVOKABLE void updateSources(int index, const QList<ResultInfo> &sources)
{
if (index < 0 || index >= m_chatItems.size()) return;
ChatItem &item = m_chatItems[index];
item.sources = sources;
item.consolidatedSources = consolidateSources(sources);
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {SourcesRole});
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {ConsolidatedSourcesRole});
}
Q_INVOKABLE void updateThumbsUpState(int index, bool b)
{
if (index < 0 || index >= m_chatItems.size()) return;
ChatItem &item = m_chatItems[index];
if (item.thumbsUpState != b) {
item.thumbsUpState = b;
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {ThumbsUpStateRole});
}
}
Q_INVOKABLE void updateThumbsDownState(int index, bool b)
{
if (index < 0 || index >= m_chatItems.size()) return;
ChatItem &item = m_chatItems[index];
if (item.thumbsDownState != b) {
item.thumbsDownState = b;
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {ThumbsDownStateRole});
}
}
Q_INVOKABLE void updateNewResponse(int index, const QString &newResponse)
{
if (index < 0 || index >= m_chatItems.size()) return;
ChatItem &item = m_chatItems[index];
if (item.newResponse != newResponse) {
item.newResponse = newResponse;
emit dataChanged(createIndex(index, 0), createIndex(index, 0), {NewResponseRole});
}
}
int count() const { return m_chatItems.size(); }
bool serialize(QDataStream &stream, int version) const
{
stream << count();
for (const auto &c : m_chatItems) {
stream << c.id;
stream << c.name;
stream << c.value;
stream << c.prompt;
stream << c.newResponse;
stream << c.currentResponse;
stream << c.stopped;
stream << c.thumbsUpState;
stream << c.thumbsDownState;
if (version > 7) {
stream << c.sources.size();
for (const ResultInfo &info : c.sources) {
Q_ASSERT(!info.file.isEmpty());
stream << info.collection;
stream << info.path;
stream << info.file;
stream << info.title;
stream << info.author;
stream << info.date;
stream << info.text;
stream << info.page;
stream << info.from;
stream << info.to;
}
} else if (version > 2) {
QList<QString> references;
QList<QString> referencesContext;
int validReferenceNumber = 1;
for (const ResultInfo &info : c.sources) {
if (info.file.isEmpty())
continue;
QString reference;
{
QTextStream stream(&reference);
stream << (validReferenceNumber++) << ". ";
if (!info.title.isEmpty())
stream << "\"" << info.title << "\". ";
if (!info.author.isEmpty())
stream << "By " << info.author << ". ";
if (!info.date.isEmpty())
stream << "Date: " << info.date << ". ";
stream << "In " << info.file << ". ";
if (info.page != -1)
stream << "Page " << info.page << ". ";
if (info.from != -1) {
stream << "Lines " << info.from;
if (info.to != -1)
stream << "-" << info.to;
stream << ". ";
}
stream << "[Context](context://" << validReferenceNumber - 1 << ")";
}
references.append(reference);
referencesContext.append(info.text);
}
stream << references.join("\n");
stream << referencesContext;
}
}
return stream.status() == QDataStream::Ok;
}
bool deserialize(QDataStream &stream, int version)
{
int size;
stream >> size;
for (int i = 0; i < size; ++i) {
ChatItem c;
stream >> c.id;
stream >> c.name;
stream >> c.value;
stream >> c.prompt;
stream >> c.newResponse;
stream >> c.currentResponse;
stream >> c.stopped;
stream >> c.thumbsUpState;
stream >> c.thumbsDownState;
if (version > 7) {
qsizetype count;
stream >> count;
QList<ResultInfo> sources;
for (int i = 0; i < count; ++i) {
ResultInfo info;
stream >> info.collection;
stream >> info.path;
stream >> info.file;
stream >> info.title;
stream >> info.author;
stream >> info.date;
stream >> info.text;
stream >> info.page;
stream >> info.from;
stream >> info.to;
sources.append(info);
}
c.sources = sources;
c.consolidatedSources = consolidateSources(sources);
}else if (version > 2) {
QString references;
QList<QString> referencesContext;
stream >> references;
stream >> referencesContext;
if (!references.isEmpty()) {
QList<ResultInfo> sources;
QList<QString> referenceList = references.split("\n");
// Ignore empty lines and those that begin with "---" which is no longer used
for (auto it = referenceList.begin(); it != referenceList.end();) {
if (it->trimmed().isEmpty() || it->trimmed().startsWith("---"))
it = referenceList.erase(it);
else
++it;
}
Q_ASSERT(referenceList.size() == referencesContext.size());
for (int j = 0; j < referenceList.size(); ++j) {
QString reference = referenceList[j];
QString context = referencesContext[j];
ResultInfo info;
QTextStream refStream(&reference);
QString dummy;
int validReferenceNumber;
refStream >> validReferenceNumber >> dummy;
// Extract title (between quotes)
if (reference.contains("\"")) {
int startIndex = reference.indexOf('"') + 1;
int endIndex = reference.indexOf('"', startIndex);
info.title = reference.mid(startIndex, endIndex - startIndex);
}
// Extract author (after "By " and before the next period)
if (reference.contains("By ")) {
int startIndex = reference.indexOf("By ") + 3;
int endIndex = reference.indexOf('.', startIndex);
info.author = reference.mid(startIndex, endIndex - startIndex).trimmed();
}
// Extract date (after "Date: " and before the next period)
if (reference.contains("Date: ")) {
int startIndex = reference.indexOf("Date: ") + 6;
int endIndex = reference.indexOf('.', startIndex);
info.date = reference.mid(startIndex, endIndex - startIndex).trimmed();
}
// Extract file name (after "In " and before the "[Context]")
if (reference.contains("In ") && reference.contains(". [Context]")) {
int startIndex = reference.indexOf("In ") + 3;
int endIndex = reference.indexOf(". [Context]", startIndex);
info.file = reference.mid(startIndex, endIndex - startIndex).trimmed();
}
// Extract page number (after "Page " and before the next space)
if (reference.contains("Page ")) {
int startIndex = reference.indexOf("Page ") + 5;
int endIndex = reference.indexOf(' ', startIndex);
if (endIndex == -1) endIndex = reference.length();
info.page = reference.mid(startIndex, endIndex - startIndex).toInt();
}
// Extract lines (after "Lines " and before the next space or hyphen)
if (reference.contains("Lines ")) {
int startIndex = reference.indexOf("Lines ") + 6;
int endIndex = reference.indexOf(' ', startIndex);
if (endIndex == -1) endIndex = reference.length();
int hyphenIndex = reference.indexOf('-', startIndex);
if (hyphenIndex != -1 && hyphenIndex < endIndex) {
info.from = reference.mid(startIndex, hyphenIndex - startIndex).toInt();
info.to = reference.mid(hyphenIndex + 1, endIndex - hyphenIndex - 1).toInt();
} else {
info.from = reference.mid(startIndex, endIndex - startIndex).toInt();
}
}
info.text = context;
sources.append(info);
}
c.sources = sources;
c.consolidatedSources = consolidateSources(sources);
}
}
beginInsertRows(QModelIndex(), m_chatItems.size(), m_chatItems.size());
m_chatItems.append(c);
endInsertRows();
}
emit countChanged();
return stream.status() == QDataStream::Ok;
}
QVector<QPair<QString, QString>> text() const
{
QVector<QPair<QString, QString>> result;
for (const auto &c : m_chatItems)
result << qMakePair(c.name, c.value);
return result;
}
Q_SIGNALS:
void countChanged();
void valueChanged(int index, const QString &value);
private:
QList<ChatItem> m_chatItems;
};
#endif // CHATMODEL_H
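For orientation, here is a rough sketch of how this model is typically driven: a prompt row and a response row are appended as a turn, the response row is updated as tokens arrive, and the finished transcript can be round-tripped with serialize/deserialize over a QDataStream (version 8 and later also carries the LocalDocs sources). This is illustrative usage of the class above, not code from the application.

#include "chatmodel.h"

#include <QBuffer>
#include <QDataStream>
#include <QIODevice>

void exampleChatModelUsage()
{
    ChatModel model;
    model.appendPrompt("Prompt: ", "What is Qt?");
    model.appendResponse("Response: ", "What is Qt?");

    const int row = model.count() - 1;                 // the response row just added
    model.updateValue(row, "Qt is a C++ application framework.");
    model.updateStopped(row, true);
    model.updateCurrentResponse(row, false);

    // Round-trip the transcript through the same versioned format used on disk.
    QBuffer buf;
    buf.open(QIODevice::ReadWrite);
    QDataStream out(&buf);
    model.serialize(out, /*version*/ 8);

    buf.seek(0);
    QDataStream in(&buf);
    ChatModel restored;
    restored.deserialize(in, /*version*/ 8);
}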

@ -1,32 +1,29 @@
#include "chatviewtextprocessor.h" #include "chatviewtextprocessor.h"
#include <QAbstractTextDocumentLayout>
#include <QBrush> #include <QBrush>
#include <QChar> #include <QChar>
#include <QClipboard> #include <QClipboard>
#include <QDebug>
#include <QFlag>
#include <QFont> #include <QFont>
#include <QFontMetricsF>
#include <QGuiApplication> #include <QGuiApplication>
#include <QList> // IWYU pragma: keep #include <QList>
#include <QPair> #include <QPainter>
#include <QQuickTextDocument> #include <QQuickTextDocument>
#include <QRegularExpression> #include <QRegularExpression>
#include <QStringList> // IWYU pragma: keep #include <QStringList>
#include <QTextBlock> // IWYU pragma: keep #include <QTextBlock>
#include <QTextCharFormat> // IWYU pragma: keep #include <QTextCharFormat>
#include <QTextCursor> #include <QTextCursor>
#include <QTextDocument> #include <QTextDocument>
#include <QTextDocumentFragment> #include <QTextDocumentFragment>
#include <QTextFrame> // IWYU pragma: keep #include <QTextFrame>
#include <QTextFrameFormat> // IWYU pragma: keep #include <QTextFrameFormat>
#include <QTextTableCell> #include <QTextTableCell>
#include <QtAssert> #include <QVariant>
#include <QtLogging> #include <Qt>
#include <QtGlobal>
#include <algorithm> #include <algorithm>
#include <utility>
enum Language { enum Language {
None, None,
@ -741,7 +738,7 @@ void SyntaxHighlighter::highlightBlock(const QString &text)
case Java: case Java:
rules = javaHighlightingRules(); break; rules = javaHighlightingRules(); break;
case Go: case Go:
rules = goHighlightingRules(); break; rules = javaHighlightingRules(); break;
case Json: case Json:
rules = jsonHighlightingRules(); break; rules = jsonHighlightingRules(); break;
case Latex: case Latex:
@ -970,6 +967,8 @@ void ChatViewTextProcessor::handleCodeBlocks()
cursor.setPosition(matchesCode[index].capturedEnd(), QTextCursor::KeepAnchor); cursor.setPosition(matchesCode[index].capturedEnd(), QTextCursor::KeepAnchor);
cursor.removeSelectedText(); cursor.removeSelectedText();
int startPos = cursor.position();
QTextFrameFormat frameFormat = frameFormatBase; QTextFrameFormat frameFormat = frameFormatBase;
QString capturedText = matchesCode[index].captured(1); QString capturedText = matchesCode[index].captured(1);
QString codeLanguage; QString codeLanguage;
@ -1005,7 +1004,7 @@ void ChatViewTextProcessor::handleCodeBlocks()
QTextFrame *mainFrame = cursor.currentFrame(); QTextFrame *mainFrame = cursor.currentFrame();
cursor.setCharFormat(textFormat); cursor.setCharFormat(textFormat);
cursor.insertFrame(frameFormat); QTextFrame *frame = cursor.insertFrame(frameFormat);
QTextTable *table = cursor.insertTable(codeLanguage.isEmpty() ? 1 : 2, 1, tableFormat); QTextTable *table = cursor.insertTable(codeLanguage.isEmpty() ? 1 : 2, 1, tableFormat);
if (!codeLanguage.isEmpty()) { if (!codeLanguage.isEmpty()) {
@ -1017,6 +1016,7 @@ void ChatViewTextProcessor::handleCodeBlocks()
headerCursor.insertText(codeLanguage); headerCursor.insertText(codeLanguage);
QTextTableCell copy = headerTable->cellAt(0, 1); QTextTableCell copy = headerTable->cellAt(0, 1);
QTextCursor copyCursor = copy.firstCursorPosition(); QTextCursor copyCursor = copy.firstCursorPosition();
int startPos = copyCursor.position();
CodeCopy newCopy; CodeCopy newCopy;
newCopy.text = lines.join("\n"); newCopy.text = lines.join("\n");
newCopy.startPos = copyCursor.position(); newCopy.startPos = copyCursor.position();

@ -3,15 +3,18 @@
#include <QColor> #include <QColor>
#include <QObject> #include <QObject>
#include <QQmlEngine> // IWYU pragma: keep #include <QQmlEngine>
#include <QQuickTextDocument> #include <QQuickTextDocument> // IWYU pragma: keep
#include <QRectF>
#include <QSizeF>
#include <QString> #include <QString>
#include <QSyntaxHighlighter> #include <QSyntaxHighlighter>
#include <QVector> // IWYU pragma: keep #include <QTextObjectInterface>
#include <QtTypes> #include <QVector>
// IWYU pragma: no_forward_declare QQuickTextDocument
class QPainter;
class QTextDocument;
class QTextFormat;
struct CodeColors { struct CodeColors {
Q_GADGET Q_GADGET

@ -3,7 +3,7 @@ function(sign_target_windows tgt)
add_custom_command(TARGET ${tgt} add_custom_command(TARGET ${tgt}
POST_BUILD POST_BUILD
COMMAND AzureSignTool.exe sign COMMAND AzureSignTool.exe sign
-du "https://www.nomic.ai/gpt4all" -du "https://gpt4all.io/index.html"
-kvu https://gpt4all.vault.azure.net -kvu https://gpt4all.vault.azure.net
-kvi "$Env{AZSignGUID}" -kvi "$Env{AZSignGUID}"
-kvs "$Env{AZSignPWD}" -kvs "$Env{AZSignPWD}"

@ -0,0 +1,6 @@
#ifndef CONFIG_H
#define CONFIG_H
#define APP_VERSION "@APP_VERSION@"
#endif // CONFIG_H

@ -1,2 +0,0 @@
set(OUTPUT_DIR "@CMAKE_BINARY_DIR@")
file(COPY ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/config DESTINATION ${OUTPUT_DIR}/cpack-config)

@ -1,50 +0,0 @@
set(COMPONENT_NAME_MAIN "gpt4all")
set(CPACK_GENERATOR "IFW")
set(CPACK_VERBATIM_VARIABLES YES)
set(CPACK_IFW_VERBOSE ON)
if (CMAKE_SYSTEM_NAME MATCHES Linux)
set(CPACK_IFW_ROOT "~/Qt/Tools/QtInstallerFramework/4.6")
set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-linux")
set(CPACK_IFW_TARGET_DIRECTORY "@HomeDir@/${COMPONENT_NAME_MAIN}")
elseif (CMAKE_SYSTEM_NAME MATCHES Windows)
set(CPACK_IFW_ROOT "C:/Qt/Tools/QtInstallerFramework/4.6")
set(CPACK_IFW_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.ico")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$")
set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-win64")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$")
set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-win64-arm")
else()
message(FATAL_ERROR "Unrecognized processor: ${CMAKE_SYSTEM_PROCESSOR}")
endif()
set(CPACK_IFW_TARGET_DIRECTORY "@HomeDir@\\${COMPONENT_NAME_MAIN}")
elseif (CMAKE_SYSTEM_NAME MATCHES Darwin)
set(CPACK_IFW_ROOT "~/Qt/Tools/QtInstallerFramework/4.6")
set(CPACK_IFW_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns")
set(CPACK_PACKAGE_FILE_NAME "${COMPONENT_NAME_MAIN}-installer-darwin")
set(CPACK_IFW_TARGET_DIRECTORY "@ApplicationsDir@/${COMPONENT_NAME_MAIN}")
endif()
set(CPACK_COMPONENTS_ALL ${COMPONENT_NAME_MAIN}) # exclude development components
if (APPLE AND GPT4ALL_SIGN_INSTALL)
list(APPEND CPACK_COMPONENTS_ALL maintenancetool)
endif()
set(CPACK_PACKAGE_INSTALL_DIRECTORY ${COMPONENT_NAME_MAIN})
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_PACKAGE_HOMEPAGE_URL "https://www.nomic.ai/gpt4all")
set(CPACK_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png")
set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE)
set(CPACK_PACKAGE_EXECUTABLES "GPT4All")
set(CPACK_CREATE_DESKTOP_LINKS "GPT4All")
set(CPACK_IFW_PACKAGE_NAME "GPT4All")
set(CPACK_IFW_PACKAGE_TITLE "GPT4All Installer")
set(CPACK_IFW_PACKAGE_PUBLISHER "Nomic, Inc.")
set(CPACK_IFW_PRODUCT_URL "https://www.nomic.ai/gpt4all")
set(CPACK_IFW_PACKAGE_WIZARD_STYLE "Aero")
set(CPACK_IFW_PACKAGE_LOGO "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png")
set(CPACK_IFW_PACKAGE_WINDOW_ICON "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-32.png")
set(CPACK_IFW_PACKAGE_WIZARD_SHOW_PAGE_LIST OFF)
set(CPACK_IFW_PACKAGE_CONTROL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/cmake/installer_control.qs")

@ -1,26 +1,17 @@
set(MACDEPLOYQT "@MACDEPLOYQT@") set(MACDEPLOYQT "@MACDEPLOYQT@")
set(COMPONENT_NAME_MAIN "@COMPONENT_NAME_MAIN@") set(COMPONENT_NAME_MAIN "@COMPONENT_NAME_MAIN@")
set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@") set(CMAKE_CURRENT_SOURCE_DIR "@CMAKE_CURRENT_SOURCE_DIR@")
set(GPT4ALL_SIGN_INSTALL "@GPT4ALL_SIGN_INSTALL@")
set(GPT4ALL_SIGNING_ID "@MAC_SIGNING_IDENTITY@") set(GPT4ALL_SIGNING_ID "@MAC_SIGNING_IDENTITY@")
set(CPACK_CONFIG_DIR "@CMAKE_BINARY_DIR@") execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2 -sign-for-notarization=${GPT4ALL_SIGNING_ID})
if (GPT4ALL_SIGN_INSTALL) file(GLOB MYLLAMALIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllama*)
set(MAC_NOTARIZE -sign-for-notarization=${GPT4ALL_SIGNING_ID}) file(GLOB MYLLMODELLIBS ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/lib/libllmodel.*)
endif() file(COPY ${MYLLAMALIBS}
execute_process(COMMAND ${MACDEPLOYQT} ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app -qmldir=${CMAKE_CURRENT_SOURCE_DIR} -verbose=2 ${MAC_NOTARIZE}) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY ${MYLLMODELLIBS}
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data/bin/gpt4all.app/Contents/Frameworks)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-32.png" file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-32.png"
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png" file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/icons/gpt4all-48.png"
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns" file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/resources/gpt4all.icns"
DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data) DESTINATION ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/${COMPONENT_NAME_MAIN}/data)
if (GPT4ALL_SIGN_INSTALL)
# Create signed MaintenanceTool
set(MT_DATA_DIR ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/packages/maintenancetool/data)
file(MAKE_DIRECTORY ${MT_DATA_DIR})
execute_process(
COMMAND binarycreator --config ${CPACK_CONFIG_DIR}/cpack-config/config/config.xml --create-maintenancetool --sign ${GPT4ALL_SIGNING_ID}
WORKING_DIRECTORY ${MT_DATA_DIR}
)
endif()

@ -1,12 +0,0 @@
if(NOT DEFINED URL OR NOT DEFINED OUTPUT_PATH OR NOT DEFINED EXPECTED_MD5)
message(FATAL_ERROR "Usage: cmake -DURL=<url> -DOUTPUT_PATH=<path> -DEXPECTED_MD5=<md5> -P download_model.cmake")
endif()
message(STATUS "Downloading model from ${URL} to ${OUTPUT_PATH} ...")
file(DOWNLOAD "${URL}" "${OUTPUT_PATH}" EXPECTED_MD5 "${EXPECTED_MD5}" STATUS status)
list(GET status 0 status_code)
if(NOT status_code EQUAL 0)
message(FATAL_ERROR "Failed to download model: ${status}")
endif()

@ -1,44 +0,0 @@
var finishedText = null;
function cancelInstaller(message) {
installer.setDefaultPageVisible(QInstaller.Introduction, false);
installer.setDefaultPageVisible(QInstaller.TargetDirectory, false);
installer.setDefaultPageVisible(QInstaller.ComponentSelection, false);
installer.setDefaultPageVisible(QInstaller.ReadyForInstallation, false);
installer.setDefaultPageVisible(QInstaller.StartMenuSelection, false);
installer.setDefaultPageVisible(QInstaller.PerformInstallation, false);
installer.setDefaultPageVisible(QInstaller.LicenseCheck, false);
finishedText = message;
installer.setCanceled();
}
function vercmp(a, b) {
return a.localeCompare(b, undefined, { numeric: true, sensitivity: "base" });
}
function Controller() {
}
Controller.prototype.TargetDirectoryPageCallback = function() {
var failedReq = null;
if (systemInfo.productType === "ubuntu" && vercmp(systemInfo.productVersion, "22.04") < 0) {
failedReq = "Ubuntu 22.04 LTS";
} else if (systemInfo.productType === "macos" && vercmp(systemInfo.productVersion, "12.6") < 0) {
failedReq = "macOS Monterey 12.6";
}
if (failedReq !== null) {
cancelInstaller(
"Installation cannot continue because GPT4All does not support your operating system: " +
`${systemInfo.prettyProductName}<br/><br/>` +
`GPT4All requires ${failedReq} or newer.`
);
}
}
Controller.prototype.FinishedPageCallback = function() {
const widget = gui.currentPageWidget();
if (widget != null && finishedText != null) {
widget.MessageLabel.setText(finishedText);
}
}

@ -1,19 +0,0 @@
function Component()
{
component.ifwVersion = installer.value("FrameworkVersion");
installer.installationStarted.connect(this, Component.prototype.onInstallationStarted);
}
Component.prototype.onInstallationStarted = function()
{
if (component.updateRequested() || component.installationRequested()) {
if (installer.value("os") == "win") {
component.installerbaseBinaryPath = "@TargetDir@/installerbase.exe";
} else if (installer.value("os") == "x11") {
component.installerbaseBinaryPath = "@TargetDir@/installerbase";
} else if (installer.value("os") == "mac") {
component.installerbaseBinaryPath = "@TargetDir@/MaintenanceTool.app";
}
installer.setInstallerBaseBinary(component.installerbaseBinaryPath);
}
}

@ -6,7 +6,8 @@ Component.prototype.beginInstallation = function() {
targetDirectory = installer.value("TargetDir"); targetDirectory = installer.value("TargetDir");
}; };
Component.prototype.createOperations = function() { Component.prototype.createOperations = function()
{
try { try {
// call the base create operations function // call the base create operations function
component.createOperations(); component.createOperations();
@ -29,7 +30,7 @@ Component.prototype.createOperations = function() {
"workingDirectory=" + targetDirectory + "/bin", "workingDirectory=" + targetDirectory + "/bin",
"iconPath=" + targetDirectory + "/gpt4all.ico", "iconPath=" + targetDirectory + "/gpt4all.ico",
"iconId=0", "description=Open GPT4All"); "iconId=0", "description=Open GPT4All");
} else if (systemInfo.productType === "macos") { } else if (systemInfo.productType === "macos" || systemInfo.productType === "osx") {
var gpt4allAppPath = targetDirectory + "/bin/gpt4all.app"; var gpt4allAppPath = targetDirectory + "/bin/gpt4all.app";
var symlinkPath = targetDirectory + "/../GPT4All.app"; var symlinkPath = targetDirectory + "/../GPT4All.app";
// Remove the symlink if it already exists // Remove the symlink if it already exists
@ -55,7 +56,7 @@ Component.prototype.createOperationsForArchive = function(archive)
{ {
component.createOperationsForArchive(archive); component.createOperationsForArchive(archive);
if (systemInfo.productType === "macos") { if (systemInfo.productType === "macos" || systemInfo.productType === "osx") {
var uninstallTargetDirectory = installer.value("TargetDir"); var uninstallTargetDirectory = installer.value("TargetDir");
var symlinkPath = uninstallTargetDirectory + "/../GPT4All.app"; var symlinkPath = uninstallTargetDirectory + "/../GPT4All.app";

(File diff suppressed because it is too large.)
@ -1,76 +1,52 @@
#ifndef DATABASE_H #ifndef DATABASE_H
#define DATABASE_H #define DATABASE_H
#include "embllm.h" #include "embllm.h" // IWYU pragma: keep
#include <QByteArray>
#include <QChar>
#include <QDateTime> #include <QDateTime>
#include <QElapsedTimer>
#include <QFileInfo> #include <QFileInfo>
#include <QHash> #include <QHash>
#include <QLatin1String> #include <QLatin1String>
#include <QList> #include <QList>
#include <QMap>
#include <QObject> #include <QObject>
#include <QQueue>
#include <QSet> #include <QSet>
#include <QSqlDatabase> #include <QSqlDatabase>
#include <QString> #include <QString>
#include <QStringList> // IWYU pragma: keep #include <QStringList>
#include <QThread> #include <QThread>
#include <QUrl> #include <QUrl>
#include <QVector> // IWYU pragma: keep #include <QVector>
#include <QtAssert>
#include <atomic>
#include <cstddef> #include <cstddef>
#include <list>
#include <map>
#include <memory>
#include <optional>
#include <utility>
#include <vector> // IWYU pragma: keep
using namespace Qt::Literals::StringLiterals; using namespace Qt::Literals::StringLiterals;
class Database;
class DocumentReader;
class QFileSystemWatcher; class QFileSystemWatcher;
class QSqlQuery; class QSqlError;
class QTextStream; class QTextStream;
class QTimer; class QTimer;
/* Version 0: GPT4All v2.4.3, full-text search /* Version 0: GPT4All v2.4.3, full-text search
* Version 1: GPT4All v2.5.3, embeddings in hsnwlib * Version 1: GPT4All v2.5.3, embeddings in hsnwlib
* Version 2: GPT4All v3.0.0, embeddings in sqlite * Version 2: GPT4All v3.0.0, embeddings in sqlite */
* Version 3: GPT4All v3.4.0, hybrid search
*/
// minimum supported version // minimum supported version
static const int LOCALDOCS_MIN_VER = 1; static const int LOCALDOCS_MIN_VER = 1;
// FIXME: (Adam) The next time we bump the version we should add triggers to manage the fts external
// content table as recommended in the official documentation to keep the fts index in sync
// See: https://www.sqlite.org/fts5.html#external_content_tables
// FIXME: (Adam) The fts virtual table should include the chunk_id explicitly instead of relying upon
// the id of the two tables to be in sync
// current version // current version
static const int LOCALDOCS_VERSION = 3; static const int LOCALDOCS_VERSION = 2;
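The FIXME above points at the trigger-based pattern the SQLite documentation recommends for external-content FTS5 tables: insert and delete triggers on the content table keep the full-text index in sync instead of relying on the two tables sharing row ids. Below is a hedged sketch of what that could look like through Qt's SQL layer; the chunks / chunks_fts table and column names are assumptions for illustration, not the actual LocalDocs schema.

#include <QSqlQuery>
#include <QString>

// Hypothetical schema names; see https://www.sqlite.org/fts5.html#external_content_tables
static bool installFtsSyncTriggers(QSqlQuery &q)
{
    return q.exec(QStringLiteral(
               "CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN"
               "  INSERT INTO chunks_fts(rowid, text) VALUES (new.id, new.text);"
               " END"))
        && q.exec(QStringLiteral(
               "CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN"
               "  INSERT INTO chunks_fts(chunks_fts, rowid, text) VALUES ('delete', old.id, old.text);"
               " END"));
}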
struct DocumentInfo struct DocumentInfo
{ {
using key_type = std::pair<int, QString>; int folder;
QFileInfo doc;
int folder; int currentPage = 0;
QFileInfo file; size_t currentPosition = 0;
bool currentlyProcessing = false; bool currentlyProcessing = false;
bool isPdf() const {
key_type key() const { return {folder, file.canonicalFilePath()}; } // for comparison return doc.suffix().compare(u"pdf"_s, Qt::CaseInsensitive) == 0;
}
bool isPdf () const { return !file.suffix().compare("pdf"_L1, Qt::CaseInsensitive); }
bool isDocx() const { return !file.suffix().compare("docx"_L1, Qt::CaseInsensitive); }
}; };
struct ResultInfo { struct ResultInfo {
@ -165,36 +141,6 @@ struct CollectionItem {
}; };
Q_DECLARE_METATYPE(CollectionItem) Q_DECLARE_METATYPE(CollectionItem)
class ChunkStreamer {
public:
enum class Status { DOC_COMPLETE, INTERRUPTED, ERROR, BINARY_SEEN };
explicit ChunkStreamer(Database *database);
~ChunkStreamer();
void setDocument(DocumentInfo doc, int documentId, const QString &embeddingModel);
std::optional<DocumentInfo::key_type> currentDocKey() const;
void reset();
Status step();
private:
Database *m_database;
std::optional<DocumentInfo::key_type> m_docKey;
std::unique_ptr<DocumentReader> m_reader; // may be invalid, always compare key first
int m_documentId;
QString m_embeddingModel;
QString m_title;
QString m_author;
QString m_subject;
QString m_keywords;
// working state
QString m_chunk; // has a trailing space for convenience
int m_nChunkWords = 0;
int m_page = 0;
};
class Database : public QObject class Database : public QObject
{ {
Q_OBJECT Q_OBJECT
@ -206,7 +152,6 @@ public:
public Q_SLOTS: public Q_SLOTS:
void start(); void start();
bool scanQueueInterrupted() const;
void scanQueueBatch(); void scanQueueBatch();
void scanDocuments(int folder_id, const QString &folder_path); void scanDocuments(int folder_id, const QString &folder_path);
void forceIndexing(const QString &collection, const QString &embedding_model); void forceIndexing(const QString &collection, const QString &embedding_model);
@ -236,12 +181,6 @@ private:
void commit(); void commit();
void rollback(); void rollback();
bool addChunk(QSqlQuery &q, int document_id, const QString &chunk_text, const QString &file,
const QString &title, const QString &author, const QString &subject, const QString &keywords,
int page, int from, int to, int words, int *chunk_id);
bool refreshDocumentIdCache(QSqlQuery &q);
bool removeChunksByDocumentId(QSqlQuery &q, int document_id);
bool sqlRemoveDocsByFolderPath(QSqlQuery &q, const QString &path);
bool hasContent(); bool hasContent();
// not found -> 0, , exists and has content -> 1, error -> -1 // not found -> 0, , exists and has content -> 1, error -> -1
int openDatabase(const QString &modelPath, bool create = true, int ver = LOCALDOCS_VERSION); int openDatabase(const QString &modelPath, bool create = true, int ver = LOCALDOCS_VERSION);
@ -255,35 +194,19 @@ private:
void appendChunk(const EmbeddingChunk &chunk); void appendChunk(const EmbeddingChunk &chunk);
void sendChunkList(); void sendChunkList();
void updateFolderToIndex(int folder_id, size_t countForFolder, bool sendChunks = true); void updateFolderToIndex(int folder_id, size_t countForFolder, bool sendChunks = true);
void handleDocumentError(const QString &errorMessage,
int document_id, const QString &document_path, const QSqlError &error);
size_t countOfDocuments(int folder_id) const; size_t countOfDocuments(int folder_id) const;
size_t countOfBytes(int folder_id) const; size_t countOfBytes(int folder_id) const;
DocumentInfo dequeueDocument(); DocumentInfo dequeueDocument();
void removeFolderFromDocumentQueue(int folder_id); void removeFolderFromDocumentQueue(int folder_id);
void enqueueDocumentInternal(DocumentInfo &&info, bool prepend = false); void enqueueDocumentInternal(const DocumentInfo &info, bool prepend = false);
void enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos); void enqueueDocuments(int folder_id, const QVector<DocumentInfo> &infos);
void scanQueue(); void scanQueue();
bool ftsIntegrityCheck();
bool cleanDB(); bool cleanDB();
void addFolderToWatch(const QString &path); void addFolderToWatch(const QString &path);
void removeFolderFromWatch(const QString &path); void removeFolderFromWatch(const QString &path);
static QList<int> searchEmbeddingsHelper(const std::vector<float> &query, QSqlQuery &q, int nNeighbors); QList<int> searchEmbeddings(const std::vector<float> &query, const QList<QString> &collections, int nNeighbors);
QList<int> searchEmbeddings(const std::vector<float> &query, const QList<QString> &collections,
int nNeighbors);
struct BM25Query {
QString input;
QString query;
bool isExact = false;
int qlength = 0;
int ilength = 0;
int rlength = 0;
};
QList<Database::BM25Query> queriesForFTS5(const QString &input);
QList<int> searchBM25(const QString &query, const QList<QString> &collections, BM25Query &bm25q, int k);
QList<int> scoreChunks(const std::vector<float> &query, const QList<int> &chunks);
float computeBM25Weight(const BM25Query &bm25q);
QList<int> reciprocalRankFusion(const std::vector<float> &query, const QList<int> &embeddingResults,
const QList<int> &bm25Results, const BM25Query &bm25q, int k);
QList<int> searchDatabase(const QString &query, const QList<QString> &collections, int k);
void setStartUpdateTime(CollectionItem &item); void setStartUpdateTime(CollectionItem &item);
void setLastUpdateTime(CollectionItem &item); void setLastUpdateTime(CollectionItem &item);
@ -300,9 +223,8 @@ private:
QSqlDatabase m_db; QSqlDatabase m_db;
int m_chunkSize; int m_chunkSize;
QStringList m_scannedFileExtensions; QStringList m_scannedFileExtensions;
QTimer *m_scanIntervalTimer; QTimer *m_scanTimer;
QElapsedTimer m_scanDurationTimer; QMap<int, QQueue<DocumentInfo>> m_docsToScan;
std::map<int, std::list<DocumentInfo>> m_docsToScan;
QList<ResultInfo> m_retrieve; QList<ResultInfo> m_retrieve;
QThread m_dbThread; QThread m_dbThread;
QFileSystemWatcher *m_watcher; QFileSystemWatcher *m_watcher;
@ -311,10 +233,6 @@ private:
QVector<EmbeddingChunk> m_chunkList; QVector<EmbeddingChunk> m_chunkList;
QHash<int, CollectionItem> m_collectionMap; // used only for tracking indexing/embedding progress QHash<int, CollectionItem> m_collectionMap; // used only for tracking indexing/embedding progress
std::atomic<bool> m_databaseValid; std::atomic<bool> m_databaseValid;
ChunkStreamer m_chunkStreamer;
QSet<int> m_documentIdCache; // cached list of documents with chunks for fast lookup
friend class ChunkStreamer;
}; };
#endif // DATABASE_H #endif // DATABASE_H
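The hybrid-search members above (searchBM25, searchEmbeddings, reciprocalRankFusion) merge a keyword ranking with a vector ranking. For reference, the textbook reciprocal-rank-fusion step looks roughly like the sketch below; the real implementation additionally applies a query-dependent BM25 weight via computeBM25Weight, so treat this as the general formula rather than the application's exact scoring.

#include <QHash>
#include <QList>

#include <algorithm>

// Generic reciprocal rank fusion over two ranked lists of chunk ids.
// 60 is the conventional RRF damping constant.
static QList<int> fuseRanks(const QList<int> &embeddingResults, const QList<int> &bm25Results, int k)
{
    QHash<int, float> score;
    for (int rank = 0; rank < embeddingResults.size(); ++rank)
        score[embeddingResults[rank]] += 1.0f / (60.0f + rank + 1);
    for (int rank = 0; rank < bm25Results.size(); ++rank)
        score[bm25Results[rank]] += 1.0f / (60.0f + rank + 1);

    QList<int> fused = score.keys();
    std::sort(fused.begin(), fused.end(),
              [&](int a, int b) { return score.value(a) > score.value(b); });
    if (fused.size() > k)
        fused.resize(k);
    return fused;
}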

View File

@ -1,51 +0,0 @@
include(FetchContent)
set(BUILD_SHARED_LIBS OFF)
set(FMT_INSTALL OFF)
add_subdirectory(fmt)
set(QAPPLICATION_CLASS QApplication)
add_subdirectory(SingleApplication)
set(DUCKX_INSTALL OFF)
add_subdirectory(DuckX)
set(QT_VERSION_MAJOR 6)
add_subdirectory(QXlsx/QXlsx)
if (NOT GPT4ALL_USING_QTPDF)
# If we do not use QtPDF, we need to get PDFium.
set(GPT4ALL_PDFIUM_TAG "chromium/6996")
if (CMAKE_SYSTEM_NAME MATCHES Linux)
FetchContent_Declare(
pdfium
URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-linux-x64.tgz"
URL_HASH "SHA256=68b381b87efed539f2e33ae1e280304c9a42643a878cc296c1d66a93b0cb4335"
)
elseif (CMAKE_SYSTEM_NAME MATCHES Windows)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64|amd64)$")
FetchContent_Declare(
pdfium
URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-win-x64.tgz"
URL_HASH "SHA256=83e714c302ceacccf403826d5cb57ea39b77f393d83b8d5781283012774a9378"
)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|AARCH64|arm64|ARM64)$")
FetchContent_Declare(
pdfium
URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-win-arm64.tgz"
URL_HASH "SHA256=78e77e871453a4915cbf66fb381b951c9932f88a747c6b2b33c9f27ec2371445"
)
endif()
elseif (CMAKE_SYSTEM_NAME MATCHES Darwin)
FetchContent_Declare(
pdfium
URL "https://github.com/bblanchon/pdfium-binaries/releases/download/${GPT4ALL_PDFIUM_TAG}/pdfium-mac-univ.tgz"
URL_HASH "SHA256=e7577f3242ff9c1df50025f9615673a43601a201bc51ee4792975f98920793a2"
)
endif()
FetchContent_MakeAvailable(pdfium)
find_package(PDFium REQUIRED PATHS "${pdfium_SOURCE_DIR}" NO_DEFAULT_PATH)
endif()

@ -1 +0,0 @@
Subproject commit 6e31dfb280e2107fbf4f6a15098c38b014f1bbcc

@ -1 +0,0 @@
Subproject commit 29e81b369128525749dcb6516195b6b062eda955

@ -1 +0,0 @@
Subproject commit 21bdef01eddcbd78044eea1d50b9dee08d218ff2

@ -1 +0,0 @@
Subproject commit 0c9fce2ffefecfdce794e1859584e25877b7b592

@ -1 +0,0 @@
Subproject commit 606b6347edf0758c531abb6c36743e09a4c48a84

@ -1 +0,0 @@
Subproject commit e97bb2442cd6ab3d5bb5f5a3e8a1f7d6081d613b

@ -1 +0,0 @@
Subproject commit 9e59f1036657303b29eaf709945f339e403e5f2f

@ -1,11 +0,0 @@
-r test-requirements.txt
# dev tools
flake8~=7.1
mypy~=1.12
pytype>=2024.10.11
wemake-python-styleguide~=0.19.2
# type stubs and other optional modules
types-requests~=2.32
urllib3[socks]

@ -10,37 +10,32 @@
#include <QDebug> #include <QDebug>
#include <QGlobalStatic> #include <QGlobalStatic>
#include <QGuiApplication> #include <QGuiApplication>
#include <QIODevice> // IWYU pragma: keep #include <QIODevice>
#include <QJsonArray> #include <QJsonArray>
#include <QJsonDocument> #include <QJsonDocument>
#include <QJsonObject> #include <QJsonObject>
#include <QJsonValue> #include <QJsonValue>
#include <QKeyValueIterator>
#include <QLocale> #include <QLocale>
#include <QNetworkRequest> #include <QNetworkRequest>
#include <QPair> // IWYU pragma: keep #include <QPair>
#include <QRegularExpression>
#include <QRegularExpressionMatch>
#include <QSettings> #include <QSettings>
#include <QSslConfiguration> #include <QSslConfiguration>
#include <QSslSocket> #include <QSslSocket>
#include <QStringList> // IWYU pragma: keep #include <QStringList>
#include <QTextStream> #include <QTextStream>
#include <QUrl> #include <QUrl>
#include <QVariant> #include <QVariant>
#include <QVector> // IWYU pragma: keep #include <QVector>
#include <Qt> #include <Qt>
#include <QtAssert>
#include <QtLogging> #include <QtLogging>
#include <QtMinMax>
#include <algorithm>
#include <compare> #include <compare>
#include <cstddef> #include <cstddef>
#include <utility> #include <utility>
using namespace Qt::Literals::StringLiterals; using namespace Qt::Literals::StringLiterals;
class MyDownload: public Download { }; class MyDownload: public Download { };
Q_GLOBAL_STATIC(MyDownload, downloadInstance) Q_GLOBAL_STATIC(MyDownload, downloadInstance)
Download *Download::globalInstance() Download *Download::globalInstance()
@ -63,6 +58,11 @@ Download::Download()
m_startTime = QDateTime::currentDateTime(); m_startTime = QDateTime::currentDateTime();
} }
static bool operator==(const ReleaseInfo& lhs, const ReleaseInfo& rhs)
{
return lhs.version == rhs.version;
}
std::strong_ordering Download::compareAppVersions(const QString &a, const QString &b) std::strong_ordering Download::compareAppVersions(const QString &a, const QString &b)
{ {
static QRegularExpression versionRegex(R"(^(\d+(?:\.\d+){0,2})(-.+)?$)"); static QRegularExpression versionRegex(R"(^(\d+(?:\.\d+){0,2})(-.+)?$)");
@ -396,9 +396,8 @@ void Download::parseReleaseJsonFile(const QByteArray &jsonData)
QJsonObject obj = value.toObject(); QJsonObject obj = value.toObject();
QString version = obj["version"].toString(); QString version = obj["version"].toString();
// "notes" field intentionally has a trailing newline for compatibility QString notes = obj["notes"].toString();
QString notes = obj["notes"].toString().trimmed(); QString contributors = obj["contributors"].toString();
QString contributors = obj["contributors"].toString().trimmed();
ReleaseInfo releaseInfo; ReleaseInfo releaseInfo;
releaseInfo.version = version; releaseInfo.version = version;
releaseInfo.notes = notes; releaseInfo.notes = notes;

@ -13,14 +13,10 @@
#include <QSslError> #include <QSslError>
#include <QString> #include <QString>
#include <QThread> #include <QThread>
#include <QtTypes> #include <QtGlobal>
// IWYU pragma: no_forward_declare QFile
// IWYU pragma: no_forward_declare QList
// IWYU pragma: no_forward_declare QSslError
class QByteArray; class QByteArray;
struct ReleaseInfo { struct ReleaseInfo {
Q_GADGET Q_GADGET
Q_PROPERTY(QString version MEMBER version) Q_PROPERTY(QString version MEMBER version)

@ -1,35 +1,35 @@
#include "embllm.h" #include "embllm.h"
#include "modellist.h"
#include "mysettings.h" #include "mysettings.h"
#include <gpt4all-backend/llmodel.h> #include "../gpt4all-backend/llmodel.h"
#include <QCoreApplication> #include <QCoreApplication>
#include <QDebug> #include <QDebug>
#include <QFile>
#include <QFileInfo> #include <QFileInfo>
#include <QGuiApplication> #include <QGuiApplication>
#include <QIODevice>
#include <QJsonArray> #include <QJsonArray>
#include <QJsonDocument> #include <QJsonDocument>
#include <QJsonObject> #include <QJsonObject>
#include <QJsonValue>
#include <QList> #include <QList>
#include <QMutexLocker> // IWYU pragma: keep #include <QMutexLocker>
#include <QNetworkAccessManager> #include <QNetworkAccessManager>
#include <QNetworkReply> #include <QNetworkReply>
#include <QNetworkRequest> #include <QNetworkRequest>
#include <QUrl> #include <QUrl>
#include <Qt> #include <Qt>
#include <QtAssert> #include <QtGlobal>
#include <QtLogging> #include <QtLogging>
#include <exception> #include <exception>
#include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
using namespace Qt::Literals::StringLiterals; using namespace Qt::Literals::StringLiterals;
static const QString EMBEDDING_MODEL_NAME = u"nomic-embed-text-v1.5"_s; static const QString EMBEDDING_MODEL_NAME = u"nomic-embed-text-v1.5"_s;
static const QString LOCAL_EMBEDDING_MODEL = u"nomic-embed-text-v1.5.f16.gguf"_s; static const QString LOCAL_EMBEDDING_MODEL = u"nomic-embed-text-v1.5.f16.gguf"_s;
@ -359,11 +359,8 @@ void EmbeddingLLMWorker::handleFinished()
if (retrievedData.isValid() && retrievedData.canConvert<QVector<EmbeddingChunk>>()) if (retrievedData.isValid() && retrievedData.canConvert<QVector<EmbeddingChunk>>())
chunks = retrievedData.value<QVector<EmbeddingChunk>>(); chunks = retrievedData.value<QVector<EmbeddingChunk>>();
QVariant response; QVariant response = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute);
if (reply->error() != QNetworkReply::NoError) { Q_ASSERT(response.isValid());
response = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute);
Q_ASSERT(response.isValid());
}
bool ok; bool ok;
int code = response.toInt(&ok); int code = response.toInt(&ok);
if (!ok || code != 200) { if (!ok || code != 200) {

@ -5,10 +5,10 @@
#include <QMutex> #include <QMutex>
#include <QObject> #include <QObject>
#include <QString> #include <QString>
#include <QStringList> // IWYU pragma: keep #include <QStringList>
#include <QThread> #include <QThread>
#include <QVariant> #include <QVariant>
#include <QVector> // IWYU pragma: keep #include <QVector>
#include <atomic> #include <atomic>
#include <vector> #include <vector>
@ -16,7 +16,6 @@
class LLModel; class LLModel;
class QNetworkAccessManager; class QNetworkAccessManager;
struct EmbeddingChunk { struct EmbeddingChunk {
QString model; // TODO(jared): use to select model QString model; // TODO(jared): use to select model
int folder_id; int folder_id;

@ -32,7 +32,7 @@
<image>https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/flatpak-manifest/screenshots/model.png</image> <image>https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/flatpak-manifest/screenshots/model.png</image>
</screenshot> </screenshot>
</screenshots> </screenshots>
<url type="homepage">https://www.nomic.ai/gpt4all</url> <url type="homepage">https://gpt4all.io</url>
<url type="bugtracker">https://github.com/nomic-ai/gpt4all/issues</url> <url type="bugtracker">https://github.com/nomic-ai/gpt4all/issues</url>
<url type="vcs-browser">https://github.com/nomic-ai/gpt4all</url> <url type="vcs-browser">https://github.com/nomic-ai/gpt4all</url>
<releases> <releases>

@ -1 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" width="32" height="32" fill="#000000" viewBox="0 0 256 256"><path d="M227.31,73.37,182.63,28.68a16,16,0,0,0-22.63,0L36.69,152A15.86,15.86,0,0,0,32,163.31V208a16,16,0,0,0,16,16H92.69A15.86,15.86,0,0,0,104,219.31L227.31,96a16,16,0,0,0,0-22.63ZM92.69,208H48V163.31l88-88L180.69,120ZM192,108.68,147.31,64l24-24L216,84.68Z"></path></svg> <svg width="32" height="32" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M28.4138 9.17125L22.8288 3.585C22.643 3.39924 22.4225 3.25188 22.1799 3.15134C21.9372 3.0508 21.6771 2.99905 21.4144 2.99905C21.1517 2.99905 20.8916 3.0508 20.6489 3.15134C20.4062 3.25188 20.1857 3.39924 20 3.585L4.58626 19C4.39973 19.185 4.25185 19.4053 4.15121 19.648C4.05057 19.8907 3.99917 20.151 4.00001 20.4138V26C4.00001 26.5304 4.21072 27.0391 4.5858 27.4142C4.96087 27.7893 5.46958 28 6.00001 28H11.5863C11.849 28.0008 12.1093 27.9494 12.352 27.8488C12.5947 27.7482 12.815 27.6003 13 27.4138L28.4138 12C28.5995 11.8143 28.7469 11.5938 28.8474 11.3511C28.948 11.1084 28.9997 10.8483 28.9997 10.5856C28.9997 10.3229 28.948 10.0628 28.8474 9.82015C28.7469 9.57747 28.5995 9.35698 28.4138 9.17125ZM6.41376 20L17 9.41375L19.0863 11.5L8.50001 22.085L6.41376 20ZM6.00001 22.4138L9.58626 26H6.00001V22.4138ZM12 25.5863L9.91376 23.5L20.5 12.9138L22.5863 15L12 25.5863ZM24 13.5863L18.4138 8L21.4138 5L27 10.585L24 13.5863Z" fill="black"/>
</svg>

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 256"><rect width="256" height="256" fill="none"/><path d="M36,152v56H52a28,28,0,0,0,0-56Z" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><path d="M216,200.87A22.12,22.12,0,0,1,200,208c-13.26,0-24-12.54-24-28s10.74-28,24-28a22.12,22.12,0,0,1,16,7.13" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><path d="M48,112V40a8,8,0,0,1,8-8h96l56,56v24" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><polyline points="152 32 152 88 208 88" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><ellipse cx="128" cy="180" rx="24" ry="28" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/></svg>

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 256"><rect width="256" height="256" fill="none"/><line x1="152" y1="96" x2="208" y2="96" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><line x1="152" y1="160" x2="208" y2="160" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><path d="M64,72V40a8,8,0,0,1,8-8H200a8,8,0,0,1,8,8V216a8,8,0,0,1-8,8H72a8,8,0,0,1-8-8V184" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><polyline points="64 104 76 152 92 120 108 152 120 104" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><rect x="32" y="72" width="120" height="112" rx="8" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/></svg>

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 256"><rect width="256" height="256" fill="none"/><polyline points="148 208 120 208 120 152" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><path d="M48,112V40a8,8,0,0,1,8-8h96l56,56v24" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><polyline points="152 32 152 88 208 88" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><line x1="48" y1="152" x2="88" y2="208" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><line x1="88" y1="152" x2="48" y2="208" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/><path d="M203.9,153.6s-29.43-7.78-31.8,11,38.43,10.12,35.78,30.72c-2.47,19.16-31.78,11-31.78,11" fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="16"/></svg>

@ -1,3 +0,0 @@
<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 26.3 26.3"><defs><style>.cls-1{fill:#f05237;}.cls-2{fill:#fff;}</style></defs><g id="Layer_2" data-name="Layer 2"><g id="Content"><circle class="cls-1" cx="13.15" cy="13.15" r="13.15"/><path class="cls-2" d="M13.17,6.88a4.43,4.43,0,0,0,0,8.85h1.45V14.07H13.17a2.77,2.77,0,1,1,2.77-2.76v4.07a2.74,2.74,0,0,1-4.67,2L10.1,18.51a4.37,4.37,0,0,0,3.07,1.29h.06a4.42,4.42,0,0,0,4.36-4.4V11.2a4.43,4.43,0,0,0-4.42-4.32"/></g></g></svg>

View File

@ -1 +0,0 @@
<svg viewBox="0 0 512 512" xmlns="http://www.w3.org/2000/svg" fill-rule="evenodd" clip-rule="evenodd" stroke-linejoin="round" stroke-miterlimit="2"><path d="M189.08 303.228H94.587l.044-94.446h94.497l-.048 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M283.528 397.674h-94.493l.044-94.446h94.496l-.047 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M283.575 303.228H189.08l.046-94.446h94.496l-.047 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M378.07 303.228h-94.495l.044-94.446h94.498l-.047 94.446zM189.128 208.779H94.633l.044-94.448h94.498l-.047 94.448zM378.115 208.779h-94.494l.045-94.448h94.496l-.047 94.448zM94.587 303.227H.093l.044-96.017h94.496l-.046 96.017z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M94.633 208.779H.138l.046-94.448H94.68l-.047 94.448z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M94.68 115.902H.185L.23 19.885h94.498l-.047 96.017zM472.657 114.331h-94.495l.044-94.446h94.497l-.046 94.446zM94.54 399.244H.046l.044-97.588h94.497l-.047 97.588z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M94.495 492.123H0l.044-94.446H94.54l-.045 94.446zM472.563 303.228H378.07l.044-94.446h94.496l-.047 94.446zM472.61 208.779h-94.495l.044-94.448h94.498l-.047 94.448z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M472.517 397.674h-94.494l.044-94.446h94.497l-.047 94.446z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M472.47 492.121h-94.493l.044-96.017h94.496l-.047 96.017z" fill="#1c1c1b" fill-rule="nonzero"/><path d="M228.375 303.22h-96.061l.046-94.446h96.067l-.052 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M322.827 397.666h-94.495l.044-96.018h94.498l-.047 96.018z" fill="#ff4900" fill-rule="nonzero"/><path d="M324.444 303.22h-97.636l.046-94.446h97.638l-.048 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M418.938 303.22h-96.064l.045-94.446h96.066l-.047 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M228.423 208.77H132.36l.045-94.445h96.066l-.05 94.446zM418.985 208.77H322.92l.044-94.445h96.069l-.048 94.446z" fill="#ffa300" fill-rule="nonzero"/><path d="M133.883 304.79H39.392l.044-96.017h94.496l-.049 96.017z" fill="#ff7000" fill-rule="nonzero"/><path d="M133.929 208.77H39.437l.044-95.445h94.496l-.048 95.445z" fill="#ffa300" fill-rule="nonzero"/><path d="M133.976 114.325H39.484l.044-94.448h94.497l-.05 94.448zM511.954 115.325h-94.493l.044-95.448h94.497l-.048 95.448z" fill="#ffce00" fill-rule="nonzero"/><path d="M133.836 399.667H39.345l.044-96.447h94.496l-.049 96.447z" fill="#ff4900" fill-rule="nonzero"/><path d="M133.79 492.117H39.3l.044-94.448h94.496l-.049 94.448z" fill="#ff0107" fill-rule="nonzero"/><path d="M511.862 303.22h-94.495l.046-94.446h94.496l-.047 94.446z" fill="#ff7000" fill-rule="nonzero"/><path d="M511.907 208.77h-94.493l.044-94.445h94.496l-.047 94.446z" fill="#ffa300" fill-rule="nonzero"/><path d="M511.815 398.666h-94.493l.044-95.447h94.496l-.047 95.447z" fill="#ff4900" fill-rule="nonzero"/><path d="M511.77 492.117h-94.496l.046-94.448h94.496l-.047 94.448z" fill="#ff0107" fill-rule="nonzero"/></svg>

View File

@ -1,2 +0,0 @@
<?xml version="1.0" encoding="utf-8"?><!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg fill="#000000" width="800px" height="800px" viewBox="0 0 24 24" role="img" xmlns="http://www.w3.org/2000/svg"><title>OpenAI icon</title><path d="M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z"/></svg>

View File

@ -1,45 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
viewBox="0 0 256 256"
version="1.1"
id="svg6"
sodipodi:docname="paperclip-horizontal.svg"
inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<defs
id="defs10" />
<sodipodi:namedview
id="namedview8"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
showgrid="false"
inkscape:zoom="4.421875"
inkscape:cx="127.88693"
inkscape:cy="127.88693"
inkscape:window-width="2560"
inkscape:window-height="1495"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="svg6" />
<rect
width="256"
height="256"
fill="none"
id="rect2" />
<path
d="m 144,80 v 112 a -16,16 0 0 1 -32,0 V 48 a -32,32 0 0 1 64,0 v 144 a -48,48 0 0 1 -96,0 V 80"
fill="none"
stroke="currentColor"
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="16"
id="path4" />
</svg>

View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="32" height="32" fill="#000000" viewBox="0 0 256 256"><path d="M128,24A104,104,0,1,0,232,128,104.11,104.11,0,0,0,128,24Zm0,192a88,88,0,1,1,88-88A88.1,88.1,0,0,1,128,216Zm48-88a8,8,0,0,1-8,8H136v32a8,8,0,0,1-16,0V136H88a8,8,0,0,1,0-16h32V88a8,8,0,0,1,16,0v32h32A8,8,0,0,1,176,128Z"></path></svg>

View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="32" height="32" fill="#000000" viewBox="0 0 256 256"><path d="M216,40H40A16,16,0,0,0,24,56V200a16,16,0,0,0,16,16H216a16,16,0,0,0,16-16V56A16,16,0,0,0,216,40Zm0,16V88H40V56Zm0,144H40V104H216v96Z"></path></svg>

View File

@ -1,7 +1,7 @@
#include "llm.h" #include "llm.h"
#include <gpt4all-backend/llmodel.h> #include "../gpt4all-backend/llmodel.h"
#include <gpt4all-backend/sysinfo.h> #include "../gpt4all-backend/sysinfo.h"
#include <QCoreApplication> #include <QCoreApplication>
#include <QDebug> #include <QDebug>
@ -12,9 +12,6 @@
#include <QSettings> #include <QSettings>
#include <QUrl> #include <QUrl>
#include <QtLogging> #include <QtLogging>
#include <QtSystemDetection>
#include <string>
#ifdef GPT4ALL_OFFLINE_INSTALLER #ifdef GPT4ALL_OFFLINE_INSTALLER
# include <QDesktopServices> # include <QDesktopServices>
@ -22,13 +19,8 @@
# include "network.h" # include "network.h"
#endif #endif
#ifdef Q_OS_MAC
#include "macosdock.h"
#endif
using namespace Qt::Literals::StringLiterals; using namespace Qt::Literals::StringLiterals;
class MyLLM: public LLM { }; class MyLLM: public LLM { };
Q_GLOBAL_STATIC(MyLLM, llmInstance) Q_GLOBAL_STATIC(MyLLM, llmInstance)
LLM *LLM::globalInstance() LLM *LLM::globalInstance()
@ -59,7 +51,7 @@ bool LLM::checkForUpdates() const
{ {
#ifdef GPT4ALL_OFFLINE_INSTALLER #ifdef GPT4ALL_OFFLINE_INSTALLER
# pragma message(__FILE__ ": WARNING: offline installer build will not check for updates!") # pragma message(__FILE__ ": WARNING: offline installer build will not check for updates!")
return QDesktopServices::openUrl(QUrl("https://github.com/nomic-ai/gpt4all/releases")); return QDesktopServices::openUrl(QUrl("https://gpt4all.io/"));
#else #else
Network::globalInstance()->trackEvent("check_for_updates"); Network::globalInstance()->trackEvent("check_for_updates");
@ -113,21 +105,3 @@ bool LLM::isNetworkOnline() const
auto * netinfo = QNetworkInformation::instance(); auto * netinfo = QNetworkInformation::instance();
return !netinfo || netinfo->reachability() == QNetworkInformation::Reachability::Online; return !netinfo || netinfo->reachability() == QNetworkInformation::Reachability::Online;
} }
void LLM::showDockIcon() const
{
#ifdef Q_OS_MAC
MacOSDock::showIcon();
#else
qt_noop();
#endif
}
void LLM::hideDockIcon() const
{
#ifdef Q_OS_MAC
MacOSDock::hideIcon();
#else
qt_noop();
#endif
}

View File

@ -3,8 +3,7 @@
#include <QObject> #include <QObject>
#include <QString> #include <QString>
#include <QtTypes> #include <QtGlobal>
class LLM : public QObject class LLM : public QObject
{ {
@ -24,9 +23,6 @@ public:
Q_INVOKABLE QString systemTotalRAMInGBString() const; Q_INVOKABLE QString systemTotalRAMInGBString() const;
Q_INVOKABLE bool isNetworkOnline() const; Q_INVOKABLE bool isNetworkOnline() const;
Q_INVOKABLE void showDockIcon() const;
Q_INVOKABLE void hideDockIcon() const;
Q_SIGNALS: Q_SIGNALS:
void isNetworkOnlineChanged(); void isNetworkOnlineChanged();

View File

@ -5,14 +5,10 @@
#include "mysettings.h" #include "mysettings.h"
#include <QCoreApplication> #include <QCoreApplication>
#include <QDebug>
#include <QGlobalStatic> #include <QGlobalStatic>
#include <QGuiApplication> #include <QGuiApplication>
#include <QList>
#include <QUrl> #include <QUrl>
#include <Qt> #include <Qt>
#include <QtLogging>
class MyLocalDocs: public LocalDocs { }; class MyLocalDocs: public LocalDocs { };
Q_GLOBAL_STATIC(MyLocalDocs, localDocsInstance) Q_GLOBAL_STATIC(MyLocalDocs, localDocsInstance)

View File

@ -2,14 +2,11 @@
#define LOCALDOCS_H #define LOCALDOCS_H
#include "database.h" #include "database.h"
#include "localdocsmodel.h" #include "localdocsmodel.h" // IWYU pragma: keep
#include <QObject> #include <QObject>
#include <QString> #include <QString>
#include <QStringList> // IWYU pragma: keep #include <QStringList>
// IWYU pragma: no_forward_declare LocalDocsModel
class LocalDocs : public QObject class LocalDocs : public QObject
{ {

Some files were not shown because too many files have changed in this diff