Compare commits

...

4 Commits

Author SHA1 Message Date
Jared Van Bortel
78a26cc5e4
models2.json: use ChatML for Mistral OpenOrca (#1935)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2024-02-06 12:43:10 -05:00
Jared Van Bortel
bf493bb048
Mixtral crash fix and python bindings v2.2.0 (#1931)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2024-02-06 11:01:15 -05:00
Adam Treat
1b524c4617 Reverse patch so we can minimize down to lowest HD form factor.
Signed-off-by: Adam Treat <treat.adam@gmail.com>
2024-02-06 09:59:26 -05:00
Adam Treat
cb10465127 Make the collection dialog progress bar more readable.
Signed-off-by: Adam Treat <treat.adam@gmail.com>
2024-02-06 09:35:07 -05:00
12 changed files with 96 additions and 117 deletions

View File

@@ -713,10 +713,16 @@ bool Bert::loadModel(const std::string &modelPath, int n_ctx, int ngl)
{
(void)n_ctx;
(void)ngl;
d_ptr->ctx = bert_load_from_file(modelPath.c_str());
d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
d_ptr->modelLoaded = d_ptr->ctx != nullptr;
d_ptr->modelLoaded = false;
auto * ctx = bert_load_from_file(modelPath.c_str());
fflush(stdout);
if (!ctx)
return false;
d_ptr->ctx = ctx;
d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
d_ptr->modelLoaded = true;
return true;
}

View File

@@ -685,18 +685,21 @@ size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx, int ngl) {
bool GPTJ::loadModel(const std::string &modelPath, int n_ctx, int ngl) {
(void)n_ctx;
(void)ngl;
d_ptr->modelLoaded = false;
std::mt19937 rng(time(NULL));
d_ptr->rng = rng;
// load the model
if (!gptj_model_load(modelPath, *d_ptr->model, d_ptr->vocab)) {
bool ok = gptj_model_load(modelPath, *d_ptr->model, d_ptr->vocab);
fflush(stdout);
if (!ok) {
std::cerr << "GPT-J ERROR: failed to load model from " << modelPath;
return false;
}
d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
d_ptr->modelLoaded = true;
fflush(stdout);
return true;
}

@@ -1 +1 @@
Subproject commit cd1b5a104b9d3e211a50b9f6c261aced3bf09834
Subproject commit 315102f89109f1b67c8f89f12d98ab646685e333

View File

@@ -150,6 +150,8 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl)
bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
{
d_ptr->modelLoaded = false;
// clean up after previous loadModel()
if (d_ptr->model) {
llama_free_model(d_ptr->model);
@@ -195,6 +197,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
d_ptr->model = llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params);
if (!d_ptr->model) {
fflush(stdout);
d_ptr->device = -1;
std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
return false;
@@ -225,6 +228,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params);
if (!d_ptr->ctx) {
fflush(stdout);
std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
llama_free_model(d_ptr->model);
d_ptr->model = nullptr;
@@ -240,8 +244,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
}
#endif
fflush(stdout);
d_ptr->modelLoaded = true;
fflush(stderr);
return true;
}
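The three backend hunks above (Bert, GPT-J, LLaMA) share one defensive pattern: clear `modelLoaded` before doing any work, flush stdout so the C library's diagnostics are not lost, return early on failure, and only set the flag once every step has succeeded. A minimal sketch of that guard pattern, written in Python purely for illustration (`backend_load` is a hypothetical stand-in for the real C loader):

```python
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class ModelState:
    ctx: Optional[Any] = None
    model_loaded: bool = False


def backend_load(model_path: str) -> Optional[object]:
    """Hypothetical stand-in for the C backend loader; returns None on failure."""
    return object() if model_path.endswith(".gguf") else None


def load_model(state: ModelState, model_path: str) -> bool:
    # Assume failure until every step has succeeded.
    state.model_loaded = False

    ctx = backend_load(model_path)
    if ctx is None:
        # Leave model_loaded False so callers never see a half-loaded model.
        return False

    state.ctx = ctx
    state.model_loaded = True
    return True


print(load_model(ModelState(), "mistral-7b-openorca.Q4_0.gguf"))  # True
print(load_model(ModelState(), "missing.bin"))                    # False
```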

View File

@@ -1,2 +1 @@
from .gpt4all import Embed4All as Embed4All, GPT4All as GPT4All
from .pyllmodel import LLModel as LLModel
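With `pyllmodel` becoming the private `_pyllmodel` module (see the `gpt4all.py` hunk further down), the package no longer re-exports `LLModel` from `__init__.py`; the supported import surface is just the high-level classes:

```python
# Still the public API after this change:
from gpt4all import Embed4All, GPT4All

# The low-level wrapper now lives in a private module (gpt4all._pyllmodel)
# and is no longer re-exported from the package root.
```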

View File

@@ -142,15 +142,6 @@ def empty_response_callback(token_id: int, response: str) -> bool:
return True
def _create_model(model_path: bytes) -> ctypes.c_void_p:
err = ctypes.c_char_p()
model = llmodel.llmodel_model_create2(model_path, b"auto", ctypes.byref(err))
if model is None:
s = err.value
raise ValueError(f"Unable to instantiate model: {'null' if s is None else s.decode()}")
return model
# Symbol to terminate from generator
class Sentinel(Enum):
TERMINATING_SYMBOL = 0
@@ -161,116 +152,77 @@ class LLModel:
Base class and universal wrapper for GPT4All language models
built around llmodel C-API.
Attributes
Parameters
----------
model: llmodel_model
Ctype pointer to underlying model
model_name: str
Model name
model_path : str
Path to the model.
n_ctx : int
Maximum size of context window
ngl : int
Number of GPU layers to use (Vulkan)
"""
def __init__(self):
self.model = None
self.model_name = None
self.context = None
self.llmodel_lib = llmodel
def __init__(self, model_path: str, n_ctx: int, ngl: int):
self.model_path = model_path.encode()
self.n_ctx = n_ctx
self.ngl = ngl
self.context: LLModelPromptContext | None = None
self.buffer = bytearray()
self.buff_expecting_cont_bytes: int = 0
# Construct a model implementation
err = ctypes.c_char_p()
model = llmodel.llmodel_model_create2(self.model_path, b"auto", ctypes.byref(err))
if model is None:
s = err.value
raise ValueError(f"Unable to instantiate model: {'null' if s is None else s.decode()}")
self.model = model
def __del__(self):
if self.model is not None:
self.llmodel_lib.llmodel_model_destroy(self.model)
def memory_needed(self, model_path: str, n_ctx: int, ngl: int) -> int:
self.model = None
return self._memory_needed(model_path, n_ctx, ngl)
def _memory_needed(self, model_path: str, n_ctx: int, ngl: int) -> int:
if self.model is None:
self.model = _create_model(model_path.encode())
return llmodel.llmodel_required_mem(self.model, model_path.encode(), n_ctx, ngl)
def list_gpu(self, model_path: str, n_ctx: int, ngl: int) -> list[LLModelGPUDevice]:
"""
Lists available GPU devices that satisfy the model's memory requirements.
Parameters
----------
model_path : str
Path to the model.
n_ctx : int
Maximum size of context window
ngl : int
Number of GPU layers to use (Vulkan)
Returns
-------
list
A list of LLModelGPUDevice structures representing available GPU devices.
"""
mem_required = self._memory_needed(model_path, n_ctx, ngl)
return self._list_gpu(mem_required)
if hasattr(self, 'model'):
llmodel.llmodel_model_destroy(self.model)
def _list_gpu(self, mem_required: int) -> list[LLModelGPUDevice]:
num_devices = ctypes.c_int32(0)
devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices))
devices_ptr = llmodel.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices))
if not devices_ptr:
raise ValueError("Unable to retrieve available GPU devices")
return devices_ptr[:num_devices.value]
def init_gpu(self, model_path: str, device: str, n_ctx: int, ngl: int):
mem_required = self._memory_needed(model_path, n_ctx, ngl)
def init_gpu(self, device: str):
mem_required = llmodel.llmodel_required_mem(self.model, self.model_path, self.n_ctx, self.ngl)
success = self.llmodel_lib.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device.encode())
if not success:
# Retrieve all GPUs without considering memory requirements.
num_devices = ctypes.c_int32(0)
all_devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices))
if not all_devices_ptr:
raise ValueError("Unable to retrieve list of all GPU devices")
all_gpus = [d.name.decode() for d in all_devices_ptr[:num_devices.value]]
if llmodel.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device.encode()):
return
# Retrieve GPUs that meet the memory requirements using list_gpu
available_gpus = [device.name.decode() for device in self._list_gpu(mem_required)]
# Retrieve all GPUs without considering memory requirements.
num_devices = ctypes.c_int32(0)
all_devices_ptr = llmodel.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices))
if not all_devices_ptr:
raise ValueError("Unable to retrieve list of all GPU devices")
all_gpus = [d.name.decode() for d in all_devices_ptr[:num_devices.value]]
# Identify GPUs that are unavailable due to insufficient memory or features
unavailable_gpus = set(all_gpus).difference(available_gpus)
# Retrieve GPUs that meet the memory requirements using list_gpu
available_gpus = [device.name.decode() for device in self._list_gpu(mem_required)]
# Formulate the error message
error_msg = "Unable to initialize model on GPU: '{}'.".format(device)
error_msg += "\nAvailable GPUs: {}.".format(available_gpus)
error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus)
raise ValueError(error_msg)
# Identify GPUs that are unavailable due to insufficient memory or features
unavailable_gpus = set(all_gpus).difference(available_gpus)
def load_model(self, model_path: str, n_ctx: int, ngl: int) -> bool:
# Formulate the error message
error_msg = "Unable to initialize model on GPU: '{}'.".format(device)
error_msg += "\nAvailable GPUs: {}.".format(available_gpus)
error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus)
raise ValueError(error_msg)
def load_model(self) -> bool:
"""
Load model from a file.
Parameters
----------
model_path : str
Model filepath
n_ctx : int
Maximum size of context window
ngl : int
Number of GPU layers to use (Vulkan)
Returns
-------
True if model loaded successfully, False otherwise
"""
self.model = _create_model(model_path.encode())
llmodel.llmodel_loadModel(self.model, model_path.encode(), n_ctx, ngl)
filename = os.path.basename(model_path)
self.model_name = os.path.splitext(filename)[0]
if llmodel.llmodel_isModelLoaded(self.model):
return True
else:
return False
return llmodel.llmodel_loadModel(self.model, self.model_path, self.n_ctx, self.ngl)
def set_thread_count(self, n_threads):
if not llmodel.llmodel_isModelLoaded(self.model):
@@ -295,7 +247,7 @@ class LLModel:
reset_context: bool = False,
):
if self.context is None:
self.context = LLModelPromptContext(
context = LLModelPromptContext(
logits_size=0,
tokens_size=0,
n_past=0,
@@ -309,8 +261,11 @@ class LLModel:
repeat_last_n=repeat_last_n,
context_erase=context_erase,
)
elif reset_context:
self.context.n_past = 0
self.context = context
else:
context = self.context
if reset_context:
self.context.n_past = 0
self.context.n_predict = n_predict
self.context.top_k = top_k
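Taken together, the changes to this file move `model_path`, `n_ctx` and `ngl` onto the `LLModel` instance: the ctypes handle is created in `__init__` (which raises `ValueError` if `llmodel_model_create2` fails), `init_gpu()` now takes only a device string, and `load_model()` takes no arguments and returns a bool. A rough sketch of the new low-level usage, with the path, context size and device string as placeholders:

```python
from gpt4all._pyllmodel import LLModel

model = LLModel("/path/to/mistral-7b-openorca.Q4_0.gguf", n_ctx=2048, ngl=100)

# Optional: pick a GPU before loading. On failure this raises ValueError
# listing the available and unavailable devices.
model.init_gpu("gpu")

# Returns True on success, False otherwise.
if not model.load_model():
    raise RuntimeError("model failed to load")
```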

View File

@@ -15,7 +15,7 @@ from requests.exceptions import ChunkedEncodingError
from tqdm import tqdm
from urllib3.exceptions import IncompleteRead, ProtocolError
from . import pyllmodel
from . import _pyllmodel
# TODO: move to config
DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")
@@ -97,12 +97,12 @@ class GPT4All:
verbose: If True, print debug messages.
"""
self.model_type = model_type
self.model = pyllmodel.LLModel()
# Retrieve model and download if allowed
self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)
self.model = _pyllmodel.LLModel(self.config["path"], n_ctx, ngl)
if device is not None and device != "cpu":
self.model.init_gpu(model_path=self.config["path"], device=device, n_ctx=n_ctx, ngl=ngl)
self.model.load_model(self.config["path"], n_ctx, ngl)
self.model.init_gpu(device)
self.model.load_model()
# Set n_threads
if n_threads is not None:
self.model.set_thread_count(n_threads)
@@ -292,7 +292,7 @@ class GPT4All:
n_batch: int = 8,
n_predict: Optional[int] = None,
streaming: bool = False,
callback: pyllmodel.ResponseCallbackType = pyllmodel.empty_response_callback,
callback: _pyllmodel.ResponseCallbackType = _pyllmodel.empty_response_callback,
) -> Union[str, Iterable[str]]:
"""
Generate outputs from any GPT4All model.
@@ -350,9 +350,9 @@ class GPT4All:
output_collector = self.current_chat_session
def _callback_wrapper(
callback: pyllmodel.ResponseCallbackType,
callback: _pyllmodel.ResponseCallbackType,
output_collector: List[MessageType],
) -> pyllmodel.ResponseCallbackType:
) -> _pyllmodel.ResponseCallbackType:
def _callback(token_id: int, response: str) -> bool:
nonlocal callback, output_collector
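At the high level, `GPT4All.__init__` now resolves the model config first, builds `_pyllmodel.LLModel` from the resolved path plus `n_ctx`/`ngl`, calls `init_gpu(device)` only when a non-CPU device was requested, and finally calls `load_model()` with no arguments. The user-facing API is unchanged; a typical call (model name and device chosen as examples) still looks like:

```python
from gpt4all import GPT4All

# Constructor: retrieve/download the model, build LLModel(path, n_ctx, ngl),
# init_gpu("gpu") since a non-CPU device is requested, then load_model().
model = GPT4All("mistral-7b-openorca.Q4_0.gguf", device="gpu", n_ctx=2048)

print(model.generate("Name three planets.", max_tokens=64))
```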

View File

@@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECTORY,
setup(
name=package_name,
version="2.1.0",
version="2.2.0",
description="Python bindings for GPT4All",
author="Nomic and the Open Source Community",
author_email="support@nomic.ai",

View File

@@ -16,8 +16,8 @@ Window {
id: window
width: 1920
height: 1080
minimumWidth: 1280
minimumHeight: 720
minimumWidth: 720
minimumHeight: 480
visible: true
title: qsTr("GPT4All v") + Qt.application.version

View File

@@ -10,9 +10,10 @@
"parameters": "7 billion",
"quant": "q4_0",
"type": "Mistral",
"systemPrompt": " ",
"description": "<strong>Best overall fast chat model</strong><br><ul><li>Fast responses</li><li>Chat based model</li><li>Trained by Mistral AI<li>Finetuned on OpenOrca dataset curated via <a href=\"https://atlas.nomic.ai/\">Nomic Atlas</a><li>Licensed for commercial use</ul>",
"url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf"
"url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf",
"promptTemplate": "<|im_start|>user\n%1<|im_end|><|im_start|>assistant\n",
"systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>"
},
{
"order": "b",

View File

@@ -121,7 +121,7 @@ MyDialog {
}
Label {
id: speedLabel
color: theme.textColor
color: theme.progressText
visible: model.indexing || model.currentEmbeddingsToIndex !== model.totalEmbeddingsToIndex
anchors.verticalCenter: itemProgressBar.verticalCenter
anchors.left: itemProgressBar.left

View File

@@ -222,6 +222,17 @@ QtObject {
}
}
property color progressText: {
switch (MySettings.chatTheme) {
case "LegacyDark":
return "#ffffff";
case "Dark":
return "#000000";
default:
return "#000000";
}
}
property color checkboxBorder: {
switch (MySettings.chatTheme) {
case "LegacyDark":