Compare commits

..

No commits in common. "cfd70b69fcf5e587b8e0e3e9b9aaa90e19cbbc51" and "620090067799c96d7436a064139941ab9c8d5097" have entirely different histories.

11 changed files with 34 additions and 256 deletions

View File

@ -14,7 +14,6 @@
#include <regex>
#include <thread>
#include <algorithm>
#include <numeric>
//#define DEBUG_BERT
@ -463,6 +462,11 @@ void bert_eval(
ggml_set_f32(sum, 1.0f / N);
inpL = ggml_mul_mat(ctx0, inpL, sum);
// normalizer
ggml_tensor *length = ggml_sqrt(ctx0,
ggml_sum(ctx0, ggml_sqr(ctx0, inpL)));
inpL = ggml_scale(ctx0, inpL, ggml_div(ctx0, ggml_new_f32(ctx0, 1.0f), length));
ggml_tensor *output = inpL;
// run the computation
ggml_build_forward_expand(&gf, output);
@ -871,7 +875,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
// TODO: Max tokens should be a param?
int32_t N = new_bert->model.hparams.n_max_tokens;
new_bert->mem_per_input = 2.2 * (new_bert->mem_per_token * N); // add 10% to account for ggml object overhead
new_bert->mem_per_input = 1.9 * (new_bert->mem_per_token * N); // add 10% to account for ggml object overhead
}
#if defined(DEBUG_BERT)
@ -983,9 +987,6 @@ std::vector<float> Bert::embedding(const std::string &text)
}
std::transform(embeddingsSum.begin(), embeddingsSum.end(), embeddingsSum.begin(), [embeddingsSumTotal](float num){ return num / embeddingsSumTotal; });
double magnitude = std::sqrt(std::inner_product(embeddingsSum.begin(), embeddingsSum.end(), embeddingsSum.begin(), 0.0));
for (auto &value : embeddingsSum)
value /= magnitude;
std::vector<float> finalEmbeddings(embeddingsSum.begin(), embeddingsSum.end());
return finalEmbeddings;
}

View File

@ -1,7 +1,8 @@
# GPT4All Python Generation API
# GPT4All Python API
The `GPT4All` python package provides bindings to our C/C++ model backend libraries.
The source code and local build instructions can be found [here](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python).
## Quickstart
```bash
@ -108,5 +109,5 @@ with model.chat_session():
print(model.current_chat_session)
```
### API documentation
::: gpt4all.gpt4all.GPT4All

View File

@ -1,35 +0,0 @@
# Embeddings
GPT4All supports generating high-quality embeddings of arbitrary-length text documents using a CPU-optimized, contrastively trained [Sentence Transformer](https://www.sbert.net/). For many tasks, these embeddings are comparable in quality to OpenAI's.
## Quickstart
```bash
pip install gpt4all
```
### Generating embeddings
The embedding model will automatically be downloaded if not installed.
=== "Embed4All Example"
``` py
from gpt4all import GPT4All, Embed4All
text = 'The quick brown fox jumps over the lazy dog'
embedder = Embed4All()
output = embedder.embed(text)
print(output)
```
=== "Output"
```
[0.034696947783231735, -0.07192722707986832, 0.06923297047615051, ...]
```
### Speed of embedding generation
The following table lists the generation speed for text documents, measured on an Intel i9-13900HX CPU with DDR5-5600 memory running 8 threads under stable load.
| Tokens | 128 | 512 | 2048 | 8192 | 16,384 |
| --------------- | ---- | ---- | ---- | ---- | ---- |
| Wall time (s) | .02 | .08 | .24 | .96 | 1.9 |
| Tokens / Second | 6508 | 6431 | 8622 | 8509 | 8369 |
### API documentation
::: gpt4all.gpt4all.Embed4All

View File

@ -1,2 +1,2 @@
from .gpt4all import GPT4All, Embed4All # noqa
from .gpt4all import GPT4All, embed # noqa
from .pyllmodel import LLModel # noqa

View File

@ -15,36 +15,20 @@ from . import pyllmodel
# TODO: move to config
DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")
class Embed4All:
def embed(
text: str
) -> list[float]:
"""
Python class that handles embeddings for GPT4All.
Generate an embedding for all GPT4All.
Args:
text: The text document to generate an embedding for.
Returns:
An embedding of your document of text.
"""
def __init__(
self,
n_threads: Optional[int] = None,
):
"""
Constructor
Args:
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
"""
self.gpt4all = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin', n_threads=n_threads)
def embed(
self,
text: str
) -> list[float]:
"""
Generate an embedding.
Args:
text: The text document to generate an embedding for.
Returns:
An embedding of your document of text.
"""
return self.gpt4all.model.generate_embedding(text)
model = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin')
return model.model.generate_embedding(text)
class GPT4All:
"""
@ -69,7 +53,7 @@ class GPT4All:
model_type: Model architecture. This argument currently does not have any functionality and is just used as
descriptive identifier for user. Default is None.
allow_download: Allow API to download models from gpt4all.io. Default is True.
n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
n_threads: number of CPU threads used by GPT4All. Default is None, in which case the number of threads is determined automatically.
"""
self.model_type = model_type
self.model = pyllmodel.LLModel()

View File

@ -154,11 +154,10 @@ class LLModel:
self.model = None
self.model_name = None
self.context = None
self.llmodel_lib = llmodel
def __del__(self):
if self.model is not None:
self.llmodel_lib.llmodel_model_destroy(self.model)
llmodel.llmodel_model_destroy(self.model)
def memory_needed(self, model_path: str) -> int:
model_path_enc = model_path.encode("utf-8")
@ -254,7 +253,7 @@ class LLModel:
embedding_size = ctypes.c_size_t()
c_text = ctypes.c_char_p(text.encode('utf-8'))
embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
embedding_array = ctypes.cast(embedding_ptr, ctypes.POINTER(ctypes.c_float * embedding_size.value)).contents
llmodel.llmodel_free_embedding(embedding_ptr)
return list(embedding_array)

View File

@ -1,18 +0,0 @@
import sys
from io import StringIO
from gpt4all import GPT4All, Embed4All
import time
def time_embedding(i, embedder):
    """
    Time a single embedding call and print the measured throughput.

    Args:
        i: Number of times the phrase 'foo bar ' is repeated to build the input text.
        embedder: Object exposing an ``embed(text)`` method.
    """
    text = 'foo bar ' * i
    # The report counts two tokens per repetition of 'foo bar '.
    token_count = 2 * i
    # perf_counter() is monotonic and high-resolution, unlike the wall clock
    # returned by time.time(), so it is the right tool for measuring intervals.
    start_time = time.perf_counter()
    embedder.embed(text)
    elapsed_time = time.perf_counter() - start_time
    # Guard against a zero-length interval so the report never raises ZeroDivisionError.
    tokens_per_second = token_count / elapsed_time if elapsed_time > 0 else float('inf')
    print(f"Time report: {tokens_per_second} tokens/second with {token_count} tokens taking {elapsed_time} seconds")
if __name__ == "__main__":
    # Benchmark driver: one embedder with 8 threads, timed on inputs of
    # 2**6 through 2**13 repetitions of the test phrase.
    embedder = Embed4All(n_threads=8)
    for exponent in range(6, 14):
        time_embedding(2 ** exponent, embedder)

File diff suppressed because one or more lines are too long

View File

@ -10,9 +10,7 @@ use_directory_urls: false
nav:
- 'index.md'
- 'Bindings':
- 'GPT4All in Python':
- 'Generation': 'gpt4all_python.md'
- 'Embedding': 'gpt4all_python_embedding.md'
- 'GPT4All in Python': 'gpt4all_python.md'
- 'GPT4All Chat Client': 'gpt4all_chat.md'
- 'gpt4all_cli.md'
# - 'Tutorials':
@ -70,4 +68,4 @@ plugins:
#- mkdocs-jupyter:
# ignore_h1_titles: True
# show_input: True
# show_input: True

View File

@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,
setup(
name=package_name,
version="1.0.6",
version="1.0.4",
description="Python bindings for GPT4All",
author="Richard Guo",
author_email="richard@nomic.ai",

View File

@ -18,9 +18,6 @@ enum Language {
Go,
Json,
Csharp,
Latex,
Html,
Php
};
static QColor keywordColor = "#2e95d3"; // blue
@ -36,11 +33,6 @@ static QColor commandColor = functionCallColor;
static QColor variableColor = numberColor;
static QColor keyColor = functionColor;
static QColor valueColor = stringColor;
static QColor parameterColor = stringColor;
static QColor attributeNameColor = numberColor;
static QColor attributeValueColor = stringColor;
static QColor specialCharacterColor = functionColor;
static QColor doctypeColor = commentColor;
static Language stringToLanguage(const QString &language)
{
@ -70,12 +62,6 @@ static Language stringToLanguage(const QString &language)
return Go;
if (language == "json")
return Json;
if (language == "latex")
return Latex;
if (language == "html")
return Html;
if (language == "php")
return Php;
return None;
}
@ -575,135 +561,6 @@ static QVector<HighlightingRule> bashHighlightingRules()
return highlightingRules;
}
// Returns the highlighting rules used for LaTeX code blocks.
// The list is assembled on the first call and the cached copy is returned afterwards.
static QVector<HighlightingRule> latexHighlightingRules()
{
    static QVector<HighlightingRule> cachedRules;
    if (!cachedRules.isEmpty())
        return cachedRules;

    // Commands: a backslash followed by one or more ASCII letters.
    QTextCharFormat commandStyle;
    commandStyle.setForeground(commandColor);
    HighlightingRule commandRule;
    commandRule.pattern = QRegularExpression("\\\\[A-Za-z]+");
    commandRule.format = commandStyle;
    cachedRules.append(commandRule);

    // Comments: a '%' and everything after it up to the next newline.
    QTextCharFormat commentStyle;
    commentStyle.setForeground(commentColor);
    HighlightingRule commentRule;
    commentRule.pattern = QRegularExpression("%[^\n]*");
    commentRule.format = commentStyle;
    cachedRules.append(commentRule);

    return cachedRules;
}
// Returns the highlighting rules used for HTML code blocks.
// The list is assembled on the first call and the cached copy is returned afterwards.
static QVector<HighlightingRule> htmlHighlightingRules()
{
    static QVector<HighlightingRule> cachedRules;
    if (!cachedRules.isEmpty())
        return cachedRules;

    // Appends one pattern/format pair to the cached list.
    const auto addRule = [](const QRegularExpression &pattern, const QTextCharFormat &format) {
        HighlightingRule entry;
        entry.pattern = pattern;
        entry.format = format;
        cachedRules.append(entry);
    };

    // Attribute names: a word followed by optional whitespace and '='.
    QTextCharFormat attributeNameStyle;
    attributeNameStyle.setForeground(attributeNameColor);
    addRule(QRegularExpression("\\b(\\w+)\\s*="), attributeNameStyle);

    // Attribute values: double- or single-quoted text (non-greedy).
    QTextCharFormat attributeValueStyle;
    attributeValueStyle.setForeground(attributeValueColor);
    addRule(QRegularExpression("\".*?\"|'.*?'"), attributeValueStyle);

    // Comments: <!-- ... -->
    QTextCharFormat commentStyle;
    commentStyle.setForeground(commentColor);
    addRule(QRegularExpression("<!--.*?-->"), commentStyle);

    // Character entities such as &amp; or &#160;
    QTextCharFormat specialCharacterStyle;
    specialCharacterStyle.setForeground(specialCharacterColor);
    addRule(QRegularExpression("&[a-zA-Z0-9#]*;"), specialCharacterStyle);

    // Document type declaration: <!DOCTYPE ...>
    QTextCharFormat doctypeStyle;
    doctypeStyle.setForeground(doctypeColor);
    addRule(QRegularExpression("<!DOCTYPE.*?>"), doctypeStyle);

    return cachedRules;
}
// Returns the highlighting rules used for PHP code blocks.
// The list is assembled on the first call and the cached copy is returned afterwards.
static QVector<HighlightingRule> phpHighlightingRules()
{
    static QVector<HighlightingRule> cachedRules;
    if (!cachedRules.isEmpty())
        return cachedRules;

    // Appends one pattern/format pair to the cached list.
    const auto addRule = [](const QRegularExpression &pattern, const QTextCharFormat &format) {
        HighlightingRule entry;
        entry.pattern = pattern;
        entry.format = format;
        cachedRules.append(entry);
    };

    // Function calls: a word followed by optional whitespace and '('.
    QTextCharFormat functionCallStyle;
    functionCallStyle.setForeground(functionCallColor);
    addRule(QRegularExpression("\\b(\\w+)\\s*(?=\\()"), functionCallStyle);

    // Function definitions: the 'function' keyword followed by a name.
    QTextCharFormat functionStyle;
    functionStyle.setForeground(functionColor);
    addRule(QRegularExpression("\\bfunction\\s+(\\w+)\\b"), functionStyle);

    // Numeric literals, with an optional decimal point.
    QTextCharFormat numberStyle;
    numberStyle.setForeground(numberColor);
    addRule(QRegularExpression("\\b[0-9]*\\.?[0-9]+\\b"), numberStyle);

    // Language keywords, one rule per keyword.
    QTextCharFormat keywordStyle;
    keywordStyle.setForeground(keywordColor);
    const QStringList keywordPatterns = {
        "\\bif\\b", "\\belse\\b", "\\belseif\\b", "\\bwhile\\b", "\\bfor\\b",
        "\\bforeach\\b", "\\breturn\\b", "\\bprint\\b", "\\binclude\\b", "\\brequire\\b",
        "\\binclude_once\\b", "\\brequire_once\\b", "\\btry\\b", "\\bcatch\\b",
        "\\bfinally\\b", "\\bcontinue\\b", "\\bbreak\\b", "\\bclass\\b", "\\bfunction\\b",
        "\\bnew\\b", "\\bthrow\\b", "\\barray\\b", "\\bpublic\\b", "\\bprivate\\b",
        "\\bprotected\\b", "\\bstatic\\b", "\\bglobal\\b", "\\bisset\\b", "\\bunset\\b",
        "\\bnull\\b", "\\btrue\\b", "\\bfalse\\b"
    };
    for (const QString &keyword : keywordPatterns)
        addRule(QRegularExpression(keyword), keywordStyle);

    // String literals: double-quoted first, then single-quoted.
    QTextCharFormat stringStyle;
    stringStyle.setForeground(stringColor);
    addRule(QRegularExpression("\".*?\""), stringStyle);
    addRule(QRegularExpression("\'.*?\'"), stringStyle);

    // Comments: '//' to end of line, then '/* ... */'.
    QTextCharFormat commentStyle;
    commentStyle.setForeground(commentColor);
    addRule(QRegularExpression("//[^\n]*"), commentStyle);
    addRule(QRegularExpression("/\\*.*?\\*/"), commentStyle);

    return cachedRules;
}
static QVector<HighlightingRule> jsonHighlightingRules()
{
static QVector<HighlightingRule> highlightingRules;
@ -759,12 +616,6 @@ void SyntaxHighlighter::highlightBlock(const QString &text)
rules = javaHighlightingRules();
else if (block.userState() == Json)
rules = jsonHighlightingRules();
else if (block.userState() == Latex)
rules = latexHighlightingRules();
else if (block.userState() == Html)
rules = htmlHighlightingRules();
else if (block.userState() == Php)
rules = phpHighlightingRules();
for (const HighlightingRule &rule : qAsConst(rules)) {
QRegularExpressionMatchIterator matchIterator = rule.pattern.globalMatch(text);
@ -970,10 +821,7 @@ void ResponseText::handleCodeBlocks()
|| firstWord == "java"
|| firstWord == "go"
|| firstWord == "golang"
|| firstWord == "json"
|| firstWord == "latex"
|| firstWord == "html"
|| firstWord == "php") {
|| firstWord == "json") {
codeLanguage = firstWord;
capturedText.remove(0, match.captured(0).length());
}