2025-08-16 00:04:55 -04:00
11 changed files with 34 additions and 256 deletions
--- a/gpt4all-backend/bert.cpp
+++ b/gpt4all-backend/bert.cpp
@ -14,7 +14,6 @@
 #include <regex>
 #include <thread>
 #include <algorithm>
-#include <numeric>

 //#define DEBUG_BERT

@ -463,6 +462,11 @@ void bert_eval(
    ggml_set_f32(sum, 1.0f / N);
    inpL = ggml_mul_mat(ctx0, inpL, sum);

+    // normalizer
+    ggml_tensor *length = ggml_sqrt(ctx0,
+                                    ggml_sum(ctx0, ggml_sqr(ctx0, inpL)));
+    inpL = ggml_scale(ctx0, inpL, ggml_div(ctx0, ggml_new_f32(ctx0, 1.0f), length));
+
    ggml_tensor *output = inpL;
    // run the computation
    ggml_build_forward_expand(&gf, output);
@ -871,7 +875,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)

        // TODO: Max tokens should be a param?
        int32_t N = new_bert->model.hparams.n_max_tokens;
-        new_bert->mem_per_input = 2.2 * (new_bert->mem_per_token * N); // add 10% to account for ggml object overhead
+        new_bert->mem_per_input = 1.9 * (new_bert->mem_per_token * N); // 1.9x headroom over the per-token estimate to account for ggml object overhead

    }
 #if defined(DEBUG_BERT)
@ -983,9 +987,6 @@ std::vector<float> Bert::embedding(const std::string &text)
    }

    std::transform(embeddingsSum.begin(), embeddingsSum.end(), embeddingsSum.begin(), [embeddingsSumTotal](float num){ return num / embeddingsSumTotal; });
-    double magnitude = std::sqrt(std::inner_product(embeddingsSum.begin(), embeddingsSum.end(), embeddingsSum.begin(), 0.0));
-    for (auto &value : embeddingsSum)
-        value /= magnitude;
    std::vector<float> finalEmbeddings(embeddingsSum.begin(), embeddingsSum.end());
    return finalEmbeddings;
 }
--- a/gpt4all-bindings/python/docs/gpt4all_python.md
+++ b/gpt4all-bindings/python/docs/gpt4all_python.md
@ -1,7 +1,8 @@
-# GPT4All Python Generation API
+# GPT4All Python API
 The `GPT4All` python package provides bindings to our C/C++ model backend libraries.
 The source code and local build instructions can be found [here](https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python).

+
 ## Quickstart

 ```bash
@ -108,5 +109,5 @@ with model.chat_session():
    print(model.current_chat_session)
 ```

-### API documentation
+
 ::: gpt4all.gpt4all.GPT4All
--- a/gpt4all-bindings/python/docs/gpt4all_python_embedding.md
+++ b/gpt4all-bindings/python/docs/gpt4all_python_embedding.md
@ -1,35 +0,0 @@
-# Embeddings
-GPT4All supports generating high quality embeddings of arbitrary length documents of text using a CPU optimized contrastively trained [Sentence Transformer](https://www.sbert.net/). These embeddings are comparable in quality for many tasks with OpenAI.
-
-## Quickstart
-
-```bash
-pip install gpt4all
-```
-
-### Generating embeddings
-The embedding model will automatically be downloaded if not installed.
-
-=== "Embed4All Example"
-    ``` py
-    from gpt4all import GPT4All, Embed4All
-    text = 'The quick brown fox jumps over the lazy dog'
-    embedder = Embed4All()
-    output = embedder.embed(text)
-    print(output)
-    ```
-=== "Output"
-    ```
-    [0.034696947783231735, -0.07192722707986832, 0.06923297047615051, ...]
-    ```
-### Speed of embedding generation
-The following table lists the generation speed for text document captured on an Intel i913900HX CPU with DDR5 5600 running with 8 threads under stable load.
-
-| Tokens          | 128  | 512  | 2048 | 8129 | 16,384 |
-| --------------- | ---- | ---- | ---- | ---- | ---- |
-| Wall time (s)   | .02  | .08  | .24  | .96  | 1.9  |
-| Tokens / Second | 6508 | 6431 | 8622 | 8509 | 8369 |
-
-
-### API documentation
-::: gpt4all.gpt4all.Embed4All
--- a/gpt4all-bindings/python/gpt4all/init.py
+++ b/gpt4all-bindings/python/gpt4all/init.py
@ -1,2 +1,2 @@
-from .gpt4all import GPT4All, Embed4All  # noqa
+from .gpt4all import GPT4All, embed  # noqa
 from .pyllmodel import LLModel  # noqa
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@ -15,36 +15,20 @@ from . import pyllmodel
 # TODO: move to config
 DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\")

-class Embed4All:
+def embed(
+    text: str
+) -> list[float]:
    """
-    Python class that handles embeddings for GPT4All.
+    Generate an embedding for a text document using GPT4All.
+
+    Args:
+        text: The text document to generate an embedding for.
+
+    Returns:
+        An embedding of your document of text.
    """
-    def __init__(
-        self,
-        n_threads: Optional[int] = None,
-    ):
-        """
-        Constructor
-
-        Args:
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
-        """
-        self.gpt4all = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin', n_threads=n_threads)
-
-    def embed(
-        self,
-        text: str
-    ) -> list[float]:
-        """
-        Generate an embedding.
-
-        Args:
-            text: The text document to generate an embedding for.
-
-        Returns:
-            An embedding of your document of text.
-        """
-        return self.gpt4all.model.generate_embedding(text)
+    model = GPT4All(model_name='ggml-all-MiniLM-L6-v2-f16.bin')
+    return model.model.generate_embedding(text)

 class GPT4All:
    """
@ -69,7 +53,7 @@ class GPT4All:
            model_type: Model architecture. This argument currently does not have any functionality and is just used as
                descriptive identifier for user. Default is None.
            allow_download: Allow API to download models from gpt4all.io. Default is True.
-            n_threads: number of CPU threads used by GPT4All. Default is None, then the number of threads are determined automatically.
+            n_threads: Number of CPU threads used by GPT4All. Default is None, in which case the number of threads is determined automatically.
        """
        self.model_type = model_type
        self.model = pyllmodel.LLModel()
--- a/gpt4all-bindings/python/gpt4all/pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/pyllmodel.py
@ -154,11 +154,10 @@ class LLModel:
        self.model = None
        self.model_name = None
        self.context = None
-        self.llmodel_lib = llmodel

    def __del__(self):
        if self.model is not None:
-            self.llmodel_lib.llmodel_model_destroy(self.model)
+            llmodel.llmodel_model_destroy(self.model)

    def memory_needed(self, model_path: str) -> int:
        model_path_enc = model_path.encode("utf-8")
@ -254,7 +253,7 @@ class LLModel:
        embedding_size = ctypes.c_size_t()
        c_text = ctypes.c_char_p(text.encode('utf-8'))
        embedding_ptr = llmodel.llmodel_embedding(self.model, c_text, ctypes.byref(embedding_size))
-        embedding_array = [embedding_ptr[i] for i in range(embedding_size.value)]
+        embedding_array = ctypes.cast(embedding_ptr, ctypes.POINTER(ctypes.c_float * embedding_size.value)).contents
        llmodel.llmodel_free_embedding(embedding_ptr)
        return list(embedding_array)

--- a/gpt4all-bindings/python/gpt4all/tests/test_embed_timings.py
+++ b/gpt4all-bindings/python/gpt4all/tests/test_embed_timings.py
@ -1,18 +0,0 @@
-import sys
-from io import StringIO
-
-from gpt4all import GPT4All, Embed4All
-import time
-
-def time_embedding(i, embedder):
-    text = 'foo bar ' * i
-    start_time = time.time()
-    output = embedder.embed(text)
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    print(f"Time report: {2 * i / elapsed_time} tokens/second with {2 * i} tokens taking {elapsed_time} seconds")
-
-if __name__ == "__main__":
-    embedder = Embed4All(n_threads=8)
-    for i in [2**n for n in range(6, 14)]:
-        time_embedding(i, embedder)
--- a/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/tests/test_gpt4all.py
--- a/gpt4all-bindings/python/mkdocs.yml
+++ b/gpt4all-bindings/python/mkdocs.yml
@ -10,9 +10,7 @@ use_directory_urls: false
 nav:
    - 'index.md'
    - 'Bindings':
-      - 'GPT4All in Python':
-        - 'Generation': 'gpt4all_python.md'
-        - 'Embedding': 'gpt4all_python_embedding.md'
+      - 'GPT4All in Python': 'gpt4all_python.md'
      - 'GPT4All Chat Client': 'gpt4all_chat.md'
      - 'gpt4all_cli.md'
 #    - 'Tutorials':
@ -70,4 +68,4 @@ plugins:

  #- mkdocs-jupyter:
  #    ignore_h1_titles: True
-  #    show_input: True
+  #    show_input: True
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECtORY,

 setup(
    name=package_name,
-    version="1.0.6",
+    version="1.0.4",
    description="Python bindings for GPT4All",
    author="Richard Guo",
    author_email="richard@nomic.ai",
--- a/gpt4all-chat/responsetext.cpp
+++ b/gpt4all-chat/responsetext.cpp
@ -18,9 +18,6 @@ enum Language {
    Go,
    Json,
    Csharp,
-    Latex,
-    Html,
-    Php
 };

 static QColor keywordColor      = "#2e95d3"; // blue
@ -36,11 +33,6 @@ static QColor commandColor = functionCallColor;
 static QColor variableColor = numberColor;
 static QColor keyColor = functionColor;
 static QColor valueColor = stringColor;
-static QColor parameterColor = stringColor;
-static QColor attributeNameColor = numberColor;
-static QColor attributeValueColor = stringColor;
-static QColor specialCharacterColor = functionColor;
-static QColor doctypeColor = commentColor;

 static Language stringToLanguage(const QString &language)
 {
@ -70,12 +62,6 @@ static Language stringToLanguage(const QString &language)
        return Go;
    if (language == "json")
        return Json;
-    if (language == "latex")
-        return Latex;
-    if (language == "html")
-        return Html;
-    if (language == "php")
-        return Php;
    return None;
 }

@ -575,135 +561,6 @@ static QVector<HighlightingRule> bashHighlightingRules()
    return highlightingRules;
 }

-static QVector<HighlightingRule> latexHighlightingRules()
-{
-    static QVector<HighlightingRule> highlightingRules;
-    if (highlightingRules.isEmpty()) {
-
-        HighlightingRule rule;
-
-        QTextCharFormat commandFormat;
-        commandFormat.setForeground(commandColor); // commandColor needs to be set to your liking
-        rule.pattern = QRegularExpression("\\\\[A-Za-z]+"); // Pattern for LaTeX commands
-        rule.format = commandFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat commentFormat;
-        commentFormat.setForeground(commentColor); // commentColor needs to be set to your liking
-        rule.pattern = QRegularExpression("%[^\n]*"); // Pattern for LaTeX comments
-        rule.format = commentFormat;
-        highlightingRules.append(rule);
-    }
-    return highlightingRules;
-}
-
-static QVector<HighlightingRule> htmlHighlightingRules()
-{
-    static QVector<HighlightingRule> highlightingRules;
-    if (highlightingRules.isEmpty()) {
-
-        HighlightingRule rule;
-
-        QTextCharFormat attributeNameFormat;
-        attributeNameFormat.setForeground(attributeNameColor);
-        rule.pattern = QRegularExpression("\\b(\\w+)\\s*=");
-        rule.format = attributeNameFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat attributeValueFormat;
-        attributeValueFormat.setForeground(attributeValueColor);
-        rule.pattern = QRegularExpression("\".*?\"|'.*?'");
-        rule.format = attributeValueFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat commentFormat;
-        commentFormat.setForeground(commentColor);
-        rule.pattern = QRegularExpression("<!--.*?-->");
-        rule.format = commentFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat specialCharacterFormat;
-        specialCharacterFormat.setForeground(specialCharacterColor);
-        rule.pattern = QRegularExpression("&[a-zA-Z0-9#]*;");
-        rule.format = specialCharacterFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat doctypeFormat;
-        doctypeFormat.setForeground(doctypeColor);
-        rule.pattern = QRegularExpression("<!DOCTYPE.*?>");
-        rule.format = doctypeFormat;
-        highlightingRules.append(rule);
-    }
-    return highlightingRules;
-}
-
-static QVector<HighlightingRule> phpHighlightingRules()
-{
-    static QVector<HighlightingRule> highlightingRules;
-    if (highlightingRules.isEmpty()) {
-
-        HighlightingRule rule;
-
-        QTextCharFormat functionCallFormat;
-        functionCallFormat.setForeground(functionCallColor);
-        rule.pattern = QRegularExpression("\\b(\\w+)\\s*(?=\\()");
-        rule.format = functionCallFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat functionFormat;
-        functionFormat.setForeground(functionColor);
-        rule.pattern = QRegularExpression("\\bfunction\\s+(\\w+)\\b");
-        rule.format = functionFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat numberFormat;
-        numberFormat.setForeground(numberColor);
-        rule.pattern = QRegularExpression("\\b[0-9]*\\.?[0-9]+\\b");
-        rule.format = numberFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat keywordFormat;
-        keywordFormat.setForeground(keywordColor);
-        QStringList keywordPatterns = {
-            "\\bif\\b", "\\belse\\b", "\\belseif\\b", "\\bwhile\\b", "\\bfor\\b",
-            "\\bforeach\\b", "\\breturn\\b", "\\bprint\\b", "\\binclude\\b", "\\brequire\\b",
-            "\\binclude_once\\b", "\\brequire_once\\b", "\\btry\\b", "\\bcatch\\b",
-            "\\bfinally\\b", "\\bcontinue\\b", "\\bbreak\\b", "\\bclass\\b", "\\bfunction\\b",
-            "\\bnew\\b", "\\bthrow\\b", "\\barray\\b", "\\bpublic\\b", "\\bprivate\\b",
-            "\\bprotected\\b", "\\bstatic\\b", "\\bglobal\\b", "\\bisset\\b", "\\bunset\\b",
-            "\\bnull\\b", "\\btrue\\b", "\\bfalse\\b"
-        };
-
-        for (const QString &pattern : keywordPatterns) {
-            rule.pattern = QRegularExpression(pattern);
-            rule.format = keywordFormat;
-            highlightingRules.append(rule);
-        }
-
-        QTextCharFormat stringFormat;
-        stringFormat.setForeground(stringColor);
-        rule.pattern = QRegularExpression("\".*?\"");
-        rule.format = stringFormat;
-        highlightingRules.append(rule);
-
-        rule.pattern = QRegularExpression("\'.*?\'");
-        rule.format = stringFormat;
-        highlightingRules.append(rule);
-
-        QTextCharFormat commentFormat;
-        commentFormat.setForeground(commentColor);
-        rule.pattern = QRegularExpression("//[^\n]*");
-        rule.format = commentFormat;
-        highlightingRules.append(rule);
-
-        rule.pattern = QRegularExpression("/\\*.*?\\*/");
-        rule.format = commentFormat;
-        highlightingRules.append(rule);
-    }
-    return highlightingRules;
-}
-
-
 static QVector<HighlightingRule> jsonHighlightingRules()
 {
    static QVector<HighlightingRule> highlightingRules;
@ -759,12 +616,6 @@ void SyntaxHighlighter::highlightBlock(const QString &text)
        rules = javaHighlightingRules();
    else if (block.userState() == Json)
        rules = jsonHighlightingRules();
-    else if (block.userState() == Latex)
-        rules = latexHighlightingRules();
-    else if (block.userState() == Html)
-        rules = htmlHighlightingRules();
-    else if (block.userState() == Php)
-        rules = phpHighlightingRules();

    for (const HighlightingRule &rule : qAsConst(rules)) {
        QRegularExpressionMatchIterator matchIterator = rule.pattern.globalMatch(text);
@ -970,10 +821,7 @@ void ResponseText::handleCodeBlocks()
                || firstWord == "java"
                || firstWord == "go"
                || firstWord == "golang"
-                || firstWord == "json"
-                || firstWord == "latex"
-                || firstWord == "html"
-                || firstWord == "php") {
+                || firstWord == "json") {
                codeLanguage = firstWord;
                capturedText.remove(0, match.captured(0).length());
            }