Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-07-26 00:02:44 -04:00)

Compare commits: 9 commits, 34555c4934 ... e4ff972522

Commits (SHA1):
- e4ff972522
- 4862e8b650
- 078c3bd85c
- 84749a4ced
- f1c58d0e2c
- 76413e1d03
- db70f1752a
- f3eaa33ce7
- 9e27a118ed
.gitignore (vendored)

@@ -183,4 +183,7 @@ build_*
 build-*

 # IntelliJ
 .idea/
+
+# LLM models
+*.gguf
@@ -7,13 +7,16 @@ services:
     restart: always #restart on error (usually code compilation from save during bad state)
     ports:
       - "4891:4891"
+    env_file:
+      - .env
     environment:
       - APP_ENVIRONMENT=dev
       - WEB_CONCURRENCY=2
       - LOGLEVEL=debug
       - PORT=4891
-      - model=ggml-mpt-7b-chat.bin
+      - model=${MODEL_BIN} # using variable from .env file
       - inference_mode=cpu
     volumes:
       - './gpt4all_api/app:/app'
+      - './gpt4all_api/models:/models' # models are mounted in the container
     command: ["/start-reload.sh"]
@@ -1,8 +1,6 @@
 # syntax=docker/dockerfile:1.0.0-experimental
 FROM tiangolo/uvicorn-gunicorn:python3.11

-ARG MODEL_BIN=ggml-mpt-7b-chat.bin
-
 # Put first so anytime this file changes other cached layers are invalidated.
 COPY gpt4all_api/requirements.txt /requirements.txt

@@ -17,7 +15,3 @@ COPY gpt4all_api/app /app

 RUN mkdir -p /models
-
-# Include the following line to bake a model into the image and not have to download it on API start.
-RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \
-    && md5sum /models/${MODEL_BIN}

@@ -1,39 +1,35 @@
 import logging
 import time
-from typing import Dict, List
-from api_v1.settings import settings
-from fastapi import APIRouter, Depends, Response, Security, status
+from typing import List
+from uuid import uuid4
+from fastapi import APIRouter
 from pydantic import BaseModel, Field
+from api_v1.settings import settings
+from fastapi.responses import StreamingResponse

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)

 ### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml


 class ChatCompletionMessage(BaseModel):
     role: str
     content: str


 class ChatCompletionRequest(BaseModel):
-    model: str = Field(..., description='The model to generate a completion from.')
-    messages: List[ChatCompletionMessage] = Field(..., description='The model to generate a completion from.')
+    model: str = Field(settings.model, description='The model to generate a completion from.')
+    messages: List[ChatCompletionMessage] = Field(..., description='Messages for the chat completion.')


 class ChatCompletionChoice(BaseModel):
     message: ChatCompletionMessage
     index: int
+    logprobs: float
     finish_reason: str


 class ChatCompletionUsage(BaseModel):
     prompt_tokens: int
     completion_tokens: int
     total_tokens: int


 class ChatCompletionResponse(BaseModel):
     id: str
     object: str = 'text_completion'
@@ -42,20 +38,38 @@ class ChatCompletionResponse(BaseModel):
     choices: List[ChatCompletionChoice]
     usage: ChatCompletionUsage


 router = APIRouter(prefix="/chat", tags=["Completions Endpoints"])


 @router.post("/completions", response_model=ChatCompletionResponse)
 async def chat_completion(request: ChatCompletionRequest):
     '''
-    Completes a GPT4All model response.
+    Completes a GPT4All model response based on the last message in the chat.
     '''
-    return ChatCompletionResponse(
-        id='asdf',
-        created=time.time(),
-        model=request.model,
-        choices=[{}],
-        usage={'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
+    # Example: Echo the last message content with some modification
+    if request.messages:
+        last_message = request.messages[-1].content
+        response_content = f"Echo: {last_message}"
+    else:
+        response_content = "No messages received."
+
+    # Create a chat message for the response
+    response_message = ChatCompletionMessage(role="system", content=response_content)
+
+    # Create a choice object with the response message
+    response_choice = ChatCompletionChoice(
+        message=response_message,
+        index=0,
+        logprobs=-1.0,  # Placeholder value
+        finish_reason="length"  # Placeholder value
     )
+
+    # Create the response object
+    chat_response = ChatCompletionResponse(
+        id=str(uuid4()),
+        created=int(time.time()),
+        model=request.model,
+        choices=[response_choice],
+        usage=ChatCompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0),  # Placeholder values
+    )
+
+    return chat_response
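For readers following the API changes, here is a minimal sketch of how a client could call the reworked /chat/completions route once the service is running; it assumes the OpenAI-compatible base URL `http://localhost:4891/v1` used by the tests in this changeset, and the model name shown is a hypothetical MODEL_BIN value, not something defined by these commits.

```python
# Minimal sketch: POST to the updated /chat/completions route of the local
# gpt4all-api service (assumed reachable at http://localhost:4891/v1).
import requests

payload = {
    # "model" falls back to settings.model on the server; a hypothetical
    # MODEL_BIN value is shown here only to illustrate the request shape.
    "model": "mistral-7b-instruct-v0.1.Q4_0",
    "messages": [{"role": "user", "content": "Who is Michael Jordan?"}],
}

resp = requests.post("http://localhost:4891/v1/chat/completions", json=payload, timeout=60)
resp.raise_for_status()
body = resp.json()

# With the handler above, the single choice echoes the last message back.
print(body["choices"][0]["message"]["content"])
```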
@@ -1,40 +1,39 @@
-import logging
-from typing import Dict, List
-
-from api_v1.settings import settings
-from fastapi import APIRouter, Depends, Response, Security, status
+import requests
+from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
+from typing import List, Dict

-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
+# Define the router for the engines module
+router = APIRouter(prefix="/engines", tags=["Search Endpoints"])

-### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml
-
-
+# Define the models for the engines module
 class ListEnginesResponse(BaseModel):
     data: List[Dict] = Field(..., description="All available models.")


 class EngineResponse(BaseModel):
     data: List[Dict] = Field(..., description="All available models.")


-router = APIRouter(prefix="/engines", tags=["Search Endpoints"])
+# Define the routes for the engines module


 @router.get("/", response_model=ListEnginesResponse)
 async def list_engines():
-    '''
-    List all available GPT4All models from
-    https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models2.json
-    '''
-    raise NotImplementedError()
-    return ListEnginesResponse(data=[])
+    try:
+        response = requests.get('https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models2.json')
+        response.raise_for_status()  # This will raise an HTTPError if the HTTP request returned an unsuccessful status code
+        engines = response.json()
+        return ListEnginesResponse(data=engines)
+    except requests.RequestException as e:
+        logger.error(f"Error fetching engine list: {e}")
+        raise HTTPException(status_code=500, detail="Error fetching engine list")

+# Define the routes for the engines module
 @router.get("/{engine_id}", response_model=EngineResponse)
 async def retrieve_engine(engine_id: str):
-    ''' '''
-    raise NotImplementedError()
-    return EngineResponse()
+    try:
+        # Implement logic to fetch a specific engine's details
+        # This is a placeholder, replace with your actual data retrieval logic
+        engine_details = {"id": engine_id, "name": "Engine Name", "description": "Engine Description"}
+        return EngineResponse(data=[engine_details])
+    except Exception as e:
+        logger.error(f"Error fetching engine details: {e}")
+        raise HTTPException(status_code=500, detail=f"Error fetching details for engine {engine_id}")
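As a quick illustration of what the rewritten list_engines route now serves, the sketch below queries it and prints the entries it proxies from models2.json; the `/v1` prefix is assumed from the api_base used in the tests, not stated in this file.

```python
# Sketch: list the engines the reworked /engines route proxies from models2.json.
# Assumes the gpt4all-api service is reachable at http://localhost:4891/v1.
import requests

resp = requests.get("http://localhost:4891/v1/engines/", timeout=30)
resp.raise_for_status()

for entry in resp.json()["data"]:
    # Each entry mirrors a models2.json record (name, filename, md5sum, ...).
    print(entry.get("name"), "->", entry.get("filename"))
```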
@@ -2,16 +2,26 @@
 Use the OpenAI python API to test gpt4all models.
 """
 from typing import List, get_args
+import os
+from dotenv import load_dotenv

 import openai

 openai.api_base = "http://localhost:4891/v1"

 openai.api_key = "not needed for a local LLM"

+# Load the .env file
+env_path = 'gpt4all-api/gpt4all_api/.env'
+load_dotenv(dotenv_path=env_path)
+
+# Fetch MODEL_ID from .env file
+model_id = os.getenv('MODEL_BIN', 'default_model_id')
+embedding = os.getenv('EMBEDDING', 'default_embedding_model_id')
+print(model_id)
+print(embedding)

 def test_completion():
-    model = "ggml-mpt-7b-chat.bin"
+    model = model_id
     prompt = "Who is Michael Jordan?"
     response = openai.Completion.create(
         model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
@@ -19,7 +29,7 @@ def test_completion():
     assert len(response['choices'][0]['text']) > len(prompt)

 def test_streaming_completion():
-    model = "ggml-mpt-7b-chat.bin"
+    model = model_id
     prompt = "Who is Michael Jordan?"
     tokens = []
     for resp in openai.Completion.create(
@@ -36,19 +46,27 @@ def test_streaming_completion():
     assert (len(tokens) > 0)
     assert (len("".join(tokens)) > len(prompt))

+# Modified test batch, problems with keyerror in response
 def test_batched_completion():
-    model = "ggml-mpt-7b-chat.bin"
+    model = model_id  # replace with your specific model ID
     prompt = "Who is Michael Jordan?"
-    response = openai.Completion.create(
-        model=model, prompt=[prompt] * 3, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
-    )
-    assert len(response['choices'][0]['text']) > len(prompt)
-    assert len(response['choices']) == 3
+    responses = []
+
+    # Loop to create completions one at a time
+    for _ in range(3):
+        response = openai.Completion.create(
+            model=model, prompt=prompt, max_tokens=50, temperature=0.28, top_p=0.95, n=1, echo=True, stream=False
+        )
+        responses.append(response)
+
+    # Assertions to check the responses
+    for response in responses:
+        assert len(response['choices'][0]['text']) > len(prompt)
+
+    assert len(responses) == 3

 def test_embedding():
-    model = "ggml-all-MiniLM-L6-v2-f16.bin"
+    model = embedding
     prompt = "Who is Michael Jordan?"
     response = openai.Embedding.create(model=model, input=prompt)
     output = response["data"][0]["embedding"]
@@ -56,4 +74,4 @@ def test_embedding():

     assert response["model"] == model
     assert isinstance(output, list)
     assert all(isinstance(x, args) for x in output)
gpt4all-api/gpt4all_api/env (new file, 3 lines)

@@ -0,0 +1,3 @@
+# Add your GGUF compatible model LLM here. ie: MODEL_BIN="mistral-7b-instruct-v0.1.Q4_0", rename file ".env"
+# Make sure this LLM matches the model you placed inside the models folder
+MODEL_BIN=""
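The new env template ties together the docker-compose and test changes above: MODEL_BIN names the GGUF file placed in the models folder, and the file is meant to be renamed to `.env`. Below is a small sketch of reading it with python-dotenv (now listed in requirements.txt), with the path taken from the updated tests.

```python
# Sketch: load MODEL_BIN from the renamed .env file, as the updated tests do.
import os
from dotenv import load_dotenv

load_dotenv(dotenv_path="gpt4all-api/gpt4all_api/.env")

model_bin = os.getenv("MODEL_BIN", "")
if not model_bin:
    raise SystemExit("Set MODEL_BIN in gpt4all-api/gpt4all_api/.env to a GGUF model name")
print(f"Serving model: {model_bin}")
```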
gpt4all-api/gpt4all_api/models/README.md (new file, 1 line)

@@ -0,0 +1 @@
+### Drop GGUF compatible models here, make sure it matches MODEL_BIN on your .env file
@@ -7,6 +7,7 @@ fastapi>=0.95.0
 Jinja2>=3.0
 gpt4all>=1.0.0
 pytest
-openai
+openai==0.28.0
 black
 isort
+python-dotenv
@@ -14,7 +14,7 @@ testenv_gpu: clean_testenv test_build
    docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up --build

 testenv_d: clean_testenv test_build
-   docker compose up --build -d
+   docker compose env up --build -d

 test:
    docker compose exec $(APP_NAME) pytest -svv --disable-warnings -p no:cacheprovider /app/tests
@@ -28,19 +28,19 @@ clean_testenv:
 fresh_testenv: clean_testenv testenv

 venv:
-   if [ ! -d $(ROOT_DIR)/env ]; then $(PYTHON) -m venv $(ROOT_DIR)/env; fi
+   if [ ! -d $(ROOT_DIR)/venv ]; then $(PYTHON) -m venv $(ROOT_DIR)/venv; fi

 dependencies: venv
-   source $(ROOT_DIR)/env/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt
+   source $(ROOT_DIR)/venv/bin/activate; $(PYTHON) -m pip install -r $(ROOT_DIR)/$(APP_NAME)/requirements.txt

 clean: clean_testenv
    # Remove existing environment
-   rm -rf $(ROOT_DIR)/env;
+   rm -rf $(ROOT_DIR)/venv;
    rm -rf $(ROOT_DIR)/$(APP_NAME)/*.pyc;


 black:
-   source $(ROOT_DIR)/env/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)
+   source $(ROOT_DIR)/venv/bin/activate; black -l 120 -S --target-version py38 $(APP_NAME)

 isort:
-   source $(ROOT_DIR)/env/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)
+   source $(ROOT_DIR)/venv/bin/activate; isort --ignore-whitespace --atomic -w 120 $(APP_NAME)
@@ -5,48 +5,46 @@ The [GPT4All Chat Client](https://gpt4all.io) lets you easily interact with any
 It is optimized to run 7-13B parameter LLMs on the CPU's of any computer running OSX/Windows/Linux.

 ## Running LLMs on CPU
-The GPT4All Chat UI supports models from all newer versions of `GGML`, `llama.cpp` including the `LLaMA`, `MPT`, `replit`, `GPT-J` and `falcon` architectures
+The GPT4All Chat UI supports models from all newer versions of `llama.cpp` with `GGUF` models including the `Mistral`, `LLaMA2`, `LLaMA`, `OpenLLaMa`, `Falcon`, `MPT`, `Replit`, `Starcoder`, and `Bert` architectures

 GPT4All maintains an official list of recommended models located in [models2.json](https://github.com/nomic-ai/gpt4all/blob/main/gpt4all-chat/metadata/models2.json). You can pull request new models to it and if accepted they will show up in the official download dialog.

-#### Sideloading any GGML model
+#### Sideloading any GGUF model
 If a model is compatible with the gpt4all-backend, you can sideload it into GPT4All Chat by:

-1. Downloading your model in GGML format. It should be a 3-8 GB file similar to the ones [here](https://huggingface.co/TheBloke/Samantha-7B-GGML/tree/main).
-2. Identifying your GPT4All model downloads folder. This is the path listed at the bottom of the downloads dialog(Three lines in top left>Downloads).
-3. Placing your downloaded model inside the GPT4All's model downloads folder.
+1. Downloading your model in GGUF format. It should be a 3-8 GB file similar to the ones [here](https://huggingface.co/TheBloke/Orca-2-7B-GGUF/tree/main).
+2. Identifying your GPT4All model downloads folder. This is the path listed at the bottom of the downloads dialog.
+3. Placing your downloaded model inside GPT4All's model downloads folder.
 4. Restarting your GPT4ALL app. Your model should appear in the model selection list.

 ## Plugins
 GPT4All Chat Plugins allow you to expand the capabilities of Local LLMs.

-### LocalDocs Beta Plugin (Chat With Your Data)
-LocalDocs is a GPT4All plugin that allows you to chat with your local files and data.
+### LocalDocs Plugin (Chat With Your Data)
+LocalDocs is a GPT4All feature that allows you to chat with your local files and data.
 It allows you to utilize powerful local LLMs to chat with private data without any data leaving your computer or server.
-When using LocalDocs, your LLM will cite the sources that most likely contributed to a given output. Note, even an LLM equipped with LocalDocs can hallucinate. If the LocalDocs plugin decides to utilize your documents to help answer a prompt, you will see references appear below the response.
+When using LocalDocs, your LLM will cite the sources that most likely contributed to a given output. Note, even an LLM equipped with LocalDocs can hallucinate. The LocalDocs plugin will utilize your documents to help answer prompts and you will see references appear below the response.

 <p align="center">
-  <img width="70%" src="https://github.com/nomic-ai/gpt4all/assets/13879686/f70f40b4-9684-46d8-b388-ca186f63d13e">
-</p>
-<p align="center">
-  GPT4All-Snoozy with LocalDocs. Try GPT4All-Groovy for a faster experience!
+  <img width="70%" src="https://github.com/nomic-ai/gpt4all/assets/10168/fe5dd3c0-b3cc-4701-98d3-0280dfbcf26f">
 </p>

 #### Enabling LocalDocs
 1. Install the latest version of GPT4All Chat from [GPT4All Website](https://gpt4all.io).
 2. Go to `Settings > LocalDocs tab`.
-3. Configure a collection (folder) on your computer that contains the files your LLM should have access to. You can alter the contents of the folder/directory at anytime. As you
+3. Download the SBert model
+4. Configure a collection (folder) on your computer that contains the files your LLM should have access to. You can alter the contents of the folder/directory at anytime. As you
 add more files to your collection, your LLM will dynamically be able to access them.
-4. Spin up a chat session with any LLM (including external ones like ChatGPT but warning data will leave your machine!)
-5. At the top right, click the database icon and select which collection you want your LLM to know about during your chat session.
+5. Spin up a chat session with any LLM (including external ones like ChatGPT but warning data will leave your machine!)
+6. At the top right, click the database icon and select which collection you want your LLM to know about during your chat session.
+7. You can begin searching with your localdocs even before the collection has completed indexing, but note the search will not include those parts of the collection yet to be indexed.

 #### LocalDocs Capabilities
-LocalDocs allows your LLM to have context about the contents of your documentation collection. Not all prompts/question will utilize your document
-collection for context. If LocalDocs was used in your LLMs response, you will see references to the document snippets that LocalDocs used.
+LocalDocs allows your LLM to have context about the contents of your documentation collection.

 LocalDocs **can**:

-- Query your documents based upon your prompt / question. If your documents contain answers that may help answer your question/prompt LocalDocs will try to utilize snippets of your documents to provide context.
+- Query your documents based upon your prompt / question. Your documents will be searched for snippets that can be used to provide context for an answer. The most relevant snippets will be inserted into your prompts context, but it will be up to the underlying model to decide how best to use the provided context.

 LocalDocs **cannot**:

@@ -62,9 +60,6 @@ The general technique this plugin uses is called [Retrieval Augmented Generation

 These document chunks help your LLM respond to queries with knowledge about the contents of your data.
 The number of chunks and the size of each chunk can be configured in the LocalDocs plugin settings tab.
-For indexing speed purposes, LocalDocs uses pre-deep-learning n-gram and TF-IDF based retrieval when deciding
-what document chunks your LLM should use as context. You'll find its of comparable quality
-with embedding based retrieval approaches but magnitudes faster to ingest data.

 LocalDocs supports the following file types:
 ```json
@@ -82,12 +77,10 @@ LocalDocs supports the following file types:
 *My LocalDocs plugin isn't using my documents*

 - Make sure LocalDocs is enabled for your chat session (the DB icon on the top-right should have a border)
-- Try to modify your prompt to be more specific and use terminology that is in your document. This will increase the likelihood that LocalDocs matches document snippets for your question.
 - If your document collection is large, wait 1-2 minutes for it to finish indexing.


 #### LocalDocs Roadmap
-- Embedding based semantic search for retrieval.
 - Customize model fine-tuned with retrieval in the loop.
 - Plugin compatibility with chat client server mode.
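The LocalDocs section of the documentation above describes Retrieval Augmented Generation in terms of document chunks inserted into the prompt context. The toy sketch below illustrates that idea with fixed-size chunking and a naive keyword score; it is only a conceptual illustration, not the plugin's actual indexing or retrieval code.

```python
# Toy illustration of the chunk-and-retrieve idea behind LocalDocs: split
# documents into fixed-size chunks, score them against the prompt, and return
# the best chunks as context. Not the plugin's real implementation.
def chunk(text: str, size: int = 500) -> list[str]:
    return [text[i:i + size] for i in range(0, len(text), size)]


def score(chunk_text: str, prompt: str) -> int:
    words = {w.lower() for w in prompt.split()}
    return sum(chunk_text.lower().count(w) for w in words)


def build_context(documents: list[str], prompt: str, max_chunks: int = 3) -> str:
    chunks = [c for doc in documents for c in chunk(doc)]
    best = sorted(chunks, key=lambda c: score(c, prompt), reverse=True)[:max_chunks]
    return "\n---\n".join(best)


if __name__ == "__main__":
    docs = ["GPT4All runs local LLMs on CPU.", "LocalDocs lets you chat with your files."]
    print(build_context(docs, "How do I chat with my files?"))
```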
@@ -18,7 +18,7 @@ endif()

 set(APP_VERSION_MAJOR 2)
 set(APP_VERSION_MINOR 5)
-set(APP_VERSION_PATCH 4)
+set(APP_VERSION_PATCH 5)
 set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")

 # Include the binary directory for the generated header file
@@ -917,11 +917,11 @@ void ChatLLM::restoreState()
 #if defined(DEBUG)
     qDebug() << "restoreState" << m_llmThread.objectName() << "size:" << m_state.size();
 #endif
-    m_processedSystemPrompt = true;

     if (m_state.isEmpty())
         return;

+    m_processedSystemPrompt = true;
     m_llModelInfo.model->restoreState(static_cast<const uint8_t*>(reinterpret_cast<void*>(m_state.data())));
     m_state.clear();
     m_state.resize(0);
@@ -930,7 +930,7 @@ void ChatLLM::restoreState()
 void ChatLLM::processSystemPrompt()
 {
     Q_ASSERT(isModelLoaded());
-    if (!isModelLoaded() || m_processedSystemPrompt || m_isServer)
+    if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText || m_isServer)
         return;

     const std::string systemPrompt = MySettings::globalInstance()->modelSystemPrompt(m_modelInfo).toStdString();
@@ -974,7 +974,7 @@ void ChatLLM::processSystemPrompt()
     fflush(stdout);
 #endif

-    m_processedSystemPrompt = !m_stopGenerating;
+    m_processedSystemPrompt = m_stopGenerating == false;
 }

 void ChatLLM::processRestoreStateFromText()
@@ -108,6 +108,7 @@ void Download::downloadModel(const QString &modelFile)
         const QString error
             = QString("ERROR: Could not open temp file: %1 %2").arg(tempFile->fileName()).arg(modelFile);
         qWarning() << error;
+        clearRetry(modelFile);
         ModelList::globalInstance()->updateDataByFilename(modelFile, ModelList::DownloadErrorRole, error);
         return;
     }
@@ -140,6 +141,7 @@ void Download::downloadModel(const QString &modelFile)
     QNetworkReply *modelReply = m_networkManager.get(request);
     connect(qApp, &QCoreApplication::aboutToQuit, modelReply, &QNetworkReply::abort);
     connect(modelReply, &QNetworkReply::downloadProgress, this, &Download::handleDownloadProgress);
+    connect(modelReply, &QNetworkReply::errorOccurred, this, &Download::handleErrorOccurred);
     connect(modelReply, &QNetworkReply::finished, this, &Download::handleModelDownloadFinished);
     connect(modelReply, &QNetworkReply::readyRead, this, &Download::handleReadyRead);
     m_activeDownloads.insert(modelReply, tempFile);
@@ -254,13 +256,51 @@ void Download::parseReleaseJsonFile(const QByteArray &jsonData)
     emit releaseInfoChanged();
 }

+bool Download::hasRetry(const QString &filename) const
+{
+    return m_activeRetries.contains(filename);
+}
+
+bool Download::shouldRetry(const QString &filename)
+{
+    int retries = 0;
+    if (m_activeRetries.contains(filename))
+        retries = m_activeRetries.value(filename);
+
+    ++retries;
+
+    // Allow up to ten retries for now
+    if (retries < 10) {
+        m_activeRetries.insert(filename, retries);
+        return true;
+    }
+
+    return false;
+}
+
+void Download::clearRetry(const QString &filename)
+{
+    m_activeRetries.remove(filename);
+}
+
 void Download::handleErrorOccurred(QNetworkReply::NetworkError code)
 {
     QNetworkReply *modelReply = qobject_cast<QNetworkReply *>(sender());
     if (!modelReply)
         return;

+    // This occurs when the user explicitly cancels the download
+    if (code == QNetworkReply::OperationCanceledError)
+        return;
+
     QString modelFilename = modelReply->request().attribute(QNetworkRequest::User).toString();
+    if (shouldRetry(modelFilename)) {
+        downloadModel(modelFilename);
+        return;
+    }
+
+    clearRetry(modelFilename);
+
     const QString error
         = QString("ERROR: Network error occurred attempting to download %1 code: %2 errorString %3")
         .arg(modelFilename)
@@ -355,6 +395,7 @@ void HashAndSaveFile::hashAndSave(const QString &expectedHash, const QString &sa
     // but will only work if the destination is on the same filesystem
     if (tempFile->rename(saveFilePath)) {
         emit hashAndSaveFinished(true, QString(), tempFile, modelReply);
+        ModelList::globalInstance()->updateModelsFromDirectory();
         return;
     }

@@ -406,11 +447,15 @@ void Download::handleModelDownloadFinished()
         qWarning() << errorString;
         modelReply->deleteLater();
         tempFile->deleteLater();
-        ModelList::globalInstance()->updateDataByFilename(modelFilename, ModelList::DownloadingRole, false);
-        ModelList::globalInstance()->updateDataByFilename(modelFilename, ModelList::DownloadErrorRole, errorString);
+        if (!hasRetry(modelFilename)) {
+            ModelList::globalInstance()->updateDataByFilename(modelFilename, ModelList::DownloadingRole, false);
+            ModelList::globalInstance()->updateDataByFilename(modelFilename, ModelList::DownloadErrorRole, errorString);
+        }
         return;
     }

+    clearRetry(modelFilename);
+
     // The hash and save needs the tempFile closed
     tempFile->close();

@@ -78,11 +78,15 @@ Q_SIGNALS:
 private:
     void parseReleaseJsonFile(const QByteArray &jsonData);
     QString incompleteDownloadPath(const QString &modelFile);
+    bool hasRetry(const QString &filename) const;
+    bool shouldRetry(const QString &filename);
+    void clearRetry(const QString &filename);

     HashAndSaveFile *m_hashAndSave;
     QMap<QString, ReleaseInfo> m_releaseMap;
     QNetworkAccessManager m_networkManager;
     QMap<QNetworkReply*, QFile*> m_activeDownloads;
+    QHash<QString, int> m_activeRetries;
     QDateTime m_startTime;

 private:
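The download.cpp and download.h changes above add per-file retry bookkeeping (hasRetry / shouldRetry / clearRetry) capped at ten attempts before the error is surfaced. For readers who want the logic without the Qt plumbing, here is a compact Python sketch of the same pattern; the class and names are illustrative only.

```python
# Sketch of the bounded-retry bookkeeping added in download.cpp/.h:
# count attempts per filename and give up after ten.
class RetryTracker:
    MAX_RETRIES = 10  # mirrors the "Allow up to ten retries for now" comment

    def __init__(self) -> None:
        self._active: dict[str, int] = {}

    def has_retry(self, filename: str) -> bool:
        return filename in self._active

    def should_retry(self, filename: str) -> bool:
        retries = self._active.get(filename, 0) + 1
        if retries < self.MAX_RETRIES:
            self._active[filename] = retries
            return True
        return False

    def clear_retry(self, filename: str) -> None:
        self._active.pop(filename, None)


# Usage: on each network error, retry while should_retry() allows it, then
# clear the counter once the download finishes or finally fails.
tracker = RetryTracker()
while tracker.should_retry("orca-2-7b.Q4_0.gguf"):
    pass  # re-issue the download attempt here
tracker.clear_retry("orca-2-7b.Q4_0.gguf")
```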
@@ -48,6 +48,36 @@
     },
     {
         "order": "e",
+        "md5sum": "00c8593ba57f5240f59662367b3ed4a5",
+        "name": "Orca 2 (Medium)",
+        "filename": "orca-2-7b.Q4_0.gguf",
+        "filesize": "3825824192",
+        "requires": "2.5.2",
+        "ramrequired": "8",
+        "parameters": "7 billion",
+        "quant": "q4_0",
+        "type": "LLaMA2",
+        "systemPrompt": " ",
+        "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
+        "url": "https://gpt4all.io/models/gguf/orca-2-7b.Q4_0.gguf"
+    },
+    {
+        "order": "f",
+        "md5sum": "3c0d63c4689b9af7baa82469a6f51a19",
+        "name": "Orca 2 (Full)",
+        "filename": "orca-2-13b.Q4_0.gguf",
+        "filesize": "7365856064",
+        "requires": "2.5.2",
+        "ramrequired": "16",
+        "parameters": "13 billion",
+        "quant": "q4_0",
+        "type": "LLaMA2",
+        "systemPrompt": " ",
+        "description": "<ul><li>Instruction based<li>Trained by Microsoft<li>Cannot be used commercially</ul>",
+        "url": "https://gpt4all.io/models/gguf/orca-2-13b.Q4_0.gguf"
+    },
+    {
+        "order": "g",
         "md5sum": "5aff90007499bce5c64b1c0760c0b186",
         "name": "Wizard v1.2",
         "filename": "wizardlm-13b-v1.2.Q4_0.gguf",
@@ -58,11 +88,11 @@
         "quant": "q4_0",
         "type": "LLaMA2",
         "systemPrompt": " ",
-        "description": "<strong>Best overall larger model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul",
+        "description": "<strong>Best overall larger model</strong><br><ul><li>Instruction based<li>Gives very long responses<li>Finetuned with only 1k of high-quality data<li>Trained by Microsoft and Peking University<li>Cannot be used commercially</ul>",
         "url": "https://gpt4all.io/models/gguf/wizardlm-13b-v1.2.Q4_0.gguf"
     },
     {
-        "order": "f",
+        "order": "h",
         "md5sum": "3d12810391d04d1153b692626c0c6e16",
         "name": "Hermes",
         "filename": "nous-hermes-llama2-13b.Q4_0.gguf",
@@ -78,7 +108,7 @@
         "promptTemplate": "### Instruction:\n%1\n### Response:\n"
     },
     {
-        "order": "g",
+        "order": "i",
         "md5sum": "40388eb2f8d16bb5d08c96fdfaac6b2c",
         "name": "Snoozy",
         "filename": "gpt4all-13b-snoozy-q4_0.gguf",
@@ -93,7 +123,7 @@
         "url": "https://gpt4all.io/models/gguf/gpt4all-13b-snoozy-q4_0.gguf"
     },
     {
-        "order": "h",
+        "order": "j",
         "md5sum": "cf5e8f73747f9d7c6fe72a629808c1de",
         "name": "MPT Chat",
         "filename": "mpt-7b-chat-merges-q4_0.gguf",
@@ -109,7 +139,7 @@
         "systemPrompt": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
     },
     {
-        "order": "i",
+        "order": "k",
         "md5sum": "0e769317b90ac30d6e09486d61fefa26",
         "name": "Mini Orca (Small)",
         "filename": "orca-mini-3b-gguf2-q4_0.gguf",
@@ -125,7 +155,7 @@
         "systemPrompt": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
     },
     {
-        "order": "j",
+        "order": "l",
         "md5sum": "e30579a1b109882f10e2a5e75ea388fb",
         "disableGUI": "true",
         "name": "Replit",
@@ -142,7 +172,7 @@
         "url": "https://gpt4all.io/models/gguf/replit-code-v1_5-3b-q4_0.gguf"
     },
     {
-        "order": "k",
+        "order": "m",
         "md5sum": "556fc3e13df42286997fb58e6f4c639f",
         "disableGUI": "true",
         "name": "Starcoder",
@@ -159,7 +189,7 @@
         "url": "https://gpt4all.io/models/gguf/starcoder-q4_0.gguf"
     },
     {
-        "order": "l",
+        "order": "n",
         "md5sum": "e973dd26f0ffa6e46783feaea8f08c83",
         "disableGUI": "true",
         "name": "Rift coder",
@@ -176,7 +206,7 @@
         "url": "https://gpt4all.io/models/gguf/rift-coder-v0-7b-q4_0.gguf"
     },
     {
-        "order": "m",
+        "order": "o",
         "md5sum": "e479e6f38b59afc51a470d1953a6bfc7",
         "disableGUI": "true",
         "name": "SBert",
@@ -192,7 +222,7 @@
         "url": "https://gpt4all.io/models/gguf/all-MiniLM-L6-v2-f16.gguf"
     },
     {
-        "order": "n",
+        "order": "p",
         "md5sum": "919de4dd6f25351bcb0223790db1932d",
         "name": "EM German Mistral",
         "filename": "em_german_mistral_v01.Q4_0.gguf",
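Each new models2.json entry above carries an md5sum that the chat client verifies after download. The sketch below performs the same check for a manually downloaded file, using the Orca 2 (Medium) checksum from the entry above; the local path is an assumption.

```python
# Sketch: verify a downloaded GGUF file against the md5sum published in models2.json.
import hashlib

EXPECTED_MD5 = "00c8593ba57f5240f59662367b3ed4a5"  # Orca 2 (Medium) entry above
path = "models/orca-2-7b.Q4_0.gguf"  # assumed local path

md5 = hashlib.md5()
with open(path, "rb") as f:
    for block in iter(lambda: f.read(1 << 20), b""):
        md5.update(block)

if md5.hexdigest() != EXPECTED_MD5:
    raise SystemExit(f"Checksum mismatch for {path}")
print("Checksum OK")
```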
@@ -600,6 +600,22 @@
 * Jared Van Bortel (Nomic AI)
 * Adam Treat (Nomic AI)
 * Community (beta testers, bug reporters, bindings authors)
+"
+    },
+    {
+        "version": "2.5.4",
+        "notes":
+"
+* Major bugfix release with new models!
+* Model: Recently released Orca 2 model which does exceptionally well on reasoning tasks
+* Fix: System prompt was not always being honored
+* Fix: Download network retry on cloudflare errors
+",
+        "contributors":
+"
+* Adam Treat (Nomic AI)
+* Jared Van Bortel (Nomic AI)
+* Community (beta testers, bug reporters, bindings authors)
 "
     }
 ]