Module gguf_modeldb.db
Expand source code
import bs4
import requests
from typing import Union, Optional
from util_helper.file_handler import create_dir, list_files_in_dir, copy_large_file, get_absolute_path
from util_helper.compare_strings import compare_two_strings
from .model_data import ModelData
from .db_settings import VERIFIED_MODELS_DB_DIR
class ModelDB:
    """Class for managing a database of ModelData objects.

    Handles loading models from a directory, searching, adding new models,
    and interfacing with HuggingFace to import models.

    Attributes:
        gguf_db_dir (str): Path to directory containing ModelData files
        models (List[ModelData]): List of ModelData objects
    """

    def __init__(self, model_db_dir:Optional[str]=None, copy_verified_models=True):
        """Initialize ModelDB object.

        Args:
            model_db_dir (str, optional): Path to database directory. Defaults to VERIFIED_MODELS_DB_DIR.
            copy_verified_models (bool, optional): Whether to copy example models to the new directory. Defaults to True.
        """
        self.gguf_db_dir = None
        self.models = []
        if model_db_dir is None:
            model_db_dir = VERIFIED_MODELS_DB_DIR
        else:
            model_db_dir = get_absolute_path(model_db_dir)
        self.set_model_db_dir(model_db_dir)
        if model_db_dir != VERIFIED_MODELS_DB_DIR:
            # NOTE(review): both branches below copy every verified-model JSON into
            # the new directory; copy_verified_models only changes the log output.
            # Confirm whether the False branch was meant to skip copying entirely.
            if copy_verified_models:
                print(f"Copying examples to {model_db_dir}...")
                for file in list_files_in_dir(VERIFIED_MODELS_DB_DIR, False, True, [".json"], absolute=True):
                    f_mdt = ModelData.from_json(file)
                    f_mdt.set_save_dir(model_db_dir)
                    f_mdt.save_json()
                    print(f"Saved a copy of {file} to {model_db_dir}.")
            else:
                print(f"Using default model db dir: {model_db_dir}, reconfiguring models...")
                for file in list_files_in_dir(VERIFIED_MODELS_DB_DIR, False, True, [".json"], absolute=True):
                    f_mdt = ModelData.from_json(file)
                    f_mdt.set_save_dir(model_db_dir)
                    f_mdt.save_json()
                    print(f"Reconfigured {file}.")
        self.load_models()

    def set_model_db_dir(self, model_db_dir:str) -> None:
        """Set the database directory, creating it if it does not exist.

        Args:
            model_db_dir (str): Path to database directory
        """
        print(f"ModelDB dir set to {model_db_dir}.")
        self.gguf_db_dir = create_dir(model_db_dir)

    def load_models(self) -> None:
        """Load ModelData objects from the database directory.

        Replaces the current in-memory model list; unreadable JSON files are
        reported and skipped rather than aborting the whole load.
        """
        self.models = []
        files = list_files_in_dir(self.gguf_db_dir, False, True, [".json"], absolute=True)
        for file in files:
            try:
                model_data = ModelData.from_json(file)
                self.models.append(model_data)
            except Exception as e:
                print(f"Error trying to load from {file}: \t\n{e}, \nskipping...")
                continue
        print(f"Loaded {len(self.models)} models from {self.gguf_db_dir}.")

    def find_models(self, name_query:Optional[str]=None,
                    quantization_query:Optional[str]=None,
                    keywords_query:Optional[str]=None,
                    treshold:float=0.6,
                    only_downloaded:bool=False) -> Union[None, list]:
        """Search for models based on name, quantization, and keywords.

        Scoring weights: the best name sub-match counts x4, quantization x2 and
        the best keyword x1, so name similarity dominates the ranking. Only
        scores above `treshold` contribute.

        Args:
            name_query (str, optional): Search query for name
            quantization_query (str, optional): Search query for quantization
            keywords_query (str, optional): Search query for keywords
            treshold (float, optional): Minimum similarity score threshold. Defaults to 0.6.
            only_downloaded (bool, optional): If True, keep only models whose gguf file is downloaded. Defaults to False.

        Returns:
            Union[None, list]: Sorted list of models exceeding threshold,
                or None if no query provided
        """
        if name_query is None and quantization_query is None and keywords_query is None:
            return None
        # One score entry per model; iterate values directly (the original
        # keyed these by index and shadowed the builtin `id` while iterating).
        scoring_entries = [{"model": model, "score": 0} for model in self.models]
        for entry in scoring_entries:
            model = entry["model"]
            model_name = model.name
            model_quantization = model.model_quantization
            model_keywords = model.keywords
            if name_query is not None:
                # Match against each dash-separated fragment of the name and
                # keep the best fragment score.
                top_name_score = 0
                for model_subname in model_name.split("-"):
                    name_score = compare_two_strings(name_query, model_subname)
                    if name_score > top_name_score:
                        top_name_score = name_score
                if top_name_score > treshold:
                    entry["score"] += top_name_score*4
            if quantization_query is not None:
                quantization_score = compare_two_strings(quantization_query, model_quantization)
                if quantization_score > treshold:
                    entry["score"] += quantization_score*2
            if keywords_query is not None:
                best_keyword_score = 0
                for keyword in model_keywords:
                    keyword_score = compare_two_strings(keywords_query, keyword)
                    if keyword_score > best_keyword_score:
                        best_keyword_score = keyword_score
                if best_keyword_score > treshold:
                    entry["score"] += best_keyword_score
        sorted_entries = sorted(scoring_entries, key=lambda x: x["score"], reverse=True)
        # Keep just the list of model data.
        sorted_models = [x["model"] for x in sorted_entries]
        if only_downloaded:
            sorted_models = [x for x in sorted_models if x.is_downloaded()]
        return sorted_models

    def find_model(self, name_query:Optional[str]=None,
                   quantization_query:Optional[str]=None,
                   keywords_query:Optional[str]=None,
                   only_downloaded:bool=False
                   ) -> Optional[ModelData]:
        """Find top matching model based on queries.

        Args:
            name_query (str, optional): Search query for name
            quantization_query (str, optional): Search query for quantization
            keywords_query (str, optional): Search query for keywords
            only_downloaded (bool, optional): If True, only consider downloaded models. Defaults to False.

        Returns:
            Optional[ModelData]: Top matching ModelData object

        Raises:
            Exception: If no model matches the query.
        """
        sorted_models = self.find_models(name_query, quantization_query, keywords_query, only_downloaded=only_downloaded)
        if sorted_models is None or len(sorted_models) == 0:
            if len(self.models) == 0:
                print(f"There were no models to be searched. Try importing a verified model or using the default db dir.")
            raise Exception(f"Could not find a model matching the query: {name_query} {quantization_query} {keywords_query}")
        else:
            return sorted_models[0]

    def get_model_by_url(self, url:str) -> Optional[ModelData]:
        """Get ModelData by exact URL match.

        Args:
            url (str): ggUF URL

        Returns:
            Optional[ModelData]: Matching ModelData or None if not found
        """
        for model in self.models:
            if model.gguf_url == url:
                return model
        return None

    def get_model_by_gguf_path(self, gguf_path:str) -> Optional[ModelData]:
        """Get ModelData by exact ggUF path match.

        Args:
            gguf_path (str): ggUF path

        Returns:
            Optional[ModelData]: Matching ModelData or None if not found
        """
        for model in self.models:
            if model.gguf_file_path == gguf_path:
                return model
        return None

    def add_model_data(self, model_data:ModelData, save_model=True) -> None:
        """Add a ModelData object to the database.

        Args:
            model_data (ModelData): ModelData object to add
            save_model (bool, optional): Whether to save ModelData to file. Defaults to True.
        """
        self.models.append(model_data)
        if save_model:
            model_data.save_json()

    def add_model_by_url(self, url:str) -> None:
        """Add a model by URL.

        Args:
            url (str): ggUF URL
        """
        model_data = ModelData(url, db_dir=self.gguf_db_dir)
        self.add_model_data(model_data)

    def add_model_by_json(self, json_file_path:str) -> None:
        """Add a model from a JSON file.

        Args:
            json_file_path (str): Path to ModelData JSON file
        """
        model_data = ModelData.from_json(json_file_path)
        self.add_model_data(model_data)

    def save_all_models(self) -> None:
        """Save all ModelData objects to file."""
        for model in self.models:
            model.save_json()

    @staticmethod
    def _model_links_from_repo(hf_repo_url:str):
        """Extract ggUF model links from a HuggingFace repo page.

        Args:
            hf_repo_url (str): URL of HuggingFace model repo

        Returns:
            list: List of ggUF URLs
        """
        # Extract models from the HF page; timeout prevents an indefinite hang
        # on an unresponsive server.
        response = requests.get(hf_repo_url, timeout=30)
        html = response.text
        soup = bs4.BeautifulSoup(html, 'html.parser')
        # Find all links that end with .gguf
        print(f"Looking for {hf_repo_url} gguf files...")
        model_links = []
        for link in soup.find_all('a'):
            href = link.get('href')
            if href is not None and href.endswith(".gguf"):
                print(f"Found model: {href}")
                model_links.append(href)
        return model_links

    def load_models_data_from_repo(self, hf_repo_url:str,
                                   user_tags:Optional[list[str]]=None,
                                   ai_tags:Optional[list[str]]=None,
                                   system_tags:Optional[list[str]]=None,
                                   keywords:Optional[list[str]]=None,
                                   description:Optional[str]=None):
        """Load model data from a HuggingFace repo page.

        Creates, saves and appends a ModelData for every gguf link found;
        the in-memory list is extended rather than reloaded.

        Args:
            hf_repo_url (str): URL of HuggingFace model repo
            user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
            ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
            system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
            keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
            description (Optional[str], optional): Description to apply. Defaults to None.

        Returns:
            list: List of loaded ModelData objects
        """
        model_links = ModelDB._model_links_from_repo(hf_repo_url)
        model_datas = []
        for model_link in model_links:
            model_data = ModelData(gguf_url=model_link, db_dir=self.gguf_db_dir, user_tags=user_tags, ai_tags=ai_tags, system_tags=system_tags, description=description, keywords=keywords)
            model_datas.append(model_data)
            model_data.save_json()
        self.models.extend(model_datas)
        return model_datas

    def import_models_from_repo(self, hf_repo_url:str,
                                user_tags:Optional[list[str]]=None,
                                ai_tags:Optional[list[str]]=None,
                                system_tags:Optional[list[str]]=None,
                                keywords:Optional[list[str]]=None,
                                description:Optional[str]=None,
                                replace_existing:bool=False,
                                ):
        """Import models from a HuggingFace repo page.

        Unlike load_models_data_from_repo, this reloads the whole database
        from disk afterwards and can optionally overwrite existing files.

        Args:
            hf_repo_url (str): URL of HuggingFace model repo
            user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
            ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
            system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
            keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
            description (Optional[str], optional): Description to apply. Defaults to None.
            replace_existing (bool, optional): Whether to overwrite existing files. Defaults to False.
        """
        model_links = ModelDB._model_links_from_repo(hf_repo_url)
        print(f"Loaded {len(model_links)} models from {hf_repo_url}.")
        for model_link in model_links:
            model_data = ModelData(gguf_url=model_link, db_dir=self.gguf_db_dir, user_tags=user_tags, ai_tags=ai_tags, system_tags=system_tags, description=description, keywords=keywords)
            model_data.save_json(replace_existing=replace_existing)
        self.load_models()

    def import_verified_model(self,
                              name_search:Optional[str]=None,
                              quantization_search:Optional[str]=None,
                              keywords_search:Optional[str]=None,
                              copy_gguf:bool=True) -> None:
        """Import a verified model from the verified model database with ready configurations into your selected db dir.

        Use this to selectively add models from the verified model database to your own database.
        Models include official dolphin, mistral, mixtral, solar and zephyr models in all available quantizations.
        With no search arguments, every verified model is imported.

        Args:
            name_search (Optional[str], optional): Search query for name. Defaults to None.
            quantization_search (Optional[str], optional): Search query for quantization. Defaults to None.
            keywords_search (Optional[str], optional): Search query for keywords. Defaults to None.
            copy_gguf (bool, optional): Also copy the downloaded gguf file itself. Defaults to True.
        """
        if self.gguf_db_dir == VERIFIED_MODELS_DB_DIR:
            print("Cannot import verified model to the default database directory. All models should be already available here.")
        else:
            vmdb = ModelDB()
            if name_search is None and quantization_search is None and keywords_search is None:
                print(f"Importing all verified models to {self.gguf_db_dir}...")
                models = vmdb.models
            else:
                print(f"Importing a verified model matching {name_search} {quantization_search} {keywords_search} to {self.gguf_db_dir}...")
                models = [vmdb.find_model(name_search, quantization_search, keywords_search)]
            for model in models:
                if copy_gguf and model.is_downloaded():
                    source_file = model.gguf_file_path
                    # Rebase the gguf path from the verified db dir into ours.
                    target_file = model.gguf_file_path.replace(vmdb.gguf_db_dir, self.gguf_db_dir)
                    print(f"Copying {source_file} to {target_file}...")
                    copy_large_file(source_file, target_file)
                model.set_save_dir(self.gguf_db_dir)
                model.save_json()
            self.load_models()

    def list_available_models(self) -> list[str]:
        """Get a list of unique available model names.

        Returns:
            list[str]: List of model names
        """
        print(f"Available models in {self.gguf_db_dir}:")
        models = []
        for model in self.models:
            if model.name not in models:
                models.append(model.name)
        return models

    def list_models_quantizations(self, model_name:str) -> list[str]:
        """Get list of quantizations for a model.

        Args:
            model_name (str): Name of model

        Returns:
            list[str]: List of quantizations
        """
        quantizations = []
        for model in self.models:
            if model.name == model_name:
                quantizations.append(model.model_quantization)
        return quantizations

    def show_db_info(self) -> None:
        """Print summary information about the database."""
        print(f"ModelDB summary:")
        print(f"ModelDB dir: {self.gguf_db_dir}")
        print(f"Number of models: {len(self.models)}")
        print(f"Available models:")
        models_info = {}
        for model in self.models:
            if model.name not in models_info.keys():
                models_info[model.name] = {}
                models_info[model.name]["quantizations"] = []
                models_info[model.name]["description"] = model.description
                models_info[model.name]["keywords"] = model.keywords
            if model.model_quantization not in models_info[model.name]["quantizations"]:
                models_info[model.name]["quantizations"].append(model.model_quantization)
        # Use a distinct loop variable; the original rebound `models_info`
        # itself while iterating its items.
        for model_name, info in models_info.items():
            print(f"\t{model_name}:")
            print(f"\t\tQuantizations: {info['quantizations']}")
            print(f"\t\tKeywords: {info['keywords']}")
            print(f"\t\tDescription: {info['description']}")
            print(f"\t-------------------------------")
Classes
class ModelDB (model_db_dir: Optional[str] = None, copy_verified_models=True)
-
Class for managing a database of ModelData objects.
Handles loading models from a directory, searching, adding new models, and interfacing with HuggingFace to import models.
Attributes
gguf_db_dir
:str
- Path to directory containing ModelData files
models
:List[ModelData]
- List of ModelData objects
Initialize ModelDB object.
Args
model_db_dir
:str
, optional- Path to database directory. Defaults to VERIFIED_MODELS_DB_DIR.
copy_verified_models
:bool
, optional- Whether to copy example models to the new directory. Defaults to True.
Expand source code
class ModelDB: """Class for managing a database of ModelData objects. Handles loading models from a directory, searching, adding new models, and interfacing with HuggingFace to import models. Attributes: gguf_db_dir (str): Path to directory containing ModelData files models (List[ModelData]): List of ModelData objects """ def __init__(self, model_db_dir:Optional[str]=None, copy_verified_models=True): """Initialize ModelDB object. Args: model_db_dir (str, optional): Path to database directory. Defaults to VERIFIED_MODELS_DB_DIR. copy_verified_models (bool, optional): Whether to copy example models to the new directory. Defaults to True. """ self.gguf_db_dir = None self.models = [] if model_db_dir is None: model_db_dir = VERIFIED_MODELS_DB_DIR else: model_db_dir = get_absolute_path(model_db_dir) self.set_model_db_dir(model_db_dir) if model_db_dir != VERIFIED_MODELS_DB_DIR: if copy_verified_models: print(f"Copying examples to {model_db_dir}...") for file in list_files_in_dir(VERIFIED_MODELS_DB_DIR, False, True, [".json"], absolute=True): f_mdt = ModelData.from_json(file) f_mdt.set_save_dir(model_db_dir) f_mdt.save_json() print(f"Saved a copy of {file} to {model_db_dir}.") else: print(f"Using default model db dir: {model_db_dir}, reconfiguring models...") for file in list_files_in_dir(VERIFIED_MODELS_DB_DIR, False, True, [".json"], absolute=True): f_mdt = ModelData.from_json(file) f_mdt.set_save_dir(model_db_dir) f_mdt.save_json() print(f"Reconfigured {file}.") self.load_models() def set_model_db_dir(self, model_db_dir:str) -> None: """Set the database directory. 
Args: model_db_dir (str): Path to database directory """ print(f"ModelDB dir set to {model_db_dir}.") self.gguf_db_dir = create_dir(model_db_dir) def load_models(self) -> None: """Load ModelData objects from the database directory.""" self.models = [] files = list_files_in_dir(self.gguf_db_dir, False, True, [".json"], absolute=True) for file in files: try: model_data = ModelData.from_json(file) self.models.append(model_data) except Exception as e: print(f"Error trying to load from {file}: \t\n{e}, \nskipping...") continue print(f"Loaded {len(self.models)} models from {self.gguf_db_dir}.") def find_models(self, name_query:Optional[str]=None, quantization_query:Optional[str]=None, keywords_query:Optional[str]=None, treshold:float=0.6, only_downloaded:bool=False) -> Union[None, list]: """Search for models based on name, quantization, and keywords. Args: name_query (str, optional): Search query for name quantization_query (str, optional): Search query for quantization keywords_query (str, optional): Search query for keywords treshold (float, optional): Minimum similarity score threshold. Defaults to 0.6. 
Returns: Union[None, list]: Sorted list of models exceeding threshold, or None if no query provided """ if name_query is None and quantization_query is None and keywords_query is None: return None scoring_models_dict = {} for i, model in enumerate(self.models): scoring_models_dict[i] = {"model":model, "score":0} for id in scoring_models_dict.keys(): model = scoring_models_dict[id]["model"] model:ModelData = model model_name = model.name model_quantization = model.model_quantization model_keywords = model.keywords if name_query is not None: #print(f"Searching for name: {name_query}") top_name_score = 0 for model_subname in model_name.split("-"): name_score = compare_two_strings(name_query, model_subname) if name_score > top_name_score: top_name_score = name_score if top_name_score > treshold: scoring_models_dict[id]["score"] += top_name_score*4 #print(f"Model {model_name} {model_quantization} top score: {top_name_score} treshold: {treshold}") if quantization_query is not None: #print(f"Searching for quantization: {quantization_query}") quantization_score = compare_two_strings(quantization_query, model_quantization) if quantization_score > treshold: scoring_models_dict[id]["score"] += quantization_score*2 #print(f"Model {model_name} {model_quantization} score: {quantization_score} treshold: {treshold}") if keywords_query is not None: #print(f"Searching for keyword: {keywords_query}") best_keyword_score = 0 for keyword in model_keywords: keyword_score = compare_two_strings(keywords_query, keyword) if keyword_score > best_keyword_score: best_keyword_score = keyword_score if best_keyword_score > treshold: scoring_models_dict[id]["score"] += best_keyword_score #print(f"Model {model_name} {model_quantization} score: {best_keyword_score} treshold: {treshold}") #print(f"Model {model_name} {model_quantization} score: {scoring_models_dict[id]['score']}") sorted_models = sorted(scoring_models_dict.items(), key=lambda x: x[1]["score"], reverse=True) #keep just the list of model 
data sorted_models = [x[1]["model"] for x in sorted_models] if only_downloaded: sorted_models = [x for x in sorted_models if x.is_downloaded()] #print(f"Found {len(sorted_models)} models.") #print(sorted_models) return sorted_models def find_model(self, name_query:Optional[str]=None, quantization_query:Optional[str]=None, keywords_query:Optional[str]=None, only_downloaded:bool=False ) -> Optional[ModelData]: """Find top matching model based on queries. Args: name_query (str, optional): Search query for name quantization_query (str, optional): Search query for quantization keywords_query (str, optional): Search query for keywords Returns: Optional[ModelData]: Top matching ModelData object or None """ sorted_models = self.find_models(name_query, quantization_query, keywords_query, only_downloaded=only_downloaded) if sorted_models is None or len(sorted_models) == 0: if len(self.models) == 0: print(f"There were no models to be searched. Try importing a verified model or using the defualt db dir.") raise Exception(f"Could not find a model matching the query: {name_query} {quantization_query} {keywords_query}") else: #print(f"Found {len(sorted_models)} models.") #print(sorted_models) return sorted_models[0] def get_model_by_url(self, url:str) -> Optional[ModelData]: """Get ModelData by exact URL match. Args: url (str): ggUF URL Returns: Optional[ModelData]: Matching ModelData or None if not found """ for model in self.models: model:ModelData = model if model.gguf_url == url: return model return None def get_model_by_gguf_path(self, gguf_path:str) -> Optional[ModelData]: """Get ModelData by exact ggUF path match. Args: gguf_path (str): ggUF path Returns: Optional[ModelData]: Matching ModelData or None if not found """ for model in self.models: model:ModelData = model if model.gguf_file_path == gguf_path: return model return None def add_model_data(self, model_data:ModelData, save_model=True) -> None: """Add a ModelData object to the database. 
Args: model_data (ModelData): ModelData object to add save_model (bool, optional): Whether to save ModelData to file. Defaults to True. """ self.models.append(model_data) if save_model: model_data.save_json() def add_model_by_url(self, url:str, ) -> None: """Add a model by URL. Args: url (str): ggUF URL """ model_data = ModelData(url, db_dir=self.gguf_db_dir) self.add_model_data(model_data) def add_model_by_json(self, json_file_path:str) -> None: """Add a model from a JSON file. Args: json_file_path (str): Path to ModelData JSON file """ model_data = ModelData.from_json(json_file_path) self.add_model_data(model_data) def save_all_models(self) -> None: """Save all ModelData objects to file.""" for model in self.models: model:ModelData = model model.save_json() @staticmethod def _model_links_from_repo(hf_repo_url:str): """Extract ggUF model links from a HuggingFace repo page. Args: hf_repo_url (str): URL of HuggingFace model repo Returns: list: List of ggUF URLs """ #extract models from hf response = requests.get(hf_repo_url) html = response.text soup = bs4.BeautifulSoup(html, 'html.parser') #find all links that end with .gguf print(f"Looking for {hf_repo_url} gguf files...") model_links = [] for link in soup.find_all('a'): href = link.get('href') if href is not None and href.endswith(".gguf"): print(f"Found model: {href}") model_links.append(href) return model_links def load_models_data_from_repo(self, hf_repo_url:str, user_tags:Optional[list[str]]=None, ai_tags:Optional[list[str]]=None, system_tags:Optional[list[str]]=None, keywords:Optional[list[str]]=None, description:Optional[str]=None): """Load model data from a HuggingFace repo page. Args: hf_repo_url (str): URL of HuggingFace model repo user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None. ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None. system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None. 
keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None. description (Optional[str], optional): Description to apply. Defaults to None. Returns: list: List of loaded ModelData objects """ #create model data from hf repo model_links = ModelDB._model_links_from_repo(hf_repo_url) model_datas = [] for model_link in model_links: model_data = ModelData(gguf_url=model_link, db_dir=self.gguf_db_dir, user_tags=user_tags, ai_tags=ai_tags, system_tags=system_tags, description=description, keywords=keywords) model_datas.append(model_data) model_data.save_json() self.models.extend(model_datas) return model_datas def import_models_from_repo(self, hf_repo_url:str, user_tags:Optional[list[str]]=None, ai_tags:Optional[list[str]]=None, system_tags:Optional[list[str]]=None, keywords:Optional[list[str]]=None, description:Optional[str]=None, replace_existing:bool=False, ): """Import models from a HuggingFace repo page. Args: hf_repo_url (str): URL of HuggingFace model repo user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None. ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None. system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None. keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None. description (Optional[str], optional): Description to apply. Defaults to None. replace_existing (bool, optional): Whether to overwrite existing files. Defaults to False. 
""" #create model data from hf repo model_links = ModelDB._model_links_from_repo(hf_repo_url) print(f"Loaded {len(model_links)} models from {hf_repo_url}.") for model_link in model_links: model_data = ModelData(gguf_url=model_link, db_dir=self.gguf_db_dir, user_tags=user_tags, ai_tags=ai_tags, system_tags=system_tags, description=description, keywords=keywords) model_data.save_json(replace_existing=replace_existing) self.load_models() def import_verified_model(self, name_search:Optional[str]=None, quantization_search:Optional[str]=None, keywords_search:Optional[str]=None, copy_gguf:bool=True) -> None: """Import a verified model from the verified model database with ready configurations into your selected db dir. Use this to selectively add models from the verified model database to your own database. Models inlcude official dolphin, mistral, mixtral, solar and zephyr models in all available quantizations. Args: name_search (Optional[str], optional): Search query for name. Defaults to None. quantization_search (Optional[str], optional): Search query for quantization. Defaults to None. keywords_search (Optional[str], optional): Search query for keywords. Defaults to None. """ if self.gguf_db_dir == VERIFIED_MODELS_DB_DIR: print("Cannot import verified model to the default database directory. 
All models should be already available here.") else: vmdb = ModelDB() if name_search is None and quantization_search is None and keywords_search is None: print(f"Importing all verified models to {self.gguf_db_dir}...") models = vmdb.models else: print(f"Importing a verified model matching {name_search} {quantization_search} {keywords_search} to {self.gguf_db_dir}...") models = [vmdb.find_model(name_search, quantization_search, keywords_search)] for model in models: if copy_gguf and model.is_downloaded(): source_file = model.gguf_file_path target_file = model.gguf_file_path.replace(vmdb.gguf_db_dir, self.gguf_db_dir) print(f"Copying {source_file} to {target_file}...") copy_large_file(source_file, target_file) model.set_save_dir(self.gguf_db_dir) model.save_json() self.load_models() def list_available_models(self) -> list[str]: """Get a list of available model names. Returns: list[str]: List of model names """ print(f"Available models in {self.gguf_db_dir}:") models = [] for model in self.models: model:ModelData = model if model.name not in models: models.append(model.name) return models def list_models_quantizations(self, model_name:str) -> list[str]: """Get list of quantizations for a model. 
Args: model_name (str): Name of model Returns: list[str]: List of quantizations """ quantizations = [] for model in self.models: model:ModelData = model if model.name == model_name: quantizations.append(model.model_quantization) return quantizations def show_db_info(self) -> None: """Print summary information about the database.""" print(f"ModelDB summary:") print(f"ModelDB dir: {self.gguf_db_dir}") print(f"Number of models: {len(self.models)}") print(f"Available models:") models_info = {} for model in self.models: model:ModelData = model if model.name not in models_info.keys(): models_info[model.name] = {} models_info[model.name]["quantizations"] = [] models_info[model.name]["description"] = model.description models_info[model.name]["keywords"] = model.keywords if model.model_quantization not in models_info[model.name]["quantizations"]: models_info[model.name]["quantizations"].append(model.model_quantization) for model_name, models_info in models_info.items(): print(f"\t{model_name}:") print(f"\t\tQuantizations: {models_info['quantizations']}") print(f"\t\tKeywords: {models_info['keywords']}") print(f"\t\tDescription: {models_info['description']}") print(f"\t-------------------------------")
Methods
def add_model_by_json(self, json_file_path: str) ‑> None
-
Add a model from a JSON file.
Args
json_file_path
:str
- Path to ModelData JSON file
Expand source code
def add_model_by_json(self, json_file_path:str) -> None: """Add a model from a JSON file. Args: json_file_path (str): Path to ModelData JSON file """ model_data = ModelData.from_json(json_file_path) self.add_model_data(model_data)
def add_model_by_url(self, url: str) ‑> None
-
Add a model by URL.
Args
url
:str
- ggUF URL
Expand source code
def add_model_by_url(self, url:str, ) -> None: """Add a model by URL. Args: url (str): ggUF URL """ model_data = ModelData(url, db_dir=self.gguf_db_dir) self.add_model_data(model_data)
def add_model_data(self, model_data: ModelData, save_model=True) ‑> None
-
Add a ModelData object to the database.
Args
model_data
:ModelData
- ModelData object to add
save_model
:bool
, optional- Whether to save ModelData to file. Defaults to True.
Expand source code
def add_model_data(self, model_data:ModelData, save_model=True) -> None: """Add a ModelData object to the database. Args: model_data (ModelData): ModelData object to add save_model (bool, optional): Whether to save ModelData to file. Defaults to True. """ self.models.append(model_data) if save_model: model_data.save_json()
def find_model(self, name_query: Optional[str] = None, quantization_query: Optional[str] = None, keywords_query: Optional[str] = None, only_downloaded: bool = False) ‑> Optional[ModelData]
-
Find top matching model based on queries.
Args
name_query
:str
, optional- Search query for name
quantization_query
:str
, optional- Search query for quantization
keywords_query
:str
, optional- Search query for keywords
Returns
Optional[ModelData]
- Top matching ModelData object or None
Expand source code
def find_model(self, name_query:Optional[str]=None, quantization_query:Optional[str]=None, keywords_query:Optional[str]=None, only_downloaded:bool=False ) -> Optional[ModelData]: """Find top matching model based on queries. Args: name_query (str, optional): Search query for name quantization_query (str, optional): Search query for quantization keywords_query (str, optional): Search query for keywords Returns: Optional[ModelData]: Top matching ModelData object or None """ sorted_models = self.find_models(name_query, quantization_query, keywords_query, only_downloaded=only_downloaded) if sorted_models is None or len(sorted_models) == 0: if len(self.models) == 0: print(f"There were no models to be searched. Try importing a verified model or using the defualt db dir.") raise Exception(f"Could not find a model matching the query: {name_query} {quantization_query} {keywords_query}") else: #print(f"Found {len(sorted_models)} models.") #print(sorted_models) return sorted_models[0]
def find_models(self, name_query: Optional[str] = None, quantization_query: Optional[str] = None, keywords_query: Optional[str] = None, treshold: float = 0.6, only_downloaded: bool = False) ‑> Union[None, list]
-
Search for models based on name, quantization, and keywords.
Args
name_query
:str
, optional- Search query for name
quantization_query
:str
, optional- Search query for quantization
keywords_query
:str
, optional- Search query for keywords
treshold
:float
, optional- Minimum similarity score threshold. Defaults to 0.6.
Returns
Union[None, list]
- Sorted list of models exceeding threshold, or None if no query provided
Expand source code
def find_models(self, name_query:Optional[str]=None, quantization_query:Optional[str]=None, keywords_query:Optional[str]=None, treshold:float=0.6, only_downloaded:bool=False) -> Union[None, list]:
    """Search the database for models matching the given queries.

    Each model is scored against the queries: the best dash-separated chunk of
    the name is weighted x4, the quantization match x2 and the best keyword
    match x1. Only similarity scores above `treshold` contribute.

    Args:
        name_query (str, optional): Search query for name
        quantization_query (str, optional): Search query for quantization
        keywords_query (str, optional): Search query for keywords
        treshold (float, optional): Minimum similarity score threshold. Defaults to 0.6.
        only_downloaded (bool, optional): If True, drop models whose GGUF file is not downloaded.

    Returns:
        Union[None, list]: Models sorted by descending score, or None if no query provided
    """
    if name_query is None and quantization_query is None and keywords_query is None:
        return None

    scored = []  # (score, model) pairs, kept in database order
    for candidate in self.models:
        score = 0
        if name_query is not None:
            # Compare against each dash-separated chunk of the name; keep the best match.
            best_name_score = 0
            for name_part in candidate.name.split("-"):
                best_name_score = max(best_name_score, compare_two_strings(name_query, name_part))
            if best_name_score > treshold:
                score += best_name_score * 4
        if quantization_query is not None:
            quant_score = compare_two_strings(quantization_query, candidate.model_quantization)
            if quant_score > treshold:
                score += quant_score * 2
        if keywords_query is not None:
            best_keyword_score = 0
            for keyword in candidate.keywords:
                best_keyword_score = max(best_keyword_score, compare_two_strings(keywords_query, keyword))
            if best_keyword_score > treshold:
                score += best_keyword_score
        scored.append((score, candidate))

    # Stable sort: ties keep database order, matching the original dict-based version.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    results = [candidate for _, candidate in scored]
    if only_downloaded:
        results = [candidate for candidate in results if candidate.is_downloaded()]
    return results
def get_model_by_gguf_path(self, gguf_path: str) ‑> Optional[ModelData]
-
Get ModelData by exact GGUF path match.
Args
gguf_path
:str
- GGUF file path
Returns
Optional[ModelData]
- Matching ModelData or None if not found
Expand source code
def get_model_by_gguf_path(self, gguf_path:str) -> Optional[ModelData]:
    """Return the model whose GGUF file path exactly equals `gguf_path`.

    Args:
        gguf_path (str): GGUF file path to look up

    Returns:
        Optional[ModelData]: Matching ModelData or None if not found
    """
    matches = (candidate for candidate in self.models if candidate.gguf_file_path == gguf_path)
    return next(matches, None)
def get_model_by_url(self, url: str) ‑> Optional[ModelData]
-
Get ModelData by exact URL match.
Args
url
:str
- GGUF URL
Returns
Optional[ModelData]
- Matching ModelData or None if not found
Expand source code
def get_model_by_url(self, url:str) -> Optional[ModelData]:
    """Return the model whose GGUF download URL exactly equals `url`.

    Args:
        url (str): GGUF URL to look up

    Returns:
        Optional[ModelData]: Matching ModelData or None if not found
    """
    matches = (candidate for candidate in self.models if candidate.gguf_url == url)
    return next(matches, None)
def import_models_from_repo(self, hf_repo_url: str, user_tags: Optional[list[str]] = None, ai_tags: Optional[list[str]] = None, system_tags: Optional[list[str]] = None, keywords: Optional[list[str]] = None, description: Optional[str] = None, replace_existing: bool = False)
-
Import models from a HuggingFace repo page.
Args
hf_repo_url
:str
- URL of HuggingFace model repo
user_tags
:Optional[list[str]]
, optional- User tags to apply. Defaults to None.
ai_tags
:Optional[list[str]]
, optional- AI tags to apply. Defaults to None.
system_tags
:Optional[list[str]]
, optional- System tags to apply. Defaults to None.
keywords
:Optional[list[str]]
, optional- Keywords to apply. Defaults to None.
description
:Optional[str]
, optional- Description to apply. Defaults to None.
replace_existing
:bool
, optional- Whether to overwrite existing files. Defaults to False.
Expand source code
def import_models_from_repo(self, hf_repo_url:str, user_tags:Optional[list[str]]=None, ai_tags:Optional[list[str]]=None, system_tags:Optional[list[str]]=None, keywords:Optional[list[str]]=None, description:Optional[str]=None, replace_existing:bool=False, ):
    """Import every GGUF model linked on a HuggingFace repo page into the db.

    Scrapes the repo page for model links, builds a ModelData entry per link,
    persists each as JSON, then reloads the whole database from disk.

    Args:
        hf_repo_url (str): URL of HuggingFace model repo
        user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
        ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
        system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
        keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
        description (Optional[str], optional): Description to apply. Defaults to None.
        replace_existing (bool, optional): Whether to overwrite existing files. Defaults to False.
    """
    gguf_links = ModelDB._model_links_from_repo(hf_repo_url)
    print(f"Loaded {len(gguf_links)} models from {hf_repo_url}.")
    for gguf_link in gguf_links:
        entry = ModelData(gguf_url=gguf_link, db_dir=self.gguf_db_dir, user_tags=user_tags, ai_tags=ai_tags, system_tags=system_tags, description=description, keywords=keywords)
        entry.save_json(replace_existing=replace_existing)
    # Re-read the directory so the in-memory list reflects the new files.
    self.load_models()
def import_verified_model(self, name_search: Optional[str] = None, quantization_search: Optional[str] = None, keywords_search: Optional[str] = None, copy_gguf: bool = True) ‑> None
-
Import a verified model from the verified model database with ready configurations into your selected db dir. Use this to selectively add models from the verified model database to your own database. Models include official dolphin, mistral, mixtral, solar and zephyr models in all available quantizations.
Args
name_search
:Optional[str]
, optional- Search query for name. Defaults to None.
quantization_search
:Optional[str]
, optional- Search query for quantization. Defaults to None.
keywords_search
:Optional[str]
, optional- Search query for keywords. Defaults to None.
Expand source code
def import_verified_model(self, name_search:Optional[str]=None, quantization_search:Optional[str]=None, keywords_search:Optional[str]=None, copy_gguf:bool=True) -> None:
    """Import verified model(s) with ready configurations into the selected db dir.

    With no search queries, imports every verified model; otherwise imports the
    single best match. Verified models include official dolphin, mistral,
    mixtral, solar and zephyr models in all available quantizations.

    Args:
        name_search (Optional[str], optional): Search query for name. Defaults to None.
        quantization_search (Optional[str], optional): Search query for quantization. Defaults to None.
        keywords_search (Optional[str], optional): Search query for keywords. Defaults to None.
        copy_gguf (bool, optional): Also copy an already-downloaded GGUF file into this db dir. Defaults to True.
    """
    if self.gguf_db_dir == VERIFIED_MODELS_DB_DIR:
        # Nothing to do: the verified db already contains every verified model.
        print("Cannot import verified model to the default database directory. All models should be already available here.")
        return
    vmdb = ModelDB()
    if name_search is None and quantization_search is None and keywords_search is None:
        print(f"Importing all verified models to {self.gguf_db_dir}...")
        to_import = vmdb.models
    else:
        print(f"Importing a verified model matching {name_search} {quantization_search} {keywords_search} to {self.gguf_db_dir}...")
        to_import = [vmdb.find_model(name_search, quantization_search, keywords_search)]
    for verified in to_import:
        if copy_gguf and verified.is_downloaded():
            source_file = verified.gguf_file_path
            # Rebase the GGUF path from the verified db dir onto ours.
            target_file = verified.gguf_file_path.replace(vmdb.gguf_db_dir, self.gguf_db_dir)
            print(f"Copying {source_file} to {target_file}...")
            copy_large_file(source_file, target_file)
        verified.set_save_dir(self.gguf_db_dir)
        verified.save_json()
    self.load_models()
def list_available_models(self) ‑> list[str]
-
Get a list of available model names.
Returns
list[str]
- List of model names
Expand source code
def list_available_models(self) -> list[str]:
    """Return the unique model names in the database.

    Returns:
        list[str]: Model names, de-duplicated, in load order
    """
    print(f"Available models in {self.gguf_db_dir}:")
    # dict preserves insertion order, so this de-duplicates exactly like the
    # original "append if not seen" loop.
    return list(dict.fromkeys(entry.name for entry in self.models))
def list_models_quantizations(self, model_name: str) ‑> list[str]
-
Get list of quantizations for a model.
Args
model_name
:str
- Name of model
Returns
list[str]
- List of quantizations
Expand source code
def list_models_quantizations(self, model_name:str) -> list[str]:
    """Return all quantizations available for a model name.

    Args:
        model_name (str): Name of model

    Returns:
        list[str]: Quantizations of every entry whose name matches exactly
    """
    return [entry.model_quantization for entry in self.models if entry.name == model_name]
def load_models(self) ‑> None
-
Load ModelData objects from the database directory.
Expand source code
def load_models(self) -> None:
    """Reload every ModelData entry from the JSON files in the db directory."""
    self.models = []
    json_files = list_files_in_dir(self.gguf_db_dir, False, True, [".json"], absolute=True)
    for json_file in json_files:
        try:
            self.models.append(ModelData.from_json(json_file))
        except Exception as e:
            # One malformed file must not abort loading the rest of the db.
            print(f"Error trying to load from {json_file}: \t\n{e}, \nskipping...")
    print(f"Loaded {len(self.models)} models from {self.gguf_db_dir}.")
def load_models_data_from_repo(self, hf_repo_url: str, user_tags: Optional[list[str]] = None, ai_tags: Optional[list[str]] = None, system_tags: Optional[list[str]] = None, keywords: Optional[list[str]] = None, description: Optional[str] = None)
-
Load model data from a HuggingFace repo page.
Args
hf_repo_url
:str
- URL of HuggingFace model repo
user_tags
:Optional[list[str]]
, optional- User tags to apply. Defaults to None.
ai_tags
:Optional[list[str]]
, optional- AI tags to apply. Defaults to None.
system_tags
:Optional[list[str]]
, optional- System tags to apply. Defaults to None.
keywords
:Optional[list[str]]
, optional- Keywords to apply. Defaults to None.
description
:Optional[str]
, optional- Description to apply. Defaults to None.
Returns
list
- List of loaded ModelData objects
Expand source code
def load_models_data_from_repo(self, hf_repo_url:str, user_tags:Optional[list[str]]=None, ai_tags:Optional[list[str]]=None, system_tags:Optional[list[str]]=None, keywords:Optional[list[str]]=None, description:Optional[str]=None):
    """Create and save ModelData entries for each GGUF link on a HF repo page.

    Unlike import_models_from_repo, this extends the in-memory list instead of
    reloading the whole database, and returns the new entries.

    Args:
        hf_repo_url (str): URL of HuggingFace model repo
        user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
        ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
        system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
        keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
        description (Optional[str], optional): Description to apply. Defaults to None.

    Returns:
        list: List of loaded ModelData objects
    """
    new_entries = []
    for gguf_link in ModelDB._model_links_from_repo(hf_repo_url):
        entry = ModelData(gguf_url=gguf_link, db_dir=self.gguf_db_dir, user_tags=user_tags, ai_tags=ai_tags, system_tags=system_tags, description=description, keywords=keywords)
        new_entries.append(entry)
        entry.save_json()
    self.models.extend(new_entries)
    return new_entries
def save_all_models(self) ‑> None
-
Save all ModelData objects to file.
Expand source code
def save_all_models(self) -> None:
    """Persist every loaded model back to its JSON file."""
    for entry in self.models:
        entry.save_json()
def set_model_db_dir(self, model_db_dir: str) ‑> None
-
Set the database directory.
Args
model_db_dir
:str
- Path to database directory
Expand source code
def set_model_db_dir(self, model_db_dir:str) -> None:
    """Set (and create if necessary) the database directory.

    Args:
        model_db_dir (str): Path to database directory
    """
    print(f"ModelDB dir set to {model_db_dir}.")
    # create_dir ensures the directory exists; its return value becomes the active db dir.
    self.gguf_db_dir = create_dir(model_db_dir)
def show_db_info(self) ‑> None
-
Print summary information about the database.
Expand source code
def show_db_info(self) -> None:
    """Print a human-readable summary of the database contents.

    Groups entries by model name and lists each name's quantizations,
    keywords and description (taken from the first entry seen per name).
    """
    print(f"ModelDB summary:")
    print(f"ModelDB dir: {self.gguf_db_dir}")
    print(f"Number of models: {len(self.models)}")
    print(f"Available models:")
    summary = {}
    for entry in self.models:
        # First entry for a name supplies description/keywords, like the original.
        info = summary.setdefault(entry.name, {"quantizations": [], "description": entry.description, "keywords": entry.keywords})
        if entry.model_quantization not in info["quantizations"]:
            info["quantizations"].append(entry.model_quantization)
    for model_name, info in summary.items():
        print(f"\t{model_name}:")
        print(f"\t\tQuantizations: {info['quantizations']}")
        print(f"\t\tKeywords: {info['keywords']}")
        print(f"\t\tDescription: {info['description']}")
        print(f"\t-------------------------------")