Package gguf_modeldb
Expand source code
from .db import ModelDB
from .model_data import ModelData
from .db_settings import VERIFIED_MODELS_DB_DIR
__all__ = ['ModelData','ModelDB', 'VERIFIED_MODELS_DB_DIR']
Sub-modules
gguf_modeldb.db
gguf_modeldb.db_settings
gguf_modeldb.model_data
Classes
class ModelDB (model_db_dir: Optional[str] = None, copy_verified_models=True)
-
Class for managing a database of ModelData objects.
Handles loading models from a directory, searching, adding new models, and interfacing with HuggingFace to import models.
Attributes
gguf_db_dir
:str
- Path to directory containing ModelData files
models
:List[ModelData]
- List of ModelData objects
Initialize ModelDB object.
Args
model_db_dir
:str
, optional- Path to database directory. Defaults to VERIFIED_MODELS_DB_DIR.
copy_verified_models
:bool
, optional- Whether to copy example models to the new directory. Defaults to True.
Expand source code
class ModelDB:
    """Class for managing a database of ModelData objects.

    Handles loading models from a directory, searching, adding new models,
    and interfacing with HuggingFace to import models.

    Attributes:
        gguf_db_dir (str): Path to directory containing ModelData files
        models (List[ModelData]): List of ModelData objects
    """

    def __init__(self, model_db_dir: Optional[str] = None, copy_verified_models=True):
        """Initialize ModelDB object.

        Args:
            model_db_dir (str, optional): Path to database directory.
                Defaults to VERIFIED_MODELS_DB_DIR.
            copy_verified_models (bool, optional): Whether to copy example models
                to the new directory. Defaults to True.
        """
        self.gguf_db_dir = None
        self.models = []

        if model_db_dir is None:
            model_db_dir = VERIFIED_MODELS_DB_DIR
        else:
            model_db_dir = get_absolute_path(model_db_dir)

        self.set_model_db_dir(model_db_dir)

        if model_db_dir != VERIFIED_MODELS_DB_DIR:
            if copy_verified_models:
                print(f"Copying examples to {model_db_dir}...")
                for file in list_files_in_dir(VERIFIED_MODELS_DB_DIR, False, True, [".json"], absolute=True):
                    f_mdt = ModelData.from_json(file)
                    f_mdt.set_save_dir(model_db_dir)
                    f_mdt.save_json()
                    print(f"Saved a copy of {file} to {model_db_dir}.")
        else:
            # Using the bundled directory: re-save every JSON with this
            # directory set as its save dir.
            print(f"Using default model db dir: {model_db_dir}, reconfiguring models...")
            for file in list_files_in_dir(VERIFIED_MODELS_DB_DIR, False, True, [".json"], absolute=True):
                f_mdt = ModelData.from_json(file)
                f_mdt.set_save_dir(model_db_dir)
                f_mdt.save_json()
                print(f"Reconfigured {file}.")

        self.load_models()

    def set_model_db_dir(self, model_db_dir: str) -> None:
        """Set the database directory.

        Args:
            model_db_dir (str): Path to database directory
        """
        print(f"ModelDB dir set to {model_db_dir}.")
        self.gguf_db_dir = create_dir(model_db_dir)

    def load_models(self) -> None:
        """Load ModelData objects from the database directory.

        Files that fail to load are reported and skipped, so one broken JSON
        file does not prevent the rest of the database from loading.
        """
        self.models = []
        files = list_files_in_dir(self.gguf_db_dir, False, True, [".json"], absolute=True)
        for file in files:
            try:
                model_data = ModelData.from_json(file)
                self.models.append(model_data)
            except Exception as e:
                print(f"Error trying to load from {file}: \t\n{e}, \nskipping...")
                continue
        print(f"Loaded {len(self.models)} models from {self.gguf_db_dir}.")

    @staticmethod
    def _best_similarity(query, candidates):
        """Return the highest similarity between query and any candidate (0 if none)."""
        best = 0
        for candidate in candidates:
            best = max(best, compare_two_strings(query, candidate))
        return best

    def find_models(self, name_query: Optional[str] = None,
                    quantization_query: Optional[str] = None,
                    keywords_query: Optional[str] = None,
                    treshold: float = 0.6,
                    only_downloaded: bool = False) -> Union[None, list]:
        """Rank models by similarity to the name, quantization, and keyword queries.

        Each provided query contributes to a model's score only when its
        similarity exceeds ``treshold``. Name matches are weighted x4,
        quantization matches x2 and keyword matches x1.

        Args:
            name_query (str, optional): Search query for name
            quantization_query (str, optional): Search query for quantization
            keywords_query (str, optional): Search query for keywords
            treshold (float, optional): Minimum similarity for a field to
                contribute to the score. Defaults to 0.6.
            only_downloaded (bool, optional): Keep only models whose
                ``is_downloaded()`` is true. Defaults to False.

        Returns:
            Union[None, list]: All models sorted by descending score (models
            that matched nothing keep their original order at the end), or
            None if no query provided
        """
        if name_query is None and quantization_query is None and keywords_query is None:
            return None

        scored = []
        for model in self.models:
            score = 0
            if name_query is not None:
                # Names are compared piecewise, split on "-".
                top_name_score = self._best_similarity(name_query, model.name.split("-"))
                if top_name_score > treshold:
                    score += top_name_score * 4
            if quantization_query is not None:
                quantization_score = compare_two_strings(quantization_query, model.model_quantization)
                if quantization_score > treshold:
                    score += quantization_score * 2
            if keywords_query is not None:
                # A model without keywords (None) simply cannot match.
                best_keyword_score = self._best_similarity(keywords_query, model.keywords or [])
                if best_keyword_score > treshold:
                    score += best_keyword_score
            scored.append((score, model))

        # list.sort is stable, also with reverse=True, so ties keep db order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        sorted_models = [model for _, model in scored]
        if only_downloaded:
            sorted_models = [model for model in sorted_models if model.is_downloaded()]
        return sorted_models

    def find_model(self, name_query: Optional[str] = None,
                   quantization_query: Optional[str] = None,
                   keywords_query: Optional[str] = None,
                   only_downloaded: bool = False) -> Optional["ModelData"]:
        """Find top matching model based on queries.

        Args:
            name_query (str, optional): Search query for name
            quantization_query (str, optional): Search query for quantization
            keywords_query (str, optional): Search query for keywords
            only_downloaded (bool, optional): Passed through to find_models.
                Defaults to False.

        Returns:
            Optional[ModelData]: Top matching ModelData object

        Raises:
            Exception: If find_models returns None (no query given) or an
                empty list.
        """
        sorted_models = self.find_models(name_query, quantization_query, keywords_query,
                                         only_downloaded=only_downloaded)
        if not sorted_models:
            if not self.models:
                print("There were no models to be searched. Try importing a verified model or using the default db dir.")
            raise Exception(f"Could not find a model matching the query: {name_query} {quantization_query} {keywords_query}")
        return sorted_models[0]

    def get_model_by_url(self, url: str) -> Optional["ModelData"]:
        """Get ModelData by exact URL match.

        Args:
            url (str): ggUF URL

        Returns:
            Optional[ModelData]: Matching ModelData or None if not found
        """
        for model in self.models:
            if model.gguf_url == url:
                return model
        return None

    def get_model_by_gguf_path(self, gguf_path: str) -> Optional["ModelData"]:
        """Get ModelData by exact ggUF path match.

        Args:
            gguf_path (str): ggUF path

        Returns:
            Optional[ModelData]: Matching ModelData or None if not found
        """
        for model in self.models:
            if model.gguf_file_path == gguf_path:
                return model
        return None

    def add_model_data(self, model_data: "ModelData", save_model=True) -> None:
        """Add a ModelData object to the database.

        Args:
            model_data (ModelData): ModelData object to add
            save_model (bool, optional): Whether to save ModelData to file.
                Defaults to True.
        """
        self.models.append(model_data)
        if save_model:
            model_data.save_json()

    def add_model_by_url(self, url: str, ) -> None:
        """Add a model by URL.

        Args:
            url (str): ggUF URL
        """
        model_data = ModelData(url, db_dir=self.gguf_db_dir)
        self.add_model_data(model_data)

    def add_model_by_json(self, json_file_path: str) -> None:
        """Add a model from a JSON file.

        Args:
            json_file_path (str): Path to ModelData JSON file
        """
        model_data = ModelData.from_json(json_file_path)
        self.add_model_data(model_data)

    def save_all_models(self) -> None:
        """Save all ModelData objects to file."""
        for model in self.models:
            model.save_json()

    @staticmethod
    def _model_links_from_repo(hf_repo_url: str):
        """Extract ggUF model links from a HuggingFace repo page.

        Args:
            hf_repo_url (str): URL of HuggingFace model repo

        Returns:
            list: ``href`` values of all anchors ending in ".gguf", as they
            appear in the page
        """
        response = requests.get(hf_repo_url)
        html = response.text
        soup = bs4.BeautifulSoup(html, 'html.parser')
        print(f"Looking for {hf_repo_url} gguf files...")
        model_links = []
        for link in soup.find_all('a'):
            href = link.get('href')
            if href is not None and href.endswith(".gguf"):
                print(f"Found model: {href}")
                model_links.append(href)
        return model_links

    def load_models_data_from_repo(self, hf_repo_url: str,
                                   user_tags: Optional[list[str]] = None,
                                   ai_tags: Optional[list[str]] = None,
                                   system_tags: Optional[list[str]] = None,
                                   keywords: Optional[list[str]] = None,
                                   description: Optional[str] = None):
        """Load model data from a HuggingFace repo page.

        Every discovered model is saved to JSON and appended to ``self.models``.

        Args:
            hf_repo_url (str): URL of HuggingFace model repo
            user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
            ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
            system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
            keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
            description (Optional[str], optional): Description to apply. Defaults to None.

        Returns:
            list: List of loaded ModelData objects
        """
        model_links = ModelDB._model_links_from_repo(hf_repo_url)
        model_datas = []
        for model_link in model_links:
            model_data = ModelData(gguf_url=model_link, db_dir=self.gguf_db_dir,
                                   user_tags=user_tags, ai_tags=ai_tags,
                                   system_tags=system_tags, description=description,
                                   keywords=keywords)
            model_datas.append(model_data)
            model_data.save_json()
        self.models.extend(model_datas)
        return model_datas

    def import_models_from_repo(self, hf_repo_url: str,
                                user_tags: Optional[list[str]] = None,
                                ai_tags: Optional[list[str]] = None,
                                system_tags: Optional[list[str]] = None,
                                keywords: Optional[list[str]] = None,
                                description: Optional[str] = None,
                                replace_existing: bool = False,
                                ):
        """Import models from a HuggingFace repo page.

        Each discovered model is saved to JSON, then the database is reloaded
        from disk.

        Args:
            hf_repo_url (str): URL of HuggingFace model repo
            user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
            ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
            system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
            keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
            description (Optional[str], optional): Description to apply. Defaults to None.
            replace_existing (bool, optional): Whether to overwrite existing files.
                Defaults to False.
        """
        model_links = ModelDB._model_links_from_repo(hf_repo_url)
        print(f"Loaded {len(model_links)} models from {hf_repo_url}.")
        for model_link in model_links:
            model_data = ModelData(gguf_url=model_link, db_dir=self.gguf_db_dir,
                                   user_tags=user_tags, ai_tags=ai_tags,
                                   system_tags=system_tags, description=description,
                                   keywords=keywords)
            model_data.save_json(replace_existing=replace_existing)
        self.load_models()

    def import_verified_model(self, name_search: Optional[str] = None,
                              quantization_search: Optional[str] = None,
                              keywords_search: Optional[str] = None,
                              copy_gguf: bool = True) -> None:
        """Import a verified model from the verified model database into this db dir.

        Use this to selectively add models from the verified model database to
        your own database. Models include official dolphin, mistral, mixtral,
        solar and zephyr models in all available quantizations. With no search
        arguments, all verified models are imported.

        Args:
            name_search (Optional[str], optional): Search query for name. Defaults to None.
            quantization_search (Optional[str], optional): Search query for quantization.
                Defaults to None.
            keywords_search (Optional[str], optional): Search query for keywords.
                Defaults to None.
            copy_gguf (bool, optional): Also copy the gguf file of models that
                are already downloaded. Defaults to True.
        """
        if self.gguf_db_dir == VERIFIED_MODELS_DB_DIR:
            print("Cannot import verified model to the default database directory. All models should be already available here.")
        else:
            vmdb = ModelDB()
            if name_search is None and quantization_search is None and keywords_search is None:
                print(f"Importing all verified models to {self.gguf_db_dir}...")
                models = vmdb.models
            else:
                print(f"Importing a verified model matching {name_search} {quantization_search} {keywords_search} to {self.gguf_db_dir}...")
                models = [vmdb.find_model(name_search, quantization_search, keywords_search)]

            for model in models:
                if copy_gguf and model.is_downloaded():
                    source_file = model.gguf_file_path
                    target_file = model.gguf_file_path.replace(vmdb.gguf_db_dir, self.gguf_db_dir)
                    print(f"Copying {source_file} to {target_file}...")
                    copy_large_file(source_file, target_file)
                model.set_save_dir(self.gguf_db_dir)
                model.save_json()
            self.load_models()

    def list_available_models(self) -> list[str]:
        """Get a list of available model names.

        Returns:
            list[str]: Distinct model names, in first-seen order
        """
        print(f"Available models in {self.gguf_db_dir}:")
        models = []
        for model in self.models:
            if model.name not in models:
                models.append(model.name)
        return models

    def list_models_quantizations(self, model_name: str) -> list[str]:
        """Get list of quantizations for a model.

        Args:
            model_name (str): Name of model

        Returns:
            list[str]: List of quantizations
        """
        return [model.model_quantization for model in self.models if model.name == model_name]

    def show_db_info(self) -> None:
        """Print summary information about the database."""
        print("ModelDB summary:")
        print(f"ModelDB dir: {self.gguf_db_dir}")
        print(f"Number of models: {len(self.models)}")
        print("Available models:")
        models_info = {}
        for model in self.models:
            if model.name not in models_info:
                # Description and keywords come from the first entry seen
                # for each model name.
                models_info[model.name] = {
                    "quantizations": [],
                    "description": model.description,
                    "keywords": model.keywords,
                }
            if model.model_quantization not in models_info[model.name]["quantizations"]:
                models_info[model.name]["quantizations"].append(model.model_quantization)

        # A distinct loop name avoids rebinding the dict being iterated.
        for model_name, model_info in models_info.items():
            print(f"\t{model_name}:")
            print(f"\t\tQuantizations: {model_info['quantizations']}")
            print(f"\t\tKeywords: {model_info['keywords']}")
            print(f"\t\tDescription: {model_info['description']}")
            print("\t-------------------------------")
Methods
def add_model_by_json(self, json_file_path: str) ‑> None
-
Add a model from a JSON file.
Args
json_file_path
:str
- Path to ModelData JSON file
Expand source code
def add_model_by_json(self, json_file_path:str) -> None:
    """Read a ModelData from its JSON file and register it in the database.

    Args:
        json_file_path (str): Path to ModelData JSON file
    """
    loaded = ModelData.from_json(json_file_path)
    self.add_model_data(loaded)
def add_model_by_url(self, url: str) ‑> None
-
Add a model by URL.
Args
url
:str
- ggUF URL
Expand source code
def add_model_by_url(self, url:str, ) -> None:
    """Create a ModelData for a gguf URL in this database's directory and add it.

    Args:
        url (str): ggUF URL
    """
    new_entry = ModelData(url, db_dir=self.gguf_db_dir)
    self.add_model_data(new_entry)
def add_model_data(self, model_data: ModelData, save_model=True) ‑> None
-
Add a ModelData object to the database.
Args
model_data
:ModelData
- ModelData object to add
save_model
:bool
, optional- Whether to save ModelData to file. Defaults to True.
Expand source code
def add_model_data(self, model_data:ModelData, save_model=True) -> None:
    """Append a ModelData object to the in-memory list, optionally saving it.

    Args:
        model_data (ModelData): ModelData object to add
        save_model (bool, optional): Whether to save ModelData to file. Defaults to True.
    """
    self.models.append(model_data)
    if not save_model:
        return
    model_data.save_json()
def find_model(self, name_query: Optional[str] = None, quantization_query: Optional[str] = None, keywords_query: Optional[str] = None, only_downloaded: bool = False) ‑> Optional[ModelData]
-
Find top matching model based on queries.
Args
name_query
:str
, optional- Search query for name
quantization_query
:str
, optional- Search query for quantization
keywords_query
:str
, optional- Search query for keywords
Returns
Optional[ModelData]
- Top matching ModelData object. An Exception is raised when the search yields no models.
Expand source code
def find_model(self, name_query:Optional[str]=None, quantization_query:Optional[str]=None, keywords_query:Optional[str]=None, only_downloaded:bool=False) -> Optional["ModelData"]:
    """Find top matching model based on queries.

    Args:
        name_query (str, optional): Search query for name
        quantization_query (str, optional): Search query for quantization
        keywords_query (str, optional): Search query for keywords
        only_downloaded (bool, optional): Passed through to find_models. Defaults to False.

    Returns:
        Optional[ModelData]: Top matching ModelData object

    Raises:
        Exception: If find_models returns None (no query given) or an empty list.
    """
    sorted_models = self.find_models(name_query, quantization_query, keywords_query, only_downloaded=only_downloaded)
    # Both None (no query) and an empty list mean there is nothing to return.
    if not sorted_models:
        if not self.models:
            print("There were no models to be searched. Try importing a verified model or using the default db dir.")
        raise Exception(f"Could not find a model matching the query: {name_query} {quantization_query} {keywords_query}")
    return sorted_models[0]
def find_models(self, name_query: Optional[str] = None, quantization_query: Optional[str] = None, keywords_query: Optional[str] = None, treshold: float = 0.6, only_downloaded: bool = False) ‑> Optional[list]
-
Search for models based on name, quantization, and keywords.
Args
name_query
:str
, optional- Search query for name
quantization_query
:str
, optional- Search query for quantization
keywords_query
:str
, optional- Search query for keywords
treshold
:float
, optional- Minimum similarity score threshold. Defaults to 0.6.
Returns
Union[None, list]
- All models sorted by descending match score (a field only contributes to the score once its similarity exceeds the threshold), or None if no query provided
Expand source code
def find_models(self, name_query:Optional[str]=None, quantization_query:Optional[str]=None, keywords_query:Optional[str]=None, treshold:float=0.6, only_downloaded:bool=False) -> Union[None, list]:
    """Rank models by similarity to the name, quantization, and keyword queries.

    Each provided query contributes to a model's score only when its
    similarity exceeds ``treshold``. Name matches are weighted x4,
    quantization matches x2 and keyword matches x1.

    Args:
        name_query (str, optional): Search query for name
        quantization_query (str, optional): Search query for quantization
        keywords_query (str, optional): Search query for keywords
        treshold (float, optional): Minimum similarity for a field to contribute to the score. Defaults to 0.6.
        only_downloaded (bool, optional): Keep only models whose ``is_downloaded()`` is true. Defaults to False.

    Returns:
        Union[None, list]: All models sorted by descending score (models that
        matched nothing keep their original order at the end), or None if no
        query provided
    """
    if name_query is None and quantization_query is None and keywords_query is None:
        return None

    def best_similarity(query, candidates):
        # Highest similarity between the query and any candidate; 0 if none.
        best = 0
        for candidate in candidates:
            best = max(best, compare_two_strings(query, candidate))
        return best

    scored = []
    for model in self.models:
        score = 0
        if name_query is not None:
            # Names are compared piecewise, split on "-".
            top_name_score = best_similarity(name_query, model.name.split("-"))
            if top_name_score > treshold:
                score += top_name_score * 4
        if quantization_query is not None:
            quantization_score = compare_two_strings(quantization_query, model.model_quantization)
            if quantization_score > treshold:
                score += quantization_score * 2
        if keywords_query is not None:
            # A model without keywords (None) simply cannot match.
            best_keyword_score = best_similarity(keywords_query, model.keywords or [])
            if best_keyword_score > treshold:
                score += best_keyword_score
        scored.append((score, model))

    # list.sort is stable, also with reverse=True, so ties keep db order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    sorted_models = [model for _, model in scored]
    if only_downloaded:
        sorted_models = [model for model in sorted_models if model.is_downloaded()]
    return sorted_models
def get_model_by_gguf_path(self, gguf_path: str) ‑> Optional[ModelData]
-
Get ModelData by exact ggUF path match.
Args
gguf_path
:str
- ggUF path
Returns
Optional[ModelData]
- Matching ModelData or None if not found
Expand source code
def get_model_by_gguf_path(self, gguf_path:str) -> Optional[ModelData]:
    """Return the first model whose gguf file path equals the given path.

    Args:
        gguf_path (str): ggUF path

    Returns:
        Optional[ModelData]: Matching ModelData or None if not found
    """
    matches = (entry for entry in self.models if entry.gguf_file_path == gguf_path)
    return next(matches, None)
def get_model_by_url(self, url: str) ‑> Optional[ModelData]
-
Get ModelData by exact URL match.
Args
url
:str
- ggUF URL
Returns
Optional[ModelData]
- Matching ModelData or None if not found
Expand source code
def get_model_by_url(self, url:str) -> Optional[ModelData]:
    """Return the first model whose gguf URL equals the given URL.

    Args:
        url (str): ggUF URL

    Returns:
        Optional[ModelData]: Matching ModelData or None if not found
    """
    matches = (entry for entry in self.models if entry.gguf_url == url)
    return next(matches, None)
def import_models_from_repo(self, hf_repo_url: str, user_tags: Optional[list[str]] = None, ai_tags: Optional[list[str]] = None, system_tags: Optional[list[str]] = None, keywords: Optional[list[str]] = None, description: Optional[str] = None, replace_existing: bool = False)
-
Import models from a HuggingFace repo page.
Args
hf_repo_url
:str
- URL of HuggingFace model repo
user_tags
:Optional[list[str]]
, optional- User tags to apply. Defaults to None.
ai_tags
:Optional[list[str]]
, optional- AI tags to apply. Defaults to None.
system_tags
:Optional[list[str]]
, optional- System tags to apply. Defaults to None.
keywords
:Optional[list[str]]
, optional- Keywords to apply. Defaults to None.
description
:Optional[str]
, optional- Description to apply. Defaults to None.
replace_existing
:bool
, optional- Whether to overwrite existing files. Defaults to False.
Expand source code
def import_models_from_repo(self, hf_repo_url:str, user_tags:Optional[list[str]]=None, ai_tags:Optional[list[str]]=None, system_tags:Optional[list[str]]=None, keywords:Optional[list[str]]=None, description:Optional[str]=None, replace_existing:bool=False, ):
    """Save a ModelData JSON for every gguf link on a HuggingFace repo page, then reload.

    Args:
        hf_repo_url (str): URL of HuggingFace model repo
        user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
        ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
        system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
        keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
        description (Optional[str], optional): Description to apply. Defaults to None.
        replace_existing (bool, optional): Whether to overwrite existing files. Defaults to False.
    """
    model_links = ModelDB._model_links_from_repo(hf_repo_url)
    print(f"Loaded {len(model_links)} models from {hf_repo_url}.")

    # The same metadata is applied to every model found in the repo.
    shared_metadata = {
        "db_dir": self.gguf_db_dir,
        "user_tags": user_tags,
        "ai_tags": ai_tags,
        "system_tags": system_tags,
        "description": description,
        "keywords": keywords,
    }
    for gguf_link in model_links:
        entry = ModelData(gguf_url=gguf_link, **shared_metadata)
        entry.save_json(replace_existing=replace_existing)

    # Refresh the in-memory list from what is now on disk.
    self.load_models()
def import_verified_model(self, name_search: Optional[str] = None, quantization_search: Optional[str] = None, keywords_search: Optional[str] = None, copy_gguf: bool = True) ‑> None
-
Import a verified model from the verified model database with ready configurations into your selected db dir. Use this to selectively add models from the verified model database to your own database. Models include official dolphin, mistral, mixtral, solar and zephyr models in all available quantizations.
Args
name_search
:Optional[str]
, optional- Search query for name. Defaults to None.
quantization_search
:Optional[str]
, optional- Search query for quantization. Defaults to None.
keywords_search
:Optional[str]
, optional- Search query for keywords. Defaults to None.
Expand source code
def import_verified_model(self, name_search:Optional[str]=None, quantization_search:Optional[str]=None, keywords_search:Optional[str]=None, copy_gguf:bool=True) -> None:
    """Copy one or all verified models, with their ready configurations, into this db dir.

    With no search arguments every verified model is imported; otherwise only
    the top match of the verified database's find_model is imported.

    Args:
        name_search (Optional[str], optional): Search query for name. Defaults to None.
        quantization_search (Optional[str], optional): Search query for quantization. Defaults to None.
        keywords_search (Optional[str], optional): Search query for keywords. Defaults to None.
        copy_gguf (bool, optional): Also copy the gguf file of models that are already downloaded. Defaults to True.
    """
    if self.gguf_db_dir == VERIFIED_MODELS_DB_DIR:
        print("Cannot import verified model to the default database directory. All models should be already available here.")
        return

    vmdb = ModelDB()
    no_filters = all(query is None for query in (name_search, quantization_search, keywords_search))
    if no_filters:
        print(f"Importing all verified models to {self.gguf_db_dir}...")
        selected = vmdb.models
    else:
        print(f"Importing a verified model matching {name_search} {quantization_search} {keywords_search} to {self.gguf_db_dir}...")
        selected = [vmdb.find_model(name_search, quantization_search, keywords_search)]

    for model in selected:
        if copy_gguf and model.is_downloaded():
            source_file = model.gguf_file_path
            # The target path is the source path with the db dir swapped.
            target_file = source_file.replace(vmdb.gguf_db_dir, self.gguf_db_dir)
            print(f"Copying {source_file} to {target_file}...")
            copy_large_file(source_file, target_file)
        model.set_save_dir(self.gguf_db_dir)
        model.save_json()

    self.load_models()
def list_available_models(self) ‑> list[str]
-
Get a list of available model names.
Returns
list[str]
- List of model names
Expand source code
def list_available_models(self) -> list[str]:
    """Return the distinct model names in the database, in first-seen order.

    Returns:
        list[str]: List of model names
    """
    print(f"Available models in {self.gguf_db_dir}:")
    # dict.fromkeys drops duplicates and keeps insertion order.
    return list(dict.fromkeys(entry.name for entry in self.models))
def list_models_quantizations(self, model_name: str) ‑> list[str]
-
Get list of quantizations for a model.
Args
model_name
:str
- Name of model
Returns
list[str]
- List of quantizations
Expand source code
def list_models_quantizations(self, model_name:str) -> list[str]:
    """Collect the quantization of every entry whose name equals model_name.

    Args:
        model_name (str): Name of model

    Returns:
        list[str]: List of quantizations
    """
    return [
        entry.model_quantization
        for entry in self.models
        if entry.name == model_name
    ]
def load_models(self) ‑> None
-
Load ModelData objects from the database directory.
Expand source code
def load_models(self) -> None:
    """Rebuild self.models from the JSON files in the database directory.

    Files that fail to load are reported and skipped.
    """
    self.models = []
    for file in list_files_in_dir(self.gguf_db_dir, False, True, [".json"], absolute=True):
        try:
            loaded = ModelData.from_json(file)
        except Exception as e:
            print(f"Error trying to load from {file}: \t\n{e}, \nskipping...")
        else:
            self.models.append(loaded)
    print(f"Loaded {len(self.models)} models from {self.gguf_db_dir}.")
def load_models_data_from_repo(self, hf_repo_url: str, user_tags: Optional[list[str]] = None, ai_tags: Optional[list[str]] = None, system_tags: Optional[list[str]] = None, keywords: Optional[list[str]] = None, description: Optional[str] = None)
-
Load model data from a HuggingFace repo page.
Args
hf_repo_url
:str
- URL of HuggingFace model repo
user_tags
:Optional[list[str]]
, optional- User tags to apply. Defaults to None.
ai_tags
:Optional[list[str]]
, optional- AI tags to apply. Defaults to None.
system_tags
:Optional[list[str]]
, optional- System tags to apply. Defaults to None.
keywords
:Optional[list[str]]
, optional- Keywords to apply. Defaults to None.
description
:Optional[str]
, optional- Description to apply. Defaults to None.
Returns
list
- List of loaded ModelData objects
Expand source code
def load_models_data_from_repo(self, hf_repo_url:str, user_tags:Optional[list[str]]=None, ai_tags:Optional[list[str]]=None, system_tags:Optional[list[str]]=None, keywords:Optional[list[str]]=None, description:Optional[str]=None):
    """Create, save and register a ModelData for every gguf link on a HuggingFace repo page.

    Args:
        hf_repo_url (str): URL of HuggingFace model repo
        user_tags (Optional[list[str]], optional): User tags to apply. Defaults to None.
        ai_tags (Optional[list[str]], optional): AI tags to apply. Defaults to None.
        system_tags (Optional[list[str]], optional): System tags to apply. Defaults to None.
        keywords (Optional[list[str]], optional): Keywords to apply. Defaults to None.
        description (Optional[str], optional): Description to apply. Defaults to None.

    Returns:
        list: List of loaded ModelData objects
    """
    # The same metadata is applied to every model found in the repo.
    shared_metadata = {
        "db_dir": self.gguf_db_dir,
        "user_tags": user_tags,
        "ai_tags": ai_tags,
        "system_tags": system_tags,
        "description": description,
        "keywords": keywords,
    }
    created = []
    for gguf_link in ModelDB._model_links_from_repo(hf_repo_url):
        entry = ModelData(gguf_url=gguf_link, **shared_metadata)
        created.append(entry)
        entry.save_json()

    # Register with the database only after every entry has been saved.
    self.models.extend(created)
    return created
def save_all_models(self) ‑> None
-
Save all ModelData objects to file.
Expand source code
def save_all_models(self) -> None:
    """Call save_json() on every ModelData held by the database."""
    for entry in self.models:
        entry.save_json()
def set_model_db_dir(self, model_db_dir: str) ‑> None
-
Set the database directory.
Args
model_db_dir
:str
- Path to database directory
Expand source code
def set_model_db_dir(self, model_db_dir:str) -> None:
    """Announce the new database directory and store what create_dir returns for it.

    Args:
        model_db_dir (str): Path to database directory
    """
    print(f"ModelDB dir set to {model_db_dir}.")
    created_dir = create_dir(model_db_dir)
    self.gguf_db_dir = created_dir
def show_db_info(self) ‑> None
-
Print summary information about the database.
Expand source code
def show_db_info(self) -> None:
    """Print the database directory, the model count and a per-model summary.

    Entries are grouped by model name; description and keywords are taken
    from the first entry seen for each name.
    """
    print(f"ModelDB summary:")
    print(f"ModelDB dir: {self.gguf_db_dir}")
    print(f"Number of models: {len(self.models)}")
    print(f"Available models:")

    summary = {}
    for entry in self.models:
        record = summary.get(entry.name)
        if record is None:
            record = {
                "quantizations": [],
                "description": entry.description,
                "keywords": entry.keywords,
            }
            summary[entry.name] = record
        if entry.model_quantization not in record["quantizations"]:
            record["quantizations"].append(entry.model_quantization)

    for model_name, details in summary.items():
        print(f"\t{model_name}:")
        print(f"\t\tQuantizations: {details['quantizations']}")
        print(f"\t\tKeywords: {details['keywords']}")
        print(f"\t\tDescription: {details['description']}")
        print(f"\t-------------------------------")
class ModelData (gguf_url: str, db_dir: str, user_tags: Union[dict, list, set] = ('', ''), ai_tags: Union[dict, list, set] = ('', ''), system_tags: Optional[Union[dict, list, set]] = None, description: Optional[str] = None, keywords: Optional[list] = None)
-
Class for storing and managing model data. Provides methods for downloading gguf model files, saving metadata to JSON files and loading it back from JSON files, as well as for retrieving model tags and paths.
Public methods: download_gguf(force_redownload:bool=False) -> str: Download gguf model file save_json(replace_existing:bool=True) -> str: Save ModelData to JSON file from_json(json_file_path:str) -> "ModelData": Create ModelData from JSON file from_url(url:str, save_dir:str, user_tags:Union[dict, list, set] = ("", ""), ai_tags:Union[dict, list, set] = ("", ""), system_tags:Union[dict, list, set] = ("", ""), description:Optional[str] = None, keywords:Optional[list] = None) -> "ModelData": Create ModelData from URL from_file(gguf_file_path:str, save_dir:Optional[str]=None, user_tags:Union[dict, list, set] = ("", ""), ai_tags:Union[dict, list, set] = ("", ""), system_tags:Union[dict, list, set] = ("", ""), description:Optional[str] = None, keywords:Optional[list] = None) -> "ModelData": Create ModelData from gguf file model_path() -> str: Get model file path get_ai_tag_open() -> str: Get opening AI tag get_ai_tag_close() -> str: Get closing AI tag get_user_tag_open() -> str: Get opening user tag get_user_tag_close() -> str: Get closing user tag get_system_tag_open() -> str: Get opening system tag get_system_tag_close() -> str: Get closing system tag get_ai_tags() -> list[str]: Get list of AI tags get_user_tags() -> list[str]: Get list of user tags get_system_tags() -> list[str]: Get list of system tags has_system_tags() -> bool: Check if system tags are set
Attributes
gguf_url
:str
- URL of gguf file for model
gguf_file_path
:str
- Local file path for downloaded gguf model file
name
:str
- Name of the model
model_quantization
:str
- Quantization used for the model
description
:str
- Description of the model
keywords
:List[str]
- List of keywords for the model
user_tags
:Dict[str, str]
- Dictionary of opening and closing tags for user markup
ai_tags
:Dict[str, str]
- Dictionary of opening and closing tags for AI markup
system_tags
:Dict[str, str]
- Dictionary of opening and closing tags for system markup
save_dir
:str
- Directory to save model file and metadata
Initialize ModelData object.
Args
gguf_url
:str
- URL of gguf file for model
db_dir
:str
- Directory to save model file and metadata
user_tags
:Union[dict, list, set]
, optional- User markup tags. Defaults to ("","").
ai_tags
:Union[dict, list, set]
, optional- AI markup tags. Defaults to ("","").
system_tags
:Optional[Union[dict, list, set]]
, optional- System markup tags. Defaults to None.
description
:Optional[str]
, optional- Description of model. Defaults to None.
keywords
:Optional[list]
, optional- List of keywords. Defaults to None.
Expand source code
class ModelData:
    """Class for storing and managing model data.

    Provides methods for downloading gguf model files, saving metadata to
    JSON files and loading it back from JSON files, as well as for retrieving
    model tags and paths.

    Public methods:
        download_gguf(force_redownload:bool=False) -> str: Download gguf model file
        save_json(replace_existing:bool=True) -> str: Save ModelData to JSON file
        from_json(json_file_path:str) -> "ModelData": Create ModelData from JSON file
        from_url(url, save_dir, ...) -> "ModelData": Create ModelData from URL
        from_file(gguf_file_path, save_dir=None, ...) -> "ModelData": Create ModelData from gguf file
        model_path() -> str: Get model file path
        get_ai_tag_open() -> str: Get opening AI tag
        get_ai_tag_close() -> str: Get closing AI tag
        get_user_tag_open() -> str: Get opening user tag
        get_user_tag_close() -> str: Get closing user tag
        get_system_tag_open() -> str: Get opening system tag
        get_system_tag_close() -> str: Get closing system tag
        get_ai_tags() -> list[str]: Get list of AI tags
        get_user_tags() -> list[str]: Get list of user tags
        get_system_tags() -> list[str]: Get list of system tags
        has_system_tags() -> bool: Check if system tags are set

    Attributes:
        gguf_url (str): URL of gguf file for model
        gguf_file_path (str): Local file path for downloaded gguf model file
        name (str): Name of the model
        model_quantization (str): Quantization used for the model
        description (str): Description of the model
        keywords (List[str]): List of keywords for the model
        user_tags (Dict[str, str]): Dictionary of opening and closing tags for user markup
        ai_tags (Dict[str, str]): Dictionary of opening and closing tags for AI markup
        system_tags (Dict[str, str]): Dictionary of opening and closing tags for system markup
        save_dir (str): Directory to save model file and metadata
    """

    def __init__(self,
                 gguf_url:str,
                 db_dir:str,
                 user_tags:Union[dict, list, set] = ("", ""),
                 ai_tags:Union[dict, list, set] = ("", ""),
                 system_tags:Optional[Union[dict, list, set]] = None,
                 description:Optional[str] = None,
                 keywords:Optional[list] = None,
                 ):
        """Initialize ModelData object.

        Args:
            gguf_url (str): URL of gguf file for model
            db_dir (str): Directory to save model file and metadata
            user_tags (Union[dict, list, set], optional): User markup tags. Defaults to ("","").
            ai_tags (Union[dict, list, set], optional): AI markup tags. Defaults to ("","").
            system_tags (Optional[Union[dict, list, set]], optional): System markup tags. Defaults to None.
            description (Optional[str], optional): Description of model. Defaults to None.
            keywords (Optional[list], optional): List of keywords. Defaults to None.

        Raises:
            ValueError: If the last URL segment is not a gguf file name, or a
                tag dict lacks the 'open'/'close' keys.
            TypeError: If a tag argument has an unsupported type.
        """
        # The tag attributes must exist even when None is passed explicitly,
        # because set_tags() leaves attributes untouched for None arguments.
        self.user_tags = None
        self.ai_tags = None
        self.system_tags = None

        self.gguf_url = gguf_url
        # set_save_dir() assigns both save_dir and gguf_file_path.
        self.set_save_dir(db_dir)
        self.name = self._url_extract_model_name(gguf_url)
        self.model_quantization = self._url_extract_quantization(gguf_url)
        self.description = description if description is not None else ""
        self.keywords = keywords if keywords is not None else []
        self.set_tags(ai_tags, user_tags, system_tags)

    def __str__(self) -> str:
        """Return string representation of ModelData object."""
        t = f"""ModelData(
    ---required---
    gguf_url: {self.gguf_url},
    ---required with defaults---
    save_dir: {self.save_dir},
    user_tags: {self.user_tags},
    ai_tags: {self.ai_tags},
    ---optionally provided, no defaults---
    system_tags: {self.system_tags},
    description: {self.description},
    keywords: {self.keywords},
    ---automatically generated---
    gguf_file_path: {self.gguf_file_path},
    model_name: {self.name},
    model_quantization: {self.model_quantization}
)"""
        return t

    def __repr__(self) -> str:
        """Return representation of ModelData object."""
        return self.__str__()

    # NOTE(review): defining ``__dict__`` as a method shadows the usual
    # instance-dict attribute, so ``obj.__dict__`` is a bound method here.
    # Kept for backward compatibility; prefer to_dict() in new code.
    def __dict__(self) -> dict:
        """Return dictionary representation of ModelData object."""
        return self.to_dict()

    @staticmethod
    def _hf_url_to_download_url(url) -> str:
        """Convert HuggingFace URL to download URL.

        Args:
            url (str): HuggingFace URL

        Returns:
            str: Download URL

        Raises:
            ValueError: If the URL does not contain "huggingface.co".
        """
        if "huggingface.co" not in url:
            raise ValueError(f"Invalid url: {url}, must be a huggingface.co url, other sources aren't implemented yet.")
        # Only the "/blob/" path segment is swapped: replacing every "blob"
        # substring would corrupt repo or file names that contain that word.
        url = url.replace("/blob/", "/resolve/", 1)
        if url.endswith("/"):
            url = url[:-1]
        if not url.endswith("?download=true"):
            url = url + "?download=true"
        return url

    @staticmethod
    def _url_to_file_path(save_dir:str, url:str)->str:
        """Convert URL to local file path.

        Args:
            save_dir (str): Directory to save file
            url (str): URL of file

        Returns:
            str: Local file path
        """
        return join_paths(save_dir, ModelData._url_extract_file_name(url))

    @staticmethod
    def _url_extract_file_name(url:str) -> str:
        """Extract file name from URL.

        Args:
            url (str): URL

        Returns:
            str: File name (the text after the last "/")

        Raises:
            ValueError: If the file name is not a gguf file.
        """
        f_name = url.split("/")[-1]
        if not is_file_format(f_name, ".gguf"):
            raise ValueError(f"File {f_name} is not a gguf file.")
        return f_name

    @staticmethod
    def _url_extract_quantization(url:str) -> str:
        """Extract quantization from URL.

        The quantization is the dot-separated part right before the file
        extension, e.g. "Q4_K_M" in "model.Q4_K_M.gguf".

        Args:
            url (str): URL

        Returns:
            str: Quantization
        """
        return ModelData._url_extract_file_name(url).split(".")[-2]

    @staticmethod
    def _url_extract_model_name(url:str) -> str:
        """Extract model name from URL.

        The name is everything before the quantization and extension parts
        of the file name.

        Args:
            url (str): URL

        Returns:
            str: Model name
        """
        name_parts = ModelData._url_extract_file_name(url).split(".")[0:-2]
        return ".".join(name_parts)

    @staticmethod
    def _coerce_tags(tags, label:str) -> dict:
        """Normalize a tag specification into a dict with 'open' and 'close' keys.

        Args:
            tags: Dict with 'open' and 'close' keys, or a list/tuple whose
                first two items are the opening and closing tag.
            label (str): Tag kind used in error messages ("AI", "user", "system").

        Returns:
            dict: The given dict itself, or a new {"open", "close"} dict.

        Raises:
            ValueError: If a dict lacks the 'open' or 'close' key.
            TypeError: If tags is a set (unordered, so open/close cannot be
                told apart) or any other unsupported type.
        """
        if isinstance(tags, dict):
            if "open" in tags and "close" in tags:
                return tags
            raise ValueError(f"Invalid {label} tags: {tags}, for dict tags both 'open' and 'close' keys must be present.")
        if isinstance(tags, (list, tuple)):
            return {"open": tags[0], "close": tags[1]}
        if isinstance(tags, set):
            raise TypeError(f"Invalid type for {label} tags: {type(tags)}, sets are unordered so the open and close tags cannot be told apart; use a dict, list or tuple.")
        raise TypeError(f"Invalid type for {label} tags: {type(tags)}, must be dict, list or tuple.")

    def set_ai_tags(self, ai_tags:Union[dict, set[str], list[str], tuple[str]]) -> None:
        """Set AI markup tags.

        Args:
            ai_tags (Union[dict, set, list, tuple]): AI tags, see _coerce_tags
                for the accepted forms.
        """
        self.ai_tags = self._coerce_tags(ai_tags, "AI")

    def set_user_tags(self, user_tags:Union[dict, set[str], list[str], tuple[str]]) -> None:
        """Set user markup tags.

        Args:
            user_tags (Union[dict, set, list, tuple]): User tags, see
                _coerce_tags for the accepted forms.
        """
        self.user_tags = self._coerce_tags(user_tags, "user")

    def set_system_tags(self, system_tags:Union[dict, set[str], list[str], tuple[str]]) -> None:
        """Set system markup tags.

        Args:
            system_tags (Union[dict, set, list, tuple]): System tags, see
                _coerce_tags for the accepted forms.
        """
        self.system_tags = self._coerce_tags(system_tags, "system")

    def set_tags(self,
                 ai_tags:Optional[Union[dict, set[str], list[str], tuple[str]]],
                 user_tags:Optional[Union[dict, set[str], list[str], tuple[str]]],
                 system_tags:Optional[Union[dict, set[str], list[str], tuple[str]]],
                 ) -> None:
        """Sets any of the provided tags; None arguments are skipped.

        Args:
            ai_tags (Optional[Union[dict, set, list, tuple]]): AI tags
            user_tags (Optional[Union[dict, set, list, tuple]]): User tags
            system_tags (Optional[Union[dict, set, list, tuple]]): System tags
        """
        if ai_tags is not None:
            self.set_ai_tags(ai_tags)
        if user_tags is not None:
            self.set_user_tags(user_tags)
        if system_tags is not None:
            self.set_system_tags(system_tags)

    def set_save_dir(self, save_dir:str) -> None:
        """Set save directory and update save file path for the model.

        Args:
            save_dir (str): Save directory
        """
        self.save_dir = save_dir
        self.gguf_file_path = self._url_to_file_path(save_dir, self.gguf_url)

    def to_dict(self):
        """Convert ModelData to dictionary.

        The result contains the keys "url", "gguf_file_path", "model_name",
        "model_quantization", "description", "keywords", "user_tags",
        "ai_tags", "system_tags" and "save_dir".

        Returns:
            dict: Dictionary representation of ModelData
        """
        return {
            "url": self.gguf_url,
            "gguf_file_path": self.gguf_file_path,
            "model_name": self.name,
            "model_quantization": self.model_quantization,
            "description": self.description,
            "keywords": self.keywords,
            "user_tags": self.user_tags,
            "ai_tags": self.ai_tags,
            "system_tags": self.system_tags,
            "save_dir": self.save_dir,
        }

    @staticmethod
    def from_dict(model_data:dict) -> "ModelData":
        """Create ModelData from dictionary.

        Args:
            model_data (dict): Dictionary representation of ModelData
                Needs to contain the following keys:
                    "url": str,
                    "save_dir": str,
                    "user_tags": Union[dict, list, set],
                    "ai_tags": Union[dict, list, set],
                and optionally:
                    "description": str,
                    "keywords": list,
                    "system_tags": Union[dict, list, set]

        Returns:
            ModelData: ModelData object

        Raises:
            KeyError: If a required key is missing.
        """
        url = model_data["url"]
        save_dir = model_data["save_dir"]
        user_tags = model_data["user_tags"]
        ai_tags = model_data["ai_tags"]
        system_tags = model_data.get("system_tags")
        description = model_data.get("description")
        keywords = model_data.get("keywords")
        return ModelData(url, save_dir, user_tags, ai_tags, system_tags, description, keywords)

    def is_downloaded(self) -> bool:
        """Check if model file is downloaded.

        Returns:
            bool: True if downloaded, False otherwise
        """
        return does_file_exist(self.gguf_file_path)

    def has_json(self) -> bool:
        """Check if JSON metadata file exists.

        Returns:
            bool: True if exists, False otherwise
        """
        return does_file_exist(self.json_path())

    def download_gguf(self, force_redownload:bool=False) -> str:
        """Download gguf model file.

        The data is streamed into "<gguf_file_path>.unfinished" and renamed
        to the final path only after the whole body has been written.

        Args:
            force_redownload (bool, optional): Force redownload if exists. Defaults to False.

        Returns:
            str: File path of downloaded file

        Raises:
            ValueError: If the model URL is not a huggingface.co URL.
            requests.HTTPError: If the server answers with an error status.
        """
        print(f"Preparing {self.gguf_file_path}\n for {self.name} : {self.model_quantization}...")
        if does_file_exist(self.gguf_file_path) and not force_redownload:
            print(f"File {self.gguf_file_path} already exists. Skipping download.")
            return self.gguf_file_path

        print(f"Downloading {self.name} : {self.model_quantization}...")
        gguf_download_url = self._hf_url_to_download_url(self.gguf_url)
        block_size = 1024000  # bytes requested per chunk (roughly 1 MB)
        progress_bar = f"Please wait, downloading {self.name} : {self.model_quantization}: {{0:0.2f}}% | {{1:0.3f}}/{{2:0.3f}} GB) | {{3:0.3f}} MB/s"
        unfinished_save_path = self.gguf_file_path + ".unfinished"

        # The context manager releases the connection even if writing fails.
        with requests.get(gguf_download_url, stream=True) as response:
            # Without this check an HTML error page would be stored as the model.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            gb_total = total_size / (1024**3)
            downloaded_size = 0
            downloaded_since_last = 0
            download_speed = 0.0
            window_start = time.time()
            with open(unfinished_save_path, "wb") as f:
                for data in response.iter_content(block_size):
                    f.write(data)
                    downloaded_size += len(data)
                    downloaded_since_last += len(data)
                    elapsed_time = time.time() - window_start
                    # Refresh the progress line about once per second.
                    if elapsed_time >= 1:
                        download_speed = downloaded_since_last / (1024**2) / elapsed_time
                        # content-length may be absent; avoid dividing by zero.
                        progress = downloaded_size / total_size * 100 if total_size > 0 else 0.0
                        print(progress_bar.format(progress, downloaded_size / (1024**3), gb_total, download_speed), end='\r')
                        downloaded_since_last = 0
                        window_start = time.time()

        print(progress_bar.format(100, downloaded_size / (1024**3), gb_total, download_speed))
        rename_file(unfinished_save_path, self.gguf_file_path)
        return self.gguf_file_path

    def json_path(self) -> str:
        """Get path for JSON metadata file.

        Returns:
            str: JSON file path
        """
        return change_extension(self.gguf_file_path, ".json")

    def save_json(self, replace_existing:bool=True) -> str:
        """Save ModelData to JSON file.

        Args:
            replace_existing (bool, optional): Overwrite if exists. Defaults to True.

        Returns:
            str: JSON file path
        """
        if replace_existing or not self.has_json():
            save_json_file(self.json_path(), self.to_dict())
        else:
            print(f"File {self.json_path()} already exists and replace_existing={replace_existing}. Skipping save.")
        return self.json_path()

    @staticmethod
    def from_json(json_file_path:str) -> "ModelData":
        """Create ModelData from JSON file.

        Args:
            json_file_path (str): Path to JSON file containing model data

        Returns:
            ModelData: ModelData object
        """
        model_data = load_json_file(json_file_path)
        return ModelData.from_dict(model_data)

    @staticmethod
    def from_url(url:str,
                 save_dir:str,
                 user_tags:Union[dict, list, set] = ("", ""),
                 ai_tags:Union[dict, list, set] = ("", ""),
                 system_tags:Union[dict, list, set] = ("", ""),
                 description:Optional[str] = None,
                 keywords:Optional[list] = None) -> "ModelData":
        """Create ModelData from URL.

        Args:
            url (str): gguf URL
            save_dir (str): Directory to save model
            user_tags (Union[dict, list, set], optional): User markup tags. Defaults to ("", "").
            ai_tags (Union[dict, list, set], optional): AI markup tags. Defaults to ("", "").
            system_tags (Union[dict, list, set], optional): System markup tags. Defaults to ("", "").
            description (Optional[str], optional): Model description. Defaults to None.
            keywords (Optional[list], optional): List of keywords. Defaults to None.

        Returns:
            ModelData: ModelData object
        """
        return ModelData(url, save_dir, user_tags, ai_tags, system_tags, description, keywords)

    def model_path(self) -> str:
        """Get model file path.

        Returns:
            str: gguf file path
        """
        return self.gguf_file_path

    @staticmethod
    def from_file(gguf_file_path:str,
                  save_dir:Optional[str]=None,
                  user_tags:Union[dict, list, set] = ("", ""),
                  ai_tags:Union[dict, list, set] = ("", ""),
                  system_tags:Union[dict, list, set] = ("", ""),
                  description:Optional[str] = None,
                  keywords:Optional[list] = None) -> "ModelData":
        """Create ModelData from gguf file.

        The file path is stored as the model's URL.

        Args:
            gguf_file_path (str): Path to gguf file
            save_dir (Optional[str], optional): Directory to save. Defaults to
                None, in which case the directory of the gguf file is used.
            user_tags (Union[dict, list, set], optional): User markup tags. Defaults to ("", "").
            ai_tags (Union[dict, list, set], optional): AI markup tags. Defaults to ("", "").
            system_tags (Union[dict, list, set], optional): System markup tags. Defaults to ("", "").
            description (Optional[str], optional): Model description. Defaults to None.
            keywords (Optional[list], optional): List of keywords. Defaults to None.

        Returns:
            ModelData: ModelData object
        """
        save_dir = get_directory(gguf_file_path) if save_dir is None else save_dir
        url = gguf_file_path
        return ModelData(url, save_dir, user_tags, ai_tags, system_tags, description, keywords)

    def get_ai_tag_open(self) -> str:
        """Get opening AI tag.

        Returns:
            str: Opening AI tag
        """
        return self.ai_tags["open"]

    def get_ai_tag_close(self) -> str:
        """Get closing AI tag.

        Returns:
            str: Closing AI tag
        """
        return self.ai_tags["close"]

    def get_user_tag_open(self) -> str:
        """Get opening user tag.

        Returns:
            str: Opening user tag
        """
        return self.user_tags["open"]

    def get_user_tag_close(self) -> str:
        """Get closing user tag.

        Returns:
            str: Closing user tag
        """
        return self.user_tags["close"]

    def get_system_tag_open(self) -> str:
        """Get opening system tag.

        Returns:
            str: Opening system tag
        """
        return self.system_tags["open"]

    def get_system_tag_close(self) -> str:
        """Get closing system tag.

        Returns:
            str: Closing system tag
        """
        return self.system_tags["close"]

    def get_ai_tags(self) -> list[str]:
        """Get list of AI tags.

        Returns:
            list[str]: List of opening and closing AI tags
        """
        return [self.get_ai_tag_open(), self.get_ai_tag_close()]

    def get_user_tags(self) -> list[str]:
        """Get list of user tags.

        Returns:
            list[str]: List of opening and closing user tags
        """
        return [self.get_user_tag_open(), self.get_user_tag_close()]

    def get_system_tags(self) -> list[str]:
        """Get list of system tags.

        Returns:
            list[str]: List of opening and closing system tags
        """
        return [self.get_system_tag_open(), self.get_system_tag_close()]

    def has_system_tags(self) -> bool:
        """Check if system tags are set.

        Returns:
            bool: True if system tags set, False otherwise
        """
        if self.system_tags is None:
            return False
        elif self.system_tags["open"] is None or self.system_tags["close"] is None:
            return False
        else:
            return True
Static methods
def from_dict(model_data: dict) ‑> ModelData
-
Create ModelData from dictionary.
Args
model_data
:dict
- Dictionary representation of ModelData
Needs to contain the following keys: "url": str, "save_dir": str, "user_tags": Union[dict, list, set], "ai_tags": Union[dict, list, set], and optionally: "description": str, "keywords": list, "system_tags": Union[dict, list, set]
Returns
ModelData
- ModelData object
Expand source code
@staticmethod
def from_dict(model_data:dict) -> "ModelData":
    """Build a ModelData from its dictionary representation.

    Args:
        model_data (dict): Dictionary representation of ModelData
            Required keys:
                "url": str,
                "save_dir": str,
                "user_tags": Union[dict, list, set],
                "ai_tags": Union[dict, list, set]
            Optional keys (None is used when absent):
                "description": str,
                "keywords": list,
                "system_tags": Union[dict, list, set]

    Returns:
        ModelData: ModelData object
    """
    # Required entries: a missing key raises KeyError.
    source_url = model_data["url"]
    target_dir = model_data["save_dir"]
    user_markup = model_data["user_tags"]
    ai_markup = model_data["ai_tags"]
    # Optional entries fall back to None.
    system_markup = model_data.get("system_tags")
    summary = model_data.get("description")
    tags = model_data.get("keywords")
    return ModelData(
        source_url,
        target_dir,
        user_markup,
        ai_markup,
        system_markup,
        summary,
        tags,
    )
def from_file(gguf_file_path: str, save_dir: Optional[str] = None, user_tags: Union[dict, list, set] = ('', ''), ai_tags: Union[dict, list, set] = ('', ''), system_tags: Union[dict, list, set] = ('', ''), description: Optional[str] = None, keywords: Optional[list] = None) ‑> ModelData
-
Create ModelData from gguf file.
Args
gguf_file_path
:str
- Path to gguf file
save_dir
:Optional[str]
, optional- Directory to save. Defaults to None.
user_tags
:Union[dict, list, set]
, optional- User markup tags. Defaults to ("", "").
ai_tags
:Union[dict, list, set]
, optional- AI markup tags. Defaults to ("", "").
system_tags
:Union[dict, list, set]
, optional- System markup tags. Defaults to ("", "").
description
:Optional[str]
, optional- Model description. Defaults to None.
keywords
:Optional[list]
, optional- List of keywords. Defaults to None.
Returns
ModelData
- ModelData object
Expand source code
@staticmethod
def from_file(gguf_file_path:str,
              save_dir:Optional[str]=None,
              user_tags:Union[dict, list, set] = ("", ""),
              ai_tags:Union[dict, list, set] = ("", ""),
              system_tags:Union[dict, list, set] = ("", ""),
              description:Optional[str] = None,
              keywords:Optional[list] = None) -> "ModelData":
    """Build a ModelData for a gguf file that already exists locally.

    The file path doubles as the model's URL.

    Args:
        gguf_file_path (str): Path to gguf file
        save_dir (Optional[str], optional): Directory to save. Defaults to
            None, in which case ``get_directory(gguf_file_path)`` is used.
        user_tags (Union[dict, list, set], optional): User markup tags. Defaults to ("", "").
        ai_tags (Union[dict, list, set], optional): AI markup tags. Defaults to ("", "").
        system_tags (Union[dict, list, set], optional): System markup tags. Defaults to ("", "").
        description (Optional[str], optional): Model description. Defaults to None.
        keywords (Optional[list], optional): List of keywords. Defaults to None.

    Returns:
        ModelData: ModelData object
    """
    if save_dir is None:
        save_dir = get_directory(gguf_file_path)
    return ModelData(
        gguf_file_path,
        save_dir,
        user_tags,
        ai_tags,
        system_tags,
        description,
        keywords,
    )
def from_json(json_file_path: str) ‑> ModelData
-
Create ModelData from JSON file.
Args
json_file_path
:str
- Path to JSON file containing model data
Returns
ModelData
- ModelData object
Expand source code
@staticmethod
def from_json(json_file_path:str) -> "ModelData":
    """Load a JSON metadata file and turn it into a ModelData.

    Args:
        json_file_path (str): Path to JSON file containing model data

    Returns:
        ModelData: ModelData object built by ``ModelData.from_dict``
    """
    loaded = load_json_file(json_file_path)
    restored = ModelData.from_dict(loaded)
    return restored
def from_url(url: str, save_dir: str, user_tags: Union[dict, list, set] = ('', ''), ai_tags: Union[dict, list, set] = ('', ''), system_tags: Union[dict, list, set] = ('', ''), description: Optional[str] = None, keywords: Optional[list] = None) ‑> ModelData
-
Create ModelData from URL.
Args
url
:str
- gguf URL
save_dir
:str
- Directory to save model
user_tags
:Union[dict, list, set]
, optional- User markup tags. Defaults to ("", "").
ai_tags
:Union[dict, list, set]
, optional- AI markup tags. Defaults to ("", "").
system_tags
:Union[dict, list, set]
, optional- System markup tags. Defaults to ("", "").
description
:Optional[str]
, optional- Model description. Defaults to None.
keywords
:Optional[list]
, optional- List of keywords. Defaults to None.
Returns
ModelData
- ModelData object
Expand source code
@staticmethod
def from_url(url:str,
             save_dir:str,
             user_tags:Union[dict, list, set] = ("", ""),
             ai_tags:Union[dict, list, set] = ("", ""),
             system_tags:Union[dict, list, set] = ("", ""),
             description:Optional[str] = None,
             keywords:Optional[list] = None) -> "ModelData":
    """Build a ModelData for a gguf file identified by its URL.

    All arguments are forwarded unchanged to the ModelData constructor.

    Args:
        url (str): gguf URL
        save_dir (str): Directory to save model
        user_tags (Union[dict, list, set], optional): User markup tags. Defaults to ("", "").
        ai_tags (Union[dict, list, set], optional): AI markup tags. Defaults to ("", "").
        system_tags (Union[dict, list, set], optional): System markup tags. Defaults to ("", "").
        description (Optional[str], optional): Model description. Defaults to None.
        keywords (Optional[list], optional): List of keywords. Defaults to None.

    Returns:
        ModelData: ModelData object
    """
    constructor_args = (
        url,
        save_dir,
        user_tags,
        ai_tags,
        system_tags,
        description,
        keywords,
    )
    return ModelData(*constructor_args)
Methods
def download_gguf(self, force_redownload: bool = False) ‑> str
-
Download gguf model file.
Args
force_redownload
:bool
, optional- Force redownload if exists. Defaults to False.
Returns
str
- File path of downloaded file
Expand source code
def download_gguf(self, force_redownload:bool=False) -> str:
    """Download gguf model file.

    The data is streamed into "<gguf_file_path>.unfinished" and renamed to
    the final path only after the whole body has been written.

    Args:
        force_redownload (bool, optional): Force redownload if exists. Defaults to False.

    Returns:
        str: File path of downloaded file

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    print(f"Preparing {self.gguf_file_path}\n for {self.name} : {self.model_quantization}...")
    if does_file_exist(self.gguf_file_path) and not force_redownload:
        print(f"File {self.gguf_file_path} already exists. Skipping download.")
        return self.gguf_file_path

    print(f"Downloading {self.name} : {self.model_quantization}...")
    gguf_download_url = self._hf_url_to_download_url(self.gguf_url)
    block_size = 1024000  # bytes requested per chunk (roughly 1 MB)
    progress_bar = f"Please wait, downloading {self.name} : {self.model_quantization}: {{0:0.2f}}% | {{1:0.3f}}/{{2:0.3f}} GB) | {{3:0.3f}} MB/s"
    unfinished_save_path = self.gguf_file_path + ".unfinished"

    # The context manager releases the connection even if writing fails.
    with requests.get(gguf_download_url, stream=True) as response:
        # Without this check an HTML error page would be stored as the model.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        gb_total = total_size / (1024**3)
        # Initialised before the loop so the final report also works when the
        # response body is empty.
        downloaded_size = 0
        downloaded_since_last = 0
        download_speed = 0.0
        window_start = time.time()
        with open(unfinished_save_path, "wb") as f:
            for data in response.iter_content(block_size):
                f.write(data)
                downloaded_size += len(data)
                downloaded_since_last += len(data)
                elapsed_time = time.time() - window_start
                # Refresh the progress line about once per second.
                if elapsed_time >= 1:
                    # Bytes -> MiB, matching the "MB/s" label of the progress line.
                    download_speed = downloaded_since_last / (1024**2) / elapsed_time
                    # content-length may be absent; avoid dividing by zero.
                    progress = downloaded_size / total_size * 100 if total_size > 0 else 0.0
                    print(progress_bar.format(progress, downloaded_size / (1024**3), gb_total, download_speed), end='\r')
                    downloaded_since_last = 0
                    window_start = time.time()

    print(progress_bar.format(100, downloaded_size / (1024**3), gb_total, download_speed))
    rename_file(unfinished_save_path, self.gguf_file_path)
    return self.gguf_file_path
def get_ai_tag_close(self) ‑> str
-
Get closing AI tag.
Returns
str
- Closing AI tag
Expand source code
def get_ai_tag_close(self) -> str:
    """Return the closing AI tag.

    Returns:
        str: Value stored under the "close" key of ``self.ai_tags``
    """
    tag_pair = self.ai_tags
    return tag_pair["close"]
def get_ai_tag_open(self) ‑> str
-
Get opening AI tag.
Returns
str
- Opening AI tag
Expand source code
def get_ai_tag_open(self) -> str:
    """Return the opening AI tag.

    Returns:
        str: Value stored under the "open" key of ``self.ai_tags``
    """
    tag_pair = self.ai_tags
    return tag_pair["open"]
-
Get list of AI tags.
Returns
list[str]
- List of opening and closing AI tags
Expand source code
def get_ai_tags(self) -> list[str]:
    """Return the AI tags as an [open, close] list.

    Returns:
        list[str]: List of opening and closing AI tags
    """
    opening = self.get_ai_tag_open()
    closing = self.get_ai_tag_close()
    return [opening, closing]
def get_system_tag_close(self) ‑> str
-
Get closing system tag.
Returns
str
- Closing system tag
Expand source code
def get_system_tag_close(self) -> str:
    """Return the closing system tag.

    Returns:
        str: Value stored under the "close" key of ``self.system_tags``
    """
    tag_pair = self.system_tags
    return tag_pair["close"]
def get_system_tag_open(self) ‑> str
-
Get opening system tag.
Returns
str
- Opening system tag
Expand source code
def get_system_tag_open(self) -> str:
    """Return the opening system tag.

    Returns:
        str: Value stored under the "open" key of ``self.system_tags``
    """
    tag_pair = self.system_tags
    return tag_pair["open"]
-
Get list of system tags.
Returns
list[str]
- List of opening and closing system tags
Expand source code
def get_system_tags(self) -> list[str]:
    """Return the system tags as an [open, close] list.

    Returns:
        list[str]: List of opening and closing system tags
    """
    opening = self.get_system_tag_open()
    closing = self.get_system_tag_close()
    return [opening, closing]
def get_user_tag_close(self) ‑> str
-
Get closing user tag.
Returns
str
- Closing user tag
Expand source code
def get_user_tag_close(self) -> str:
    """Return the closing user tag.

    Returns:
        str: Value stored under the "close" key of ``self.user_tags``
    """
    tag_pair = self.user_tags
    return tag_pair["close"]
def get_user_tag_open(self) ‑> str
-
Get opening user tag.
Returns
str
- Opening user tag
Expand source code
def get_user_tag_open(self) -> str:
    """Return the opening user tag.

    Returns:
        str: Value stored under the "open" key of ``self.user_tags``
    """
    tag_pair = self.user_tags
    return tag_pair["open"]
-
Get list of user tags.
Returns
list[str]
- List of opening and closing user tags
Expand source code
def get_user_tags(self) -> list[str]:
    """Return the user tags as an [open, close] list.

    Returns:
        list[str]: List of opening and closing user tags
    """
    opening = self.get_user_tag_open()
    closing = self.get_user_tag_close()
    return [opening, closing]
def has_json(self) ‑> bool
-
Check if JSON metadata file exists.
Returns
bool
- True if exists, False otherwise
Expand source code
def has_json(self) -> bool:
    """Tell whether the JSON metadata file of this model exists.

    Returns:
        bool: Result of ``does_file_exist`` for the path from ``json_path()``
    """
    metadata_path = self.json_path()
    return does_file_exist(metadata_path)
-
Check if system tags are set.
Returns
bool
- True if system tags set, False otherwise
Expand source code
def has_system_tags(self) -> bool:
    """Tell whether both system tags are set.

    Returns:
        bool: False when ``self.system_tags`` is None or when its "open" or
            "close" entry is None, True otherwise
    """
    tag_pair = self.system_tags
    if tag_pair is None:
        return False
    # Short-circuits like the original: "close" is only read if "open" is set.
    return tag_pair["open"] is not None and tag_pair["close"] is not None
def is_downloaded(self) ‑> bool
-
Check if model file is downloaded.
Returns
bool
- True if downloaded, False otherwise
Expand source code
def is_downloaded(self) -> bool:
    """Tell whether the gguf model file exists locally.

    Returns:
        bool: Result of ``does_file_exist`` for ``self.gguf_file_path``
    """
    file_present = does_file_exist(self.gguf_file_path)
    return file_present
def json_path(self) ‑> str
-
Get path for JSON metadata file.
Returns
str
- JSON file path
Expand source code
def json_path(self) -> str:
    """Return the path of the JSON metadata file.

    The path is the gguf file path with its extension changed to ".json".

    Returns:
        str: JSON file path
    """
    metadata_path = change_extension(self.gguf_file_path, ".json")
    return metadata_path
def model_path(self) ‑> str
-
Get model file path.
Returns
str
- gguf file path
Expand source code
def model_path(self) -> str:
    """Return the local path of the gguf model file.

    Returns:
        str: Value of ``self.gguf_file_path``
    """
    local_path = self.gguf_file_path
    return local_path
def save_json(self, replace_existing: bool = True) ‑> str
-
Save ModelData to JSON file.
Args
replace_existing
:bool
, optional- Overwrite if exists. Defaults to True.
Returns
str
- JSON file path
Expand source code
def save_json(self, replace_existing:bool=True) -> str:
    """Write this ModelData to its JSON metadata file.

    Args:
        replace_existing (bool, optional): When True the file is always
            written. When False the file is written only if ``has_json()``
            reports that it does not exist yet. Defaults to True.

    Returns:
        str: JSON file path, whether or not the file was written
    """
    should_write = replace_existing or not self.has_json()
    if not should_write:
        # An existing file is left untouched; only a notice is printed.
        print(f"File {self.json_path()} already exists and replace_existing={replace_existing}. Skipping save.")
        return self.json_path()
    save_json_file(self.json_path(), self.to_dict())
    return self.json_path()
def set_ai_tags(self, ai_tags: Union[dict, set[str], list[str], tuple[str]]) ‑> None
-
Set AI markup tags.
Args
ai_tags
:Union[dict, set, list, tuple]
- AI tags
Expand source code
def set_ai_tags(self, ai_tags:Union[dict, set[str], list[str], tuple[str]]) -> None:
    """Set AI markup tags.

    Args:
        ai_tags (Union[dict, set, list, tuple]): Either a dict holding both
            an "open" and a "close" key (stored as-is), or a list/tuple whose
            first two items are the opening and the closing tag (any further
            items are ignored).

    Raises:
        ValueError: If a dict lacks the "open" or "close" key, or a
            list/tuple holds fewer than two items.
        TypeError: If ai_tags is a set or any other unsupported type.
    """
    if isinstance(ai_tags, dict):
        if "open" not in ai_tags or "close" not in ai_tags:
            raise ValueError(f"Invalid AI tags: {ai_tags}, for dict tags both 'open' and 'close' keys must be present.")
        self.ai_tags = ai_tags
    elif isinstance(ai_tags, (list, tuple)):
        if len(ai_tags) < 2:
            raise ValueError(f"Invalid AI tags: {ai_tags}, for list or tuple tags both an opening and a closing tag must be present.")
        self.ai_tags = {"open": ai_tags[0], "close": ai_tags[1]}
    elif isinstance(ai_tags, set):
        # A set can be neither indexed nor relied on for ordering, so there
        # is no way to know which element is the opening tag.
        raise TypeError(f"Invalid type for AI tags: {type(ai_tags)}, sets are unordered so the opening and closing tag cannot be told apart; use a dict, list or tuple.")
    else:
        raise TypeError(f"Invalid type for AI tags: {type(ai_tags)}, must be dict, list or tuple.")
def set_save_dir(self, save_dir: str) ‑> None
-
Set save directory and update save file path for the model.
Args
save_dir
:str
- Save directory
Expand source code
def set_save_dir(self, save_dir:str) -> None:
    """Point the model at a new save directory.

    Stores the directory and then recomputes ``self.gguf_file_path`` by
    passing the directory and ``self.gguf_url`` to ``_url_to_file_path``.

    Args:
        save_dir (str): Save directory
    """
    self.save_dir = save_dir
    resolved_model_file = self._url_to_file_path(save_dir, self.gguf_url)
    self.gguf_file_path = resolved_model_file
def set_system_tags(self, system_tags: Union[dict, set[str], list[str], tuple[str]]) ‑> None
-
Set system markup tags.
Args
system_tags
:Union[dict, set, list, tuple]
- System tags
Expand source code
def set_system_tags(self, system_tags:Union[dict, set[str], list[str], tuple[str]]) -> None:
    """Set system markup tags.

    Args:
        system_tags (Union[dict, set, list, tuple]): Either a dict holding
            both an "open" and a "close" key (stored as-is), or a list/tuple
            whose first two items are the opening and the closing tag (any
            further items are ignored).

    Raises:
        ValueError: If a dict lacks the "open" or "close" key, or a
            list/tuple holds fewer than two items.
        TypeError: If system_tags is a set or any other unsupported type.
    """
    if isinstance(system_tags, dict):
        if "open" not in system_tags or "close" not in system_tags:
            raise ValueError(f"Invalid system tags: {system_tags}, for dict tags both 'open' and 'close' keys must be present.")
        self.system_tags = system_tags
    elif isinstance(system_tags, (list, tuple)):
        if len(system_tags) < 2:
            raise ValueError(f"Invalid system tags: {system_tags}, for list or tuple tags both an opening and a closing tag must be present.")
        self.system_tags = {"open": system_tags[0], "close": system_tags[1]}
    elif isinstance(system_tags, set):
        # A set can be neither indexed nor relied on for ordering, so there
        # is no way to know which element is the opening tag.
        raise TypeError(f"Invalid type for system tags: {type(system_tags)}, sets are unordered so the opening and closing tag cannot be told apart; use a dict, list or tuple.")
    else:
        raise TypeError(f"Invalid type for system tags: {type(system_tags)}, must be dict, list or tuple.")
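def set_tags(self, ai_tags: Optional[Union[dict, set[str], list[str], tuple[str]]], user_tags: Optional[Union[dict, set[str], list[str], tuple[str]]], system_tags: Optional[Union[dict, set[str], list[str], tuple[str]]]) ‑> None
-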
Sets any of the provided tags.
Args
ai_tags
:Optional[Union[dict, set, list, tuple]]
- AI tags
user_tags
:Optional[Union[dict, set, list, tuple]]
- User tags
system_tags
:Optional[Union[dict, set, list, tuple]]
- System tags
Expand source code
def set_tags(
    self,
    ai_tags:Optional[Union[dict, set[str], list[str], tuple[str]]]=None,
    user_tags:Optional[Union[dict, set[str], list[str], tuple[str]]]=None,
    system_tags:Optional[Union[dict, set[str], list[str], tuple[str]]]=None,
) -> None:
    """Sets any of the provided tags.

    Each argument that is not None is forwarded to the matching setter
    (``set_ai_tags``, ``set_user_tags``, ``set_system_tags``), in that order.
    Arguments left as None leave the corresponding tags untouched, so a
    caller can update a single tag kind by keyword.

    Args:
        ai_tags (Optional[Union[dict, set, list, tuple]]): AI tags. Defaults to None.
        user_tags (Optional[Union[dict, set, list, tuple]]): User tags. Defaults to None.
        system_tags (Optional[Union[dict, set, list, tuple]]): System tags. Defaults to None.
    """
    if ai_tags is not None:
        self.set_ai_tags(ai_tags)
    if user_tags is not None:
        self.set_user_tags(user_tags)
    if system_tags is not None:
        self.set_system_tags(system_tags)
def set_user_tags(self, user_tags: Union[dict, set[str], list[str], tuple[str]]) ‑> None
-
Set user markup tags.
Args
user_tags
:Union[dict, set, list, tuple]
- User tags
Expand source code
def set_user_tags(self, user_tags:Union[dict, set[str], list[str], tuple[str]]) -> None:
    """Set user markup tags.

    Args:
        user_tags (Union[dict, set, list, tuple]): Either a dict holding both
            an "open" and a "close" key (stored as-is), or a list/tuple whose
            first two items are the opening and the closing tag (any further
            items are ignored).

    Raises:
        ValueError: If a dict lacks the "open" or "close" key, or a
            list/tuple holds fewer than two items.
        TypeError: If user_tags is a set or any other unsupported type.
    """
    if isinstance(user_tags, dict):
        if "open" not in user_tags or "close" not in user_tags:
            raise ValueError(f"Invalid user tags: {user_tags}, for dict tags both 'open' and 'close' keys must be present.")
        self.user_tags = user_tags
    elif isinstance(user_tags, (list, tuple)):
        if len(user_tags) < 2:
            raise ValueError(f"Invalid user tags: {user_tags}, for list or tuple tags both an opening and a closing tag must be present.")
        self.user_tags = {"open": user_tags[0], "close": user_tags[1]}
    elif isinstance(user_tags, set):
        # A set can be neither indexed nor relied on for ordering, so there
        # is no way to know which element is the opening tag.
        raise TypeError(f"Invalid type for user tags: {type(user_tags)}, sets are unordered so the opening and closing tag cannot be told apart; use a dict, list or tuple.")
    else:
        raise TypeError(f"Invalid type for user tags: {type(user_tags)}, must be dict, list or tuple.")
def to_dict(self)
-
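Convert ModelData to a dictionary with the following entries: "url": str, "save_dir": str, "user_tags": Union[dict, list, set], "ai_tags": Union[dict, list, set], "description": str, "keywords": list, "system_tags": Union[dict, list, set]. The returned dictionary also contains the "gguf_file_path", "model_name" and "model_quantization" entries.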
Returns
dict
- Dictionary representation of ModelData
Expand source code
def to_dict(self):
    """Build a plain-dict snapshot of this ModelData.

    The dictionary holds, in this order, the keys "url", "gguf_file_path",
    "model_name", "model_quantization", "description", "keywords",
    "user_tags", "ai_tags", "system_tags" and "save_dir". Values are the
    corresponding attributes themselves, not copies.

    Returns:
        dict: Dictionary representation of ModelData
    """
    return {
        "url": self.gguf_url,
        "gguf_file_path": self.gguf_file_path,
        "model_name": self.name,
        "model_quantization": self.model_quantization,
        "description": self.description,
        "keywords": self.keywords,
        "user_tags": self.user_tags,
        "ai_tags": self.ai_tags,
        "system_tags": self.system_tags,
        "save_dir": self.save_dir,
    }