| import re |
|
|
| import torch |
|
|
| import gradio as gr |
|
|
| from peft import PeftModel |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
|
def load_model_tokenizer():
    """Load the Qwen2.5-3B-Instruct base model with its GRPO-trained
    SQL-reasoning LoRA adapter, plus the matching tokenizer.

    Returns:
        tuple: (model, tokenizer) — the adapter is loaded frozen
        (``is_trainable=False``), i.e. inference-only.
    """
    base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct", max_length=2560)
    # Stack the fine-tuned LoRA adapter on top of the frozen base weights.
    adapted = PeftModel.from_pretrained(base, "DeathReaper0965/Qwen2.5-3B-Inst-SQL-Reasoning-GRPO", is_trainable=False)

    # NOTE(review): `max_length` passed to the tokenizer is not the same knob as
    # `model_max_length` — confirm this argument has the intended effect.
    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct", max_length=2560)

    return adapted, tok
|
|
|
|
# Load the model and tokenizer once at import time so every Gradio
# request reuses the same in-memory instances.
model, tokenizer = load_model_tokenizer()
|
|
|
|
def create_prompt(schemas, question):
    """Build the two-turn chat prompt for SQL generation.

    Args:
        schemas: CREATE TABLE statements (newline-separated) to ground the query.
        question: Natural-language question to be answered via SQL.

    Returns:
        list[dict]: chat messages — a system turn describing the
        <reason>/<answer> output format, then a user turn embedding
        the schemas and the question.
    """
    system_message = """\
You are an expert SQL Query Writer.
Given relevant Schemas and the Question, you first understand the problem entirely and then reason about the best possible approach to come up with an answer.
Once, you are confident in your reasoning, you will then start generating the SQL Query as the answer that accurately solves the given question leveraging some or all schemas.

Remember that you should place all your reasoning between <reason> and </reason> tags.
Also, you should provide your solution between <answer> and </answer> tags.

An example generation is as follows:
<reason>
This is a sample reasoning that solves the question based on the schema.
</reason>
<answer>
SELECT
COLUMN
FROM TABLE_NAME
WHERE
CONDITION
</answer>"""

    user_message = f"""\
SCHEMAS:
---------------

{schemas}

---------------

QUESTION: "{question}"\
"""

    return [
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': user_message},
    ]
|
|
|
|
def extract_answer(gen_output):
    """Extract the text between the first <answer>...</answer> pair.

    Args:
        gen_output: Decoded model generation (may contain <reason>/<answer> tags).

    Returns:
        str | None: the captured answer text (untrimmed), or None when
        no matching tag pair is present.
    """
    # DOTALL lets the answer span multiple lines; IGNORECASE tolerates
    # tag-casing drift in the generation; non-greedy stops at the first close tag.
    pattern = re.compile(
        r"<answer>(.+?)</answer>",
        flags=re.MULTILINE | re.DOTALL | re.IGNORECASE,
    )

    match = pattern.search(gen_output)
    return match.group(1) if match is not None else None
|
|
|
|
def response(user_schemas, user_question):
    """Generate a reasoned SQL answer for the user's schemas and question.

    Args:
        user_schemas: CREATE TABLE statements provided in the UI textbox.
        user_question: Natural-language question provided in the UI textbox.

    Returns:
        str: the full assistant generation, a separator, and the extracted
        <answer> section (or a placeholder when none was produced).
    """
    user_prompt = create_prompt(user_schemas, user_question)

    inputs = tokenizer.apply_chat_template(user_prompt,
                                           tokenize=True,
                                           add_generation_prompt=True,
                                           return_dict=True,
                                           return_tensors="pt")

    # inference_mode disables autograd bookkeeping during generation.
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=1024)

    outputs = tokenizer.batch_decode(outputs)
    # Keep only the assistant turn of the decoded chat transcript.
    output = outputs[0].split("<|im_start|>assistant")[-1].strip()

    final_answer = extract_answer(output)

    # Bug fix: extract_answer returns None when the generation contains no
    # <answer> tags, and `str + None` below would raise TypeError, crashing
    # the request. Substitute a readable placeholder instead.
    if final_answer is None:
        final_answer = "No <answer> section was found in the generation."

    final_output = output + "\n\n" + "="*20 + "\n\nFinal Answer: \n" + final_answer

    return final_output
|
|
|
|
# Markdown description rendered above the Gradio interface inputs.
# Fix: removed the duplicated word in "to to generate".
desc = """
**NOTE: This HF Space is running on Free Version so the generation process will be very slow.**<br>

Please use the "Table Schemas" field to provide the required schemas to generate the SQL Query for - separated by new lines.<br>
**Example:**
```python
CREATE TABLE demographic (
subject_id text,
admission_type text,
hadm_id text)

CREATE TABLE diagnoses (
subject_id text,
hadm_id text)
```

Finally, use the "Question" field to provide the relevant question to be answered based on the provided schemas.<br>
**Example:** How many patients whose admission type is emergency.
"""
|
|
# Gradio UI wiring: two free-text inputs (schemas, question) mapped through
# `response` to a single text output containing reasoning + final SQL.
demo = gr.Interface(
    fn=response,
    inputs=[gr.Textbox(label="Table Schemas",
                       placeholder="Expected to have CREATE TABLE statements with datatypes separated by new lines"),
            gr.Textbox(label="Question",
                       placeholder="Eg. How many patients whose admission type is emergency")
            ],
    outputs=gr.Textbox(label="Generated SQL Query with reasoning"),
    title="SQL Query Generator trained with GRPO to elicit reasoning",
    description=desc
)


# Start the web server (blocking call).
demo.launch()