text2sql

Model Card for MistralSQL-7B

Model Information

Model Parameters

Inference Parameters

Hardware and Software

License

Instruction Format

To leverage instruction fine-tuning, prompts should be surrounded by [INST] and [/INST] tokens. The very first instruction should begin with a beginning-of-sentence (BOS) token id; subsequent instructions should not. The assistant's generation ends with the end-of-sentence (EOS) token id.

For example:

import torch
from peft import PeftModel  # required: PeftModel is used below to load the adapter
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline
)

# Base instruction-tuned model and the QLoRA adapter that is applied on top of it.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
adapter_name = "bugdaryan/MistralSQL-7b-QLoRA"

# Load the base model with float16 weights.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    return_dict=True,
)

# Attach the adapter, then fold it into the base weights so the result can be
# passed to the pipeline like a regular causal LM.
model = PeftModel.from_pretrained(base_model, adapter_name)
model = model.merge_and_unload()
tokenizer = AutoTokenizer.from_pretrained(model_name)

pipe = pipeline('text-generation', model=model, tokenizer=tokenizer)

# Database schema given to the model as a sequence of CREATE TABLE statements.
# NOTE(review): `product_suppliers` is declared twice and the referenced
# `products` table is never defined; the last statement was presumably meant
# to be `products`. Left unchanged here -- confirm the intended schema.
table = "CREATE TABLE sales ( sale_id number PRIMARY KEY, product_id number, customer_id number, salesperson_id number, sale_date DATE, quantity number, FOREIGN KEY (product_id) REFERENCES products(product_id), FOREIGN KEY (customer_id) REFERENCES customers(customer_id), FOREIGN KEY (salesperson_id) REFERENCES salespeople(salesperson_id)); CREATE TABLE product_suppliers ( supplier_id number PRIMARY KEY, product_id number, supply_price number, FOREIGN KEY (product_id) REFERENCES products(product_id)); CREATE TABLE customers ( customer_id number PRIMARY KEY, name text, address text ); CREATE TABLE salespeople ( salesperson_id number PRIMARY KEY, name text, region text ); CREATE TABLE product_suppliers ( supplier_id number PRIMARY KEY, product_id number, supply_price number );"

question = 'Find the salesperson who made the most sales.'

# The instruction is wrapped in [INST] ... [/INST]; the text after [/INST]
# pre-fills the start of the answer and ends with an opening ``` fence so the
# model continues directly with the SQL.
prompt = f"[INST] Write SQLite query to answer the following question given the database schema. Please wrap your code answer using ```: Schema: {table} Question: {question} [/INST] Here is the SQLite query to answer to the question: {question}: ``` "

ans = pipe(prompt, max_new_tokens=100)

# The prompt itself contains two ``` markers, so when the generated text
# includes the prompt (assumed: the pipeline's default behaviour), index 2 of
# the split is the text after the second marker, i.e. the generated query up
# to the closing fence.
print(ans[0]['generated_text'].split('```')[2])