Base Model: t5-small
# Training Results
[17610/17610 1:32:31, Epoch 9/10]

| Step | Training Loss | Validation Loss |
|-------|---------------|-----------------|
| 1000 | 2.682400 | 0.829368 |
| 2000 | 0.914000 | 0.568155 |
| 3000 | 0.707700 | 0.465733 |
| 4000 | 0.613500 | 0.408758 |
| 5000 | 0.557300 | 0.374811 |
| 6000 | 0.515800 | 0.350752 |
| 7000 | 0.487000 | 0.331517 |
| 8000 | 0.466100 | 0.319071 |
| 9000 | 0.449400 | 0.309488 |
| 10000 | 0.438800 | 0.301829 |
| 11000 | 0.430000 | 0.296482 |
| 12000 | 0.420200 | 0.292672 |
| 13000 | 0.418200 | 0.290445 |
| 14000 | 0.413400 | 0.288662 |
| 15000 | 0.410100 | 0.287757 |
| 16000 | 0.412600 | 0.287280 |
| 17000 | 0.410000 | 0.287134 |
question: what is id with name jui and age equal 25
table: ['id', 'name', 'age']
SELECT ID FROM table WHEREname = jui AND age equal 25
# Copy the code below into your notebook to use the model
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("DebeshSahoo/text2sql-finetune")
# Load the model
model = AutoModelForSeq2SeqLM.from_pretrained("DebeshSahoo/text2sql-finetune")
# Rest of the code for preparing input, generating predictions, and decoding the output...
from typing import List
# Markers placed in front of the column list and the question when the
# model's input prompt is assembled.
table_prefix = "table:"
question_prefix = "question:"
def prepare_input(question: str, table: List[str]):
    """Build the tokenized prompt for one question/table pair.

    The prompt text has the form
    ``"<question_prefix> <question> <table_prefix> <col1>,<col2>,..."``.
    The question and the table are also echoed to stdout.

    Args:
        question: Question text placed after ``question_prefix``.
        table: Strings joined with commas and placed after ``table_prefix``.

    Returns:
        The ``input_ids`` produced by the module-level ``tokenizer`` when
        called with ``return_tensors="pt"``.
    """
    print("question:", question)
    print("table:", table)
    join_table = ",".join(table)
    inputs = f"{question_prefix} {question} {table_prefix} {join_table}"
    # `max_length` only bounds the prompt when truncation is active, so ask
    # for it explicitly instead of relying on the tokenizer's implicit
    # fallback (which also emits a warning).
    input_ids = tokenizer(
        inputs,
        max_length=700,
        truncation=True,
        return_tensors="pt",
    ).input_ids
    return input_ids
def inference(question: str, table: List[str]) -> str:
    """Generate the model's output text for a question about a table.

    Args:
        question: Question text forwarded to ``prepare_input``.
        table: Column names forwarded to ``prepare_input``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    input_data = prepare_input(question=question, table=table)
    # Put the input ids on the same device as the model before generating.
    input_data = input_data.to(model.device)
    # NOTE(review): `top_k` normally only takes effect when sampling is
    # enabled; with plain beam search it is presumably ignored -- confirm
    # against the generation config before removing it.
    outputs = model.generate(
        inputs=input_data,
        num_beams=10,
        top_k=10,
        max_length=512,
    )
    result = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
    return result
# Compare the model's prediction with the reference query for one test row.
# NOTE(review): `dataset` is not defined anywhere in this snippet; it must
# already be loaded in the notebook. The fields accessed here look like the
# WikiSQL schema -- confirm which dataset was used.
test_id = 1000
print(
    "model result:",
    inference(
        dataset["test"][test_id]["question"],
        dataset["test"][test_id]["table"]["header"],
    ),
)
print("real result:", dataset["test"][test_id]["sql"]["human_readable"])

# Ad-hoc example with a hand-written question and column list.
inference("what is id with name jui and age equal 25", ["id", "name", "age"])