🤗 Huggingface Bert on RedisAI
# Trace the Hugging Face BERT question-answering model with TorchScript
# so it can be served by RedisAI's TORCH backend.
from transformers import BertForQuestionAnswering
import torch

bert_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
# torchscript=True configures the model for torch.jit.trace (tuple outputs)
model = BertForQuestionAnswering.from_pretrained(bert_name, torchscript=True)
model.eval()

# Dummy inputs (input_ids, attention_mask, token_type_ids) that fix the
# input signature for tracing; the actual values are irrelevant.
inputs = [torch.ones(1, 2, dtype=torch.int64),
          torch.ones(1, 2, dtype=torch.int64),
          torch.ones(1, 2, dtype=torch.int64)]

with torch.no_grad():
    traced_model = torch.jit.trace(model, inputs)
torch.jit.save(traced_model, "traced_bert_qa.pt")
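Before pushing the file to RedisAI, it is worth confirming that the trace reproduces the eager model. A minimal sanity check (my addition, not part of the original gist), run in the same session as the script above since it reuses model and inputs:

# Reload the saved trace and compare its logits against the eager model
# on the same dummy inputs; with torchscript=True both return a
# (start_logits, end_logits) tuple.
reloaded = torch.jit.load("traced_bert_qa.pt")
reloaded.eval()

with torch.no_grad():
    eager_start, eager_end = model(*inputs)
    traced_start, traced_end = reloaded(*inputs)

assert torch.allclose(eager_start, traced_start, atol=1e-5)
assert torch.allclose(eager_end, traced_end, atol=1e-5)
print("traced model matches eager model on dummy inputs")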
# Load the traced TorchScript blob and register it with RedisAI under
# the key 'bert-qa', to be executed on CPU by the TORCH backend.
import redisai

r = redisai.Client()
model_file = 'traced_bert_qa.pt'
with open(model_file, 'rb') as f:
    model = f.read()

# The traced bert-large blob is larger than Redis' default 512 MB
# protocol limit, so split it into chunks (used in the sketch below).
chunk_size = 500 * 1024 * 1024
model_chunks = [model[i:i + chunk_size] for i in range(0, len(model), chunk_size)]
r.modelset('bert-qa', 'TORCH', 'CPU', model)
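As written, modelset sends the whole blob in one argument even though model_chunks was just computed. When the serialized model exceeds Redis' proto-max-bulk-len (512 MB by default), that single call can fail; the chunks are the workaround. A minimal sketch of the chunked upload, assuming a RedisAI build whose AI.MODELSET accepts multiple BLOB arguments (client-side support varies across redisai-py releases, hence the generic command interface):

# Assumption: this RedisAI version accepts chunked BLOB arguments.
# redisai.Client subclasses redis.Redis, so execute_command is available.
r.execute_command('AI.MODELSET', 'bert-qa', 'TORCH', 'CPU',
                  'BLOB', *model_chunks)

Alternatively, raising proto-max-bulk-len in redis.conf avoids chunking entirely.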
# Run question answering against the stored model: tokenize each question
# together with the passage, push the tensors to Redis, execute the model,
# and decode the highest-scoring answer span.
import redisai
from transformers import AutoTokenizer
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
r = redisai.Client()

text = r"""
At a very high level, one of the most critical steps in any ML pipeline is called AI serving, a task usually performed by an AI inference engine. The AI inference engine is responsible for the model deployment and performance monitoring steps in the figure above, and represents a whole new world that will eventually determine whether applications can use AI technologies to improve operational efficiencies and solve real business problems.
"""

questions = [
    "What is the most critical step in any ML pipeline?",
    "What is AI serving?",
    "What is an AI inference engine?",
]

for question in questions:
    # Encode question + context into the three tensors BERT expects.
    inputs = tokenizer.encode_plus(question, text, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs['input_ids'].numpy()
    attention_mask = inputs['attention_mask'].numpy()
    token_type_ids = inputs['token_type_ids'].numpy()

    # Store the input tensors in Redis and run the model on them.
    r.tensorset('input_ids', input_ids)
    r.tensorset('attention_mask', attention_mask)
    r.tensorset('token_type_ids', token_type_ids)
    r.modelrun('bert-qa', ['input_ids', 'attention_mask', 'token_type_ids'],
               ['answer_start_scores', 'answer_end_scores'])

    # Fetch the start/end logits and decode the most likely answer span.
    answer_start_scores = r.tensorget('answer_start_scores')
    answer_end_scores = r.tensorget('answer_end_scores')
    answer_start = np.argmax(answer_start_scores)
    answer_end = np.argmax(answer_end_scores) + 1
    input_ids = inputs["input_ids"].tolist()[0]
    output_tokens = tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    answer = tokenizer.convert_tokens_to_string(output_tokens)
    print(f"Question: {question}")
    print(f"Answer: {answer}\n")