app.py
from flask import Flask, request
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering, AutoModelForQuestionAnswering
import torch
import logging
import pandas as pd
from io import StringIO

logging.basicConfig(level=logging.INFO)

# Token limits, defined for reference; not referenced in the code below.
MAX_TOKENS = 512
MAX_TOKENS_XL = 4096

# Module-level state: each list holds at most one entry, written by the
# /set-model-* endpoints and read by the /qa-* endpoints.
modelNames = []
models = []
tokenizers = []

app = Flask(__name__)
@app.route('/set-model-text', methods=['POST'])
def setModelText():
    # Drop any previously loaded model before loading the new one.
    modelNames.clear()
    models.clear()
    tokenizers.clear()
    body = request.get_json()
    modelName = body["modelName"]
    modelNames.append(modelName)
    logging.info("Loading %s tokenizer", modelName)
    tokenizer = AutoTokenizer.from_pretrained(modelName)
    logging.info("%s tokenizer loaded", modelName)
    logging.info("Loading %s model", modelName)
    model = AutoModelForQuestionAnswering.from_pretrained(modelName)
    logging.info("%s model loaded", modelName)
    print("------------------------------------")
    models.append(model)
    tokenizers.append(tokenizer)
    response = {
        "status": "done"
    }
    return response, 200
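# A usage sketch (assumptions: the server runs locally on Flask's default
# port 5000, and "deepset/roberta-base-squad2" stands in as one example of a
# Hugging Face extractive-QA checkpoint this endpoint can load):
#
#   import requests
#   r = requests.post("http://127.0.0.1:5000/set-model-text",
#                     json={"modelName": "deepset/roberta-base-squad2"})
#   print(r.json())  # expected: {"status": "done"}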
@app.route('/set-model-table', methods=['POST'])
def setModelTable():
    # Drop any previously loaded model before loading the new one.
    modelNames.clear()
    models.clear()
    tokenizers.clear()
    body = request.get_json()
    modelName = body["modelName"]
    modelNames.append(modelName)
    # Note: the checkpoint is hard-coded to TAPAS fine-tuned on WTQ below;
    # the modelName from the request is only used as a response label.
    logging.info("Loading %s tokenizer", modelName)
    tokenizer = AutoTokenizer.from_pretrained("google/tapas-base-finetuned-wtq", drop_rows_to_fit=True)
    # tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-tabfact", drop_rows_to_fit=True)
    logging.info("%s tokenizer loaded", modelName)
    logging.info("Loading %s model", modelName)
    model = AutoModelForTableQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")
    # model = TapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-tabfact")
    logging.info("%s model loaded", modelName)
    print("------------------------------------")
    models.append(model)
    tokenizers.append(tokenizer)
    response = {
        "status": "done"
    }
    return response, 200
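# A usage sketch for the table endpoint (same local-server assumption; the
# modelName sent here is only echoed back as a label, since the TAPAS
# checkpoint above is hard-coded):
#
#   import requests
#   r = requests.post("http://127.0.0.1:5000/set-model-table",
#                     json={"modelName": "google/tapas-base-finetuned-wtq"})
#   print(r.json())  # expected: {"status": "done"}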
@app.route('/qa-text', methods=['POST'])
def QAText():
    body = request.get_json()
    question = body["question"]
    corpus = body["corpus"]
    response = {}
    for i, modelName in enumerate(modelNames):
        tokenizer = tokenizers[i]
        model = models[i]
        inputs = tokenizer(question, corpus, add_special_tokens=True, truncation=True, return_tensors="pt")
        input_ids = inputs["input_ids"].tolist()[0]
        outputs = model(**inputs)
        answer_start_scores = outputs.start_logits
        answer_end_scores = outputs.end_logits
        # Most likely start and end of the answer span (argmax over the
        # logits); the end index is exclusive, hence the +1.
        answer_start = torch.argmax(answer_start_scores)
        answer_end = torch.argmax(answer_end_scores) + 1
        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
        response[modelName] = {
            "question": question,
            "answer": answer,
            # "beginPosition": answer_start.item(),
            # "endPosition": answer_end.item()
        }
    return response, 200
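# A usage sketch (same local-server assumption). The endpoint extracts the
# answer span whose start and end token positions maximize the model's logits:
#
#   import requests
#   r = requests.post("http://127.0.0.1:5000/qa-text", json={
#       "question": "Where is the Eiffel Tower?",
#       "corpus": "The Eiffel Tower is a wrought-iron tower in Paris, France."
#   })
#   print(r.json())  # e.g. {"<modelName>": {"question": ..., "answer": ...}}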
@app.route('/qa-table', methods=['POST'])
def QATable():
    body = request.get_json()
    question = body["question"]
    corpus = body["corpus"]
    response = {}
    for i, modelName in enumerate(modelNames):
        tokenizer = tokenizers[i]
        model = models[i]
        # Parse the CSV corpus into a DataFrame; TAPAS expects string cells.
        table = pd.read_csv(StringIO(corpus), sep=",")
        table = table.applymap(str)
        inputs = tokenizer(table=table, queries=question, padding='max_length', truncation=True, return_tensors="pt")
        outputs = model(**inputs)
        predicted_table_cell_coords, predicted_aggregation_operators = tokenizer.convert_logits_to_predictions(
            inputs,
            outputs.logits.detach(),
            outputs.logits_aggregation.detach()
        )
        aggregation_operators = {0: "NONE", 1: "SUM", 2: "AVERAGE", 3: "COUNT"}
        aggregation_prediction = aggregation_operators[predicted_aggregation_operators[0]]
        coordinates = predicted_table_cell_coords[0]
        # Collect the value of every predicted cell into a list.
        # (An earlier variant instead joined multiple cell values into one
        # comma-separated string.)
        answer = []
        for coordinate in coordinates:
            answer.append(table.iat[coordinate])
        response[modelName] = {
            "question": question,
            "answer": answer,
            "aggregation": aggregation_prediction
        }
    return response, 200
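# A usage sketch (same local-server assumption): the corpus is a CSV string
# that pd.read_csv parses into the table TAPAS answers over. Since the file
# defines no __main__ block, the app itself can be started with `flask run`
# (with FLASK_APP=app.py set):
#
#   import requests
#   csv_corpus = "city,population\nParis,2100000\nLyon,500000"
#   r = requests.post("http://127.0.0.1:5000/qa-table", json={
#       "question": "What is the population of Paris?",
#       "corpus": csv_corpus
#   })
#   print(r.json())  # e.g. {"<modelName>": {"answer": [...], "aggregation": "NONE"}}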