[de07e6]: / src / Matcher / crossencoder_reranker.py

Download this file

59 lines (49 with data), 1.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import json
import logging
from typing import List
import torch
from sagemaker_inference import encoder
from transformers import AutoModelForSequenceClassification, AutoTokenizer
PAIRS = "pairs"
SCORES = "scores"
class CrossEncoder:
def __init__(self) -> None:
self.device = (
torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)
logging.info(f"Using device: {self.device}")
model_name = "BAAI/bge-reranker-base"
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
self.model = self.model.to(self.device)
def __call__(self, pairs: List[List[str]]) -> List[float]:
with torch.inference_mode():
inputs = self.tokenizer(
pairs,
padding=True,
truncation=True,
return_tensors="pt",
max_length=512,
)
inputs = inputs.to(self.device)
scores = (
self.model(**inputs, return_dict=True)
.logits.view(
-1,
)
.float()
)
return scores.detach().cpu().tolist()
def model_fn(model_dir: str) -> CrossEncoder:
try:
return CrossEncoder()
except Exception:
logging.exception(f"Failed to load model from: {model_dir}")
raise
def transform_fn(
cross_encoder: CrossEncoder, input_data: bytes, content_type: str, accept: str
) -> bytes:
payload = json.loads(input_data)
model_output = cross_encoder(**payload)
output = {SCORES: model_output}
return encoder.encode(output, accept)