In [11]:
!pip install -q -U transformers[sentencepiece] rouge git+https://github.com/deepset-ai/haystack.git grpcio-tools==1.34.1 spacy

In [12]:
import spacy
import nltk
import json
from tqdm import tqdm
import pandas as pd 
from rouge import Rouge
from pprint import pprint
from typing import List
from haystack import Document
from haystack.reader import TransformersReader
from haystack.pipeline import ExtractiveQAPipeline 
from haystack.retriever.dense import DensePassageRetriever 
from haystack.document_store.faiss import FAISSDocumentStore
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [13]:
!spacy download en_core_web_md 
!spacy link en_core_web_md en

Collecting en-core-web-md==3.1.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.1.0/en_core_web_md-3.1.0-py3-none-any.whl (45.4 MB)
[K     |████████████████████████████████| 45.4 MB 17 kB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ As of spaCy v3.0, model symlinks are not supported anymore. You can
load trained pipeline packages using their full names or from a directory
path.[0m


In [22]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [61]:
import spacy
# nlp = spacy.load('en_core_web_md')
nlp = English() 
nlp.add_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x7f4ea4ea7640>

In [15]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
with open('drive/MyDrive/qa_test.json', "r") as f:
    qa = json.loads(f.read())['data']

df = pd.read_csv('drive/MyDrive/ex-QA.csv', index_col=0)
df = df.replace(r'\n',' ', regex=True) 

In [17]:
titles = list(df["title"].values)
texts  = list(df["text"].values)
documents: List[Document] = []
 
for title, text in zip(titles, texts):
    documents.append(
        Document(
            text=text,
            meta={
                "name": title or ""
            }
        )
    )

In [85]:
document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",
    return_embedding=True
)

retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    use_gpu=True,
    embed_title=True,
)

# retriever = DensePassageRetriever(
#     document_store=document_store,
#     query_embedding_model="drive/MyDrive/bert-large-finetuned",
#     passage_embedding_model="drive/MyDrive/bert-large-finetuned",
#     use_gpu=True,
#     embed_title=True,
# )

document_store.delete_documents()
document_store.write_documents(documents)
document_store.update_embeddings(
    retriever=retriever
)

09/24/2021 21:46:02 - INFO - haystack.document_store.faiss -   Updating embeddings for 7330 docs...
Updating Embedding:   0%|          | 0/7330 [00:00<?, ? docs/s]

Create embeddings:   0%|          | 0/7344 [00:00<?, ? Docs/s]

Documents Processed: 10000 docs [03:58, 41.92 docs/s]


In [165]:
# reader = TransformersReader(model_name_or_path="ahotrod/albert_xxlargev1_squad2_512", use_gpu=0)
# reader = TransformersReader(model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad", use_gpu=0)

reader = TransformersReader(model_name_or_path="ktrapeznikov/albert-xlarge-v2-squad-v2", 
                            context_window_size=70,
                            max_seq_len=256,
                            doc_stride=128,
                            use_gpu=0)

# reader = TransformersReader(model_name_or_path="drive/MyDrive/bert_basefi_qafi", 
#                             context_window_size=70,
#                             max_seq_len=256,
#                             doc_stride=128,
#                             use_gpu=0)

pipe = ExtractiveQAPipeline(reader, retriever)

# Answers Bleu and Rouge

In [166]:
bleu_scores = []
rouge1_scores = []
rouge2_scores = []
rougel_scores = []
context_detection = []

rouge = Rouge()
smoothie = SmoothingFunction().method4

for data in tqdm(qa):
    true_context = data['context']
    true_context = true_context.replace('\n', ' ')

    for q_a in data['qas']:
        question = q_a['question']
        reference = " ".join(q_a['answers'])
        preds = pipe.run(
            query=question,
            params={"Retriever": {"top_k": 3}, "Reader": {"top_k": 2}}
            )
        
        candidate_sent_list = []
        pred_context_list = [pred.to_dict()['text'] for pred in preds['documents']]
        pred_context = ' '.join(pred_context_list)
        pred_context = pred_context.replace('\n', ' ')
        doc = nlp(pred_context)
        pred_context_sents = list(doc.sents)

        for pred_co in pred_context_list:
            pred_co = "".join(pred_co.rstrip().lstrip())
            if pred_co in true_context:
                context_detection.append(1)
            else:
                context_detection.append(0)

        for pred in preds['answers']:
            pred_answer = pred['answer']

            if pred_answer is not None:
                pred_answer = pred_answer.replace('\n', ' ')
                doc = nlp(pred_answer)
                pred_answer_sents = list(doc.sents)

                for pred_context_sent in pred_context_sents:
                    for pred_answer_sent in pred_answer_sents:
                        pred_answer_sent = "".join(pred_answer_sent.text.rstrip().lstrip())

                        if pred_answer_sent in pred_context_sent.text:
                            candidate_sent_list.append(pred_context_sent.text)


        candidate_sent_set = set(candidate_sent_list)
        candidate = " ".join(candidate_sent_set)
        token_reference = nltk.word_tokenize(reference)
        token_candidate = nltk.word_tokenize(candidate)

        bleu_score = sentence_bleu(token_reference, 
                                    token_candidate, 
                                    smoothing_function=smoothie, 
                                    weights=(1, 0, 0, 0))
        rouge_score = rouge.get_scores(candidate, reference)

        bleu_scores.append(bleu_score)
        rouge1_scores.append(rouge_score[0]['rouge-1']['f'])
        rouge2_scores.append(rouge_score[0]['rouge-2']['f'])
        rougel_scores.append(rouge_score[0]['rouge-l']['f'])

100%|██████████| 38/38 [10:04<00:00, 15.90s/it]


In [167]:
context_detection.count(1) / len(context_detection)

0.046413502109704644

In [168]:
print("bleu -->", sum(bleu_scores)/len(bleu_scores))
print("rouge1 -->", sum(rouge1_scores)/len(rouge1_scores))
print("rouge2 -->", sum(rouge2_scores)/len(rouge2_scores))
print("rougel -->", sum(rougel_scores)/len(rougel_scores))

bleu --> 0.0700302475143219
rouge1 --> 0.2025044353996794
rouge2 --> 0.11069682602623314
rougel --> 0.1903201590307057


# facebook/dpr-question_encoder-single-nq-base + Fine Tuned Bert on (Squad + Our Dataset)

# 3, 2

```
bleu --> 0.06931287859597637
rouge1 --> 0.19821744629020724
rouge2 --> 0.10658866102635696
rougel --> 0.1868117643779736
```

# 3, 5

```
bleu --> 0.03326668172125407
rouge1 --> 0.20946994043485553
rouge2 --> 0.10167689243447016
rougel --> 0.19906621627604376
```

# 3, 7

```
bleu --> 0.02560506763859031
rouge1 --> 0.19673322385299405
rouge2 --> 0.08888356388695941
rougel --> 0.18626961745270976
```

# 5, 2

```
bleu --> 0.06345328647077997
rouge1 --> 0.20406716478695625
rouge2 --> 0.1060285107744637
rougel --> 0.19283290551462437
```

# 5, 3

```
bleu --> 0.04911886384092217
rouge1 --> 0.21052577428485436
rouge2 --> 0.10274851606324212
rougel --> 0.19822657706745186
```

# 5, 5

```
bleu --> 0.03170644661963682
rouge1 --> 0.21191987555043731
rouge2 --> 0.1037352111344695
rougel --> 0.20209905658726562
```

#10, 2

```
bleu --> 0.057428091668584556
rouge1 --> 0.20646246870472995
rouge2 --> 0.10519838762813453
rougel --> 0.1950256050028359
```

# 10, 5

```
bleu --> 0.028692153647818627
rouge1 --> 0.21279947143169525
rouge2 --> 0.1004407817858144
rougel --> 0.20142881253757677
```

# ktrapeznikov/albert-xlarge-v2-squad-v2 + Fine Tuned Bert on (Squad + Our Dataset)

# 3, 2

```
bleu --> 0.0700302475143219
rouge1 --> 0.2025044353996794
rouge2 --> 0.11069682602623314
rougel --> 0.1903201590307057
```