--- a +++ b/PTAB_Model_Decisions_github.ipynb @@ -0,0 +1,1540 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "<a href=\"https://colab.research.google.com/github/bahrad/PTAB/blob/master/PTAB_Model_Decisions_github.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p-aVXISlSKYH" + }, + "source": [ + "#Initialization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wOFk8JfaSMSV" + }, + "source": [ + "##Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ESf61HRPSERn" + }, + "outputs": [], + "source": [ + "%tensorflow_version 2.x\n", + "\n", + "%xmode Context\n", + "# Verbose\n", + "\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import pickle\n", + "import os\n", + "import itertools\n", + "\n", + "from collections import Counter, defaultdict\n", + "import random\n", + "from pandas import DataFrame\n", + "import datetime\n", + "from datetime import datetime\n", + "import dateutil\n", + "from dateutil.parser import parse as dateparse\n", + "from tqdm.notebook import tqdm\n", + "import time\n", + "\n", + "import xgboost as xgb\n", + "\n", + "import sklearn as sk\n", + "from sklearn.preprocessing import MultiLabelBinarizer, QuantileTransformer, OneHotEncoder, StandardScaler\n", + "from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV\n", + "from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit\n", + "from sklearn.linear_model import LogisticRegression\n", + "import sklearn.metrics\n", + "from sklearn.metrics import accuracy_score,classification_report, make_scorer, balanced_accuracy_score, f1_score, coverage_error, roc_auc_score, confusion_matrix, plot_confusion_matrix\n", + "from sklearn.cluster import KMeans\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.utils import resample, shuffle\n", + "from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin\n", + "from sklearn.neighbors import NearestNeighbors\n", + "from sklearn.manifold import TSNE\n", + "from sklearn.utils import class_weight\n", + "\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "\n", + "from imblearn.over_sampling import SMOTE, RandomOverSampler, ADASYN, BorderlineSMOTE\n", + "from imblearn.under_sampling import RandomUnderSampler, EditedNearestNeighbours, CondensedNearestNeighbour, AllKNN\n", + "from imblearn.combine import SMOTEENN, SMOTETomek\n", + "from imblearn.pipeline import make_pipeline,Pipeline\n", + "\n", + "from tensorflow.keras.preprocessing.text import Tokenizer\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", + "\n", + "import string\n", + "import re\n", + "# import unicodedata\n", + "\n", + "import nltk\n", + "nltk.download('stopwords')\n", + "from nltk.corpus import stopwords\n", + "STOPWORDS = set(stopwords.words('english'))\n", + "\n", + "nltk.download('averaged_perceptron_tagger')\n", + "nltk.download('wordnet')\n", + "nltk.download('punkt')\n", + "\n", + "!pip install lime\n", + "import lime\n", + "from lime import lime_text\n", + "from lime.lime_text import LimeTextExplainer\n", + "from lime.explanation import Explanation" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "8KTO164A7t1G" + }, + "outputs": [], + "source": [ + "# COMMENT OUT FOR PUBLIC CODE\n", + "from google.colab import drive, files\n", + "# drive.mount('/content/drive')\n", + "\n", + "# FILELOC = \"DATA/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xvw4VK92r4-b" + }, + "outputs": [], + "source": [ + "try:\n", + " tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection\n", + " print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])\n", + " tf.config.experimental_connect_to_cluster(tpu)\n", + " tf.tpu.experimental.initialize_tpu_system(tpu)\n", + " tpu_strategy = tf.distribute.TPUStrategy(tpu)\n", + " tpu_env=True\n", + "except ValueError:\n", + " print('Not connected to a TPU runtime.')\n", + " tpu_env=False" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oSiHsBoY4_bv" + }, + "source": [ + "#Functions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E3qcPlglEzjC" + }, + "source": [ + "##Define Models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Va27pqfoegI_" + }, + "outputs": [], + "source": [ + "def EmbedNN(Params):\n", + "\n", + " inpTensor = keras.Input(shape=(Params['text_length'],))\n", + " \n", + "\n", + " \n", + " if Params['pretrained_embeddings']:\n", + " embedding = keras.layers.Embedding(Params['vocab_size'],\n", + " Params['embedding_dim'],\n", + " weights=[Params['embeddings']],\n", + " input_length=Params['text_length'],\n", + " mask_zero=True,\n", + " trainable=False,\n", + " ) \n", + " else:\n", + " embedding = keras.layers.Embedding(Params['vocab_size'],\n", + " Params['embedding_dim'],\n", + " mask_zero=True,\n", + " trainable=True,\n", + " name='embedding',\n", + " )\n", + " x = embedding(inpTensor)\n", + "\n", + " convs = []\n", + " filter_sizes = list(range(Params['min_filter_size'],Params['max_filter_size']+1))\n", + " for filter_size in filter_sizes:\n", + " l_conv = keras.layers.Conv1D(filters=Params['num_filters'], \n", + " kernel_size=filter_size,\n", + " kernel_regularizer=keras.regularizers.l2(Params['kernel_L2_reg']),\n", + " activation='relu')(x)\n", + " h = keras.layers.TimeDistributed(keras.layers.Dense(Params['num_filters'],\n", + " activation='tanh'))(l_conv)\n", + " attention = keras.layers.TimeDistributed(keras.layers.Dense(1, activation='tanh'))(h)\n", + " attention = keras.layers.Flatten()(attention) \n", + " attention = keras.layers.Softmax(axis=1,\n", + " name='attention_'+str(filter_size))(attention)\n", + " attention = keras.layers.RepeatVector(Params['num_filters'])(attention)\n", + " attention = keras.layers.Permute([2, 1])(attention)\n", + " representation = keras.layers.multiply([h, attention])\n", + " representation = tf.math.reduce_sum(representation, axis = 1)\n", + " convs.append(representation)\n", + " # l_pool = keras.layers.GlobalMaxPooling1D()(l_conv)\n", + " # convs.append(l_pool)\n", + " l_merge = keras.layers.concatenate(convs, axis=1)\n", + " \n", + " x = keras.layers.Dropout(Params['dropout_after_convs'])(l_merge) \n", + "\n", + " dense1 = keras.layers.Dense(Params['num_dense'],\n", + " kernel_constraint=Params['kernel_constraint'],\n", + " activation = 'relu')(x)\n", + " x = dense1\n", + " dropout1 = keras.layers.Dropout(Params['dropout_after_Dense'])(x)\n", + " x = dropout1\n", + "\n", + " if not Params['ifMulticlass']:\n", + " finalOut = keras.layers.Dense(1, activation='sigmoid',\n", + " 
bias_initializer=tf.keras.initializers.Constant(Params['initial_bias'])\n", + " )(x)\n", + " else:\n", + " finalOut = keras.layers.Dense(Params['nclasses'], activation='softmax')(x)\n", + "\n", + " # define the model's start and end points \n", + " model = keras.Model(inpTensor,finalOut)\n", + "\n", + " return model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xAfa4FkoIhU6" + }, + "source": [ + "#Define Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fDoDihkSvq7h" + }, + "outputs": [], + "source": [ + "Params = {}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ndFDWyjpIhU8" + }, + "outputs": [], + "source": [ + "Params['num_epochs'] = 50\n", + "\n", + "Params['learning_rate'] = 1e-4\n", + "if tpu_env:\n", + " Params['batch_size'] = 48\n", + "else:\n", + " Params['batch_size'] = 48\n", + "\n", + "Params['embedding_dim'] = 128 # 128\n", + "\n", + "# CNN parameters\n", + "Params['min_filter_size'] = 2\n", + "Params['max_filter_size'] = 12 # 12\n", + "Params['num_filters'] = 256 # 256\n", + "Params['dropout_after_convs'] = 0.4 # 0.4\n", + "# Dense Layer Parameters\n", + "Params['num_dense'] = 256 # 256\n", + "Params['dropout_after_Dense'] = 0.4\n", + "\n", + "# Transformer+Attention Model parameters\n", + "Params['embdim'] = 2000\n", + "Params['mask_zero'] = True\n", + "Params['numheads'] = 8\n", + "Params['ffdim'] = 64\n", + "Params['trans_drop'] = 0.4\n", + "Params['Nt'] = 1\n", + "Params['ifPreCNN'] = False\n", + "if Params['ifPreCNN']:\n", + " Params['W'] = 500\n", + " Params['Nc'] = 1\n", + " Params['Nl'] = 1\n", + "Params['num_dense_embed'] = 64 # 256\n", + "Params['dropout_after_Dense_embed'] = 0.0 #0.2\n", + "\n", + "Params['kernel_constraint'] = keras.constraints.max_norm(1.0)\n", + "Params['kernel_L2_reg'] = 0.1\n", + "Params['bias_L2_reg'] = 0.1\n", + "Params['activity_L2_reg'] = 0.1\n", + "\n", + "Params['ifMulticlass'] = False\n", + "Params['nclasses'] = 2\n", + "\n", + "Params['sample_weighting'] = True\n", + "\n", + "Params['loss'] = keras.losses.BinaryCrossentropy(from_logits=False)\n", + "if not tpu_env:\n", + " # otherwise have to define in the TPU environment\n", + " Params['metrics'] = [\n", + " # keras.metrics.TruePositives(name='tp'),\n", + " # keras.metrics.FalsePositives(name='fp'),\n", + " # keras.metrics.TrueNegatives(name='tn'),\n", + " # keras.metrics.FalseNegatives(name='fn'),\n", + " keras.metrics.BinaryAccuracy(name='acc'),\n", + " # keras.metrics.PrecisionAtRecall(0.5, name='par50'),\n", + " # keras.metrics.Precision(name='prec'),\n", + " # keras.metrics.Recall(name='rec'),\n", + " keras.metrics.AUC(name='auc'),\n", + " ]\n", + "\n", + "# Params['initial_bias'] = np.log(num1/num0)\n", + "# Params['initial_bias'] = np.log(2) # default\n", + "# Params['initial_bias'] = None\n", + "\n", + "Params['ifEarlyStopping'] = True\n", + "# Params['ifEarlyStopping'] = False\n", + "# Params['monitor'] = 'loss'\n", + "Params['monitor'] = 'val_auc'\n", + "Params['patience'] = 10\n", + "early_stopping = tf.keras.callbacks.EarlyStopping(\n", + " monitor = Params['monitor'],\n", + " verbose = 2,\n", + " patience = Params['patience'],\n", + " mode = 'auto',\n", + " min_delta = 0,\n", + " restore_best_weights = True\n", + " )\n", + "Params['callbacks'] = [early_stopping]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wsYToQkTO5iF" + }, + "source": [ + "#Text Preprocessing & Tokenization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "tDd45DbnVWDp" + }, + "outputs": [], + "source": [ + "DOCTYPE = 'Responses'\n", + "\n", + "Params['pretrained_embeddings'] = False\n", + "\n", + "# Params['num_words_to_use'] = None\n", + "Params['num_words_to_use'] = 20000\n", + "# Params['num_words_to_use'] = 1000\n", + "\n", + "\n", + "# Params['text_length'] = 4000\n", + "Params['text_length'] = 8000\n", + "Params['text_start'] = 0 # 100\n", + "Params['text_end'] = Params['text_start'] + Params['text_length']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uor3Kbin2lfZ" + }, + "outputs": [], + "source": [ + "ptab = pd.read_csv(FILELOC + 'PTAB_Institution_Proceedings_to_20211231.tsv', sep='\\t')\n", + "# print(len(ptab))\n", + "# ptab.drop_duplicates('Proceeding', inplace=True)\n", + "# print(len(ptab))\n", + "# ptab['date'] = ptab['Case Filing Date'].apply(dateparse)\n", + "\n", + "# trainingvariable = 'Responses'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qx6RjOg82lcU" + }, + "outputs": [], + "source": [ + "# with open(FILELOC + 'IPR_Proceeding_PartyNames_12312022.txt', 'r', encoding=\"ISO-8859-1\") as f:\n", + "# outfile = [line.rstrip('\\n') for line in f]\n", + "# case = []; number = []\n", + "# for k in range(0,len(outfile),2):\n", + "# case.append(outfile[k])\n", + "# number.append(outfile[k+1])\n", + "# casedf = pd.DataFrame.from_dict({'case':case, 'proc':number})\n", + "# casedf.drop_duplicates('proc', inplace=True)\n", + "# casedf['proc'] = casedf['proc'].apply(lambda x: x.split('(')[0].strip())\n", + "# casedf['name'] = casedf['case'].apply(lambda x: x.strip(\"\\\"\"))\n", + "# casedf[casedf.name.str.contains('Petition')].to_csv('a.csv')\n", + "\n", + "# common_names = set(['business', 'doing', 'company', 'corporation', 'formerly', 'et', 'al'])\n", + "\n", + "# def f(x):\n", + "# y = x\n", + "# if 'Petition' in x:\n", + "# if 'Covered' in x:\n", + "# y = x.replace(\"Petition for Covered Business Method Patent Review by\",\"\")\n", + "# elif 'Inter' in x:\n", + "# y = x.replace(\"Petition for Inter Partes Review by\", \"\")\n", + "# y = y.translate(str.maketrans('', '', string.punctuation))\n", + "# if 'v' in y:\n", + "# y = y.replace(\"v\", \"\")\n", + "# y = [s.strip() for s in y.strip().split(' ') if s != \"\" and s not in STOPWORDS|common_names]\n", + "# return y\n", + "# casedf['party_names'] = casedf['name'].apply(f)\n", + "\n", + "# ptdf = pd.merge(ptab,casedf,left_on='Proceeding',right_on='proc',how='inner')\n", + "# print(len(ptab), len(casedf), len(ptdf))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YxdJ9pvjXJSk" + }, + "outputs": [], + "source": [ + "# # Drop error messages and duplicates\n", + "\n", + "# ptdf.drop(columns=list({'Petitions','Responses','Decisions'}-{DOCTYPE}), inplace=True)\n", + "# ptdf.drop(columns=['case','proc'], inplace=True)\n", + "\n", + "# ptdf[DOCTYPE] = ptdf[DOCTYPE].fillna('NA')\n", + "\n", + "# # clean up texts by removing (cid:##) which is likely an artifact of the PDF reading process\n", + "# cid_str = re.compile(\"\\(cid:\\d+\\)\")\n", + "# def f(x):\n", + "# return re.sub(cid_str, \"\", x)\n", + "# ptdf[DOCTYPE] = ptdf[DOCTYPE].apply(f)\n", + "\n", + "# def get_word_count(text):\n", + "# return len(text.split())\n", + "# ptdf[f'{DOCTYPE}_Len'] = ptdf[DOCTYPE].apply(get_word_count)\n", + "# MIN_LENGTH = 50\n", + "\n", + "# print(len(ptdf))\n", + "# ptdf.drop(ptdf[ptdf[f'{DOCTYPE}_Len'] < MIN_LENGTH].index, inplace=True)\n", + "# 
print(len(ptdf))\n", + "# ptdf.drop_duplicates(DOCTYPE, keep=False, inplace=True)\n", + "# print(len(ptdf))\n", + "\n", + "# ptdf.reset_index(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pUFUMGsMiLmt" + }, + "outputs": [], + "source": [ + "Params['remove_stop_words'] = True\n", + "Params['remove_alphanumeric'] = True\n", + "Params['remove_punctuation'] = True\n", + "Params['remove_shortword_size'] = 3\n", + "Params['remove_propernouns'] = True\n", + "\n", + "Params['clean_all'] = True\n", + "Params['remove_shortword_size'] = 3\n", + "\n", + "Params['use_lowercase'] = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WMKf8CVm39nn" + }, + "outputs": [], + "source": [ + "# remove_shortword_size = Params['remove_shortword_size']\n", + "# def process_docs(x):\n", + "# doc = x.replace(\"‘\", \"\\'\").replace(\"’\", \"\\'\").replace(\"´\", \"\\'\").replace(\"“\", \"\\\"\").replace(\"”\", \"\\\"\")\n", + "# t = nltk.tokenize.word_tokenize(doc)\n", + "# PUNCT = set(string.punctuation + u\"‘’´`“”–-§\")\n", + "# tags = nltk.tag.pos_tag(t)\n", + "# propernouns = set([a for a,b in tags if b=='NNP'])\n", + "# # shortwords = set([tt for tt in t if len(tt) <= remove_shortword_size])\n", + "# noisewords = set([tt for tt in t if (len(tt) <= 2) and any(map(lambda x: x in PUNCT, tt))])\n", + "# numwords = set([tt for tt in t if any(map(str.isdigit, tt))])\n", + "# emailwords = set([tt for tt in t if '@' in tt])\n", + "# dotwords = set([tt for tt in t if '.' in tt])\n", + " \n", + "# # reject_list = PUNCT|propernouns|STOPWORDS|shortwords|numwords|emailwords|dotwords\n", + "# reject_list = PUNCT|propernouns|numwords|emailwords|dotwords|noisewords\n", + "# proct = [tt for tt in t if tt not in reject_list]\n", + "# return proct\n", + "\n", + "# doclist = ptdf[DOCTYPE].tolist()\n", + "# # docmap = map(process_docs, doclist)\n", + "# # tokdocs = [doc for doc in tqdm(docmap)]\n", + "# tokdocs = [process_docs(doc) for doc in tqdm(doclist)]\n", + "# with open(FILELOC + 'Tokenized_Responses_20220212.pkl', 'wb') as f:\n", + "# pickle.dump([ptdf, tokdocs], f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LkiSq11eoXag" + }, + "outputs": [], + "source": [ + "# if Params['remove_propernouns'] or Params['clean_all']:\n", + "# def f(x):\n", + "# if 'v.' 
not in x:\n", + "# return 'NO_PARTY'\n", + "# else:\n", + "# y = x.split('v.')\n", + "# petitioner = y[0].split()[0].strip().replace(',', '')\n", + "# patentowner = y[1].split()[0].strip().replace(',', '')\n", + "# return [petitioner, patentowner]\n", + "\n", + "# parties_first = casedf['name'].apply(f).values\n", + "# CASENAMES = set(itertools.chain.from_iterable(parties_first))\n", + "\n", + "# docs = ptdf[DOCTYPE].values\n", + "# partyname_list = ptdf['party_names'].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_7WJ3Cqxbb5j" + }, + "outputs": [], + "source": [ + "# if Params['clean_all']:\n", + "# remove_shortword_size = Params['remove_shortword_size']\n", + "# def process_docs(x):\n", + "# doc = x\n", + "# t = nltk.tokenize.word_tokenize(doc)\n", + "# PUNCT = string.punctuation + u\"‘’´“”–-\"\n", + "# propernouns = set([a for a,b in nltk.tag.pos_tag(t) if b=='NNP'])\n", + "# shortwords = set([tt for tt in t if len(tt) <= remove_shortword_size])\n", + "# reject_list = set(PUNCT)|propernouns|STOPWORDS|shortwords\n", + "\n", + "# proct = [tt for tt in t if tt.isalpha() and tt not in reject_list]\n", + "# return proct\n", + "\n", + "# else:\n", + "# remove_punct = Params['remove_punctuation']\n", + "# remove_stopwords = Params['remove_stop_words']\n", + "# remove_alphanumeric = Params['remove_alphanumeric']\n", + "# # set to False or None if not used otherwise remove this length or less\n", + "# remove_shortword_size = Params['remove_shortword_size']\n", + "# remove_proper = Params['remove_propernouns']\n", + "\n", + "# def process_docs(x):\n", + "# doc, partynames = x\n", + "# t = nltk.tokenize.word_tokenize(doc)\n", + "# PUNCT = string.punctuation + u\"‘’´“”–-\"\n", + "# if remove_punct:\n", + "# proct = [tt for tt in t if tt not in set(PUNCT)]\n", + "# if remove_stopwords:\n", + "# proct = [tt for tt in proct if tt not in STOPWORDS]\n", + "# if remove_alphanumeric:\n", + "# proct = [tt for tt in proct if tt.isalpha()]\n", + "# if remove_shortword_size:\n", + "# proct = [tt for tt in proct if len(tt) > remove_shortword_size]\n", + "# propernouns = set([a for a,b in nltk.tag.pos_tag(proct) if b=='NNP'])\n", + "# if Params['keep_case_names']:\n", + "# propernouns = propernouns - (CASENAMES - set(partynames))\n", + "# if remove_proper:\n", + "# proct = [tt for tt in proct if tt not in propernouns]\n", + "# return proct\n", + "\n", + "# if Params['clean_all']:\n", + "# tokdocs = ptdf[DOCTYPE].apply(process_docs)\n", + "# else:\n", + "# tokdocs = [process_docs([docs[ind], partyname_list[ind]]) for ind in tqdm(ptdf.index)]\n", + "\n", + "# # with open(FILELOC + 'Tokenized_Responses_20220131.pkl', 'wb') as f:\n", + "# # pickle.dump([ptdf, tokdocs], f)\n", + "# # with open(FILELOC + 'Tokenized_Decisions_20220131.pkl', 'wb') as f:\n", + "# # pickle.dump([ptdf, tokdocs], f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "panQ-y442lY6" + }, + "outputs": [], + "source": [ + "# with open(FILELOC + 'Tokenized_Responses_20220131.pkl', 'rb') as f:\n", + "# ptdf, tokdocs = pickle.load(f)\n", + "\n", + "# with open(FILELOC + 'Tokenized_Responses_noproper_20220131.pkl', 'rb') as f:\n", + "# ptdf, tokdocs = pickle.load(f)\n", + "\n", + "with open(FILELOC + 'Tokenized_Responses_20220212.pkl', 'rb') as f:\n", + " ptdf, tokdocs = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yXE3Y-_523jO" + }, + "outputs": [], + "source": [ + "# take a list of tokenized 
documents (i.e. list of lists) and derive an integer\n", + "# mapping dictionary (0 = not used, 1 = out of vocabulary, 2+ are tokens) for\n", + "# all words (if num_words=None) or the num_words most common words\n", + "# It generates a 2D array of truncated / padded document vectors of length vec_len\n", + "# If lowercase is set to True, all tokens are converted to lowercase\n", + "# Out of vocabulary string is \"oov_str\" (default '<OOV>')\n", + "\n", + "class Token2Int(BaseEstimator,TransformerMixin):\n", + " def __init__(self, vec_len, num_words=None, oov_str='<OOV>', lowercase=True):\n", + " self.vec_len = vec_len\n", + " self.num_words = num_words\n", + " self.oov_str = oov_str\n", + " self.lowercase = lowercase\n", + "\n", + " def fit(self, X, y=None):\n", + " if type(X[0]) is not list:\n", + " X = [X] # only a single document was passed\n", + " if self.lowercase:\n", + " X = [[d.lower() for d in doc] for doc in X]\n", + " wc = Counter(itertools.chain.from_iterable(X))\n", + " self.word_count = wc\n", + " vocab = [w for w,c in wc.most_common(self.num_words)]\n", + " vocab.insert(0, self.oov_str) # assign 1 to OOV\n", + " self.vocab = vocab\n", + " self.vocab_size = len(vocab)\n", + " wordmap = {n:m+1 for m,n in enumerate(vocab)}\n", + " self.word_index = wordmap\n", + " self.index_word = {n:m for m,n in wordmap.items()}\n", + " return self\n", + "\n", + " def transform(self, X):\n", + " if type(X[0]) is not list:\n", + " X = [X] # only a single document was passed\n", + " # X = np.array(list(itertools.zip_longest(*X, fillvalue=0))).T\n", + " if self.lowercase:\n", + " # X = np.vectorize(str.lower)(X)\n", + " X = [[d.lower() for d in doc] for doc in X]\n", + " wordmap = self.word_index\n", + " vocab = self.vocab\n", + " veclen = self.vec_len\n", + " numdocs = len(X)\n", + " # wordmap['0'] = 0\n", + " # # textpad = np.array([t[:veclen] if len(t) >= veclen else t + ['0']*(veclen-len(t)) for t in X]).astype(str)\n", + " # X = [[wordmap.get(x, 1) for x in t] for t in X]\n", + " # return pad_sequences(X, maxlen=veclen, padding='post', truncating='post')\n", + " textpad = np.zeros((numdocs, veclen))\n", + " for d in tqdm(range(numdocs)):\n", + " doc = X[d]\n", + " doclen = min(len(doc), veclen)\n", + " textpad[d,:doclen] = [wordmap.get(word, 1) for word in doc[:doclen]]\n", + " # textpad[d,:doclen] = [wordmap[word] if word in vocab else 1 for word in doc[:doclen]]\n", + " return textpad\n", + "\n", + " def reverse(self, textpad):\n", + " texts = []\n", + " for row in textpad:\n", + " int2text = ['' if w==0 else self.index_word[w] for w in row]\n", + " texts.append(' '.join(int2text).strip())\n", + " return texts" + ] + },
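+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A minimal usage sketch of Token2Int on a hypothetical toy corpus (illustrative only): fit learns the word-to-integer map, transform returns the padded/truncated integer matrix (0 = padding, 1 = out-of-vocabulary), and reverse maps encoded rows back to text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative sketch only; toy_docs is a hypothetical stand-in for tokdocs (a list of token lists)\n", + "toy_docs = [['claim', 'obvious', 'prior', 'art'], ['claim', 'anticipated']]\n", + "toy2int = Token2Int(vec_len=6, num_words=3)\n", + "toy2int.fit(toy_docs)\n", + "print(toy2int.word_index) # e.g. {'<OOV>': 1, 'claim': 2, ...}\n", + "enc = toy2int.transform(toy_docs) # 2 x 6 array; 0 pads, 1 marks out-of-vocabulary tokens\n", + "print(enc)\n", + "print(toy2int.reverse(enc)) # back to text; padding dropped, out-of-vocabulary words shown as '<OOV>'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "t1TbHH0S23f9", + "outputId": "73a6aa85-4ead-4027-aac6-0188633a8e57" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "9283\n", + "9182\n" + ] + } + ], + "source": [ + "map_outcome2unpat = {'Denied': 0,\n", + " 'Denied on Rehearing': -1,\n", + " 'Mixed': 0,\n", + " 'Granted': 1,\n", + " 'Granted on Rehearing': -1,\n", + " 'Indefinite': -1,\n", + " }\n", + "ptdf['Unpatentable'] = ptdf['Decision'].map(map_outcome2unpat)\n", + "\n", + "selind = ptdf[ptdf['Unpatentable'] != -1].index\n", + "\n", + "print(len(ptdf))\n", + "ptdf.drop(ptdf[ptdf['Unpatentable'] == -1].index, inplace=True)\n", + "print(len(ptdf))\n", + "ptdf.reset_index(inplace=True)\n", + "tokdocs = [tokdocs[ind] for ind in range(len(tokdocs)) if ind in 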
selind]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_jbe0xu14bkn" + }, + "outputs": [], + "source": [ + "def f_pet(x):\n", + " case, partyname = x\n", + " if ' v.' in case:\n", + " try:\n", + " pet,po = case.split(' v.')\n", + " except:\n", + " print(case)\n", + " return pet.strip()\n", + " else:\n", + " return ' '.join(partyname)\n", + "\n", + "def f_po(x):\n", + " case = x\n", + " if ' v.' in case:\n", + " pet,po = case.split(' v.')\n", + " return po.strip()\n", + " else:\n", + " return 'UNKNOWN'\n", + "\n", + "ptdf['petitioner_raw'] = ptdf[['name', 'party_names']].apply(f_pet, axis=1)\n", + "ptdf['patent_owner_raw'] = ptdf['name'].apply(f_po)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JhZI2iUptJbt" + }, + "outputs": [], + "source": [ + "common_terms = ['corporation','corp.',' corp ', '& co','co.',' co ','company',\n", + " 'l.l.c.', 'llc', 'l.c', ' lc',\n", + " 'l.l.p.', 'llp', 'l.p.', ' lp',\n", + " 'incorporated', 'inc.', ' inc ',\n", + " 'limited', 'ltd',\n", + " ' sa ', ' se ', ' ag ',\n", + " 'gmbh', 'a/s', 'bv', ' nv', 'n.v.',\n", + " 'et al',\n", + " 'n.a.', ' us ', ' usa ',\n", + " '(us)', '(usa)', '(u.s.)', '(u.s.a.)',\n", + " '(california)', '(delaware)', '(united states)',\n", + " ' i,', 'ii', 'iii',\n", + " '1)', '2)',\n", + " ]\n", + "replace_common_terms = '|'.join(common_terms).replace('/','\\/').replace(' ','\\s').replace('.','\\.').replace('(','\\(').replace(')','\\)')\n", + "replace_common_terms += '|\\s\\d+\\s'\n", + "regexp_common_terms = re.compile(replace_common_terms, re.IGNORECASE)\n", + "\n", + "split_terms = ['d/b/a/', 'd/b/a', 'doing business as', 'formerly known as', 'f/k/a/', 'f/k/a', ' and ']\n", + "split_terms_list = '|'.join(split_terms).replace('/','\\/').replace(' ','\\s')\n", + "regexp_split_terms = re.compile(split_terms_list, re.IGNORECASE)\n", + "\n", + "def f_clean(x):\n", + " # no cleaning up special characters\n", + " # add a trailing whitepace to eliminate edge effects for \"lp\" and \"inc\"\n", + " x += ' '\n", + " # remove common terms\n", + " if any([t in x.lower() for t in common_terms]):\n", + " # remove commas and periods associated with these terms\n", + " x = re.sub(regexp_common_terms, '', x)\n", + " x = x.replace(', ',' '); x = x.replace('. 
',' ')\n", + " x = x.strip().strip(',').strip('.')\n", + "\n", + " if any([t in x.lower() for t in split_terms]):\n", + " x = re.split(regexp_split_terms, x)\n", + " x = ';'.join(x)\n", + "\n", + " return x\n", + "\n", + "ptdf['petitioner'] = ptdf['petitioner_raw'].apply(f_clean)\n", + "ptdf['patent_owner'] = ptdf['patent_owner_raw'].apply(f_clean)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 66, + "referenced_widgets": [ + "1b33de2a3a4a470aa6f07253885fe1bc", + "f63479552d154f019a083d60b5f7262e", + "64723b9f5a9742df9ba5425820b8674d", + "a30de5d77e1b46bd83cf8ecbeed63171", + "237488437a3a452493d875733ed29275", + "7c0035c0c2f842448c19f92eb1ac54d9", + "acfb2bfc634642cd86aad9009938c455", + "4ef7df9baa4444f5979dabf24975ffeb", + "46c770d287cb402a91ccbf7feece5753", + "84be481bd1844259a25de6207ce09998", + "3dbc67b9c8c646bd934013cb45df9907" + ] + }, + "id": "CY0JTAKe92st", + "outputId": "cb0088e3-0858-4fca-e8c1-ac44dab4ce2a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "20002\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1b33de2a3a4a470aa6f07253885fe1bc", + "version_minor": 0, + "version_major": 2 + }, + "text/plain": [ + " 0%| | 0/2631 [00:00<?, ?it/s]" + ] + }, + "metadata": {} + } + ], + "source": [ + "trainindex = ptdf[ptdf.date < pd.Timestamp(2017,12,31)].index\n", + "testindex = ptdf[ptdf.date > pd.Timestamp(2019,1,1)].index\n", + "\n", + "toktrain = [tokdocs[ind] for ind in range(len(tokdocs)) if ind in trainindex]\n", + "\n", + "tok2int = Token2Int(Params['text_length'], num_words = Params['num_words_to_use'])\n", + "tok2int.fit(toktrain)\n", + "Params['vocab_size'] = tok2int.vocab_size + 1 # add the 0 token\n", + "print(Params['vocab_size'])\n", + "\n", + "X_train = tok2int.transform(toktrain).astype(int)\n", + "Y_train = ptdf.loc[trainindex, 'Unpatentable'].values\n", + "\n", + "toktest = [tokdocs[ind] for ind in range(len(tokdocs)) if ind in testindex]\n", + "X_test = tok2int.transform(toktest).astype(int)\n", + "Y_test = ptdf.loc[testindex, 'Unpatentable'].values" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8P09Tr17Zx1o" + }, + "source": [ + "#Fit Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dldKFWGv-Jct" + }, + "outputs": [], + "source": [ + "if Params['sample_weighting']:\n", + " class_wts = list(class_weight.compute_class_weight(class_weight='balanced',\n", + " classes=np.unique(Y_train), y=Y_train))\n", + " print(class_wts)\n", + " Params['sample_weights'] = np.array([class_wts[yt] for yt in Y_train])\n", + "\n", + " num = len(Y_train)\n", + " num0 = len(np.where(Y_train==0)[0]); num1 = len(np.where(Y_train==1)[0])\n", + " if num1 < num0:\n", + " Params['initial_bias'] = np.log(num1/num0)\n", + " else:\n", + " Params['initial_bias'] = np.log(num0/num1)\n", + "\n", + "else:\n", + " Params['initial_bias'] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u6XCIyvAZxqD" + }, + "outputs": [], + "source": [ + "for run in range(5):\n", + " print(run)\n", + " \n", + " tf.keras.backend.clear_session() # reset Tensorflow session\n", + "\n", + " X_t = X_train; Y_t = Y_train \n", + "\n", + " with tpu_strategy.scope():\n", + " Params['loss'] = keras.losses.BinaryCrossentropy(from_logits=False)\n", + " Params['metrics'] = [keras.metrics.BinaryAccuracy(name='acc'),\n", + 
" keras.metrics.AUC(name='auc'),]\n", + " model = EmbedNN(Params)\n", + " model.compile(loss=Params['loss'],\n", + " optimizer=keras.optimizers.Adam(learning_rate=Params['learning_rate']),\n", + " metrics=Params['metrics'],\n", + " steps_per_execution = 100,)\n", + "\n", + " if Params['sample_weighting']:\n", + " train_dataset = tf.data.Dataset.from_tensor_slices((X_t, Y_t, Params['sample_weights']))\n", + " Params['val_sample_weights'] = np.array([class_wts[yt] for yt in Y_test])\n", + " val_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test, Params['val_sample_weights']))\n", + " else:\n", + " train_dataset = tf.data.Dataset.from_tensor_slices((X_t, Y_t))\n", + " val_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))\n", + " history = model.fit(train_dataset.batch(Params['batch_size']),\n", + " epochs = Params['num_epochs'], verbose = 1,)\n", + " # validation_data = val_dataset.batch(Params['batch_size']),\n", + " # callbacks=Params['callbacks'])\n", + "\n", + " print(\"Results for Testing Data:\")\n", + " test_predict = model.predict(X_test)\n", + " test_predict_bool = np.round(test_predict)\n", + " TestPredict = test_predict_bool\n", + " ClassRep = classification_report(Y_test, test_predict_bool)\n", + " ConfMatrix = confusion_matrix(Y_test, test_predict_bool)\n", + " print(ClassRep)\n", + " print(ConfMatrix)\n", + "\n", + " model.save_weights(FILELOC+\"responses_\"+str(run)+\"_wts.h5\", save_format='h5', overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vyNgcoOWZ2EQ" + }, + "source": [ + "#Interpret Results" + ] + }, + { + "cell_type": "code", + "source": [ + "if Params['sample_weighting']:\n", + " class_wts = list(class_weight.compute_class_weight(class_weight='balanced',\n", + " classes=np.unique(Y_train), y=Y_train))\n", + " print(class_wts)\n", + " Params['sample_weights'] = np.array([class_wts[yt] for yt in Y_train])\n", + "\n", + " num = len(Y_train)\n", + " num0 = len(np.where(Y_train==0)[0]); num1 = len(np.where(Y_train==1)[0])\n", + " if num1 < num0:\n", + " Params['initial_bias'] = np.log(num1/num0)\n", + " else:\n", + " Params['initial_bias'] = np.log(num0/num1)\n", + "\n", + "else:\n", + " Params['initial_bias'] = 0\n", + "\n", + "tf.keras.backend.clear_session()\n", + "with tpu_strategy.scope():\n", + " # try:\n", + " model = EmbedNN(Params)\n", + " model.load_weights(FILELOC+\"responses_wts.h5\")\n", + " pred_test = model.predict(X_test, verbose=False)\n", + "\n", + "att = {}\n", + "for n in range(2,12+1):\n", + " get_attention_model = keras.Model(inputs=model.input,outputs=model.get_layer(f'attention_{n}').output)\n", + " get_attention_model.compile()\n", + " att[n] = get_attention_model.predict(xtest, verbose=1)" + ], + "metadata": { + "id": "LSAzh3t1FeLn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##Attention Visualization" + ], + "metadata": { + "id": "q10V2RD5OZ4A" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "stL5tUYCWqUa" + }, + "outputs": [], + "source": [ + "from IPython.display import HTML\n", + "def hlstr(string, color='white'):\n", + " \"\"\"\n", + " Return HTML markup highlighting text with the desired color.\n", + " \"\"\"\n", + " return f\"<mark style=background-color:{color}>{string} </mark>\"\n", + "\n", + "def colorize(attrs, cmap='PiYG'):\n", + " \"\"\"\n", + " Compute hex colors based on the attributions for a single instance.\n", + " Uses a diverging colorscale by default and normalizes and 
scales\n", + " the colormap so that colors are consistent with the attributions.\n", + " \"\"\"\n", + " import matplotlib as mpl\n", + " cmap_bound = np.abs(attrs).max()\n", + " norm = mpl.colors.Normalize(vmin=-cmap_bound, vmax=cmap_bound)\n", + " cmap = mpl.cm.get_cmap(cmap)\n", + "\n", + " # now compute hex values of colors\n", + " colors = list(map(lambda x: mpl.colors.rgb2hex(cmap(norm(x))), attrs))\n", + " return colors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3STTYnjNgrf3" + }, + "outputs": [], + "source": [ + "N = 8\n", + "n = 7 # document index\n", + "print(pred [n])\n", + "xlen = np.where(X_test[n]==0)[0][0]\n", + "attvec = att[N][n][:xlen]\n", + "xvec = tok2int.reverse([X_test[n][:xlen]])[0]\n", + "strlen = len(attvec)\n", + "THRESH = np.median(attvec)\n", + "colors = colorize(attvec - THRESH)\n", + "\n", + "HTML(\"\".join(list(map(hlstr, xvec.split(), colors))))" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Highest attention words in sample" + ], + "metadata": { + "id": "HSMCjnRAOSm1" + } + }, + { + "cell_type": "code", + "source": [ + "uniquetokens = np.unique(xtest[n][:xlen])\n", + "print(len(uniquetokens))\n", + "tokpos = [np.where(xtest[n][:xlen] == tok)[0] for tok in uniquetokens]\n", + "meanatt = np.array([np.mean(att[2][n][np.array(tpos)]) for tpos in tokpos])\n", + "tokens_sorted_by_meanatt = uniquetokens[np.argsort(-meanatt)]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "m8U2xDZ_L7Nu", + "outputId": "9aef17aa-cddb-467d-f8c6-e43786812cc0" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "605\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "pd.DataFrame.from_dict({'Attention':[tok2int.index_word[t] for t in tokens_sorted_by_meanatt[:20]]})" + ], + "metadata": { + "id": "UJQzk6d8n7ZL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##LIME Analysis" + ], + "metadata": { + "id": "PLCP7ucjOGGx" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "44AaMHd3wGA5" + }, + "outputs": [], + "source": [ + "def lean_wrapper(texts):\n", + " x = pad_sequences(DTP.texts_to_sequences(texts),\n", + " maxlen = Params['text_length'],\n", + " padding='post',\n", + " truncating='post')\n", + " return np.hstack((1-model.predict(x), model.predict(x)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sEeA4AEzzRBZ" + }, + "outputs": [], + "source": [ + "n = 2774\n", + "xvec = DTP.sequences_to_texts([X_data[n]])[0]\n", + "\n", + "exp = LimeTextExplainer(class_names={0:'Denied',1:'Granted'})\n", + "exp_doc = exp.explain_instance(xvec, lean_wrapper, num_features=50)\n", + "# explist = exp_doc.as_list()\n", + "exp_doc.show_in_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8mKglEF9buWx" + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "accelerator": "TPU", + "colab": { + "collapsed_sections": [], + "name": "PTAB_Model_Decisions_github.ipynb", + "provenance": [], + "toc_visible": true, + "mount_file_id": "1X-M2SntuvoGIIjwFAtPNNIHXyKqSTUsK", + "authorship_tag": "ABX9TyMnadSiryOYGloyXckhl2DZ", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + 
"1b33de2a3a4a470aa6f07253885fe1bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HBoxView", + "_dom_classes": [], + "_model_name": "HBoxModel", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.5.0", + "box_style": "", + "layout": "IPY_MODEL_f63479552d154f019a083d60b5f7262e", + "_model_module": "@jupyter-widgets/controls", + "children": [ + "IPY_MODEL_64723b9f5a9742df9ba5425820b8674d", + "IPY_MODEL_a30de5d77e1b46bd83cf8ecbeed63171", + "IPY_MODEL_237488437a3a452493d875733ed29275" + ] + } + }, + "f63479552d154f019a083d60b5f7262e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "64723b9f5a9742df9ba5425820b8674d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_7c0035c0c2f842448c19f92eb1ac54d9", + "_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": "100%", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_acfb2bfc634642cd86aad9009938c455" + } + }, + "a30de5d77e1b46bd83cf8ecbeed63171": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "ProgressView", + "style": "IPY_MODEL_4ef7df9baa4444f5979dabf24975ffeb", + "_dom_classes": [], + "description": "", + "_model_name": "FloatProgressModel", + "bar_style": "success", + "max": 2631, + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": 2631, + "_view_count": null, + "_view_module_version": "1.5.0", + "orientation": "horizontal", + "min": 0, + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_46c770d287cb402a91ccbf7feece5753" + } + }, + "237488437a3a452493d875733ed29275": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "HTMLView", + "style": "IPY_MODEL_84be481bd1844259a25de6207ce09998", + 
"_dom_classes": [], + "description": "", + "_model_name": "HTMLModel", + "placeholder": "", + "_view_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "value": " 2631/2631 [00:05<00:00, 477.85it/s]", + "_view_count": null, + "_view_module_version": "1.5.0", + "description_tooltip": null, + "_model_module": "@jupyter-widgets/controls", + "layout": "IPY_MODEL_3dbc67b9c8c646bd934013cb45df9907" + } + }, + "7c0035c0c2f842448c19f92eb1ac54d9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "acfb2bfc634642cd86aad9009938c455": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "4ef7df9baa4444f5979dabf24975ffeb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "ProgressStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "bar_color": null, + "_model_module": "@jupyter-widgets/controls" + } + }, + "46c770d287cb402a91ccbf7feece5753": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": 
null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + }, + "84be481bd1844259a25de6207ce09998": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_view_name": "StyleView", + "_model_name": "DescriptionStyleModel", + "description_width": "", + "_view_module": "@jupyter-widgets/base", + "_model_module_version": "1.5.0", + "_view_count": null, + "_view_module_version": "1.2.0", + "_model_module": "@jupyter-widgets/controls" + } + }, + "3dbc67b9c8c646bd934013cb45df9907": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_view_name": "LayoutView", + "grid_template_rows": null, + "right": null, + "justify_content": null, + "_view_module": "@jupyter-widgets/base", + "overflow": null, + "_model_module_version": "1.2.0", + "_view_count": null, + "flex_flow": null, + "width": null, + "min_width": null, + "border": null, + "align_items": null, + "bottom": null, + "_model_module": "@jupyter-widgets/base", + "top": null, + "grid_column": null, + "overflow_y": null, + "overflow_x": null, + "grid_auto_flow": null, + "grid_area": null, + "grid_template_columns": null, + "flex": null, + "_model_name": "LayoutModel", + "justify_items": null, + "grid_row": null, + "max_height": null, + "align_content": null, + "visibility": null, + "align_self": null, + "height": null, + "min_height": null, + "padding": null, + "grid_auto_rows": null, + "grid_gap": null, + "max_width": null, + "order": null, + "_view_module_version": "1.2.0", + "grid_template_areas": null, + "object_position": null, + "object_fit": null, + "grid_auto_columns": null, + "margin": null, + "display": null, + "left": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file