{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/bahrad/PTAB/blob/master/PTAB_Model_Decisions_github.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "p-aVXISlSKYH"
},
"source": [
"#Initialization"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wOFk8JfaSMSV"
},
"source": [
"##Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ESf61HRPSERn"
},
"outputs": [],
"source": [
"%tensorflow_version 2.x\n",
"\n",
"%xmode Context\n",
"# Verbose\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import pickle\n",
"import os\n",
"import itertools\n",
"\n",
"from collections import Counter, defaultdict\n",
"import random\n",
"from pandas import DataFrame\n",
"import datetime\n",
"from datetime import datetime\n",
"import dateutil\n",
"from dateutil.parser import parse as dateparse\n",
"from tqdm.notebook import tqdm\n",
"import time\n",
"\n",
"import xgboost as xgb\n",
"\n",
"import sklearn as sk\n",
"from sklearn.preprocessing import MultiLabelBinarizer, QuantileTransformer, OneHotEncoder, StandardScaler\n",
"from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV\n",
"from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit\n",
"from sklearn.linear_model import LogisticRegression\n",
"import sklearn.metrics\n",
"from sklearn.metrics import accuracy_score,classification_report, make_scorer, balanced_accuracy_score, f1_score, coverage_error, roc_auc_score, confusion_matrix, plot_confusion_matrix\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.utils import resample, shuffle\n",
"from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin\n",
"from sklearn.neighbors import NearestNeighbors\n",
"from sklearn.manifold import TSNE\n",
"from sklearn.utils import class_weight\n",
"\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"\n",
"from imblearn.over_sampling import SMOTE, RandomOverSampler, ADASYN, BorderlineSMOTE\n",
"from imblearn.under_sampling import RandomUnderSampler, EditedNearestNeighbours, CondensedNearestNeighbour, AllKNN\n",
"from imblearn.combine import SMOTEENN, SMOTETomek\n",
"from imblearn.pipeline import make_pipeline,Pipeline\n",
"\n",
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"\n",
"import string\n",
"import re\n",
"# import unicodedata\n",
"\n",
"import nltk\n",
"nltk.download('stopwords')\n",
"from nltk.corpus import stopwords\n",
"STOPWORDS = set(stopwords.words('english'))\n",
"\n",
"nltk.download('averaged_perceptron_tagger')\n",
"nltk.download('wordnet')\n",
"nltk.download('punkt')\n",
"\n",
"!pip install lime\n",
"import lime\n",
"from lime import lime_text\n",
"from lime.lime_text import LimeTextExplainer\n",
"from lime.explanation import Explanation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8KTO164A7t1G"
},
"outputs": [],
"source": [
"# COMMENT OUT FOR PUBLIC CODE\n",
"from google.colab import drive, files\n",
"# drive.mount('/content/drive')\n",
"\n",
"# FILELOC = \"DATA/\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Xvw4VK92r4-b"
},
"outputs": [],
"source": [
"try:\n",
" tpu = tf.distribute.cluster_resolver.TPUClusterResolver() # TPU detection\n",
" print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])\n",
" tf.config.experimental_connect_to_cluster(tpu)\n",
" tf.tpu.experimental.initialize_tpu_system(tpu)\n",
" tpu_strategy = tf.distribute.TPUStrategy(tpu)\n",
" tpu_env=True\n",
"except ValueError:\n",
" print('Not connected to a TPU runtime.')\n",
" tpu_env=False"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "oSiHsBoY4_bv"
},
"source": [
"#Functions"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "E3qcPlglEzjC"
},
"source": [
"##Define Models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Va27pqfoegI_"
},
"outputs": [],
"source": [
"def EmbedNN(Params):\n",
"\n",
" inpTensor = keras.Input(shape=(Params['text_length'],))\n",
" \n",
"\n",
" \n",
" if Params['pretrained_embeddings']:\n",
" embedding = keras.layers.Embedding(Params['vocab_size'],\n",
" Params['embedding_dim'],\n",
" weights=[Params['embeddings']],\n",
" input_length=Params['text_length'],\n",
" mask_zero=True,\n",
" trainable=False,\n",
" ) \n",
" else:\n",
" embedding = keras.layers.Embedding(Params['vocab_size'],\n",
" Params['embedding_dim'],\n",
" mask_zero=True,\n",
" trainable=True,\n",
" name='embedding',\n",
" )\n",
" x = embedding(inpTensor)\n",
"\n",
" convs = []\n",
" filter_sizes = list(range(Params['min_filter_size'],Params['max_filter_size']+1))\n",
" for filter_size in filter_sizes:\n",
" l_conv = keras.layers.Conv1D(filters=Params['num_filters'], \n",
" kernel_size=filter_size,\n",
" kernel_regularizer=keras.regularizers.l2(Params['kernel_L2_reg']),\n",
" activation='relu')(x)\n",
" h = keras.layers.TimeDistributed(keras.layers.Dense(Params['num_filters'],\n",
" activation='tanh'))(l_conv)\n",
" attention = keras.layers.TimeDistributed(keras.layers.Dense(1, activation='tanh'))(h)\n",
" attention = keras.layers.Flatten()(attention) \n",
" attention = keras.layers.Softmax(axis=1,\n",
" name='attention_'+str(filter_size))(attention)\n",
" attention = keras.layers.RepeatVector(Params['num_filters'])(attention)\n",
" attention = keras.layers.Permute([2, 1])(attention)\n",
" representation = keras.layers.multiply([h, attention])\n",
" representation = tf.math.reduce_sum(representation, axis = 1)\n",
" convs.append(representation)\n",
" # l_pool = keras.layers.GlobalMaxPooling1D()(l_conv)\n",
" # convs.append(l_pool)\n",
" l_merge = keras.layers.concatenate(convs, axis=1)\n",
" \n",
" x = keras.layers.Dropout(Params['dropout_after_convs'])(l_merge) \n",
"\n",
" dense1 = keras.layers.Dense(Params['num_dense'],\n",
" kernel_constraint=Params['kernel_constraint'],\n",
" activation = 'relu')(x)\n",
" x = dense1\n",
" dropout1 = keras.layers.Dropout(Params['dropout_after_Dense'])(x)\n",
" x = dropout1\n",
"\n",
" if not Params['ifMulticlass']:\n",
" finalOut = keras.layers.Dense(1, activation='sigmoid',\n",
" bias_initializer=tf.keras.initializers.Constant(Params['initial_bias'])\n",
" )(x)\n",
" else:\n",
" finalOut = keras.layers.Dense(Params['nclasses'], activation='softmax')(x)\n",
"\n",
" # define the model's start and end points \n",
" model = keras.Model(inpTensor,finalOut)\n",
"\n",
" return model"
]
},
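{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only (not part of the original pipeline): a tiny standalone\n",
"# model showing the attention-pooling pattern that EmbedNN applies to each\n",
"# convolution branch: per-timestep scores -> softmax over time -> weighted sum.\n",
"# The toy shapes below are assumptions chosen just to keep the example small.\n",
"toy_steps, toy_feats = 5, 4\n",
"toy_inp = keras.Input(shape=(toy_steps, toy_feats))\n",
"toy_h = keras.layers.TimeDistributed(keras.layers.Dense(toy_feats, activation='tanh'))(toy_inp)\n",
"toy_scores = keras.layers.TimeDistributed(keras.layers.Dense(1, activation='tanh'))(toy_h)\n",
"toy_scores = keras.layers.Flatten()(toy_scores)\n",
"toy_weights = keras.layers.Softmax(axis=1)(toy_scores)            # one weight per timestep\n",
"toy_weights = keras.layers.RepeatVector(toy_feats)(toy_weights)   # (feats, steps)\n",
"toy_weights = keras.layers.Permute([2, 1])(toy_weights)           # back to (steps, feats)\n",
"toy_rep = keras.layers.multiply([toy_h, toy_weights])\n",
"toy_rep = tf.math.reduce_sum(toy_rep, axis=1)                     # pooled representation\n",
"toy_model = keras.Model(toy_inp, toy_rep)\n",
"print(toy_model(np.random.rand(2, toy_steps, toy_feats).astype('float32')).shape)  # expect (2, 4)"
]
},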
{
"cell_type": "markdown",
"metadata": {
"id": "xAfa4FkoIhU6"
},
"source": [
"#Define Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fDoDihkSvq7h"
},
"outputs": [],
"source": [
"Params = {}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ndFDWyjpIhU8"
},
"outputs": [],
"source": [
"Params['num_epochs'] = 50\n",
"\n",
"Params['learning_rate'] = 1e-4\n",
"if tpu_env:\n",
" Params['batch_size'] = 48\n",
"else:\n",
" Params['batch_size'] = 48\n",
"\n",
"Params['embedding_dim'] = 128 # 128\n",
"\n",
"# CNN parameters\n",
"Params['min_filter_size'] = 2\n",
"Params['max_filter_size'] = 12 # 12\n",
"Params['num_filters'] = 256 # 256\n",
"Params['dropout_after_convs'] = 0.4 # 0.4\n",
"# Dense Layer Parameters\n",
"Params['num_dense'] = 256 # 256\n",
"Params['dropout_after_Dense'] = 0.4\n",
"\n",
"# Transformer+Attention Model parameters\n",
"Params['embdim'] = 2000\n",
"Params['mask_zero'] = True\n",
"Params['numheads'] = 8\n",
"Params['ffdim'] = 64\n",
"Params['trans_drop'] = 0.4\n",
"Params['Nt'] = 1\n",
"Params['ifPreCNN'] = False\n",
"if Params['ifPreCNN']:\n",
" Params['W'] = 500\n",
" Params['Nc'] = 1\n",
" Params['Nl'] = 1\n",
"Params['num_dense_embed'] = 64 # 256\n",
"Params['dropout_after_Dense_embed'] = 0.0 #0.2\n",
"\n",
"Params['kernel_constraint'] = keras.constraints.max_norm(1.0)\n",
"Params['kernel_L2_reg'] = 0.1\n",
"Params['bias_L2_reg'] = 0.1\n",
"Params['activity_L2_reg'] = 0.1\n",
"\n",
"Params['ifMulticlass'] = False\n",
"Params['nclasses'] = 2\n",
"\n",
"Params['sample_weighting'] = True\n",
"\n",
"Params['loss'] = keras.losses.BinaryCrossentropy(from_logits=False)\n",
"if not tpu_env:\n",
" # otherwise have to define in the TPU environment\n",
" Params['metrics'] = [\n",
" # keras.metrics.TruePositives(name='tp'),\n",
" # keras.metrics.FalsePositives(name='fp'),\n",
" # keras.metrics.TrueNegatives(name='tn'),\n",
" # keras.metrics.FalseNegatives(name='fn'),\n",
" keras.metrics.BinaryAccuracy(name='acc'),\n",
" # keras.metrics.PrecisionAtRecall(0.5, name='par50'),\n",
" # keras.metrics.Precision(name='prec'),\n",
" # keras.metrics.Recall(name='rec'),\n",
" keras.metrics.AUC(name='auc'),\n",
" ]\n",
"\n",
"# Params['initial_bias'] = np.log(num1/num0)\n",
"# Params['initial_bias'] = np.log(2) # default\n",
"# Params['initial_bias'] = None\n",
"\n",
"Params['ifEarlyStopping'] = True\n",
"# Params['ifEarlyStopping'] = False\n",
"# Params['monitor'] = 'loss'\n",
"Params['monitor'] = 'val_auc'\n",
"Params['patience'] = 10\n",
"early_stopping = tf.keras.callbacks.EarlyStopping(\n",
" monitor = Params['monitor'],\n",
" verbose = 2,\n",
" patience = Params['patience'],\n",
" mode = 'auto',\n",
" min_delta = 0,\n",
" restore_best_weights = True\n",
" )\n",
"Params['callbacks'] = [early_stopping]"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wsYToQkTO5iF"
},
"source": [
"#Text Preprocessing & Tokenization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tDd45DbnVWDp"
},
"outputs": [],
"source": [
"DOCTYPE = 'Responses'\n",
"\n",
"Params['pretrained_embeddings'] = False\n",
"\n",
"# Params['num_words_to_use'] = None\n",
"Params['num_words_to_use'] = 20000\n",
"# Params['num_words_to_use'] = 1000\n",
"\n",
"\n",
"# Params['text_length'] = 4000\n",
"Params['text_length'] = 8000\n",
"Params['text_start'] = 0 # 100\n",
"Params['text_end'] = Params['text_start'] + Params['text_length']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "uor3Kbin2lfZ"
},
"outputs": [],
"source": [
"ptab = pd.read_csv(FILELOC + 'PTAB_Institution_Proceedings_to_20211231.tsv', sep='\\t')\n",
"# print(len(ptab))\n",
"# ptab.drop_duplicates('Proceeding', inplace=True)\n",
"# print(len(ptab))\n",
"# ptab['date'] = ptab['Case Filing Date'].apply(dateparse)\n",
"\n",
"# trainingvariable = 'Responses'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Qx6RjOg82lcU"
},
"outputs": [],
"source": [
"# with open(FILELOC + 'IPR_Proceeding_PartyNames_12312022.txt', 'r', encoding=\"ISO-8859-1\") as f:\n",
"# outfile = [line.rstrip('\\n') for line in f]\n",
"# case = []; number = []\n",
"# for k in range(0,len(outfile),2):\n",
"# case.append(outfile[k])\n",
"# number.append(outfile[k+1])\n",
"# casedf = pd.DataFrame.from_dict({'case':case, 'proc':number})\n",
"# casedf.drop_duplicates('proc', inplace=True)\n",
"# casedf['proc'] = casedf['proc'].apply(lambda x: x.split('(')[0].strip())\n",
"# casedf['name'] = casedf['case'].apply(lambda x: x.strip(\"\\\"\"))\n",
"# casedf[casedf.name.str.contains('Petition')].to_csv('a.csv')\n",
"\n",
"# common_names = set(['business', 'doing', 'company', 'corporation', 'formerly', 'et', 'al'])\n",
"\n",
"# def f(x):\n",
"# y = x\n",
"# if 'Petition' in x:\n",
"# if 'Covered' in x:\n",
"# y = x.replace(\"Petition for Covered Business Method Patent Review by\",\"\")\n",
"# elif 'Inter' in x:\n",
"# y = x.replace(\"Petition for Inter Partes Review by\", \"\")\n",
"# y = y.translate(str.maketrans('', '', string.punctuation))\n",
"# if 'v' in y:\n",
"# y = y.replace(\"v\", \"\")\n",
"# y = [s.strip() for s in y.strip().split(' ') if s != \"\" and s not in STOPWORDS|common_names]\n",
"# return y\n",
"# casedf['party_names'] = casedf['name'].apply(f)\n",
"\n",
"# ptdf = pd.merge(ptab,casedf,left_on='Proceeding',right_on='proc',how='inner')\n",
"# print(len(ptab), len(casedf), len(ptdf))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YxdJ9pvjXJSk"
},
"outputs": [],
"source": [
"# # Drop error messages and duplicates\n",
"\n",
"# ptdf.drop(columns=list({'Petitions','Responses','Decisions'}-{DOCTYPE}), inplace=True)\n",
"# ptdf.drop(columns=['case','proc'], inplace=True)\n",
"\n",
"# ptdf[DOCTYPE] = ptdf[DOCTYPE].fillna('NA')\n",
"\n",
"# # clean up texts by removing (cid:##) which is likely an artifact of the PDF reading process\n",
"# cid_str = re.compile(\"\\(cid:\\d+\\)\")\n",
"# def f(x):\n",
"# return re.sub(cid_str, \"\", x)\n",
"# ptdf[DOCTYPE] = ptdf[DOCTYPE].apply(f)\n",
"\n",
"# def get_word_count(text):\n",
"# return len(text.split())\n",
"# ptdf[f'{DOCTYPE}_Len'] = ptdf[DOCTYPE].apply(get_word_count)\n",
"# MIN_LENGTH = 50\n",
"\n",
"# print(len(ptdf))\n",
"# ptdf.drop(ptdf[ptdf[f'{DOCTYPE}_Len'] < MIN_LENGTH].index, inplace=True)\n",
"# print(len(ptdf))\n",
"# ptdf.drop_duplicates(DOCTYPE, keep=False, inplace=True)\n",
"# print(len(ptdf))\n",
"\n",
"# ptdf.reset_index(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pUFUMGsMiLmt"
},
"outputs": [],
"source": [
"Params['remove_stop_words'] = True\n",
"Params['remove_alphanumeric'] = True\n",
"Params['remove_punctuation'] = True\n",
"Params['remove_shortword_size'] = 3\n",
"Params['remove_propernouns'] = True\n",
"\n",
"Params['clean_all'] = True\n",
"Params['remove_shortword_size'] = 3\n",
"\n",
"Params['use_lowercase'] = True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WMKf8CVm39nn"
},
"outputs": [],
"source": [
"# remove_shortword_size = Params['remove_shortword_size']\n",
"# def process_docs(x):\n",
"# doc = x.replace(\"‘\", \"\\'\").replace(\"’\", \"\\'\").replace(\"´\", \"\\'\").replace(\"“\", \"\\\"\").replace(\"”\", \"\\\"\")\n",
"# t = nltk.tokenize.word_tokenize(doc)\n",
"# PUNCT = set(string.punctuation + u\"‘’´`“”–-§\")\n",
"# tags = nltk.tag.pos_tag(t)\n",
"# propernouns = set([a for a,b in tags if b=='NNP'])\n",
"# # shortwords = set([tt for tt in t if len(tt) <= remove_shortword_size])\n",
"# noisewords = set([tt for tt in t if (len(tt) <= 2) and any(map(lambda x: x in PUNCT, tt))])\n",
"# numwords = set([tt for tt in t if any(map(str.isdigit, tt))])\n",
"# emailwords = set([tt for tt in t if '@' in tt])\n",
"# dotwords = set([tt for tt in t if '.' in tt])\n",
" \n",
"# # reject_list = PUNCT|propernouns|STOPWORDS|shortwords|numwords|emailwords|dotwords\n",
"# reject_list = PUNCT|propernouns|numwords|emailwords|dotwords|noisewords\n",
"# proct = [tt for tt in t if tt not in reject_list]\n",
"# return proct\n",
"\n",
"# doclist = ptdf[DOCTYPE].tolist()\n",
"# # docmap = map(process_docs, doclist)\n",
"# # tokdocs = [doc for doc in tqdm(docmap)]\n",
"# tokdocs = [process_docs(doc) for doc in tqdm(doclist)]\n",
"# with open(FILELOC + 'Tokenized_Responses_20220212.pkl', 'wb') as f:\n",
"# pickle.dump([ptdf, tokdocs], f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "LkiSq11eoXag"
},
"outputs": [],
"source": [
"# if Params['remove_propernouns'] or Params['clean_all']:\n",
"# def f(x):\n",
"# if 'v.' not in x:\n",
"# return 'NO_PARTY'\n",
"# else:\n",
"# y = x.split('v.')\n",
"# petitioner = y[0].split()[0].strip().replace(',', '')\n",
"# patentowner = y[1].split()[0].strip().replace(',', '')\n",
"# return [petitioner, patentowner]\n",
"\n",
"# parties_first = casedf['name'].apply(f).values\n",
"# CASENAMES = set(itertools.chain.from_iterable(parties_first))\n",
"\n",
"# docs = ptdf[DOCTYPE].values\n",
"# partyname_list = ptdf['party_names'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_7WJ3Cqxbb5j"
},
"outputs": [],
"source": [
"# if Params['clean_all']:\n",
"# remove_shortword_size = Params['remove_shortword_size']\n",
"# def process_docs(x):\n",
"# doc = x\n",
"# t = nltk.tokenize.word_tokenize(doc)\n",
"# PUNCT = string.punctuation + u\"‘’´“”–-\"\n",
"# propernouns = set([a for a,b in nltk.tag.pos_tag(t) if b=='NNP'])\n",
"# shortwords = set([tt for tt in t if len(tt) <= remove_shortword_size])\n",
"# reject_list = set(PUNCT)|propernouns|STOPWORDS|shortwords\n",
"\n",
"# proct = [tt for tt in t if tt.isalpha() and tt not in reject_list]\n",
"# return proct\n",
"\n",
"# else:\n",
"# remove_punct = Params['remove_punctuation']\n",
"# remove_stopwords = Params['remove_stop_words']\n",
"# remove_alphanumeric = Params['remove_alphanumeric']\n",
"# # set to False or None if not used otherwise remove this length or less\n",
"# remove_shortword_size = Params['remove_shortword_size']\n",
"# remove_proper = Params['remove_propernouns']\n",
"\n",
"# def process_docs(x):\n",
"# doc, partynames = x\n",
"# t = nltk.tokenize.word_tokenize(doc)\n",
"# PUNCT = string.punctuation + u\"‘’´“”–-\"\n",
"# if remove_punct:\n",
"# proct = [tt for tt in t if tt not in set(PUNCT)]\n",
"# if remove_stopwords:\n",
"# proct = [tt for tt in proct if tt not in STOPWORDS]\n",
"# if remove_alphanumeric:\n",
"# proct = [tt for tt in proct if tt.isalpha()]\n",
"# if remove_shortword_size:\n",
"# proct = [tt for tt in proct if len(tt) > remove_shortword_size]\n",
"# propernouns = set([a for a,b in nltk.tag.pos_tag(proct) if b=='NNP'])\n",
"# if Params['keep_case_names']:\n",
"# propernouns = propernouns - (CASENAMES - set(partynames))\n",
"# if remove_proper:\n",
"# proct = [tt for tt in proct if tt not in propernouns]\n",
"# return proct\n",
"\n",
"# if Params['clean_all']:\n",
"# tokdocs = ptdf[DOCTYPE].apply(process_docs)\n",
"# else:\n",
"# tokdocs = [process_docs([docs[ind], partyname_list[ind]]) for ind in tqdm(ptdf.index)]\n",
"\n",
"# # with open(FILELOC + 'Tokenized_Responses_20220131.pkl', 'wb') as f:\n",
"# # pickle.dump([ptdf, tokdocs], f)\n",
"# # with open(FILELOC + 'Tokenized_Decisions_20220131.pkl', 'wb') as f:\n",
"# # pickle.dump([ptdf, tokdocs], f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "panQ-y442lY6"
},
"outputs": [],
"source": [
"# with open(FILELOC + 'Tokenized_Responses_20220131.pkl', 'rb') as f:\n",
"# ptdf, tokdocs = pickle.load(f)\n",
"\n",
"# with open(FILELOC + 'Tokenized_Responses_noproper_20220131.pkl', 'rb') as f:\n",
"# ptdf, tokdocs = pickle.load(f)\n",
"\n",
"with open(FILELOC + 'Tokenized_Responses_20220212.pkl', 'rb') as f:\n",
" ptdf, tokdocs = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yXE3Y-_523jO"
},
"outputs": [],
"source": [
"# take a list of tokenized documents (i.e. list of lists) and derive an integer\n",
"# mapping dictionary (0 = not used, 1 = out of vocabular, 2+ are tokens) for the\n",
"# all (if num_words=None) or num_words most common words\n",
"# It will generate a 2D array of truncated / padded document vectors (vec_len)\n",
"# If lowercase set to True then converts all tokens to lowercase\n",
"# Out of vocabulary string is \"oov_str\" (default '<OOV>')\n",
"\n",
"class Token2Int(BaseEstimator,TransformerMixin):\n",
" def __init__(self, vec_len, num_words=None, oov_str='<OOV>', lowercase=True):\n",
" self.vec_len = vec_len\n",
" self.num_words = num_words\n",
" self.oov_str = oov_str\n",
" self.lowercase = lowercase\n",
"\n",
" def fit(self, X, y=None):\n",
" if type(X[0]) is not list:\n",
" X = [X] # only a single document was passed\n",
" if self.lowercase:\n",
" X = [[d.lower() for d in doc] for doc in X]\n",
" wc = Counter(itertools.chain.from_iterable(X))\n",
" self.word_count = wc\n",
" vocab = [w for w,c in wc.most_common(self.num_words)]\n",
" vocab.insert(0, self.oov_str) # assign 1 to OOV\n",
" self.vocab = vocab\n",
" self.vocab_size = len(vocab)\n",
" wordmap = {n:m+1 for m,n in enumerate(vocab)}\n",
" self.word_index = wordmap\n",
" self.index_word = {n:m for m,n in wordmap.items()}\n",
" return self\n",
"\n",
" def transform(self, X):\n",
" if type(X[0]) is not list:\n",
" X = [X] # only a single document was passed\n",
" # X = np.array(list(itertools.zip_longest(*X, fillvalue=0))).T\n",
" if self.lowercase:\n",
" # X = np.vectorize(str.lower)(X)\n",
" X = [[d.lower() for d in doc] for doc in X]\n",
" wordmap = self.word_index\n",
" vocab = self.vocab\n",
" veclen = self.vec_len\n",
" numdocs = len(X)\n",
" # wordmap['0'] = 0\n",
" # # textpad = np.array([t[:veclen] if len(t) >= veclen else t + ['0']*(veclen-len(t)) for t in X]).astype(str)\n",
" # X = [[wordmap.get(x, 1) for x in t] for t in X]\n",
" # return pad_sequences(X, maxlen=veclen, padding='post', truncating='post')\n",
" textpad = np.zeros((numdocs, veclen))\n",
" for d in tqdm(range(numdocs)):\n",
" doc = X[d]\n",
" doclen = min(len(doc), veclen)\n",
" textpad[d,:doclen] = [wordmap.get(word, 1) for word in doc[:doclen]]\n",
" # textpad[d,:doclen] = [wordmap[word] if word in vocab else 1 for word in doc[:doclen]]\n",
" return textpad\n",
"\n",
" def reverse(self, textpad):\n",
" texts = []\n",
" for row in textpad:\n",
" int2text = ['' if w==0 else self.index_word[w] for w in row]\n",
" texts.append(' '.join(int2text).strip())\n",
" return texts"
]
},
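{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal illustration with made-up toy documents (not part of the pipeline) of how\n",
"# Token2Int maps tokenized documents to fixed-length integer vectors and back:\n",
"# 0 = padding, 1 = out-of-vocabulary, 2+ = vocabulary words ranked by frequency.\n",
"toy_docs = [['the', 'claim', 'is', 'obvious'],\n",
"            ['the', 'claim', 'is', 'not', 'anticipated']]\n",
"toy_t2i = Token2Int(vec_len=6, num_words=4)\n",
"toy_t2i.fit(toy_docs)\n",
"toy_mat = toy_t2i.transform(toy_docs).astype(int)\n",
"print(toy_mat)                   # padded integer matrix; words outside the top-4 vocabulary map to 1\n",
"print(toy_t2i.reverse(toy_mat))  # round trip back to text, with '<OOV>' marking out-of-vocabulary words"
]
},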
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "t1TbHH0S23f9",
"outputId": "73a6aa85-4ead-4027-aac6-0188633a8e57"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"9283\n",
"9182\n"
]
}
],
"source": [
"map_outcome2unpat = {'Denied': 0,\n",
" 'Denied on Rehearing': -1,\n",
" 'Mixed': 0,\n",
" 'Granted': 1,\n",
" 'Granted on Rehearing': -1,\n",
" 'Indefinite': -1,\n",
" }\n",
"ptdf['Unpatentable'] = ptdf['Decision'].map(map_outcome2unpat)\n",
"\n",
"selind = ptdf[ptdf['Unpatentable'] != -1].index\n",
"\n",
"print(len(ptdf))\n",
"ptdf.drop(ptdf[ptdf['Unpatentable'] == -1].index, inplace=True)\n",
"print(len(ptdf))\n",
"ptdf.reset_index(inplace=True)\n",
"tokdocs = [tokdocs[ind] for ind in range(len(tokdocs)) if ind in selind]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_jbe0xu14bkn"
},
"outputs": [],
"source": [
"def f_pet(x):\n",
" case, partyname = x\n",
" if ' v.' in case:\n",
" try:\n",
" pet,po = case.split(' v.')\n",
" except:\n",
" print(case)\n",
" return pet.strip()\n",
" else:\n",
" return ' '.join(partyname)\n",
"\n",
"def f_po(x):\n",
" case = x\n",
" if ' v.' in case:\n",
" pet,po = case.split(' v.')\n",
" return po.strip()\n",
" else:\n",
" return 'UNKNOWN'\n",
"\n",
"ptdf['petitioner_raw'] = ptdf[['name', 'party_names']].apply(f_pet, axis=1)\n",
"ptdf['patent_owner_raw'] = ptdf['name'].apply(f_po)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "JhZI2iUptJbt"
},
"outputs": [],
"source": [
"common_terms = ['corporation','corp.',' corp ', '& co','co.',' co ','company',\n",
" 'l.l.c.', 'llc', 'l.c', ' lc',\n",
" 'l.l.p.', 'llp', 'l.p.', ' lp',\n",
" 'incorporated', 'inc.', ' inc ',\n",
" 'limited', 'ltd',\n",
" ' sa ', ' se ', ' ag ',\n",
" 'gmbh', 'a/s', 'bv', ' nv', 'n.v.',\n",
" 'et al',\n",
" 'n.a.', ' us ', ' usa ',\n",
" '(us)', '(usa)', '(u.s.)', '(u.s.a.)',\n",
" '(california)', '(delaware)', '(united states)',\n",
" ' i,', 'ii', 'iii',\n",
" '1)', '2)',\n",
" ]\n",
"replace_common_terms = '|'.join(common_terms).replace('/','\\/').replace(' ','\\s').replace('.','\\.').replace('(','\\(').replace(')','\\)')\n",
"replace_common_terms += '|\\s\\d+\\s'\n",
"regexp_common_terms = re.compile(replace_common_terms, re.IGNORECASE)\n",
"\n",
"split_terms = ['d/b/a/', 'd/b/a', 'doing business as', 'formerly known as', 'f/k/a/', 'f/k/a', ' and ']\n",
"split_terms_list = '|'.join(split_terms).replace('/','\\/').replace(' ','\\s')\n",
"regexp_split_terms = re.compile(split_terms_list, re.IGNORECASE)\n",
"\n",
"def f_clean(x):\n",
" # no cleaning up special characters\n",
" # add a trailing whitepace to eliminate edge effects for \"lp\" and \"inc\"\n",
" x += ' '\n",
" # remove common terms\n",
" if any([t in x.lower() for t in common_terms]):\n",
" # remove commas and periods associated with these terms\n",
" x = re.sub(regexp_common_terms, '', x)\n",
" x = x.replace(', ',' '); x = x.replace('. ',' ')\n",
" x = x.strip().strip(',').strip('.')\n",
"\n",
" if any([t in x.lower() for t in split_terms]):\n",
" x = re.split(regexp_split_terms, x)\n",
" x = ';'.join(x)\n",
"\n",
" return x\n",
"\n",
"ptdf['petitioner'] = ptdf['petitioner_raw'].apply(f_clean)\n",
"ptdf['patent_owner'] = ptdf['patent_owner_raw'].apply(f_clean)"
]
},
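{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick illustrative check with made-up party names (assumptions, not rows from the\n",
"# TSV) showing how f_clean strips corporate suffixes and splits alias phrases such\n",
"# as 'd/b/a' into ';'-separated names.\n",
"for toy_name in ['Acme Corporation', 'Initech LLC d/b/a Initech Software']:\n",
"    print(repr(toy_name), '->', repr(f_clean(toy_name)))"
]
},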
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 66,
"referenced_widgets": [
"1b33de2a3a4a470aa6f07253885fe1bc",
"f63479552d154f019a083d60b5f7262e",
"64723b9f5a9742df9ba5425820b8674d",
"a30de5d77e1b46bd83cf8ecbeed63171",
"237488437a3a452493d875733ed29275",
"7c0035c0c2f842448c19f92eb1ac54d9",
"acfb2bfc634642cd86aad9009938c455",
"4ef7df9baa4444f5979dabf24975ffeb",
"46c770d287cb402a91ccbf7feece5753",
"84be481bd1844259a25de6207ce09998",
"3dbc67b9c8c646bd934013cb45df9907"
]
},
"id": "CY0JTAKe92st",
"outputId": "cb0088e3-0858-4fca-e8c1-ac44dab4ce2a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"20002\n"
]
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1b33de2a3a4a470aa6f07253885fe1bc",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
" 0%| | 0/2631 [00:00<?, ?it/s]"
]
},
"metadata": {}
}
],
"source": [
"trainindex = ptdf[ptdf.date < pd.Timestamp(2017,12,31)].index\n",
"testindex = ptdf[ptdf.date > pd.Timestamp(2019,1,1)].index\n",
"\n",
"toktrain = [tokdocs[ind] for ind in range(len(tokdocs)) if ind in trainindex]\n",
"\n",
"tok2int = Token2Int(Params['text_length'], num_words = Params['num_words_to_use'])\n",
"tok2int.fit(toktrain)\n",
"Params['vocab_size'] = tok2int.vocab_size + 1 # add the 0 token\n",
"print(Params['vocab_size'])\n",
"\n",
"X_train = tok2int.transform(toktrain).astype(int)\n",
"Y_train = ptdf.loc[trainindex, 'Unpatentable'].values\n",
"\n",
"toktest = [tokdocs[ind] for ind in range(len(tokdocs)) if ind in testindex]\n",
"X_test = tok2int.transform(toktest).astype(int)\n",
"Y_test = ptdf.loc[testindex, 'Unpatentable'].values"
]
},
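{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check of the temporal split above: matrix shapes and the share of\n",
"# positive labels ('Granted' decisions mapped to Unpatentable = 1) in each split.\n",
"print('train:', X_train.shape, 'test:', X_test.shape)\n",
"print('positive rate train: %.3f  test: %.3f' % (Y_train.mean(), Y_test.mean()))"
]
},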
{
"cell_type": "markdown",
"metadata": {
"id": "8P09Tr17Zx1o"
},
"source": [
"#Fit Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "dldKFWGv-Jct"
},
"outputs": [],
"source": [
"if Params['sample_weighting']:\n",
" class_wts = list(class_weight.compute_class_weight(class_weight='balanced',\n",
" classes=np.unique(Y_train), y=Y_train))\n",
" print(class_wts)\n",
" Params['sample_weights'] = np.array([class_wts[yt] for yt in Y_train])\n",
"\n",
" num = len(Y_train)\n",
" num0 = len(np.where(Y_train==0)[0]); num1 = len(np.where(Y_train==1)[0])\n",
" if num1 < num0:\n",
" Params['initial_bias'] = np.log(num1/num0)\n",
" else:\n",
" Params['initial_bias'] = np.log(num0/num1)\n",
"\n",
"else:\n",
" Params['initial_bias'] = 0"
]
},
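{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Worked numeric check (illustrative counts only) of the output-bias initialization\n",
"# used above: with bias b = log(num1/num0), an untrained sigmoid output starts out\n",
"# predicting the positive-class prior rather than 0.5.\n",
"toy_num0, toy_num1 = 800, 200\n",
"toy_bias = np.log(toy_num1 / toy_num0)\n",
"print(1.0 / (1.0 + np.exp(-toy_bias)))   # 0.2 == toy_num1 / (toy_num0 + toy_num1)"
]
},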
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u6XCIyvAZxqD"
},
"outputs": [],
"source": [
"for run in range(5):\n",
" print(run)\n",
" \n",
" tf.keras.backend.clear_session() # reset Tensorflow session\n",
"\n",
" X_t = X_train; Y_t = Y_train \n",
"\n",
" with tpu_strategy.scope():\n",
" Params['loss'] = keras.losses.BinaryCrossentropy(from_logits=False)\n",
" Params['metrics'] = [keras.metrics.BinaryAccuracy(name='acc'),\n",
" keras.metrics.AUC(name='auc'),]\n",
" model = EmbedNN(Params)\n",
" model.compile(loss=Params['loss'],\n",
" optimizer=keras.optimizers.Adam(learning_rate=Params['learning_rate']),\n",
" metrics=Params['metrics'],\n",
" steps_per_execution = 100,)\n",
"\n",
" if Params['sample_weighting']:\n",
" train_dataset = tf.data.Dataset.from_tensor_slices((X_t, Y_t, Params['sample_weights']))\n",
" Params['val_sample_weights'] = np.array([class_wts[yt] for yt in Y_test])\n",
" val_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test, Params['val_sample_weights']))\n",
" else:\n",
" train_dataset = tf.data.Dataset.from_tensor_slices((X_t, Y_t))\n",
" val_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))\n",
" history = model.fit(train_dataset.batch(Params['batch_size']),\n",
" epochs = Params['num_epochs'], verbose = 1,)\n",
" # validation_data = val_dataset.batch(Params['batch_size']),\n",
" # callbacks=Params['callbacks'])\n",
"\n",
" print(\"Results for Testing Data:\")\n",
" test_predict = model.predict(X_test)\n",
" test_predict_bool = np.round(test_predict)\n",
" TestPredict = test_predict_bool\n",
" ClassRep = classification_report(Y_test, test_predict_bool)\n",
" ConfMatrix = confusion_matrix(Y_test, test_predict_bool)\n",
" print(ClassRep)\n",
" print(ConfMatrix)\n",
"\n",
" model.save_weights(FILELOC+\"responses_\"+str(run)+\"_wts.h5\", save_format='h5', overwrite=True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vyNgcoOWZ2EQ"
},
"source": [
"#Interpret Results"
]
},
{
"cell_type": "code",
"source": [
"if Params['sample_weighting']:\n",
" class_wts = list(class_weight.compute_class_weight(class_weight='balanced',\n",
" classes=np.unique(Y_train), y=Y_train))\n",
" print(class_wts)\n",
" Params['sample_weights'] = np.array([class_wts[yt] for yt in Y_train])\n",
"\n",
" num = len(Y_train)\n",
" num0 = len(np.where(Y_train==0)[0]); num1 = len(np.where(Y_train==1)[0])\n",
" if num1 < num0:\n",
" Params['initial_bias'] = np.log(num1/num0)\n",
" else:\n",
" Params['initial_bias'] = np.log(num0/num1)\n",
"\n",
"else:\n",
" Params['initial_bias'] = 0\n",
"\n",
"tf.keras.backend.clear_session()\n",
"with tpu_strategy.scope():\n",
" # try:\n",
" model = EmbedNN(Params)\n",
" model.load_weights(FILELOC+\"responses_wts.h5\")\n",
" pred_test = model.predict(X_test, verbose=False)\n",
"\n",
"att = {}\n",
"for n in range(2,12+1):\n",
" get_attention_model = keras.Model(inputs=model.input,outputs=model.get_layer(f'attention_{n}').output)\n",
" get_attention_model.compile()\n",
" att[n] = get_attention_model.predict(xtest, verbose=1)"
],
"metadata": {
"id": "LSAzh3t1FeLn"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"##Attention Visualization"
],
"metadata": {
"id": "q10V2RD5OZ4A"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "stL5tUYCWqUa"
},
"outputs": [],
"source": [
"from IPython.display import HTML\n",
"def hlstr(string, color='white'):\n",
" \"\"\"\n",
" Return HTML markup highlighting text with the desired color.\n",
" \"\"\"\n",
" return f\"<mark style=background-color:{color}>{string} </mark>\"\n",
"\n",
"def colorize(attrs, cmap='PiYG'):\n",
" \"\"\"\n",
" Compute hex colors based on the attributions for a single instance.\n",
" Uses a diverging colorscale by default and normalizes and scales\n",
" the colormap so that colors are consistent with the attributions.\n",
" \"\"\"\n",
" import matplotlib as mpl\n",
" cmap_bound = np.abs(attrs).max()\n",
" norm = mpl.colors.Normalize(vmin=-cmap_bound, vmax=cmap_bound)\n",
" cmap = mpl.cm.get_cmap(cmap)\n",
"\n",
" # now compute hex values of colors\n",
" colors = list(map(lambda x: mpl.colors.rgb2hex(cmap(norm(x))), attrs))\n",
" return colors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3STTYnjNgrf3"
},
"outputs": [],
"source": [
"N = 8\n",
"n = 7 # document index\n",
"print(pred [n])\n",
"xlen = np.where(X_test[n]==0)[0][0]\n",
"attvec = att[N][n][:xlen]\n",
"xvec = tok2int.reverse([X_test[n][:xlen]])[0]\n",
"strlen = len(attvec)\n",
"THRESH = np.median(attvec)\n",
"colors = colorize(attvec - THRESH)\n",
"\n",
"HTML(\"\".join(list(map(hlstr, xvec.split(), colors))))"
]
},
{
"cell_type": "markdown",
"source": [
"Highest attention words in sample"
],
"metadata": {
"id": "HSMCjnRAOSm1"
}
},
{
"cell_type": "code",
"source": [
"uniquetokens = np.unique(xtest[n][:xlen])\n",
"print(len(uniquetokens))\n",
"tokpos = [np.where(xtest[n][:xlen] == tok)[0] for tok in uniquetokens]\n",
"meanatt = np.array([np.mean(att[2][n][np.array(tpos)]) for tpos in tokpos])\n",
"tokens_sorted_by_meanatt = uniquetokens[np.argsort(-meanatt)]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "m8U2xDZ_L7Nu",
"outputId": "9aef17aa-cddb-467d-f8c6-e43786812cc0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"605\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"pd.DataFrame.from_dict({'Attention':[tok2int.index_word[t] for t in tokens_sorted_by_meanatt[:20]]})"
],
"metadata": {
"id": "UJQzk6d8n7ZL"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"##LIME Analysis"
],
"metadata": {
"id": "PLCP7ucjOGGx"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "44AaMHd3wGA5"
},
"outputs": [],
"source": [
"def lean_wrapper(texts):\n",
" x = pad_sequences(DTP.texts_to_sequences(texts),\n",
" maxlen = Params['text_length'],\n",
" padding='post',\n",
" truncating='post')\n",
" return np.hstack((1-model.predict(x), model.predict(x)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sEeA4AEzzRBZ"
},
"outputs": [],
"source": [
"n = 2774\n",
"xvec = DTP.sequences_to_texts([X_data[n]])[0]\n",
"\n",
"exp = LimeTextExplainer(class_names={0:'Denied',1:'Granted'})\n",
"exp_doc = exp.explain_instance(xvec, lean_wrapper, num_features=50)\n",
"# explist = exp_doc.as_list()\n",
"exp_doc.show_in_notebook()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8mKglEF9buWx"
},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"accelerator": "TPU",
"colab": {
"collapsed_sections": [],
"name": "PTAB_Model_Decisions_github.ipynb",
"provenance": [],
"toc_visible": true,
"mount_file_id": "1X-M2SntuvoGIIjwFAtPNNIHXyKqSTUsK",
"authorship_tag": "ABX9TyMnadSiryOYGloyXckhl2DZ",
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"1b33de2a3a4a470aa6f07253885fe1bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_f63479552d154f019a083d60b5f7262e",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_64723b9f5a9742df9ba5425820b8674d",
"IPY_MODEL_a30de5d77e1b46bd83cf8ecbeed63171",
"IPY_MODEL_237488437a3a452493d875733ed29275"
]
}
},
"f63479552d154f019a083d60b5f7262e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"64723b9f5a9742df9ba5425820b8674d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_7c0035c0c2f842448c19f92eb1ac54d9",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": "100%",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_acfb2bfc634642cd86aad9009938c455"
}
},
"a30de5d77e1b46bd83cf8ecbeed63171": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_4ef7df9baa4444f5979dabf24975ffeb",
"_dom_classes": [],
"description": "",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 2631,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 2631,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_46c770d287cb402a91ccbf7feece5753"
}
},
"237488437a3a452493d875733ed29275": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_84be481bd1844259a25de6207ce09998",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 2631/2631 [00:05<00:00, 477.85it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_3dbc67b9c8c646bd934013cb45df9907"
}
},
"7c0035c0c2f842448c19f92eb1ac54d9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"acfb2bfc634642cd86aad9009938c455": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"4ef7df9baa4444f5979dabf24975ffeb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"46c770d287cb402a91ccbf7feece5753": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"84be481bd1844259a25de6207ce09998": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"3dbc67b9c8c646bd934013cb45df9907": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}