2403 lines (2402 with data), 132.0 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exploration of 100d space of genome vectors\n",
"\n",
"Genome vectors created by the Dna2VecDataBunch exhibit peculiar patterns. This notebook is dedicated to the exploration \n",
"of the bacterial genome space using dimensionality reduction techniques."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append(\"../mylib/\")\n",
"\n",
"from genomic import sequence\n",
"from genomic.sequence import regex_filter, count_filter\n",
"from functools import partial\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.decomposition import PCA\n",
"from sklearn import manifold,neighbors\n",
"from scipy.cluster.hierarchy import dendrogram, linkage \n",
"from matplotlib import pyplot as plt\n",
"import seaborn as sns; sns.set(color_codes=True)\n",
"import plotly.plotly as py\n",
"import plotly.graph_objs as go"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load Data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 50.9 s, sys: 1.61 s, total: 52.5 s\n",
"Wall time: 55.3 s\n"
]
}
],
"source": [
"# Filters handed to Dna2VecList.from_folder: a genus-name regex, a \"plasmid\" regex with\n",
"# keep=False (presumably excludes matching records -- confirm in genomic.sequence), and a count cap.\n",
"filters = [\n",
"    partial(regex_filter, rx=\"Streptomyces|Bacillus|Vibrio|Streptococcus|Rhizobium|Staphylococcus\"),\n",
"    partial(regex_filter, rx=\"plasmid\", keep=False),\n",
"    partial(count_filter, max_count=599),\n",
"]\n",
"data = sequence.Dna2VecList.from_folder(\n",
"    \"/data/genomes/GenSeq_fastas/train\",\n",
"    filters=filters,\n",
"    agg=partial(np.mean, axis=0),\n",
"    n_cpus=7,\n",
")\n",
"# Processing stages applied in order: file reading, 8-gram tokenisation, dna2vec embedding.\n",
"processors = [\n",
"    sequence.GSFileProcessor(),\n",
"    sequence.GSTokenizeProcessor(tokenizer=sequence.GSTokenizer(ngram=8, skip=0, n_cpus=7)),\n",
"    sequence.Dna2VecProcessor(),\n",
"]\n",
"%time for p in processors: p.process(data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3169"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data.items)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Genome vectors"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def log_scale(X, e=1e-6):\n",
"    \"\"\"Shift ``X`` by ``|X.min()| + e`` and return its element-wise log10.\n",
"\n",
"    The shift uses the minimum over the whole array, so when that minimum is\n",
"    zero or negative every shifted value is at least ``e`` and the logarithm\n",
"    is defined everywhere.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    X : array-like\n",
"        Values to transform; converted with ``np.asarray``.\n",
"    e : float, optional\n",
"        Small positive offset added after the shift (default ``1e-6``).\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        ``log10(X + |X.min()| + e)``, same shape as ``X``.\n",
"    \"\"\"\n",
"    values = np.asarray(X)\n",
"    return np.log10(values + np.abs(values.min()) + e)\n",
"\n",
"\n",
"x = np.asarray(data.items)\n",
"# Rows whose mean is exactly 0 are flagged as bad and removed\n",
"# (presumably fastas that produced no embedding -- confirm).\n",
"bad_fastas = np.where(np.mean(x, axis=1) == 0.)[0]\n",
"X = np.delete(x, bad_fastas, 0)\n",
"\n",
"# Label = 2nd and 3rd whitespace-separated tokens of each description;\n",
"# the same bad rows are removed so labels stay aligned with X.\n",
"labelList = [\" \".join(i.split()[1:3]) for i in data.descriptions]\n",
"labelList = np.delete(np.asarray(labelList), bad_fastas)\n",
"\n",
"# Encode each label as its position in the sorted unique vocabulary.\n",
"# A dict lookup replaces one linear list.index() scan per row.\n",
"vocab = list(np.unique(labelList))\n",
"vocab_index = {label: idx for idx, label in enumerate(vocab)}\n",
"y = [vocab_index[label] for label in labelList]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Correlation Distance in log-scaled space"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### tSNE"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 3-D t-SNE embedding of the log-scaled genome vectors, using correlation distance.\n",
"tsne = manifold.TSNE(\n",
"    n_components=3,\n",
"    init='pca',\n",
"    perplexity=10,\n",
"    metric=\"correlation\",\n",
"    random_state=0,\n",
")\n",
"%time X3 = tsne.fit_transform(log_scale(X))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Genus = first whitespace-separated token of each label.\n",
"genus = [i.split()[0] for i in labelList]\n",
"genus_vocab = list(np.unique(genus))\n",
"# Integer code per genus via dict lookup (avoids a linear list.index() scan per row).\n",
"genus_index = {name: idx for idx, name in enumerate(genus_vocab)}\n",
"y = [genus_index[name] for name in genus]\n",
"\n",
"# One row per genome: the three embedding coordinates (stored as pc1..pc3), indexed by label.\n",
"X3_df = pd.DataFrame(data=X3, columns=[\"pc1\",'pc2','pc3'], index=labelList)\n",
"X3_df[\"genus\"] = genus\n",
"X3_df[\"y\"] = y\n",
"\n",
"# One row per genus: coordinate lists for plotting, plus the genus code\n",
"# (y is constant within a genus, so its mean is the code itself).\n",
"genus_df = X3_df.groupby(\"genus\").agg({\"pc1\": list, \"pc2\": list, \"pc3\": list, \"y\": np.mean})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Correlation Distance visualisation"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/serge/anaconda3/envs/bio/lib/python3.6/site-packages/IPython/core/display.py:689: UserWarning:\n",
"\n",
"Consider using IPython.display.IFrame instead\n",
"\n"
]
},
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~sergeman/6.embed\" height=\"1000px\" width=\"1000px\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One 3-D scatter trace per genus. The list is named `traces` rather than `data`\n",
"# so it does not overwrite the `data` variable assigned in the Load Data cell;\n",
"# otherwise cells that read `data.items` fail when re-run after this one.\n",
"traces = []\n",
"for g in genus_df.index:\n",
"    trace = go.Scatter3d(\n",
"        name=str(g),\n",
"        x=genus_df.loc[g, \"pc1\"],\n",
"        y=genus_df.loc[g, \"pc2\"],\n",
"        z=genus_df.loc[g, \"pc3\"],\n",
"        mode='markers',\n",
"        marker=dict(\n",
"            size=8,\n",
"            color=genus_df.loc[g, \"y\"],  # per-genus numeric code from genus_df\n",
"            colorscale='Jet',\n",
"            opacity=0.5,\n",
"        ),\n",
"    )\n",
"    traces.append(trace)\n",
"\n",
"# Fixed 1000x1000 canvas with all margins removed.\n",
"layout = go.Layout(\n",
"    width=1000,\n",
"    height=1000,\n",
"    margin=dict(l=0, r=0, b=0, t=0),\n",
")\n",
"fig = go.Figure(data=traces, layout=layout)\n",
"py.iplot(fig, filename='correlation distance metric by genus')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Euclidean Distance in log-scaled space"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### tSNE"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1min 13s, sys: 308 ms, total: 1min 14s\n",
"Wall time: 1min 13s\n"
]
}
],
"source": [
"# 3-D t-SNE embedding of the log-scaled genome vectors; no metric is passed,\n",
"# so TSNE's default distance is used.\n",
"tsne = manifold.TSNE(\n",
"    n_components=3,\n",
"    init='pca',\n",
"    perplexity=30,\n",
"    random_state=0,\n",
")\n",
"%time X3 = tsne.fit_transform(log_scale(X))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Genus = first whitespace-separated token of each label.\n",
"genus = [i.split()[0] for i in labelList]\n",
"genus_vocab = list(np.unique(genus))\n",
"# Integer code per genus via dict lookup (avoids a linear list.index() scan per row).\n",
"genus_index = {name: idx for idx, name in enumerate(genus_vocab)}\n",
"y = [genus_index[name] for name in genus]\n",
"\n",
"# One row per genome: the three embedding coordinates (stored as pc1..pc3), indexed by label.\n",
"X3_df = pd.DataFrame(data=X3, columns=[\"pc1\",'pc2','pc3'], index=labelList)\n",
"X3_df[\"genus\"] = genus\n",
"X3_df[\"y\"] = y\n",
"\n",
"# One row per genus: coordinate lists for plotting, plus the genus code\n",
"# (y is constant within a genus, so its mean is the code itself).\n",
"genus_df = X3_df.groupby(\"genus\").agg({\"pc1\": list, \"pc2\": list, \"pc3\": list, \"y\": np.mean})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Euclidean Distance Visualisation"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/serge/anaconda3/envs/bio/lib/python3.6/site-packages/IPython/core/display.py:689: UserWarning:\n",
"\n",
"Consider using IPython.display.IFrame instead\n",
"\n"
]
},
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\" seamless=\"seamless\" src=\"https://plot.ly/~sergeman/8.embed\" height=\"1000px\" width=\"1000px\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One 3-D scatter trace per genus. The list is named `traces` rather than `data`\n",
"# so it does not overwrite the `data` variable assigned in the Load Data cell;\n",
"# otherwise cells that read `data.items` fail when re-run after this one.\n",
"traces = []\n",
"for g in genus_df.index:\n",
"    trace = go.Scatter3d(\n",
"        name=str(g),\n",
"        x=genus_df.loc[g, \"pc1\"],\n",
"        y=genus_df.loc[g, \"pc2\"],\n",
"        z=genus_df.loc[g, \"pc3\"],\n",
"        mode='markers',\n",
"        marker=dict(\n",
"            size=8,\n",
"            color=genus_df.loc[g, \"y\"],  # per-genus numeric code from genus_df\n",
"            colorscale='Jet',\n",
"            opacity=0.5,\n",
"        ),\n",
"    )\n",
"    traces.append(trace)\n",
"\n",
"# Fixed 1000x1000 canvas with all margins removed.\n",
"layout = go.Layout(\n",
"    width=1000,\n",
"    height=1000,\n",
"    margin=dict(l=0, r=0, b=0, t=0),\n",
")\n",
"fig = go.Figure(data=traces, layout=layout)\n",
"py.iplot(fig, filename='eucledian distance metric by genus')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Genome Inventory"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# DB=\"/data/genomes/GenSeq_fastas/train\"\n",
"DB='/media/serge/SharedSSD/data/genomes/ncbi-genomes-2019-04-07/bacterial genomes'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"all_fastas = sequence.Dna2VecList.from_folder(DB).descriptions"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"inventory = pd.DataFrame(data=[l.split()[1:3] for l in all_fastas], columns=[\"genus\",\"species\" ])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>species</th>\n",
" </tr>\n",
" <tr>\n",
" <th>genus</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Escherichia</th>\n",
" <td>2239</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Klebsiella</th>\n",
" <td>1718</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Salmonella</th>\n",
" <td>1183</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bacillus</th>\n",
" <td>1172</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lactobacillus</th>\n",
" <td>953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Staphylococcus</th>\n",
" <td>889</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Burkholderia</th>\n",
" <td>650</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Enterococcus</th>\n",
" <td>626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Pseudomonas</th>\n",
" <td>613</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Streptococcus</th>\n",
" <td>564</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Acinetobacter</th>\n",
" <td>531</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bordetella</th>\n",
" <td>504</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Vibrio</th>\n",
" <td>474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Xanthomonas</th>\n",
" <td>395</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mycobacterium</th>\n",
" <td>368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Borrelia</th>\n",
" <td>347</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Campylobacter</th>\n",
" <td>339</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Rhizobium</th>\n",
" <td>307</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Enterobacter</th>\n",
" <td>298</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mycoplasma</th>\n",
" <td>290</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Corynebacterium</th>\n",
" <td>258</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Phaeobacter</th>\n",
" <td>256</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Yersinia</th>\n",
" <td>243</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Brucella</th>\n",
" <td>233</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clostridium</th>\n",
" <td>228</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Listeria</th>\n",
" <td>227</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Streptomyces</th>\n",
" <td>225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Chlamydia</th>\n",
" <td>202</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Helicobacter</th>\n",
" <td>187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Candidatus</th>\n",
" <td>187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>'Deinococcus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mariniflexile</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Marinithermus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lactobacillales</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Marinobacterium</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Megamonas</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Melioribacter</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Melittangium</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Methylobacillus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Methylocaldum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Magnetococcus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Mageeibacillus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lysinimonas</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Luteitalea</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lacunisphaera</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lautropia</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Leadbetterella</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Leminorella</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lentibacillus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lentzea</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Leptothrix</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Levyella</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Limnobaculum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Limnochorda</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Litorilituus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Lonsdalea</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Luteibacter</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Luteipulveratus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>secondary</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>plasmid1</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1120 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" species\n",
"genus \n",
"Escherichia 2239\n",
"Klebsiella 1718\n",
"Salmonella 1183\n",
"Bacillus 1172\n",
"Lactobacillus 953\n",
"Staphylococcus 889\n",
"Burkholderia 650\n",
"Enterococcus 626\n",
"Pseudomonas 613\n",
"Streptococcus 564\n",
"Acinetobacter 531\n",
"Bordetella 504\n",
"Vibrio 474\n",
"Xanthomonas 395\n",
"Mycobacterium 368\n",
"Borrelia 347\n",
"Campylobacter 339\n",
"Rhizobium 307\n",
"Enterobacter 298\n",
"Mycoplasma 290\n",
"Corynebacterium 258\n",
"Phaeobacter 256\n",
"Yersinia 243\n",
"Brucella 233\n",
"Clostridium 228\n",
"Listeria 227\n",
"Streptomyces 225\n",
"Chlamydia 202\n",
"Helicobacter 187\n",
"Candidatus 187\n",
"... ...\n",
"'Deinococcus 1\n",
"Mariniflexile 1\n",
"Marinithermus 1\n",
"Lactobacillales 1\n",
"Marinobacterium 1\n",
"Megamonas 1\n",
"Melioribacter 1\n",
"Melittangium 1\n",
"Methylobacillus 1\n",
"Methylocaldum 1\n",
"Magnetococcus 1\n",
"Mageeibacillus 1\n",
"Lysinimonas 1\n",
"Luteitalea 1\n",
"Lacunisphaera 1\n",
"Lautropia 1\n",
"Leadbetterella 1\n",
"Leminorella 1\n",
"Lentibacillus 1\n",
"Lentzea 1\n",
"Leptothrix 1\n",
"Levyella 1\n",
"Limnobaculum 1\n",
"Limnochorda 1\n",
"Litorilituus 1\n",
"Lonsdalea 1\n",
"Luteibacter 1\n",
"Luteipulveratus 1\n",
"secondary 1\n",
"plasmid1 0\n",
"\n",
"[1120 rows x 1 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"inventory.groupby(\"genus\").agg({\"species\":\"count\"}).sort_values(\"species\",ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <th>genus</th>\n",
" <th>species</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>'Catharanthus</th>\n",
" <th>roseus'</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>'Deinococcus</th>\n",
" <th>soli'</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>'Nostoc</th>\n",
" <th>azollae'</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18,711,729</th>\n",
" <th>reads</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Acaryochloris</th>\n",
" <th>marina</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"10\" valign=\"top\">Acetobacter</th>\n",
" <th>aceti</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ascendens</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>orientalis</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>oryzifermentans</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pasteurianus</th>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>persici</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pomorum</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>senegalensis</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sp.</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tropicalis</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Acetobacterium</th>\n",
" <th>woodii</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Acetohalobium</th>\n",
" <th>arabaticum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Acetomicrobium</th>\n",
" <th>mobile</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">Acholeplasma</th>\n",
" <th>axanthum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hippikon</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>laidlawii</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>oculi</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">Achromobacter</th>\n",
" <th>denitrificans</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>insolitus</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sp.</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>spanius</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>xylosoxidans</th>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">Acidaminococcus</th>\n",
" <th>fermentans</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>intestini</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Acidiferrobacter</th>\n",
" <th>sp.</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Zymobacter</th>\n",
" <th>palmae</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Zymomonas</th>\n",
" <th>mobilis</th>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Arcobacter]</th>\n",
" <th>porcinus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Bacillus</th>\n",
" <th>thuringiensis]</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">[Bacillus]</th>\n",
" <th>caldolyticus</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>selenitireducens</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">[Brevibacterium]</th>\n",
" <th>flavum</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>frigoritolerans</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">[Clostridium]</th>\n",
" <th>bolteae</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>cellulolyticum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>cellulosi</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>clariflavum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>propionicum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>saccharolyticum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>scindens</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>stercorarium</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Enterobacter]</th>\n",
" <th>lignolyticus</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">[Eubacterium]</th>\n",
" <th>eligens</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hallii</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rectale</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">[Haemophilus]</th>\n",
" <th>ducreyi</th>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>parasuis</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Mycobacterium]</th>\n",
" <th>chelonae</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Pasteurella]</th>\n",
" <th>aerogenes</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Polyangium]</th>\n",
" <th>brachysporum</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Pseudomonas</th>\n",
" <th>syringae]</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>[Pseudomonas]</th>\n",
" <th>mesoacidophila</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>complete</th>\n",
" <th>chromosome</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>gamma</th>\n",
" <th>proteobacterium</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>secondary</th>\n",
" <th>endosymbiont</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3186 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" count\n",
"genus species \n",
"'Catharanthus roseus' 2\n",
"'Deinococcus soli' 1\n",
"'Nostoc azollae' 3\n",
"18,711,729 reads 1\n",
"Acaryochloris marina 10\n",
"Acetobacter aceti 1\n",
" ascendens 1\n",
" orientalis 2\n",
" oryzifermentans 1\n",
" pasteurianus 91\n",
" persici 2\n",
" pomorum 7\n",
" senegalensis 2\n",
" sp. 7\n",
" tropicalis 2\n",
"Acetobacterium woodii 1\n",
"Acetohalobium arabaticum 1\n",
"Acetomicrobium mobile 1\n",
"Acholeplasma axanthum 1\n",
" hippikon 2\n",
" laidlawii 2\n",
" oculi 1\n",
"Achromobacter denitrificans 3\n",
" insolitus 4\n",
" sp. 2\n",
" spanius 4\n",
" xylosoxidans 11\n",
"Acidaminococcus fermentans 1\n",
" intestini 1\n",
"Acidiferrobacter sp. 1\n",
"... ...\n",
"Zymobacter palmae 2\n",
"Zymomonas mobilis 49\n",
"[Arcobacter] porcinus 1\n",
"[Bacillus thuringiensis] 2\n",
"[Bacillus] caldolyticus 2\n",
" selenitireducens 1\n",
"[Brevibacterium] flavum 2\n",
" frigoritolerans 1\n",
"[Clostridium] bolteae 2\n",
" cellulolyticum 1\n",
" cellulosi 1\n",
" clariflavum 1\n",
" propionicum 1\n",
" saccharolyticum 1\n",
" scindens 1\n",
" stercorarium 4\n",
"[Enterobacter] lignolyticus 1\n",
"[Eubacterium] eligens 3\n",
" hallii 1\n",
" rectale 1\n",
"[Haemophilus] ducreyi 14\n",
" parasuis 4\n",
"[Mycobacterium] chelonae 2\n",
"[Pasteurella] aerogenes 1\n",
"[Polyangium] brachysporum 1\n",
"[Pseudomonas syringae] 3\n",
"[Pseudomonas] mesoacidophila 4\n",
"complete chromosome 2\n",
"gamma proteobacterium 1\n",
"secondary endosymbiont 1\n",
"\n",
"[3186 rows x 1 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count rows per (genus, species) pair. The statement previously began with a bare\n",
"# `.groupby(...)` and no receiver, which is a syntax error; `inventory` is restored as the receiver.\n",
"# size() counts the rows in each group directly, so no grouping key has to be re-used\n",
"# as an aggregated column and the result already carries the single column \"count\".\n",
"# NOTE: this rebinds `inventory` from the raw genus/species frame to the counts frame;\n",
"# rebuild the raw frame before re-running this cell.\n",
"inventory = inventory.groupby([\"genus\", \"species\"]).size().to_frame(\"count\")\n",
"inventory"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['NZ_CP013305.1 Vibrio cholerae strain CRC1106 chromosome 1, complete sequence',\n",
" 'NZ_CP013306.1 Vibrio cholerae strain CRC1106 chromosome 2, complete sequence',\n",
" 'NC_013791.2 Bacillus pseudofirmus OF4, complete genome',\n",
" 'NC_013792.1 Bacillus pseudofirmus OF4 plasmid pBpOF4-01, complete sequence',\n",
" 'NC_013793.1 Bacillus pseudofirmus OF4 plasmid pBpOF4-02, complete sequence',\n",
" 'NC_000913.3 Escherichia coli str. K-12 substr. MG1655, complete genome',\n",
" 'NC_007164.1 Corynebacterium jeikeium K411 complete genome',\n",
" 'NC_003080.1 Corynebacterium jeikeium K411 plasmid pKW4, complete sequence',\n",
" 'NC_002162.1 Ureaplasma parvum serovar 3 str. ATCC 700970, complete genome',\n",
" 'NC_004088.1 Yersinia pestis KIM10+, complete genome',\n",
" 'NC_004838.1 Yersinia pestis KIM10+ plasmid pMT-1, complete sequence',\n",
" 'NC_002620.2 Chlamydia muridarum Nigg, complete genome',\n",
" 'NC_002182.1 Chlamydia muridarum Nigg plasmid pMoPn, complete sequence',\n",
" 'NC_002488.3 Xylella fastidiosa 9a5c, complete genome',\n",
" 'NC_002489.3 Xylella fastidiosa 9a5c plasmid pXF1.3, complete sequence',\n",
" 'NC_002490.1 Xylella fastidiosa 9a5c plasmid pXF51, complete sequence',\n",
" 'NC_002505.1 Vibrio cholerae O1 biovar El Tor str. N16961 chromosome I, complete sequence',\n",
" 'NC_002506.1 Vibrio cholerae O1 biovar El Tor str. N16961 chromosome II, complete sequence',\n",
" 'NC_002516.2 Pseudomonas aeruginosa PAO1, complete genome',\n",
" 'NC_002737.2 Streptococcus pyogenes M1 GAS, complete genome',\n",
" 'NC_002663.1 Pasteurella multocida subsp. multocida str. Pm70, complete genome',\n",
" 'NC_002946.2 Neisseria gonorrhoeae FA 1090 chromosome, complete genome',\n",
" 'NC_002662.1 Lactococcus lactis subsp. lactis Il1403 chromosome, complete genome',\n",
" 'NC_003028.3 Streptococcus pneumoniae TIGR4, complete genome',\n",
" 'NC_002696.2 Caulobacter crescentus CB15 chromosome, complete genome',\n",
" 'NC_004337.2 Shigella flexneri 2a str. 301 chromosome, complete genome',\n",
" 'NC_004851.1 Shigella flexneri 2a str. 301 plasmid pCP301, complete sequence',\n",
" 'NC_013174.1 Jonesia denitrificans DSM 20603, complete genome',\n",
" 'NC_013166.1 Kangiella koreensis DSM 16069, complete genome',\n",
" 'NC_013171.1 Anaerococcus prevotii DSM 20548, complete genome',\n",
" 'NC_013164.1 Anaerococcus prevotii DSM 20548 plasmid pAPRE01, complete sequence',\n",
" 'NC_013222.1 Robiginitalea biformata HTCC2501, complete genome',\n",
" 'NC_017285.1 Chlamydophila pneumoniae LPCoLN, complete genome',\n",
" 'NC_017286.1 Chlamydophila pneumoniae LPCoLN plasmid, complete sequence',\n",
" 'NC_013194.1 Candidatus Accumulibacter phosphatis clade IIA str. UW-1, complete genome',\n",
" 'NC_013193.1 Candidatus Accumulibacter phosphatis clade IIA str. UW-1 plasmid pAph01, complete sequence',\n",
" 'NC_013190.1 Candidatus Accumulibacter phosphatis clade IIA str. UW-1 plasmid pAph02, complete sequence',\n",
" 'NC_013191.1 Candidatus Accumulibacter phosphatis clade IIA str. UW-1 plasmid pAph03, complete sequence',\n",
" 'NC_013216.1 Desulfotomaculum acetoxidans DSM 771, complete genome',\n",
" 'NC_013203.1 Atopobium parvulum DSM 20469, complete genome',\n",
" 'NC_013355.1 Zymomonas mobilis subsp. mobilis NCIMB 11163, complete genome',\n",
" 'NC_013356.1 Zymomonas mobilis subsp. mobilis NCIMB 11163 plasmid pZA1001, complete sequence',\n",
" 'NC_013357.1 Zymomonas mobilis subsp. mobilis NCIMB 11163 plasmid pZA1002, complete sequence',\n",
" 'NC_013358.1 Zymomonas mobilis subsp. mobilis NCIMB 11163 plasmid pZA1003, complete sequence',\n",
" 'NC_013204.1 Eggerthella lenta DSM 2243, complete genome',\n",
" 'NC_013205.1 Alicyclobacillus acidocaldarius subsp. acidocaldarius DSM 446, complete genome',\n",
" 'NC_013206.1 Alicyclobacillus acidocaldarius subsp. acidocaldarius DSM 446 plasmid pAACI01, complete sequence',\n",
" 'NC_013207.1 Alicyclobacillus acidocaldarius subsp. acidocaldarius DSM 446 plasmid pAACI02, complete sequence',\n",
" 'NC_013208.1 Alicyclobacillus acidocaldarius subsp. acidocaldarius DSM 446 plasmid pAACI03, complete sequence',\n",
" 'NC_013223.1 Desulfohalobium retbaense DSM 5692, complete genome',\n",
" 'NC_013224.1 Desulfohalobium retbaense DSM 5692 plasmid pDRET01, complete sequence',\n",
" 'NC_009883.1 Rickettsia bellii OSU 85-389, complete genome',\n",
" 'NC_009953.1 Salinispora arenicola CNS-205, complete genome',\n",
" 'NC_009901.1 Shewanella pealeana ATCC 700345, complete genome',\n",
" 'NC_009922.1 Alkaliphilus oremlandii OhILAs, complete genome',\n",
" 'NC_010612.1 Mycobacterium marinum M, complete genome',\n",
" 'NC_010604.1 Mycobacterium marinum M plasmid pMM23, complete sequence',\n",
" 'NC_012125.1 Salmonella enterica subsp. enterica serovar Paratyphi C strain RKS4594, complete genome',\n",
" 'NC_012124.1 Salmonella enterica subsp. enterica serovar Paratyphi C strain RKS4594 plasmid pSPCV, complete sequence',\n",
" 'NC_009943.1 Desulfococcus oleovorans Hxd3, complete genome',\n",
" 'NC_010424.1 Candidatus Desulforudis audaxviator MP104C, complete genome',\n",
" 'NC_010611.1 Acinetobacter baumannii ACICU, complete genome',\n",
" 'NC_010605.1 Acinetobacter baumannii ACICU plasmid pACICU1, complete sequence',\n",
" 'NC_010606.1 Acinetobacter baumannii ACICU plasmid pACICU2, complete sequence',\n",
" 'NC_010084.1 Burkholderia multivorans ATCC 17616 chromosome 1, complete sequence',\n",
" 'NC_010086.1 Burkholderia multivorans ATCC 17616 chromosome 2, complete sequence',\n",
" 'NC_010087.1 Burkholderia multivorans ATCC 17616 chromosome 3, complete sequence',\n",
" 'NC_010070.1 Burkholderia multivorans ATCC 17616 plasmid pBMUL01, complete sequence',\n",
" 'NC_010103.1 Brucella canis ATCC 23365 chromosome I, complete sequence',\n",
" 'NC_010104.1 Brucella canis ATCC 23365 chromosome II, complete sequence',\n",
" 'NC_012587.1 Sinorhizobium fredii NGR234 chromosome, complete genome',\n",
" 'NC_000914.2 Sinorhizobium fredii NGR234 plasmid pNGR234a, complete sequence',\n",
" 'NC_012586.1 Sinorhizobium fredii NGR234 plasmid pNGR234b, complete sequence',\n",
" 'NC_010003.1 Petrotoga mobilis SJ95, complete genome',\n",
" 'NC_010067.1 Salmonella enterica subsp. arizonae serovar 62:z4,z23:--, complete genome',\n",
" 'NC_010002.1 Delftia acidovorans SPH-1, complete genome',\n",
" 'NC_010001.1 Lachnoclostridium phytofermentans ISDg, complete genome',\n",
" 'NC_010102.1 Salmonella enterica subsp. enterica serovar Paratyphi B str. SPB7, complete genome',\n",
" 'NC_010742.1 Brucella abortus S19 chromosome 1, complete sequence',\n",
" 'NC_010740.1 Brucella abortus S19 chromosome 2, complete sequence',\n",
" 'NC_010117.1 Coxiella burnetii RSA 331, complete genome',\n",
" 'NC_010115.1 Coxiella burnetii RSA 331 plasmid QpH1, complete sequence',\n",
" 'NC_009997.1 Shewanella baltica OS195, complete genome',\n",
" 'NC_009998.1 Shewanella baltica OS195 plasmid pS19501, complete sequence',\n",
" 'NC_009999.1 Shewanella baltica OS195 plasmid pS19502, complete sequence',\n",
" 'NC_010000.1 Shewanella baltica OS195 plasmid pS19503, complete sequence',\n",
" 'NC_010163.1 Acholeplasma laidlawii PG-8A, complete genome',\n",
" 'NC_003197.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2, complete genome',\n",
" 'NC_003277.2 Salmonella enterica subsp. enterica serovar Typhimurium str. LT2 plasmid pSLT, complete sequence',\n",
" 'NC_004631.1 Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome',\n",
" 'NC_002944.2 Mycobacterium avium subsp. paratuberculosis str. k10, complete genome',\n",
" 'NC_007332.1 Mycoplasma hyopneumoniae 7448, complete genome',\n",
" 'NC_005957.1 [Bacillus thuringiensis] serovar konkukian str. 97-27 chromosome, complete genome',\n",
" 'NC_006578.1 [Bacillus thuringiensis] serovar konkukian str. 97-27 plasmid pBT9727, complete sequence',\n",
" 'NC_010816.1 Bifidobacterium longum DJO10A, complete genome',\n",
" 'NC_004252.1 Bifidobacterium longum DJO10A plasmid pDOJH10L, complete sequence',\n",
" 'NC_004253.1 Bifidobacterium longum DJO10A plasmid pDOJH10S, complete sequence',\n",
" 'NC_008800.1 Yersinia enterocolitica subsp. enterocolitica 8081 chromosome, complete genome',\n",
" 'NC_008791.1 Yersinia enterocolitica subsp. enterocolitica 8081 plasmid pYVe8081, complete sequence',\n",
" 'NC_003272.1 Nostoc sp. PCC 7120 DNA, complete genome',\n",
" 'NC_003276.1 Nostoc sp. PCC 7120 plasmid pCC7120alpha DNA, complete genome',\n",
" 'NC_003240.1 Nostoc sp. PCC 7120 plasmid pCC7120beta DNA, complete sequence',\n",
" 'NC_003273.1 Nostoc sp. PCC 7120 plasmid pCC7120delta DNA, complete sequence',\n",
" 'NC_003270.1 Nostoc sp. PCC 7120 plasmid pCC7120epsilon DNA, complete sequence',\n",
" 'NC_003267.1 Nostoc sp. PCC 7120 plasmid pCC7120gamma DNA, complete sequence',\n",
" 'NC_003241.1 Nostoc sp. PCC 7120 plasmid pCC7120zeta DNA, complete sequence',\n",
" 'NC_009342.1 Corynebacterium glutamicum R DNA, complete genome',\n",
" 'NC_009343.1 Corynebacterium glutamicum R plasmid pCGR1 DNA, complete sequence',\n",
" 'NC_010729.1 Porphyromonas gingivalis ATCC 33277 DNA, complete genome',\n",
" 'NC_013799.1 Hydrogenobacter thermophilus TK-6 DNA, complete genome',\n",
" 'NC_003450.3 Corynebacterium glutamicum ATCC 13032 chromosome, complete genome',\n",
" 'NC_017382.1 Helicobacter pylori 51, complete genome',\n",
" 'NC_008322.1 Shewanella sp. MR-7, complete genome',\n",
" 'NC_008320.1 Shewanella sp. MR-7, complete sequence',\n",
" 'NC_008321.1 Shewanella sp. MR-4, complete genome',\n",
" 'NC_008345.1 Shewanella frigidimarina NCIMB 400, complete genome',\n",
" 'NC_008346.1 Syntrophomonas wolfei subsp. wolfei str. Goettingen G311, complete genome',\n",
" 'NC_008347.1 Maricaulis maris MCS10, complete genome',\n",
" 'NC_008344.1 Nitrosomonas eutropha C91, complete genome',\n",
" 'NC_008341.1 Nitrosomonas eutropha C91 Plasmid1, complete sequence',\n",
" 'NC_008342.1 Nitrosomonas eutropha C91 Plasmid2, complete sequence',\n",
" 'NC_008340.1 Alkalilimnicola ehrlichii MLHE-1, complete genome',\n",
" 'NC_008570.1 Aeromonas hydrophila subsp. hydrophila ATCC 7966 chromosome, complete genome',\n",
" 'NC_008435.1 Rhodopseudomonas palustris BisA53, complete genome',\n",
" 'NC_008563.1 Escherichia coli APEC O1, complete genome',\n",
" 'NC_009837.1 Escherichia coli APEC O1 plasmid pAPEC-O1-ColBM, complete sequence',\n",
" 'NC_009838.1 Escherichia coli APEC O1 plasmid pAPEC-O1-R, complete sequence',\n",
" 'NC_008576.1 Magnetococcus marinus MC-1, complete genome',\n",
" 'NC_011566.1 Shewanella piezotolerans WP3, complete genome',\n",
" 'NC_008536.1 Candidatus Solibacter usitatus Ellin6076, complete genome',\n",
" 'NC_008711.1 Paenarthrobacter aurescens TC1, complete genome',\n",
" 'NC_008712.1 Paenarthrobacter aurescens TC1 plasmid TC1, complete sequence',\n",
" 'NC_008713.1 Paenarthrobacter aurescens TC1 plasmid TC2, complete sequence',\n",
" 'NC_008554.1 Syntrophobacter fumaroxidans MPOB, complete genome',\n",
" 'NC_008595.1 Mycobacterium avium 104, complete genome',\n",
" 'NC_008596.1 Mycobacterium smegmatis str. MC2 155 chromosome, complete genome',\n",
" 'NC_008578.1 Acidothermus cellulolyticus 11B, complete genome',\n",
" 'NC_008609.1 Pelobacter propionicus DSM 2379, complete genome',\n",
" 'NC_008607.1 Pelobacter propionicus DSM 2379 plasmid pPRO1, complete sequence',\n",
" 'NC_008608.1 Pelobacter propionicus DSM 2379 plasmid pPRO2, complete sequence',\n",
" 'NC_008600.1 Bacillus thuringiensis str. Al Hakam, complete genome',\n",
" 'NC_008598.1 Bacillus thuringiensis str. Al Hakam, plasmid pALH1, complete sequence',\n",
" 'NC_013928.1 Streptococcus mutans NN2025 DNA, complete genome',\n",
" 'NC_014215.1 Propionibacterium freudenreichii subsp. shermanii CIRM-BIA1, complete genome',\n",
" 'NC_013861.1 Legionella longbeachae NSW150, complete genome',\n",
" 'NC_014544.1 Legionella longbeachae NSW150 plasmid pLLO',\n",
" 'NC_012925.1 Streptococcus suis P1/7 complete genome',\n",
" 'NC_013949.1 Helicobacter mustelae 12198 complete genome',\n",
" 'NC_003062.2 Agrobacterium fabrum str. C58 chromosome circular, complete sequence',\n",
" 'NC_003063.2 Agrobacterium fabrum str. C58 chromosome linear, complete sequence',\n",
" 'NC_003064.2 Agrobacterium fabrum str. C58 plasmid At, complete sequence',\n",
" 'NC_003065.3 Agrobacterium fabrum str. C58 plasmid Ti, complete sequence',\n",
" 'NC_007761.1 Rhizobium etli CFN 42, complete genome',\n",
" 'NC_007762.1 Rhizobium etli CFN 42 plasmid p42a, complete sequence',\n",
" 'NC_007763.1 Rhizobium etli CFN 42 plasmid p42b, complete sequence',\n",
" 'NC_007764.1 Rhizobium etli CFN 42 plasmid p42c, complete sequence',\n",
" 'NC_007765.1 Rhizobium etli CFN 42 plasmid p42e, complete sequence',\n",
" 'NC_007766.1 Rhizobium etli CFN 42 plasmid p42f, complete sequence',\n",
" 'NC_004041.2 Rhizobium etli CFN 42 plasmid symbiotic plasmid p42d, complete sequence',\n",
" 'NC_014148.1 Planctopirus limnophila DSM 3776, complete genome',\n",
" 'NC_014149.1 Planctopirus limnophila DSM 3776 plasmid pPLIM01, complete sequence',\n",
" 'NC_014212.1 Meiothermus silvanus DSM 9946, complete genome',\n",
" 'NC_014213.1 Meiothermus silvanus DSM 9946 plasmid pMESIL01, complete sequence',\n",
" 'NC_014214.1 Meiothermus silvanus DSM 9946 plasmid pMESIL02, complete sequence',\n",
" 'NC_014171.1 Bacillus thuringiensis BMB171, complete genome',\n",
" 'NC_014172.1 Bacillus thuringiensis BMB171 plasmid pBMB171, complete sequence',\n",
" 'NC_014216.1 Desulfurivibrio alkaliphilus AHT2, complete genome',\n",
" 'NC_014158.1 Tsukamurella paurometabola DSM 20162, complete genome',\n",
" 'NC_014159.1 Tsukamurella paurometabola DSM 20162 plasmid pTpau01, complete sequence',\n",
" 'NC_014166.1 Arcobacter nitrofigilis DSM 7299, complete genome',\n",
" 'NC_014147.1 Moraxella catarrhalis BBH18, complete genome',\n",
" 'NC_014100.1 Caulobacter segnis ATCC 21756, complete genome',\n",
" 'NC_014169.1 Bifidobacterium longum subsp. longum JDM301, complete genome',\n",
" 'NC_017297.1 Clostridium botulinum F str. 230613, complete genome',\n",
" 'NC_017298.1 Clostridium botulinum F str. 230613 plasmid pCBF, complete sequence',\n",
" 'NC_014218.1 Arcanobacterium haemolyticum DSM 20595, complete genome',\n",
" 'NC_016582.1 Streptomyces bingchenggensis BCW-1, complete genome',\n",
" 'NC_014220.1 Syntrophothermus lipocalidus DSM 12680, complete genome',\n",
" 'NC_017633.1 Escherichia coli ETEC H10407, complete genome',\n",
" 'NC_017721.1 Escherichia coli ETEC H10407 p52 plasmid',\n",
" 'NC_017723.1 Escherichia coli ETEC H10407 p58 plasmid',\n",
" 'NC_017722.1 Escherichia coli ETEC H10407 p666 plasmid',\n",
" 'NC_017724.1 Escherichia coli ETEC H10407 p948 plasmid',\n",
" 'NC_017441.1 Chlamydia trachomatis Sweden2, complete genome',\n",
" 'NC_017465.1 Lactobacillus fermentum CECT 5716, complete genome',\n",
" 'NC_018588.1 Listeria monocytogenes strain SLCC2372, serotype 1/2c',\n",
" 'NC_018889.1 Listeria monocytogenes serotype 1-2c str. SLCC2372 plasmid pLM1-2cUG1 complete sequence',\n",
" 'NC_016776.1 Bacteroides fragilis 638R genome',\n",
" 'NC_016810.1 Salmonella enterica subsp. enterica serovar Typhimurium SL1344 complete genome',\n",
" 'NC_017718.1 Salmonella enterica subsp. enterica serovar Typhimurium str. SL1344 plasmid pCol1B9_SL1344 complete sequence',\n",
" 'NC_017719.1 Salmonella enterica subsp. enterica serovar Typhimurium str. SL1344 plasmid pRSF1010_SL1344 complete sequence',\n",
" 'NC_017720.1 Salmonella enterica subsp. enterica serovar Typhimurium str. SL1344 plasmid pSLT_SL1344 complete sequence',\n",
" 'NC_016809.1 Haemophilus influenzae 10810 genome',\n",
" 'NC_015964.1 Haemophilus parainfluenzae T3T1 complete genome',\n",
" 'NC_016620.1 Bacteriovorax marinus SJ genome',\n",
" 'NC_019100.1 Bacteriovorax marinus SJ plasmid pBMS1 complete sequence',\n",
" 'NC_017593.1 Streptococcus pneumoniae INV200 genome',\n",
" 'NC_017592.1 Streptococcus pneumoniae OXC141 complete genome',\n",
" 'NC_017591.1 Streptococcus pneumoniae INV104 genome',\n",
" 'NC_021006.1 Streptococcus pneumoniae SPN034156 draft genome',\n",
" 'NC_021028.1 Streptococcus pneumoniae SPN034183 draft genome',\n",
" 'NC_021026.1 Streptococcus pneumoniae SPN994038 draft genome',\n",
" 'NC_021005.1 Streptococcus pneumoniae SPN994039 draft genome',\n",
" 'NC_021003.1 Streptococcus pneumoniae SPN032672 draft genome',\n",
" 'NC_021004.1 Streptococcus pneumoniae SPN033038 draft genome',\n",
" 'NC_017519.1 Mycoplasma hyorhinis MCLD, complete genome',\n",
" 'NC_015428.1 Lactobacillus buchneri NRRL B-30929, complete genome',\n",
" 'NC_015420.1 Lactobacillus buchneri NRRL B-30929 plasmid pLBUC01, complete sequence',\n",
" 'NC_015429.1 Lactobacillus buchneri NRRL B-30929 plasmid pLBUC02, complete sequence',\n",
" 'NC_015421.1 Lactobacillus buchneri NRRL B-30929 plasmid pLBUC03, complete sequence',\n",
" 'NC_023030.2 Mycoplasma gallisepticum S6, complete genome',\n",
" 'NC_015638.1 Lacinutrix sp. 5H-3-7-4, complete genome',\n",
" 'NC_015497.1 Glaciecola sp. 4H-3-7+YE-5, complete genome',\n",
" 'NC_015498.1 Glaciecola sp. 4H-3-7+YE-5 plasmid pGLAAG01, complete sequence',\n",
" 'NC_015496.1 Dokdonia sp. 4H-3-7-5, complete genome',\n",
" 'NC_015501.1 Porphyromonas asaccharolytica DSM 20707, complete genome',\n",
" 'NC_018080.1 Pseudomonas aeruginosa DK2, complete genome',\n",
" 'NC_018081.1 Enterococcus hirae ATCC 9790, complete genome',\n",
" 'NC_015845.1 Enterococcus hirae ATCC 9790 plasmid pTG9790, complete sequence',\n",
" 'NZ_CP010978.1 Pelosinus fermentans JBW45, complete genome',\n",
" 'NC_018089.1 Streptococcus mutans GS-5, complete genome',\n",
" 'NZ_CP015923.1 Salmonella enterica subsp. enterica serovar Newport str. Levine 1, complete genome',\n",
" 'NZ_CP015924.1 Salmonella enterica subsp. enterica serovar Newport str. Levine 15, complete genome',\n",
" 'NZ_CP015876.1 Pseudomonas putida SJTE-1, complete genome',\n",
" 'NZ_CP015877.1 Pseudomonas aeruginosa SJTD-1, complete genome',\n",
" 'NZ_CP010283.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 21550, complete genome',\n",
" 'NZ_CP009564.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 21550 plasmid pCVM21550, complete sequence',\n",
" 'NZ_CP010281.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22513, complete genome',\n",
" 'NZ_CP009562.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22513 plasmid pCVM22513, complete sequence',\n",
" 'NZ_CP010282.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 21538, complete genome',\n",
" 'NZ_CP009563.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 21538 plasmid pCVM21538, complete sequence',\n",
" 'NZ_CP010279.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22425, complete genome',\n",
" 'NZ_CP009560.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22425 plasmid pCVM22425, complete sequence',\n",
" 'NZ_CP009561.1 Salmonella enterica subsp. enterica serovar Newport str. CVM N18486, complete genome',\n",
" 'NZ_CP010284.1 Salmonella enterica subsp. enterica serovar Newport str. CVM N1543, complete genome',\n",
" 'NZ_CP009570.1 Salmonella enterica subsp. enterica serovar Newport str. CVM N1543 plasmid pCVMN1543, complete sequence',\n",
" 'NZ_CP010280.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22462, complete genome',\n",
" 'NZ_CP009567.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22462 plasmid pCFSAN000934_02, complete sequence',\n",
" 'NZ_CP009568.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22462 plasmid pCFSAN000934_03, complete sequence',\n",
" 'NZ_CP009566.1 Salmonella enterica subsp. enterica serovar Newport str. CVM 22462 plasmid pCVM22462, complete sequence',\n",
" 'NC_018108.1 Taylorella equigenitalis ATCC 35865, complete genome',\n",
" 'NC_018106.1 Klebsiella michiganensis E718, complete sequence',\n",
" 'NC_021501.1 Klebsiella michiganensis strain E718 plasmid pKOX_NDM1, complete sequence',\n",
" 'NC_018107.1 Klebsiella michiganensis E718 plasmid pKOX_R1, complete sequence',\n",
" 'NC_016948.1 Mycobacterium intracellulare MOTT-64, complete genome',\n",
" 'NC_016946.1 Mycobacterium intracellulare ATCC 13950, complete genome',\n",
" 'NC_016947.1 Mycobacterium intracellulare MOTT-02, complete genome',\n",
" 'NC_017049.1 Rickettsia prowazekii str. Chernikova, complete genome',\n",
" 'NZ_CP007066.1 Fusobacterium nucleatum subsp. vincentii 3_1_27 plasmid, complete sequence',\n",
" 'NZ_CP007064.1 Fusobacterium nucleatum subsp. vincentii 3_1_27 chromosome 1, complete sequence',\n",
" 'NZ_CP007065.1 Fusobacterium nucleatum subsp. vincentii 3_1_27 chromosome 2, complete sequence',\n",
" 'NZ_CP008742.1 Pseudomonas savastanoi pv. savastanoi NCPPB 3335, complete genome',\n",
" 'NC_015678.1 Streptococcus parasanguinis ATCC 15912, complete genome',\n",
" 'NC_014643.1 Rothia dentocariosa ATCC 17931, complete genome',\n",
" 'NC_014618.1 Enterobacter lignolyticus SCF1, complete genome',\n",
" 'NC_014625.1 Ketogulonicigenium vulgare Y25, complete genome',\n",
" 'NC_014621.1 Ketogulonicigenium vulgare Y25 plasmid pYP1, complete sequence',\n",
" 'NC_014626.1 Ketogulonicigenium vulgare Y25 plasmid pYP12, complete sequence',\n",
" 'NC_017161.1 Hydrogenobacter thermophilus TK-6, complete genome',\n",
" 'NC_014616.1 Bifidobacterium bifidum S17, complete genome',\n",
" 'NC_014622.2 Paenibacillus polymyxa SC2, complete genome',\n",
" 'NC_014628.2 Paenibacillus polymyxa SC2 plasmid pSC2, complete sequence',\n",
" 'NC_017455.1 Halanaerobium praevalens DSM 2228, complete genome',\n",
" 'NC_014623.1 Stigmatella aurantiaca DW4/3-1, complete genome',\n",
" 'NC_014632.1 Ilyobacter polytropus DSM 2926, complete genome',\n",
" 'NC_014633.1 Ilyobacter polytropus DSM 2926 plasmid pILYOP01, complete sequence',\n",
" 'NC_014634.1 Ilyobacter polytropus DSM 2926 plasmid pILYOP02, complete sequence',\n",
" 'NC_017451.1 Haemophilus influenzae R2866, complete genome',\n",
" 'NC_017452.1 Haemophilus influenzae R2846, complete genome',\n",
" 'NC_015174.1 Rubinisphaera brasiliensis DSM 5305, complete genome',\n",
" 'NC_015214.1 Lactobacillus amylovorus strain 30SC, complete genome',\n",
" 'NC_015213.1 Lactobacillus amylovorus strain 30SC plasmid pRKC30SC1, complete sequence',\n",
" 'NC_015218.1 Lactobacillus amylovorus strain 30SC plasmid pRKC30SC2, complete sequence',\n",
" 'NC_017569.1 Riemerella anatipestifer RA-GD, complete genome',\n",
" 'NC_017295.1 Clostridium acetobutylicum EA 2018, complete genome',\n",
" 'NC_017296.1 Clostridium acetobutylicum EA 2018 EA2018plasmid, complete sequence',\n",
" 'NC_015470.1 Chlamydia psittaci 6BC, complete genome',\n",
" 'NC_015217.1 Chlamydia psittaci 6BC plasmid p6BC, complete sequence',\n",
" 'NZ_CP019725.1 Xanthomonas perforans 91-118 chromosome, complete genome',\n",
" 'NC_015224.1 Yersinia enterocolitica subsp. palearctica 105.5R(r), complete genome',\n",
" 'NC_015475.1 Yersinia enterocolitica subsp. palearctica 105.5R(r) plasmid 105.5R(r)p, complete sequence',\n",
" 'NC_017374.1 Helicobacter pylori 2017, complete genome',\n",
" 'NC_017381.1 Helicobacter pylori 2018, complete genome',\n",
" 'NZ_CP017125.1 Mycoplasma capricolum subsp. capripneumoniae M1601 chromosome, complete genome',\n",
" 'NC_015508.1 Agrobacterium sp. H13-3 linear chromosome, complete sequence',\n",
" 'NC_015183.1 Agrobacterium sp. H13-3 circular chromosome, complete sequence',\n",
" 'NC_015184.1 Agrobacterium sp. H13-3 plasmid pAspH13-3a, complete sequence',\n",
" 'NC_017486.1 Lactococcus lactis subsp. lactis CV56, complete genome',\n",
" 'NC_017483.1 Lactococcus lactis subsp. lactis CV56 plasmid pCV56A, complete sequence',\n",
" 'NC_017487.1 Lactococcus lactis subsp. lactis CV56 plasmid pCV56B, complete sequence',\n",
" 'NC_017484.1 Lactococcus lactis subsp. lactis CV56 plasmid pCV56C, complete sequence',\n",
" 'NC_017485.1 Lactococcus lactis subsp. lactis CV56 plasmid pCV56D, complete sequence',\n",
" 'NC_017488.1 Lactococcus lactis subsp. lactis CV56 plasmid pCV56E, complete sequence',\n",
" 'NC_017244.1 Brucella melitensis M28 chromosome 1, complete sequence',\n",
" 'NC_017245.1 Brucella melitensis M28 chromosome 2, complete sequence',\n",
" 'NC_015259.1 Polymorphum gilvum SL003B-26A1, complete genome',\n",
" 'NC_015258.1 Polymorphum gilvum SL003B-26A1 plasmid pSL003B, complete sequence',\n",
" 'NC_015277.1 Sphingobacterium sp. 21, complete genome',\n",
" 'NC_016783.1 Corynebacterium diphtheriae INCA 402, complete genome',\n",
" 'NC_016800.1 Corynebacterium diphtheriae BH8, complete genome',\n",
" 'NC_017033.1 Frateuria aurantia DSM 6220, complete genome',\n",
" 'NC_019960.1 Prevotella dentalis DSM 3688 chromosome 1, complete sequence',\n",
" 'NC_019968.1 Prevotella dentalis DSM 3688 chromosome 2, complete sequence',\n",
" 'NC_019961.1 Prevotella dentalis DSM 3688 plasmid pPREDE01, complete sequence',\n",
" 'NC_019969.1 Prevotella dentalis DSM 3688 plasmid pPREDE02, complete sequence',\n",
" 'NC_019892.1 Singulisphaera acidiphila DSM 18658, complete genome',\n",
" 'NC_019893.1 Singulisphaera acidiphila DSM 18658 plasmid pSINAC01, complete sequence',\n",
" 'NC_019894.1 Singulisphaera acidiphila DSM 18658 plasmid pSINAC02, complete sequence',\n",
" 'NC_019895.1 Singulisphaera acidiphila DSM 18658 plasmid pSINAC03, complete sequence',\n",
" 'NC_017098.1 Spirochaeta africana DSM 8902, complete genome',\n",
" 'NC_017770.1 Solitalea canadensis DSM 3403, complete genome',\n",
" 'NC_016787.1 Corynebacterium diphtheriae HC03, complete genome',\n",
" 'NC_020272.1 Bacillus amyloliquefaciens IT-45, complete genome',\n",
" 'NC_020273.1 Bacillus amyloliquefaciens IT-45 plasmid pBA45-1, whole genome shotgun sequence',\n",
" 'NC_021171.1 Bacillus sp. 1NLA3E, complete genome',\n",
" 'NZ_CP007053.1 Opitutaceae bacterium TAV5, complete genome',\n",
" 'NZ_CP007054.1 Opitutaceae bacterium TAV5 plasmid, complete sequence',\n",
" 'NC_016745.1 Oceanimonas sp. GK1, complete genome',\n",
" 'NC_016746.1 Oceanimonas sp. GK1 plasmid pOCEGK01, complete sequence',\n",
" 'NC_016747.1 Oceanimonas sp. GK1 plasmid pOCEGK02, complete sequence',\n",
" 'NZ_CP007035.1 Niabella soli DSM 19437, complete genome',\n",
" 'NC_019903.1 Desulfitobacterium dichloroeliminans LMG P-21439, complete genome',\n",
" 'NC_018017.1 Desulfitobacterium dehalogenans ATCC 51507, complete genome',\n",
" 'NC_016791.1 Clostridium sp. BNL1100, complete genome',\n",
" 'NZ_CP006932.1 Candidatus Hepatoplasma crinochetorum Av, complete genome',\n",
" 'NZ_CP006829.1 Thalassolituus oleivorans R6-15, complete genome',\n",
" 'NZ_CP007122.1 Lactobacillus paracasei N1115, complete genome',\n",
" 'NZ_CP007123.1 Lactobacillus paracasei N1115 plasmid, complete sequence',\n",
" 'NZ_CP007124.1 Lactobacillus paracasei N1115 plasmid, complete sequence',\n",
" 'NZ_CP007125.1 Lactobacillus paracasei N1115 plasmid, complete sequence',\n",
" 'NZ_CP007126.1 Lactobacillus paracasei N1115 plasmid, complete sequence',\n",
" 'NC_022568.1 Listeria monocytogenes EGD, complete genome',\n",
" 'NZ_CP007244.1 Bacillus amyloliquefaciens subsp. plantarum TrigoCor1448, complete genome',\n",
" 'NZ_CP006742.1 Bacillus anthracis str. SVA11, complete genome',\n",
" 'NZ_CP006743.1 Bacillus anthracis str. SVA11 plasmid pXO1, complete sequence',\n",
" 'NZ_CP006744.1 Bacillus anthracis str. SVA11 plasmid pXO2, complete sequence',\n",
" 'NZ_CP007183.1 Campylobacter coli RM1875, complete genome',\n",
" 'NZ_CP007185.1 Campylobacter coli RM1875 plasmid pRM1875_2.4kb, complete sequence',\n",
" 'NZ_CP007186.1 Campylobacter coli RM1875 plasmid pRM1875_3.3kb, complete sequence',\n",
" 'NZ_CP007187.1 Campylobacter coli RM1875 plasmid pRM1875_3.4kbp, complete sequence',\n",
" 'NZ_CP007184.1 Campylobacter coli RM1875 plasmid pRM1875_35kb, complete sequence',\n",
" 'NZ_CP007181.1 Campylobacter coli RM4661, complete genome',\n",
" 'NZ_CP007182.1 Campylobacter coli RM4661 plasmid pRM4661_48kbp, complete sequence',\n",
" 'NZ_CP007179.1 Campylobacter coli RM5611, complete genome',\n",
" 'NZ_CP007180.1 Campylobacter coli RM5611 plasmid pRM5611_48kb, complete sequence',\n",
" 'NZ_CP004368.1 Burkholderia pseudomallei MSHR520 chromosome 1, complete sequence',\n",
" 'NZ_CP004369.1 Burkholderia pseudomallei MSHR520 chromosome 2, complete sequence',\n",
" 'NZ_CP004373.1 Gluconobacter oxydans DSM 3504, complete genome',\n",
" 'NZ_CP006571.1 Chlamydia avium 10DC88, complete genome',\n",
" 'NZ_CP006572.1 Chlamydia avium 10DC88 plasmid p10DC88, complete sequence',\n",
" 'NZ_CP007410.1 Pseudomonas brassicacearum strain DF41, complete genome',\n",
" 'NZ_CP007436.1 Bacillus pumilus strain MTCC B6033, complete genome',\n",
" 'NZ_CP007441.1 Pseudomonas stutzeri strain 28a24, complete genome',\n",
" 'NC_022904.2 Pandoraea pnomenusa 3kgm, complete genome',\n",
" 'NC_022198.1 Corynebacterium argentoratense DSM 44202, complete genome',\n",
" 'NC_022117.1 Chlamydia trachomatis strain F/1-93, complete genome',\n",
" 'NC_022118.1 Chlamydia trachomatis strain F/6-94, complete genome',\n",
" 'NC_022873.1 Bacillus thuringiensis YBT-1518, complete genome',\n",
" 'NC_020124.1 Bacillus thuringiensis YBT-1518 plasmid pBMB0228, complete sequence',\n",
" 'NC_022874.1 Bacillus thuringiensis YBT-1518 plasmid pBMB0229, complete sequence',\n",
" 'NC_022875.1 Bacillus thuringiensis YBT-1518 plasmid pBMB0230, complete sequence',\n",
" 'NC_022876.1 Bacillus thuringiensis YBT-1518 plasmid pBMB0231, complete sequence',\n",
" 'NC_022877.1 Bacillus thuringiensis YBT-1518 plasmid pBMB0232, complete sequence',\n",
" 'NC_022882.1 Bacillus thuringiensis YBT-1518 plasmid pBMB0233, complete sequence',\n",
" 'NC_022886.1 Helicobacter pylori BM012A, complete genome',\n",
" 'NC_022911.1 Helicobacter pylori BM012S, complete genome',\n",
" 'NZ_CP017152.1 Acinetobacter baumannii DU202, complete genome',\n",
" 'NZ_CP010979.1 Pseudomonas putida S13.1.2, complete genome',\n",
" 'NC_022900.1 Zymomonas mobilis subsp. mobilis str. CP4 = NRRL B-14023, complete genome',\n",
" 'NC_022901.1 Zymomonas mobilis subsp. mobilis str. CP4 = NRRL B-14023 plasmid, complete sequence',\n",
" 'NC_022913.1 Zymomonas mobilis subsp. mobilis str. CP4 = NRRL B-14023 plasmid, complete sequence',\n",
" 'NC_022902.1 Zymomonas mobilis subsp. mobilis str. CP4 = NRRL B-14023 plasmid, complete sequence',\n",
" 'NC_022903.1 Zymomonas mobilis subsp. mobilis str. CP4 = NRRL B-14023 plasmid, complete sequence',\n",
" 'NC_022910.1 Zymomonas mobilis subsp. mobilis str. CP4 = NRRL B-14023 plasmid, complete sequence',\n",
" 'NC_022909.1 Lactobacillus johnsonii N6.2, complete genome',\n",
" 'NZ_CP009365.1 Pseudomonas soli strain SJ10, complete genome',\n",
" 'NC_022964.1 Dehalococcoides mccartyi GY50, complete genome',\n",
" 'NZ_HG738867.1 Escherichia coli str. K-12 substr. MC4100 complete genome',\n",
" 'NC_022535.1 Rhizobium sp. IRBG74 circular chromosome, complete genome',\n",
" 'NC_022545.1 Rhizobium sp. IRBG74 linear chromosome, complete genome',\n",
" 'NC_022536.1 Rhizobium sp. IRBG74 plasmid IRBL74_p, complete sequence',\n",
" 'NZ_LN794217.1 Rickettsia monacensis, strain IrR/Munich, complete genome',\n",
" 'NZ_CP011913.1 Corynebacterium ulcerans FRC58, complete genome',\n",
" 'NC_019896.1 Bacillus subtilis subsp. subtilis str. BSP1, complete genome',\n",
" 'NC_019902.2 Thioalkalivibrio nitratireducens DSM 14787, complete genome',\n",
" 'NC_019908.1 Brachyspira pilosicoli P43/6/78, complete genome',\n",
" 'NC_019904.1 Echinicola vietnamensis DSM 17526, complete genome',\n",
" 'NC_019905.1 Pseudomonas putida HB3267, complete genome',\n",
" 'NC_019906.1 Pseudomonas putida HB3267 plasmid pPC9, complete sequence',\n",
" 'NC_019907.1 Liberibacter crescens BT-1, complete genome',\n",
" 'NC_019940.1 Thioflavicoccus mobilis 8321, complete genome',\n",
" 'NC_019941.1 Thioflavicoccus mobilis 8321 plasmid pTHIMO01, complete sequence',\n",
" 'NC_019936.1 Pseudomonas stutzeri RCH2, complete genome',\n",
" 'NC_019937.1 Pseudomonas stutzeri RCH2 plasmid pPSEST01, complete sequence',\n",
" 'NC_019938.1 Pseudomonas stutzeri RCH2 plasmid pPSEST02, complete sequence',\n",
" 'NC_019939.1 Pseudomonas stutzeri RCH2 plasmid pPSEST03, complete sequence',\n",
" 'NC_019955.1 Vibrio parahaemolyticus BB22OP chromosome 1, complete sequence',\n",
" 'NC_019971.1 Vibrio parahaemolyticus BB22OP chromosome 2, complete sequence',\n",
" 'NC_019970.1 Thermoanaerobacterium thermosaccharolyticum M0795, complete genome',\n",
" 'NC_019956.1 Thermoanaerobacterium thermosaccharolyticum M0795 plasmid pTHETHE01, complete sequence',\n",
" 'NC_019966.1 Mycobacterium sp. JS623, complete genome',\n",
" 'NC_019957.1 Mycobacterium sp. JS623 plasmid pMYCSM01, complete sequence',\n",
" 'NC_019958.1 Mycobacterium sp. JS623 plasmid pMYCSM02, complete sequence',\n",
" 'NC_019959.1 Mycobacterium sp. JS623 plasmid pMYCSM03, complete sequence',\n",
" 'NC_019978.1 Halobacteroides halobius DSM 5150, complete genome',\n",
" 'NC_019673.1 Saccharothrix espanaensis DSM 44229 complete genome',\n",
" 'NC_019949.1 Mycoplasma cynos C142 complete genome',\n",
" 'NC_019954.2 Tepidanaerobacter acetatoxydans Re1 complete genome',\n",
" 'NZ_CP007528.1 Salmonella enterica subsp. enterica serovar Enteritidis str. CDC_2010K_0968, complete genome',\n",
" 'NZ_CP007529.1 Salmonella enterica subsp. enterica serovar Enteritidis str. CDC_2010K_0968 plasmid00, complete sequence',\n",
" 'NZ_CP012347.1 Salmonella enterica subsp. enterica serovar Pullorum str. ATCC 9120, complete genome',\n",
" 'NZ_CP012348.1 Salmonella enterica subsp. enterica serovar Pullorum str. ATCC 9120 plasmid pCFSAN000725_01, complete sequence',\n",
" 'NZ_CP007255.1 Rhodococcus erythropolis R138, complete genome',\n",
" 'NZ_CP007256.1 Rhodococcus erythropolis R138 plasmid pCRE138, complete sequence',\n",
" 'NZ_CP007257.1 Rhodococcus erythropolis R138 plasmid pLRE138, complete sequence',\n",
" 'NZ_CP015880.1 Ensifer adhaerens strain Casida A chromosome, complete genome',\n",
" 'NZ_CP015881.1 Ensifer adhaerens strain Casida A plasmid pCasidaAA, complete sequence',\n",
" 'NZ_CP015882.1 Ensifer adhaerens strain Casida A plasmid pCasidaAB, complete sequence',\n",
" 'NZ_CP007691.1 Brucella suis bv. 2 strain PT09143 chromosome I, complete sequence',\n",
" 'NZ_CP007692.1 Brucella suis bv. 2 strain PT09143 chromosome II, complete sequence',\n",
" 'NZ_CP007693.1 Brucella suis bv. 2 strain PT09172 chromosome I, complete sequence',\n",
" 'NZ_CP007694.1 Brucella suis bv. 2 strain PT09172 chromosome II, complete sequence',\n",
" 'NZ_CP007697.1 Brucella suis bv. 2 strain Bs364CITA chromosome I, complete sequence',\n",
" 'NZ_CP007698.1 Brucella suis bv. 2 strain Bs364CITA chromosome II, complete sequence',\n",
" 'NZ_CP007720.1 Brucella suis bv. 2 strain Bs396CITA chromosome I, complete sequence',\n",
" 'NZ_CP007721.1 Brucella suis bv. 2 strain Bs396CITA chromosome II, complete sequence',\n",
" 'NZ_CP007805.1 Haemophilus influenzae CGSHiCZ412602, complete genome',\n",
" 'NZ_CP007803.1 Mycobacterium tuberculosis K, complete genome',\n",
" 'NZ_CP006696.1 Xylella fastidiosa subsp. sandyi Ann-1, complete genome',\n",
" 'NZ_CP006697.1 Xylella fastidiosa subsp. sandyi Ann-1 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP006740.1 Xylella fastidiosa MUL0034, complete genome',\n",
" 'NZ_CP006739.1 Xylella fastidiosa MUL0034 plasmid unnamed2, complete sequence',\n",
" 'NZ_CP006986.1 Rhizobium sp. IE4771 chromosome, complete genome',\n",
" 'NZ_CP006987.1 Rhizobium sp. IE4771 plasmid pRetIE4771a, complete sequence',\n",
" 'NZ_CP006988.1 Rhizobium sp. IE4771 plasmid pRetIE4771b, complete sequence',\n",
" 'NZ_CP006989.1 Rhizobium sp. IE4771 plasmid pRetIE4771c, complete sequence',\n",
" 'NZ_CP006990.1 Rhizobium sp. IE4771 plasmid pRetIE4771d, complete sequence',\n",
" 'NZ_CP006991.1 Rhizobium sp. IE4771 plasmid pRetIE4771e, complete sequence',\n",
" 'NZ_CP008696.1 Pseudomonas chlororaphis strain PA23, complete genome',\n",
" 'NZ_CP006819.1 Streptococcus thermophilus ASCC 1275, complete genome',\n",
" 'NZ_CP007695.1 Brucella suis bv. 2 strain Bs143CITA chromosome I, complete sequence',\n",
" 'NZ_CP007696.1 Brucella suis bv. 2 strain Bs143CITA chromosome II, complete sequence',\n",
" 'NC_020533.1 Staphylococcus aureus subsp. aureus ST228 complete genome, isolate 16035',\n",
" 'NC_020534.1 Staphylococcus aureus subsp. aureus ST228 plasmid pI4T8 complete sequence, isolate 16035',\n",
" 'NC_020566.1 Staphylococcus aureus subsp. aureus ST228 complete genome, isolate 16125',\n",
" 'NC_020535.1 Staphylococcus aureus subsp. aureus ST228 plasmid pI5S5 complete sequence, isolate 16125',\n",
" 'NC_020568.1 Staphylococcus aureus subsp. aureus ST228 complete genome, isolate 18583',\n",
" 'NC_020539.1 Staphylococcus aureus subsp. aureus ST228 plasmid pI8T7 complete sequence, isolate 18583',\n",
" 'NC_021173.1 Burkholderia thailandensis MSMB121 chromosome 1, complete sequence',\n",
" 'NC_021174.1 Burkholderia thailandensis MSMB121 chromosome 2, complete sequence',\n",
" 'NC_021176.1 Salmonella enterica subsp. enterica serovar Typhi str. Ty21a, complete genome',\n",
" 'NC_021175.1 Streptococcus cristatus AS 1.3089, complete genome',\n",
" 'NC_021177.1 Streptomyces fulvissimus DSM 40593, complete sequence',\n",
" 'NZ_CP010539.1 Mycoplasma pneumoniae 19294, complete genome',\n",
" 'NC_021182.1 Clostridium pasteurianum BC1, complete genome',\n",
" 'NC_021183.1 Clostridium pasteurianum BC1 plasmid pCLOPA01, complete sequence',\n",
" 'NC_021181.2 Lactobacillus acidophilus La-14, complete genome',\n",
" 'NC_021194.1 Mycobacterium tuberculosis EAI5/NITR206, complete genome',\n",
" 'NC_021191.1 Actinoplanes sp. N902-109, complete genome',\n",
" 'NC_021200.1 Mycobacterium avium subsp. paratuberculosis MAP4, complete genome',\n",
" 'NC_021213.1 Streptococcus suis TL13, complete genome',\n",
" 'NZ_CP009144.1 Sinorhizobium meliloti strain RMO17, complete genome',\n",
" 'NZ_CP009145.1 Sinorhizobium meliloti strain RMO17 plasmid pSymA, complete sequence',\n",
" 'NZ_CP009146.1 Sinorhizobium meliloti strain RMO17 plasmid pSymB, complete sequence',\n",
" 'NZ_CP009215.1 Corynebacterium ureicelerivorans strain IMMIB RIV-2301, complete genome',\n",
" 'NZ_CP009216.1 Corynebacterium ureicelerivorans strain IMMIB RIV-2301 plasmid unnamed, complete sequence',\n",
" 'NZ_CP008853.1 Bacillus anthracis strain Cvac02, complete genome',\n",
" 'NZ_CP008855.1 Bacillus sp. X1(2014), complete genome',\n",
" 'NZ_CP009278.1 Sphingobacterium sp. ML3W, complete genome',\n",
" 'NZ_CP007607.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520, complete genome',\n",
" 'NZ_CP007612.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB11, complete sequence',\n",
" 'NZ_CP007608.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB2062, complete sequence',\n",
" 'NZ_CP007615.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB293, complete sequence',\n",
" 'NZ_CP007616.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB400, complete sequence',\n",
" 'NZ_CP007613.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB69, complete sequence',\n",
" 'NZ_CP007609.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB7921, complete sequence',\n",
" 'NZ_CP007610.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB8240, complete sequence',\n",
" 'NZ_CP007611.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB8513, complete sequence',\n",
" 'NZ_CP007614.1 Bacillus thuringiensis serovar kurstaki str. YBT-1520 plasmid pBMB95, complete sequence',\n",
" 'NZ_CP003424.1 Serratia sp. SCBI, complete genome',\n",
" 'NZ_CP003425.1 Serratia sp. SCBI plasmid SCBI_Pl, complete sequence',\n",
" 'NZ_CP006683.1 Melissococcus plutonius S1, complete genome',\n",
" 'NZ_CP006684.1 Melissococcus plutonius S1 plasmid pMEPL_178, complete sequence',\n",
" 'NZ_CP009239.1 Cellulophaga lytica strain HI1, complete genome',\n",
" 'NZ_CP009089.1 Salmonella enterica subsp. enterica serovar Enteritidis strain OLF-SE7-100819, complete genome',\n",
" 'NZ_CP008897.1 Enterobacter cloacae ECNIH3, complete genome',\n",
" 'NZ_CP008898.1 Enterobacter cloacae ECNIH3 plasmid pENT-576, complete sequence',\n",
" 'NZ_CP008899.1 Enterobacter cloacae ECNIH3 plasmid pENT-8a4, complete sequence',\n",
" 'NZ_CP008900.1 Enterobacter cloacae ECNIH3 plasmid pENT-d4a, complete sequence',\n",
" 'NZ_CP008901.1 Enterobacter cloacae ECNIH3 plasmid pKPC-47e, complete sequence',\n",
" 'NZ_CP009090.1 Salmonella enterica subsp. enterica serovar Enteritidis strain OLF-SE8-1021710, complete genome',\n",
" 'NZ_CP008905.1 Enterobacter cloacae ECR091, complete genome',\n",
" 'NZ_CP008906.1 Enterobacter cloacae ECR091 plasmid pENT-08e, complete sequence',\n",
" 'NZ_CP008907.1 Enterobacter cloacae ECR091 plasmid pENT-4bd, complete sequence',\n",
" 'NZ_CP008908.1 Enterobacter cloacae ECR091 plasmid pKPC-47e, complete sequence',\n",
" 'NZ_CP009091.1 Salmonella enterica subsp. enterica serovar Enteritidis strain OLF-SE9-10012, complete genome',\n",
" 'NZ_CP008943.1 Yersinia pseudotuberculosis strain ATCC 6904, complete genome',\n",
" 'NZ_CP009092.1 Salmonella enterica subsp. enterica serovar Enteritidis strain OLF-SE10-10052, complete genome',\n",
" 'NZ_CP009093.1 Salmonella enterica subsp. enterica serovar Enteritidis strain OLF-SE11-10058, complete genome',\n",
" 'NZ_CP009085.2 Salmonella enterica subsp. enterica serovar Enteritidis strain OLF-SE3-98983-4, complete genome',\n",
" 'NZ_CP009084.2 Salmonella enterica subsp. enterica serovar Enteritidis strain OLF-SE2-98984-6, complete genome',\n",
" 'NZ_CP007229.1 Mycoplasma dispar strain ATCC 27140, complete genome',\n",
" 'NZ_CP016174.1 Amycolatopsis orientalis strain B-37, complete genome',\n",
" 'NZ_CP010954.1 Sphingobium sp. YBL2, complete genome',\n",
" 'NZ_CP010955.1 Sphingobium sp. YBL2 plasmid 1pYBL2-1, complete sequence',\n",
" 'NZ_CP010956.1 Sphingobium sp. YBL2 plasmid 2pYBL2-2, complete sequence',\n",
" 'NZ_CP010957.1 Sphingobium sp. YBL2 plasmid 3pYBL2-3, complete sequence',\n",
" 'NZ_CP010958.1 Sphingobium sp. YBL2 plasmid 4pYBL2-4, complete sequence',\n",
" 'NZ_CP010959.1 Sphingobium sp. YBL2 plasmid 5pYBL2-5, complete sequence',\n",
" 'NZ_CP010960.1 Sphingobium sp. YBL2 plasmid 6pYBL2-6, complete sequence',\n",
" 'NZ_CP009677.1 Mycoplasma hominis strain AF1, complete genome',\n",
" 'NZ_CP007166.1 Xanthomonas oryzae pv. oryzae PXO86, complete genome',\n",
" 'NZ_CP010976.1 Paenibacillus sp. IHBB 10380, complete genome',\n",
" 'NZ_CP010977.1 Paenibacillus sp. IHBB 10380 plasmid, complete sequence',\n",
" 'NZ_CP011005.1 Arthrobacter sp. IHBB 11108, complete genome',\n",
" 'NZ_CP011006.1 Arthrobacter sp. IHBB 11108 plasmid pAG001, complete sequence',\n",
" 'NZ_CP011004.1 Listeria monocytogenes strain N2306, complete genome',\n",
" 'NZ_CP007699.2 Streptomyces lydicus strain A02 chromosome, complete genome',\n",
" 'NZ_LN680001.1 Bacillus sp. BS34A genome assembly, chromosome: I',\n",
" 'NZ_LM995445.1 Mycoplasma capricolum subsp. capripneumoniae genome assembly 9231-Abomsa assembly V8, chromosome : I',\n",
" 'NZ_LN554882.1 Chlamydophila abortus genome assembly CAAB7, chromosome : 1',\n",
" 'NZ_LM995446.1 Escherichia coli genome assembly EcRV308Chr, chromosome : 1',\n",
" 'NZ_LM997412.1 Peptoniphilus sp. ING2-D1G genome assembly, chromosome: I',\n",
" 'NZ_LN794158.1 Candidatus Methylopumilus turicensis genome assembly Candidatus Methylopumilus turicensis MMS-10A-171, chromosome : 1',\n",
" 'NZ_LM651928.1 Shigella flexneri genome assembly NCTC1, chromosome : 1',\n",
" 'NZ_LK936442.1 Burkholderia pseudomallei genome assembly BP_3921g, chromosome : 1',\n",
" 'NZ_LK936443.1 Burkholderia pseudomallei genome assembly BP_3921g, chromosome : 2',\n",
" 'NZ_LN614827.1 Legionella fallonii LLAP-10 genome assembly LFA, chromosome : I',\n",
" 'NZ_LN614828.1 Legionella fallonii LLAP-10 genome assembly LFA, plasmid : II',\n",
" 'NZ_LN614829.1 Legionella fallonii LLAP-10 genome assembly LFA, plasmid : III',\n",
" 'NZ_CP007686.1 Listeria monocytogenes strain L2624, complete genome',\n",
" 'NZ_CP007685.1 Listeria monocytogenes strain L2676, complete genome',\n",
" 'NZ_CP011511.1 Shigella boydii strain ATCC 9210, complete genome',\n",
" 'NZ_CP007684.1 Listeria monocytogenes strain L2626, complete genome',\n",
" 'NZ_CP011501.1 Pandoraea apista strain AU2161, complete genome',\n",
" 'NZ_CP011509.1 Archangium gephyra strain DSM 2261, complete genome',\n",
" 'NZ_CP009498.1 Endomicrobium proavitum strain Rsa215, complete genome',\n",
" 'NZ_CP011827.2 Xanthomonas citri pv. citri strain jx-6, complete genome',\n",
" 'NZ_CP013665.1 Xanthomonas citri pv. citri strain jx-6 plasmid pXAC33, complete sequence',\n",
" 'NZ_CP013664.1 Xanthomonas citri pv. citri strain jx-6 plasmid pXAC64, complete sequence',\n",
" 'NZ_CP011805.1 Altererythrobacter marensis strain KCTC 22370, complete genome',\n",
" 'NZ_CP007601.1 Staphylococcus capitis subsp. capitis strain AYP1020, complete genome',\n",
" 'NZ_CP007602.1 Staphylococcus capitis subsp. capitis strain AYP1020 plasmid pAYP1020, complete sequence',\n",
" 'NZ_CP011503.1 Burkholderia pyrrocinia strain DSM 10685 chromosome 1, complete sequence',\n",
" 'NZ_CP011504.1 Burkholderia pyrrocinia strain DSM 10685 chromosome 2, complete sequence',\n",
" 'NZ_CP011505.1 Burkholderia pyrrocinia strain DSM 10685 chromosome 3, complete sequence',\n",
" 'NZ_CP011506.1 Burkholderia pyrrocinia strain DSM 10685 plasmid p2327, complete sequence',\n",
" 'NZ_CP011770.1 Croceicoccus naphthovorans strain PQ-2, complete genome',\n",
" 'NZ_CP011771.1 Croceicoccus naphthovorans strain PQ-2 plasmid p1, complete sequence',\n",
" 'NZ_CP011772.1 Croceicoccus naphthovorans strain PQ-2 plasmid p2, complete sequence',\n",
" 'NZ_CP011807.3 Pandoraea faecigallinarum strain DSM 23572, complete genome',\n",
" 'NZ_CP011808.2 Pandoraea faecigallinarum strain DSM 23572 plasmid pPF72-1, complete sequence',\n",
" 'NZ_CP011809.2 Pandoraea faecigallinarum strain DSM 23572 plasmid pPF72-2, complete sequence',\n",
" 'NZ_CP006636.1 Escherichia coli PCN061, complete genome',\n",
" 'NZ_CP006637.1 Escherichia coli PCN061 plasmid PCN061p1, complete sequence',\n",
" 'NZ_CP006638.1 Escherichia coli PCN061 plasmid PCN061p2, complete sequence',\n",
" 'NZ_CP006639.1 Escherichia coli PCN061 plasmid PCN061p3, complete sequence',\n",
" 'NZ_CP006640.1 Escherichia coli PCN061 plasmid PCN061p4, complete sequence',\n",
" 'NZ_CP006641.1 Escherichia coli PCN061 plasmid PCN061p5, complete sequence',\n",
" 'NZ_CP006642.1 Escherichia coli PCN061 plasmid PCN061p6, complete sequence',\n",
" 'NZ_CP009743.1 Burkholderia contaminans strain MS14 chromosome 1, complete sequence',\n",
" 'NZ_CP009744.1 Burkholderia contaminans strain MS14 chromosome 2, complete sequence',\n",
" 'NZ_CP009745.1 Burkholderia contaminans strain MS14 chromosome 3, complete sequence',\n",
" 'NZ_CP011855.1 Spiroplasma atrichopogonis strain GNAT3597, complete genome',\n",
" 'NZ_CP011856.1 Spiroplasma eriocheiris strain DSM 21848, complete genome',\n",
" 'NZ_CP011798.1 Enterobacter cloacae strain UW5, complete genome',\n",
" 'NZ_CP007667.1 Neisseria meningitidis strain B6116/77, complete genome',\n",
" 'NZ_CP007668.1 Neisseria meningitidis M0579, complete genome',\n",
" 'NZ_CP011882.1 Bacillus subtilis strain TO-A JPC, complete genome',\n",
" 'NZ_CP011804.1 Clostridium carboxidivorans P7 plasmid, complete sequence',\n",
" 'NZ_CP011803.1 Clostridium carboxidivorans P7, complete genome',\n",
" 'NZ_CP005969.1 Pseudomonas syringae pv. syringae B301D, complete genome',\n",
" 'NZ_CP007584.2 Salmonella enterica subsp. enterica serovar Anatum str. USDA-ARS-USMARC-1735, complete genome',\n",
" 'NZ_CP014707.1 Salmonella enterica subsp. enterica serovar Anatum strain USMARC-1735 plasmid pSAN1-1735, complete sequence',\n",
" 'NZ_CP006003.1 Myxococcus fulvus 124B02, complete genome',\n",
" 'NZ_CP011374.1 Moraxella bovoculi strain 58069, complete genome',\n",
" 'NZ_CP011375.1 Moraxella bovoculi strain 58069 plasmid, complete sequence',\n",
" 'NZ_CP011376.1 Moraxella bovoculi strain 22581, complete genome',\n",
" 'NZ_CP011377.1 Moraxella bovoculi strain 23343, complete genome',\n",
" 'NZ_CP011378.1 Moraxella bovoculi strain 28389, complete genome',\n",
" 'NZ_CP011379.1 Moraxella bovoculi strain 33362, complete genome',\n",
" 'NZ_CP011380.2 Moraxella bovoculi strain 57922, complete genome',\n",
" 'NZ_CP011365.1 Salmonella enterica subsp. enterica serovar Typhimurium strain FORC_015, complete genome',\n",
" 'NZ_CP007497.1 Streptococcus suis strain ZY05719, complete genome',\n",
" 'NZ_CP007537.1 Streptococcus pyogenes strain AP1, complete genome',\n",
" 'NZ_CP009922.2 Streptomyces xiamenensis strain 318, complete genome',\n",
" 'NZ_CP011114.1 Paenibacillus durus ATCC 35681, complete genome',\n",
" 'NZ_CP011366.1 Salinicoccus halodurans strain H3B36, complete genome',\n",
" 'NZ_CP011398.2 Listeria monocytogenes strain CFSAN008100 chromosome, complete genome',\n",
" 'NZ_CP011399.1 Listeria monocytogenes strain CFSAN008100 plasmid pCFSAN008100, complete sequence',\n",
" 'NZ_AP012555.1 Mycobacterium avium subsp. hominissuis TH135 chromosomal DNA, complete genome',\n",
" 'NZ_AP012556.1 Mycobacterium avium subsp. hominissuis TH135 plasmid pMAH135 DNA, complete genome',\n",
" 'NZ_AP013354.1 Helicobacter pylori 26695-1 DNA, complete genome',\n",
" 'NZ_AP013356.1 Helicobacter pylori 26695-1CL DNA, complete genome',\n",
" 'NZ_AP013355.1 Helicobacter pylori 26695-1CH DNA, complete genome',\n",
" 'NZ_AP013293.1 Candidatus Sulcia muelleri PSPU DNA, complete genome',\n",
" 'NZ_AP014582.1 Bordetella bronchiseptica DNA, complete genome, strain: S798',\n",
" 'NZ_AP013294.1 Bacillus sp. OxB-1 DNA, complete genome',\n",
" 'NZ_AP014524.1 Vibrio cholerae MS6 DNA, complete genome, chromosome 1',\n",
" 'NZ_AP014525.1 Vibrio cholerae MS6 DNA, complete genome, chromosome 2',\n",
" 'NZ_AP012549.1 Cyanobacterium endosymbiont of Epithemia turgida isolate EtSB Lake Yunoko DNA, complete genome',\n",
" 'NZ_AP014646.1 Pseudomonas aeruginosa DNA, complete genome, strain: NCGM 1984',\n",
" 'NZ_AP014622.1 Pseudomonas aeruginosa DNA, complete genome, strain: NCGM 1900',\n",
" 'NZ_AP014658.1 Bifidobacterium longum DNA, complete genome, strain: 105-A',\n",
" 'NZ_AP013028.1 Wolbachia endosymbiont of Cimex lectularius DNA, complete genome',\n",
" 'NZ_CP026055.1 Aeromonas caviae strain FDAARGOS_72 chromosome, complete genome',\n",
" 'NZ_CP026052.1 Salmonella enterica strain FDAARGOS_70 chromosome, complete genome',\n",
" 'NZ_CP026053.1 Salmonella enterica strain FDAARGOS_70 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP026054.1 Salmonella enterica strain FDAARGOS_70 plasmid unnamed2, complete sequence',\n",
" 'NZ_CP026051.1 Proteus mirabilis strain FDAARGOS_67 chromosome, complete genome',\n",
" 'NZ_CP026050.1 Serratia marcescens strain FDAARGOS_65 chromosome, complete genome',\n",
" 'NZ_CP026047.1 Raoultella planticola strain FDAARGOS_64 chromosome, complete genome',\n",
" 'NZ_CP026048.1 Raoultella planticola strain FDAARGOS_64 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP026049.1 Raoultella planticola strain FDAARGOS_64 plasmid unnamed2, complete sequence',\n",
" 'NZ_CP026046.1 Morganella morganii strain FDAARGOS_63 chromosome, complete genome',\n",
" 'NZ_CP026045.1 Citrobacter freundii strain FDAARGOS_61 chromosome, complete genome',\n",
" 'NZ_CP026044.1 Proteus mirabilis strain FDAARGOS_60 chromosome, complete genome',\n",
" 'NZ_CP009850.1 Enterobacter cloacae strain ECNIH4, complete genome',\n",
" 'NZ_CP009851.1 Enterobacter cloacae strain ECNIH4 plasmid pENT-c88, complete sequence',\n",
" 'NZ_CP009852.1 Enterobacter cloacae strain ECNIH4 plasmid pENT-e56, complete sequence',\n",
" 'NZ_CP009853.1 Enterobacter cloacae strain ECNIH4 plasmid pKPC-860, complete sequence',\n",
" 'NZ_CP009880.1 Pantoea sp. PSNIH1, complete genome',\n",
" 'NZ_CP009881.1 Pantoea sp. PSNIH1 plasmid pKPC-1c5, complete sequence',\n",
" 'NZ_CP010325.1 Pantoea sp. PSNIH1 plasmid pPSP-057, complete sequence',\n",
" 'NZ_CP009882.1 Pantoea sp. PSNIH1 plasmid pPSP-26e, complete sequence',\n",
" 'NZ_CP010326.1 Pantoea sp. PSNIH1 plasmid pPSP-3a9, complete sequence',\n",
" 'NZ_CP009883.1 Pantoea sp. PSNIH1 plasmid pPSP-a3e, complete sequence',\n",
" 'NZ_CP009884.1 Pantoea sp. PSNIH1 plasmid pPSP-ee2, complete sequence',\n",
" 'NZ_CP009854.1 Enterobacter cloacae strain ECNIH5, complete genome',\n",
" 'NZ_CP009855.1 Enterobacter cloacae strain ECNIH5 plasmid pENT-22e, complete sequence',\n",
" 'NZ_CP009856.1 Enterobacter cloacae strain ECNIH5 plasmid pENT-784, complete sequence',\n",
" 'NZ_CP009857.1 Enterobacter cloacae strain ECNIH5 plasmid pENT-d0d, complete sequence',\n",
" 'NZ_CP009858.1 Enterobacter cloacae strain ECNIH5 plasmid pKPC-47e, complete sequence',\n",
" 'NZ_CP009859.1 Escherichia coli strain ECONIH1, complete genome',\n",
" 'NZ_CP009860.1 Escherichia coli strain ECONIH1 plasmid pECO-824, complete sequence',\n",
" 'NZ_CP009861.1 Escherichia coli strain ECONIH1 plasmid pECO-b75, complete sequence',\n",
" 'NZ_CP009862.1 Escherichia coli strain ECONIH1 plasmid pKPC-629, complete sequence',\n",
" 'NZ_CP009863.1 Klebsiella pneumoniae subsp. pneumoniae strain KPNIH29, complete genome',\n",
" 'NZ_CP009864.1 Klebsiella pneumoniae subsp. pneumoniae strain KPNIH29 plasmid pKPC-e4e, complete sequence',\n",
" 'NZ_CP009865.1 Klebsiella pneumoniae subsp. pneumoniae strain KPNIH29 plasmid pKPN-80a, complete sequence',\n",
" 'NZ_CP009866.1 Pantoea sp. PSNIH2, complete genome',\n",
" 'NZ_CP009867.1 Pantoea sp. PSNIH2 plasmid pKPC-56a, complete sequence',\n",
" 'NZ_CP009868.1 Pantoea sp. PSNIH2 plasmid pPSP-100, complete sequence',\n",
" 'NZ_CP009869.1 Pantoea sp. PSNIH2 plasmid pPSP-75c, complete sequence',\n",
" 'NZ_CP009870.1 Pantoea sp. PSNIH2 plasmid pPSP-b98, complete sequence',\n",
" 'NZ_CP009871.1 Pantoea sp. PSNIH2 plasmid pPSP-cd6, complete sequence',\n",
" 'NZ_CP012074.1 Prevotella fusca JCM 17724 strain W1435 chromosome 1, complete sequence',\n",
" 'NZ_CP012075.1 Prevotella fusca JCM 17724 strain W1435 chromosome 2, complete sequence',\n",
" 'NZ_CP012067.1 Aggregatibacter aphrophilus strain W10433, complete genome',\n",
" 'NZ_CP012072.1 Actinomyces meyeri strain W712, complete genome',\n",
" 'NZ_CP012073.1 Ottowia sp. oral taxon 894 strain W10237, complete genome',\n",
" 'NZ_CP012037.1 Francisella tularensis subsp. tularensis strain WY96, complete genome',\n",
" 'NZ_CP012328.1 Spiroplasma turonicum strain Tab4c, complete genome',\n",
" 'NZ_CP012332.1 Vulgatibacter incomptus strain DSM 27710, complete genome',\n",
" 'NZ_CP011786.1 Bifidobacterium actinocoloniiforme DSM 22766, complete genome',\n",
" 'NZ_CP011995.1 Porphyromonas gingivalis strain A7436, complete genome',\n",
" 'NZ_CP011339.1 Microcystis panniformis FACHB-1757, complete genome',\n",
" 'NZ_CP010319.1 Streptococcus agalactiae strain GBS85147, complete genome',\n",
" 'NZ_CP012299.1 Microbacterium sp. CGR1, complete genome',\n",
" 'NZ_CP011929.1 Marinobacter sp. CP1, complete genome',\n",
" 'NZ_CP012357.1 Spiroplasma litorale strain TN-1, complete genome',\n",
" 'NZ_CP012358.1 Oblitimonas alkaliphila strain B4199 chromosome, complete genome',\n",
" 'NZ_CP012359.1 Oblitimonas alkaliphila strain C6819 chromosome, complete genome',\n",
" 'NZ_CP012360.1 Oblitimonas alkaliphila strain C6918 chromosome, complete genome',\n",
" 'NZ_CP012362.1 Oblitimonas alkaliphila strain D3318 chromosome, complete genome',\n",
" 'NZ_CP012363.1 Oblitimonas alkaliphila strain E1086 chromosome, complete genome',\n",
" 'NZ_CP014773.1 Mucilaginibacter sp. PAMC 26640 chromosome, complete genome',\n",
" 'NZ_CP014772.1 Mucilaginibacter sp. PAMC 26640 plasmid unnamed, complete sequence',\n",
" 'NZ_CP014771.1 Hymenobacter sp. PAMC 26554 chromosome, complete genome',\n",
" 'NZ_CP014769.1 Hymenobacter sp. PAMC 26554 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP014770.1 Hymenobacter sp. PAMC 26554 plasmid unnamed2, complete sequence',\n",
" 'NZ_CP014805.2 Elizabethkingia anophelis strain CSID_3015183678 chromosome, complete genome',\n",
" 'NZ_CP014858.1 Bacillus subtilis subsp. subtilis strain D12-5 chromosome, complete genome',\n",
" 'NZ_CP014783.1 Bacillus amyloliquefaciens strain B15 chromosome, complete genome',\n",
" 'NZ_CP014790.1 Listeria monocytogenes strain 2015TE24968 chromosome, complete genome',\n",
" 'NZ_CP015985.1 Listeria monocytogenes strain 2015TE24968 plasmid pl2015TE24968, complete sequence',\n",
" 'NZ_CP014840.1 Bacillus subtilis subsp. globigii strain ATCC 49760 chromosome, complete genome',\n",
" 'NZ_CP014784.1 Pseudomonas alcaligenes strain NEB 585, complete genome',\n",
" 'NZ_CP014835.1 Streptococcus halotolerans strain HTS9 chromosome, complete genome',\n",
" 'NZ_CP014856.1 Lysinibacillus sphaericus III(3)7, complete genome',\n",
" 'NZ_CP014857.1 Lysinibacillus sphaericus III(3)7 plasmid, complete sequence',\n",
" 'NZ_CP014847.1 Bacillus thuringiensis strain HD12, complete sequence',\n",
" 'NZ_CP014848.1 Bacillus thuringiensis strain HD12 plasmid pHD120017, complete sequence',\n",
" 'NZ_CP014849.1 Bacillus thuringiensis strain HD12 plasmid pHD120038, complete sequence',\n",
" 'NZ_CP014850.1 Bacillus thuringiensis strain HD12 plasmid pHD120039, complete sequence',\n",
" 'NZ_CP014851.1 Bacillus thuringiensis strain HD12 plasmid pHD120112, complete sequence',\n",
" 'NZ_CP014852.1 Bacillus thuringiensis strain HD12 plasmid pHD120161, complete sequence',\n",
" 'NZ_CP014853.1 Bacillus thuringiensis strain HD12 plasmid pHD120345, complete sequence',\n",
" 'NZ_CP014949.1 Enterococcus faecalis strain LD33, complete genome',\n",
" 'NZ_CP014991.1 Helicobacter himalayensis strain YS1, complete genome',\n",
" 'NZ_CP014352.1 Acidipropionibacterium acidipropionici strain ATCC 55737 chromosome, complete genome',\n",
" 'NZ_CP014353.1 Acidipropionibacterium acidipropionici strain ATCC 55737 plasmid unnamed, complete sequence',\n",
" 'NZ_CP014867.1 Pseudomonas chlororaphis isolate 189 chromosome, complete genome',\n",
" 'NZ_CP014861.1 Erysipelothrix rhusiopathiae strain GXBY-1 chromosome, complete genome',\n",
" 'NZ_CP014865.1 Rickettsia prowazekii strain Naples-1 chromosome, complete genome',\n",
" 'NZ_CP015004.1 Bacillus subtilis strain SZMC 6179J, complete genome',\n",
" 'NZ_CP015005.1 Aminobacter aminovorans strain KCTC 2477, complete genome',\n",
" 'NZ_CP015006.1 Aminobacter aminovorans strain KCTC 2477 plasmid pAA01, complete sequence',\n",
" 'NZ_CP015007.1 Aminobacter aminovorans strain KCTC 2477 plasmid pAA02, complete sequence',\n",
" 'NZ_CP015008.1 Aminobacter aminovorans strain KCTC 2477 plasmid pAA03, complete sequence',\n",
" 'NZ_CP015009.1 Aminobacter aminovorans strain KCTC 2477 plasmid pAA04, complete sequence',\n",
" 'NZ_CP010838.1 Bordetella pertussis strain H374, complete genome',\n",
" 'NZ_CP010839.1 Bordetella pertussis strain H378, complete genome',\n",
" 'NZ_CP010840.1 Bordetella pertussis strain H379, complete genome',\n",
" 'NZ_CP014029.2 Klebsiella aerogenes strain FDAARGOS_152 chromosome, complete genome',\n",
" 'NZ_CP014030.2 Citrobacter sp. FDAARGOS_156 strain FDAARGOS_155 chromosome, complete genome',\n",
" 'NZ_CP014031.2 Hafnia paralvei strain FDAARGOS_158 chromosome, complete genome',\n",
" 'NZ_CP014268.2 Escherichia coli B strain C2566, complete genome',\n",
" 'NZ_CP014269.1 Escherichia coli B strain C3029, complete genome',\n",
" 'NZ_CP014270.1 Escherichia coli K-12 strain DHB4, complete genome',\n",
" 'NZ_CP014271.1 Escherichia coli K-12 strain DHB4 plasmid F128-(DHB4), complete sequence',\n",
" 'NZ_CP014272.1 Escherichia coli K-12 strain C3026, complete genome',\n",
" 'NZ_CP014273.1 Escherichia coli K-12 strain C3026 plasmid F128-(C3026), complete sequence',\n",
" 'NZ_CP014326.1 Streptococcus mitis strain SVGS_061, complete genome',\n",
" 'NZ_CP014223.1 [Clostridium] propionicum DSM 1682, complete genome',\n",
" 'NZ_CP014279.1 Corynebacterium stationis strain ATCC 6872, complete genome',\n",
" 'NZ_CP013926.1 Alteromonas stellipolaris strain LMG 21861, complete genome',\n",
" 'NZ_CP013927.1 Alteromonas stellipolaris strain LMG 21861 plasmid pASTE61-200, complete sequence',\n",
" 'NZ_CP013932.1 Alteromonas sp. Mac1, complete genome',\n",
" 'NZ_CP014322.1 Alteromonas addita strain R10SW13, complete genome',\n",
" 'NZ_CP014274.1 Stenotrophomonas sp. KCTC 12332, complete genome',\n",
" 'NZ_CP014323.1 Alteromonas macleodii strain D7, complete genome',\n",
" 'NZ_CP014301.1 Bosea sp. PAMC 26642, complete genome',\n",
" 'NZ_CP014302.1 Bosea sp. PAMC 26642 plasmid, complete sequence',\n",
" 'NZ_CP014304.1 Hymenobacter sp. PAMC26628, complete genome',\n",
" 'NZ_CP014303.1 Hymenobacter sp. PAMC26628 plasmid, complete sequence',\n",
" 'NZ_CP013928.1 Alteromonas mediterranea strain UM8, complete genome',\n",
" 'NZ_CP013929.1 Alteromonas mediterranea strain UM8 plasmid pAMEDUM8_300, complete sequence',\n",
" 'NZ_CP014206.1 Pseudodesulfovibrio indicus strain J2 chromosome, complete genome',\n",
" 'NZ_CP005083.1 Sphingobium sp. TKS chromosome 1, complete sequence',\n",
" 'NZ_CP005084.1 Sphingobium sp. TKS chromosome 2, complete sequence',\n",
" 'NZ_CP005085.1 Sphingobium sp. TKS plasmid pTK1, complete sequence',\n",
" 'NZ_CP005086.1 Sphingobium sp. TKS plasmid pTK2, complete sequence',\n",
" 'NZ_CP005087.1 Sphingobium sp. TKS plasmid pTK3, complete sequence',\n",
" 'NZ_CP005088.1 Sphingobium sp. TKS plasmid pTK4, complete sequence',\n",
" 'NZ_CP005090.1 Sphingobium sp. TKS plasmid pTK6, complete sequence',\n",
" 'NZ_CP005091.1 Sphingobium sp. TKS plasmid pTK7, complete sequence',\n",
" 'NZ_CP005092.1 Sphingobium sp. TKS plasmid pTK8, complete sequence',\n",
" 'NZ_CP005093.1 Sphingobium sp. TKS plasmid pTK9, complete sequence',\n",
" 'NZ_CP005089.1 Sphingobium sp. TKS plasmid pTK5, complete sequence',\n",
" 'NZ_CP005188.1 Sphingobium sp. MI1205 chromosome 1, complete sequence',\n",
" 'NZ_CP005189.1 Sphingobium sp. MI1205 chromosome 2, complete sequence',\n",
" 'NZ_CP005190.1 Sphingobium sp. MI1205 plasmid pMI1, complete sequence',\n",
" 'NZ_CP005191.1 Sphingobium sp. MI1205 plasmid pMI2, complete sequence',\n",
" 'NZ_CP005192.1 Sphingobium sp. MI1205 plasmid pMI3, complete sequence',\n",
" 'NZ_CP005193.1 Sphingobium sp. MI1205 plasmid pMI4, complete sequence',\n",
" 'NZ_CP014004.1 Klebsiella pneumoniae subsp. pneumoniae strain NUHL24835, complete genome',\n",
" 'NZ_CP014005.1 Klebsiella pneumoniae subsp. pneumoniae strain NUHL24835 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP014006.1 Klebsiella pneumoniae subsp. pneumoniae strain NUHL24835 plasmid unnamed2, complete sequence',\n",
" 'NZ_CP013357.1 Burkholderia oklahomensis EO147 chromosome 1, complete sequence',\n",
" 'NZ_CP013356.1 Burkholderia oklahomensis EO147 chromosome 2, complete sequence',\n",
" 'NZ_CP013358.1 Burkholderia oklahomensis C6786 chromosome 1, complete sequence',\n",
" 'NZ_CP013359.1 Burkholderia oklahomensis C6786 chromosome 2, complete sequence',\n",
" 'NZ_CP013373.1 Burkholderia sp. NRF60-BP8 chromosome 1, complete sequence',\n",
" 'NZ_CP013372.1 Burkholderia sp. NRF60-BP8 chromosome 2, complete sequence',\n",
" 'NZ_CP013374.1 Burkholderia sp. NRF60-BP8 chromosome 3, complete sequence',\n",
" 'NZ_CP013389.1 Burkholderia sp. BDU8 chromosome 1, complete sequence',\n",
" 'NZ_CP013388.1 Burkholderia sp. BDU8 chromosome 2, complete sequence',\n",
" 'NZ_CP013386.1 Burkholderia sp. BDU6 chromosome 1, complete sequence',\n",
" 'NZ_CP013387.1 Burkholderia sp. BDU6 chromosome 2, complete sequence',\n",
" 'NZ_CP013384.1 Burkholderia sp. LA-2-3-30-S1-D2 chromosome 1, complete sequence',\n",
" 'NZ_CP013383.1 Burkholderia sp. LA-2-3-30-S1-D2 chromosome 2, complete sequence',\n",
" 'NZ_CP013385.1 Burkholderia sp. LA-2-3-30-S1-D2 chromosome 3, complete sequence',\n",
" 'NZ_CP013417.1 Burkholderia sp. MSMB0266 chromosome 1, complete sequence',\n",
" 'NZ_CP013418.1 Burkholderia sp. MSMB0266 chromosome 2, complete sequence',\n",
" 'NZ_CP013419.1 Burkholderia sp. MSMB0266 plasmid pMSMB0266, complete sequence',\n",
" 'NZ_CP013424.1 Burkholderia sp. MSMB0852 chromosome 1, complete sequence',\n",
" 'NZ_CP013425.1 Burkholderia sp. MSMB0852 chromosome 2, complete sequence',\n",
" 'NZ_CP013423.1 Burkholderia sp. MSMB0852 plasmid pMSMB0852, complete sequence',\n",
" 'NZ_CP013427.1 Burkholderia sp. MSMB0856 chromosome 1, complete sequence',\n",
" 'NZ_CP013428.1 Burkholderia sp. MSMB0856 chromosome 2, complete sequence',\n",
" 'NZ_CP013429.1 Burkholderia sp. MSMB0856 chromosome 3, complete sequence',\n",
" 'NZ_CP013426.1 Burkholderia sp. MSMB0856 plasmid pMSMB0856, complete sequence',\n",
" 'NZ_CP014010.1 Klebsiella pneumoniae subsp. pneumoniae strain RJF999, complete genome',\n",
" 'NZ_CP014011.1 Klebsiella pneumoniae subsp. pneumoniae strain RJF999 plasmid pRJF999, complete sequence',\n",
" 'NZ_CP014008.1 Klebsiella pneumoniae subsp. pneumoniae strain RJF293, complete genome',\n",
" 'NZ_CP014009.1 Klebsiella pneumoniae subsp. pneumoniae strain RJF293 plasmid pRJF293, complete sequence',\n",
" 'NZ_CP013457.1 Burkholderia sp. MSMB617WGS chromosome 1, complete sequence',\n",
" 'NZ_CP013458.1 Burkholderia sp. MSMB617WGS chromosome 2, complete sequence',\n",
" 'NZ_CP013992.1 Flavobacterium columnare strain 94-081, complete genome',\n",
" 'NZ_CP013920.1 Arsenophonus symbiont of Lipoptena fortisetosa strain CB, complete genome',\n",
" 'NZ_CP013975.1 Piscirickettsia salmonis strain CGR02, complete genome',\n",
" 'NZ_CP013976.1 Piscirickettsia salmonis strain CGR02 plasmid pPSCRG02-1, complete sequence',\n",
" 'NZ_CP013977.1 Piscirickettsia salmonis strain CGR02 plasmid pPSCRG02-2, complete sequence',\n",
" 'NZ_CP013978.1 Piscirickettsia salmonis strain CGR02 plasmid pPSCRG02-4, complete sequence',\n",
" 'NZ_CP013997.1 Pseudomonas monteilii strain USDA-ARS-USMARC-56711, complete genome',\n",
" 'NC_022998.1 Spiroplasma apis B31, complete genome',\n",
" 'NC_023037.2 Paenibacillus polymyxa CR1, complete genome',\n",
" 'NZ_CP006579.1 Aeromonas hydrophila 4AK4, complete genome',\n",
" 'NZ_CP006859.1 Listeria monocytogenes serotype 1/2a str. 08-6997, complete genome',\n",
" 'NZ_CP003915.1 Advenella mimigardefordensis DPN7, complete genome',\n",
" 'NZ_CP003916.1 Advenella mimigardefordensis DPN7 plasmid 24p, complete sequence',\n",
" 'NZ_CP007128.1 Gemmatirosa kalamazoonesis strain KBS708, complete genome',\n",
" 'NZ_CP007129.1 Gemmatirosa kalamazoonesis strain KBS708 plasmid 1, complete sequence',\n",
" 'NZ_CP007127.1 Gemmatirosa kalamazoonesis strain KBS708 phage-like extrachromosal element, complete sequence',\n",
" 'NZ_CP007130.1 Gemmatirosa kalamazoonesis strain KBS708 plasmid 2, complete sequence',\n",
" 'NZ_CP006720.1 Spiroplasma mirum ATCC 29335 strain SMCA, complete genome',\n",
" 'NZ_CP006715.1 Bifidobacterium breve 689b, complete genome',\n",
" 'NZ_CP007230.1 Yersinia similis strain 228, complete genome',\n",
" 'NZ_CP007231.1 Yersinia similis strain 228 plasmid, complete sequence',\n",
" 'NC_022107.1 Chlamydia trachomatis F/11-96, complete genome',\n",
" 'NZ_CP007446.1 Snodgrassella alvi wkB2, complete genome',\n",
" 'NZ_CP012371.1 Nitrosospira briensis C-128, complete genome',\n",
" 'NZ_CP010493.1 Campylobacter jejuni strain CJ677CC062, complete genome',\n",
" 'NZ_CP010494.1 Campylobacter jejuni strain CJ677CC059, complete genome',\n",
" 'NZ_CP010496.1 Campylobacter jejuni strain CJ677CC032, complete genome',\n",
" 'NZ_CP010497.1 Campylobacter jejuni strain CJ677CC033, complete genome',\n",
" 'NZ_CP010498.1 Campylobacter jejuni strain CJ677CC537, complete genome',\n",
" 'NZ_CP010499.1 Campylobacter jejuni strain CJ677CC542, complete genome',\n",
" 'NZ_CP010500.1 Campylobacter jejuni strain CJ677CC528, complete genome',\n",
" 'NZ_CP010495.1 Campylobacter jejuni strain CJ677CC538, complete genome',\n",
" 'NZ_CP010501.1 Campylobacter jejuni strain CJ677CC520, complete genome',\n",
" 'NZ_CP010502.1 Campylobacter jejuni strain CJ677CC014, complete genome',\n",
" 'NZ_CP010503.1 Campylobacter jejuni strain CJ677CC039, complete genome',\n",
" 'NZ_CP010504.1 Campylobacter jejuni strain CJ677CC085, complete genome',\n",
" 'NZ_CP010505.1 Campylobacter jejuni strain CJ677CC052, complete genome',\n",
" 'NZ_CP010506.1 Campylobacter jejuni strain CJ677CC527, complete genome',\n",
" 'NZ_CP010507.1 Campylobacter jejuni strain CJ677CC078, complete genome',\n",
" 'NZ_CP010508.1 Campylobacter jejuni strain CJ677CC523, complete genome',\n",
" 'NZ_CP010509.1 Campylobacter jejuni strain CJ677CC540, complete genome',\n",
" 'NZ_CP010510.1 Campylobacter jejuni strain CJ677CC040, complete genome',\n",
" 'NZ_CP010511.1 Campylobacter jejuni strain CJ677CC061, complete genome',\n",
" 'NZ_CP010457.1 Campylobacter jejuni strain CJ677CC539, complete genome',\n",
" 'NZ_CP010458.1 Campylobacter jejuni strain CJ677CC533, complete genome',\n",
" 'NZ_CP010459.1 Campylobacter jejuni strain CJ677CC047, complete genome',\n",
" 'NZ_CP009802.1 Streptomyces sp. FR-008, complete genome',\n",
" 'NZ_CP009803.1 Streptomyces sp. FR-008 plasmid pSSFR1, complete sequence',\n",
" 'NZ_CP009804.1 Streptomyces sp. FR-008 plasmid pSSFR2, complete sequence',\n",
" 'NZ_CP012482.1 Bacillus cellulasensis strain NJ-V2, complete genome',\n",
" 'NZ_CP013114.1 Staphylococcus equorum strain KS1039, complete genome',\n",
" 'NZ_CP009847.1 Vibrio parahaemolyticus strain FORC_004 chromosome 1, complete sequence',\n",
" 'NZ_CP009848.1 Vibrio parahaemolyticus strain FORC_004 chromosome 2, complete sequence',\n",
" 'NZ_CP009849.1 Vibrio parahaemolyticus strain FORC_004 plasmid pFORC4, complete sequence',\n",
" 'NZ_CP009984.1 Vibrio vulnificus strain FORC_009 chromosome 1, complete sequence',\n",
" 'NZ_CP009985.1 Vibrio vulnificus strain FORC_009 chromosome 2, complete sequence',\n",
" 'NZ_CP012837.1 Corynebacterium pseudotuberculosis strain 1002B, complete genome',\n",
" 'NZ_CP012907.1 Helicobacter pylori strain 29CaP, complete genome',\n",
" 'NZ_CP012905.1 Helicobacter pylori strain 7C, complete sequence',\n",
" 'NZ_CP012906.1 Helicobacter pylori strain 7C plasmid, complete sequence',\n",
" 'NZ_CP013120.1 Alteromonas stellipolaris LMG 21856, complete genome',\n",
" 'NZ_CP013121.1 Fusobacterium nucleatum subsp. polymorphum strain ChDC F306, complete genome',\n",
" 'NZ_CP013122.1 Fusobacterium nucleatum subsp. polymorphum strain ChDC F306 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP013123.1 Fusobacterium nucleatum subsp. polymorphum strain ChDC F306 plasmid unnamed2, complete sequence',\n",
" 'NZ_CP014140.1 Aneurinibacillus sp. XH2, complete genome',\n",
" 'NZ_CP013126.1 Acidipropionibacterium acidipropionici strain CGMCC 1.2230 chromosome, complete genome',\n",
" 'NZ_CP012921.1 Salmonella enterica subsp. enterica serovar Heidelberg strain SA02DT10168701, complete genome',\n",
" 'NZ_CP012922.1 Salmonella enterica subsp. enterica serovar Heidelberg strain SA02DT10168701 plasmid pSA02DT10168701_37, complete sequence',\n",
" 'NZ_CP012923.1 Salmonella enterica subsp. enterica serovar Heidelberg strain SA02DT10168701 plasmid pSA02DT10168701_99, complete sequence',\n",
" 'NZ_CP012924.1 Salmonella enterica subsp. enterica serovar Heidelberg strain 12-4374, complete genome',\n",
" 'NZ_CP012925.1 Salmonella enterica subsp. enterica serovar Heidelberg strain 12-4374 plasmid p12-4374_2, complete sequence',\n",
" 'NZ_CP012927.1 Salmonella enterica subsp. enterica serovar Heidelberg strain 12-4374 plasmid p12-4374_3, complete sequence',\n",
" 'NZ_CP012926.1 Salmonella enterica subsp. enterica serovar Heidelberg strain 12-4374 plasmid p12-4374_37, complete sequence',\n",
" 'NZ_CP012928.1 Salmonella enterica subsp. enterica serovar Heidelberg strain 12-4374 plasmid p12-4374_62, complete sequence',\n",
" 'NZ_CP012929.1 Salmonella enterica subsp. enterica serovar Heidelberg strain 12-4374 plasmid p12-4374_96, complete sequence',\n",
" 'NZ_CP012930.1 Salmonella enterica subsp. enterica serovar Heidelberg strain N13-01290, complete genome',\n",
" 'NZ_CP012932.1 Salmonella enterica subsp. enterica serovar Heidelberg strain N13-01290 plasmid pN13-01290_2, complete sequence',\n",
" 'NZ_CP012931.1 Salmonella enterica subsp. enterica serovar Heidelberg strain N13-01290 plasmid pN13-01290_23, complete sequence',\n",
" 'NZ_CP012933.1 Salmonella enterica subsp. enterica serovar Heidelberg strain N13-01290 plasmid pN13-01290_3-1, complete sequence',\n",
" 'NZ_CP012934.1 Salmonella enterica subsp. enterica serovar Heidelberg strain N13-01290 plasmid pN13-01290_3-2, complete sequence',\n",
" 'NZ_CP012935.1 Salmonella enterica subsp. enterica serovar Heidelberg strain N13-01290 plasmid pN13-01290_3-3, complete sequence',\n",
" 'NZ_CP012936.1 Salmonella enterica subsp. enterica serovar Heidelberg strain N13-01290 plasmid pN13-01290_98, complete sequence',\n",
" 'NZ_CP013133.1 Rickettsia rhipicephali strain HJ#5, complete genome',\n",
" 'NZ_CP013134.1 Rickettsia rhipicephali strain HJ#5 plasmid pHJ51, complete sequence',\n",
" 'NZ_CP013135.1 Rickettsia rhipicephali strain HJ#5 plasmid pHJ52 sequence',\n",
" 'NZ_CP013112.1 Escherichia coli strain YD786, complete genome',\n",
" 'NZ_CP013141.1 Lysobacter antibioticus strain ATCC 29479 genome',\n",
" 'NZ_CP011129.1 Lysobacter antibioticus strain 76, complete genome',\n",
" 'NZ_CP012096.1 Serratia plymuthica strain 3Rp8, complete genome',\n",
" 'NZ_CP012097.1 Serratia plymuthica strain 3Re4-18, complete genome',\n",
" 'NZ_CP015963.1 Altererythrobacter ishigakiensis strain NBRC 107699 chromosome, complete genome',\n",
" 'NZ_CP016033.1 Porphyrobacter neustonensis strain DSM 9434, complete genome',\n",
" 'NZ_CP015990.1 Klebsiella pneumoniae strain BR chromosome, complete genome',\n",
" 'NZ_CP015991.1 Klebsiella pneumoniae strain BR plasmid pWSZBR, complete sequence',\n",
" 'NZ_CP014762.1 Klebsiella pneumoniae strain KPNIH39, complete genome',\n",
" 'NZ_CP014763.1 Klebsiella pneumoniae strain KPNIH39 plasmid pKPN-332, complete sequence',\n",
" 'NZ_CP014764.1 Klebsiella pneumoniae strain KPNIH39 plasmid pKPN-704, complete sequence',\n",
" 'NZ_CP014765.1 Klebsiella pneumoniae strain KPNIH39 plasmid pKpQIL-9b8, complete sequence',\n",
" 'NZ_CP012968.1 Staphylococcus sp. AntiMn-1, complete genome',\n",
" 'NZ_CP013300.1 Staphylococcus sp. AntiMn-1 plasmid AM1_232, complete sequence',\n",
" 'NZ_CP015850.1 Ralstonia solanacearum strain YC40-M chromosome, complete genome',\n",
" 'NZ_CP015851.1 Ralstonia solanacearum strain YC40-M plasmid, complete sequence',\n",
" 'NZ_CP015025.1 Klebsiella pneumoniae strain Kpn223, complete genome',\n",
" 'NZ_CP015026.1 Klebsiella pneumoniae strain Kpn223 plasmid pKPN-065, complete sequence',\n",
" 'NZ_CP015130.1 Klebsiella pneumoniae strain Kpn555 chromosome, complete genome',\n",
" 'NZ_CP015131.1 Klebsiella pneumoniae strain Kpn555 plasmid pKPN-7c3, complete sequence',\n",
" 'NZ_CP015133.1 Klebsiella pneumoniae strain Kpn555 plasmid pKPN-d6b, complete sequence',\n",
" 'NZ_CP015132.1 Klebsiella pneumoniae strain Kpn555 plasmid pKPN-d90, complete sequence',\n",
" 'NZ_CP015159.1 Escherichia coli strain Eco889 chromosome, complete genome',\n",
" 'NZ_CP015161.1 Escherichia coli strain Eco889 plasmid pECO-93a, complete sequence',\n",
" 'NZ_CP015160.1 Escherichia coli strain Eco889 plasmid pECO-fce, complete sequence',\n",
" 'NZ_CP015309.1 Corynebacterium pseudotuberculosis strain PA02 chromosome, complete genome',\n",
" 'NZ_CP014623.1 Lactobacillus backii strain TMW 1.1988 chromosome, complete genome',\n",
" 'NZ_CP014624.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-1, complete sequence',\n",
" 'NZ_CP014625.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-10, complete sequence',\n",
" 'NZ_CP014626.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-2, complete sequence',\n",
" 'NZ_CP014627.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-3, complete sequence',\n",
" 'NZ_CP014628.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-4, complete sequence',\n",
" 'NZ_CP014629.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-5, complete sequence',\n",
" 'NZ_CP014630.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-6, complete sequence',\n",
" 'NZ_CP014631.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-7, complete sequence',\n",
" 'NZ_CP014632.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-8, complete sequence',\n",
" 'NZ_CP014633.1 Lactobacillus backii strain TMW 1.1988 plasmid L11988-9, complete sequence',\n",
" 'NZ_CP012969.1 Psychrobacter sp. AntiMn-1, complete genome',\n",
" 'NZ_CP013101.1 Clostridium perfringens strain FORC_025, complete genome',\n",
" 'NZ_CP017715.1 Marinobacter salinus strain Hb8 chromosome, complete genome',\n",
" 'NZ_CP017188.2 Xanthomonas citri pv. glycines str. 8ra chromosome, complete genome',\n",
" 'NZ_CP017189.2 Xanthomonas citri pv. glycines str. 8ra plasmid pXAG27.8ra, complete sequence',\n",
" 'NZ_CP017190.1 Xanthomonas campestris pv. vesicatoria str. 85-10, complete sequence',\n",
" 'NZ_CP017191.1 Xanthomonas campestris pv. vesicatoria str. 85-10 plasmid p_XCV_1, complete sequence',\n",
" 'NZ_CP017192.1 Xanthomonas campestris pv. vesicatoria str. 85-10 plasmid p_XCV_2, complete sequence',\n",
" 'NZ_CP017193.1 Xanthomonas campestris pv. vesicatoria str. 85-10 plasmid p_XCV_3, complete sequence',\n",
" 'NZ_CP017603.1 Clostridium formicaceticum strain ATCC 27076, complete genome',\n",
" 'NZ_CP015208.1 Candidatus Rhodoluna planktonica strain MWH-Dar1, complete genome',\n",
" 'NZ_CP015381.1 Desulfococcus multivorans strain DSM 2059, complete genome',\n",
" 'NZ_CP017751.1 Cupriavidus sp. USMAHM13 chromosome 1, complete sequence',\n",
" 'NZ_CP017752.1 Cupriavidus sp. USMAHM13 chromosome 2, complete sequence',\n",
" 'NZ_CP017753.1 Cupriavidus sp. USMAHM13 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP017748.1 Cupriavidus sp. USMAA2-4 chromosome 1, complete sequence',\n",
" 'NZ_CP017749.1 Cupriavidus sp. USMAA2-4 chromosome 2, complete sequence',\n",
" 'NZ_CP017750.1 Cupriavidus sp. USMAA2-4 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP017754.1 Cupriavidus sp. USMAA1020 chromosome 1, complete sequence',\n",
" 'NZ_CP017755.1 Cupriavidus sp. USMAA1020 chromosome 2, complete sequence',\n",
" 'NZ_CP017756.1 Cupriavidus sp. USMAA1020 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP017747.1 Bacillus velezensis strain SYBC H47 chromosome, complete genome',\n",
" 'NZ_CP017421.1 Arthrobacter sp. ZXY-2 chromosome, complete genome',\n",
" 'NZ_CP017422.1 Arthrobacter sp. ZXY-2 plasmid pZXY21, complete sequence',\n",
" 'NZ_CP017423.1 Arthrobacter sp. ZXY-2 plasmid pZXY22, complete sequence',\n",
" 'NZ_CP017424.1 Arthrobacter sp. ZXY-2 plasmid pZXY23, complete sequence',\n",
" 'NZ_CP017425.1 Arthrobacter sp. ZXY-2 plasmid pZXY24, complete sequence',\n",
" 'NZ_CP017426.1 Arthrobacter sp. ZXY-2 plasmid pZXY25, complete sequence',\n",
" 'NZ_CP017640.1 Methylobacterium sp. C1 chromosome, complete genome',\n",
" 'NZ_CP017718.1 Hyphomonas sp. Mor2 genome',\n",
" 'NZ_CP013013.1 Vibrio cholerae strain Env-390 chromosome 1, complete sequence',\n",
" 'NZ_CP013014.1 Vibrio cholerae strain Env-390 chromosome 2, complete sequence',\n",
" 'NZ_CP013475.1 Mycobacterium tuberculosis strain 1458, complete genome',\n",
" 'NZ_CP017707.1 Chromobacterium vaccinii strain 21-1 chromosome, complete genome',\n",
" 'NZ_CP017622.1 Ketogulonicigenium vulgare strain SPU B805 chromosome, complete genome',\n",
" 'NZ_CP012426.1 Klebsiella pneumoniae strain KP5 chromosome, complete genome',\n",
" 'NZ_CP012427.1 Klebsiella pneumoniae strain KP5 plasmid pSg1-1, complete sequence',\n",
" 'NZ_CP012428.1 Klebsiella pneumoniae strain KP5 plasmid pSg1-2, complete sequence',\n",
" 'NZ_CP012429.1 Klebsiella pneumoniae strain KP5 plasmid pSg1-3, complete sequence',\n",
" 'NZ_CP011839.1 Klebsiella pneumoniae strain KP5 plasmid pSg1-NDM, complete sequence',\n",
" 'NZ_CP017100.1 Escherichia coli strain K-12 NEB 5-alpha chromosome, complete genome',\n",
" 'NZ_CP017107.1 Lactobacillus salivarius strain CICC 23174 chromosome, complete genome',\n",
" 'NZ_CP017108.1 Lactobacillus salivarius strain CICC23174 plasmid pLS_1 sequence',\n",
" 'NZ_CP017109.1 Lactobacillus salivarius strain CICC23174 plasmid pLS_2 sequence',\n",
" 'NZ_CP017110.1 Lactobacillus salivarius strain CICC23174 plasmid pLS_3, complete sequence',\n",
" 'NZ_CP017124.1 Lactobacillus curvatus strain WiKim38, complete genome',\n",
" 'NZ_CP016395.1 Bacillus velezensis strain M75 chromosome, complete genome',\n",
" 'NZ_CP017111.1 Sulfurospirillum halorespirans DSM 13726 chromosome, complete genome',\n",
" 'NZ_CP017116.1 Erysipelothrix rhusiopathiae strain WH13013 chromosome, complete genome',\n",
" 'NZ_CP012692.1 Staphylococcus aureus strain FORC_027, complete genome',\n",
" 'NZ_CP016019.1 Bifidobacterium longum subsp. longum strain AH1206, complete genome',\n",
" 'NZ_CP017247.1 Bacillus licheniformis strain BL1202, complete genome',\n",
" 'NZ_CP016385.1 Salmonella enterica subsp. enterica serovar Typhimurium strain 13-931, complete genome',\n",
" 'NZ_CP016386.1 Salmonella enterica subsp. enterica serovar Typhimurium strain 13-931 plasmid p931-3904, complete sequence',\n",
" 'NZ_CP016387.1 Salmonella enterica subsp. enterica serovar Typhimurium strain 13-931 plasmid p931IncI1, complete sequence',\n",
" 'NZ_CP016388.1 Salmonella enterica subsp. enterica serovar Typhimurium strain 13-931 plasmid p931IncI2, complete sequence',\n",
" 'NZ_CP016389.1 Salmonella enterica subsp. enterica serovar Typhimurium strain 13-931 plasmid pESBL931, complete sequence',\n",
" 'NZ_CP016390.1 Salmonella enterica subsp. enterica serovar Typhimurium strain 13-931 plasmid pSLT931, complete sequence',\n",
" 'NZ_CP015217.1 Leptospira alstonii strain GWTS #1 chromosome 1, complete sequence',\n",
" 'NZ_CP015218.1 Leptospira alstonii strain GWTS #1 chromosome 2, complete sequence',\n",
" 'NZ_CP016044.1 Edwardsiella piscicida strain S11-285 chromosome, complete genome',\n",
" 'NZ_CP016445.1 Edwardsiella piscicida strain S11-285 plasmid unnamed1, complete sequence',\n",
" 'NZ_CP017157.1 Streptomyces lydicus strain 103 chromosome, complete genome',\n",
" 'NZ_CP017149.1 Pseudomonas aeruginosa strain ATCC 15692 chromosome, complete genome',\n",
" 'NZ_CP017150.1 Brevibacterium aurantiacum strain SMQ-1335 chromosome, complete genome',\n",
" 'NZ_CP017175.1 Burkholderia mallei strain Bahrain1 chromosome 1, complete sequence',\n",
" 'NZ_CP017176.1 Burkholderia mallei strain Bahrain1 chromosome 2, complete sequence',\n",
" 'NZ_CP016894.1 Bacillus subtilis strain HJ0-6, complete genome',\n",
" 'NZ_CP016895.1 Acinetobacter larvae strain BRTC-1 chromosome, complete genome',\n",
" 'NZ_CP016294.1 Stenotrophomonas rhizophila strain QL-P4, complete genome',\n",
" 'NZ_CP016824.1 Streptomyces sampsonii strain KJ40 chromosome, complete genome',\n",
" 'NZ_CP016341.1 Bordetella holmesii strain H903, complete genome',\n",
" 'NZ_CP016811.1 Klebsiella pneumoniae strain DHQP1002001 chromosome, complete genome',\n",
" 'NZ_CP016810.1 Klebsiella pneumoniae strain DHQP1002001 plasmid p_IncFIB_DHQP1002001, complete sequence',\n",
" 'NZ_CP016812.1 Klebsiella pneumoniae strain DHQP1002001 plasmid p_incR_DHQP1002001, complete sequence',\n",
" 'NZ_CP016342.1 Bordetella parapertussis strain H904, complete genome',\n",
" 'NZ_CP011533.1 Streptomyces noursei ATCC 11455, complete genome',\n",
" 'NZ_CP012334.1 Bordetella sp. H567, complete genome',\n",
" 'NZ_CP013749.1 Lactobacillus plantarum strain KP, complete genome',\n",
" 'NZ_CP013750.1 Lactobacillus plantarum strain KP plasmid unnamed1, complete sequence',\n",
" 'NZ_CP013751.1 Lactobacillus plantarum strain KP plasmid unnamed2, complete sequence',\n",
" 'NZ_CP013752.1 Lactobacillus plantarum strain KP plasmid unnamed3, complete sequence',\n",
" 'NZ_CP013753.1 Lactobacillus plantarum strain DF, complete genome',\n",
" 'NZ_CP013754.1 Lactobacillus plantarum strain DF plasmid unnamed1, complete sequence',\n",
" 'NZ_CP013755.1 Lactobacillus plantarum strain DF plasmid unnamed2, complete sequence',\n",
" 'NZ_CP013756.1 Lactobacillus plantarum strain DF plasmid unnamed3, complete sequence',\n",
" 'NZ_CP016431.1 Bordetella bronchiseptica strain I328 chromosome, complete genome',\n",
" 'NZ_CP029397.1 Acinetobacter defluvii strain WCHA30 chromosome, complete genome',\n",
" 'NZ_CP029389.1 Acinetobacter defluvii strain WCHA30 plasmid p1_010030, complete sequence',\n",
" 'NZ_CP029390.1 Acinetobacter defluvii strain WCHA30 plasmid p2_010030, complete sequence',\n",
" 'NZ_CP029391.1 Acinetobacter defluvii strain WCHA30 plasmid p3_010030, complete sequence',\n",
" 'NZ_CP029392.1 Acinetobacter defluvii strain WCHA30 plasmid p4_010030, complete sequence',\n",
" 'NZ_CP029393.1 Acinetobacter defluvii strain WCHA30 plasmid p5_010030, complete sequence',\n",
" 'NZ_CP029394.1 Acinetobacter defluvii strain WCHA30 plasmid p6_010030, complete sequence',\n",
" 'NZ_CP029395.1 Acinetobacter defluvii strain WCHA30 plasmid p7_010030, complete sequence',\n",
" 'NZ_CP029396.1 Acinetobacter defluvii strain WCHA30 plasmid pOXA58_010030, complete sequence',\n",
" 'NZ_CP012375.1 Microcystis aeruginosa NIES-2481, complete genome',\n",
" 'NZ_CP025929.1 Microcystis aeruginosa NIES-2481 plasmid p1, complete sequence',\n",
" ...]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"all_fastas"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>n_sequences</th>\n",
" <th>species</th>\n",
" </tr>\n",
" <tr>\n",
" <th>genus</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Bacillus</th>\n",
" <td>1132</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Streptomyces</th>\n",
" <td>743</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Vibrio</th>\n",
" <td>468</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Rhizobium</th>\n",
" <td>325</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Pseudomonas</th>\n",
" <td>304</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Staphylococcus</th>\n",
" <td>301</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clostridium</th>\n",
" <td>259</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Streptococcus</th>\n",
" <td>222</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Planktothrix</th>\n",
" <td>179</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Stenotrophomonas</th>\n",
" <td>176</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Escherichia</th>\n",
" <td>133</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Paenibacillus</th>\n",
" <td>127</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Listeria</th>\n",
" <td>104</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Corynebacterium</th>\n",
" <td>103</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Klebsiella</th>\n",
" <td>16</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Shigella</th>\n",
" <td>14</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Salmonella</th>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Enterobacter</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" n_sequences species\n",
"genus \n",
"Bacillus 1132 11\n",
"Streptomyces 743 5\n",
"Vibrio 468 5\n",
"Rhizobium 325 6\n",
"Pseudomonas 304 8\n",
"Staphylococcus 301 6\n",
"Clostridium 259 5\n",
"Streptococcus 222 6\n",
"Planktothrix 179 5\n",
"Stenotrophomonas 176 5\n",
"Escherichia 133 3\n",
"Paenibacillus 127 3\n",
"Listeria 104 4\n",
"Corynebacterium 103 7\n",
"Klebsiella 16 3\n",
"Shigella 14 3\n",
"Salmonella 4 2\n",
"Enterobacter 1 1"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-genus summary of `inventory` (built earlier in the notebook).\n",
"# The aggregations are given as an ordered list rather than a set: set iteration order is\n",
"# arbitrary, so with a set the positional rename below could silently swap the two columns\n",
"# from one kernel session to the next.\n",
"counts = (\n",
"    inventory.reset_index()\n",
"    .groupby(\"genus\")\n",
"    .agg([\"sum\", \"count\"])  # per genus: total of the numeric column, then number of rows\n",
"    .drop(columns=\"species\", level=0)  # discard the aggregates computed for the species column itself\n",
")\n",
"# Positional rename; must stay in the same order as the agg list above (sum, count).\n",
"counts.columns = [\"n_sequences\", \"species\"]\n",
"counts.sort_values(\"n_sequences\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}