{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyM92DNghoocqJmM9Kl5DupF"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Analyse some metagenomics data to identify correlated contigs\n","\n","You can copy and paste these commands into Google Colab, and run this notebook to identify which contigs might belong together, e.g. because they come from the same genome.\n","\n","\n","## Step 1. Import some libraries"],"metadata":{"id":"DWHYKg9VL-BJ"}},{"cell_type":"code","execution_count":3,"metadata":{"id":"4R6y2YvsNXs8","executionInfo":{"status":"ok","timestamp":1730774725587,"user_tz":-660,"elapsed":484,"user":{"displayName":"Rob Edwards","userId":"17780279723170493443"}}},"outputs":[],"source":["import os\n","import sys\n","import pandas as pd\n","import seaborn as sns\n","import matplotlib.pyplot as plt\n","import numpy as np\n"]},{"cell_type":"markdown","source":["If you have the data file locally, you can upload it to Colab. 
Otherwise, you can connect Google drive and read the file from there!"],"metadata":{"id":"Em2obIOgMWG5"}},{"cell_type":"code","source":["# df = pd.read_csv('788707_20180129_S_coverage.tsv', sep=\"\\t\", index_col=0)\n","\n","from google.colab import drive\n","drive.mount('/content/drive')\n","\n","df = pd.read_csv('drive/MyDrive/Workshops/788707_20180129_S_coverage.tsv', sep=\"\\t\", index_col=0)\n","df"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":473},"id":"EHsLgNAUUo4H","executionInfo":{"status":"ok","timestamp":1730774759801,"user_tz":-660,"elapsed":30619,"user":{"displayName":"Rob Edwards","userId":"17780279723170493443"}},"outputId":"74b73749-048a-445d-e5c5-8df0d6c8ab4c"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]},{"output_type":"execute_result","data":{"text/plain":[" 788707_20171213_S 788707_20180129_S 788707_20180313_S \\\n","contig \n","k141_0 0.000000 4.20440 0.00000 \n","k141_1 0.000000 3.85587 9.21708 \n","k141_10 0.000000 2.14286 0.00000 \n","k141_100 0.000000 2.86104 0.00000 \n","k141_1000 0.000000 2.42775 0.00000 \n","... ... ... ... \n","k141_9995 0.000000 5.15532 3.16151 \n","k141_9996 0.000000 3.68349 0.00000 \n","k141_9997 0.170054 6.28332 11.11980 \n","k141_9998 0.000000 2.03607 0.00000 \n","k141_9999 0.000000 1.92308 0.00000 \n","\n"," 788707_20181126_S \n","contig \n","k141_0 0.00000 \n","k141_1 1.77046 \n","k141_10 0.00000 \n","k141_100 0.00000 \n","k141_1000 0.00000 \n","... ... \n","k141_9995 0.00000 \n","k141_9996 0.00000 \n","k141_9997 0.13688 \n","k141_9998 0.00000 \n","k141_9999 0.00000 \n","\n","[14353 rows x 4 columns]"],"text/html":["\n","
\n"," | 788707_20171213_S | \n","788707_20180129_S | \n","788707_20180313_S | \n","788707_20181126_S | \n","
---|---|---|---|---|
contig | \n","\n"," | \n"," | \n"," | \n"," |
k141_0 | \n","0.000000 | \n","4.20440 | \n","0.00000 | \n","0.00000 | \n","
k141_1 | \n","0.000000 | \n","3.85587 | \n","9.21708 | \n","1.77046 | \n","
k141_10 | \n","0.000000 | \n","2.14286 | \n","0.00000 | \n","0.00000 | \n","
k141_100 | \n","0.000000 | \n","2.86104 | \n","0.00000 | \n","0.00000 | \n","
k141_1000 | \n","0.000000 | \n","2.42775 | \n","0.00000 | \n","0.00000 | \n","
... | \n","... | \n","... | \n","... | \n","... | \n","
k141_9995 | \n","0.000000 | \n","5.15532 | \n","3.16151 | \n","0.00000 | \n","
k141_9996 | \n","0.000000 | \n","3.68349 | \n","0.00000 | \n","0.00000 | \n","
k141_9997 | \n","0.170054 | \n","6.28332 | \n","11.11980 | \n","0.13688 | \n","
k141_9998 | \n","0.000000 | \n","2.03607 | \n","0.00000 | \n","0.00000 | \n","
k141_9999 | \n","0.000000 | \n","1.92308 | \n","0.00000 | \n","0.00000 | \n","
14353 rows × 4 columns
\n","\n"," | length | \n","
---|---|
contig | \n","\n"," |
k141_1800 | \n","347 | \n","
k141_12597 | \n","379 | \n","
k141_8999 | \n","397 | \n","
k141_0 | \n","455 | \n","
k141_10798 | \n","565 | \n","
... | \n","... | \n","
k141_3595 | \n","2053 | \n","
k141_3596 | \n","3022 | \n","
k141_3597 | \n","471 | \n","
k141_3598 | \n","472 | \n","
k141_3599 | \n","380 | \n","
14353 rows × 1 columns
\n","\n"," | 788707_20171213_S | \n","788707_20180129_S | \n","788707_20180313_S | \n","788707_20181126_S | \n","
---|---|---|---|---|
contig | \n","\n"," | \n"," | \n"," | \n"," |
k141_10016 | \n","0.349473 | \n","8.00344 | \n","8.01700 | \n","0.482462 | \n","
k141_10116 | \n","0.104002 | \n","5.24973 | \n","5.87481 | \n","0.155361 | \n","
k141_10189 | \n","0.317612 | \n","5.15224 | \n","5.75403 | \n","0.226567 | \n","
k141_10215 | \n","0.281800 | \n","1.88650 | \n","0.00000 | \n","0.000000 | \n","
k141_10223 | \n","0.314713 | \n","6.19827 | \n","8.20220 | \n","0.141096 | \n","
... | \n","... | \n","... | \n","... | \n","... | \n","
k141_9786 | \n","0.216080 | \n","4.71022 | \n","6.42379 | \n","0.000000 | \n","
k141_9827 | \n","0.051166 | \n","5.59172 | \n","8.86182 | \n","0.327532 | \n","
k141_9878 | \n","0.723157 | \n","5.43338 | \n","6.00259 | \n","0.675291 | \n","
k141_993 | \n","0.291545 | \n","4.95044 | \n","0.00000 | \n","0.000000 | \n","
k141_9997 | \n","0.170054 | \n","6.28332 | \n","11.11980 | \n","0.136880 | \n","
268 rows × 4 columns
\n","\n"," | contig | \n","Sample | \n","Depth | \n","
---|---|---|---|
0 | \n","k141_10016 | \n","788707_20171213_S | \n","0.349473 | \n","
1 | \n","k141_10116 | \n","788707_20171213_S | \n","0.104002 | \n","
2 | \n","k141_10189 | \n","788707_20171213_S | \n","0.317612 | \n","
3 | \n","k141_10215 | \n","788707_20171213_S | \n","0.281800 | \n","
4 | \n","k141_10223 | \n","788707_20171213_S | \n","0.314713 | \n","
... | \n","... | \n","... | \n","... | \n","
1067 | \n","k141_9786 | \n","788707_20181126_S | \n","0.000000 | \n","
1068 | \n","k141_9827 | \n","788707_20181126_S | \n","0.327532 | \n","
1069 | \n","k141_9878 | \n","788707_20181126_S | \n","0.675291 | \n","
1070 | \n","k141_993 | \n","788707_20181126_S | \n","0.000000 | \n","
1071 | \n","k141_9997 | \n","788707_20181126_S | \n","0.136880 | \n","
1072 rows × 3 columns
\n","contig | \n","k141_10016 | \n","k141_10116 | \n","k141_10189 | \n","k141_10215 | \n","k141_10223 | \n","k141_10246 | \n","k141_1032 | \n","k141_10423 | \n","k141_10428 | \n","k141_10650 | \n","... | \n","k141_954 | \n","k141_9546 | \n","k141_9580 | \n","k141_9607 | \n","k141_9757 | \n","k141_9786 | \n","k141_9827 | \n","k141_9878 | \n","k141_993 | \n","k141_9997 | \n","
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
contig | \n","\n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," | \n"," |
k141_10016 | \n","1.000000 | \n","0.996792 | \n","0.996441 | \n","0.508572 | \n","0.979783 | \n","0.658830 | \n","0.986242 | \n","0.809457 | \n","0.965302 | \n","0.979254 | \n","... | \n","0.989863 | \n","0.871763 | \n","0.960788 | \n","0.964751 | \n","0.552240 | \n","0.975736 | \n","0.950348 | \n","0.996718 | \n","0.551643 | \n","0.928814 | \n","
k141_10116 | \n","0.996792 | \n","1.000000 | \n","0.999818 | \n","0.439789 | \n","0.992605 | \n","0.598469 | \n","0.996125 | \n","0.760683 | \n","0.982974 | \n","0.992184 | \n","... | \n","0.975326 | \n","0.908177 | \n","0.978922 | \n","0.940594 | \n","0.484368 | \n","0.990024 | \n","0.972032 | \n","0.999910 | \n","0.483753 | \n","0.955475 | \n","
k141_10189 | \n","0.996441 | \n","0.999818 | \n","1.000000 | \n","0.441502 | \n","0.992971 | \n","0.600309 | \n","0.995603 | \n","0.761247 | \n","0.982628 | \n","0.992777 | \n","... | \n","0.974600 | \n","0.908885 | \n","0.977602 | \n","0.939722 | \n","0.484583 | \n","0.990595 | \n","0.971656 | \n","0.999982 | \n","0.483989 | \n","0.955697 | \n","
k141_10215 | \n","0.508572 | \n","0.439789 | \n","0.441502 | \n","1.000000 | \n","0.333871 | \n","0.982519 | \n","0.359496 | \n","0.916842 | \n","0.267551 | \n","0.334774 | \n","... | \n","0.621728 | \n","0.032094 | \n","0.250488 | \n","0.711676 | \n","0.995781 | \n","0.318277 | \n","0.216954 | \n","0.442515 | \n","0.995902 | \n","0.159266 | \n","
k141_10223 | \n","0.979783 | \n","0.992605 | \n","0.992971 | \n","0.333871 | \n","1.000000 | \n","0.503240 | \n","0.998403 | \n","0.679663 | \n","0.997104 | \n","0.999929 | \n","... | \n","0.941469 | \n","0.951854 | \n","0.993001 | \n","0.892769 | \n","0.378057 | \n","0.999789 | \n","0.992220 | \n","0.992698 | \n","0.377442 | \n","0.983725 | \n","
... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","... | \n","
k141_9786 | \n","0.975736 | \n","0.990024 | \n","0.990595 | \n","0.318277 | \n","0.999789 | \n","0.489031 | \n","0.997131 | \n","0.666870 | \n","0.997652 | \n","0.999848 | \n","... | \n","0.934851 | \n","0.957208 | \n","0.993212 | \n","0.884099 | \n","0.361827 | \n","1.000000 | \n","0.993662 | \n","0.990231 | \n","0.361220 | \n","0.986580 | \n","
k141_9827 | \n","0.950348 | \n","0.972032 | \n","0.971656 | \n","0.216954 | \n","0.992220 | \n","0.394236 | \n","0.988654 | \n","0.586973 | \n","0.998634 | \n","0.991695 | \n","... | \n","0.896996 | \n","0.979915 | \n","0.997981 | \n","0.835481 | \n","0.265431 | \n","0.993662 | \n","1.000000 | \n","0.971407 | \n","0.264747 | \n","0.997591 | \n","
k141_9878 | \n","0.996718 | \n","0.999910 | \n","0.999982 | \n","0.442515 | \n","0.992698 | \n","0.601123 | \n","0.995651 | \n","0.762202 | \n","0.982458 | \n","0.992436 | \n","... | \n","0.975225 | \n","0.908001 | \n","0.977704 | \n","0.940590 | \n","0.486016 | \n","0.990231 | \n","0.971407 | \n","1.000000 | \n","0.485417 | \n","0.955167 | \n","
k141_993 | \n","0.551643 | \n","0.483753 | \n","0.483989 | \n","0.995902 | \n","0.377442 | \n","0.986551 | \n","0.406229 | \n","0.935704 | \n","0.314768 | \n","0.377407 | \n","... | \n","0.662858 | \n","0.076033 | \n","0.301678 | \n","0.749745 | \n","0.999999 | \n","0.361220 | \n","0.264747 | \n","0.485417 | \n","1.000000 | \n","0.205007 | \n","
k141_9997 | \n","0.928814 | \n","0.955475 | \n","0.955697 | \n","0.159266 | \n","0.983725 | \n","0.340064 | \n","0.976744 | \n","0.536821 | \n","0.993046 | \n","0.983582 | \n","... | \n","0.866875 | \n","0.991159 | \n","0.991564 | \n","0.798599 | \n","0.205664 | \n","0.986580 | \n","0.997591 | \n","0.955167 | \n","0.205007 | \n","1.000000 | \n","
268 rows × 268 columns
\n","\n"," | length | \n","
---|---|
contig | \n","\n"," |
k141_9055 | \n","305 | \n","
k141_3728 | \n","307 | \n","
k141_3084 | \n","310 | \n","
k141_1386 | \n","324 | \n","
k141_8665 | \n","325 | \n","
... | \n","... | \n","
k141_7934 | \n","7537 | \n","
k141_6589 | \n","8079 | \n","
k141_7068 | \n","8304 | \n","
k141_2928 | \n","8804 | \n","
k141_12474 | \n","12648 | \n","
250 rows × 1 columns
\n","\n"," | k141_12474 | \n","
---|---|
contig | \n","\n"," |
k141_13159 | \n","0.055370 | \n","
k141_13759 | \n","0.093904 | \n","
k141_3084 | \n","0.103676 | \n","
k141_380 | \n","0.131021 | \n","
k141_198 | \n","0.131109 | \n","
... | \n","... | \n","
k141_10999 | \n","0.999828 | \n","
k141_7934 | \n","0.999917 | \n","
k141_10428 | \n","0.999921 | \n","
k141_11620 | \n","0.999973 | \n","
k141_12474 | \n","1.000000 | \n","
250 rows × 1 columns
\n","