{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Basic use cases for pyft" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.00s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s\n", "100%|██████████| 6/6 [00:00<00:00, 509.30it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ " chrom motif_start motif_end strand n_spanning_fibers n_spanning_msps \\\n", "0 chr11 5204946 5204981 + 181 92 \n", "0 chr11 5204946 5204981 + 181 92 \n", "0 chr11 5204946 5204981 + 181 92 \n", "0 chr11 5204946 5204981 + 181 92 \n", "0 chr11 5204946 5204981 + 181 92 \n", ".. ... ... ... ... ... ... \n", "16 chr19 45817350 45817385 + 136 124 \n", "16 chr19 45817350 45817385 + 136 124 \n", "16 chr19 45817350 45817385 + 136 124 \n", "16 chr19 45817350 45817385 + 136 124 \n", "16 chr19 45817350 45817385 + 136 124 \n", "\n", " n_overlapping_nucs module:0-8 module:8-16 module:16-23 module:23-29 \\\n", "0 85 False False False False \n", "0 85 False False False False \n", "0 85 False False True True \n", "0 85 False False False False \n", "0 85 False False False False \n", ".. ... ... ... ... ... \n", "16 8 False True True True \n", "16 8 False True True True \n", "16 8 False False False True \n", "16 8 True False True True \n", "16 8 False False False False \n", "\n", " module:29-35 fire_qual fiber_name n_modules \\\n", "0 False 247 m64076_211222_124721/148505307/ccs 5 \n", "0 False -1 m64076_211222_124721/51053256/ccs 5 \n", "0 False 246 m64076_211222_124721/62391018/ccs 5 \n", "0 False -1 m64076_211222_124721/97191992/ccs 5 \n", "0 False -1 m64076_211222_124721/99419016/ccs 5 \n", ".. ... ... ... ... 
\n", "16 True 0 m64076_211222_124721/157222001/ccs 5 \n", "16 True 246 m64076_211222_124721/65339699/ccs 5 \n", "16 False 0 m64076_211222_124721/6882497/ccs 5 \n", "16 False 243 m64076_211222_124721/31394454/ccs 5 \n", "16 False -1 m64076_211222_124721/100926481/ccs 5 \n", "\n", " has_spanning_msp \n", "0 True \n", "0 False \n", "0 True \n", "0 False \n", "0 False \n", ".. ... \n", "16 True \n", "16 True \n", "16 True \n", "16 True \n", "16 False \n", "\n", "[2065 rows x 16 columns]\n", " chrom motif_start motif_end strand fire_qual \\\n", "0 chr11 5204946 5204981 + 247 \n", "1 chr11 5204946 5204981 + -1 \n", "2 chr11 5204946 5204981 + 246 \n", "3 chr11 5204946 5204981 + -1 \n", "4 chr11 5204946 5204981 + -1 \n", "... ... ... ... ... ... \n", "10320 chr19 45817350 45817385 + 0 \n", "10321 chr19 45817350 45817385 + 246 \n", "10322 chr19 45817350 45817385 + 0 \n", "10323 chr19 45817350 45817385 + 243 \n", "10324 chr19 45817350 45817385 + -1 \n", "\n", " fiber_name has_spanning_msp footprinted \\\n", "0 m64076_211222_124721/148505307/ccs True False \n", "1 m64076_211222_124721/51053256/ccs False False \n", "2 m64076_211222_124721/62391018/ccs True False \n", "3 m64076_211222_124721/97191992/ccs False False \n", "4 m64076_211222_124721/99419016/ccs False False \n", "... ... ... ... \n", "10320 m64076_211222_124721/157222001/ccs True True \n", "10321 m64076_211222_124721/65339699/ccs True True \n", "10322 m64076_211222_124721/6882497/ccs True False \n", "10323 m64076_211222_124721/31394454/ccs True False \n", "10324 m64076_211222_124721/100926481/ccs False False \n", "\n", " start end centering_position centering_strand type \n", "0 0 8 5204946 + not-footprinted \n", "1 0 8 5204946 + not-footprinted \n", "2 0 8 5204946 + not-footprinted \n", "3 0 8 5204946 + not-footprinted \n", "4 0 8 5204946 + not-footprinted \n", "... ... ... ... ... ... 
\n", "10320 29 35 45817350 + footprinted \n", "10321 29 35 45817350 + footprinted \n", "10322 29 35 45817350 + not-footprinted \n", "10323 29 35 45817350 + not-footprinted \n", "10324 29 35 45817350 + not-footprinted \n", "\n", "[10325 rows x 13 columns]\n", " chrom fiber_start fiber_end fiber_name strand \\\n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", ".. ... ... ... ... ... \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "\n", " type start end qual \n", "0 msp 26333672 26333727 0 \n", "0 msp 26333848 26333890 0 \n", "0 msp 26334056 26334094 0 \n", "0 msp 26334254 26334319 0 \n", "0 msp 26334561 26334565 0 \n", ".. ... ... ... ... \n", "23 5mC 26365739 26365740 213 \n", "23 5mC 26366886 26366887 255 \n", "23 5mC 26367221 26367222 172 \n", "23 5mC 26367226 26367227 246 \n", "23 5mC 26367254 26367255 252 \n", "\n", "[9111 rows x 9 columns]\n", " chrom fiber_start fiber_end fiber_name strand \\\n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", "0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs + \n", ".. ... ... ... ... ... 
\n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs - \n", "\n", " type start end qual centering_position centering_strand \n", "0 msp -16802 -16734 0 26354169 - \n", "0 msp -16489 -16476 0 26354169 - \n", "0 msp -16230 -16185 0 26354169 - \n", "0 msp -16044 -16004 0 26354169 - \n", "0 msp -15865 -15810 0 26354169 - \n", ".. ... ... ... ... ... ... \n", "23 5mC -708 -707 173 26354169 - \n", "23 5mC -667 -666 224 26354169 - \n", "23 5mC -591 -590 135 26354169 - \n", "23 5mC -95 -94 228 26354169 - \n", "23 5mC -61 -60 178 26354169 - \n", "\n", "[9111 rows x 11 columns]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.02s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata centered for 6 records in 0.00s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.04s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.01s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s\n", "[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata centered for 6 records in 0.00s\n" ] } ], "source": [ "# import pyft\n", "# from pyft import pyft\n", "import pyft\n", "import tqdm\n", "\n", "bam_f = \"../../../tests/data/center.bam\"\n", "fiberbam = pyft.Fiberbam(bam_f)\n", "out_fiberbam = pyft.Fiberwriter(\"test.bam\", bam_f)\n", "rgn = [\"chr22\", 26_354_169, 26_354_170]\n", "for fiber in tqdm.tqdm(fiberbam.fetch(*rgn)):\n", " # the number of ccs passes\n", " 
fiber.ec\n",
    " # the MSP start positions\n",
    " fiber.msp.starts\n",
    " # the fire quality scores of the MSPs\n",
    " fiber.msp.qual\n",
    " # the nucleosome reference start positions\n",
    " fiber.nuc.reference_starts\n",
    " # lift query (fiber) positions to reference positions\n",
    " fiber.lift_query_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
    " # lift reference positions to query (fiber) positions\n",
    " fiber.lift_reference_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n",
    "\n",
    " # write the fiber to the output BAM opened above\n",
    " out_fiberbam.write(fiber)\n",
    "\n",
    "\n",
    "for fiber in fiberbam.center(rgn[0], start=rgn[1], end=rgn[2], strand=\"-\"):\n",
    " # returns the same fiber object as above; however, all the positions have been modified to be relative to the region fetched\n",
    " # print(fiber.msp.reference_starts)\n",
    " continue\n",
    "\n",
    "\n",
    "# footprinting table shared by the next two examples\n",
    "footprint_f = \"../../../tests/data/ctcf-footprints.bed.gz\"\n",
    "\n",
    "# example of reading in a footprinting table\n",
    "footprints = pyft.utils.read_footprint_table(footprint_f, long=True)\n",
    "print(footprints)\n",
    "\n",
    "# read in a footprinting table and center the positions\n",
    "centered_footprints = pyft.utils.read_and_center_footprint_table(footprint_f)\n",
    "print(centered_footprints)\n",
    "\n",
    "# read a region of a fiberbam into a pandas dataframe\n",
    "region_df = pyft.utils.region_to_df(fiberbam, rgn)\n",
    "print(region_df)\n",
    "\n",
    "# read a region of a fiberbam into a pandas dataframe and center the positions\n",
    "centered_region_df = pyft.utils.region_to_centered_df(fiberbam, rgn, strand=\"-\")\n",
    "print(centered_region_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bam_f = \"../../../tests/data/center.bam\"\n",
    "fiberbam = pyft.Fiberbam(bam_f)\n",
    "\n",
    "# iterate over a fiberbam one fiber at a time,\n",
    "# stopping after the first 11 fibers (idx 0 through 10)\n",
    "for idx, fiber in enumerate(fiberbam):\n",
    "    if idx > 10:\n",
    "        break\n",
    "    print(fiber)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}