{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Plotting with `pyft`"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DataTransformerRegistry.enable('vegafusion')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pyft\n",
"import pandas as pd\n",
"import altair as alt\n",
"\n",
"alt.data_transformers.enable(\"vegafusion\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read in the results of an `ft-footprint` calculation with `pyft`; the results are plotted further below."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" chrom | \n",
" motif_start | \n",
" motif_end | \n",
" strand | \n",
" footprint_codes | \n",
" fire_qual | \n",
" fiber_name | \n",
" has_spanning_msp | \n",
" footprinted | \n",
" start | \n",
" end | \n",
" centering_position | \n",
" centering_strand | \n",
" type | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" chr11 | \n",
" 5204946 | \n",
" 5204981 | \n",
" + | \n",
" 3 | \n",
" 247 | \n",
" m64076_211222_124721/148505307/ccs | \n",
" True | \n",
" True | \n",
" 0 | \n",
" 1 | \n",
" 5204946 | \n",
" + | \n",
" footprinted | \n",
"
\n",
" \n",
" | 1 | \n",
" chr11 | \n",
" 5204946 | \n",
" 5204981 | \n",
" + | \n",
" 2 | \n",
" -1 | \n",
" m64076_211222_124721/51053256/ccs | \n",
" False | \n",
" True | \n",
" 0 | \n",
" 1 | \n",
" 5204946 | \n",
" + | \n",
" not-footprinted | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" chrom motif_start motif_end strand footprint_codes fire_qual \\\n",
"0 chr11 5204946 5204981 + 3 247 \n",
"1 chr11 5204946 5204981 + 2 -1 \n",
"\n",
" fiber_name has_spanning_msp footprinted start \\\n",
"0 m64076_211222_124721/148505307/ccs True True 0 \n",
"1 m64076_211222_124721/51053256/ccs False True 0 \n",
"\n",
" end centering_position centering_strand type \n",
"0 1 5204946 + footprinted \n",
"1 1 5204946 + not-footprinted "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Footprint table stored under the repository's tests/data directory.\n",
"footprint_table = \"../../../tests/data/ctcf-footprints.bed.gz\"\n",
"dfm = pyft.utils.read_and_center_footprint_table(footprint_table)\n",
"dfm.head(2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read in fiber data centered on the footprint locations (here, only the first two regions listed in `ctcf.bed.gz`)."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2024-11-12T22:29:48Z INFO pyft::fiberdata] 181 records fetched in 0.01s\n",
"[2024-11-12T22:29:48Z INFO pyft::fiberdata] Fiberdata made for 181 records in 0.11s\n",
"[2024-11-12T22:29:48Z INFO pyft::fiberdata] Fiberdata centered for 181 records in 0.02s\n",
"[2024-11-12T22:29:48Z INFO pyft::fiberdata] 172 records fetched in 0.11s\n",
"[2024-11-12T22:29:48Z INFO pyft::fiberdata] Fiberdata made for 172 records in 0.10s\n",
"[2024-11-12T22:29:48Z INFO pyft::fiberdata] Fiberdata centered for 172 records in 0.07s\n"
]
}
],
"source": [
"# Read the first two rows of the region file and name its seven columns.\n",
"rgns = pd.read_csv(\"../../../tests/data/ctcf.bed.gz\", sep=\"\\t\", header=None, nrows=2)\n",
"rgns.columns = [\"chrom\", \"start\", \"end\", \"name\", \"score\", \"strand\", \"name2\"]\n",
"fiberbam = pyft.Fiberbam(\"../../../tests/data/ctcf.bam\")\n",
"\n",
"# Build one centered table per region. itertuples() is used instead of\n",
"# iterrows() because the row index is not needed and no per-row object\n",
"# Series has to be created.\n",
"centers = []\n",
"for rgn in rgns.itertuples(index=False):\n",
"    z = pyft.utils.region_to_centered_df(\n",
"        fiberbam, (rgn.chrom, rgn.start, rgn.end), strand=rgn.strand, max_flank=250\n",
"    )\n",
"    centers.append(z)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
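"Combine the footprinting results with the fiber data centered around the footprints into a single table. Columns that exist in only one of the two inputs are filled with `NaN` for rows coming from the other."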
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" chrom | \n",
" fiber_start | \n",
" fiber_end | \n",
" fiber_name | \n",
" strand | \n",
" type | \n",
" start | \n",
" end | \n",
" qual | \n",
" centering_position | \n",
" centering_strand | \n",
" motif_start | \n",
" motif_end | \n",
" footprint_codes | \n",
" fire_qual | \n",
" has_spanning_msp | \n",
" footprinted | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" chr11 | \n",
" 5184260.0 | \n",
" 5205600.0 | \n",
" m64076_211222_124721/148505307/ccs | \n",
" + | \n",
" msp | \n",
" -225 | \n",
" -160 | \n",
" 0 | \n",
" 5204946 | \n",
" + | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" chr11 | \n",
" 5184260.0 | \n",
" 5205600.0 | \n",
" m64076_211222_124721/148505307/ccs | \n",
" + | \n",
" msp | \n",
" -57 | \n",
" 135 | \n",
" 247 | \n",
" 5204946 | \n",
" + | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" chrom fiber_start fiber_end fiber_name strand \\\n",
"0 chr11 5184260.0 5205600.0 m64076_211222_124721/148505307/ccs + \n",
"1 chr11 5184260.0 5205600.0 m64076_211222_124721/148505307/ccs + \n",
"\n",
" type start end qual centering_position centering_strand motif_start \\\n",
"0 msp -225 -160 0 5204946 + NaN \n",
"1 msp -57 135 247 5204946 + NaN \n",
"\n",
" motif_end footprint_codes fire_qual has_spanning_msp footprinted \n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Stack the per-region fiber tables and the footprint table, renumbering rows from 0.\n",
"both_dfs = pd.concat([*centers, dfm], ignore_index=True)\n",
"both_dfs.head(2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show the chart within the notebook."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
"