Basic use cases for pyft
[1]:
# import pyft
# from pyft import pyft
import pyft
import tqdm
# Open the example BAM for reading and create a writer for "test.bam".
# NOTE(review): the second Fiberwriter argument is the input BAM path,
# presumably used as a template for the output header -- confirm in the docs.
bam_f = "../../../tests/data/center.bam"
fiberbam = pyft.Fiberbam(bam_f)
out_fiberbam = pyft.Fiberwriter("test.bam", bam_f)

# Region of interest as [chrom, start, end]; unpacked into fetch() below.
rgn = ["chr22", 26_354_169, 26_354_170]

# Visit every fiber returned for the region. The bare attribute accesses only
# demonstrate the API: their values are evaluated and discarded.
for fiber in tqdm.tqdm(fiberbam.fetch(*rgn)):
    # the number of ccs passes
    fiber.ec
    # the msp start positions
    fiber.msp.starts
    # the fire quality scores of the MSPs
    fiber.msp.qual
    # the nuc reference starts
    fiber.nuc.reference_starts
    # lift query (fiber) positions to reference positions
    fiber.lift_query_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    # lift reference positions to query (fiber) positions
    fiber.lift_reference_positions([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    # write the fiber to the output BAM
    out_fiberbam.write(fiber)

for fiber in fiberbam.center(rgn[0], start=rgn[1], end=rgn[2], strand="-"):
    # returns the same fiber object as above; however, all the positions have
    # been modified to be relative to the region fetched
    # print(fiber.msp.reference_starts)
    continue

# example of reading in a footprinting table
df = pyft.utils.read_footprint_table(
    "../../../tests/data/ctcf-footprints.bed.gz", long=True
)
print(df)

# read in a footprinting table and center the positions
df = pyft.utils.read_and_center_footprint_table(
    "../../../tests/data/ctcf-footprints.bed.gz"
)
print(df)

# read a region of a fiberbam into a pandas dataframe
df = pyft.utils.region_to_df(fiberbam, rgn)
print(df)

# read a region of a fiberbam into a pandas dataframe and center the positions
df = pyft.utils.region_to_centered_df(fiberbam, rgn, strand="-")
print(df)
[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.00s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s
100%|██████████| 6/6 [00:00<00:00, 509.30it/s]
chrom motif_start motif_end strand n_spanning_fibers n_spanning_msps \
0 chr11 5204946 5204981 + 181 92
0 chr11 5204946 5204981 + 181 92
0 chr11 5204946 5204981 + 181 92
0 chr11 5204946 5204981 + 181 92
0 chr11 5204946 5204981 + 181 92
.. ... ... ... ... ... ...
16 chr19 45817350 45817385 + 136 124
16 chr19 45817350 45817385 + 136 124
16 chr19 45817350 45817385 + 136 124
16 chr19 45817350 45817385 + 136 124
16 chr19 45817350 45817385 + 136 124
n_overlapping_nucs module:0-8 module:8-16 module:16-23 module:23-29 \
0 85 False False False False
0 85 False False False False
0 85 False False True True
0 85 False False False False
0 85 False False False False
.. ... ... ... ... ...
16 8 False True True True
16 8 False True True True
16 8 False False False True
16 8 True False True True
16 8 False False False False
module:29-35 fire_qual fiber_name n_modules \
0 False 247 m64076_211222_124721/148505307/ccs 5
0 False -1 m64076_211222_124721/51053256/ccs 5
0 False 246 m64076_211222_124721/62391018/ccs 5
0 False -1 m64076_211222_124721/97191992/ccs 5
0 False -1 m64076_211222_124721/99419016/ccs 5
.. ... ... ... ...
16 True 0 m64076_211222_124721/157222001/ccs 5
16 True 246 m64076_211222_124721/65339699/ccs 5
16 False 0 m64076_211222_124721/6882497/ccs 5
16 False 243 m64076_211222_124721/31394454/ccs 5
16 False -1 m64076_211222_124721/100926481/ccs 5
has_spanning_msp
0 True
0 False
0 True
0 False
0 False
.. ...
16 True
16 True
16 True
16 True
16 False
[2065 rows x 16 columns]
chrom motif_start motif_end strand fire_qual \
0 chr11 5204946 5204981 + 247
1 chr11 5204946 5204981 + -1
2 chr11 5204946 5204981 + 246
3 chr11 5204946 5204981 + -1
4 chr11 5204946 5204981 + -1
... ... ... ... ... ...
10320 chr19 45817350 45817385 + 0
10321 chr19 45817350 45817385 + 246
10322 chr19 45817350 45817385 + 0
10323 chr19 45817350 45817385 + 243
10324 chr19 45817350 45817385 + -1
fiber_name has_spanning_msp footprinted \
0 m64076_211222_124721/148505307/ccs True False
1 m64076_211222_124721/51053256/ccs False False
2 m64076_211222_124721/62391018/ccs True False
3 m64076_211222_124721/97191992/ccs False False
4 m64076_211222_124721/99419016/ccs False False
... ... ... ...
10320 m64076_211222_124721/157222001/ccs True True
10321 m64076_211222_124721/65339699/ccs True True
10322 m64076_211222_124721/6882497/ccs True False
10323 m64076_211222_124721/31394454/ccs True False
10324 m64076_211222_124721/100926481/ccs False False
start end centering_position centering_strand type
0 0 8 5204946 + not-footprinted
1 0 8 5204946 + not-footprinted
2 0 8 5204946 + not-footprinted
3 0 8 5204946 + not-footprinted
4 0 8 5204946 + not-footprinted
... ... ... ... ... ...
10320 29 35 45817350 + footprinted
10321 29 35 45817350 + footprinted
10322 29 35 45817350 + not-footprinted
10323 29 35 45817350 + not-footprinted
10324 29 35 45817350 + not-footprinted
[10325 rows x 13 columns]
chrom fiber_start fiber_end fiber_name strand \
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
.. ... ... ... ... ...
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
type start end qual
0 msp 26333672 26333727 0
0 msp 26333848 26333890 0
0 msp 26334056 26334094 0
0 msp 26334254 26334319 0
0 msp 26334561 26334565 0
.. ... ... ... ...
23 5mC 26365739 26365740 213
23 5mC 26366886 26366887 255
23 5mC 26367221 26367222 172
23 5mC 26367226 26367227 246
23 5mC 26367254 26367255 252
[9111 rows x 9 columns]
chrom fiber_start fiber_end fiber_name strand \
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
0 chr22 26333471 26371209 m64076_210328_012155/35587949/ccs +
.. ... ... ... ... ...
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
23 chr22 26354168 26367283 m54329U_210326_192251/160237619/ccs -
type start end qual centering_position centering_strand
0 msp -16802 -16734 0 26354169 -
0 msp -16489 -16476 0 26354169 -
0 msp -16230 -16185 0 26354169 -
0 msp -16044 -16004 0 26354169 -
0 msp -15865 -15810 0 26354169 -
.. ... ... ... ... ... ...
23 5mC -708 -707 173 26354169 -
23 5mC -667 -666 224 26354169 -
23 5mC -591 -590 135 26354169 -
23 5mC -95 -94 228 26354169 -
23 5mC -61 -60 178 26354169 -
[9111 rows x 11 columns]
[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.02s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata centered for 6 records in 0.00s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.04s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] 6 records fetched in 0.01s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata made for 6 records in 0.02s
[2024-05-28T23:34:23Z INFO pyft::fiberdata] Fiberdata centered for 6 records in 0.00s
[2]:
# Re-open the example BAM so iteration starts from a fresh reader object.
bam_f = "../../../tests/data/center.bam"
fiberbam = pyft.Fiberbam(bam_f)

# iterate over a fiberbam one fiber at a time
for idx, fiber in enumerate(fiberbam):
    # stop once the index passes 10 so the example stays short
    if idx > 10:
        break

# NOTE(review): the original indentation was lost. The recorded cell output
# contains a single "fiber: ..." line, so the print is placed after the loop
# (it shows the last fiber bound by the loop) -- confirm against the notebook.
print(fiber)
fiber: m54329U_210814_130637/103874956/ccs chrom: . start: 5506049 end 5532904 num m6a: 1908 num cpg: 379 num nuc: 141 num msp: 142
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi