Basic use cases for pyft

[1]:
# import pyft
# from pyft import pyft
import pyft
import tqdm

# Input BAM used throughout this example.
bam_f = "../../../tests/data/center.bam"

# Reader over the input BAM, plus a writer that creates "test.bam"
# (the input BAM path is handed to the writer as its second argument).
fiberbam = pyft.Fiberbam(bam_f)
out_fiberbam = pyft.Fiberwriter("test.bam", bam_f)

# Region of interest, laid out as [chrom, start, end].
rgn = ["chr22", 26_354_169, 26_354_170]

# Visit every fiber fetched for the region (with a progress bar).
# The bare attribute accesses below only illustrate what is available on a
# fiber; inside a loop their values are evaluated and discarded, not displayed.
for fiber in tqdm.tqdm(fiberbam.fetch(rgn[0], rgn[1], rgn[2])):
    # number of CCS passes
    fiber.ec
    # start positions of the MSPs
    fiber.msp.starts
    # FIRE quality scores of the MSPs
    fiber.msp.qual
    # reference start positions of the nucs
    fiber.nuc.reference_starts
    # lift query (fiber) positions 0..9 to reference positions
    fiber.lift_query_positions(list(range(10)))
    # lift reference positions 0..9 to query (fiber) positions
    fiber.lift_reference_positions(list(range(10)))

    # copy the fiber into the output BAM
    out_fiberbam.write(fiber)


# Iterate over the same region again, this time centered on it and oriented to
# the "-" strand. Per the original note, these are the same fibers as the fetch
# above, except that all positions are relative to the region fetched.
for fiber in fiberbam.center(rgn[0], start=rgn[1], end=rgn[2], strand="-"):
    # Nothing is done with the fiber here; to inspect the centered coordinates
    # try e.g.: print(fiber.msp.reference_starts)
    pass


# --- Footprinting tables -----------------------------------------------------

# Load a footprinting table (called with long=True) and show it.
df = pyft.utils.read_footprint_table(
    "../../../tests/data/ctcf-footprints.bed.gz",
    long=True,
)
print(df)

# Load the same footprinting table with its positions centered.
df = pyft.utils.read_and_center_footprint_table(
    "../../../tests/data/ctcf-footprints.bed.gz",
)
print(df)

# --- Fiberbam regions as pandas DataFrames -----------------------------------

# Pull the region `rgn` out of the fiberbam as a DataFrame.
df = pyft.utils.region_to_df(
    fiberbam,
    rgn,
)
print(df)

# Same region, with positions centered and oriented to the "-" strand.
df = pyft.utils.region_to_centered_df(
    fiberbam,
    rgn,
    strand="-",
)
print(df)
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] 6 records fetched in 0.00s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] Fiberdata made for 6 records in 0.02s
100%|██████████| 6/6 [00:00<00:00, 509.30it/s]
    chrom  motif_start  motif_end strand  n_spanning_fibers  n_spanning_msps  \
0   chr11      5204946    5204981      +                181               92
0   chr11      5204946    5204981      +                181               92
0   chr11      5204946    5204981      +                181               92
0   chr11      5204946    5204981      +                181               92
0   chr11      5204946    5204981      +                181               92
..    ...          ...        ...    ...                ...              ...
16  chr19     45817350   45817385      +                136              124
16  chr19     45817350   45817385      +                136              124
16  chr19     45817350   45817385      +                136              124
16  chr19     45817350   45817385      +                136              124
16  chr19     45817350   45817385      +                136              124

    n_overlapping_nucs  module:0-8  module:8-16  module:16-23  module:23-29  \
0                   85       False        False         False         False
0                   85       False        False         False         False
0                   85       False        False          True          True
0                   85       False        False         False         False
0                   85       False        False         False         False
..                 ...         ...          ...           ...           ...
16                   8       False         True          True          True
16                   8       False         True          True          True
16                   8       False        False         False          True
16                   8        True        False          True          True
16                   8       False        False         False         False

    module:29-35  fire_qual                          fiber_name  n_modules  \
0          False        247  m64076_211222_124721/148505307/ccs          5
0          False         -1   m64076_211222_124721/51053256/ccs          5
0          False        246   m64076_211222_124721/62391018/ccs          5
0          False         -1   m64076_211222_124721/97191992/ccs          5
0          False         -1   m64076_211222_124721/99419016/ccs          5
..           ...        ...                                 ...        ...
16          True          0  m64076_211222_124721/157222001/ccs          5
16          True        246   m64076_211222_124721/65339699/ccs          5
16         False          0    m64076_211222_124721/6882497/ccs          5
16         False        243   m64076_211222_124721/31394454/ccs          5
16         False         -1  m64076_211222_124721/100926481/ccs          5

    has_spanning_msp
0               True
0              False
0               True
0              False
0              False
..               ...
16              True
16              True
16              True
16              True
16             False

[2065 rows x 16 columns]
       chrom  motif_start  motif_end strand  fire_qual  \
0      chr11      5204946    5204981      +        247
1      chr11      5204946    5204981      +         -1
2      chr11      5204946    5204981      +        246
3      chr11      5204946    5204981      +         -1
4      chr11      5204946    5204981      +         -1
...      ...          ...        ...    ...        ...
10320  chr19     45817350   45817385      +          0
10321  chr19     45817350   45817385      +        246
10322  chr19     45817350   45817385      +          0
10323  chr19     45817350   45817385      +        243
10324  chr19     45817350   45817385      +         -1

                               fiber_name  has_spanning_msp  footprinted  \
0      m64076_211222_124721/148505307/ccs              True        False
1       m64076_211222_124721/51053256/ccs             False        False
2       m64076_211222_124721/62391018/ccs              True        False
3       m64076_211222_124721/97191992/ccs             False        False
4       m64076_211222_124721/99419016/ccs             False        False
...                                   ...               ...          ...
10320  m64076_211222_124721/157222001/ccs              True         True
10321   m64076_211222_124721/65339699/ccs              True         True
10322    m64076_211222_124721/6882497/ccs              True        False
10323   m64076_211222_124721/31394454/ccs              True        False
10324  m64076_211222_124721/100926481/ccs             False        False

       start  end  centering_position centering_strand             type
0          0    8             5204946                +  not-footprinted
1          0    8             5204946                +  not-footprinted
2          0    8             5204946                +  not-footprinted
3          0    8             5204946                +  not-footprinted
4          0    8             5204946                +  not-footprinted
...      ...  ...                 ...              ...              ...
10320     29   35            45817350                +      footprinted
10321     29   35            45817350                +      footprinted
10322     29   35            45817350                +  not-footprinted
10323     29   35            45817350                +  not-footprinted
10324     29   35            45817350                +  not-footprinted

[10325 rows x 13 columns]
    chrom  fiber_start  fiber_end                           fiber_name strand  \
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
..    ...          ...        ...                                  ...    ...
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -

   type     start       end qual
0   msp  26333672  26333727    0
0   msp  26333848  26333890    0
0   msp  26334056  26334094    0
0   msp  26334254  26334319    0
0   msp  26334561  26334565    0
..  ...       ...       ...  ...
23  5mC  26365739  26365740  213
23  5mC  26366886  26366887  255
23  5mC  26367221  26367222  172
23  5mC  26367226  26367227  246
23  5mC  26367254  26367255  252

[9111 rows x 9 columns]
    chrom  fiber_start  fiber_end                           fiber_name strand  \
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
0   chr22     26333471   26371209    m64076_210328_012155/35587949/ccs      +
..    ...          ...        ...                                  ...    ...
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -
23  chr22     26354168   26367283  m54329U_210326_192251/160237619/ccs      -

   type   start     end qual  centering_position centering_strand
0   msp  -16802  -16734    0            26354169                -
0   msp  -16489  -16476    0            26354169                -
0   msp  -16230  -16185    0            26354169                -
0   msp  -16044  -16004    0            26354169                -
0   msp  -15865  -15810    0            26354169                -
..  ...     ...     ...  ...                 ...              ...
23  5mC    -708    -707  173            26354169                -
23  5mC    -667    -666  224            26354169                -
23  5mC    -591    -590  135            26354169                -
23  5mC     -95     -94  228            26354169                -
23  5mC     -61     -60  178            26354169                -

[9111 rows x 11 columns]

[2024-05-28T23:34:23Z INFO  pyft::fiberdata] 6 records fetched in 0.02s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] Fiberdata made for 6 records in 0.02s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] Fiberdata centered for 6 records in 0.00s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] 6 records fetched in 0.04s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] Fiberdata made for 6 records in 0.02s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] 6 records fetched in 0.01s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] Fiberdata made for 6 records in 0.02s
[2024-05-28T23:34:23Z INFO  pyft::fiberdata] Fiberdata centered for 6 records in 0.00s
[2]:
# Re-open the example BAM so iteration starts from a fresh reader.
bam_f = "../../../tests/data/center.bam"
fiberbam = pyft.Fiberbam(bam_f)

# A Fiberbam can be iterated directly, yielding one fiber at a time.
# Only fibers with index 0 through 10 are printed; the loop stops at index 11.
for idx, fiber in enumerate(fiberbam):
    if idx <= 10:
        print(fiber)
    else:
        break
fiber: m54329U_210814_130637/103874956/ccs      chrom: .        start: 5506049  end 5532904     num m6a: 1908    num cpg: 379   num nuc: 141     num msp: 142
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi
hi