I want to extract the nucleotide at each position from a BAM file in Python, using the pysam module.
As I am a beginner, I would like to ask:
Which part of a BAM file gives the position?
How can I count the nucleotides (A, T, G, C) present at a given position (i.e. what logic should be used) with Python's pysam?
Could anyone explain this to me?
The sample BAM file and the code used to print it are shown below.
.
#!/usr/bin/python
import pysam
# Open the BAM file for reading; "rb" means read mode, binary (BAM) format.
# NOTE(review): newer pysam releases expose this class as
# pysam.AlignmentFile; Samfile is kept here for compatibility — confirm
# against the installed pysam version.
samfile = pysam.Samfile("reads.sorted.bam", "rb")
try:
    # Iterating over the opened file yields one record at a time;
    # each record is printed as-is.
    for line in samfile:
        print(line)
finally:
    # Always release the file handle, even if iteration fails part-way.
    samfile.close()
The above code prints the contents of the BAM file as follows:
72NUT:00012:01818 0 0 0 0 4M22I250M -1 -1 276 CCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGGGTATGCACGCGATAGCATTGCGGGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATACTTACTAAAGCGTATTAATTAATTAATGCTTGTAGGACATAACAATAACAATTAAATGTCT array('B', [35, 32, 35, 32, 34, 34, 30, 34, 34, 32, 34, 34, 32, 34, 34, 34, 34, 33, 33, 33, 34, 34, 32, 33, 33, 34, 34, 34, 34, 34, 32, 34, 35, 35, 34, 33, 33, 34, 34, 35, 30, 34, 34, 34, 32, 34, 31, 34, 31, 34, 25, 25, 25, 34, 23, 23, 23, 11, 23, 22, 17, 25, 34, 35, 35, 32, 34, 34, 34, 34, 34, 34, 35, 30, 34, 32, 30, 30, 30, 34, 34, 24, 34, 34, 34, 34, 29, 34, 34, 34, 34, 34, 16, 34, 34, 30, 30, 26, 32, 32, 32, 34, 32, 32, 32, 30, 30, 29, 25, 26, 21, 26, 26, 25, 36, 27, 33, 31, 31, 26, 26, 26, 26, 30, 31, 31, 30, 36, 29, 30, 30, 30, 30, 31, 34, 26, 30, 30, 30, 33, 33, 33, 30, 30, 30, 31, 31, 31, 33, 30, 30, 30, 33, 33, 33, 33, 26, 33, 33, 33, 28, 32, 28, 32, 31, 31, 28, 31, 31, 31, 30, 30, 30, 24, 30, 34, 30, 35, 36, 35, 19, 26, 26, 26, 33, 33, 29, 30, 27, 30, 30, 30, 30, 25, 20, 15, 15, 11, 15, 15, 15, 18, 23, 23, 33, 33, 29, 26, 26, 26, 30, 30, 30, 29, 30, 30, 30, 28, 32, 32, 30, 30, 27, 11, 25, 23, 23, 25, 25, 33, 30, 33, 29, 33, 28, 31, 20, 25, 19, 25, 22, 27, 27, 31, 31, 17, 23, 23, 29, 27, 22, 27, 32, 25, 26, 25, 28, 32, 33, 21, 26, 26, 30, 26, 26, 23, 26, 30, 33, 33, 24, 30, 30, 30, 26, 26]) [('AS', -138), ('XN', 0), ('XM', 14), ('XO', 1), ('XG', 22), ('NM', 36), ('MD', '0G0A1C59C8G19A56T1T36A5C8T2G28T10G7'), ('YT', 'UU')]
72NUT:00158:00470 16 0 0 3 4M7I213M -1 -1 224 CACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGGGCTCTCCATGCATTTGGTATTTTCGTTTGGGGGGTATGCACGCGATAGCATTGCGGGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATACTTACTAAAGCGTATTAATTAATT array('B', [21, 21, 22, 23, 14, 14, 9, 15, 15, 15, 23, 34, 23, 23, 17, 25, 25, 25, 23, 27, 27, 27, 25, 18, 26, 26, 26, 32, 28, 33, 30, 30, 21, 25, 25, 16, 16, 16, 22, 21, 9, 23, 23, 23, 23, 22, 22, 21, 30, 30, 29, 25, 25, 25, 23, 27, 17, 27, 23, 17, 23, 23, 23, 9, 23, 23, 23, 23, 27, 21, 38, 32, 11, 32, 32, 32, 32, 23, 28, 28, 28, 28, 29, 34, 34, 34, 34, 34, 34, 34, 30, 25, 23, 25, 17, 25, 25, 34, 27, 36, 35, 34, 34, 34, 34, 34, 32, 34, 34, 34, 31, 34, 31, 34, 34, 34, 34, 34, 29, 35, 34, 34, 34, 34, 34, 34, 30, 30, 36, 30, 30, 26, 30, 33, 33, 35, 25, 28, 25, 14, 25, 25, 23, 23, 17, 33, 30, 23, 25, 25, 20, 25, 23, 28, 35, 27, 21, 30, 30, 29, 22, 27, 23, 11, 23, 25, 25, 29, 29, 34, 36, 34, 32, 34, 30, 30, 30, 34, 31, 34, 34, 31, 35, 34, 34, 32, 34, 34, 34, 34, 32, 34, 34, 35, 35, 34, 34, 34, 34, 34, 34, 30, 31, 31, 26, 26, 15, 26, 26, 26, 34, 31, 34, 34, 31, 34, 32, 34, 31, 34, 31, 34, 32, 34]) [('AS', -85), ('XN', 0), ('XM', 13), ('XO', 1), ('XG', 7), ('NM', 20), ('MD', '0G1T0C32A26C8G19A56T1T36A5C8T2G10'), ('YT', 'UU')]
72NUT:00473:00766 0 0 0 0 4M12I165M1I19M1I15M1I2M1I22M1D35M1I28M -1 -1 307 GACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGGGTACGCACGCGATAGCATTGCGGGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCCATTATTTATCGCACCTATCGTTCAATATTACAGGCGAGTCATACTTACTAAAGCTGATATTAATTAATTAATGCTTGTAGACATAACAATAACAATTAAATGTCTGCACAGCCGTCTTTCCACACAGACATCATAACAAAAAA array('B', [35, 34, 34, 34, 34, 35, 35, 35, 30, 31, 31, 34, 32, 34, 34, 34, 34, 34, 34, 34, 32, 34, 34, 34, 34, 34, 34, 30, 30, 30, 26, 30, 34, 34, 31, 34, 32, 34, 32, 35, 34, 32, 32, 34, 26, 25, 25, 14, 26, 25, 31, 31, 31, 31, 33, 26, 29, 29, 34, 34, 36, 34, 35, 30, 34, 32, 35, 34, 34, 34, 34, 24, 34, 34, 32, 33, 11, 23, 23, 23, 23, 23, 8, 23, 31, 34, 34, 34, 34, 34, 34, 34, 34, 29, 30, 30, 34, 34, 34, 34, 32, 30, 26, 26, 36, 24, 34, 34, 34, 34, 34, 35, 26, 29, 30, 31, 29, 29, 33, 35, 30, 26, 28, 34, 34, 29, 35, 35, 34, 32, 32, 36, 34, 35, 31, 31, 31, 34, 32, 33, 32, 34, 26, 29, 29, 34, 25, 30, 30, 34, 32, 34, 32, 34, 34, 35, 32, 34, 34, 34, 34, 33, 34, 26, 30, 30, 26, 29, 35, 36, 28, 26, 29, 34, 27, 14, 15, 14, 23, 26, 14, 14, 13, 13, 26, 8, 13, 13, 8, 19, 19, 34, 28, 34, 34, 34, 34, 28, 17, 17, 17, 17, 17, 19, 19, 19, 19, 17, 33, 20, 25, 30, 13, 29, 29, 21, 27, 14, 14, 14, 14, 13, 18, 19, 18, 18, 18, 32, 13, 17, 13, 16, 8, 16, 8, 16, 25, 30, 19, 6, 12, 12, 12, 12, 12, 20, 12, 12, 12, 18, 12, 17, 13, 17, 24, 18, 17, 24, 13, 17, 22, 33, 33, 14, 25, 25, 31, 25, 25, 17, 17, 31, 25, 13, 13, 13, 8, 12, 12, 12, 12, 18, 8, 12, 8, 16, 18, 18, 16, 16, 21, 32, 32, 30, 30, 30, 21, 19, 19, 25, 16, 12, 12, 12, 12, 12, 4]) [('AS', -147), ('XN', 0), ('XM', 13), ('XO', 7), ('XG', 18), ('NM', 31), ('MD', '2T0C59C8G0T18A56T1T36A5C11G20^G7T10G44'), ('YT', 'UU')]
72NUT:00541:01644 0 0 0 3 4M5I265M -1 -1 274 CGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGGGTATGCACGCGATAGCATTGCGGGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATACTTACTAAAGCGTATTAATTAATTAATGCTTGTATGACATAACAATAACAATTAAATGTCTGCACAGCCGTCTTTC array('B', [36, 34, 34, 30, 30, 30, 31, 34, 34, 35, 34, 34, 34, 32, 30, 30, 30, 34, 34, 33, 29, 30, 29, 26, 29, 33, 34, 32, 34, 33, 35, 32, 34, 34, 34, 34, 34, 29, 26, 29, 26, 29, 27, 27, 31, 34, 35, 35, 32, 34, 31, 31, 30, 34, 34, 36, 30, 35, 31, 34, 34, 34, 35, 34, 25, 34, 34, 34, 36, 25, 26, 29, 26, 26, 26, 10, 26, 34, 34, 34, 34, 34, 30, 30, 30, 34, 34, 34, 30, 30, 30, 34, 34, 32, 34, 34, 34, 35, 29, 35, 36, 36, 34, 31, 30, 29, 30, 34, 34, 31, 34, 32, 34, 35, 35, 31, 31, 31, 28, 31, 34, 34, 37, 31, 31, 32, 39, 35, 31, 31, 31, 34, 34, 34, 30, 31, 31, 31, 37, 29, 34, 34, 34, 29, 30, 28, 30, 34, 34, 30, 34, 34, 34, 29, 30, 30, 27, 30, 35, 32, 34, 34, 34, 28, 30, 30, 34, 34, 34, 34, 35, 32, 34, 34, 29, 29, 29, 22, 26, 34, 29, 26, 26, 26, 28, 26, 26, 26, 25, 19, 25, 25, 25, 24, 19, 19, 19, 19, 17, 20, 30, 34, 34, 34, 34, 34, 29, 34, 29, 20, 17, 24, 24, 20, 20, 25, 23, 32, 35, 31, 30, 26, 30, 26, 30, 30, 30, 27, 11, 14, 15, 15, 14, 14, 14, 15, 27, 27, 29, 26, 31, 34, 32, 34, 35, 32, 34, 34, 31, 34, 31, 34, 34, 24, 25, 25, 25, 25, 25, 25, 25, 25, 31, 14, 14, 14, 21, 14, 14, 14, 26, 26, 8, 13]) [('AS', -101), ('XN', 0), ('XM', 19), ('XO', 1), ('XG', 5), ('NM', 24), ('MD', '0G0A0T0C59C8G19A56T1T36A5C8T2G20G7T10G16C0T2C1'), ('YT', 'UU')]
72NUT:00614:00539 16 0 0 0 3M17I77M1I160M -1 -1 258 TAACGAACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGGGTATGCACGCGGATAGCATTGCGGGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATACTTACTAAAGCGTATTAATTAATTAATGCTTGTAGGACATAGCAATA array('B', [12, 8, 12, 12, 12, 8, 12, 12, 12, 21, 25, 24, 31, 34, 34, 29, 29, 30, 30, 34, 34, 34, 30, 30, 29, 35, 35, 35, 30, 31, 31, 26, 32, 26, 34, 32, 32, 32, 30, 33, 29, 33, 29, 27, 29, 30, 33, 33, 32, 30, 24, 30, 30, 25, 29, 29, 34, 29, 28, 20, 34, 20, 23, 29, 29, 34, 23, 36, 30, 21, 25, 25, 32, 22, 33, 32, 33, 34, 34, 29, 34, 34, 16, 36, 36, 38, 37, 38, 34, 30, 26, 26, 22, 22, 23, 23, 14, 9, 14, 14, 25, 31, 31, 33, 34, 32, 34, 33, 32, 26, 32, 31, 31, 31, 31, 31, 36, 30, 31, 34, 34, 32, 30, 29, 29, 29, 25, 25, 25, 24, 29, 25, 29, 33, 31, 28, 29, 30, 34, 34, 34, 34, 34, 34, 30, 30, 30, 35, 34, 34, 28, 34, 32, 25, 25, 17, 25, 17, 31, 31, 28, 26, 29, 30, 34, 34, 34, 30, 34, 34, 34, 31, 35, 35, 30, 35, 35, 34, 34, 34, 34, 34, 34, 32, 34, 34, 34, 34, 34, 32, 34, 34, 31, 34, 34, 34, 31, 34, 34, 34, 34, 32, 35, 34, 34, 34, 34, 34, 34, 34, 34, 34, 31, 35, 35, 34, 34, 28, 35, 34, 34, 34, 34, 34, 34, 31, 34, 31, 34, 33, 35, 32, 34, 32, 36, 33, 34, 30, 30, 30, 31, 34, 34, 34, 34, 32, 34, 34, 34, 35, 35, 34, 35, 34, 31, 34, 34, 32]) [('AS', -125), ('XN', 0), ('XM', 13), ('XO', 2), ('XG', 18), ('NM', 31), ('MD', '0G1T60C8G19A56T1T36A5C8T2G27A0T4'), ('YT', 'UU')]