I wrote the below script which takes a SAM file as input and creates for each contig a SAM and FASTA file:
#!/usr/bin/env python3 import click import os @click.command() @click.option('--sam', help="SAM files", required=True) @click.option('--output', help="Output directory", required=True) def retrieve_contig_reads(sam, output): with open(sam) as align: for line in align: try: parts = line.rstrip().split('\t') illumina_id = parts Illumina_seq= parts contig_name = parts with open(os.path.join(output, contig_name + ".fasta"), 'a') as fasta: fasta.write(">" + illumina_id + '\n') fasta.write(Illumina_seq + '\n') with open(os.path.join(output, contig_name + ".sam"), 'a') as sam_line: sam_line.write(line + '\n') except IndexError: continue if __name__ == '__main__': retrieve_contig_reads()
Unfortunately, it ran for more than 2 weeks and has still not completed. Is there a faster way to retrieve the alignment and the reads for each contig?
Thank you in advance,