I have 2 fasta files with sequence's in it.i want to align the sequences in second file to first file and report identity
>s1 aaccggactggacatccg >s2 gtcgactctcggaattg ....
>a1 actg >a2 tccg ....
i want to take the file2 sequences and look in file1 and print the matching with mismatched base in uppercase and identity in csv format
name,a1_alignment,a1_identity,a2_alignment,a2_identity s1,actg,100,tccg,100 s2,aCtg,95,tcCg,95
here what i did so far
import sys import os,csv from Bio import SeqIO from itertools import * from optparse import OptionParser parser = OptionParser() parser.add_option("-m", "--mismatch_threshold", dest="mismatch_threshold", default = 2, help="This is the number of differences you'll allow between the actualread and your sequence of interest. Default is 2") (options, args) = parser.parse_args() if len(sys.argv) != 4: print "Usage : python search.py <file1> <file2> <fout>" sys.exit() f1 = open(sys.argv,'r') f2 = open(sys.argv,'r') fout = open(sys.argv,'w') writer = csv.writer(fout) def long(f1): for record in SeqIO.parse(f1,'fasta'): header = record.name sequence = record.seq yield [header, sequence] def short(f2): for record in SeqIO.parse(f2,'fasta'): head = record.name seq = record.seq return seq def alignment(sequence,seq,mismatch_threshold): l1 = len(sequence) l2 = len(seq) alignment =  for i in range(0,min(l1,l2)): if sequence[i] == seq[i]: alignment.append(i) else: mismatch = sum( c1 != c2 for c1,c2 in zip(sequence,seq)) if mismatch <= mismatch_threshold: alignment.append(i) k = 0 l = 0 for read in alignment: for letter in read: if letter == isupper(): pass else: if letter == alignment.seq[j]: l +=1 k += 1 k = 0 length = seq percent = 100*l/len(seq) #print percent yield percent longsequences = long(open(sys.argv,'r')) shortsequences = short(open(sys.argv,'r')) align = alignment(longsequences,shortsequences,options.mismatch_threshold) for name in head: writer.writerow(( name +'_alignment' , name + '_identity')) for s in align: # print to csv file
i am confused and have problem in reading the the sequences in the def function getting this error
what is the best of looping the sequences of file2 and aligning them to sequences in file1 and find an alignment with mismatches?
File "search.py", line 34, in alignment l1 = len(sequence) TypeError: object of type 'generator' has no len()