Entering edit mode
3.3 years ago
aranyak111
•
0
I am trying to do RNA seq analysis and my goal is to filter gene counts less than 5 using custom made python scripts. The code chunk goes as follows
# filter reads lower than five
nano filtercounts.py
#!/usr/bin/python
#Load relevant modules
import sys
#Read in the HTseq gene count output files for each of the 8 samples
inputlist = [sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7], sys.argv[8]]
##Determine the genes with sufficient coverage
#Set up a variable to hold genes with sufficient coverage in any of the 8 samples
genelist = []
#For each HTseq file
for i in range(len(inputlist)):
#Open it
inputfile = open(inputlist[i], 'r')
#For each line
for line in inputfile:
#Determine the number of reads/transcripts for this gene
splitline = line.strip().split('\t')
#Does it pass the threshold?
if int(splitline[1]) >= 5:
#If so, add it to list
if not splitline[0] in genelist:
genelist.append(splitline[0])
#Close the file
inputfile.close()
#Write out the list of sufficiently expressed genes
outputlist = open(sys.argv[9], 'w')
for gene in genelist:
outputlist.write(gene + '\n')
outputlist.close()
##Filter each of the HTseq files for only sufficiently expressed genes across our samples
#For each HTseq file
for i in range(len(inputlist)):
#Open it
inputfile = open(inputlist[i], 'r')
#Open an output 'filtered' file
outputfile = open(sys.argv[i+10], 'w')
#For each line
for line in inputfile:
#Determine the gene
splitline = line.strip().split('\t')
#Is it in our list?
if splitline[0] in genelist:
#If so, write out the original line
outputfile.write(line)
#Close the files
inputfile.close()
outputfile.close()
python filtercounts.py 'wth1count.txt' 'wth2count.txt' 'wth3count.txt' 'wth4count.txt' '99h1count.txt' '99h2count.txt' '99h3count.txt' '99h4count.txt' 'genelist.txt' 'wth1filtered.txt' 'wth2filtered.txt' 'wth3filtered.txt' 'wth4filtered.txt' '99h1filtered.txt' '99h2filtered.txt' '99h3filtered.txt' '99h4filtered.txt'
The program works fine but at the end of the code execution I am not getting the last two output files '99h3filtered.txt' '99h4filtered.txt'.