|
/* |
|
Author: Pierre Lindenbaum PhD. |
|
*/ |
|
import java.io.File; |
|
|
|
import javax.xml.stream.XMLOutputFactory; |
|
import javax.xml.stream.XMLStreamWriter; |
|
|
|
import net.sf.picard.reference.ReferenceSequence; |
|
import net.sf.picard.reference.ReferenceSequenceFile; |
|
import net.sf.picard.reference.ReferenceSequenceFileFactory; |
|
import net.sf.samtools.CigarElement; |
|
import net.sf.samtools.SAMFileReader; |
|
import net.sf.samtools.SAMFileReader.ValidationStringency; |
|
import net.sf.samtools.SAMRecord; |
|
import net.sf.samtools.SAMRecordIterator; |
|
|
|
|
|
public class Biostar59647 |
|
{ |
|
public static void main(String[] args) |
|
throws Exception |
|
{ |
|
if(args.length<3) |
|
{ |
|
System.err.println("Usage: ref bam chr:start-end chr:start-end ..."); |
|
return; |
|
} |
|
File refFile=new File(args[0]); |
|
File bamFile=new File(args[1]); |
|
SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT); |
|
ReferenceSequenceFile reference=ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile); |
|
SAMFileReader samFileReader=new SAMFileReader(bamFile); |
|
XMLOutputFactory xmlfactory= XMLOutputFactory.newInstance(); |
|
XMLStreamWriter w= xmlfactory.createXMLStreamWriter(System.out,"UTF-8"); |
|
w.writeStartDocument("UTF-8","1.0"); |
|
w.writeStartElement("biostar59647"); |
|
w.writeAttribute("ref", args[0]); |
|
w.writeAttribute("bam", args[1]); |
|
for(int optind=2;optind<args.length;++optind) |
|
{ |
|
String s=args[optind]; |
|
|
|
int colon=s.indexOf(':'); if(colon==-1) continue; |
|
String chrom=s.substring(0,colon); |
|
int t=s.indexOf('-',colon+1);if(t==-1) continue; |
|
int start=Integer.parseInt(s.substring(colon+1,t)); |
|
int end=Integer.parseInt(s.substring(t+1)); |
|
|
|
ReferenceSequence refseq=reference.getSubsequenceAt(chrom, start, end); |
|
if(refseq==null) continue; |
|
byte dna[]=refseq.getBases(); |
|
w.writeStartElement("interval"); |
|
w.writeAttribute("chrom", chrom); |
|
w.writeAttribute("start", String.valueOf(start)); |
|
w.writeAttribute("end", String.valueOf(end)); |
|
|
|
SAMRecordIterator iter=samFileReader.queryOverlapping(chrom, start, end); |
|
while(iter.hasNext()) |
|
{ |
|
SAMRecord rec=iter.next(); |
|
|
|
final byte readbases[]=rec.getReadBases(); |
|
w.writeStartElement("read"); |
|
|
|
w.writeStartElement("name"); |
|
w.writeCharacters(rec.getReadName()); |
|
w.writeEndElement(); |
|
w.writeStartElement("sequence"); |
|
w.writeCharacters(new String(readbases)); |
|
w.writeEndElement(); |
|
w.writeStartElement("flags"); |
|
w.writeCharacters(String.valueOf(rec.getFlags())); |
|
w.writeEndElement(); |
|
w.writeStartElement("chrom"); |
|
w.writeCharacters(rec.getReferenceName()); |
|
w.writeEndElement(); |
|
w.writeStartElement("pos"); |
|
w.writeCharacters(String.valueOf(rec.getAlignmentStart())); |
|
w.writeEndElement(); |
|
w.writeStartElement("cigar"); |
|
w.writeCharacters(rec.getCigarString()); |
|
w.writeEndElement(); |
|
|
|
w.writeStartElement("align"); |
|
|
|
int readIndex = 0; |
|
int refIndex = rec.getAlignmentStart(); |
|
|
|
for (final CigarElement e : rec.getCigar().getCigarElements()) |
|
{ |
|
switch (e.getOperator()) |
|
{ |
|
case H : break; // ignore hard clips |
|
case P : break; // ignore pads |
|
case I : //cont. |
|
case S : |
|
{ |
|
final int length = e.getLength(); |
|
for(int i=0;i<length;++i) |
|
{ |
|
w.writeEmptyElement(e.getOperator().name()); |
|
w.writeAttribute("read-index",String.valueOf(readIndex+1)); |
|
if(readIndex>=0 && readIndex< readbases.length) |
|
{ |
|
w.writeAttribute("read-base",String.valueOf((char)(readbases[readIndex]))); |
|
} |
|
readIndex++; |
|
} |
|
break; |
|
} |
|
case N : //cont. -- reference skip |
|
case D : |
|
{ |
|
final int length = e.getLength(); |
|
for(int i=0;i<length;++i) |
|
{ |
|
w.writeEmptyElement(e.getOperator().name()); |
|
w.writeAttribute("ref-index",String.valueOf(refIndex)); |
|
int idx=refIndex-start; |
|
if(idx>=0 && idx< dna.length) |
|
{ |
|
w.writeAttribute("ref-base",String.valueOf((char)(dna[idx]))); |
|
} |
|
refIndex++; |
|
} |
|
break; |
|
} |
|
case M : |
|
case EQ : |
|
case X : |
|
{ |
|
final int length = e.getLength(); |
|
for(int i=0;i<length;++i) |
|
{ |
|
w.writeEmptyElement(e.getOperator().name()); |
|
char baseRead='\0'; |
|
if(readIndex>=0 && readIndex< readbases.length) |
|
{ |
|
baseRead=(char)(rec.getReadBases()[readIndex]); |
|
w.writeAttribute("read-index",String.valueOf(readIndex+1)); |
|
w.writeAttribute("read-base",String.valueOf(baseRead)); |
|
} |
|
w.writeAttribute("ref-index",String.valueOf(refIndex)); |
|
int idx2=refIndex-start; |
|
if(idx2>=0 && idx2< dna.length) |
|
{ |
|
char baseRef=(char)(dna[idx2]); |
|
w.writeAttribute("ref-base",String.valueOf(baseRef)); |
|
if(Character.toUpperCase(baseRef)!=Character.toUpperCase(baseRead)) |
|
{ |
|
w.writeAttribute("mismatch","true"); |
|
} |
|
} |
|
refIndex++; |
|
readIndex++; |
|
} |
|
break; |
|
} |
|
|
|
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator()); |
|
} |
|
} |
|
|
|
|
|
w.writeEndElement(); |
|
|
|
|
|
|
|
w.writeEndElement(); |
|
} |
|
iter.close(); |
|
w.writeEndElement(); |
|
} |
|
samFileReader.close(); |
|
w.writeEndElement(); |
|
w.writeEndDocument(); |
|
w.flush(); |
|
w.close(); |
|
} |
|
} |