Hello, I am trying to run MaSuRCA on a subset of a bacterial genome. The data is paired -end and I have followed the documentation on how to run the program but it hits an error and says Missing forward file for PE library pe at ///path to MaSuRCA directory/bin/masurca line 298, <FILE> line 13
. I am not sure what is going wrong and why it won't recognize the file.
My configuration file
!/bin/bash
# config file for my first test run of V_gazogenes data
SCRATCH_DIR=/lustre/medusa/catheason
DATA_ROOT=/$SCRATCH_DIR/assemblydata/rdikow_bacteria_ABYSS_or_VELVET/V_gazogenes/trimmomatic
DATA
PE= pe 100 20 $DATA_ROOT/paired1.fq $DATA_ROOT/paired2.fq
END
PARAMETERS
# values between 25 and 101 are supported, auto will compute the optimal size
GRAPH_KMER_SIZE=auto
# set this to 1 for Illumina-only assemblies and to 0 if you have 1x or more long (Sanger, 454) reads
USE_LINKING_MATES=1
# Typically set it to 60 for bacteria and something large (300) for mammals
LIMIT_JUMP_COVERAGE = 60
# these are the additional parameters to Celera Assembler.
CA_PARAMETERS = ovlMerSize=30 cgwErrorRate=0.25 ovlMemory=4GB
# minimum count k-mers used in error correction 1 means all k-mers are used. one can increase to 2 if coverage >100
KMER_COUNT_THRESHOLD = 1
# auto-detected number of cpus to use
NUM_THREADS= $NUM_THREADS
# this is mandatory jellyfish hash size
JF_SIZE=63065410
# this specifies if we do (1) or do not (0) want to trim long runs of homopolymers (e.g. # GGGGGGG) from 3' read ends, use it for high GC genomes
DO_HOMOPOLYMER_TRIM=0
END
My job file
#!/bin/bash
#PBS -j oe
#PBS -S /bin/bash
#PBS -A UT-INTERN
#PBS -l ncpus=16,mem=64000MB
cd $PBS_O_WORKDIR
SCRATCH_DIR=/lustre/medusa/catheason
MaS_PATH=/$SCRATCHDIR/MaSuRCA-2.2.1
MaS_BIN=/$MaS_PATH/bin
CONFIG_PATH=/$MaS_PATH/firsttestrun
$MaS_BIN/masurca $CONFIG_PATH/firstconfig.txt