import sys
import os
import datetime
import logging
import argparse
python_lib = os.path.join(os.getenv("HOME"), "python_lib")
if not python_lib in sys.path: sys.path.append(python_lib)
from fs import FS
log_level = logging.DEBUG
logging.basicConfig(filename=None, format='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=log_level)
def main():
    fs = FS()
    args = get_arguments()
num_all_variants = 0
num_PASS_variants = 0
num_PASS_SNV_variants = 0
num_PASS_INS_variants = 0
num_PASS_DEL_variants = 0
for line in fs.read(args.input_vcf_file):
    if line.startswith("##"):
        continue
    elif line.startswith("#"):
        line = line[1:]
        header = line.split("\t")
    else:
        num_all_variants = num_all_variants + 1
        fields = line.split("\t")
        REF = fields[header.index("REF")]
        ALT = fields[header.index("ALT")]
        FILTER = fields[header.index("FILTER")]
        if FILTER == "PASS":
            num_PASS_variants = num_PASS_variants + 1
            if len(REF) == 1 and len(ALT) == 1:
                num_PASS_SNV_variants = num_PASS_SNV_variants + 1
            if len(REF) > 1 and len(ALT) == 1:
                num_PASS_DEL_variants = num_PASS_DEL_variants + 1
            if len(REF) == 1 and len(ALT) > 1:
                num_PASS_INS_variants = num_PASS_INS_variants + 1
output = [num_all_variants, num_PASS_variants, num_PASS_SNV_variants, num_PASS_INS_variants, num_PASS_DEL_variants]                 
header_out = "vcf_file\tall_variants\tPASS_variants\tPASS_SNV\tPASS_INS\tPASS_DEL"
#print header_out.replace(", ", "\t")
#print(header_out)
#print(os.path.basename(args.input_vcf_file) + "\t" + "\t".join(map(str,output)))
fs.write("VCF.xls", header_out+"\n")
fs.write("VCF.xls", os.path.basename(args.input_vcf_file)[:50] + "\t" + "\t".join(map(str,output)) + "\n")
#logging.info(out_file)
fs.close_without_print()
#print "Done\n\n\n"
os.system("easy VCF.xls")
sys.exit()
def get_arguments():
    main_description = '''\
Count VCF
'''
epi_log='''
This program requires the python packages:
- argparse
 
- openpyxl (pypm install "openpyxl<=2.1.5")
 
- logging
 ''' 
 help_help = '''\
 show this help message and exit\
 '''
 version_help = '''\
 show the version of this program\
 '''
 
input_vcf_file_help = '''\
input VCF file
'''
arg_parser = argparse.ArgumentParser(description=main_description, epilog=epi_log, formatter_class=argparse.RawTextHelpFormatter, add_help=False)
arg_parser.register('type','bool', lambda s: str(s).lower() in ['true', '1', 't', 'y', 'yes']) # add type keyword to registries
###############################
#    1. required arguments    #
###############################
required_group = arg_parser.add_argument_group("required arguments")
###############################
#    2. optional arguments    #
###############################
optional_group = arg_parser.add_argument_group("optional arguments")
###############################
#    3. positional arguments  #
###############################
arg_parser.add_argument('input_vcf_file', metavar="VCF_FILE", type=str, help=input_vcf_file_help)
optional_group.add_argument("-h", "--help", action="help", help=help_help)
optional_group.add_argument("-v", "--version", action="version", version="%(prog)s: version 1.0", help=version_help)
args = arg_parser.parse_args()
abs_paths = ["input_vcf_file"]
create_dirs = []
must_exist_paths = ["input_vcf_file"]
check_arguments(args, arg_parser, abs_paths, create_dirs, must_exist_paths)
return args
def check_arguments(args, arg_parser, abs_paths, create_dirs, must_exist_paths):
    '''
    check_arguments:
    (1) change the paths into absolute paths
    (2) create diretories if needed
    (3) check if the paths exists in the file system
    '''
    for abs_path in abs_paths:
        obj = eval("args." + abs_path)
        if isinstance(obj, list):
        # list of files
        exec("args.{abs_path} = [os.path.abspath(x) for x in args.{abs_path}]".format(abs_path=abs_path))  in globals(), locals()
    else:
        # single file
        exec("args.{abs_path} = os.path.abspath(args.{abs_path})".format(abs_path=abs_path))  in globals(), locals()
for create_dir in create_dirs:
    dir_path = eval("args." + create_dir)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        logging.debug("%s is created!\n" % dir_path)
for must_exist_path in must_exist_paths:
    obj = eval("args." + must_exist_path)
    if isinstance(obj, list):
        # list of files
        for path in obj:
            if not os.path.exists(path):
                arg_parser.print_help()
                logging.error("No file exist as \"%s\" \n" % path )
                sys.exit()                  
    else:
        # single file
        path = obj
        if not os.path.exists(path):
            arg_parser.print_help()
            logging.error("\"%s\" does not exist!!!\n" % path )
            sys.exit()
if __name__ == '__main__':
    main()
                    
                
                 
Hi, can somebody add comments for knowing "Novel SNPs" in a .vcf file ?. Many Thanks.
You should post this as a separate question seeing as it has nothing to do with counting SNPs, and also you posted this as an answer which it isn't. I would suggest you post a new question, that works a lot better because this post already has 3 answers so most people will probably ignore it
Alright, i agree with your comment but since my original question was almost similar as "Method to count total number of SNPs and Novel SNP in a VCF file" so i though that most of the peoples will give me link of this post only so i asked the second half of my question on this post. Anyway, i'll post the second half as a new question. Thanks.
That's not the same question, and even if the question was already posted it's always good to get a fresh answer.