Source code for grep_vcf.scripts.grep_vcf
#########################################################################
# grep_vcf - remove lines from vcf file where positions are not in #
# reference file #
# Authors: Bertrand Neron #
# Copyright (c) 2020 Institut Pasteur (Paris) and CNRS. #
# See the COPYRIGHT file for details #
# #
# This file is part of grep_vcf package. #
# #
# grep_vcf is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# grep_vcf is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with grep_vcf (LICENSE). #
# If not, see <https://www.gnu.org/licenses/>. #
#########################################################################
import sys
import os
import argparse
import grep_vcf
import grep_vcf.grep_vcf as gv
def get_version_message():
    """
    Build the text displayed by the ``--version`` option.

    :return: the grep_vcf version, the version of the running Python
             interpreter and a short license notice
    :rtype: str
    """
    # The continuation lines of the literal stay flush left on purpose:
    # any leading whitespace would become part of the message.
    return f"""grep_vcf {grep_vcf.__version__}
Python {sys.version}
grep_vcf is distributed under the terms of the GNU General Public License (GPLv3).
See the LICENSE file for details.
"""
def parse_args(args):
    """
    Parse the command line and check that the input files exist.

    When ``--vcf`` is not provided, the vcf path is derived from the
    positions path by replacing its extension with ``.vcf``.

    :param args: The arguments provided on the command line
    :type args: List of strings [without the program name]
    :return: The arguments parsed
    :rtype: :class:`argparse.Namespace` object.
    :raise FileNotFoundError: if the positions file or the vcf file does not exist.
    """
    parser = argparse.ArgumentParser(
        epilog="For more details, visit the grep vcf github page and see the grep vcf documentation.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Raw string: the banner is drawn with backslashes that are not meant
        # to be escape sequences ("\ ", "\_", "\/" are invalid escapes and
        # trigger a warning in a regular string literal).
        description=r"""
_____ __ _______ ______
/ ____| \ \ / / ____| ____|
| | __ _ __ ___ _ __ \ \ / / | | |__
| | |_ | '__/ _ \ '_ \ \ \/ /| | | __|
| |__| | | | __/ |_) | \ / | |____| |
\_____|_| \___| .__/ \/ \_____|_|
| |
|_|
grep_vcf is a tiny tool to filter vcf file based on position file and vice et versa.
""")
    # Each fragment of the help ends with a space so that the implicit
    # concatenation does not glue the sentences together.
    parser.add_argument("positions",
                        help="The text file with the positions looking for in vcf file. "
                             "It must be a tsv file (https://en.wikipedia.org/wiki/Tab-separated_values) "
                             "where position are in first column. "
                             "Lines starting with '#' are considering as comments.")
    parser.add_argument("--vcf",
                        help="The path to the vcf file. By default grep_vcf search for the same path as position file"
                             " but with '.vcf' as extension."
                        )
    # The default is the sys.stdout object itself (not a path): the caller
    # distinguishes "no --out given" from a user supplied path by identity.
    parser.add_argument("--out",
                        default=sys.stdout,
                        help="The path to an output file, default is stdout. "
                             "If the file exists, it will be replaced.")
    parser.add_argument("--invert", "-v",
                        action='store_true',
                        default=False,
                        help="Invert the sense of matching, to select non-matching vcf lines.")
    parser.add_argument("--switch",
                        action='store_true',
                        default=False,
                        help="Filter position file to keep lines that position match in vcf")
    parser.add_argument("--version", "-V",
                        action='version',
                        version=get_version_message(),
                        help="Display version information and quit."
                        )
    parsed_args = parser.parse_args(args)

    if parsed_args.vcf is None:
        # same path as the positions file, with the extension swapped for '.vcf'
        parsed_args.vcf = os.path.splitext(parsed_args.positions)[0] + '.vcf'

    # Fail early, before any output file is created by the caller.
    for path in parsed_args.positions, parsed_args.vcf:
        if not os.path.exists(path):
            raise FileNotFoundError(f"The file {path} does not exists.")
    return parsed_args
def main(args=None):
    """
    Entry point of the script: parse the arguments, open the positions and
    vcf files and write every line yielded by the matching generator to the
    output (a file when ``--out`` is given, the standard output otherwise).

    :param args: the arguments to use to run; when None, ``sys.argv[1:]`` is used
    :type args: list of str
    """
    args = sys.argv[1:] if args is None else args
    parsed_args = parse_args(args)

    # parse_args leaves the sys.stdout object itself as default for --out;
    # anything else is a path provided by the user.
    write_to_file = parsed_args.out is not sys.stdout
    out = open(parsed_args.out, 'w') if write_to_file else sys.stdout
    try:
        with open(parsed_args.positions) as positions, open(parsed_args.vcf) as vcf:
            # --switch swaps the roles: the vcf becomes the reference and the
            # positions file is the one being filtered.
            ref, target = (vcf, positions) if parsed_args.switch else (positions, vcf)
            if parsed_args.invert:
                gen = gv.invert_match_generator(ref, target)
            else:
                gen = gv.match_generator(ref, target)
            for line in gen:
                out.write(line)
    finally:
        # Close only the stream opened here. Deciding on `out.name` would fail
        # with streams that have no `name` attribute (e.g. a StringIO standing
        # in for sys.stdout) and would leak a user file named '<stdout>'.
        if write_to_file:
            out.close()
# Run the command line interface only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()