#!/usr/bin/env perl # # Author: petr.danecek@sanger # use strict; use warnings; use Carp; my $opts = parse_params(); sort_vcf($opts); exit; #-------------------------------- sub error { my (@msg) = @_; if ( scalar @msg ) { croak @msg; } die "Usage: vcf-sort > out.vcf\n", " cat file.vcf | vcf-sort > out.vcf\n", "Options:\n", " -c, --chromosomal-order Use natural ordering (1,2,10,MT,X) rather then the default (1,10,2,MT,X). This requires\n", " new version of the unix \"sort\" command which supports the --version-sort option.\n", " -t, --temporary-directory Use a directory other than /tmp as the temporary directory for sorting.\n", " -h, -?, --help This help message.\n", "\n"; } sub parse_params { my $opts = {}; while (my $arg=shift(@ARGV)) { if ( $arg eq '-c' || $arg eq '--chromosomal-order' ) { $$opts{chromosomal_order}=1; next; } if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); } if ( $arg eq '-t' || $arg eq '--temporary-directory' ) { $$opts{temp_dir}=shift(@ARGV); next; } if ( -e $arg ) { $$opts{file}=$arg; next } error("Unknown parameter \"$arg\". Run -h for help.\n"); } return $opts; } sub sort_vcf { my ($opts) = @_; my $fh; if ( exists($$opts{file}) ) { if ( $$opts{file}=~/\.gz$/i ) { open($fh,"gunzip -c $$opts{file} |") or error("$$opts{file}: $!"); } else { open($fh,'<',$$opts{file}) or error("$$opts{file}: $!"); } } else { $fh = *STDIN; } my $sort_opts = check_sort_options($opts); my $cmd; if ( exists($$opts{temp_dir}) ) { $cmd = "sort $sort_opts -T $$opts{temp_dir} -k2,2n"; } else { $cmd = "sort $sort_opts -k2,2n"; } open(my $sort_fh,"| $cmd") or error("$cmd: $!"); my $unflushed = select(STDOUT); $| = 1; while (my $line=<$fh>) { if ( $line=~/^#/ ) { print $line; next; } print $sort_fh $line; last; } select($unflushed); while (my $line=<$fh>) { print $sort_fh $line; } } sub check_sort_options { my ($opts) = @_; if ( !$$opts{chromosomal_order} ) { return '-k1,1d' } my @has_version_sort = `sort --help | grep -- --version-sort`; if ( scalar @has_version_sort ) { return '-k1,1V'; } error("Old version of sort command installed, please run without the -c option.\n"); return '-k1,1d'; }