#!/usr/bin/env perl # File: epcrToHgPsl # Author: Terry Furey # Date: 7/17/2002 # Description: Convert an output file from ePCR to a psl file # DO NOT EDIT the /cluster/bin/scripts copy of this file -- # edit ~/kent/src/hg/stsMarkers/epcrToPsl instead. # $Id: epcrToPsl,v 1.5 2009/05/06 21:20:33 hiram Exp $ use warnings; use strict; # USAGE message if ($#ARGV < 2) { print STDERR "USAGE: epcrToHgPsl [-mouse -rat] \n"; print STDERR " is something like /hive/data/genomes/db/\n"; exit(1); } my $rat = 0; my $mouse = 0; my $inf = ""; while ($#ARGV > 4) { my $arg = shift(@ARGV); if ($arg eq "-rat") { $rat = 1; } elsif ($arg eq "-mouse") { $mouse = 1; } } my $file = shift(@ARGV); open(FILE, "<$file") || die "Can not open epcr file '$file'"; my $epcrCount = `cat $file | wc -l`; chomp $epcrCount; my $primers = shift(@ARGV); open(PRIMERS, "<$primers") || die "Can not open primers file '$primers'"; my $dir = shift(@ARGV); my $db = `basename $dir`; chomp $db; my $twoBit = "$dir/$db.2bit"; my %chrSizes; # Get chrom chrom sizes print STDERR "Reading chrom sizes $dir/chrom.sizes\n"; open(CHR, "$dir/chrom.sizes") || die "Can not open '$dir/chrom.sizes'"; while (my $line = ) { chomp($line); my ($chr, $size) = split('\s+', $line); $chrSizes{$chr} = $size; } my %primerLeft; my %primerRight; my %primerLeftSize; my %primerRightSize; my %rightStart; my %size; # Read in the primers file print STDERR "Reading primer info from $primers\n"; while (my $line = ) { chomp($line); my ($primerid, $left, $right, $distance, $id) = split(' ',$line); $primerLeft{$primerid} = substr($left,0,7); $primerRight{$primerid} = substr($right,0,7); my @left = split(//,$left); $primerLeftSize{$primerid}=$#left + 1; my @right = split(//,$right); $primerRightSize{$primerid} = $#right + 1; if ($distance eq "-") { $size{$primerid} = 500; } elsif ($distance =~ /-/) { my ($start,$end) = split("-",$distance); $size{$primerid} = int(($end - $start)/2) + $start; } else { $size{$primerid} = $distance; } $rightStart{$primerid} = $size{$primerid} - $primerRightSize{$primerid}; } close(PRIMERS); my $done = 0; # Read in epcr file and output psl file print STDERR "Creating psl file ${file}.psl and ${file}.nomatch\n"; open(OUT, ">${file}.psl"); open(ERR, ">${file}.nomatch"); my %ratName; while (my $line = ) { chomp($line); ++$done; printf STDERR "%d of %d complete == %% %.2f\n", $done, $epcrCount, 100.0*$done/$epcrCount if (0 == $done%500); my ($chr, $place, $dbsts, $ucsc) = split(' ',$line); if (($rat) || ($mouse)) { $ratName{$ucsc} = $dbsts; $dbsts = $ucsc; } my ($start, $end) = split(/\.\./,$place); $start--; my $match = $primerLeftSize{$dbsts} + $primerRightSize{$dbsts}; my $tsize = $end - $start; my $tgap = 1; my $tgapsize = $tsize - $match; my $qgap = 1; my $qgapsize = $size{$dbsts} - $match; my $leftstart = $start; my $rightStart = $end - $primerRightSize{$dbsts}; # Determine strand my $strand = 'x'; my $to = $start + 7; open(FA, "/cluster/bin/x86_64/twoBitToFa $twoBit:$chr:$start-$to stdout|") || die "Can not read from $twoBit:$chr:$start-$to"; my $find = ; $find = ; close(FA); chomp($find); $find =~ tr/a-z/A-Z/; if ($find eq $primerLeft{$dbsts}) { $strand = "+"; } elsif ($find eq $primerRight{$dbsts}) { $strand = "-"; } else { my @chars = split(//,$find); my @left = split(//,$primerLeft{$dbsts}); my @right = split(//,$primerRight{$dbsts}); my $r = 0; my $l = 0; for (my $i = 0; $i < 7; $i++) { if ($chars[$i] eq $left[$i]) { $l++; } if ($chars[$i] eq $right[$i]) { $r++; } } if ($l > 5) { $strand = "+"; } elsif ($r > 5) { $strand = "-"; } else { $strand = ""; print ERR "Could not determine strand for $line - $find ($primerLeft{$dbsts}-$l,$primerRight{$dbsts}-$r)\n"; } } # Print out record if ($strand) { print OUT "$match\t0\t0\t0\t$qgap\t$qgapsize\t$tgap\t$tgapsize\t$strand\t"; if (($rat) || ($mouse)) { print OUT "$ratName{$dbsts}\t$size{$dbsts}\t0\t$size{$dbsts}\t$chr\t0\t$start\t$end\t"; } else { print OUT "dbSTS_$dbsts\t$size{$dbsts}\t0\t$size{$dbsts}\t$chr\t$chrSizes{$chr}\t$start\t$end\t"; } print OUT "2\t$primerLeftSize{$dbsts},$primerRightSize{$dbsts},\t0,$rightStart{$dbsts},\t"; print OUT "$leftstart,$rightStart,\n"; } } close(FILE); close(OUT);