#!/usr/bin/perl
# This script was originally written by Yontao Lu and was copied from
# /cluster/store4/ratMarker/code/ into the kent/src tree in order
# to get it under CVS control; added warnings.

# $Id: createStsInfoRat,v 1.3 2006/03/09 02:00:28 angie Exp $

use strict;
use warnings;

# Purpose: merge the UniSTS, RGD raw-info, alias and genetic/RH map files
# into one tab-separated table (written to STDOUT) with one row per marker.
#
# Usage: createStsInfoRat Unists raw alias rhmap fhhmap shrspmap
#
# Output columns (0-based):
#    0 id         1 name       2 rgdid      3 rgdname    4 uistsid
#    5 #ofalias   6 alias      7 primer1    8 primer2    9 distance
#   10 isSeq     11 organism  12 FFHXACI   13 chr       14 posi
#   15 SHRSPXBN  16 chr       17 posi      18 RH        19 chr
#   20 posi      21 RHLOD     22 Gene_name 23 Gene_Id   24 cloSeq

if (@ARGV != 6) {
    print STDERR "createStsInfoRat: Unists raw alias rhmap fhhmap shrspmap\n";
    exit(1);
}

my ($uni, $raw, $ali, $rh, $fhh, $sh) = @ARGV;

# Number of columns every output row must have.
my $COLUMN_COUNT = 25;

# %sts maps a lookup key (UniSTS id, "RGD:<id>", upper-cased name or alias)
# to a UCSC id; @table holds one array ref (row) per UCSC id, starting at 1.
my %sts;
my @table;
my $ucscId = 1;

# Return the argument, or the empty string when it is undefined.  Used for
# hash keys and uc() so that short input lines behave as before (undef and
# '' are the same hash key) without "uninitialized" warnings.
sub or_empty {
    my ($value) = @_;
    return defined $value ? $value : '';
}

# Open $path for reading and return the handle; die with "$what $path: $!"
# on failure.
sub open_or_die {
    my ($path, $what) = @_;
    open(my $fh, '<', $path) || die "$what $path: $!";
    return $fh;
}

# True when $seq is defined and contains at least one of A, T, C, G.
sub has_sequence {
    my ($seq) = @_;
    return (defined $seq && $seq =~ /[ATCG]/) ? 1 : 0;
}

# Build a row for UCSC id $id with every column set to its default value;
# callers overwrite the columns they have data for.
sub blank_row {
    my ($id) = @_;
    my @row = ('') x $COLUMN_COUNT;
    $row[0]  = $id;
    $row[$_] = 0 foreach (2, 5, 10, 14, 17, 20, 21);
    $row[11] = "Rattus norvegicus";
    return \@row;
}

# Read "id<TAB>alias;alias;..." lines from $fh.  When the id, or failing
# that the first already-known alias, resolves to a marker, record the alias
# count and the raw alias string on that marker and register every
# upper-cased alias as a lookup key for it.  Closes $fh when done.
sub apply_aliases {
    my ($fh) = @_;
    while (my $line = <$fh>) {
        chomp($line);
        my ($unid, $names) = split(/\t/, $line);
        my @keys = map { uc($_) } split(/;/, or_empty($names));

        my $indx = $sts{ or_empty($unid) };
        if (!$indx) {
            foreach my $key (@keys) {
                if ($sts{$key}) {
                    $indx = $sts{$key};
                    last;
                }
            }
        }
        next if !$indx;

        $table[$indx][5] = scalar(@keys);
        # Stored as read (original case); left undefined when the line has
        # no alias column so the final sanity check still reports it.
        $table[$indx][6] = $names;
        $sts{$_} = $indx foreach @keys;
    }
    close($fh);
}

# Read a map file from $fh ("id<TAB>name<TAB>chr<TAB>pos[<TAB>lod]", lines
# starting with '#' ignored).  For each line whose id or upper-cased name is
# a known key, store $label in column $col, and the next two fields in
# columns $col+1 and $col+2.  When $has_lod is true the third field is
# stored in column $col+3 unless it contains "F".  Closes $fh when done.
sub apply_map {
    my ($fh, $label, $col, $has_lod) = @_;
    while (my $line = <$fh>) {
        next if ($line =~ /^\#/);
        chomp($line);
        my ($unid, $name, @rest) = split(/\t/, $line);

        my $indx = $sts{ or_empty($unid) } || $sts{ uc(or_empty($name)) };
        next if !$indx;

        my $row = $table[$indx];
        $row->[$col]     = $label;
        $row->[$col + 1] = $rest[0];
        $row->[$col + 2] = $rest[1];
        # A missing LOD field is stored (as undef) exactly as before, so
        # the final sanity check flags truncated lines.
        if ($has_lod && !(defined $rest[2] && $rest[2] =~ /F/)) {
            $row->[$col + 3] = $rest[2];
        }
    }
    close($fh);
}

# Open every input up front, in the original order, so that a missing file
# is reported before any processing starts.
my $rh_fh  = open_or_die($rh,  "Can't open rh map");
my $ali_fh = open_or_die($ali, "can't open alias");
my $uni_fh = open_or_die($uni, "Can't open unists");
my $raw_fh = open_or_die($raw, "Can't open rawinfo");
my $fhh_fh = open_or_die($fhh, "Can't open fhh map");
my $sh_fh  = open_or_die($sh,  "Can't open shrsh map");

# Pass 1: UniSTS file.  Every line becomes a row; its keys are registered
# only when the primer1 field (column 1) contains a word character.
# NOTE(review): split() drops trailing empty fields, so a line that ends
# early leaves primer columns undefined and trips the final sanity check.
while (my $line = <$uni_fh>) {
    chomp($line);
    my @f       = split(/\t/, $line);
    my $id_text = or_empty($f[0]);
    my $name    = uc(or_empty($f[4]));
    my $alt_key = uc(or_empty($f[6]));

    if (defined $f[1] && $f[1] =~ /\w/) {
        $sts{$id_text} = $ucscId;
        $sts{$name}    = $ucscId;
        $sts{$alt_key} = $ucscId;
    }

    if (!$name || $name eq '-') {
        warn "WARNING: Missing name for UniSTS ID $id_text (UCSC ID $ucscId) will cause trouble downstream.\n";
    }

    my $row = blank_row($ucscId);
    $row->[1] = $name;
    $row->[4] = $f[0];
    $row->[7] = $f[1];
    $row->[8] = $f[2];
    $row->[9] = $f[3] || "0";
    $table[$ucscId] = $row;
    $ucscId++;
}
close($uni_fh);
delete $sts{''};

# Pass 2: aliases for the UniSTS markers.
apply_aliases($ali_fh);

# Pass 3: RGD raw info.  Annotate markers already known by name or by
# "RGD:<id>"; otherwise create a new marker when the line carries a primer
# (column 3) or a sequence (column 2).
my $found = 0;
my $new   = 0;
while (my $line = <$raw_fh>) {
    chomp($line);
    my @f       = split(/\t/, $line);
    my $id_text = or_empty($f[0]);
    # Skip the column-header line.
    next if ($id_text eq 'RGD_ID' && or_empty($f[1]) eq 'SYMBOL_NAME');

    my $name = uc(or_empty($f[1]));
    my $seq  = $f[2];
    my $indx = $sts{$name} || $sts{"RGD:$id_text"};

    if ($indx) {
        $table[$indx][2] = $f[0] || 0;
        $table[$indx][3] = $name;
        if (has_sequence($seq)) {
            $table[$indx][24] = $seq;
            $table[$indx][10] = 1;
        }
        $found++;
    }
    elsif ((defined $f[3] && $f[3] =~ /\w/) || has_sequence($seq)) {
        if (!$name || $name eq '-') {
            warn "WARNING: Missing name for RGD ID $id_text (UCSC ID $ucscId) will cause trouble downstream.\n";
        }

        my $row = blank_row($ucscId);
        $row->[1] = $name;
        $row->[2] = $f[0] || 0;
        $row->[3] = $name;
        $row->[4] = 0;
        $row->[7] = $f[3];
        $row->[8] = $f[4];
        $row->[9] = $f[5] || "0";
        if (has_sequence($seq)) {
            $row->[10] = 1;
            $row->[24] = $seq;
        }
        $table[$ucscId] = $row;

        $sts{$name} = $ucscId;
        $ucscId++;
        $new++;
    }
}
close($raw_fh);
delete $sts{''};

print STDERR "found: $found\tnew: $new\n";

# Pass 4: re-read the alias file so markers added from the raw info also
# pick up their aliases.
apply_aliases(open_or_die($ali, "can't open alias"));

# Pass 5: map positions.
apply_map($rh_fh,  "RH_map.2.2", 18, 1);
apply_map($fhh_fh, "FHH_x_ACI",  12, 0);
apply_map($sh_fh,  "SHRSP_x_BN", 15, 0);

# Emit the table, refusing to print a short row or an undefined column.
foreach my $i (1 .. $ucscId - 1) {
    my $row = $table[$i];
    die "ucscId $i has too few elements " . scalar(@{$row})
        if (scalar(@{$row}) < $COLUMN_COUNT);
    foreach my $j (0 .. $COLUMN_COUNT - 1) {
        die "ucscId $i el $j is undef" if (!defined $row->[$j]);
    }
    print join("\t", @{$row}), "\n";
}