# for emacs: -*- mode: sh; -*- # tracks for Ed's Neandertal paper # This starts from empty hg18 on a private server (genome-nt). ######################################################################### # CREATE NEANDERTAL TRACK GROUP (DONE 3/3/10 angie) hgsql hg18 < ~/kent/src/hg/lib/grp.sql hgsql hg18 -e \ "insert into grp values ('neandertal', 'Neandertal Assembly and Analysis', 1.5);" # Edit hg.conf to have this grp setting: db.grp=ntdb:grp # where ntdb is a profile (ntdb.user= etc) defined earlier in hg.conf. ######################################################################### # ASSEMBLED BAM TRACK TABLES (DONE 3/3/10 angie) # Copied Ed's *-hg18.bam into local gbdb/hg18 mkdir -p /data/NT/gbdb/hg18/neandertal/seqAlis cp -p /hive/users/angie/testBams/*-hg18.bam /data/NT/gbdb/hg18/neandertal/seqAlis/ foreach t (Feld1 Mez1 Sid1253) hgsql hg18 -e "drop table if exists bam$t; create table bam$t (fileName varchar(255) not null);" hgsql hg18 -e "insert into bam$t values ('/data/NT/gbdb/hg18/neandertal/seqAlis/$t-hg18.bam');" end foreach i (16 25 26) hgsql hg18 -e "drop table if exists bamVi33dot$1; create table bamVi33dot$1 (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamVi33dot$1 values ('/data/NT/gbdb/hg18/neandertal/seqAlis/Vi33.$i-hg18.bam');" end hgsql hg18 -e "drop table if exists bamAll; create table bamAll (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamAll values ('/data/NT/gbdb/hg18/neandertal/seqAlis/all-hg18.bam');" ######################################################################### # RAW READ ALIGNMENTS BAM TRACK TABLES (DONE 3/8/10 angie) mkdir /data/NT/gbdb/hg18/neandertal/seqLibs cp /hive/users/angie/testBams/by_library/SL*.hg18.bam* /data/NT/gbdb/hg18/neandertal/seqLibs chmod a+r /data/NT/gbdb/hg18/neandertal/seqLibs/* cd /data/NT/gbdb/hg18/neandertal/seqLibs foreach sl (37 39 61 49 15 16 17 18 19 20 21 22 28 29 33 42 46 47 32 6 7 8 10 11 12 \ 13 14 24 26 27 30 31 9) samtools index SL$sl.hg18.bam hgsql hg18 -e "create table bamSL$sl (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamSL$sl values ('/data/NT/gbdb/hg18/neandertal/seqLibs/SL$sl.hg18.bam');" end #*** NOTE: We ended up discarding this track -- too low-level to be of interest. #*** Removed table & files 4/12/10. ######################################################################### # HUMAN READ ALIGNMENTS BAM TRACK TABLES (DONE 3/8/10 angie) # Copied Ed's MMS*.hg18.bam into local gbdb/hg18 hgsql hg18 -e "drop table bamMMS4" hgsql hg18 -e "drop table bamMMS5" hgsql hg18 -e "drop table bamMMS6" hgsql hg18 -e "drop table bamMMS7" hgsql hg18 -e "drop table bamMMS8" hgsql hg18 -e "create table bamMMS4 (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamMMS4 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS4_HGDP00778_Han.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "create table bamMMS5 (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamMMS5 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS5_HGDP00542_Papuan.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "create table bamMMS6 (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamMMS6 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS6_HGDP00927_Yoruba.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "create table bamMMS7 (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamMMS7 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS7_HGDP01029_San.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "create table bamMMS8 (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamMMS8 values ('/data/NT/gbdb/hg18/neandertal/humans/MMS8_HGDP00521_French.paired.qualfilt.rmdup.entropy1.0.sort.bam');" ######################################################################### # PER-INDIVIDUAL READS (DONE 3/9/10 angie) mkdir /hive/users/angie/testBams/rawReads cd /hive/users/angie/testBams/rawReads # Headers of these two files are identical, so no pre-processing: samtools merge SLMez1.hg18.bam ../by_library/SL{39,61}.hg18.bam # Right number of alignments in output? samtools view ../by_library/SL39.hg18.bam | wc -l #625768 samtools view ../by_library/SL61.hg18.bam | wc -l #640684 expr 625768 + 640684 #1266452 samtools view SLMez1.hg18.bam |wc -l #1266452 # Do the same for other individs according to mapping in Ed's Lib2Bone.txt: samtools merge SLVi33.16.hg18.bam \ ../by_library/SL{15,16,17,18,19,20,21,22,28,29,33,42,46,47}.hg18.bam samtools merge SLVi33.25.hg18.bam ../by_library/SL{6,7,8,32}.hg18.bam samtools merge SLVi33.26.hg18.bam \ ../by_library/SL{9,10,11,12,13,14,24,26,27,30,31}.hg18.bam foreach f (*.bam) samtools index $f end # Make database tables on genome-nt for files copied into local /gbdb: foreach i (Feld1 Mez1 Sid1253) hgsql hg18 -e "create table bamSL$i (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamSL$i values ('/data/NT/gbdb/hg18/neandertal/seqAlis/SL$i.hg18.bam');" end foreach i (16 25 26) hgsql hg18 -e "create table bamSLVi33dot$i (fileName varchar(255) not null);" hgsql hg18 -e "insert into bamSLVi33dot$i values ('/data/NT/gbdb/hg18/neandertal/seqAlis/SLVi33.$i.hg18.bam');" end ######################################################################### # NEANDERTAL ALLELES FOR CODING DIFFS BETW CHIMP AND HUMAN (DONE 3/23/10 angie) # Copied file emailed from Hernan Burbano 3/15/10 to # /hive/users/angie/testBams/hernan/neandertal_table_for_UCSC.tsv.gz cd /hive/users/angie/testBams/hernan/ zcat neandertal_table_for_UCSC.tsv.gz | head -1 #Refseq ID chr position strand human_nucleotide chimpanzee_nucleotide coverage human_codon chimpanzee_codon number_ancestral_reads number_derived_reads number_third_state_reads neandertal_state # Make a bed 6 with names that encode the SNP and read info, and view as CT: zcat neandertal_table_for_UCSC.tsv.gz \ | perl -wpe '$_="", next if /^#/; chomp; @w=split("\t"); \ $w[12] =~ s/^(\w).*/$1/; \ $other = $w[11] > 0 ? "+$w[11]?" : ""; \ $name = "$w[12]:$w[9]$w[5]>$w[10]$w[4]$other($w[8]>$w[7])"; \ $_ = join("\t", $w[1], $w[2], $w[2]+1, $name, $w[6]*100, $w[3]) . "\n"' \ | sort -k1,1 -k2n,2n \ > ntSnps.bed # Ed asked for coloring to indicate Ancestral vs. Derived: zcat neandertal_table_for_UCSC.tsv.gz \ | perl -wpe '$_="", next if /^#/; chomp; @w=split("\t"); \ $other = $w[11] > 0 ? "+$w[11]?" : ""; \ $name = "$w[9]$w[5]>$w[10]$w[4]$other($w[8]>$w[7])"; \ $color = ($w[12] =~/^A/) ? "0,0,200" : ($w[12] =~ /^D/) ? "0,180,0" : "200,0,0"; \ $_ = join("\t", $w[1], $w[2], $w[2]+1, $name, $w[6]*100, $w[3], \ $w[2], $w[2]+1, $color) . "\n"' \ | sort -k1,1 -k2n,2n \ > ntHumChimpCodingDiff.bed hgLoadBed -tab -maxChromNameLength=13 hg18 ntHumChimpCodingDiff ntHumChimpCodingDiff.bed #Loaded 11345 elements of size 9 ######################################################################### # SELECTIVE SWEEP SCAN SCORES (DONE 3/24/10 angie) # Ed copied hg18.sss.CpGmask.vbf.zscore.tar.gz to /hive/users/angie/testBams/ . # Ed's description of the data: # The table has a row for every position that is observed to be # polymorphic in the modern human HGDP data, for which 3 alleles are # present. The columns have these data: # 1. chromosome # 2. position # 3. Allele string for human data (A=ancestral; D=derived; 0=no data) # 4. probability that a Neandertal allele would be derived, given the # configuration of ancestral/derived alleles in column 3 # 5. Observed number of neandertal derived alleles at this site # 6. Observed number of neandertal ancestral alleles at this site # 7. Total number of expected neandertal derived alleles in the 100kb # window around this site # 8. Observed number of neandertal derived alleles in the 100 kb window # around this site # 9. The total number of polymorphic sites in this 100 kb window # 10. The variance of the number of expected number of derived alleles # in this 100 kb window # 11. The z-score of the difference between the observed and the # expected. That is (column 8 - column 7) / column 10 # # For the browser track, I think only columns 1, 2, 10, and 11 are # necessary. Columns 1 and 2 give the position and the value to be # displayed is column 11 +/- column 10. cd /hive/users/angie/testBams tar xvzf hg18.sss.CpGmask.vbf.zscore.tar.gz cd /hive/users/angie/testBams/SSS # Since we don't have a way to superimpose 2 wiggles (to show value +- # some other value), I will make 4 subtracks to run by Ed: # variance (col 10), z-score (col 11), z-score + variance, z-score - variance awk '{print $1 "\t" ($2-1) "\t" $2 "\t" $10;}' chr*.txt \ > ntSssVariance.bed awk '{print $1 "\t" ($2-1) "\t" $2 "\t" $11;}' chr*.txt \ > ntSssZScore.bed awk '{print $1 "\t" ($2-1) "\t" $2 "\t" ($11+$10);}' chr*.txt \ > ntSssZScorePlusVar.bed awk '{print $1 "\t" ($2-1) "\t" $2 "\t" ($11-$10);}' chr*.txt \ > ntSssZScoreMinusVar.bed foreach f (nt*.bed) hgLoadBed -bedGraph=4 -maxChromNameLength=13 hg18 $f:r $f end #Loaded 5617139 elements of size 4 # Those are pretty large for bedBraph - consider bigWig. # Actually, Ed didn't like those as much as the following one, so ignore them. # Make a bigWig with two scores per snp: z-score + variance at the snp position, # and z-score - variance at the next base position. # By Ed's request, drop the chrY data. #There's more than one value for chr1 base 4881364 (in coordinates that start with 1). cp /dev/null ntSssZScorePMVar.varStep foreach f (chr*.txt) set chr = $f:r:r:r:r:r if ($chr == "chrY") then echo skipping $chr continue endif echo "variableStep chrom=$chr" >> ntSssZScorePMVar.varStep awk 'BEGIN{prev=-1;} \ {if ($2 != prev) { print $2 "\t" ($11+$10); } \ print ($2+1) "\t" ($11-$10); prev = $2+1;}' $f \ >> ntSssZScorePMVar.varStep end wigToBigWig ntSssZScorePMVar.varStep /hive/data/genomes/hg18/chrom.sizes ntSssZScorePMVar.bw # Copying instead of linking because testBams is not readable cp -p ntSssZScorePMVar.bw /data/NT/gbdb/hg18/neandertal/bbi/ hgsql hg18 -e 'drop table if exists ntSssZScorePMVar; \ create table ntSssZScorePMVar (fileName varchar(255) not null); \ insert into ntSssZScorePMVar values \ ("/data/NT/gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw");' ######################################################################### # SELECTIVE SWEEP SCAN TOP 5% REGIONS (DONE 3/24/10 angie - re-shaded 5/5) # Ed copied z-p5.CpGmask.vbf.txt to /hive/users/angie/testBams/ . # Ed's description of the data: # This file contains the regions that are in the 5% tail of our score # distribution, sorted by genetic width. The columns of this # space-delimited table are: # 1. chromosome # 2. start of region # 3. end of region # 4. length of region # 5. number of polymorphic sites in region # 6. The recombination rate in the region from Myers et al (cM/Mb) # 7. The genetic width of the region (cM) # 8. Total number of observed neandertal derived alleles in the region # 9. Total number of observed neandertal ancestral alleles in the region # 10. Expected number of neandertal derived alleles in the region # 11. The z-score of the region # # For these, the only thing that should be displayed is the region and # the z-score. # Max (absolute) score is -8.7, min (abs) is -4.3. Make scores for shading, and make # this bed not bedGraph so we can click on items to see length etc. awk '{print $1 "\t" ($2-1) "\t" $3 "\t" $11 "\t" int(400 + 600*(-$11-4.3)/4.4);}' z-p5.CpGmask.vbf.txt \ | sort -k1,1 -k2n,2n \ > ntSssTop5p.bed hgLoadBed hg18 ntSssTop5p ntSssTop5p.bed #Loaded 212 elements of size 4 ######################################################################### # MITOCHONDRION ALIGNMENT (DONE 4/19/10 angie) # Download Ed's previously published Neandertal mitochondrial genome from # http://www.ncbi.nlm.nih.gov/nuccore/196123578 # NOTE FOR RELEASE: move this from /data/NT/mito to /hive/data/genomes/hg18/bed/ntMito and # reload tables. mkdir /data/NT/mito cd /data/NT/mito wget --timestamping -O ntM.fa \ 'http://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?tool=portal&db=nuccore&val=196123578&dopt=fasta&sendto=on&log$=seqview' # Edit fasta header line to remove everything except "NC_011137.1" blat -noHead /hive/data/genomes/hg19/M/chrM.fa ntM.fa ntM.psl cat ntM.psl #16361 194 0 0 2 10 4 16 + NC_011137.1 16565 0 16565 chrM 16571 0 16571 5 309,204,15660,76,306, 0,310,514,16174,16259, 0,312,520,16181,16265, hgLoadPsl hg18 -table=ntMito ntM.psl mkdir /data/NT/gbdb/hg18/neandertal/ntMito ln -s `pwd`/ntM.fa /data/NT/gbdb/hg18/neandertal/ntMito/ hgLoadSeq hg18 /data/NT/gbdb/hg18/neandertal/ntMito/ntM.fa ######################################################################### # SELECTIVE SWEEP SCAN SNPs (DONE 5/4/10 angie) # Ed copied hg18.sss.CpGmask.vbf.zscore.tar.gz to /hive/users/angie/testBams/ . # Ed's description of the data: # The table has a row for every position that is observed to be # polymorphic in the modern human HGDP data, for which 3 alleles are # present. The columns have these data: # 1. chromosome # 2. position # 3. Allele string for human data (A=ancestral; D=derived; 0=no data) # The order of the individuals is hg18, San, Yoruban, Han, Papuan, French. # 4. probability that a Neandertal allele would be derived, given the # configuration of ancestral/derived alleles in column 3 # 5. Observed number of neandertal derived alleles at this site # 6. Observed number of neandertal ancestral alleles at this site # 7. Total number of expected neandertal derived alleles in the 100kb # window around this site # 8. Observed number of neandertal derived alleles in the 100 kb window # around this site # 9. The total number of polymorphic sites in this 100 kb window # 10. The variance of the number of expected number of derived alleles # in this 100 kb window # 11. The z-score of the difference between the observed and the # expected. That is (column 8 - column 7) / column 10 # For the SNPs, Ed suggested coloring ones red where most modern humans have # the derived allele but Neandertals have only ancestral. cd /hive/users/angie/testBams/SSS perl -wpe 'chomp; @w=split; $w[2] =~ s/0/_/g; \ $tmp = $w[2]; $dCount = ($tmp =~ s/D//g); \ $rgb = ($dCount >= 4 && $w[4] == 0) ? "220,0,0" : "0,0,0"; \ $_ = join("\t", $w[0], $w[1]-1, $w[1], $w[2].":$w[4]D,$w[5]A", 0, "+", \ $w[1]-1, $w[1], $rgb) . "\n";' \ chr*.txt \ > ntSssSnps.bed hgLoadBed hg18 ntSssSnps ntSssSnps.bed # Drop data on chrY: hgsql hg18 -e 'delete from ntSssSnps where chrom = "chrY"' ######################################################################### # OUT-OF-AFRICA HAPLOTYPES (DONE 5/4/10 angie) mkdir /data/NT/ntOoaHaplo cd /data/NT/ntOoaHaplo # Edited Table 5 of Ed's paper into a bed 4 + file, ntOoaHaplo.bed . # Made ntOoaHaplo.as . autoSql ntOoaHaplo.as ntOoaHaplo # Edited ntOoaHaplo.sql to add bin column, allow NULL for extra fields. hgLoadBed hg18 ntOoaHaplo ntOoaHaplo.bed \ -sqlTable=ntOoaHaplo.sql -tab # Some warnings about missing values in row 7, no problem. ######################################################################### # MIGRATE TRACKS TO HGWDEV (DONE 5/6/10 angie) #ASAP: # Make chmod-700 subdirs of hg18/bed and panTro2/bed umask 77 mkdir /hive/data/genomes/hg18/bed/{ntHumChimpCodingDiff,ntSss,ntOoaHaplo,ntSeqContigs,ntSeqReads,ntModernHumans,ntMito} mkdir /hive/data/genomes/panTro2/bed/{ntSeqContigs,ntSeqReads} umask 2 # Hard-link data files from testBams to subDirs, make /gbdb/ links to the subdirs ln /hive/users/angie/testBams/rawReads/* /hive/data/genomes/hg18/bed/ntSeqReads ln /hive/users/angie/testBams/rawReadsChimp/* /hive/data/genomes/panTro2/bed/ntSeqReads chmod 444 /hive/data/genomes/{hg18,panTro2}/bed/ntSeqReads/* foreach db (hg18 panTro2) set dbAbbrev = `echo $db | sed -e 's/panTro2/pt2/'` mkdir -p /gbdb/$db/neandertal/seqAlis/ ln -s /hive/data/genomes/$db/bed/ntSeqReads/*-$dbAbbrev.bam /gbdb/$db/neandertal/seqAlis/ rsync -av /hive/users/angie/testBams/*-$dbAbbrev.bam /hive/data/genomes/$db/bed/ntSeqContigs/ chmod 664 /hive/data/genomes/$db/bed/ntSeqContigs/* ln -s /hive/data/genomes/$db/bed/ntSeqContigs/*-$dbAbbrev.bam /gbdb/$db/neandertal/seqAlis/ end # Got sysadmins to make MMS* world-readable. ln /hive/users/angie/testBams/MMS*.bam* /hive/data/genomes/hg18/bed/ntModernHumans/ mkdir /gbdb/hg18/neandertal/humans ln -s /hive/data/genomes/hg18/bed/ntModernHumans/MMS*.bam* /gbdb/hg18/neandertal/humans ln /hive/users/angie/testBams/hernan/* /hive/data/genomes/hg18/bed/ntHumChimpCodingDiff/ ln /hive/users/angie/testBams/hg18.sss.CpGmask.vbf.zscore.tar.gz /hive/data/genomes/hg18/bed/ntSss/ mkdir /hive/data/genomes/hg18/bed/ntSss/SSS ln /hive/users/angie/testBams/SSS/* /hive/data/genomes/hg18/bed/ntSss/SSS/ mkdir /gbdb/hg18/neandertal/bbi/ ln -s /hive/data/genomes/hg18/bed/ntSss/SSS/ntSssZScorePMVar.bw /gbdb/hg18/neandertal/bbi/ ln /hive/users/angie/testBams/{z-p5.CpGmask.vbf.txt,ntSssTop5p.bed} /hive/data/genomes/hg18/bed/ntSss/ # on genome-nt: rsync -av /data/NT/mito/* /hive/data/genomes/hg18/bed/ntMito/ # back on hgwdev: mkdir /gbdb/hg18/neandertal/ntMito ln -s /hive/data/genomes/hg18/bed/ntMito/ntM.fa /gbdb/hg18/neandertal/ntMito/ # on genome-nt: rsync -av /data/NT/ntOoaHaplo/* /hive/data/genomes/hg18/bed/ntOoaHaplo/ # Load bed tables with fake names: cd /hive/data/genomes/hg18/bed/ntHumChimpCodingDiff hgLoadBed -tab hg18 hernan ntHumChimpCodingDiff.bed #Loaded 11345 elements of size 9 cd /hive/data/genomes/hg18/bed/ntSss hgLoadBed hg18 ashTop5 ntSssTop5p.bed #Loaded 212 elements of size 5 cd /hive/data/genomes/hg18/bed/ntSss/SSS hgLoadBed hg18 ashSsss ntSssSnps.bed #Loaded 5617139 elements of size 9 hgsql hg18 -e 'delete from ashSsss where chrom = "chrY"' # Write up overview of downloadable /gbdb, table dumps # 10:30 or later: # copy over .html files to ~/kent/src/... # copy over trackDb.ra entries # copy /data/NT/ntOoaHaplo/ntOoaHaplo.{as,sql} to kent/src/hg/lib # ? copy overview page to ~/browser/ ? # migrate pushQ to hgwbeta: ssh genome-nt hgsqldump qapushq neandertal \ | sed -re 's/genome-nt/hgwdev/g; s@/data/NT/gbdb@/gbdb@g; s/\),\(/),\n(/g' \ > /hive/users/angie/testBams/pushQ.sql # Read carefully to check for any additional necessary edits. ssh hgwbeta hgsql qapushq < /hive/users/angie/testBams/pushQ.sql # 10:50: # if twiddling thumbs :), take it live on hgwdev-angie # load database tables foreach db (hg18 panTro2) set dbAbbrev = `echo $db | sed -e 's/panTro2/pt2/'` foreach t (Feld1 Mez1 Sid1253) hgsql $db -e "drop table if exists bam$t; create table bam$t (fileName varchar(255) not null);" hgsql $db -e "insert into bam$t values ('/gbdb/$db/neandertal/seqAlis/$t-$dbAbbrev.bam');" end foreach i (16 25 26) hgsql $db -e "drop table if exists bamVi33dot$i; create table bamVi33dot$i (fileName varchar(255) not null);" hgsql $db -e "insert into bamVi33dot$i values ('/gbdb/$db/neandertal/seqAlis/Vi33.$i-$dbAbbrev.bam');" end hgsql $db -e "drop table if exists bamAll; create table bamAll (fileName varchar(255) not null);" hgsql $db -e "insert into bamAll values ('/gbdb/$db/neandertal/seqAlis/all-$dbAbbrev.bam');" end foreach db (hg18 panTro2) set dbAbbrev = `echo $db | sed -e 's/panTro2/pt2/'` foreach i (Feld1 Mez1 Sid1253) hgsql $db -e "drop table if exists bamSL$i; create table bamSL$i (fileName varchar(255) not null);" hgsql $db -e "insert into bamSL$i values ('/gbdb/$db/neandertal/seqAlis/SL$i.$dbAbbrev.bam');" end foreach i (16 25 26) echo hgsql $db -e "drop table if exists bamSLVi33dot$i; create table bamSLVi33dot$i (fileName varchar(255) not null);" echo hgsql $db -e "insert into bamSLVi33dot$i values ('/gbdb/$db/neandertal/seqAlis/SLVi33.$i.$dbAbbrev.bam');" end end foreach n (4 5 6 7 8) hgsql hg18 -e "create table bamMMS$n (fileName varchar(255) not null);" end hgsql hg18 -e "insert into bamMMS4 values ('/gbdb/hg18/neandertal/humans/MMS4_HGDP00778_Han.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "insert into bamMMS5 values ('/gbdb/hg18/neandertal/humans/MMS5_HGDP00542_Papuan.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "insert into bamMMS6 values ('/gbdb/hg18/neandertal/humans/MMS6_HGDP00927_Yoruba.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "insert into bamMMS7 values ('/gbdb/hg18/neandertal/humans/MMS7_HGDP01029_San.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e "insert into bamMMS8 values ('/gbdb/hg18/neandertal/humans/MMS8_HGDP00521_French.paired.qualfilt.rmdup.entropy1.0.sort.bam');" hgsql hg18 -e 'drop table if exists ntSssZScorePMVar; \ create table ntSssZScorePMVar (fileName varchar(255) not null); \ insert into ntSssZScorePMVar values \ ("/gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw");' cd /hive/data/genomes/hg18/bed/ntMito hgLoadPsl hg18 -table=ntMito ntM.psl hgLoadSeq hg18 /gbdb/hg18/neandertal/ntMito/ntM.fa cd /hive/data/genomes/hg18/bed/ntOoaHaplo hgLoadBed hg18 ntOoaHaplo ntOoaHaplo.bed \ -sqlTable=ntOoaHaplo.sql -tab #Loaded 13 elements of size 15 # 11am: take it live! ssh hgwdev # add neandertal group to hg18.grp and panTro2.grp, between compGeno and varRep hgsql hg18 -e "INSERT INTO grp VALUES ('neandertal', 'Neandertal Assembly and Analysis', 6.5, 0);" hgsql panTro2 -e "INSERT INTO grp VALUES ('neandertal', 'Neandertal Assembly and Analysis', 6.5);" # chmod 775 the new bed/ subdirs chmod 775 /hive/data/genomes/hg18/bed/{ntHumChimpCodingDiff,ntSss,ntOoaHaplo,ntSeqContigs,ntSeqReads,ntModernHumans,ntMito} chmod 775 /hive/data/genomes/panTro2/bed/{ntSeqContigs,ntSeqReads} # rename fake-named database tables to real names hgsql hg18 -e 'rename table hernan to ntHumChimpCodingDiff' hgsql hg18 -e 'rename table ashTop5 to ntSssTop5p' hgsql hg18 -e 'rename table ashSsss to ntSssSnps' # check in trackDb, html, ntOoaHaplo.{as,sql} # make alpha in trackDb on hgwdev # run buildTableDescriptions.pl on hgwdev cd ~/kentclean/src cvsup ~/kent/src/test/buildTableDescriptions.pl -db hg18 ~/kent/src/test/buildTableDescriptions.pl -db panTro2 # check in overview page in browser CVS, make + make alpha on hgwdev cd ~/browser cvsup cvs add Neandertal cvs add Neandertal/index.html # other files... images? cvs ci -m "Migrating from genome-nt" Neandertal make alpha # add tracks to tablesIgnored in all.joiner # add this file to CVS and add note about track histories in this file to hg18.txt (& panTro2) # When tracks are on hgwbeta: # run buildTableDescriptions.pl on hgwbeta ssh hgwbeta cd ~/kentfix && cvsup ssh qateam@hgwbeta crontab -l | grep buildTableD # run that job. ######################################################################### # Lifting to hg19 (WORKING - 2010-12-15 - Hiram) #### H-C Coding Diffs mkdir -p /hive/data/genomes/hg19/bed/homNea/codingDiff cd /hive/data/genomes/hg19/bed/homNea/codingDiff hgsql -N -e "select * from ntHumChimpCodingDiff;" hg18 \ | cut -f2- > ntHumChimpCodingDiff.hg18.tab liftOver -bedPlus=8 -tab ntHumChimpCodingDiff.hg18.tab \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \ ntHumChimpCodingDiff.hg19.tab unMapped.tab # -rw-rw-r-- 1 0 Dec 15 10:49 unMapped.tab hgLoadBed -tab hg19 ntHumChimpCodingDiff ntHumChimpCodingDiff.hg19.tab #### 5% Lowest S - Selective Sweep Scan (S): 5% Smallest S scores mkdir /hive/data/genomes/hg19/bed/homNea/SssTop5p cd /hive/data/genomes/hg19/bed/homNea/SssTop5p hgsql -N -e "select * from ntSssTop5p;" hg18 \ | cut -f2- > ntSssTop5p.hg18.tab liftOver ntSssTop5p.hg18.tab \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \ ntSssTop5p.hg19.tab unMapped.tab # -rw-rw-r-- 1 0 Dec 15 10:49 unMapped.tab hgLoadBed -tab hg19 ntSssTop5p ntSssTop5p.hg19.tab #### Sel Swp Scan (S) - Selective Sweep Scan (S) on Neandertal vs. Human Polymorphisms (Z-Score +- Variance) mkdir /hive/data/genomes/hg19/bed/homNea/SssZScorePMVar cd /hive/data/genomes/hg19/bed/homNea/SssZScorePMVar bigWigToBedGraph /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw \ ntSssZScorePMVa.hg18.bedGraph liftOver -bedPlus=3 -tab ntSssZScorePMVa.hg18.bedGraph \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \ ntSssZScorePMVa.hg19.bedGraph unMapped.tab grep "^chr" unMapped.tab | wc # 1981 7924 62659 grep -v "^chr" unMapped.tab | sort | uniq -c # 1981 #Deleted in new grep "^chr" unMapped.tab | cut -f1 | sort | uniq -c # 2 chr10 # 38 chr12 # 5 chr13 # 15 chr14 # 22 chr15 # 532 chr17 # 16 chr19 # 25 chr2 # 2 chr22 # 48 chr3 # 85 chr4 # 23 chr5 # 24 chr6 # 814 chr7 # 198 chr8 # 132 chrX sort -k1,1 -k2,2n ntSssZScorePMVa.hg19.bedGraph > sorted.bedGraph bedGraphToBigWig sorted.bedGraph \ /hive/data/genomes/hg19/chrom.sizes ntSssZScorePMVar.bw rm -f sorted.bedGraph bigWigInfo ntSssZScorePMVar.bw # version: 4 # isCompressed: yes # isSwapped: 0 # primaryDataSize: 71,065,376 # primaryIndexSize: 357,392 # zoomLevels: 10 # chromCount: 23 # basesCovered: 11,101,027 # mean: -0.023223 # min: -8.833200 # max: 33.637199 # std: 4.037342 bigWigInfo /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw # version: 3 # isCompressed: yes # isSwapped: 0 # primaryDataSize: 53,391,942 # primaryIndexSize: 357,400 # zoomLevels: 7 # chromCount: 23 # basesCovered: 11,103,008 # mean: -0.023066 # min: -8.833200 # max: 33.637199 # std: 4.037344 ls -ogL /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw *.bw # -rw-rw-r-- 1 56244661 Mar 24 2010 /gbdb/hg18/neandertal/bbi/ntSssZScorePMVar.bw # -rw-rw-r-- 1 107739421 Dec 15 10:11 ntSssZScorePMVar.bw mkdir /gbdb/hg19/neandertal/bbi ln -s `pwd`/ntSssZScorePMVar.bw /gbdb/hg19/neandertal/bbi hgBbiDbLink database trackName fileName hgBbiDbLink hg19 ntSssZScorePMVar \ /gbdb/hg19/neandertal/bbi/ntSssZScorePMVar.bw #### S SNPs - SNPS Used for Selective Sweep Scan (S) mkdir /hive/data/genomes/hg19/bed/homNea/SssSnps cd /hive/data/genomes/hg19/bed/homNea/SssSnps hgsql -N -e "select * from ntSssSnps;" hg18 | cut -f2- > ntSssSnps.hg18.bed liftOver -bedPlus=8 -tab ntSssSnps.hg18.bed \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \ ntSssSnps.hg19.bed unMapped.tab grep "^chr" unMapped.tab | wc # 1197 10773 74797 grep -v "^chr" unMapped.tab | sort | uniq -c # 1197 #Deleted in new hgLoadBed hg19 ntSssSnps ntSssSnps.hg19.bed # Loaded 5614241 elements of size 9 #### Cand. Gene Flow - Candidate Regions for Gene Flow from Neandertal to #### Non-African Modern Humans mkdir /hive/data/genomes/hg19/bed/homNea/OoaHaplo cd /hive/data/genomes/hg19/bed/homNea/OoaHaplo hgsql -N -e "select * from ntOoaHaplo;" hg18 | cut -f2- \ > ntOoaHaplo.hg18.bed liftOver -bedPlus=8 -tab ntOoaHaplo.hg18.bed \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg18/liftOver/hg18ToHg19.over.chain.gz \ ntOoaHaplo.hg19.bed unMapped.tab # -rw-rw-r-- 1 0 Dec 15 10:49 unMapped.tab hgLoadBed hg19 ntOoaHaplo ntOoaHaplo.hg19.bed \ -sqlTable=$HOME/kent/src/hg/lib/ntOoaHaplo.sql -tab #### Neandertal Mito - Neandertal Mitochondrial Sequence (Vi33.16, 2008) mkdir /hive/data/genomes/hg19/bed/homNea/Mito cd /hive/data/genomes/hg19/bed/homNea/Mito hgsql -N -e "select * from ntMito;" hg18 | cut -f2- \ > ntMito.hg18.psl # it is the same chrM in hg19, no need to lift hgLoadPsl hg19 -table=ntMito ntMito.hg18.psl mkdir /gbdb/hg19/neandertal/ntMito ln -s /hive/data/genomes/hg18/bed/ntMito/ntM.fa \ /gbdb/hg19/neandertal/ntMito/ hgLoadSeq hg19 /gbdb/hg19/neandertal/ntMito/ntM.fa