################################################################################
# Defines the following, which are to be called with automatic variables $@, $< 
# and $^ in various combinations in 'makefile'

# These commands are Make macros, expanded and then evaluated by the SHELL (/bin/bash)

# define make_snp_table
# define load_snp_table
# define load_indels
# define load_pileup_table_cmd
# define make_annotations_table
# define load_exonic_indels
# define load_coding_annotations
# define load_noncoding_annotations
# define load_splice_annotations
# define calc_tx_offset
# define get_snp_codons
# define get_intergenic_regions
# define load_masked_query_sql
# define get_masking_regions
# define filter_masked_regions
# define classify_intergenic_relations
# define load_intergenic_annotations
# define make_rel_position_table
# define get_containing_regions
# define combine_all
# define retrieve_grouped


empty := 
space := $(empty) $(empty)
comma := ,

define newline
$(empty)
$(empty)
endef

define quote_list
$(subst $(space),$(comma),$(patsubst %,'%',$(strip $(1))))
endef


define print_nonempty_file
$(shell if [ -s $(1) ]; then echo -en $(1); fi)
endef

# required defined variables
# BFA_FILE
# MAQGENE_GENOME_DIR

SPECIES := $(basename $(BFA_FILE))
FEATURE_TABLE := $(SPECIES)_features
SNP_CHANGES_TABLE := $(SPECIES)_snp_changes
GENE_DNA_TABLE := $(SPECIES)_coding_dna
CODON_TABLE := $(SPECIES)_genetic_code
DNA_BOUNDS_FILE := $(MAQGENE_GENOME_DIR)/$(SPECIES).dnas
PER_LOCUS_TABLE := $(SPECIES)_per_locus
GENE_NAMES_TABLE := $(SPECIES)_gene_names


################################################################################
# variant table
################################################################################
define make_snp_table
drop table if exists $(1);
create table $(1) (
  id int(10) NOT NULL auto_increment,
  dna char(40) NOT NULL,
  start int(10) NOT NULL,
  end int(10) NOT NULL,
  indel_size int(5) NOT NULL,
  variant_type enum('indel', 'point') NOT NULL,
  ref_base char NOT NULL,
  sample_base char NOT NULL,
  snp_score int(5) NOT NULL,
  read_depth int(5) NOT NULL,
  loci_multiplicity double(5,2) NOT NULL,
  mapping_quality int(5) NOT NULL,
  neighbor_quality int(5) NOT NULL,
  primary key (id),
  unique key (dna, start),
  key (indel_size)
)
endef


define load_snp_table
  load data local infile '/dev/stdin' into table $(1)
  (id, dna, @start, ref_base, sample_base, snp_score, read_depth,
   loci_multiplicity, mapping_quality, neighbor_quality)
   set start = @start - 1, end = @start, indel_size = 0,
   variant_type = 'point';

   flush table $(1);
endef


define load_indels
  load data local infile '/dev/stdin' into table $(1)
  (dna, @start, indel_size, @num_spanning_reads, 
   @num_left_reads, @num_right_reads, @junk)
  set id = NULL,
      start = @start - 1, 
      end = @start - 1 + if (indel_size > 0, 0, -indel_size),
      ref_base = 'X',
      sample_base = 'X',
      variant_type = 'indel',
      snp_score = -1000,
      read_depth = if (@num_left_reads < @num_right_reads, @num_left_reads, @num_right_reads),
      loci_multiplicity = -1000,
      mapping_quality = -1000,
      neighbor_quality = -1000;

  flush table $(1);
endef


define load_snp_placeholders
  load data local infile '/dev/stdin' ignore into table $(1)
  (dna, @start, ref_base, read_depth, @junk)
  set id = NULL,
      start = @start - 1, 
      end = @start - 1,
      variant_type = 'point',
      snp_score = -1000,
      loci_multiplicity = -1000,
      mapping_quality = -1000,
      neighbor_quality = -1000;

  flush table $(1);
endef


dna_enum := $(call quote_list,$(shell cut -f 2 $(DNA_BOUNDS_FILE)))

genomic_feature_list := $(call quote_list,$(shell $(MYSQL_CMD) --batch --skip-column-names -e \
	"select distinct feature from $(FEATURE_TABLE)"))

define make_pileup_table_cmd
drop table if exists $(1);
create table $(1) (
  dna enum($(dna_enum)) NOT NULL,
  start int(10) NOT NULL,
  ref_base char(1) NOT NULL,
  read_depth int(5) NOT NULL,
  sample_reads varchar($(PILEUP_MAX_DISPLAY_DEPTH)) NOT NULL,
  A_fwd int(5) NOT NULL,
  A_rev int(5) NOT NULL,
  C_fwd int(5) NOT NULL,
  C_rev int(5) NOT NULL,
  G_fwd int(5) NOT NULL,
  G_rev int(5) NOT NULL,
  T_fwd int(5) NOT NULL,
  T_rev int(5) NOT NULL,
  wt_fwd int(5) NOT NULL,
  wt_rev int(5) NOT NULL,
  primary key (dna, start)
);
endef

define load_pileup_table_cmd
load data local infile '/dev/stdin' into table $(1)
(dna, @start, ref_base, read_depth, sample_reads,
A_fwd, A_rev, C_fwd, C_rev, G_fwd, G_rev, T_fwd, T_rev, wt_fwd, wt_rev)
set start = @start - 1;
flush table $(1)
endef


define make_annotations_table
drop table if exists $(1);
create table $(1) (
  id int(10) NOT NULL,
  class varchar(100) NOT NULL,
  description varchar(100) NOT NULL,
  parent_feature varchar(100) NOT NULL,
  primary key (id, class, parent_feature)
);
endef

############################################################################
# Classify exonic indels as frameshift or inframe.  By convention, 
# indel_size > 0 means insertion.  Same start and end position on reference
# indel_size < 0 means deletion.  End - start = size of deleted piece of DNA
# For deletions, classifies based on the size of the piece of exon deleted
# For insertions, the size of the insertion is used in its entirety.
############################################################################

define load_exonic_indels
insert into $(1)
select
  snp.id,
  if (snp.indel_size < 0, 
    if (rel.overlap % 3 = 0, 'inframe', 'frameshift'),
    if (snp.indel_size % 3 = 0, 'inframe', 'frameshift')) class,
  if (rel.overlap != snp.end - snp.start, 'exon_boundary', 'none') description,
  feature.attribute parent_feature

from $(2) snp
join $(3) rel on (snp.id = rel.query_region_id)
join $(FEATURE_TABLE) feature on (rel.target_region_id = feature.id)
where feature.feature = 'exon'
and snp.variant_type = 'indel';
endef


########################################################################
# Load the coding annotations
# 4.  Classify the transition as: 
#       Silent (AA -> same AA), missense (AA -> Stop), 
#       nonsense (AA -> diff AA), readthrough (stop -> AA)
########################################################################

define load_coding_annotations
insert into $(1)
select 
  trans.id, 
  if (reference_codons.amino = sample_codons.amino, 'silent',
    if (reference_codons.amino3 = 'stop', 'readthrough',
      if (sample_codons.amino3 = 'stop', 'premature_stop', 
        if (trans.start = 'Y' and sample_codons.is_start != 'Y', 'non_start',
            'missense')
      )
    )
  ) class,
  concat(ref_codon,'->',sam_codon,'[',
         reference_codons.amino3,'->',sample_codons.amino3,']') description,
  trans.gene  

from $(2) trans
join $(CODON_TABLE) reference_codons on (trans.ref_codon = reference_codons.codon)
join $(CODON_TABLE) sample_codons on (trans.sam_codon = sample_codons.codon);
endef


#############################################################
# Load non-coding annotations
# All snps that overlap non-coding features
# three_prime_UTR, five_prime_UTR, ncRNA, known variants
# introns are considered non-genic and handled in the section
# 'intergenic' regions
#############################################################

define load_noncoding_annotations
insert into $(1)
select 
  snp.id, 
  feature.feature class, 
  'none' description,
  feature.attribute parent_feature

from $(2) snp
join $(3) rel on (snp.id = rel.query_region_id)
join $(FEATURE_TABLE) feature on (rel.target_region_id = feature.id)
where feature.feature not in ('exon', 'intron', 'mRNA');
endef


###############################################################
# Load splice site annotations
###############################################################

define load_splice_annotations
insert into $(1)
select 
  snp.id, 
  if (feature.strand = '+', 
      if (snp.start - feature.start < 2, 'splice_donor', 'splice_acceptor'),
      if (snp.start - feature.start < 2, 'splice_acceptor', 'splice_donor')) class,
  'none' description,
  feature.attribute parent_feature

from $(2) snp
join $(3) rel on (snp.id = rel.query_region_id)
join $(FEATURE_TABLE) feature on (rel.target_region_id = feature.id)
where feature.feature = 'intron'
and rel.distance > -2;
endef


define make_offsets_table
drop table if exists $(1);
create table $(1) (
id int(10) NOT NULL,
gene varchar(50) NOT NULL,
5prime_offset int(10) NOT NULL,
3prime_offset int(10) NOT NULL,
total_length int(10) NOT NULL,
boundary_type char(20) NOT NULL,
primary key (id, gene, boundary_type)
);
endef


# gives sum of exonic bases between gene boundaries and named feature bound
# Arguments:
# 1 output_table
# 2 region_table
# 3 rel_table
# 4 field in region_table (start or end)
# 5 id field in region_table (usually just 'id')
# output_table: id, gene, 5prime_offset, 3prime_offset, total_length, boundary_type

# assumes snp regions are associated to mRNA regions via 'rel'
define calc_tr_offsets
insert into $(1)

select reg.$(5), 
gene.attribute gene,
if(exon.strand = '+',
   if(reg.$(4) < min(exon.start), reg.$(4) - min(exon.start), 
   sum(if(reg.$(4) < exon.end, reg.$(4), exon.end) - 
   if(reg.$(4) < exon.start, reg.$(4), exon.start))),

   if(max(exon.end) < reg.$(4), max(exon.end) - reg.$(4), 
   sum(if(exon.end < reg.$(4), reg.$(4), exon.end) -
       if(exon.start < reg.$(4), reg.$(4), exon.start)))
  ),

if(exon.strand = '+',
   if(max(exon.end) < reg.$(4), max(exon.end) - reg.$(4), 
   sum(if(exon.end < reg.$(4), reg.$(4), exon.end) -
       if(exon.start < reg.$(4), reg.$(4), exon.start))),

   if(reg.$(4) < min(exon.start), reg.$(4) - min(exon.start), 
   sum(if(reg.$(4) < exon.end, reg.$(4), exon.end) - 
   if(reg.$(4) < exon.start, reg.$(4), exon.start)))),

sum(exon.end - exon.start) total_length,

'$(4)' boundary_type

from $(2) reg 
join $(3) rel on (reg.$(5) = rel.query_region_id) 
join $(FEATURE_TABLE) gene on (rel.target_region_id = gene.id) 
join $(FEATURE_TABLE) exon on (exon.attribute = gene.attribute) 
where exon.feature = 'exon' 
and gene.feature = 'mRNA' 
group by reg.$(5), gene.attribute

endef


#define get_next_id
#100
#endef


define get_next_id
$(shell $(MYSQL_CMD) --skip-column-names --batch -e "select max($(1)) + 1 from $(2)")
endef


define load_uncovered_table

drop table if exists $(1);
create table $(1) (
id int(10) NOT NULL auto_increment,
dna char(40) NOT NULL,
start int(10) NOT NULL,
end int(10) NOT NULL,
primary key (id)
)
auto_increment = $(2);

load data local infile '/dev/stdin' into table $(1)
  (dna, start, end)
endef


# produces an annotation string for uncovered regions of protein-coding genes
# uses intermediate result from 'calc_tx_offsets'
# here 'startbound' is the table of offsets of the left (start) bound
define load_uncovered_gene_annotations
insert into $(1)
select
  b.id,
  'uncovered',
  if(b.5prime_offset = e.5prime_offset,
     concat('non-exonic, ',unc.start - gene.start,' to ',unc.end - gene.start,' bp into'),
     concat(
            if(b.5prime_offset < 0, 
               concat(-b.5prime_offset, ' bp upstream'),
               if(b.3prime_offset < 0,
                  concat(-b.3prime_offset, ' bp downstream'),
                  concat('codon ',floor(b.5prime_offset / 3) + 1)
               )
            ),
            ' to ', 
            if(e.3prime_offset < 0,
               concat(-e.3prime_offset, ' bp downstream'),
               if(e.5prime_offset < 0,
                  concat(-e.5prime_offset, ' bp upstream'),
                  concat('codon ',ceiling(e.5prime_offset / 3) + 1)
               )
            ),
            ' (',
            format(100 * (if(e.5prime_offset < 0,0,e.5prime_offset) - 
                         (if(b.5prime_offset < 0,0,b.5prime_offset))
                         ) / b.total_length,0
            ),
            ' % of ',
            floor(b.total_length/3),
            ' codons)'
            )
  )
  description,
  b.gene

from $(2) unc
join $(3) b using (id)
join $(3) e using (id, gene)
join $(FEATURE_TABLE) gene on (e.gene = gene.attribute)

where b.boundary_type = 'start'
and   e.boundary_type = 'end'
and gene.strand = '+'
and gene.feature = 'mRNA'

union

select
  b.id,
  'uncovered',
  if(b.5prime_offset = e.5prime_offset,
     concat('non-exonic, ',gene.end - unc.end,' to ',gene.end - unc.start,' bp into'),
     concat(
            if(e.5prime_offset < 0, 
               concat(-e.5prime_offset, ' bp upstream'),
               if(e.3prime_offset < 0,
                  concat(-e.3prime_offset, ' bp downstream'),
                  concat('codon ',floor(e.5prime_offset / 3) + 1)
               )
            ),
            ' to ', 
            if(b.3prime_offset < 0,
               concat(-b.3prime_offset, ' bp downstream'),
               if(b.5prime_offset < 0,
                  concat(-b.5prime_offset, ' bp upstream'),
                  concat('codon ',ceiling(b.5prime_offset / 3) + 1)
               )
            ),
            ' (',
            format(100 * (if(b.5prime_offset < 0,0,b.5prime_offset)
                       - (if(e.5prime_offset < 0,0,e.5prime_offset))
                         ) / b.total_length,0
            ),
            ' % of ',
            floor(b.total_length/3),
            ' codons)'
            )
  )
  description,
  b.gene

from $(2) unc
join $(3) b using (id)
join $(3) e using (id, gene)
join $(FEATURE_TABLE) gene on (e.gene = gene.attribute)

where b.boundary_type = 'start'
and   e.boundary_type = 'end'
and gene.strand = '-'
and gene.feature = 'mRNA'

endef


# load variant annotations into 

define get_snp_codons
drop table if exists $(1);
create table $(1) (
id int(10) NOT NULL,
gene varchar(50) NOT NULL,
ref_codon char(4) NOT NULL,
sam_codon char(4) NOT NULL,
start char(1) NOT NULL,
primary key (id, gene)
);

insert into $(1)
select 
  snp.id,
  off.gene,
  @ref_codon := 
     substr(gene_dna.sequence, 
            (floor(if(feature.strand = '+',
                      off.5prime_offset,
                      off.5prime_offset - 1
                   ) / 3
             ) * 3
            ) + 1, 3
           ) ref_codon,
  insert(@ref_codon, 
         (if(feature.strand = '+',
             off.5prime_offset,
             off.5prime_offset - 1) % 3) + 1,
         1,
         if(feature.strand = '+', 
            snp.sample_base, 
            substr('ACGTTGCA', instr('ACGTTGCA', snp.sample_base)+4, 1)
           )
        ) sam_codon,
  if((feature.strand = '+' && off.5prime_offset < 3) ||
     (feature.strand = '-' && off.5prime_offset - 1 < 3), 'Y', 'N') start

from $(2) off
join $(3) snp on (off.id = snp.id)
join $(GENE_DNA_TABLE) gene_dna using (gene)
join $(FEATURE_TABLE) feature on 
  (gene_dna.gene = feature.attribute and
   feature.start <= snp.start and
   feature.end >= snp.end)

where snp.variant_type = 'point'
and feature.feature = 'exon'
and off.boundary_type = 'start'
and off.5prime_offset >= 0
and off.3prime_offset >= 0;

endef


###############################################################
# Intergenic regions
###############################################################

# Retrieve all variants in the proximity of an mRNA.  'proximity' is
# defined as having an interval distance in the range [0, max] with
# some mRNA.  NOTE: in general there is a many-to-many relationship
# between variants and mRNAs.  Each association is treated
# independently.


# find all regions that are within a certain distance range of the span
# of mRNA.
intergenic_target_sql := select id, '$(SPECIES)',dna,start,end,strand \
from $(FEATURE_TABLE) feature where feature.feature = 'mRNA'


define get_intergenic_regions
$(MYSQL_CMD) -e "drop table if exists $(1); create table $(1) like $(2)";
$(MAQGENE_EXE_DIR)/associate_regions -b $(max_gene_radius) -c -1000000 -m $(max_intervening_genes)
-o >($(MYSQL_CMD) -e "load data local infile '/dev/stdin' into table $(1); flush table $(1)")
-d $(DNA_BOUNDS_FILE) -p $(MAQGENE_GENOME_DIR)
-q <($(MYSQL_BATCH) -e "select id, '$(SPECIES)', dna, start, end, '+' from $(3)")
-t <($(MYSQL_BATCH) -e "$(intergenic_target_sql)")
1>/dev/null;
endef


# Get masked query ids, load them into a table
define load_masked_query_sql
drop table if exists $(1);
create table $(1)
(query_region_id int(10) NOT NULL, primary key (query_region_id));
load data local infile '/dev/stdin' ignore into table $(1) 
(@j1, query_region_id, @j3, @j4, @j5, @j6, @j7)
endef


# Find all such SNPs that are associated with a more important feature, such as
# a SNP, exon, ncRNA, or other thing.  (This is basically anything that is not
# mRNA or intron.  These will be filtered out from the 'intergenic associations
# as found by 'intergenic_target_sql'
masked_target_sql := select id,'$(SPECIES)',dna,start,end,strand from \
$(FEATURE_TABLE) feature where feature.feature not in ('mRNA', 'intron')


define get_masking_regions
$(MAQGENE_EXE_DIR)/associate_regions -b -1 -c -1000000
-m $(max_intervening_genes)
-o >($(MYSQL_BATCH) -e "$(call load_masked_query_sql,$(1))")
-d $(DNA_BOUNDS_FILE) 
-p $(MAQGENE_GENOME_DIR)
-q <($(MYSQL_BATCH) -e "select id, '$(SPECIES)', dna, start, end, '+' from $(2)")
-t <($(MYSQL_BATCH) -e "$(masked_target_sql)")
1>/dev/null;
endef


# Filter masked query ids from intergenic relative position table
intergenic_rel_position_table := $(cns_cksum)_rel_intergenic

define filter_masked_regions
delete i.* from $(1) i join $(2) m using (query_region_id)
endef


# Classify intergenic relations
intergenic_associations_table := $(cns_cksum)_intergenic


define classify_intergenic_relations
  drop table if exists $(1);
  create table $(1) (
    id INT(10) NOT NULL,
    target_region_id INT(10) NOT NULL,
    relation enum ('upstream', 'downstream', 'into') NOT NULL,
    distance INT(10) NOT NULL,
    num_regions_between INT(3) NOT NULL,
    attribute varchar(50) NOT NULL,
    primary key (id, target_region_id),
    key (id, attribute)
  );
  insert into $(1)
  select 
    snp.id,
    rel.target_region_id,
    if (rel.overlap > 0,
      'into',
      if (feature.strand = '+',
        if (snp.end < feature.start, 'upstream', 'downstream'), 
        if (feature.end < snp.start, 'upstream', 'downstream')
      )
    ) relation,
	rel.distance,
    rel.num_regions_between,
    feature.attribute

  from $(2) snp
  join $(3) rel on (snp.id = rel.query_region_id)
  join $(FEATURE_TABLE) feature on (rel.target_region_id = feature.id)
endef


define conventionalize_genic_distance
	update $(1)
	set distance = -distance
	where relation in ('upstream','into')
endef


# Load intergenic annotations
define load_intergenic_annotations
  insert into $(1)
  select 
    id,
    'nongenic' class, 
    concat(if (relation in ('upstream','into'), - distance, distance),' ',relation) description,
    attribute
  from $(2)
  where ((relation = 'upstream' and num_regions_between = 0 and distance < $(umdd)) or
         (relation = 'upstream' and num_regions_between < $(max_intervening_genes) and distance < $(umid)) or
         (relation = 'downstream' and num_regions_between = 0 and distance < $(dmdd)) or
         (relation = 'downstream' and num_regions_between < $(max_intervening_genes) and distance < $(dmid)) or
          relation = 'into'
        );
endef


define make_rel_position_table
drop table if exists $(1);
CREATE TABLE $(1) (
association_id INT(10) NOT NULL,
query_region_id INT(10) unsigned NOT NULL,
target_region_id INT(10) NOT NULL,
distance INT(10) NOT NULL,
overlap INT(10) NOT NULL,
same_strand enum('SAME', 'OPP') NOT NULL,
num_regions_between INT(3) NOT NULL,
PRIMARY KEY (association_id),
KEY (query_region_id),
KEY (target_region_id)
)
endef

###################################################################
# produces a 'rel' table associating all query regions that overlap features
###################################################################
define get_containing_regions
$(MAQGENE_EXE_DIR)/associate_regions -b -1 -c -1000000 -m 0 
-o >($(MYSQL_CMD) -e "load data local infile '/dev/stdin' into table $(1); flush table $(1)")
-d $(DNA_BOUNDS_FILE) -p $(MAQGENE_GENOME_DIR)
-q <($(MYSQL_BATCH) -e "select id, '$(SPECIES)', dna, start, end, '+' from $(2)")
-t <($(MYSQL_BATCH) -e "select id, '$(SPECIES)', dna,start,end,strand from $(FEATURE_TABLE) \
     where feature in ($(3))")
1>/dev/null
endef


define get_locus_extra_columns
select column_name
from INFORMATION_SCHEMA.columns 
where table_name = '$(PER_LOCUS_TABLE)' 
and column_name not in ('dna','start')
endef


# has primary key (id, class, parent_feature) i.e. there should be at
# most one relationship between a given id for a class, parent_feature
# combination
define combine_all
drop table if exists $(2);
create table $(2)
(primary key (id, class, description),
 key (dna, start)) as

select 
snp.id,
'$(outfile_basename)' mutant_strain,
snp.dna, 
snp.start + 1 start,
snp.end - snp.start length,
$(1)
snp.ref_base reference_base, 
snp.sample_base sample_base, 
snp.snp_score consensus_score,
snp.loci_multiplicity,
snp.mapping_quality,
snp.neighbor_quality,
(pile.wt_fwd + pile.wt_rev) number_wildtype_reads,
(snp.read_depth - (pile.wt_fwd + pile.wt_rev)) number_variant_reads,
snp.read_depth sequencing_depth,
pile.sample_reads,
snp.variant_type,
snp.indel_size,
class.class,
class.description,
group_concat(class.parent_feature order by class.parent_feature) parent_features

from $(3) snp
join $(4) pile using (dna,start)
join $(5) class on (snp.id = class.id)
left join $(PER_LOCUS_TABLE) using (dna,start)
where 
(snp.variant_type = 'point' and 
  (snp.snp_score >= $(min_snp_score)
  and snp.loci_multiplicity <= $(max_loci_multiplicity)
  and snp.neighbor_quality >= $(min_neighborhood_quality)
  and pile.read_depth >= $(min_total_depth)
  and ((pile.read_depth - pile.wt_fwd - pile.wt_rev) / pile.read_depth) 
      >= $(min_fraction_polymorphic_reads))
  or snp.variant_type = 'indel'
)

group by id, class, description

union

select 
unc.id id,
'$(outfile_basename)' mutant_strain,
unc.dna, 
unc.start + 1 start,
unc.end - unc.start length,
$(1)
'-' reference_base, 
'-' sample_base, 
0 consensus_score,
0,
0,
0,
0 number_wildtype_reads,
0 number_variant_reads,
0 sequencing_depth,
'' sample_reads,
'uncovered',
0,
class.class,
class.description,
group_concat(class.parent_feature order by class.parent_feature) parent_features

from $(6) unc
join $(5) class on (unc.id = class.id)
left join $(PER_LOCUS_TABLE) using (dna,start)

group by id, class, description;

alter table $(2) order by dna, start, length
endef

define ungrouped_column_list
select concat(column_name,',') 
from INFORMATION_SCHEMA.columns 
where table_name = '$(1)' 
and column_name not in ('class','description','parent_features')
endef


define retrieve_grouped
select
$(shell $(MYSQL_BATCH) -e "$(call ungrouped_column_list,$(1))")
group_concat(class) classes,
group_concat(description) descriptions,
group_concat(concat('{',parent_features,'}')) parent_features

from $(1)
group by id
order by dna, start;
endef


define has_snp_changes_table
select count(*) from INFORMATION_SCHEMA.tables 
where table_schema = '$(MYSQL_DB)' 
and TABLE_NAME = '$(SPECIES)_snp_changes'
endef


define known_snp_mixin_ratio_table
select 
  f.id, f.dna, f.start + 1 start, 
  e.snp_name, e.snp_base, 
  if(e.snp_base = 'a', p.A_fwd + p.A_rev, 
     if (e.snp_base = 'c', p.C_fwd + p.C_rev, 
         if (e.snp_base = 'g', p.G_fwd + p.G_rev, 
             if (e.snp_base = 't', p.T_fwd + p.T_rev, -1)))) snp_reads, 
  p.ref_base, 
  p.wt_fwd + p.wt_rev wt_reads,
  p.read_depth,
  p.sample_reads 
from $(FEATURE_TABLE) f 
join $(SNP_CHANGES_TABLE) e on (f.attribute = e.snp_name) 
join $(1) p using (dna,start)
endef


define get_known_snp_coords
select distinct dna, start + 1
from $(FEATURE_TABLE)
where feature = 'SNP'
order by dna, start
endef