#!/usr/bin/env python
import csv
import os
import pysam
import re
import fnmatch
import array

#Read a table from a csv file - if boolean variable snps_only = TRUE, then read only snps
#-if int variable bppos is >0 , then only rows with well defined positions are read, positions column is given
#by the number in int_only
class csvtable:
    def __init__(self, directory, first_col, last_col, snps_only, bppos, header):
        self.data = []
        with open(directory, 'rU') as f:
            reader = csv.reader(f)
            if (snps_only == True) & (bppos>0):
                if (header == True):
                    head = reader.next()
                    h = [head[bppos], head[first_col:last_col]]
                    self.data.append(h)
                    for row in reader:
                        pos = float(row[bppos])
                        snps = row[first_col:last_col]
                        if (pos % int(pos) == 0) & (allsnps(snps) == True):
                            snps = row[first_col:last_col]
                            snps = [int(pos), snps]
                            self.data.append(snps)
            else:
                for row in reader:
                    snps = row[first_col:last_col]
                    self.data.append(snps)
            f.close()

#Check that all values in a string list l are SNPs
def allsnps(l):
    flag = True
    lengths = [len(element) for element in l]
    if sum(lengths)> len(l) : flag = False
    return flag
        

#Read a column from a csv file as a vector
class csvvector:
    def __init__(self, directory, column):
        self.data = []
        with open(directory, 'rU') as f:
            reader = csv.reader(f)
            for row in reader:
                self.data.append(row[column])
            f.close()
            
#Read a fasta file
#class read_fasta_file(infile):
#    sequences = []
#    with open (infile, 'rU') as f:
#        sequence = []
#        line = f.readline().rstrip()
#        title = line[1:len(line)]
#        while line != "":
#            line = f.readline().rstrip()
#            if not line.startswith(">"):
#                sequence.append(line)
#            else:
#                sequences.append((title, "".join(sequence)))
#                title = line[1:len(line)]
#                sequence = ""
#        f.close()
#    fastaseq = dict(sequences)
#    return(fastaseq)

#get fragments from a fasta file
def get_frag(fastaseq, ref, start, end):
    return("".join[">", fastaseq[ref][start:end]])

        
def verifybreaks(bamdir, breaks, bp, variants):
    final_breaks = []
    print(bamdir)
    for file in os.listdir(bamdir):
        print(file)
        if fnmatch.fnmatch(file, '*.bam'):
            samfile = pysam.Samfile(bamdir+"/"+file, "rb")
            line = samfile.header['RG'][0]['SM']
	    print(line)	
            curr_line_breaks = [[nn, mline, chrom, sp, ep, bef, aft, int(yes), int(no)] for [nn, mline, chrom, (sp, ep, bef, aft, yes, no)] in breaks if mline == line]
            for breakpoint in curr_line_breaks:
                iter = samfile.fetch(current_chromosome, breakpoint[3], breakpoint[3]+1)
                for x in iter:
                    flag = 0
                    if (x.pos + len(x.seq) >= breakpoint[4]):
			print "Position is ", x.pos+1, "and read sequence is ", x.seq
                        read_start, read_end, qual = x.pos+1, x.pos+len(x.seq), x.qual
                        snps = [snp for snp in bp if ((snp >= read_start) & (snp <= read_end))]
                        contr = 0
                        for snp in snps:
                            snp_in_read = x.seq[snp-read_start]
                            snp_index = bp.index(snp)
                            if (snp < breakpoint[3]):
                                snp_in_variants = variants[bp.index(snp)][founders[breakpoint[5]]][0] 
                                if ((snp_in_variants in matches[snp_in_read])==False):
                                    flag = flag+1
                                    contr = contr +1
				    print "Snp is ", snp
				    print "Read: ", snp_in_read, " Variants: ", snp_in_variants		
                            elif (snp > breakpoint[4]):
                                snp_in_variants = variants[bp.index(snp)][founders[breakpoint[6]]][0] 
                                if ((snp_in_variants in matches[snp_in_read])==False):
                                    flag = flag+1
                                    contr = contr +1
				    print "Snp is ", snp
				    print "Read: ", snp_in_read, " Variants: ", snp_in_variants		
                        if (flag == 0):
                            breakpoint[7] = breakpoint[7]+1
                        else:
                            breakpoint[8] = breakpoint[8]+1
                        print "Breakpoint :", breakpoint
                    else: a = 0 #print ("No coverage by a single read")
                final_breaks.append(breakpoint)
            samfile.close()
    return final_breaks
    
b = csvtable('/data/martha/Recombination/data/breakpoints/breaks50.csv', 0, 9, False, 0, True)