#! /usr/bin/env python
from sys import stdin, stdout, stderr, exit, argv
from getopt import getopt
#from hcluster import single, to_tree
from disjointset import DisjointSet
from defaultdict import Dict
from math import log, sin, cos, pi, exp, floor
from copy import deepcopy
def log10(x):
  return log(x, 10)

opts, args = getopt(argv[1:], "?Hab:c:d:e:hi:l:mo:r:suvw:")
outnames = [] # Names of output files
verbose = False # Output progress information
csort = False # Sort accessions so accessions with more shared haplotypes are close
asort = False # Sort accessions alphabetically
start = None # Start of region to visualise
end = None # End of region to visualise
ostart = None # Option specification for start
oend = None # Option specification for end
uniqueprefix = False # Truncate accession names to the minimum amount required for uniqueness
minscore = .5 # Minimum score that a shared haplotype region for an accession can have - determines how faded it can be
minobs = None # Minimum score
maxobs = None # Maximum score
infofiles = None # Files sequencing and SNP information is read from
avrange = 50000 # SNP density for a point is averaged over region extending avrange either side
density = None # Density of data points in SNP density plots
classcolours = [] # Colours used for connected components of shared haplotypes
scaledist = .07 # Distance, as fraction of plot length from plots to informative SNP density scale
# Function for outputting brief usage description
def print_usage(output = stdout, exitcode = None, postscript = None):
  print >> output, "Usage:", argv[0], "[Options] [input files]"
  print >> output, "Read information generated by shared haplotype investigation and create an"
  print >> output, "Octave format plot of the data. Information should all pertain to a single"
  print >> output, "chromosome, as numerical values of chromosomes are used for sorting"
  print >> output, "\nLegal options are:"
  print >> output, " -oN Output plot code to file N. Default is to output to standard output."
  print >> output, "     Multiple output files can be specified by repeated application of this"
  print >> output, "     option"
  print >> output, " -cC Add colour C (which should be comma separated RGB values) to the colours"
  print >> output, "     used to represent connected components of shared haplotypes. Default is"
  print >> output, "     set of all RGB values with one or two values of 1 and the rest 0. Fading"
  print >> output, "     is defined by interpolation by score between class colour and white."
  print >> output, "     Colours are used in the order they are added. If an insufficient number"
  print >> output, "     of colours is available, the program will crash"
  print >> output, " -a  Sort accessions alphabetically"
  print >> output, " -s  Sort accessions by first clustering by maximum shared length between"
  print >> output, "     haplotypes and then ordering subtrees so sum of shared lengths between"
  print >> output, "     neighbours is maximised. If both the -a and -s options are specified,"
  print >> output, "     the last occurrence takes presedence. CURRENTLY DEFUNCT"
  print >> output, " -bX Set start point of region to X, otherwise 0 is used"
  print >> output, " -eX Set end point of region to X, otherwise maximum value encountered is used"
  print >> output, " -u  Only include as much of the prefix of an accession name required for"
  print >> output, "     uniqueness in the graphs showing shared haplotypes in 2D plot"
  print >> output, " -lX Set lowest score of accesion to X; score is used as colour intensity"
  print >> output, " -iN Use SNP information from file N to affect the background representation"
  print >> output, "     of the accessions (default is a thin, grey line). Multiple files can"
  print >> output, "     be specified"
  print >> output, " -wX Width of region to base statistics from information files on. Sites with"
  print >> output, "     an absolute difference to current position of at most X will be used"
  print >> output, " -dX Density of points used for plotting information file statistics for. A"
  print >> output, "     data point every X position will be used to generate the graphs. If no"
  print >> output, "     density is specified, the total set of positions from the information"
  print >> output, "     file be used to define the data points"
  print >> output, " -rX Distance from plots to informative SNP density scale, measured relative"
  print >> output, "     to the width of the plot."
  print >> output, " -v  Write some information to standard error"
  print >> output, " -h, -H, -? Print this information and stop"
  if postscript != None:
    output.write('\n')
    print >> output, postscript
  if exitcode != None:
    exit(exitcode)

# Analyse command line options
for o in opts:
  if o[0][1] == 'o':
    outnames.append(o[1])
  elif o[0][1] == 'i':
    if infofiles == None:
      infofiles = [o[1]]
    else:
      infofiles.append(o[1])
  elif o[0][1] == 'w':
    avrange = int(o[1])
  elif o[0][1] == 'd':
    density = int(o[1])
  elif o[0][1] == 'c':
    try:
      c = map(lambda x: float(x), o[1].split(','))
      if len(c) != 3 or max(c) > 1 or min(c) < 0:
        raise ValueError
    except ValueError:
      print_usage(output = stderr, exitcode = 1, postscript = "Colour not a valid comma separated RGB value")
    classcolours.append(c)
  elif o[0][1] == 's':
    # csort = True CURRENTLY DEFUNCT
    asort = False
  elif o[0][1] == 'a':
    asort = True
    csort = False
  elif o[0][1] == 'l':
    minscore = float(o[1])
    if minscore < 0 or minscore > 1:
      print_usage(output = stderr, exitcode = 1, postscript = "Minimum score has to be number between 0 and 1")
  elif o[0][1] == 'r':
    scaledist = float(o[1])
  elif o[0][1] == 'u':
    uniqueprefix = True
  elif o[0][1] == 'v':
    verbose = True
  elif o[0][1] == 'b':
    ostart = int(o[1])
  elif o[0][1] == 'e':
    oend = int(o[1])
  elif o[0][1] in 'hH?':
    print_usage(output = stdout, exitcode = 0)

# Set up files for output
if outnames == []:
  outfiles = [stdout]
else:
  outfiles = []
  for n in outnames:
    outfiles.append(open(n, 'w'))
# Set up files for input
if args == []:
  args = [stdin]
# Define default colours if none were specified
if classcolours == []:
  classcolours = [(0, 0, 1), (1, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 1), (1, 1, 0), (1, .5, .5), (.5, 1, .5), (.5, .5, 1)]

# Binary search for item in arr having valuefn return largest value
# not larger than x; arr is assumed to be sorted according to valuefn
def bsearchd(x, arr, valuefn):
  if len(arr) == 0:
    return None
  low = 0
  high = len(arr)
  while low < high - 1:
    mid = (low + high) / 2
    if valuefn(arr[mid]) <= x:
      low = mid
    else:
      high = mid
  return low

# Binary search for item in arr having valuefn return smallest value
# not smaller than x; arr is assumed to be sorted according to valuefn
def bsearchu(x, arr, valuefn):
  if len(arr) == 0:
    return None
  low = -1
  high = len(arr) - 1
  while low < high - 1:
    mid = (low + high) / 2
    if valuefn(arr[mid]) < x:
      low = mid
    else:
      high = mid
  return high

# Read background information
if infofiles != None:
  # Structure for holding information about overall informative sites
  informative = set([])
  alle = set([])
  refprivate = set([])
  refseq = None
  # Class for holding information on a specific site
  class SiteInf:
    def __init__(self, heterozygous, coverage):
      self.heterozygous = (int(heterozygous) == 1)
      self.coverage = int(coverage)
  # Class for holding information on a specific accession
  class AccInfo:
    # Class initialisation
    def __init__(self):
      self.__info__ = []      # List of per site information
      self.__sorted__ = True  # Is list sorted?  Iterate over all
    # positions in points (default is to iterate over all positions
    # from infofiles) and return average values
    def iterate(self, points = None):
      class __ReturnValue:
        def __init__(self, position, heterozygocity, private, informative, length, coverage):
          self.pos = position
          self.het = heterozygocity
          self.snp = private
          self.inf = informative
          self.len = length
          self.cov = coverage
      if not self.__sorted__:
        self.__info__.sort()
      if points == None:
        points = alle
      low = 0
      high = 0
      hetacc = 0
      snpacc = 0
      inflist = list(informative)
      inflist.sort()
      ilow = 0
      ihigh = 0
      def privatesnp(i):
        return int(self.__info__[i][0] not in informative and (self != refseq or self.__info__[i][0] in refprivate))
      for i in points:
        if i < start or i > end:
          # Ignore points outside start,end interval
          continue
        # Update with sites no longer in region averaged over
        while low < len(self.__info__) and self.__info__[low][0] < i - avrange:
          hetacc -= int(self.__info__[low][1].heterozygous)
          snpacc -= privatesnp(low)
          low += 1
        # Update with new sites included in region averaged over
        while high < len(self.__info__) and self.__info__[high][0] <= i + avrange:
          hetacc += int(self.__info__[high][1].heterozygous)
          snpacc += privatesnp(high)
          high += 1
        while ilow < len(inflist) and inflist[ilow] < i - avrange:
          ilow += 1
        while ihigh < len(inflist) and inflist[ihigh] <= i + avrange:
          ihigh += 1
        yield __ReturnValue(i, float(hetacc) / max(1, high - low), snpacc, ihigh - ilow, min(end, i + avrange) - max(start, i - avrange), max(0, self.find(i)[1].coverage))
    # Iterator for object
    def __iter__(self):
      for i in self.iterate():
        yield i
    # Add new position
    def add(self, position, heterozygous, coverage):
      self.__info__.append((position, SiteInf(heterozygous, coverage)))
      self.__sorted__ = False
    # Extract positions within global variable avrange distance from position
    def extract(self, position):
      if not self.__sorted__:
        self.__info__.sort()
      i = bsearchu(position - avrange, self.__info__, lambda x: x[0])
      j = bsearchd(position + avrange, self.__info__, lambda x: x[0])
      return self.__info__[i:j + 1]
    # Return element closest to position
    def find(self, position):
      if not self.__sorted__:
        self.__info__.sort()
      i = bsearchd(position, self.__info__, lambda x: x[0])
      if i < len(self.__info__) - 1 and self.__info__[i + 1][0] - position < position - self.__info__[i][0]:
        i += 1
      return self.__info__[i]
  info = Dict(initfn = AccInfo)
  count = Dict(0)
  for fname in infofiles:
    if verbose:
      print >> stderr, "Reading information from", fname
    f = open(fname)
    s = f.readline()
    while s != "":
      s = s.split()
      info[s[0]].add(int(s[2]), int(s[3]), int(s[4]))
      count[int(s[2])] += 1
      alle.add(int(s[2]))
      s = f.readline()
    f.close()
  # Private SNPs are only registered for accession and reference,
  # unless it is a reference private SNP where it is registered for
  # all
  for i in count.items():
    if i[1] > 2 and i[1] < len(info):
      informative.add(i[0])
    elif i[1] == len(info):
      # Private SNP for reference sequence
      refprivate.add(i[0])
  alle = list(alle)
  alle.sort()
  i = -1
  for a in info.values():
    if len(a.__info__) > i:
      i = len(a.__info__)
      refseq = a

# Output s to all output files
def output(s):
  for f in outfiles:
    print >> f, s
  f.flush()

# Determine whether prefix of s matches p
def prefix(s, p):
  return s[:len(p)] == p

# Read data from file pointer f
def readdata(f):
  global maxobs, minobs
  # Class for shared haplotypes
  class SharedHap:
    def __init__(self, start, end, score):
      self.start = start
      self.end = end
      self.score = score
  # Read data
  data = {} # Shared haplotype regions
  acc = []  # Accessions encountered
  while True:
    s = f.readline()
    if s == "":
      break
    if prefix(s, "Comparing"):
      # Start of new pair of accessions
      s = s[len("Comparing accessions "):]
      i = s.find(" and ")
      a1 = s[:i].strip()
      if a1 not in acc:
        acc.append(a1)
      a2 = s[i + len(" and "):].strip()
      if a2 not in acc:
        acc.append(a2)
      data[(a1, a2)] = data[(a2, a1)] = []
    else:
      # Specification of shared haplotype
      s = s.split()
      if s[0].isdigit():
        data[(a1, a2)].append(SharedHap(int(s[0]), int(s[2]), float(s[-1][1:-2])))
        # Remember highest and lowest scores observed
        if maxobs == None or data[(a1, a2)] < maxobs:
          maxobs = data[(a1, a2)]
        if minobs == None or data[(a1, a2)] > minobs:
          minobs = data[(a1, a2)]
  return data, acc

# Clean up data by removing parts not in the region specified by start
# and end. If no start/end has been specified, find it.
def cleanup(data):
  global start, end
  start = ostart
  end = oend
  if start == None:
    for i in data.values():
      for j in i:
        if start == None:
          start = j.start
        else:
          start = min(start, j.start)
  if end == None:
    end = 0
    for i in data.values():
      for j in i:
        end = max(end, j.end)
  # Check regions against start and end
  for i in data.values():
    # Run through shared haplotypes for pair yielding i
    j = 0
    while j < len(i):
      if i[j].end < start or i[j].start > end:
        # This region is entirely outside start to end region
        del i[j]
      else:
        if i[j].start < start:
          # This region begins before start
          i[j].start = start
        if i [j].end > end:
          # This region ends after end
          i[j].end = end
        j += 1

# Reorder subtrees in t such that sum of shared haplotype lengths
# between neighbours, given by d, is maximised.
def reorder(t, d):
  M = {}
  N = {}
  L = {}
  B = {}
  # Generate list of all leaves under node t
  def leaves(t):
    return t.pre_order(lambda x: x.get_id())
  # Update M and N for node t and return leaves under t
  def recurse(t, d, M, N, L, B):
    if t.is_leaf():
      # Reached bottom of recursion
      M[(t.get_id(), t.get_id())] = 0
      return ([t.get_id()], ), None
    else:
      left = recurse(t.get_left(), d, M, N, L, B)[0]
      cleft = sum(left, [])
      L[t.get_left()] = cleft
      right = recurse(t.get_right(), d, M, N, L, B)[0]
      cright = sum(right, [])
      L[t.get_right()] = cright
      best = (-1,)
      b = {}
      for l in xrange(len(left)): # For subtrees of left subtree
        for i in left[l]: # For leaves in left subtree
          for j in cright:
            # Initialise best score with flanks i, j for all j in right subtree
            M[(i, j)] = -1
          for r in xrange(len(right)): # For subtrees of right subtree
            for j in right[r]: # For leaves in right subtree
              # Compute best score with left flank i and left flank of
              # right subtree j
              N[(i, j)] = -1
              for k in left[len(left) - 1 - l]:
                if M[(i, k)] + d[(k, j)] > N[(i, j)]:
                  N[(i, j)] = M[(i, k)] + d[(k, j)]
                  b[(i, j)] = k
              # Compute best score for all i, k flanks where k is not
              # in same subtree of right subtree as j, unless right
              # subtree is a leaf
              for k in right[len(right) - 1 - r]:
                if N[(i, j)] + M[(j, k)] > M[(i, k)]:
                  M[(i, k)] = N[(i, j)] + M[(j, k)]
                  B[(i, k)] = ((i, b[(i, j)]), (j, k))
          # Make score symmetric
          for j in cright:
            M[(j, i)] = M[(i, j)]
            B[(j, i)] = B[(i, j)]
            if M[(i, j)] > best[0]:
              best = (M[(i, j)], (i, j))
      return (cleft, cright), best[1]
  # Compute best score obtainable for any pair of flanking accessions
  # (the pair of accessions uniquely define the node in the tree).
  best = recurse(t, d, M, N, L, B)[1]
  # Backtrack score for flanking pair i, j, where i is in left subtree
  # and j is in right subtree.
  def backtrack(t, i, j, flip):
    if not t.is_leaf():
      # Still need to recurse deeper
      flip = flip ^ (i != B[(i, j)][0][0])
      backtrack(t.get_left(), B[(i, j)][0][0], B[(i, j)][0][1], flip)
      backtrack(t.get_right(), B[(i, j)][1][0], B[(i, j)][1][1], flip)
      if flip:
        # Swap subtrees
        t.right, t.left = t.get_left(), t.get_right()
  backtrack(t, best[0], best[1], False)
  return t.pre_order(lambda x: x.get_id())

# Sort data by first doing a hierarchical agglomerative clustering
# based on maximum similarity, then reorder subtrees such that sum of
# similarities between neighbours is maximised.
def sortdata(acc, data):
  # Start by creating distance matrix
  d = []
  D = {}
  for i in xrange(len(acc)):
    for j in xrange(i + 1, len(acc)):
      d.append(0)
      for k in data[(acc[i], acc[j])]:
        d[-1] += k.end - k.start
      D[(i, j)] = D[(j, i)] = d[-1]
  m = max(d)
  for i in xrange(len(d)):
    d[i] = m - d[i]
  # Perform clustering
  t = to_tree(single(d))
  # Reorder tree to maximise sum of neighbour similarities
  return map(lambda x: acc[x], reorder(t, D))

# Class representing a shared haplotype
class Sharing:
  def __init__(self, start, end, score, pair):
    self.start = start
    self.end = end
    self.pair = pair
    self.score = score
# Find regions of shared haplotypes and the overlaps in each region
def find_regions(data):
  # Create list of all shared haplotypes in sorted order
  shared = sum(map(lambda x: map(lambda y: Sharing(y.start, y.end, y.score, x[0]), x[1]), data.items()), [])
  shared.sort(lambda x, y: x.start - y.start)
  # Find maximal sets where no shared hapotypes are disjoint
  end = shared[0].end
  regions = []
  region = [(shared[0].pair, shared[0].score)]
  overhang = []
  newoverhang = []
  for i in xrange(1, len(shared)):
    if shared[i].start > end:
      # This shared haplotype does not overlap all shared haplotypes
      # in region; start new region
      # Eliminate shared haplotypes from previous regions that no longer
      # extends into current region
      j = 0
      while j < len(overhang):
        if overhang[j].end >= shared[i - 1].start:
          # This shared haplotype extends into current region
          region.append((overhang[j].pair, overhang[j].score))
        if overhang[j].end < shared[i].start:
          # This shared haplotype does not extend into new region
          del overhang[j]
        else:
          # Proceed to next shared haplotype
          j += 1
      # Store start and end of core region, and all shared haplotypes in region
      regions.append((shared[i - 1].start, end, region))
      # Start new region
      region = [(shared[i].pair, shared[i].score)]
      end = shared[i].end
      # Move shared haplotypes part of current region to haplotypes
      # that can extend into subsequent regions
      for j in newoverhang:
        if j.end >= shared[i].start:
          overhang.append(j)
      newoverhang = [shared[i]]
    else:
      # This shared haplotype belongs to current region
      end = min(end, shared[i].end)
      region.append((shared[i].pair, shared[i].score))
      newoverhang.append(shared[i])
  # Check for addition of shared haplotypes to last region
  for i in overhang:
    if i.end >= shared[-1].start:
      region.append((i.pair, i.score))
  regions.append((shared[-1].start, end, region))
  return regions

# Return string with name of accession i with any extensions removed
def prune_acc(s, unique = False, acc = None):
  if unique:
    i = 2
    for j in acc:
      if not prefix(s, j):
        while i <= len(s) and prefix(s[:i], j):
          i += 1
    s = s[:i - 1]
  return s

# Create visualisation of data in file f
def __count():
  i = 0
  while True:
    i += 1
    yield i
count = __count().next
def visualise(f):
  if f == stdin:
    output("# Visualisation of data from standard input")
    if verbose:
      print >> stderr, "Reading data from standard input"
  else:
    output("# Visualisation of data from " + f)
    if verbose:
      print >> stderr, "Reading data from file", f
    f = open(f)
  # Read data
  data, acc = readdata(f)

  # Clean up data
  cleanup(data)

  if csort:
    # Sort data to group similar accessions together
    acc = sortdata(acc, data)
  elif asort:
    # Sort data alphabetically
    acc.sort()

  # Output Octave code for visualising data
  # Iterator function for points that should be used as data points for
  # plotting SNP density information
  def points():
    if density != None:
      for i in xrange(start, end, density):
        yield i
      yield end
    else:
      i = bsearchu(start, alle, lambda x: x)
      while i < len(alle) and alle[i] <= end:
        yield alle[i]
        i += 1
  # Create summary visualisation for the accessions as single entities
  output("figure(%d)" % count())
  output('box("off")')
  output('axis("off")')
  output('line([%d %d], [-.5 -.5], "linewidth", 1.5, "color", "k", "linestyle", "-")' % (start, end))
  ticstep = 10 ** int(log10((end - start) / 4.0))
  if 5 * ticstep <= (end - start) / 4.0:
    ticstep = 5 * ticstep
  elif 2 * ticstep <= (end - start) / 4.0:
    i = 2 * ticstep
  ticstart = ticstep * (int((start - 1) / ticstep) + 1)
  for i in xrange(ticstart, end + 1, ticstep):
    output('line([%d %d], [-.5 -.8], "linewidth", 1.5, "color", "k", "linestyle", "-")' % (i, i))
    output('text(%d, -1.2, "%d", "horizontalalignment", "center", "verticalalignment", "top")' % (i, i))
  for i in xrange(len(acc)):
    output('line([%d %d], [%d %d], "linewidth", 1, "color", [.7, .7, .7], "linestyle", ":")' % (start, end, i, i))
    output('text(%d, %d, "%s", "horizontalalignment", "right")' % (start - (end - start) * .05, i, prune_acc(acc[i], unique = uniqueprefix, acc = acc)))
  if infofiles != None:
    # Function for determining tic value
    def findtic(x):
      print x
      e = int(floor(log(x, 10)))
      if x / 10 ** e > 2:
        if x / 10 ** e > 5:
          m = 5
        else:
          m = 2
      else:
        m = 1
      return (m * 10 ** e, m, e)
    # Start by plotting informative SNP density as background
    x = []
    y = []
    for i in points():
      x.append(str(i))
      y.append(float(bsearchd(min(end, i + avrange), alle, lambda x: x) - bsearchu(max(start, i - avrange), alle, lambda x: x)) / float(min(end, i + avrange) - max(start, i - avrange)))
    m = max(y)
    tic = findtic(m)
    scale = (len(acc) - 1) / m
    y = map(lambda x: "%.2f" % (x * scale), y)
    output('line([%d %d %d], [0 0 %d], "color", [.6 .6 .6], "linewidth", 1)' % (end + (end - start) * (scaledist + .01), end + (end - start) * scaledist, end + (end - start) * scaledist, len(acc)))
    output('line([%d %d], [%.2f, %.2f], "color", [.6, .6, .6], "linewidth", 1)' % (end + (end - start) * scaledist, end + (end - start) * (scaledist + .01), tic[0] * scale, tic[0] * scale))
    output('text(%d, 0, "0", "color", [.6, .6, .6])' % (end + (end - start) * (scaledist + .02)))
    output('text(%d, %.2f, "%s10^{%d}", "color", [.6, .6, .6])' % (end + (end - start) * (scaledist + .02), tic[0] * scale, int(tic[1] != 1) * ("%d * " % tic[1]), tic[2]))
    output('line([' + " ".join(x) + '], [' + " ".join(y) + '], "color", [.6, .6, .6], "linewidth", .7, "linestyle", "-")')
  # Create sorted list of all shared haplotype events, and compute
  # geometric mean of all lengths and scores
  she = []
  meanlength = 0
  meanscore = 0
  for i in data:
    if i[0] < i[1]:
      # We only want one copy of each shared haplotype
      for j in data[i]:
        she.extend([(j.start, Sharing(j.start, j.end, j.score, i)), (j.end, Sharing(j.start, j.end, j.score, i))])
        meanlength += log(j.end - j.start)
        meanscore += log(max(.025, 100 - j.score))
  she.sort()
  meanlength = exp(meanlength * 2.0 / len(she))
  meanscore = exp(meanscore * 2.0 / len(she))

  # Class describing representation
  class Repr:
    def __init__(self, group, score, start):
      self.group = group
      self.score = score
      self.start = start
  # Scan through region, keeping track of which pairs of accessions
  # share haplotypes
  conn = {}        # Connections
  current = Dict() # Current annotation for accessions
  for e in xrange(len(she)):
    if she[e][0] == she[e][1].start:
      # New shared haplotype
      conn[she[e][1].pair] = she[e][1]
    else:
      # End of shared haplotype
      del conn[she[e][1].pair]
    if e < len(she) - 1 and she[e][0] == she[e + 1][0]:
      # Deal with events happening at same position in one go
      continue
    # Class for holding relevant information about edges
    class EdgeInf:
      def __init__(self, score, length):
        self.score = score
        self.length = length
    # Determine representation of current state and plot lines changing status
    # First compute connected components and connectivities
    comp = {} # Connected components
    edges = Dict(lambda: [], True) # Connections for each accession
    for i in conn:
      for j in xrange(2):
        if i[j] not in comp:
          # First time we have seen this accession
          comp[i[j]] = DisjointSet(i[j])
      # Update with connection between these two accessions
      comp[i[0]].union(comp[i[1]])
      for j in xrange(2):
        edges[i[j]].append(EdgeInf(conn[i].score, conn[i].end - conn[i].start)) 

    # Function for computing geometric mean
    def geometric_mean(l):
      return exp(reduce(lambda x, y: log(y) + x, l, 0) * 1.0 / len(l))
    # Function assigning score to haplotype 
    def accscore(edges, compsize):
      # Function computing score of accession with edge scores given by
      # edges in component of size compsize
      # Score dependencies:
      # Length - linear contribution, normalised by overall mean
      length = geometric_mean(map(lambda x: x.length, edges)) / meanlength
      # Connectivity - fraction of full clique this sequence is connected to
      number = len(edges) / float(compsize - 1)
      # Score - reciprocal of deviation from 100%, normalised
      score = meanscore / geometric_mean(map(lambda x: max(0.025, 100 - x.score), edges))
      # Clique size - reciprocal of expected tree length compared to expected
      # separation of pair, mapped to the interval from 1/2 to 1
      size = 1 - 1.0 / (log(compsize) * compsize * (compsize - 1))
      # Final score
      finalscore = min(1, length * number * score * size)
      # Map final score to interval from minscore to 1
      return minscore + (1 - minscore) * finalscore

    # Determine RGB values for specified class and score
    def acccolour(score, group):
      return tuple(map(lambda x: 1 - score * (1 - x), classcolours[group]))

    # Alternative determination of RGB values
    def altcolour(frac, group):
      frac = minscore + (1 - minscore) * frac
      return tuple(map(lambda x: 1 - frac * (1 - x), classcolours[group]))

    def width(score):
      # Width ranges from 6 to 20, with 14 for overall mean
      return 2 * min(10, max(3, 10 - (score - 0.025) * 3 / (meanscore - 0.025)))

    # Generate output for accessions starting, ending, or changing score here
    updated = set([])
    for a in acc:
      if a in current:
        if a in edges:
          # Accession has shared haplotype extending over event position
          score = (geometric_mean(map(lambda x: max(0.025, 100 - x.score), edges[a])), len(edges[a]) / float(comp[a].size() - 1))
          if score != current[a].score:
            # But its score changes
            output('line([%d %d], [%d %d], "linewidth", %.1f, "color", [%.2f, %.2f, %.2f], "linestyle", "-")' % ((current[a].start, she[e][0], acc.index(a), acc.index(a), width(current[a].score[0])) + altcolour(current[a].score[1], current[a].group)))
            current[a].start = she[e][0]
            updated.add(a)
        else:
          # Accession ends having a shared haplotype at this position
          output('line([%d %d], [%d %d], "linewidth", %.1f, "color", [%.2f, %.2f, %.2f], "linestyle", "-")' % ((current[a].start, she[e][0], acc.index(a), acc.index(a), width(current[a].score[0])) + altcolour(current[a].score[1], current[a].group)))
          del current[a]
      elif a in edges:
        # Start of shared haplotype for this accession
        current[a] = Repr(None, (geometric_mean(map(lambda x: max(0.025, 100 - x.score), edges[a])), len(edges[a]) / float(comp[a].size() - 1)), she[e][0])
    # Determine classes of connected components and output accessions
    # changing class here
    group = Dict(lambda: Dict(0), True)
    for i in comp:
      if i in current and current[i].group != None:
        # Update score for component of i inheriting class of i
        group[comp[i].find()][current[i].group] += current[i].score[0]
      else:
        group[comp[i].find()][None] += current[i].score[0]
    # Sort class assignments accoring to score
    vote1 = []
    vote2 = []
    for i in group:
      s = 0
      for j in group[i]:
        if j != None:
          vote1.append((-group[i][j], i, j))
        s -= group[i][j]
      vote2.append((s, i))
    # Assign existing classes based on score
    vote1.sort()
    group = {}
    used = len(classcolours) * [False]
    for i in vote1:
      if i[1] not in group and not used[i[2]]:
        # Class with representation i[1] has not been assigned a class
        # yet, and i[2] is unused - use this assignment.
        group[i[1]] = i[2]
        used[i[2]] = True
    # Assign lowest ranking available class to components not yet
    # assigned classes
    vote2.sort()
    j = 0
    for i in vote2:
      if i[1] not in group:
        while j < len(used) and used[j]:
          j += 1
        if j >= len(used):
          raise ValueError, "Ran out of classes at position " + str(she[e][0])
        group[i[1]] = j
    # For each accession in a currently shared haplotype, compute its
    # representation and perform necessary updates if it has changed
    for a in current:
      if current[a].group == None:
        # Accession only just started having a shared haplotype here
        current[a].group = group[comp[a].find()]
      elif current[a].group != group[comp[a].find()]:
        # Accession changes class at this position
        if a not in updated:
          # And we haven't yet output it
          output('line([%d %d], [%d %d], "linewidth", %.1f, "color", [%.2f, %.2f, %.2f], "linestyle", "-")' % ((current[a].start, she[e][0], acc.index(a), acc.index(a), width(current[a].score[0])) + altcolour(current[a].score[1], current[a].group)))
        current[a].group = group[comp[a].find()]
      # Update scores
      if a in updated:
        # Update score for accessions where we identified a change in score
        current[a].score = (geometric_mean(map(lambda x: max(0.025, 100 - x.score), edges[a])), len(edges[a]) / float(comp[a].size() - 1))
  # As ends of shared haplotypes are also events, everything should be
  # properly concluded.
  for a in acc:
    if a in current:
      print >> stderr, "Still accessions with current track"
      exit(1)
  # Generate lines representing SNP densities
  if infofiles != None:
    x = Dict(initfn = lambda: [])
    yd = Dict(initfn = lambda: [])
    maxyd = 0
    for a in acc:
      yf = []
      for p in info[a].iterate(points()):
        x[a].append(str(p.pos))
        yd[a].append(float(p.snp) / p.len)
        if p.snp == 0 and p.inf == 0:
          yf.append("%.2f" % (acc.index(a) + .5))
        else:
          yf.append('%.2f' % (acc.index(a) + p.snp / float(p.snp + p.inf)))
      if len(x[a]) > 0:
        output('line([' + " ".join(x[a]) + '], [' + " ".join(yf) + '], "linewidth", .5, "linestyle", "-", "color", [.3, .3, .3])')
        maxyd = max(maxyd, max(yd[a]))
    # Determine scaling for private SNP density graphs
    tic = findtic(maxyd)
    scale = .9 / maxyd
    # Output graphs for private SNP density values
    for a in acc:
      if len(x[a]) > 0:
        output('line([' + " ".join(x[a]) + '], [' + " ".join(map(lambda x: ("%.2f" % (acc.index(a) + x * scale)), yd[a])) + '], "linewidth", .5, "linestyle", "-", "color", [0, 0, 0])')
        output('line([%(x2).2f %(x1).2f %(x1).2f], [%(y)d %(y)d %(y)d.9], "linewidth", 1, "linestyle", "-", "color", [0, 0, 0])' % {'x1': end + (end - start) * .01, 'x2': end + (end - start) * .02, 'y': acc.index(a)})
        output('line([%(x1).2f %(x2).2f], [%(y).2f, %(y).2f], "linewidth", 1, "linestyle", "-", "color", [0, 0, 0])' % {'x1': end + (end - start) * .01, 'x2': end + (end - start) * .02, 'y': acc.index(a) + scale * tic[0]})
        output('text(%.2f, %d, "0", "color", [0, 0, 0])' % (end + (end - start) * .03, acc.index(a)))
        output('text(%.2f, %.2f, "%s10^{%d}", "color", [0, 0, 0])' % (end + (end - start) * .03, acc.index(a) + scale * tic[0], int(tic[1] != 1) * ("%d * " % tic[1]), tic[2])) 
    if f != stdin:
      f.close()

# Run through each input file and generate visualisation
for f in args:
  visualise(f)
if outnames != []:
  for f in outfiles:
    f.close()