#!/usr/local/bin/perl -w
#
# $Id:$
#
# FILE: iupac2meme
# AUTHOR: Timothy L. Bailey
# CREATE DATE: 3/06/2010
# DESCRIPTION: Convert IUPAC motifs (DNA or protein) to MEME format

use warnings;
use strict;

use lib qw(/home/rmott/meme/lib/perl);

use MotifUtils qw(seq_to_intern intern_to_meme read_background_file);

use Getopt::Long;
use Pod::Usage;

=head1 NAME

iupac2meme - Converts IUPAC motifs into MEME motifs.

=head1 SYNOPSIS

iupac2meme [options] <iupac motif>+

 Options:
  -alpha DNA|PROTEIN    IUPAC alphabet; default: DNA
  -numseqs <numseqs>    assume frequencies based on this many
                        sequences; default: 20
  -bg <file>            file with background frequencies of letters;
                        default: uniform background
  -pseudo <total>       add <total> times letter background to each
                        frequency; default: 0
  -logodds              output the log-odds (PSSM) and frequency
                        (PSPM) motifs; default: PSPM motif only
  -url <website>        website for the motif; The motif name is
                        substituted for MOTIF_NAME; default: no url
  -nosort               don't sort the order of motifs

 Converts IUPAC motifs into MEME motifs.

 Example IUPAC DNA motif:     ACGGWNNYCGT
 Example IUPAC PROTEIN motif: IKLVBZYXXHG

 Writes standard output.

=cut

# Set option defaults
my $alph_type     = 'DNA';
my $num_seqs      = 20;
my $bg_file;
my $pseudo_total  = 0;
my $print_logodds = 0;
my $url_pattern   = "";
my $no_sort       = 0;
my @motif_strings = ();

GetOptions(
    "alpha=s"   => \$alph_type,
    "numseqs=i" => \$num_seqs,
    "bg=s"      => \$bg_file,
    "pseudo=f"  => \$pseudo_total,
    "logodds"   => \$print_logodds,
    "url=s"     => \$url_pattern,
    "nosort"    => \$no_sort,
) or pod2usage(2);

# Everything left on the command line after option parsing is a motif.
@motif_strings = @ARGV;

# check alpha type
# The match is anchored so that only the exact words DNA or PROTEIN are
# accepted; an unanchored match would let e.g. "XDNA" through, which would
# then fail the "eq 'DNA'" test below and silently be handled as protein.
$alph_type = uc($alph_type);
pod2usage("Option -alpha must be either DNA or PROTEIN.")
    if ($alph_type !~ m/\A(?:DNA|PROTEIN)\z/);

# check num seqs
# The message promises a positive value, so reject zero as well as negatives.
pod2usage("Option -numseqs must have a positive value.")
    if ($num_seqs <= 0);

# check pseudo total
# Zero is the default (no pseudocount), so only negative values are rejected.
pod2usage("Option -pseudo must have a non-negative value.")
    if ($pseudo_total < 0);

# check IUPAC motifs
pod2usage("At least one IUPAC motif must be specified.")
    if (!@motif_strings);

# sort the motifs (default string sort) unless -nosort was given
@motif_strings = sort(@motif_strings) unless $no_sort;

# get the background model
# The first argument is true for the DNA alphabet and false for PROTEIN;
# $bg_file is undefined when -bg was not given.
my %bg = read_background_file($alph_type eq 'DNA', $bg_file);

#
# convert the IUPAC motifs to MEME format
#
my $num_motifs = 0;
foreach my $iupac_motif (@motif_strings) {
    # Build this motif's URL by substituting its name into the pattern.
    my $url = $url_pattern;
    $url =~ s/MOTIF_NAME/$iupac_motif/g;

    my ($motif, $errors) =
        seq_to_intern(\%bg, $iupac_motif, $num_seqs, $pseudo_total, url => $url);

    # Report conversion problems on STDERR, one per line.
    print STDERR join("\n", @{$errors}), "\n" if @{$errors};

    # Only successfully converted motifs are printed and counted, so the last
    # argument is true for the first printed motif only (presumably asking
    # intern_to_meme to emit the file header -- confirm in MotifUtils).
    print intern_to_meme($motif, $print_logodds, 1, !($num_motifs++))
        if $motif;
} # convert motif to MEME format