#!/usr/bin/perl -w use strict; ################################ Variables ############################### my @orfs; # location, name, and direction of ORFs # Each element is a triple [$left, $right, $info] # where $left and $right are the starting and ending points # of the ORF, and info gives the name, discription, and # direction of the ORF. my @sites; # location of putative binding sites # Each element is a pair [$left, $right] giving the starting # and ending points of the possible binding site. ############################# Main Program ############################### read_orfs("7120DB.DAT"); #print_orfs(); read_sites("motif.hits"); @sites = sort { $$a[0] <=> $$b[0] } @sites; #print_sites(); print_context("site.context"); ############################# Input Subroutines ########################## sub read_orfs { my ($orf_file) = @_; my $record_length = 230; open ORF_DATA, "<$orf_file" or die "Can't open $orf_file: $!\n"; binmode ORF_DATA; # Tell Perl this isn't a text file my $buffer; my $packing = "x4A11" # orf_name . "x8" # orf_contig . "x1a8" # orf_left . "x1a8" # orf_right . "x4A1" # orf_direction . "x18" # orf_accession . "x9" # orf_pct . "x9" # orf_eval . "x4A50" # orf_descr ; read ORF_DATA, $buffer, 5; # Skip first five bytes of the file. my $last_left = -1e100; while (read ORF_DATA, $buffer, $record_length) { length($buffer) == $record_length or die "Short record\n"; my ($orf_name, $orf_left, $orf_right, $orf_direction, $orf_descr) = unpack($packing, $buffer); foreach my $coordinate ($orf_left, $orf_right) { $coordinate = unpack("d", reverse $coordinate); } last if $last_left > $orf_left; $last_left = $orf_left; if ($orf_direction eq "d") { $orf_direction = "-->" } elsif ($orf_direction eq "c") { $orf_direction = "<--" } else { $orf_direction = "???" } push @orfs, [$orf_left, $orf_right, "$orf_direction\t$orf_name\t$orf_descr"]; } close ORF_DATA; } sub print_orfs { foreach my $orf (@orfs) { print "@$orf\n"; } } sub read_sites { my ($site_file) = @_; open BIND, "<$site_file" or die "Can't open $site_file: $!\n"; while (<BIND>) { my ($start, $sequence) = /^\s*\S+\s+\S+\s+(\d+)\s+(\S+)\s*$/ or die "Can't decode line $. of $site_file\n"; my $end = $start + length($sequence) - 1; push @sites, [$start, $end]; } close BIND; } sub print_sites { foreach my $site (@sites) { print "@$site\n"; } } ########################### Printing Subroutines ######################### sub print_context { my ($context_file) = @_; open CONTEXT, ">$context_file" or die "Can't open $context_file:$!\n"; foreach my $site (@sites) { my $o = 0; while ($o < @orfs and $orfs[$o][1] < $$site[0]) { # Site is to the right of $orfs[$o] $o = $o + 1; } if ($o == @orfs) { # site is to the right of every ORF die "Can't yet print context without right-hand ORF\n"; } elsif ($$site[1] < $orfs[$o][0]) { # Site is to the left of $orfs[$o] if ($o == 0) { die "Can't yet print context without left-hand ORF\n" } else { print_pair($site, $orfs[$o-1], $orfs[$o]) } } else { # $site overlaps $orfs[$o] while ($o < @orfs and $$site[1] >= $orfs[$o][0]) { print_overlap($site, $orfs[$o]); $o = $o + 1; } } } close CONTEXT; } sub print_pair { my ($site, $left, $right) = @_; my ($left_info, $right_info) = ($$left[2], $$right[2]); print_line($left_info, $$site[0] - $$left[1], $site, $$right[0] - $$site[1], $right_info); } sub print_overlap { my ($site, $orf) = @_; my $info = $$orf[2]; print_line($info, $$site[0] - $$orf[0], $site, $$orf[1] - $$site[1], "", ""); } sub print_line { my ($a, $b, $site, $c, $d) = @_; my $id = "$$site[0]-$$site[1]"; print CONTEXT "$a\t$b\t$id\t$c\t$d\n"; }