#! /usr/bin/perl
#7/5/2008 LSS
#
# Extracts a fixed-size window around each requested position from a FASTA
# file and writes every window as its own FASTA record.
#
# Usage: extract <sequences> <positions>
#   <sequences>  FASTA file holding the source sequences.
#   <positions>  Tab-delimited file: column 1 is a sequence name (matched
#                against the first whitespace-delimited word of each FASTA
#                header), column 2 is the position to centre the window on.
#
# Output is written to "<sequences>.txt"; progress and warnings go to STDERR.

use strict;
use warnings;

# Number of characters taken on each side of a position, so a full window is
# 2 * $FLANK characters long.
my $FLANK = 5000;

# Print the usage text to STDERR and terminate the script.
sub usage {
    print STDERR "usage: extract <sequences> <positions>\n\n";
    print STDERR "Extracts positions from source sequence file and outputs a fasta delimited file.\n";
    print STDERR "Creator: Laurie Stevison\n";
    exit;
} #end sub

if ($#ARGV != 1) {    # $#ARGV is the last index, so 1 means two arguments
    usage();
}

my $source         = $ARGV[0];
my $positions_file = $ARGV[1];
my $output         = $source . ".txt";

open(my $read_src, '<', $source)
    or die "Cannot open source file '$source': $!\n";
open(my $read_pos, '<', $positions_file)
    or die "Cannot open positions file '$positions_file': $!\n";
open(my $write_out, '>', $output)
    or die "Cannot open output file '$output': $!\n";

print STDERR "Reading input file...";

# Parallel arrays: $group_name[$i] is the sequence name for $positions[$i].
my @group_name;
my @positions;

while (my $line = <$read_pos>) {
    $line =~ s/\r?\n\z//;               # strip LF or CRLF line endings
    next if $line =~ /\A\s*\z/;         # ignore blank lines

    my ($name, $pos) = split(/\t/, $line);
    if (!defined $pos || $pos !~ /\A\s*\d+\s*\z/) {
        warn "Skipping line $. of '$positions_file': expected <name><TAB><position>\n";
        next;
    }

    push(@group_name, $name);
    push(@positions,  $pos);
} #end while
close($read_pos);

print STDERR "done.\n\n";
print STDERR "Reading source file...";

# Sequence text keyed by the first word of its header. Keying by name keeps
# names and sequences paired even when a header has no sequence lines, and a
# repeated name keeps the last record (the last match also won in the old
# linear scan).
my %sequence_for;
my $current_name;

while (my $line = <$read_src>) {
    $line =~ s/\r?\n\z//;

    # Any line containing '>' is a header; the name is the text after the
    # first '>'.
    my $marker = index($line, '>');
    if ($marker >= 0) {
        my ($short_name) = split(" ", substr($line, $marker + 1));
        $current_name = defined $short_name ? $short_name : "";
        $sequence_for{$current_name} = "";
        next;
    }

    # Sequence data that precedes the first header belongs to no record.
    next unless defined $current_name;

    $sequence_for{$current_name} .= $line;
} #end while
close($read_src);

print STDERR "done.\n\n";
print STDERR "Extracting positions...\n";

for my $i (0 .. $#positions) {
    my $grp_name = $group_name[$i];
    my $pos      = $positions[$i];

    my $sequence = $sequence_for{$grp_name};
    if (!defined $sequence) {
        # Never fall back to a sequence left over from an earlier position.
        warn "No sequence named '$grp_name' in '$source'; skipping position $pos\n";
        next;
    }

    # Window is [pos - FLANK, pos + FLANK). Clamp the start at 0 because a
    # negative substr offset would count from the END of the string.
    my $end   = $pos + $FLANK;
    my $start = $pos - $FLANK;
    $start = 0 if $start < 0;

    if ($start >= length($sequence)) {
        warn "Position $pos lies beyond the end of '$grp_name'; skipping\n";
        next;
    }

    my $extract = substr($sequence, $start, $end - $start);

    print {$write_out} ">${grp_name}: $pos\n";
    print {$write_out} "$extract\n";
} #end for

# Buffered write errors only surface when the handle is closed.
close($write_out) or die "Error writing '$output': $!\n";

print STDERR "done.\n\n";