#!/usr/bin/env perl
#
# by Xiaodong Bai
# for the extraction of sequences from GenBank based on user-defined information
#
# Usage: <this script> <infofile> <outfile>
#
#   infofile - tab-delimited text file; the first column of each data line is
#              used as a GenBank search term. Lines that are empty, start
#              with whitespace, or start with "Gene" (presumably the column
#              header row -- confirm against real input) are skipped.
#   outfile  - path of the FASTA file to write; it is overwritten if it
#              already exists.
#
# Each distinct search term is combined with the phrase "complete cds" and
# run against the 'nucleotide' database; every sequence returned is appended
# to the output file in FASTA format.
#
use strict;
use warnings;
use Bio::DB::GenBank;
use Bio::DB::Query::GenBank;    # used directly below, so load it explicitly
use Bio::SeqIO;

die "Usage: $0 <infofile> <outfile>\n" unless @ARGV == 2;

my ($infofile, $outfile) = @ARGV;

# Collect the distinct search terms (first tab-separated column).
# A hash is used so that a term listed several times is queried only once.
my %seen_term;

open my $in_fh, '<', $infofile
    or die "Cannot open '$infofile' for reading: $!\n";

LINE:
while (my $line = <$in_fh>) {
    chomp $line;
    $line =~ s/\r\z//;            # tolerate DOS/Windows line endings

    next LINE if $line eq '';     # blank line: nothing to query
    next LINE if $line =~ /^\s/;  # indented / whitespace-led lines are ignored
    next LINE if $line =~ /^Gene/;

    my ($term) = split /\t/, $line;
    next LINE unless defined $term && length $term;

    $seen_term{$term} = 1;
}

close $in_fh;

my $gb  = Bio::DB::GenBank->new;
my $out = Bio::SeqIO->new(-file => ">$outfile", -format => 'fasta');

# Sorted so the order of records in the output is reproducible between runs
# (plain hash order is randomised by perl).
for my $term (sort keys %seen_term) {
    my $query_str = $term . " AND \"complete cds\"";
    my $query     = Bio::DB::Query::GenBank->new(
        -query => $query_str,
        -db    => 'nucleotide',
    );

    # Skip this term if no sequence stream could be obtained for it.
    my $seqin = $gb->get_Stream_by_query($query)
        or next;

    while (my $seqobj = $seqin->next_seq) {
        $out->write_seq($seqobj);
    }
}