{"vars":[{"kind":2,"containerName":"","name":"vars","line":167},{"line":171,"containerName":"","kind":2,"name":"base"},{"containerName":null,"kind":13,"name":"@ATTR","line":172},{"localvar":"my","kind":13,"containerName":null,"name":"$attr","line":173,"definition":"my"},{"containerName":null,"kind":13,"name":"@ATTR","line":173},{"line":174,"name":"%OK_FIELD","containerName":null,"kind":13},{"kind":13,"containerName":null,"name":"$attr","line":174},{"definition":"sub","children":[{"name":"$self","localvar":"my","containerName":"AUTOLOAD","kind":13,"line":178,"definition":"my"},{"definition":"my","line":179,"containerName":"AUTOLOAD","localvar":"my","kind":13,"name":"$attr"},{"line":179,"kind":13,"containerName":"AUTOLOAD","name":"$AUTOLOAD"},{"line":180,"name":"$attr","kind":13,"containerName":"AUTOLOAD"},{"kind":13,"containerName":"AUTOLOAD","name":"$attr","line":181},{"line":181,"name":"$attr","kind":13,"containerName":"AUTOLOAD"},{"containerName":"AUTOLOAD","kind":13,"name":"$self","line":182},{"line":182,"containerName":"AUTOLOAD","kind":12,"name":"throw"},{"name":"$OK_FIELD","containerName":"AUTOLOAD","kind":13,"line":182},{"line":182,"name":"$attr","containerName":"AUTOLOAD","kind":13},{"line":183,"name":"$self","containerName":"AUTOLOAD","kind":13},{"name":"$attr","kind":13,"containerName":"AUTOLOAD","line":183},{"line":184,"name":"$self","kind":13,"containerName":"AUTOLOAD"},{"containerName":"AUTOLOAD","kind":13,"name":"$attr","line":184}],"containerName":"main::","name":"AUTOLOAD","line":177,"kind":12,"range":{"end":{"character":9999,"line":185},"start":{"line":177,"character":0}}},{"containerName":"main::","name":"new","children":[{"definition":"my","line":233,"name":"$class","localvar":"my","kind":13,"containerName":"new"},{"kind":13,"containerName":"new","name":"@args","line":233},{"definition":"my","line":234,"kind":13,"localvar":"my","containerName":"new","name":"$self"},{"name":"$class","kind":13,"containerName":"new","line":234},{"kind":13,"containerName":"new","name":"@args","line":234},{"line":235,"kind":13,"localvar":"my","containerName":"new","name":"$attr","definition":"my"},{"line":235,"kind":13,"containerName":"new","name":"$value"},{"kind":13,"containerName":"new","name":"@args","line":236},{"containerName":"new","kind":13,"name":"$attr","line":237},{"kind":13,"containerName":"new","name":"@args","line":237},{"line":238,"name":"$attr","kind":13,"containerName":"new"},{"line":238,"name":"$attr","kind":13,"containerName":"new"},{"line":239,"containerName":"new","kind":13,"name":"$value"},{"name":"@args","containerName":"new","kind":13,"line":239},{"line":240,"containerName":"new","kind":13,"name":"$self"},{"containerName":"new","kind":13,"name":"$attr","line":240},{"line":240,"kind":13,"containerName":"new","name":"$value"},{"name":"$self","containerName":"new","kind":13,"line":242},{"line":243,"kind":13,"containerName":"new","name":"$self"}],"detail":"($class,@args)","definition":"sub","kind":12,"range":{"start":{"character":0,"line":232},"end":{"character":9999,"line":244}},"line":232,"signature":{"label":"new($class,@args)","documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).","parameters":[{"label":"$class"},{"label":"@args"}]}},{"name":"SUPER","kind":12,"containerName":"new","line":234},{"name":"_parse","kind":12,"line":242},{"definition":"sub","containerName":"main::","name":"_parse","children":[{"definition":"my","localvar":"my","kind":13,"containerName":"_parse","name":"$self","line":257},{"line":258,"kind":13,"containerName":"_parse","name":"$self"},{"kind":13,"localvar":"my","containerName":"_parse","name":"$file","line":259,"definition":"my"},{"name":"$self","containerName":"_parse","kind":13,"line":261},{"kind":12,"containerName":"_parse","name":"warn","line":261},{"line":261,"name":"$self","containerName":"_parse","kind":13},{"localvar":"my","kind":13,"containerName":"_parse","name":"$SEQ","line":262,"definition":"my"},{"line":262,"name":"$self","kind":13,"containerName":"_parse"},{"line":262,"name":"$self","containerName":"_parse","kind":13},{"name":"throw","kind":12,"containerName":"_parse","line":262},{"definition":"my","line":263,"name":"$name","localvar":"my","containerName":"_parse","kind":13},{"line":263,"name":"$line","containerName":"_parse","kind":13},{"definition":"my","line":264,"localvar":"my","kind":13,"containerName":"_parse","name":"$seq"},{"line":265,"kind":13,"localvar":"my","containerName":"_parse","name":"@lines","definition":"my"},{"name":"$SEQ","kind":13,"containerName":"_parse","line":265},{"kind":13,"containerName":"_parse","name":"$line","line":266},{"kind":13,"containerName":"_parse","name":"@lines","line":266},{"containerName":"_parse","kind":13,"name":"$line","line":267},{"name":"$line","containerName":"_parse","kind":13,"line":268},{"containerName":"_parse","kind":13,"name":"$name","line":269},{"line":270,"containerName":"_parse","kind":13,"name":"$self"},{"containerName":"_parse","kind":13,"name":"$self","line":271},{"name":"$name","containerName":"_parse","kind":13,"line":271},{"line":271,"name":"$name","containerName":"_parse","kind":13},{"line":273,"containerName":"_parse","kind":13,"name":"$self"},{"line":274,"kind":13,"containerName":"_parse","name":"$self"},{"line":274,"kind":13,"containerName":"_parse","name":"$name"},{"line":274,"name":"$name","containerName":"_parse","kind":13},{"line":276,"name":"$self","containerName":"_parse","kind":13},{"line":276,"name":"$name","containerName":"_parse","kind":13},{"name":"$self","kind":13,"containerName":"_parse","line":277},{"line":277,"name":"$name","kind":13,"containerName":"_parse"},{"line":278,"kind":13,"containerName":"_parse","name":"$self"},{"line":278,"name":"$name","kind":13,"containerName":"_parse"},{"line":279,"kind":13,"containerName":"_parse","name":"$self"},{"name":"$name","kind":13,"containerName":"_parse","line":279},{"line":280,"name":"$self","kind":13,"containerName":"_parse"},{"kind":13,"containerName":"_parse","name":"$name","line":280},{"line":281,"kind":13,"containerName":"_parse","name":"$self"},{"name":"$name","kind":13,"containerName":"_parse","line":281},{"line":281,"kind":13,"containerName":"_parse","name":"$seq"},{"line":282,"name":"$beg","kind":13,"localvar":"my","containerName":"_parse","definition":"my"},{"kind":13,"containerName":"_parse","name":"$seq","line":283},{"line":283,"name":"$self","kind":13,"containerName":"_parse"},{"line":283,"name":"$name","containerName":"_parse","kind":13},{"name":"$seq","kind":13,"containerName":"_parse","line":283},{"line":283,"name":"$beg","kind":13,"containerName":"_parse"},{"line":283,"kind":13,"containerName":"_parse","name":"$beg"},{"definition":"my","name":"$count","localvar":"my","containerName":"_parse","kind":13,"line":284},{"line":284,"name":"$self","containerName":"_parse","kind":13},{"name":"$name","containerName":"_parse","kind":13,"line":284},{"name":"$seq","kind":13,"containerName":"_parse","line":284},{"definition":"my","line":285,"containerName":"_parse","localvar":"my","kind":13,"name":"$countGC"},{"line":285,"kind":13,"containerName":"_parse","name":"$seq"},{"kind":13,"containerName":"_parse","name":"$self","line":286},{"kind":13,"containerName":"_parse","name":"$name","line":286},{"containerName":"_parse","kind":13,"name":"$countGC","line":286},{"line":286,"kind":13,"containerName":"_parse","name":"$count"},{"line":287,"kind":13,"containerName":"_parse","name":"$seq"},{"line":290,"kind":13,"containerName":"_parse","name":"$seq"},{"line":290,"containerName":"_parse","kind":13,"name":"$line"},{"line":290,"kind":13,"containerName":"_parse","name":"$seq"},{"line":295,"name":"$self","containerName":"_parse","kind":13},{"line":296,"localvar":"my","kind":13,"containerName":"_parse","name":"$QUAL","definition":"my"},{"name":"$self","kind":13,"containerName":"_parse","line":296},{"containerName":"_parse","kind":12,"name":"throw","line":296},{"kind":13,"containerName":"_parse","name":"@lines","line":297},{"line":297,"containerName":"_parse","kind":13,"name":"$QUAL"},{"line":300,"name":"$self","containerName":"_parse","kind":13},{"name":"warn","containerName":"_parse","kind":12,"line":300},{"line":300,"kind":13,"containerName":"_parse","name":"$self"},{"containerName":"_parse","kind":13,"name":"$self","line":301},{"line":301,"name":"qualfile","kind":12,"containerName":"_parse"},{"definition":"my","kind":13,"localvar":"my","containerName":"_parse","name":"$QUAL","line":302},{"line":302,"name":"$self","kind":13,"containerName":"_parse"},{"name":"throw","kind":12,"containerName":"_parse","line":302},{"containerName":"_parse","kind":13,"name":"@lines","line":303},{"line":303,"name":"$QUAL","containerName":"_parse","kind":13},{"containerName":"_parse","kind":13,"name":"$self","line":306},{"containerName":"_parse","kind":12,"name":"warn","line":306},{"line":306,"name":"$self","containerName":"_parse","kind":13},{"line":307,"name":"@lines","containerName":"_parse","kind":13},{"localvar":"my","containerName":"_parse","kind":13,"name":"@vals","line":310,"definition":"my"},{"name":"@slice","kind":13,"containerName":"_parse","line":310},{"line":310,"name":"$num","kind":13,"containerName":"_parse"},{"line":310,"name":"$tot","containerName":"_parse","kind":13},{"line":310,"name":"$vals","containerName":"_parse","kind":13},{"containerName":"_parse","localvar":"my","kind":13,"name":"$qual","line":311,"definition":"my"},{"line":312,"kind":13,"containerName":"_parse","name":"$line"},{"line":312,"kind":13,"containerName":"_parse","name":"@lines"},{"line":313,"kind":13,"containerName":"_parse","name":"$line"},{"kind":13,"containerName":"_parse","name":"$line","line":314},{"name":"$name","kind":13,"containerName":"_parse","line":315},{"name":"@vals","kind":13,"containerName":"_parse","line":316},{"kind":13,"containerName":"_parse","name":"$qual","line":316},{"line":317,"kind":13,"containerName":"_parse","name":"@slice"},{"name":"@vals","kind":13,"containerName":"_parse","line":317},{"kind":13,"containerName":"_parse","name":"$self","line":317},{"line":317,"kind":13,"containerName":"_parse","name":"$name"},{"name":"$self","kind":13,"containerName":"_parse","line":317},{"line":317,"kind":13,"containerName":"_parse","name":"$name"},{"line":318,"kind":13,"containerName":"_parse","name":"$vals"},{"containerName":"_parse","kind":13,"name":"@slice","line":318},{"line":319,"name":"$self","kind":13,"containerName":"_parse"},{"line":319,"name":"$name","kind":13,"containerName":"_parse"},{"kind":13,"containerName":"_parse","name":"$vals","line":319},{"line":320,"containerName":"_parse","kind":13,"name":"$qual"},{"containerName":"_parse","kind":13,"name":"$num","line":321},{"name":"@slice","containerName":"_parse","kind":13,"line":321},{"containerName":"_parse","kind":13,"name":"$tot","line":322},{"kind":13,"containerName":"_parse","name":"$num","line":322},{"containerName":"_parse","kind":13,"name":"$num","line":324},{"line":324,"name":"@slice","kind":13,"containerName":"_parse"},{"kind":13,"containerName":"_parse","name":"$self","line":325},{"line":325,"name":"$name","kind":13,"containerName":"_parse"},{"line":325,"name":"$tot","kind":13,"containerName":"_parse"},{"line":325,"name":"$num","containerName":"_parse","kind":13},{"line":326,"name":"$tot","kind":13,"containerName":"_parse"},{"name":"$qual","kind":13,"containerName":"_parse","line":329},{"name":"$line","kind":13,"containerName":"_parse","line":329},{"line":329,"containerName":"_parse","kind":13,"name":"$qual"},{"name":"$self","kind":13,"containerName":"_parse","line":334},{"line":335,"containerName":"_parse","localvar":"my","kind":13,"name":"$INFO","definition":"my"},{"name":"$self","containerName":"_parse","kind":13,"line":335},{"line":335,"kind":12,"containerName":"_parse","name":"throw"},{"line":336,"name":"@lines","containerName":"_parse","kind":13},{"name":"$INFO","kind":13,"containerName":"_parse","line":336},{"name":"$self","containerName":"_parse","kind":13,"line":339},{"containerName":"_parse","kind":12,"name":"warn","line":339},{"name":"$self","containerName":"_parse","kind":13,"line":339},{"line":340,"name":"$self","kind":13,"containerName":"_parse"},{"line":340,"containerName":"_parse","kind":12,"name":"infofile"},{"line":341,"kind":13,"localvar":"my","containerName":"_parse","name":"$INFO","definition":"my"},{"line":341,"name":"$self","kind":13,"containerName":"_parse"},{"name":"throw","kind":12,"containerName":"_parse","line":341},{"line":342,"kind":13,"containerName":"_parse","name":"@lines"},{"name":"$INFO","containerName":"_parse","kind":13,"line":342},{"line":345,"name":"$self","containerName":"_parse","kind":13},{"containerName":"_parse","kind":12,"name":"warn","line":345},{"containerName":"_parse","kind":13,"name":"$self","line":345},{"containerName":"_parse","kind":13,"name":"@lines","line":346},{"line":349,"containerName":"_parse","kind":13,"name":"@lines"},{"line":352,"name":"$self","containerName":"_parse","kind":13},{"line":352,"containerName":"_parse","kind":13,"name":"$self"},{"line":358,"name":"$self","kind":13,"containerName":"_parse"},{"line":359,"name":"$STDERR_LUCY","localvar":"my","kind":13,"containerName":"_parse","definition":"my"},{"containerName":"_parse","kind":13,"name":"$self","line":359},{"name":"throw","containerName":"_parse","kind":12,"line":359},{"line":360,"kind":13,"containerName":"_parse","name":"@lines"},{"kind":13,"containerName":"_parse","name":"$STDERR_LUCY","line":360},{"line":363,"kind":13,"containerName":"_parse","name":"$self"},{"line":363,"name":"warn","kind":12,"containerName":"_parse"},{"kind":13,"containerName":"_parse","name":"$self","line":363},{"kind":13,"containerName":"_parse","name":"$self","line":364},{"line":364,"name":"stderrfile","kind":12,"containerName":"_parse"},{"definition":"my","name":"$STDERR_LUCY","kind":13,"localvar":"my","containerName":"_parse","line":365},{"name":"$self","kind":13,"containerName":"_parse","line":365},{"kind":12,"containerName":"_parse","name":"throw","line":365},{"line":366,"kind":13,"containerName":"_parse","name":"@lines"},{"kind":13,"containerName":"_parse","name":"$STDERR_LUCY","line":366},{"name":"$self","kind":13,"containerName":"_parse","line":369},{"kind":12,"containerName":"_parse","name":"warn","line":369},{"line":369,"containerName":"_parse","kind":13,"name":"$self"},{"line":370,"name":"@lines","kind":13,"containerName":"_parse"},{"line":373,"kind":13,"containerName":"_parse","name":"$self"},{"name":"@lines","kind":13,"containerName":"_parse","line":374},{"line":375,"name":"$self","containerName":"_parse","kind":13},{"name":"$self","containerName":"_parse","kind":13,"line":376},{"line":377,"containerName":"_parse","kind":13,"name":"$self"},{"containerName":"_parse","kind":13,"name":"$self","line":378},{"line":379,"name":"$self","kind":13,"containerName":"_parse"},{"name":"$self","containerName":"_parse","kind":13,"line":381},{"kind":13,"containerName":"_parse","name":"$self","line":382},{"line":387,"name":"@lines","kind":13,"containerName":"_parse"},{"kind":13,"containerName":"_parse","name":"$self","line":388}],"kind":12,"range":{"start":{"line":256,"character":0},"end":{"line":391,"character":9999}},"line":256},{"line":258,"kind":12,"name":"seqfile"},{"line":261,"name":"lucy_verbose","kind":12},{"line":262,"kind":12,"name":"seqfile"},{"name":"fwd_desig","kind":12,"line":270},{"kind":12,"name":"sequences","line":271},{"line":271,"kind":12,"name":"direction"},{"kind":12,"name":"rev_desig","line":273},{"line":274,"name":"sequences","kind":12},{"kind":12,"name":"direction","line":274},{"kind":12,"name":"sequences","line":276},{"kind":12,"name":"min_clone_len","line":276},{"name":"sequences","kind":12,"line":277},{"line":277,"kind":12,"name":"max_clone_len"},{"line":278,"kind":12,"name":"sequences"},{"kind":12,"name":"med_clone_len","line":278},{"kind":12,"name":"sequences","line":279},{"line":279,"kind":12,"name":"beg_clear"},{"line":280,"name":"sequences","kind":12},{"kind":12,"name":"end_clear","line":280},{"line":281,"kind":12,"name":"sequences"},{"name":"length_raw","kind":12,"line":281},{"line":283,"kind":12,"name":"sequences"},{"name":"sequence","kind":12,"line":283},{"name":"sequences","kind":12,"line":284},{"name":"length_clear","kind":12,"line":284},{"name":"sequences","kind":12,"line":286},{"line":286,"kind":12,"name":"per_GC"},{"line":295,"name":"qualfile","kind":12},{"line":300,"kind":12,"name":"lucy_verbose"},{"line":306,"kind":12,"name":"lucy_verbose"},{"line":317,"name":"sequences","kind":12},{"line":317,"kind":12,"name":"beg_clear"},{"line":317,"name":"sequences","kind":12},{"name":"end_clear","kind":12,"line":317},{"kind":12,"name":"sequences","line":319},{"kind":12,"name":"quality","line":319},{"line":325,"kind":12,"name":"sequences"},{"line":325,"name":"avg_quality","kind":12},{"kind":12,"name":"infofile","line":334},{"kind":12,"name":"lucy_verbose","line":339},{"line":345,"kind":12,"name":"lucy_verbose"},{"kind":12,"name":"sequences","line":352},{"kind":12,"name":"full_length","line":352},{"line":352,"kind":12,"name":"sequences"},{"line":358,"kind":12,"name":"stderrfile"},{"line":363,"kind":12,"name":"lucy_verbose"},{"name":"lucy_verbose","kind":12,"line":369},{"name":"adv_stderr","kind":12,"line":373},{"line":375,"kind":12,"name":"reject"},{"kind":12,"name":"reject","line":376},{"name":"reject","kind":12,"line":377},{"name":"reject","kind":12,"line":378},{"line":379,"name":"sequences","kind":12},{"name":"polyA","kind":12,"line":379},{"name":"reject","kind":12,"line":381},{"line":382,"name":"sequences","kind":12},{"kind":12,"name":"reject","line":388},{"children":[{"definition":"my","line":406,"containerName":"get_Seq_Objs","localvar":"my","kind":13,"name":"$self"},{"line":407,"containerName":"get_Seq_Objs","localvar":"my","kind":13,"name":"$seqobj","definition":"my"},{"line":407,"containerName":"get_Seq_Objs","kind":13,"name":"@seqobjs"},{"line":408,"localvar":"my","kind":13,"containerName":"get_Seq_Objs","name":"$key","definition":"my"},{"name":"$self","kind":13,"containerName":"get_Seq_Objs","line":408}],"name":"get_Seq_Objs","containerName":"main::","definition":"sub","line":405,"kind":12,"range":{"start":{"character":0,"line":405},"end":{"line":408,"character":9999}}},{"kind":12,"name":"sequences","line":408},{"line":409,"name":"$seqobj","kind":13,"containerName":null},{"name":"Bio","containerName":"PrimarySeq","kind":12,"line":409},{"line":409,"name":"new","kind":12,"containerName":"main::"},{"line":411,"name":"@seqobjs","kind":13,"containerName":null},{"line":411,"name":"$seqobj","kind":13,"containerName":null},{"name":"@seqobjs","kind":13,"containerName":null,"line":413},{"detail":"($self,$key)","definition":"sub","name":"get_Seq_Obj","containerName":"main::","children":[{"line":428,"name":"$self","localvar":"my","containerName":"get_Seq_Obj","kind":13,"definition":"my"},{"containerName":"get_Seq_Obj","kind":13,"name":"$key","line":428},{"definition":"my","name":"$seqobj","kind":13,"localvar":"my","containerName":"get_Seq_Obj","line":429},{"line":429,"kind":12,"containerName":"get_Seq_Obj","name":"new"},{"line":431,"kind":13,"containerName":"get_Seq_Obj","name":"$seqobj"}],"signature":{"parameters":[{"label":"$self"},{"label":"$key"}],"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence ","label":"get_Seq_Obj($self,$key)"},"kind":12,"range":{"start":{"line":427,"character":0},"end":{"character":9999,"line":432}},"line":427},{"line":429,"name":"Bio","kind":12,"containerName":"PrimarySeq"},{"line":446,"range":{"start":{"character":0,"line":446},"end":{"line":448,"character":9999}},"kind":12,"definition":"sub","children":[{"name":"$self","localvar":"my","containerName":"get_sequence_names","kind":13,"line":447,"definition":"my"},{"line":448,"kind":13,"localvar":"my","containerName":"get_sequence_names","name":"@keys","definition":"my"},{"line":448,"containerName":"get_sequence_names","kind":13,"name":"$self"}],"containerName":"main::","name":"get_sequence_names"},{"line":448,"kind":12,"name":"sequences"},{"line":449,"containerName":null,"kind":13,"name":"@keys"},{"signature":{"label":"sequence($self,$key)","parameters":[{"label":"$self"},{"label":"$key"}],"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   "},"line":462,"kind":12,"range":{"end":{"line":465,"character":9999},"start":{"character":0,"line":462}},"definition":"sub","detail":"($self,$key)","children":[{"line":463,"kind":13,"localvar":"my","containerName":"sequence","name":"$self","definition":"my"},{"line":463,"containerName":"sequence","kind":13,"name":"$key"},{"name":"$self","containerName":"sequence","kind":13,"line":464},{"name":"$key","kind":13,"containerName":"sequence","line":464}],"containerName":"main::","name":"sequence"},{"line":464,"name":"sequences","kind":12},{"line":464,"kind":12,"name":"sequence"},{"children":[{"definition":"my","line":479,"localvar":"my","containerName":"quality","kind":13,"name":"$self"},{"name":"$key","kind":13,"containerName":"quality","line":479},{"line":480,"name":"$self","kind":13,"containerName":"quality"},{"line":480,"containerName":"quality","kind":13,"name":"$key"}],"name":"quality","containerName":"main::","definition":"sub","detail":"($self,$key)","line":478,"range":{"start":{"line":478,"character":0},"end":{"line":481,"character":9999}},"kind":12,"signature":{"label":"quality($self,$key)","documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence","parameters":[{"label":"$self"},{"label":"$key"}]}},{"name":"sequences","kind":12,"line":480},{"line":480,"name":"quality","kind":12},{"definition":"sub","detail":"($self,$key)","children":[{"definition":"my","line":494,"name":"$self","containerName":"avg_quality","localvar":"my","kind":13},{"line":494,"name":"$key","containerName":"avg_quality","kind":13},{"line":495,"containerName":"avg_quality","kind":13,"name":"$self"},{"line":495,"kind":13,"containerName":"avg_quality","name":"$key"}],"containerName":"main::","name":"avg_quality","signature":{"label":"avg_quality($self,$key)","documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence","parameters":[{"label":"$self"},{"label":"$key"}]},"line":493,"kind":12,"range":{"start":{"line":493,"character":0},"end":{"character":9999,"line":496}}},{"name":"sequences","kind":12,"line":495},{"line":495,"name":"avg_quality","kind":12},{"detail":"($self,$key)","definition":"sub","containerName":"main::","name":"direction","children":[{"name":"$self","localvar":"my","containerName":"direction","kind":13,"line":512,"definition":"my"},{"line":512,"kind":13,"containerName":"direction","name":"$key"},{"containerName":"direction","kind":13,"name":"$self","line":513},{"line":513,"name":"$key","containerName":"direction","kind":13},{"line":513,"name":"$self","kind":13,"containerName":"direction"},{"kind":13,"containerName":"direction","name":"$key","line":513}],"signature":{"label":"direction($self,$key)","parameters":[{"label":"$self"},{"label":"$key"}],"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence"},"kind":12,"range":{"start":{"line":511,"character":0},"end":{"line":515,"character":9999}},"line":511},{"kind":12,"name":"sequences","line":513},{"kind":12,"name":"direction","line":513},{"line":513,"name":"sequences","kind":12},{"name":"direction","kind":12,"line":513},{"signature":{"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence\n\n\nsub direction {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction}; \n    return \"\";\n}\n\n=head2 length_raw\n\n Title   :  length_raw\n Usage   :  $lucyObj->length_raw($seqname)\n Function:  returns the length of a DNA sequence prior to quality/ vector \n\t    trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence","parameters":[{"label":"$self"},{"label":"$key"}],"label":"length_raw($self,$key)"},"range":{"end":{"character":9999,"line":531},"start":{"line":528,"character":0}},"kind":12,"line":528,"detail":"($self,$key)","definition":"sub","name":"length_raw","containerName":"main::","children":[{"definition":"my","name":"$self","localvar":"my","kind":13,"containerName":"length_raw","line":529},{"line":529,"name":"$key","containerName":"length_raw","kind":13},{"line":530,"name":"$self","kind":13,"containerName":"length_raw"},{"containerName":"length_raw","kind":13,"name":"$key","line":530}]},{"kind":12,"name":"sequences","line":530},{"line":530,"kind":12,"name":"length_raw"},{"children":[{"definition":"my","line":545,"name":"$self","localvar":"my","containerName":"length_clear","kind":13},{"line":545,"name":"$key","kind":13,"containerName":"length_clear"},{"line":546,"name":"$self","containerName":"length_clear","kind":13},{"containerName":"length_clear","kind":13,"name":"$key","line":546}],"containerName":"main::","name":"length_clear","definition":"sub","detail":"($self,$key)","line":544,"kind":12,"range":{"start":{"character":0,"line":544},"end":{"line":547,"character":9999}},"signature":{"label":"length_clear($self,$key)","parameters":[{"label":"$self"},{"label":"$key"}],"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence\n\n\nsub direction {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction}; \n    return \"\";\n}\n\n=head2 length_raw\n\n Title   :  length_raw\n Usage   :  $lucyObj->length_raw($seqname)\n Function:  returns the length of a DNA sequence prior to quality/ vector \n\t    trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_raw {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_raw};\n}\n\n=head2 length_clear\n\n Title   :  length_clear\n Usage   :  $lucyObj->length_clear($seqname)\n Function:  returns the length of a DNA sequence following quality/ vector   \n            trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence"}},{"name":"sequences","kind":12,"line":546},{"line":546,"name":"length_clear","kind":12},{"line":560,"range":{"start":{"line":560,"character":0},"end":{"line":563,"character":9999}},"kind":12,"signature":{"label":"start_clear($self,$key)","documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence\n\n\nsub direction {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction}; \n    return \"\";\n}\n\n=head2 length_raw\n\n Title   :  length_raw\n Usage   :  $lucyObj->length_raw($seqname)\n Function:  returns the length of a DNA sequence prior to quality/ vector \n\t    trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_raw {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_raw};\n}\n\n=head2 length_clear\n\n Title   :  length_clear\n Usage   :  $lucyObj->length_clear($seqname)\n Function:  returns the length of a DNA sequence following quality/ vector   \n            trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_clear};\n}\n\n=head2 start_clear\n\n Title   :  start_clear\n Usage   :  $lucyObj->start_clear($seqname)\n Function:  returns the beginning position of good quality, vector free DNA sequence \n\t    determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence","parameters":[{"label":"$self"},{"label":"$key"}]},"children":[{"name":"$self","localvar":"my","kind":13,"containerName":"start_clear","line":561,"definition":"my"},{"name":"$key","containerName":"start_clear","kind":13,"line":561},{"name":"$self","kind":13,"containerName":"start_clear","line":562},{"line":562,"kind":13,"containerName":"start_clear","name":"$key"}],"containerName":"main::","name":"start_clear","definition":"sub","detail":"($self,$key)"},{"name":"sequences","kind":12,"line":562},{"name":"beg_clear","kind":12,"line":562},{"name":"end_clear","containerName":"main::","children":[{"kind":13,"localvar":"my","containerName":"end_clear","name":"$self","line":578,"definition":"my"},{"line":578,"kind":13,"containerName":"end_clear","name":"$key"},{"line":579,"name":"$self","kind":13,"containerName":"end_clear"},{"containerName":"end_clear","kind":13,"name":"$key","line":579}],"detail":"($self,$key)","definition":"sub","range":{"start":{"character":0,"line":577},"end":{"line":580,"character":9999}},"kind":12,"line":577,"signature":{"parameters":[{"label":"$self"},{"label":"$key"}],"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence\n\n\nsub direction {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction}; \n    return \"\";\n}\n\n=head2 length_raw\n\n Title   :  length_raw\n Usage   :  $lucyObj->length_raw($seqname)\n Function:  returns the length of a DNA sequence prior to quality/ vector \n\t    trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_raw {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_raw};\n}\n\n=head2 length_clear\n\n Title   :  length_clear\n Usage   :  $lucyObj->length_clear($seqname)\n Function:  returns the length of a DNA sequence following quality/ vector   \n            trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_clear};\n}\n\n=head2 start_clear\n\n Title   :  start_clear\n Usage   :  $lucyObj->start_clear($seqname)\n Function:  returns the beginning position of good quality, vector free DNA sequence \n\t    determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub start_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{beg_clear};\n}\n\n\n=head2 end_clear\n\n Title   :  end_clear\n Usage   :  $lucyObj->end_clear($seqname)\n Function:  returns the ending position of good quality, vector free DNA sequence\n            determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence","label":"end_clear($self,$key)"}},{"line":579,"name":"sequences","kind":12},{"line":579,"kind":12,"name":"end_clear"},{"signature":{"label":"per_GC($self,$key)","documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence\n\n\nsub direction {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction}; \n    return \"\";\n}\n\n=head2 length_raw\n\n Title   :  length_raw\n Usage   :  $lucyObj->length_raw($seqname)\n Function:  returns the length of a DNA sequence prior to quality/ vector \n\t    trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_raw {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_raw};\n}\n\n=head2 length_clear\n\n Title   :  length_clear\n Usage   :  $lucyObj->length_clear($seqname)\n Function:  returns the length of a DNA sequence following quality/ vector   \n            trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_clear};\n}\n\n=head2 start_clear\n\n Title   :  start_clear\n Usage   :  $lucyObj->start_clear($seqname)\n Function:  returns the beginning position of good quality, vector free DNA sequence \n\t    determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub start_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{beg_clear};\n}\n\n\n=head2 end_clear\n\n Title   :  end_clear\n Usage   :  $lucyObj->end_clear($seqname)\n Function:  returns the ending position of good quality, vector free DNA sequence\n            determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub end_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{end_clear};\n}\n\n=head2 per_GC\n\n Title   :  per_GC\n Usage   :  $lucyObj->per_GC($seqname)\n Function:  returns the percente of the good quality, vector free DNA sequence\n            determined by Lucy.\n Returns :  float\n Args    :  name of a sequence","parameters":[{"label":"$self"},{"label":"$key"}]},"kind":12,"range":{"start":{"line":593,"character":0},"end":{"line":596,"character":9999}},"line":593,"detail":"($self,$key)","definition":"sub","name":"per_GC","containerName":"main::","children":[{"line":594,"kind":13,"localvar":"my","containerName":"per_GC","name":"$self","definition":"my"},{"containerName":"per_GC","kind":13,"name":"$key","line":594},{"line":595,"containerName":"per_GC","kind":13,"name":"$self"},{"line":595,"kind":13,"containerName":"per_GC","name":"$key"}]},{"name":"sequences","kind":12,"line":595},{"kind":12,"name":"per_GC","line":595},{"definition":"sub","detail":"($self,$key)","children":[{"line":612,"name":"$self","localvar":"my","kind":13,"containerName":"full_length","definition":"my"},{"containerName":"full_length","kind":13,"name":"$key","line":612},{"kind":13,"containerName":"full_length","name":"$self","line":613},{"line":613,"name":"$key","kind":13,"containerName":"full_length"}],"name":"full_length","containerName":"main::","signature":{"parameters":[{"label":"$self"},{"label":"$key"}],"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence\n\n\nsub direction {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction}; \n    return \"\";\n}\n\n=head2 length_raw\n\n Title   :  length_raw\n Usage   :  $lucyObj->length_raw($seqname)\n Function:  returns the length of a DNA sequence prior to quality/ vector \n\t    trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_raw {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_raw};\n}\n\n=head2 length_clear\n\n Title   :  length_clear\n Usage   :  $lucyObj->length_clear($seqname)\n Function:  returns the length of a DNA sequence following quality/ vector   \n            trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_clear};\n}\n\n=head2 start_clear\n\n Title   :  start_clear\n Usage   :  $lucyObj->start_clear($seqname)\n Function:  returns the beginning position of good quality, vector free DNA sequence \n\t    determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub start_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{beg_clear};\n}\n\n\n=head2 end_clear\n\n Title   :  end_clear\n Usage   :  $lucyObj->end_clear($seqname)\n Function:  returns the ending position of good quality, vector free DNA sequence\n            determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub end_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{end_clear};\n}\n\n=head2 per_GC\n\n Title   :  per_GC\n Usage   :  $lucyObj->per_GC($seqname)\n Function:  returns the percente of the good quality, vector free DNA sequence\n            determined by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub per_GC {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{per_GC};\n}\n\n=head2 full_length\n\n Title   :  full_length\n Usage   :  $lucyObj->full_length($seqname)\n Function:  returns the truth value for whether or not the sequence read was\n            full length (ie. vector present on both ends of read).  This method\n            depends on the user having provided the 'info' file (Lucy must be\n            run with the -debug 'info_filename' option to get this file).\n Returns :  boolean \n Args    :  name of a sequence","label":"full_length($self,$key)"},"line":611,"kind":12,"range":{"end":{"character":9999,"line":615},"start":{"line":611,"character":0}}},{"line":613,"name":"sequences","kind":12},{"kind":12,"name":"full_length","line":613},{"signature":{"label":"polyA($self,$key)","parameters":[{"label":"$self"},{"label":"$key"}],"documentation":"1;\n# $Id: Lucy.pm 16123 2009-09-17 12:57:27Z cjfields $ \n#\n# BioPerl module for Bio::Tools::Lucy\n#\n# Copyright Her Majesty the Queen of England\n# written by Andrew Walsh (paeruginosa@hotmail.com) during employment with \n# Agriculture and Agri-food Canada, Cereal Research Centre, Winnipeg, MB\n#\n# You may distribute this module under the same terms as perl itself\n# POD documentation - main docs before the code\n\n=head1 NAME\n\nBio::Tools::Lucy - Object for analyzing the output from Lucy,\n  a vector and quality trimming program from TIGR\n\n=head1 SYNOPSIS\n\n  # Create the Lucy object from an existing Lucy output file\n  @params = ('seqfile' => 'lucy.seq', 'lucy_verbose' => 1);\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n  # Get names of all sequences\n  $names = $lucyObj->get_sequence_names();\n\n  #  Print seq and qual values for sequences >400 bp in order to run CAP3\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  # Get an array of Bio::PrimarySeq objects\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n\n=head1 DESCRIPTION\n\nBio::Tools::Lucy.pm provides methods for analyzing the sequence and\nquality values generated by Lucy program from TIGR.\n\nLucy will identify vector, poly-A/T tails, and poor quality regions in\na sequence.  (www.genomics.purdue.edu/gcg/other/lucy.pdf)\n\nThe input to Lucy can be the Phred sequence and quality files\ngenerated from running Phred on a set of chromatograms.\n\nLucy can be obtained (free of charge to academic users) from\nwww.tigr.org/softlab\n\nThere are a few methods that will only be available if you make some\nminor changes to the source for Lucy and then recompile.  The changes\nare in the 'lucy.c' file and there is a diff between the original and\nthe modified file in the Appendix\n\nPlease contact the author of this module if you have any problems\nmaking these modifications.\n\nYou do not have to make these modifications to use this module.\n\n=head2 Creating a Lucy object\n\n  @params = ('seqfile' => 'lucy.seq', 'adv_stderr' => 1, \n\t     'fwd_desig' => '_F', 'rev_desig' => '_R');\n  $lucyObj = Bio::Tools::Lucy->new(@params);\n\n=head2 Using a Lucy object\n\n  You should get an array with the sequence names in order to use\n  accessor methods.  Note: The Lucy binary program will fail unless\n  the sequence names provided as input are unique.\n\n  $names_ref = $lucyObj->get_sequence_names();\n\n  This code snippet will produce a Fasta format file with sequence\n  lengths and %GC in the description line.\n\n  foreach $name (@$names) {\n      print FILE \">$name\\t\",\n\t\t $lucyObj->length_clear($name), \"\\t\",\n\t\t $lucyObj->per_GC($name), \"\\n\",\n\t\t $lucyObj->sequence($name), \"\\n\";\n  }\n\n\n  Print seq and qual values for sequences >400 bp in order to assemble\n  them with CAP3 (or other assembler).\n\n  foreach $name (@$names) {\n      next unless $lucyObj->length_clear($name) > 400;\n      print SEQ \">$name\\n\", $lucyObj->sequence($name), \"\\n\";\n      print QUAL \">$name\\n\", $lucyObj->quality($name), \"\\n\";\n  }\n\n  Get all the sequences as Bio::PrimarySeq objects (eg., for use with\n  Bio::Tools::Run::StandaloneBlast to perform BLAST).\n\n  @seqObjs = $lucyObj->get_Seq_Objs();\n\n  Or use only those sequences that are full length and have a Poly-A\n  tail.\n\n  foreach $name (@$names) {\n      next unless ($lucyObj->full_length($name) and $lucy->polyA($name));\n      push @seqObjs, $lucyObj->get_Seq_Obj($name);\n  }\n\n\n  Get the names of those sequences that were rejected by Lucy.\n\n  $rejects_ref = $lucyObj->get_rejects();\n\n  Print the names of the rejects and 1 letter code for reason they\n  were rejected.\n\n  foreach $key (sort keys %$rejects_ref) {\n      print \"$key:  \", $rejects_ref->{$key};\n  }\n\n  There is a lot of other information available about the sequences\n  analyzed by Lucy (see APPENDIX).  This module can be used with the\n  DBI module to store this sequence information in a database.\n\n=head1 FEEDBACK\n\n=head2 Mailing Lists\n\nUser feedback is an integral part of the evolution of this and other\nBioperl modules.  Send your comments and suggestions preferably to one\nof the Bioperl mailing lists.  Your participation is much appreciated.\n\n  bioperl-l@bioperl.org                  - General discussion\n  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists\n\n=head2 Support \n\nPlease direct usage questions or support issues to the mailing list:\n\nI<bioperl-l@bioperl.org>\n\nrather than to the module maintainer directly. Many experienced and \nreponsive experts will be able look at the problem and quickly \naddress it. Please include a thorough description of the problem \nwith code and data examples if at all possible.\n\n=head2 Reporting Bugs\n\nReport bugs to the Bioperl bug tracking system to help us keep track\nthe bugs and their resolution. Bug reports can be submitted via the web:\n\n  http://bugzilla.open-bio.org/\n\n=head1 AUTHOR\n\nAndrew G. Walsh\t\tpaeruginosa@hotmail.com\n\n=head1 APPENDIX\n\nMethods available to Lucy objects are described below.  Please note\nthat any method beginning with an underscore is considered internal\nand should not be called directly.\n\n\n\npackage Bio::Tools::Lucy;\n\nuse vars qw($AUTOLOAD @ATTR %OK_FIELD);\nuse strict;\nuse Bio::PrimarySeq;\n\nuse base qw(Bio::Root::Root Bio::Root::IO);\n@ATTR = qw(seqfile qualfile stderrfile infofile lucy_verbose fwd_desig rev_desig adv_stderr); \nforeach my $attr (@ATTR) {\n    $OK_FIELD{$attr}++\n}\n\nsub AUTOLOAD {\n    my $self = shift;\n    my $attr = $AUTOLOAD;\n    $attr =~ s/.*:://;\n    $attr = lc $attr;\n    $self->throw(\"Unallowed parameter: $attr !\") unless $OK_FIELD{$attr};\n    $self->{$attr} = shift if @_;\n    return $self->{$attr};\n}\n\n=head2 new\n\n Title\t :  new\n Usage\t :  $lucyObj = Bio::Tools::Lucy->new(seqfile => lucy.seq, rev_desig => '_R', \n\t    fwd_desig => '_F')\n Function:  creates a Lucy object from Lucy analysis files\n Returns :  reference to Bio::Tools::Lucy object\n Args\t :  seqfile\tFasta sequence file generated by Lucy\n\t       qualfile\tQuality values file generated by Lucy\n\t       infofile\tInfo file created when Lucy is run with -debug \n                     'infofile' option\n\t       stderrfile\tStandard error captured from Lucy when Lucy is run \n\t\t\t with -info option and STDERR is directed to stderrfile \n\t\t\t (ie. lucy ... 2> stderrfile).\n\t\t\t Info in this file will include sequences dropped for low \n\t\t\t quality. If you've modified Lucy source (see adv_stderr below), \n\t\t\t it will also include info on which sequences were dropped because \n\t\t\t they were vector, too short, had no insert, and whether a poly-A \n\t\t\t tail was found (if Lucy was run with -cdna option).\n\t       lucy_verbose verbosity level (0-1).  \n\t       fwd_desig\tThe string used to determine whether sequence is a \n          forward read.  \n\t\t\t The parser will assume that this match will occus at the \n\t\t\t end of the sequence name string.\n\t       rev_desig\tAs above, for reverse reads. \n \t       adv_stderr\tCan be set to a true value (1).  Will only work if \n          you have modified \n\t\t\t the Lucy source code as outlined in DESCRIPTION and capture \n\t\t\t the standard error from Lucy.\n\nIf you don't provide filenames for qualfile, infofile or stderrfile,\nthe module will assume that .qual, .info, and .stderr are the file\nextensions and search in the same directory as the .seq file for these\nfiles.\n\nFor example, if you create a Lucy object with $lucyObj =\nBio::Tools::Lucy-E<gt>new(seqfile =E<gt>lucy.seq), the module will\nfind lucy.qual, lucy.info and lucy.stderr.\n\nYou can omit any or all of the quality, info or stderr files, but you\nwill not be able to use all of the object methods (see method\ndocumentation below).\n\n\nsub new {\n\tmy ($class,@args) = @_;\n\tmy $self = $class->SUPER::new(@args);\n\tmy ($attr, $value);\n\twhile (@args) {\n\t\t$attr = shift @args;\n\t\t$attr = lc $attr;\n\t\t$value = shift @args;\n\t\t$self->{$attr} = $value;\n\t}\n\t&_parse($self);\n\treturn $self;\n}\n\n=head2 _parse\n\n Title\t :  _parse\n Usage\t :  n/a (internal function)\n Function:  called by new() to parse Lucy output files\n Returns :  nothing\n Args\t :  none\n\n\nsub _parse {\n\tmy $self = shift;\n\t$self->{seqfile} =~ /^(\\S+)\\.\\S+$/;\n\tmy $file = $1;\n\n\t$self->warn(\"Opening $self->{seqfile} for parsing...\\n\") if $self->{lucy_verbose};\n\topen my $SEQ, $self->{seqfile} or $self->throw(\"Could not open sequence file: $self->{seqfile}\");\n\tmy ($name, $line);\n\tmy $seq = \"\";\n\tmy @lines = <$SEQ>;\n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)/) {    \n\t\t\t$name = $1;\n\t\t\tif ($self->{fwd_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"F\" if $name =~ /^(\\S+)($self->{fwd_desig})$/;\n\t\t\t}\n\t\t\tif ($self->{rev_desig}) {\n\t\t\t\t$self->{sequences}{$name}{direction} = \"R\" if $name =~ /^(\\S+)($self->{rev_desig})$/;\n\t\t\t}\n\t\t\t$self->{sequences}{$name}{min_clone_len} = $2; # this is used for TIGR Assembler, as are $3 and $4\n\t\t\t$self->{sequences}{$name}{max_clone_len} = $3;\n\t\t\t$self->{sequences}{$name}{med_clone_len} = $4; \n\t\t\t$self->{sequences}{$name}{beg_clear} = $5;\n\t\t\t$self->{sequences}{$name}{end_clear} = $6;\n\t\t\t$self->{sequences}{$name}{length_raw} = $seq =~ tr/[AGCTN]//; # from what I've seen, these are the bases Phred calls.  Please let me know if I'm wrong.     \n\t\t\tmy $beg = $5-1; # substr function begins with index 0\n\t\t\t$seq = $self->{sequences}{$name}{sequence} = substr ($seq, $beg, $6-$beg);\n\t\t\tmy $count = $self->{sequences}{$name}{length_clear} = $seq =~ tr/[AGCTN]//;\n\t\t\tmy $countGC =  $seq =~ tr/[GC]//;\n\t\t\t$self->{sequences}{$name}{per_GC} = $countGC/$count * 100;\n\t\t\t$seq = \"\";\n\t\t}\n\t\telse {\n\t\t\t$seq = $line.$seq;\n\t\t}\n\t}\n\n\t# now parse quality values (check for presence of quality file first) \n\tif ($self->{qualfile}) {\n\t\topen my $QUAL, \"$self->{qualfile}\" or $self->throw(\"Could not open quality file: $self->{qualfile}\");\n\t\t@lines = <$QUAL>;\n\t}\n\telsif (-e \"$file.qual\") {\n\t\t$self->warn(\"You did not set qualfile, but I'm opening $file.qual\\n\") if $self->{lucy_verbose};\n\t$self->qualfile(\"$file.qual\");\n\t\topen my $QUAL, \"$file.qual\" or $self->throw(\"Could not open quality file: $file.qual\");\n\t\t@lines = <$QUAL>;\n\t}\n    else {\n\t\t $self->warn(\"I did not find a quality file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t @lines = ();\n    }\n\n\tmy (@vals, @slice, $num, $tot, $vals);  \n\tmy $qual = \"\"; \n\twhile ($line = pop @lines) {\n\t\tchomp $line;\n\t\tif ($line =~ /^>(\\S+)/) {\n\t\t\t$name = $1;\n\t\t\t@vals = split /\\s/ , $qual;\n\t\t\t@slice = @vals[$self->{sequences}{$name}{beg_clear} - 1 .. $self->{sequences}{$name}{end_clear} - 1];\n\t\t\t$vals = join \"\\t\", @slice;\n\t\t\t$self->{sequences}{$name}{quality} = $vals;\n\t\t\t$qual = \"\";\n\t\t\tforeach $num (@slice) {\n\t\t\t\t$tot += $num;\n\t\t\t}\n\t\t\t$num = @slice;\n\t\t\t$self->{sequences}{$name}{avg_quality} = $tot/$num;\n\t\t\t$tot = 0;\n\t\t}\n\t\telse {\n\t\t\t$qual = $line.$qual;\n\t\t}\n\t}\n\n\t# determine whether reads are full length\n\tif ($self->{infofile}) {\n\t\topen my $INFO, \"$self->{infofile}\" or $self->throw(\"Could not open info file: $self->{infofile}\");\n\t\t@lines = <$INFO>;\n\t}\n\telsif (-e \"$file.info\") {\n\t\t$self->warn(\"You did not set infofile, but I'm opening $file.info\\n\") if $self->{lucy_verbose};\n\t\t$self->infofile(\"$file.info\");\n\t\topen my $INFO, \"$file.info\" or $self->throw(\"Could not open info file: $file.info\");\n\t\t@lines = <$INFO>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find an info file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tforeach (@lines) {\n\t\t/^(\\S+).+CLV\\s+(\\d+)\\s+(\\d+)$/;\n\t\tif ($2>0 && $3>0) {\n\t\t\t$self->{sequences}{$1}{full_length} = 1 if $self->{sequences}{$1}; # will show cleavage info for rejected sequences too\n\t\t}\n\t}\n\n\n\t# parse rejects (and presence of poly-A if Lucy has been modified)\n\tif ($self->{stderrfile}) {\n\t\topen my $STDERR_LUCY, \"$self->{stderrfile}\" or $self->throw(\"Could not open quality file: $self->{stderrfile}\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telsif (-e \"$file.stderr\") {\n\t\t$self->warn(\"You did not set stderrfile, but I'm opening $file.stderr\\n\") if $self->{lucy_verbose};\n\t\t$self->stderrfile(\"$file.stderr\");\n\t\topen my $STDERR_LUCY, \"$file.stderr\" or $self->throw(\"Could not open quality file: $file.stderr\");\n\t\t@lines = <$STDERR_LUCY>;\n\t}\n\telse {\n\t\t$self->warn(\"I did not find a standard error file.  You will not be able to use all of the accessor methods.\\n\") if $self->{lucy_verbose};\n\t\t@lines = ();\n\t}\n\n\tif ($self->{adv_stderr}) {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"Q\" if /dropping\\s+(\\S+)/;\n\t\t\t$self->{reject}{$1} = \"V\" if /Vector: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"E\" if /Empty: (\\S+)/;\n\t\t\t$self->{reject}{$1} = \"S\" if m{Short/ no insert: (\\S+)};\n\t\t\t$self->{sequences}{$1}{polyA} = 1 if /(\\S+) has PolyA/;\n\t\t\tif (/Dropped PolyA: (\\S+)/) {\n\t\t\t\t$self->{reject}{$1} = \"P\";\n\t\t\t\tdelete $self->{sequences}{$1};\n\t\t\t}\n\t\t}\n\t}\n\telse {\n\t\tforeach (@lines) {\n\t\t\t$self->{reject}{$1} = \"R\" if /dropping\\s+(\\S+)/;\n\t\t}\n\t}\n}\n\n=head2 get_Seq_Objs\n\n Title   :  get_Seq_Objs\n Usage   :  $lucyObj->get_Seq_Objs()\n Function:  returns an array of references to Bio::PrimarySeq objects \n\t    where -id = 'sequence name' and -seq = 'sequence'\n\n Returns :  array of Bio::PrimarySeq objects\n Args\t :  none\n\n\nsub get_Seq_Objs {\n    my $self = shift;\n    my($seqobj, @seqobjs);\n    foreach my $key (sort keys %{$self->{sequences}}) {\n\t$seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n\t\t\t\t\t-id => \"$key\");\n\tpush @seqobjs, $seqobj;\n    }\n    return \\@seqobjs;\n} \n\n=head2 get_Seq_Obj\n\n Title   :  get_Seq_Obj\n Usage   :  $lucyObj->get_Seq_Obj($seqname)\n Function:  returns reference to a Bio::PrimarySeq object where -id = 'sequence name'\n\t    and -seq = 'sequence'\n Returns :  reference to Bio::PrimarySeq object\n Args\t :  name of a sequence \n\n\nsub get_Seq_Obj {\n    my ($self, $key) = @_;\n    my $seqobj = Bio::PrimarySeq->new( -seq => \"$self->{sequences}{$key}{sequence}\",\n                                    -id => \"$key\");\n    return $seqobj;\n}\n\n=head2 get_sequence_names\n\n Title   :  get_sequence_names\n Usage   :  $lucyObj->get_sequence_names\n Function:  returns reference to an array of names of the sequences analyzed by Lucy.\n\t    These names are required for most of the accessor methods.  \n\t    Note: The Lucy binary will fail unless sequence names are unique.\n Returns :  array reference\n Args\t :  none \n\n\nsub get_sequence_names {\n    my $self = shift;\n    my @keys = sort keys %{$self->{sequences}};\n    return \\@keys;\n}\n\n=head2 sequence\n\n Title   :  sequence\n Usage   :  $lucyObj->sequence($seqname)\n Function:  returns the DNA sequence of one of the sequences analyzed by Lucy.\n Returns :  string\n Args\t :  name of a sequence                   \n\n\nsub sequence {\n    my ($self, $key) = @_;\n    return $self->{sequences}{$key}{sequence};\n}\n\n=head2 quality\n\n Title   :  quality\n Usage   :  $lucyObj->quality($seqname)\n Function:  returns the quality values of one of the sequences analyzed by Lucy.\n\t    This method depends on the user having provided a quality file.\n Returns :  string\n Args    :  name of a sequence\n\n\nsub quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{quality};\n}\n\n=head2 avg_quality\n\n Title   :  avg_quality\n Usage   :  $lucyObj->avg_quality($seqname)\n Function:  returns the average quality value for one of the sequences analyzed by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub avg_quality {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{avg_quality};\n}\n\n=head2 direction\n\n Title   :  direction\n Usage   :  $lucyObj->direction($seqname)\n Function:  returns the direction for one of the sequences analyzed by Lucy\n\t    providing that 'fwd_desig' or 'rev_desig' were set when the\n \t    Lucy object was created.\n\t    Strings returned are: 'F' for forward, 'R' for reverse.  \n Returns :  string \n Args    :  name of a sequence\n\n\nsub direction {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{direction} if $self->{sequences}{$key}{direction}; \n    return \"\";\n}\n\n=head2 length_raw\n\n Title   :  length_raw\n Usage   :  $lucyObj->length_raw($seqname)\n Function:  returns the length of a DNA sequence prior to quality/ vector \n\t    trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_raw {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_raw};\n}\n\n=head2 length_clear\n\n Title   :  length_clear\n Usage   :  $lucyObj->length_clear($seqname)\n Function:  returns the length of a DNA sequence following quality/ vector   \n            trimming by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub length_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{length_clear};\n}\n\n=head2 start_clear\n\n Title   :  start_clear\n Usage   :  $lucyObj->start_clear($seqname)\n Function:  returns the beginning position of good quality, vector free DNA sequence \n\t    determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub start_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{beg_clear};\n}\n\n\n=head2 end_clear\n\n Title   :  end_clear\n Usage   :  $lucyObj->end_clear($seqname)\n Function:  returns the ending position of good quality, vector free DNA sequence\n            determined by Lucy.\n Returns :  integer\n Args    :  name of a sequence\n\n\nsub end_clear {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{end_clear};\n}\n\n=head2 per_GC\n\n Title   :  per_GC\n Usage   :  $lucyObj->per_GC($seqname)\n Function:  returns the percente of the good quality, vector free DNA sequence\n            determined by Lucy.\n Returns :  float\n Args    :  name of a sequence\n\n\nsub per_GC {\n    my($self, $key) = @_;\n    return $self->{sequences}{$key}{per_GC};\n}\n\n=head2 full_length\n\n Title   :  full_length\n Usage   :  $lucyObj->full_length($seqname)\n Function:  returns the truth value for whether or not the sequence read was\n            full length (ie. vector present on both ends of read).  This method\n            depends on the user having provided the 'info' file (Lucy must be\n            run with the -debug 'info_filename' option to get this file).\n Returns :  boolean \n Args    :  name of a sequence\n\n\nsub full_length {\n    my($self, $key) = @_;\n    return 1 if $self->{sequences}{$key}{full_length};\n    return 0;\n}\n\n=head2 polyA\n\n Title   :  polyA\n Usage   :  $lucyObj->polyA($seqname)\n Function:  returns the truth value for whether or not a poly-A tail was detected\n            and clipped by Lucy.  This method depends on the user having modified\n            the source for Lucy as outlined in DESCRIPTION and invoking Lucy with\n            the -cdna option and saving the standard error.\n            Note, the final sequence will not show the poly-A/T region.\n Returns :  boolean\n Args    :  name of a sequence"},"line":631,"kind":12,"range":{"start":{"character":0,"line":631},"end":{"character":9999,"line":635}},"definition":"sub","detail":"($self,$key)","children":[{"localvar":"my","containerName":"polyA","kind":13,"name":"$self","line":632,"definition":"my"},{"name":"$key","kind":13,"containerName":"polyA","line":632},{"line":633,"containerName":"polyA","kind":13,"name":"$self"},{"name":"$key","kind":13,"containerName":"polyA","line":633}],"containerName":"main::","name":"polyA"},{"line":633,"name":"sequences","kind":12},{"line":633,"name":"polyA","kind":12},{"definition":"sub","containerName":"main::","name":"get_rejects","children":[{"line":657,"localvar":"my","kind":13,"containerName":"get_rejects","name":"$self","definition":"my"},{"name":"$self","containerName":"get_rejects","kind":13,"line":658}],"kind":12,"range":{"end":{"character":9999,"line":659},"start":{"character":0,"line":656}},"line":656},{"name":"reject","kind":12,"line":658}],"version":5}