package GDxBase::GeneModel::Specification::Summary;

=head1 Name

GDxBase::GeneModel::Specification::Summary

=head1 Description

A subclass of GDxBase::GeneModel::Specification, to gather features from
multiple other gene models, and display them as a single summary track.

Last Revision: 17-Feb-06

Author: James Allen

Email: james.allen@cimr.cam.ac.uk

=cut

use warnings;
use strict;
use Carp;

###############################################################################
#
#$Id: Summary.pm,v 1.3 2008/09/03 20:08:24 phonybone Exp $
#
###############################################################################

use Class::AutoClass;
use GDxBase::GeneModel::Specification;
use GDxBase::GeneModel::Specification::GFF;

# NOTE(review): Bio::SeqFeature::Generic is instantiated below but is not
# loaded by this file; presumably one of the modules above pulls it in.
# Confirm, and add an explicit "use" here if that is not guaranteed.

our @ISA = ("Class::AutoClass", "GDxBase::GeneModel::Specification");

our (@AUTO_ATTRIBUTES, %DEFAULTS);
@AUTO_ATTRIBUTES = qw(types);
%DEFAULTS = ();
Class::AutoClass::declare(__PACKAGE__);

################################################################################

=head1 Methods

=cut

=head2 new

 Title:    new
 Usage:    GDxBase::GeneModel::Specification::Summary->new($source, $tax_id,
             $section, [$display_gene_ref], [$collection_start],
             [$collection_stop], [$glyph_style], [$glyph_bgcolor],
             [$glyph_fgcolor], [$glyph_utr_color])
 Function: Create new GDxBase::GeneModel::Specification::Summary object
 Returns:  A GDxBase::GeneModel::Specification::Summary object
 Args:     source           - mandatory, e.g. EnsEMBL, Vega, CCDS
           tax_id           - mandatory, e.g. 9606
           section          - mandatory, 'GENEMODEL' by default
           display_gene_ref - optional, undef by default
           collection_start - optional, same as gene_start by default
           collection_stop  - optional, same as gene_stop by default
           glyph_style      - optional, 'processed_transcript' by default
           glyph_bgcolor    - optional, 'navajowhite' by default
           glyph_fgcolor    - optional, 'midnightblue' by default
           glyph_utr_color  - optional, 'deeppink' by default
 Notes:    This class itself declares a single attribute, 'types'. When it
           is set, get_features loads a stored summary through
           GDxBase::GeneModel::Specification::GFF instead of computing one.
           NOTE(review): the positional usage shown above may be out of
           date; the only constructor call in this file passes named
           arguments (source => ..., types => ...). Confirm against the
           parent class.

=cut

sub _init_self {
    my $self = shift;
    # Nobody here but us chickens.
}

################################################################################

=head2 get_features

 Title:    get_features
 Usage:    $summary_spec->get_features($gmc, $config_values)
 Function: Get a single summary feature for display, whose sub-features
           record how many transcripts cover each stretch of the panel
 Returns:  A three element list: a hash reference mapping $gmc->chromosome
           to an array reference holding the summary feature, followed by
           two undef values. The array is empty (and a warning is issued)
           when 'types' is set but no stored summary feature is found.
 Args:     gmc           - mandatory, an object providing the 'features',
                           'chromosome', 'collection_start' and
                           'collection_stop' methods
           config_values - optional, a hash reference; its 'exclude' entry
                           names the source(s) to leave out of the summary,
                           as an array reference or a single string
 Notes:    If the 'types' attribute is set, the summary is fetched through
           GDxBase::GeneModel::Specification::GFF; otherwise it is built
           on the fly from the features held by gmc.

=cut

sub get_features {
    my $self = shift;
    my ($gmc, $config_values) = @_;

    # If types is defined, then we know that we want to look up a previously
    # generated bunch of summary data, that has been stored in a GFF database.
    my $summary_transcript = $self->types
        ? $self->_summary_from_gff($gmc, $config_values)
        : $self->_summary_from_models($gmc, $config_values);

    # Only the stored-data path can come back empty handed; previously that
    # ended in a method call on an undefined value.
    if (!defined $summary_transcript) {
        carp "No stored summary feature available; returning an empty track";
        return ({$gmc->chromosome => []}, undef, undef);
    }

    # We now have a single summary feature with a set of subfeatures, and the
    # structure is the same whichever way it was obtained, so it can be
    # formatted in one place.
    _summary_style($summary_transcript);

    return ({$gmc->chromosome => [$summary_transcript]}, undef, undef);
}

# Fetch a stored summary feature via the GFF specification and rebuild its
# sub-features as Bio::SeqFeature::Generic objects. Returns undef if the
# lookup yields no feature.
sub _summary_from_gff {
    my ($self, $gmc, $config_values) = @_;

    my $gff_gene_model = GDxBase::GeneModel::Specification::GFF->new(
        source => $self->source,
        types  => $self->types
    );
    my ($features) = $gff_gene_model->get_features($gmc, $config_values);
    return unless $features;

    my $summary_transcript = $$features[0];
    return unless defined $summary_transcript;

    # The GFF module returns subfeatures as Bio::DB::GFF::Features, which is
    # no good for us, as later we want to add tags which control the display
    # of the summary chunks.
    foreach my $stored ($summary_transcript->remove_SeqFeatures) {
        my $replacement = Bio::SeqFeature::Generic->new(
            -start => $stored->start,
            -end   => $stored->stop,
            -score => $stored->score
        );
        $summary_transcript->add_SeqFeature($replacement, "EXPAND");
    }

    return $summary_transcript;
}

# Build the summary feature on the fly. For each point within the panel
# bounds we count how many transcripts cross that position, gather adjacent
# points with the same count into a chunk, and make each non-zero chunk a
# sub-feature scored with its count, giving a sort of bar chart.
sub _summary_from_models {
    my ($self, $gmc, $config_values) = @_;

    my $panel_start = $gmc->collection_start;
    my $panel_stop  = $gmc->collection_stop;

    # 'exclude' may be an array reference or a single source name.
    my %exclude;
    my $excluded = $$config_values{"exclude"};
    if ($excluded && $excluded ne "") {
        my @sources = ref($excluded) ? @{$excluded} : ($excluded);
        foreach my $source (@sources) {
            $exclude{$source} = 1;
        }
    }

    my %summary;
    foreach my $source (keys %{$gmc->features}) {
        next if exists $exclude{$source};

        my $features = ${$gmc->features}{$source}{$gmc->chromosome};
        foreach my $feature (@{$features}) {
            # Each feature is a transcript, made up of sub-features;
            # and it's the sub-features that we're interested in.
            foreach my $sub_feature (_summary_sub_features($feature)) {
                next unless _summary_counts_sub_feature($sub_feature);

                # Positions outside the panel are never read back, so
                # there is no point in counting them.
                my $from = $sub_feature->start;
                my $to   = $sub_feature->end;
                if (defined $panel_start && $from < $panel_start) {
                    $from = $panel_start;
                }
                if (defined $panel_stop && $to > $panel_stop) {
                    $to = $panel_stop;
                }

                for (my $i = $from; $i <= $to; $i++) {
                    $summary{$i}++;
                }
            }
        }
    }

    # Create a generic feature and add the chunks to it, so that it can be
    # treated as if it was a regular gene model.
    my $summary_transcript = Bio::SeqFeature::Generic->new();
    my $chunks_added = 0;
    foreach my $chunk (_summary_chunks(\%summary, $panel_start, $panel_stop)) {
        next unless $$chunk{count};

        my $feature = Bio::SeqFeature::Generic->new(
            -start => $$chunk{start},
            -end   => $$chunk{stop},
            -score => $$chunk{count}
        );
        $summary_transcript->add_SeqFeature($feature, "EXPAND");
        $chunks_added++;
    }

    if ($chunks_added) {
        # Add a horizontal line to make it look prettier.
        my $line = Bio::SeqFeature::Generic->new(
            -start => $panel_start,
            -end   => $panel_stop,
            -score => 0
        );
        $summary_transcript->add_SeqFeature($line, "EXPAND");
    }

    return $summary_transcript;
}

# Return the sub-features of a transcript feature, using the accessor that
# matches its class; warn and return nothing for any other class.
sub _summary_sub_features {
    my ($feature) = @_;

    my $class = ref($feature);
    if ($class eq "Bio::SeqFeature::Generic") {
        return $feature->get_SeqFeatures;
    }
    if ($class eq "Bio::SeqFeature::Transcript") {
        return $feature->features;
    }

    warn "Unrecognised feature type ", $class, "\n";
    return ();
}

# A sub-feature is counted unless it carries a "PositionVariant" tag whose
# joined values are anything other than "1".
sub _summary_counts_sub_feature {
    my ($sub_feature) = @_;

    return 1 unless $sub_feature->has_tag("PositionVariant");
    return join("", $sub_feature->get_tag_values("PositionVariant")) eq "1";
}

# Group adjacent panel positions that share a count into chunks, which
# drastically reduces the number of things to render. Returns a list of
# hash references with 'start', 'stop' and 'count' keys, zero-count chunks
# included.
sub _summary_chunks {
    my ($count_at, $panel_start, $panel_stop) = @_;

    my $previous_count =
        exists $$count_at{$panel_start} ? $$count_at{$panel_start} : 0;
    my $previous_start = $panel_start;
    my $previous_stop  = $panel_start;

    my @chunks;
    for (my $point = $panel_start; $point <= $panel_stop; $point++) {
        my $count = defined $$count_at{$point} ? $$count_at{$point} : 0;

        if ($count == $previous_count) {
            $previous_stop = $point;
        }
        else {
            push @chunks, { start => $previous_start,
                            stop  => $previous_stop,
                            count => $previous_count };
            $previous_count = $count;
            $previous_start = $point;
            $previous_stop  = $point;
        }
    }

    # We need this to record the final chunk.
    push @chunks, { start => $previous_start,
                    stop  => $previous_stop,
                    count => $previous_count };

    return @chunks;
}

# Tag the summary feature and each of its sub-features with the glyph,
# colours, height and mouseover text used for display. A score of zero
# (or no score at all) is drawn as a thin line; anything else is drawn as
# a segment whose height is six times its score.
sub _summary_style {
    my ($summary_transcript) = @_;

    $summary_transcript->add_tag_value("mouseover_text", "Summary of all transcripts");

    foreach my $feature ($summary_transcript->get_SeqFeatures) {
        # An undefined score compares equal to zero anyway; normalising it
        # here just avoids the "uninitialized value" warnings.
        my $score = $feature->score;
        $score = 0 unless defined $score;

        my $bgcolor = '#0000AA';

        if ($score == 0) {
            $feature->add_tag_value("glyph", "line");
            $feature->add_tag_value("bgcolor", $bgcolor);
            $feature->add_tag_value("fgcolor", $bgcolor);
            $feature->add_tag_value("height", 1);
            next;
        }

        my $mouseover_text = $score." transcripts in this section";

        # We go from light to dark blue; scores above 4 keep the darkest.
        if ($score <= 1) {
            $bgcolor = '#00CCFF';
            $mouseover_text =~ s/transcripts/transcript/;
        }
        elsif ($score <= 2) {
            $bgcolor = '#0077FF';
        }
        elsif ($score <= 3) {
            $bgcolor = '#0044FF';
        }
        elsif ($score <= 4) {
            $bgcolor = '#0033BB';
        }

        $feature->add_tag_value("mouseover_text", $mouseover_text);
        $feature->add_tag_value("glyph", "segments");
        $feature->add_tag_value("bgcolor", $bgcolor);
        $feature->add_tag_value("fgcolor", $bgcolor);
        $feature->add_tag_value("height", ($score*6));
    }

    return;
}

################################################################################

1;