package GDxBase::GeneModel::Utilities; =head1 Name GDxBase::GeneModel::Utilities =head1 Description This is still a bit of a work in progress, and bits of the module don't work yet; but fetch_gene_data and create_meta_model_gff do work, and can be used to create summary and consensus gffs. These can be loaded into the gene models database to prevent the need for the data to be computed on the fly. Last Revision: 5-Oct-06 Author: James Allen Email: james.allen@cimr.cam.ac.uk =cut use warnings; #use strict; use Carp; use Config::IniFiles; use GDxBase::GeneModel::Collection; use GDxBase::GeneModel::Specification::GFF; use GDxBase::GeneModel::Specification::Summary; use GDxBase::GeneModel::Specification::Consensus; use Class::AutoClass; our @ISA = "Class::AutoClass"; our (@AUTO_ATTRIBUTES, %DEFAULTS); @AUTO_ATTRIBUTES = qw( gene_data dbh ); %DEFAULTS = ( ); Class::AutoClass::declare(__PACKAGE__); ################################################################################ =head1 Methods =cut =head2 new Title: new Usage: GDxBase::GeneModel::Utilities->new() Function: Returns: Args: # Expecting a hashref with a gene id as the key and the chromosome and start # and stop points of the gene as the value, in the format of a 3-element # arrayref. Build is an optional parameter. # Or, if no hashref is provided, look for dbh info instead, and generate # the gene boundaries automatically. Provide a flag which allows the user to # write the data to a file. Part of this involves building database tables. =cut sub _init_self { my $self = shift; } sub create_gene_data { my $self = shift; my %args = @_; confess "A 'gene_ids' parameter must be provided" unless $args{"gene_ids"}; my @gene_ids = @{$args{"gene_ids"}}; my $dbh; if ($args{"dbh"}) { $dbh = $args{"dbh"} if $args{"dbh"}; } elsif ($self->dbh) { $dbh = $self->dbh; } else { confess "A database handle must be provided"; } my $gene_data; # Here we loop through the gene ids, making a genemodelcollection object each time. # Then we call some method and it all works out brilliantly, populating # the gene_data attribute as if by magic. return(scalar(keys(%{$gene_data}))); } sub store_gene_data { my $self = shift; my %args = @_; confess "'gene_data' attribute is empty" unless $self->gene_data; my $dbh; if ($args{"dbh"}) { $dbh = $args{"dbh"} if $args{"dbh"}; } elsif ($self->dbh) { $dbh = $self->dbh; } else { confess "A database handle must be provided"; } my $table = $args{"table"} || "gene_location"; my @columns = @{$args{"columns"}} || ("gene_id", "chr", "start", "stop", "strand"); unless ($args{"append"}) { $dbh->do("DELETE FROM $table;"); } my $sql = "INSERT INTO $table (".join(", ", @columns).") VALUES "; foreach my $gene_id (keys %{$self->gene_data}) { $sql .= "($gene_id, "; $sql .= ${$self->gene_data}[0].", "; $sql .= ${$self->gene_data}[1].", "; $sql .= ${$self->gene_data}[2].", "; $sql .= ${$self->gene_data}[3]."), "; } $sql =~ s/,\s$/;/; my $rows = $dbh->do($sql); return($rows); } sub fetch_gene_data { my $self =shift; my %args = @_; my $dbh; if ($args{"dbh"}) { $dbh = $args{"dbh"} if $args{"dbh"}; } elsif ($self->dbh) { $dbh = $self->dbh; } else { confess "A database handle must be provided"; } my $table = $args{"table"} || "gene_location"; my $columns = $args{"columns"} || ["gene_id", "chr", "start", "stop", "strand"]; my $gene_ids = $args{"gene_ids"}; my $sql = "SELECT ".join(", ", @{$columns})." FROM $table"; if ($gene_ids) { $sql .= " WHERE ".$$columns[0]." IN (".join(", ", @{$gene_ids}).")"; } my %gene_data = map { shift @$_, [ @$_ ]} @{$dbh->selectall_arrayref($sql)}; $self->gene_data(\%gene_data); return(scalar(keys(%gene_data))); } ################################################################################ # Have methods to create a gff file from a given source # and to generate a summary and import the data into the database # and to generate a set of gene boundaries # and to create static gffs from a database ################################################################################ =head2 create_meta_model_gff Title: create_meta_model_gff Usage: GDxBase::GeneModel::Utilities->create_meta_model_gff($meta_models, $dir, $append) Function: Write the details for summary and consensus tracks to GFF files Returns: Nothing Args: dir - mandatory, directory in which to save the GFF file write_mode - mandatory, indicates whether to overwrite or append =cut sub create_meta_model_gff { my $self = shift; my %args = @_; confess "'gene_data' attribute is empty" unless $self->gene_data; confess "A 'gene_model_classes' parameter must be provided" unless $args{"gene_model_classes"}; my $gene_model_classes = $args{"gene_model_classes"}; my $meta_models = $args{"meta_models"} || ["Summary", "Consensus"]; my $dir = $args{"dir"} || ""; my $build; my $ignore_build = 0; if ($args{"build"}) { $build = $args{"build"}; } else { $ignore_build = 1; } my $write_mode = ">"; $write_mode .= ">" if $args{"append"}; foreach my $meta_model (@$meta_models) { open($meta_model, "$write_mode$dir$meta_model.gff") || confess "Cannot write to output file $dir$meta_model.gff"; } my $cfg = Config::IniFiles->new( -file => "/home/james/software/t1dbase_w_GDxBase/tmp/james_ini" ); foreach my $gene_id (keys %{$self->gene_data}) { my $gmc = GDxBase::GeneModel::Collection->new( chromosome => "chr".${$self->gene_data}{$gene_id}[0], build => $build, gene_id => $gene_id, gene_start => ${$self->gene_data}{$gene_id}[1], gene_stop => ${$self->gene_data}{$gene_id}[2], config_file => $cfg, ignore_build => $ignore_build ); foreach my $gene_model_class (keys %$gene_model_classes) { foreach my $source (keys %{$$gene_model_classes{$gene_model_class}}) { my $gene_model; #if ($gene_model_class eq "GFF") { $gene_model = GDxBase::GeneModel::Specification::GFF->new( source => $source, types => $$gene_model_classes{$gene_model_class}{$source} ); #} else { # $gene_model = "GDxBase::GeneModel::Specification::$gene_model_class"->new( source => $source ); #} my $count = $gmc->add_gene_model( gene_model => $gene_model, db_config_section => "GM_GFF" ); } } foreach my $meta_model (@$meta_models) { my $gene_model = "GDxBase::GeneModel::Specification::$meta_model"->new( source => $meta_model ); my $feature_count = $gmc->add_gene_model( gene_model => $gene_model, db_config_section => $meta_model ); if ($feature_count) { # OK, so now we just grab the meta models from $gmc->features, which is a hash # of arrays (key=source), where those arrays collections of feature objects. my $features = ${$gmc->features}{$meta_model}; foreach my $feature (@{$features}) { my ($chr, $source, $method, $start, $stop, $score, $strand, $phase, $group, $attr); $chr = $gmc->chromosome; $source = $meta_model; $method = "meta"; $phase = "."; $attr = "Build $build" if $build; # We need to manipulate different meta models in different ways; if # other meta models are added, it's very likely that another # conditional branch will need to be added. # Summary has a score, indicating the number of contributing # models but in Consensus this is a property of the main feature, rather # than each individual subfeature; Consensus subfeatures have a strand # value; and the Consensus feature has a display_name which shows # the types of contributing models, and a 'desc' tag, which lists the ids # of the contributing models. if ($meta_model eq "Summary") { $strand = "."; $group = "Native_id $gene_id;"; $attr .= ";EntrezGene $gene_id"; foreach my $subfeature ($feature->get_SeqFeatures) { $start = $subfeature->start; $stop = $subfeature->end; $score = $subfeature->score; print $meta_model "$chr\t$source\t$method\t$start\t$stop\t$score\t$strand\t$phase\t$group$attr\n"; } } elsif ($meta_model eq "Consensus") { $score = $feature->score; $group = "Native_id '".$feature->display_name."';"; $attr .= ";EntrezGene $gene_id"; $attr .= ";ModelIDs '".join("", $feature->get_tag_values("desc"))."'"; foreach my $subfeature ($feature->get_SeqFeatures) { $start = $subfeature->start; $stop = $subfeature->end; $strand = $subfeature->strand; print $meta_model "$chr\t$source\t$method\t$start\t$stop\t$score\t$strand\t$phase\t$group$attr\n"; } } else { $score = "."; $group = "Native_id $gene_id;"; $attr .= ";EntrezGene $gene_id"; foreach my $subfeature ($feature->get_SeqFeatures) { $start = $subfeature->start; $stop = $subfeature->end; $strand = $subfeature->strand; print $meta_model "$chr\t$source\t$method\t$start\t$stop\t$score\t$strand\t$phase\t$group\t$attr\n"; } } } } } } foreach my $meta_model (@$meta_models) { close($meta_model); } } 1;