#!/usr/bin/perl use warnings; use strict; use Getopt::Long; use DBI; ################################################################################ # Arguments - Start my ($help, $gff_dir, %source, $tax_id); GetOptions( 'help' => \$help, 'gff_dir=s' => \$gff_dir, 'source=s' => \%source, 'tax_id:i' => \$tax_id ); sub usage { print qq~ Converts sql files from the Ensembl website into a gff file. Last updated: 15-Apr-08 Author: James Allen (james.allen\@cimr.cam.ac.uk) Please provide --gff_dir --source [--tax_id] [--help] ~; print "\n"; exit; } usage if defined $help; usage unless $gff_dir && scalar(keys(%source)); my $script_dir = $0; $script_dir =~ s!/[^/]+$!!; my $name = $source{"name"}; my $build = $source{"build"}; my ($database_name) = $source{"data_base_url"} =~ /\/(\w+)\/?$/; my $data_dir = "$gff_dir/rawdata/$name\_$build"; my $gff_file = "$gff_dir/$name\_$build.gff"; # We get the db login info from a simple tab-delimited text file. open(DB_INFO, "$script_dir/database_login.txt") || die "Cannot open file $script_dir/database_login.txt"; my $header = ; my ($user, $pass, $host) = =~ /^([^\t]+)\t([^\t]*)\t([^\t\n]*)$/; close(DB_INFO); # Arguments - End ################################################################################ ################################################################################ # Create Database - Start # We implicitly work on localhost; it's less hassle, and the gff loading # script (used in a separate script) requires it anyway. my $dbh; if ($host) { $dbh = DBI->connect("DBI:mysql:mysql;host=$host", $user, $pass); } else { $dbh = DBI->connect("DBI:mysql:mysql", $user, $pass); } $dbh->do("DROP DATABASE IF EXISTS $database_name;") || die $dbh->errstr; $dbh->do("CREATE DATABASE $database_name;") || die $dbh->errstr; $dbh->disconnect; my $cmd = "mysql -u $user -p$pass "; $cmd .= "-h $host " if $host; $cmd .= "$database_name < $data_dir/$database_name.sql"; my $error = system($cmd); die "Error loading Vega database" if $error; if ($host) { $dbh = DBI->connect("DBI:mysql:$database_name;host=$host;mysql_read_default_file=$script_dir/my.cnf", $user, $pass); } else { $dbh = DBI->connect("DBI:mysql:$database_name;mysql_read_default_file=$script_dir/my.cnf", $user, $pass); } # In addition to the standard tables, we create our own for mapping between # EntrezGene and Vega IDs. my $VegaInfo_sql = "CREATE TABLE VegaInfo (". "entrez_gene_id int, ". "stable_id varchar(128) NOT NULL, ". "KEY stable_id_idx (stable_id)". ") ENGINE=MyISAM;"; $dbh->do($VegaInfo_sql) || die $dbh->errstr; # Create Database - End ################################################################################ ################################################################################ # Process Data Files - Start opendir(DATA_DIR, $data_dir) || die "Cannot find data_dir $data_dir"; foreach my $file (grep {/^.*\.txt$/} readdir(DATA_DIR)) { my ($table) = $file =~ /^(.*)\.txt$/; my $load_sql = "LOAD DATA LOCAL INFILE '$data_dir/$file' INTO TABLE $table"; if ($table eq "VegaInfo") { $load_sql .= " IGNORE 1 LINES"; } $dbh->do($load_sql) || die $dbh->errstr; } closedir(DATA_DIR); # Process Data Files - End ################################################################################ ################################################################################ # Query Database - Start $cmd = "mysql -u $user -p$pass "; $cmd .= "-h $host " if $host; $cmd .= "--skip-column-names $database_name < $script_dir/vega.sql > $gff_file"; $error = system($cmd); die "Error querying Vega database" if $error; $dbh->do("DROP DATABASE IF EXISTS $database_name;") || die $dbh->errstr; $dbh->disconnect; # Query Database - End ################################################################################