#!/usr/bin/perl

use warnings;
use strict;

use Getopt::Long;

################################################################################
# Arguments - Start

# Command-line options:
#   --gff_dir  directory that holds rawdata/ and receives the .gff output
#   --source   key=value pairs; the keys read below are name, build, data_file
#   --tax_id   accepted for command-line compatibility; not used by this script
#   --help     print usage and exit
my ($help, $gff_dir, %source, $tax_id);

GetOptions(
    'help'      => \$help,
    'gff_dir=s' => \$gff_dir,
    'source=s'  => \%source,
    'tax_id:i'  => \$tax_id,
);

# Print the usage message and terminate the script.
sub usage {
    print qq~
Converts a tab-delimited file from the CCDS website into a gff file.

Last updated: 09-Apr-08
Author: James Allen (james.allen\@cimr.cam.ac.uk)

Please provide --gff_dir --source [--tax_id] [--help]
~;
    print "\n";
    exit;
}

usage() if defined $help;
usage() unless $gff_dir && scalar(keys(%source));

# Every --source key that is interpolated into a path below must be present,
# otherwise the script would build nonsensical file names from undef values.
usage() if grep { !defined $source{$_} } qw(name build data_file);

my $name      = $source{"name"};
my $build     = $source{"build"};
my $data_file = "$gff_dir/rawdata/$name\_$build/" . $source{"data_file"};
my $gff_file  = "$gff_dir/$name\_$build.gff";

# Arguments - End
################################################################################

################################################################################
# Process Data File - Start

# The data file name may contain shell wildcards; use the first match.
my ($data_path) = glob($data_file);
die "Cannot open file $data_file: no matching file." unless defined $data_path;

open(my $data_fh, '<', $data_path)
    or die "Cannot open file $data_path: $!";
open(my $gff_fh, '>', $gff_file)
    or die "Cannot open file $gff_file: $!";

LINE:
while (my $line = <$data_fh>) {
    # Strip the line terminator (Unix or DOS) before splitting into columns.
    $line =~ s/\r?\n\z//;

    # Skip blank lines and '#' comment/header lines.
    next LINE if $line eq '' || $line =~ /^#/;

    my ($chromosome, $g_accession, $gene, $gene_id, $ccds_id, $ccds_status,
        $cds_strand, $cds_from, $cds_to, $cds_locations) = split(/\t/, $line);

    # A row without the location column cannot yield any CDS features.
    next LINE unless defined $cds_locations;
    next LINE if $ccds_status =~ /^Withdrawn/;

    # Drop the version suffix from the CCDS identifier (e.g. ".1").
    $ccds_id =~ s/\.\d+//;

    # Locations look like "[a-b, c-d, ...]"; remove the brackets, then split.
    $cds_locations =~ tr/[]//d;

    foreach my $cds_location (split(/,\s*/, $cds_locations)) {
        my ($start, $stop) = $cds_location =~ /(\d+)\-(\d+)/;

        # Skip anything that is not a start-stop pair rather than emitting a
        # bogus 1..1 feature built from undefined coordinates.
        unless (defined $start && defined $stop) {
            warn "Skipping unparsable CDS location '$cds_location' "
               . "for $ccds_id at line $.\n";
            next;
        }

        # Both coordinates are shifted by one; presumably this converts
        # 0-based CCDS positions to 1-based GFF positions -- TODO confirm
        # against the CCDS file format description.
        $start += 1;
        $stop  += 1;

        print {$gff_fh}
            "chr$chromosome\t$name\tCDS\t$start\t$stop\t.\t$cds_strand" .
            "\t.\tNative_id $ccds_id;Build $build;EntrezGene $gene_id\n";
    }
}

close($data_fh);

# Buffered write errors only surface at close, so check it.
close($gff_fh) or die "Cannot close file $gff_file: $!";

# Process Data File - End
################################################################################