# BioMirror/DataBanks.pm
# Bio-Mirror perl packages
# d.gilbert

## dbs to add
##  pdb !
##  x blast dbs - for mirroring only? or add blast server autoupdate opt as per srs?
##  rice est data -? but as .tar.Z of directories - but hard to update/sync that way?
##  x pfam?
##  x add classes for EBI's embl emdaily
##  other from ncbi, embl,
##  iubiodocs ? bionet news ?
##  x flybase parts ?

=head1 NAME

BioMirror::DataBanks -- package file to include BioMirror::Data subclasses.

=head1 DESCRIPTION

This package does nothing itself; its file holds BioMirror::Data subclasses.
These data classes are loaded by BioMirror::Data:: methods (see C<...>).

This can be customized locally and included separately at runtime.
For example, C<...>

See also EBI.pm, NCBI.pm, Pfam.pm, Flybase.pm, Swiss.pm, Transfac.pm and other
BioMirror::DataBanks perl modules.

 -- drop this in favor of BioMirror::loadPackages() method?

=cut

package BioMirror::DataBanks;

use strict;
use warnings;

# Placeholder entry point; intentionally does nothing.
sub main {
    ## do anything?
}

#-------------
# see also DDBJ.pm
# see also EBI.pm
# see also Flybase.pm
# see also NCBI.pm
# see also Pfam.pm
# see also Swiss.pm
# see also Transfac.pm
#-------------

#-------------
# REBASE, the Restriction Enzyme Database (NEB).
package BioMirror::Rebase;
our @ISA = qw( BioMirror::Data );

# Constructor: hands the REBASE defaults to the parent constructor.
# Caller-supplied key/value pairs (@_) are appended after the defaults.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(
        srsdb     => 'REBASE',
        name      => 'Rebase',
        makeflags => $BioMirror::Data::kFromArchive | $BioMirror::Data::kDoSrsIndex,
        dosummary => 1,    # add to summary table
        source    => {
            home          => 'NEB',
            comment       => 'The Restriction Enzyme Database',
            url           => 'ftp://vent.neb.com/pub/rebase/',
            web           => 'http://rebase.neb.com/rebase/',
            get_patt      => '(^allenz\.|^bairoch\.|^gcg\.|^README|^Readme|^REBASE.DOC|^NEWS\.)',
            name_mappings => 's|\.[0-9]+||',
            recursive     => 'false',
        },
        mirror_dir   => '$zpath/rebase/',
        expanded_dir => '$dpath/rebase/',
        data         => [ 'allenz=rebase.dat', 'bairoch' ],
        @_
    );
    return $self;
}

# Returns the release number found in a chunk of rebase.dat (the text after
# "REBASE version", e.g. "904"), or undef when the chunk is missing or has
# no such text.
sub getRelease {
    ## REBASE version 904
    my $self = shift;
    my $rel  = undef;
    my $df   = BioMirror::replaceVars( $self->expanded_dir . 'rebase.dat' );
    my $buf  = $self->readChunk($df);

    # readChunk may yield nothing (e.g. file absent); do not match on undef.
    if ( defined $buf && $buf =~ m/REBASE version ([0-9\.]+)/ ) {
        $rel = "$1";
    }
    return $rel;
}

#-------------
# PIR, the Protein Information Resource (NBRF).
package BioMirror::PIR;
our @ISA = qw( BioMirror::Data );

# Constructor: hands the PIR defaults to the parent constructor.
# Caller-supplied key/value pairs (@_) are appended after the defaults.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(
        srsdb     => 'PIR',
        name      => 'PIR',
        makeflags => $BioMirror::Data::kFromArchive | $BioMirror::Data::kDoSrsIndex,
        dosummary => 1,    # add to summary table
        source    => {
            home    => 'NBRF',
            comment => 'Protein Information Resource',
            url     => 'ftp://nbrfa.georgetown.edu/pir/databases/pir_codata/',
            ## 'ftp://ncbi.nlm.nih.gov/repository/PIR/ascii/',
            web          => 'http://www-nbrf.georgetown.edu/pir/',
            get_patt     => '(\.dat\.Z$|README)',
            local_ignore => '(nrl3d)',
            recursive    => 'false',
        },
        mirror_dir   => '$zpath/pir/release/',
        expanded_dir => '$dpath/pir/release/',
        data         => [ 'pir1.dat', 'pir2.dat', 'pir3.dat', 'pir4.dat', ],
        @_
    );
    return $self;
}

# Returns "<release>, <date>" taken from a chunk of the first data file
# (e.g. "25.00, June 30, 1998"), or undef when the chunk is missing or has
# no "Release ..." text.
sub getRelease {
    # Release 25.00, June 30, 1998
    my $self = shift;
    my $rel  = undef;
    my $df   = BioMirror::replaceVars( $self->expanded_dir . ${ $self->data }[0] );
    my $buf  = $self->readChunk($df);

    # The date class is spelled A-Za-z on purpose: the range A-z would also
    # accept the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
    if ( defined $buf && $buf =~ m/Release ([0-9\.]+), ([0-9A-Za-z\, ]+)/ ) {
        $rel = "$1, $2";
    }
    return $rel;
}

# PIR updates; inherits getRelease from BioMirror::PIR.
package BioMirror::PIRNEW;
our @ISA = qw( BioMirror::PIR );

# Constructor: passes the PIRNEW settings up through BioMirror::PIR::new,
# followed by any caller-supplied key/value pairs (@_).
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(
        srsdb     => 'PIRNEW',
        name      => 'PIRNEW',
        makeflags => $BioMirror::Data::kFromArchive | $BioMirror::Data::kDoSrsIndex,
        # dosummary => 1, # add to summary table
        source => {
            home         => 'NBRF',
            comment      => 'PIR updates from NBRF, Georgetown',
            url          => 'ftp://nbrfa.georgetown.edu/pir/databases/pir_updates/',
            web          => 'http://www-nbrf.georgetown.edu/pir/',
            get_patt     => '(\.dat\.Z$|README)',
            local_ignore => '(nrl3d)',
            recursive    => 'false',
        },
        mirror_dir   => '$zpath/pir/new/',
        expanded_dir => '$dpath/pir/new/',
        data         => [ 'pir1.dat', 'pir2.dat', 'pir3.dat', 'pir4.dat', ],
        @_
    );
    return $self;
}

# NRL_3D sequence/structure database.
# NOTE(review): the @ISA line below is commented out ("gone, jun03"), so this
# package has no parent and new() cannot resolve SUPER::new if it is called.
# That looks like a deliberate way of retiring the class -- confirm before
# re-enabling it.
package BioMirror::NRL3D;
## @ISA = qw( BioMirror::PIR ); ## gone, jun03

sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(
        srsdb => 'NRL3D',
        name  => 'NRL_3D',
        # makeflags => $BioMirror::Data::kFromArchive|$BioMirror::Data::kDoSrsIndex,
        # sourceflags => $BioMirror::Data::kSuperSource,
        dosummary => 0,    # add to summary table
        source    => {
            home    => 'NBRF',
            comment => 'NRL_3D Protein Sequence--Structure Database',
            url     => 'ftp://nbrfa.georgetown.edu/pir/databases/nrl_3d_codata/',
            ## 'ftp://ncbi.nlm.nih.gov/repository/PIR/ascii/',
            web          => 'http://www-nbrf.georgetown.edu/pir/',
            get_patt     => '(\.dat\.Z$|README)',
            local_ignore => '(pir)',
            recursive    => 'false',
        },
        data => [ 'nrl3d.dat', ],
        @_
    );
    return $self;
}

#-------------
# PDB, the Protein Data Bank (RCSB).
package BioMirror::PDB;
our @ISA = qw( BioMirror::Data );

# Constructor: hands the PDB defaults to the parent constructor.
# Caller-supplied key/value pairs (@_) are appended after the defaults.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(
        srsdb     => 'PDB',
        name      => 'PDB',
        makeflags => $BioMirror::Data::kDoSrsIndex,    # | $BioMirror::Data::kInArchive
        dosummary => 1,                                # add to summary table
        source    => {
            home             => 'RCSB',
            comment          => 'Protein Data Bank of 3-D macromolecular structures',
            url              => 'ftp://ftp.rcsb.org/pub/pdb/',
            web              => 'http://www.rcsb.org/pdb/',
            recursive        => 'true',
            do_deletes       => 'true',
            max_delete_files => '5%',
            max_delete_dirs  => '5%',
            ls_lR_file       => 'ls-lR',
            algorithm        => '0',       #? what is this? from rcsb.org instructions
            mode_copy        => 'true',    #?
            umask            => '0700',    #?
        },
        mirror_dir => '$zpath/pdb/',
        ## expanded_dir => '$dpath/rebase/',
        ## data => [ '*', ],
        ## srsindex from mirror_dir $zpath/pdb/data/structures/all/pdb/
        ## or use symlink to it?
        @_
    );
    return $self;
}

# Gene Ontology vocabularies.
package BioMirror::GeneOntology;
our @ISA = qw( BioMirror::Data );

# Constructor: hands the GeneOntology defaults to the parent constructor.
# Caller-supplied key/value pairs (@_) are appended after the defaults.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(
        srsdb => 'GO',
        name  => 'GeneOntology',
        # makeflags => $BioMirror::Data::kDoSrsIndex, # | $BioMirror::Data::kInArchive
        dosummary => 1,    # add to summary table
        source    => {
            home             => 'GeneOntology',
            comment          => 'Vocabularies of gene functions and roles',
            url              => 'ftp://ftp.geneontology.org/pub/go/',
            web              => 'http://www.geneontology.org/',
            recursive        => 'true',
            do_deletes       => 'true',
            max_delete_files => '5%',
            max_delete_dirs  => '5%',
            # ls_lR_file => 'ls-lR',
            # algorithm => '0', #? what is this? from rcsb.org instructions
            # mode_copy => 'true', #?
            # umask => '0700', #?
        },
        mirror_dir => '$zpath/geneontology/',
        @_
    );
    return $self;
}

1;    # perly

__END__

## current IUBio srs data root
/bio/data
archive@          flyabs@    methods/   rebase/
bionet@           flygenes@  nrl3d@     seqanalref/
biosoft-catalog@  gbest/     pdb/       swissnew/
blocks/           gbfull/    pir/       swissprot/
enzyme/           gbnew/     prints/    transfac/
epd/              limb/      prosite/   trembl/

to add:: ?
GENPEPT & GENPEPTNEW ???

name => 'PATHWAY',
servers => {
    'EMBL-EBI, Hinxton, Cambridge, UK' => {
        name => 'EMBL-EBI, Hinxton, Cambridge, UK',
    } ,
    'Oak Ridge National Laboratory, Tennessee, USA' => {
        name => 'Oak Ridge National Laboratory, Tennessee, USA',
    } ,
}
doc => [
"Pathway", the list of pathway maps is a part of the KEGG system. KEGG (Kyoto
Encyclopedia of Genes and Genomes) is an effort to computerize current
knowledge of molecular and cellular biology in terms of the information
pathways that consist of interacting molecules or genes and]