# BioMirror/Data.pm # Bio-Mirror perl packages # d.gilbert ## ?? use $dbinfo= SRS::info() to set these data vars ?? ## ?? &/or update srs.i files w/ new data files (gb) ## expanded_dir => '$dpath/blocks/', ## data => [ 'blocks.dat' ], ## e.g., SRS::info('blocks') => ## Directory: ## "SRSROOT:.data/blocks/" ## ## Filename(s): ## blocks.dat =head1 NAME BioMirror::Data - databank base class =head1 DESCRIPTION Defines common variables and methods for BioMirror databanks. Most variables are set by subclasses of this package/class. See package BioMirror::DataBanks:: for subclasses. =cut package BioMirror::Data; # BEGIN { eval{ use lib '/Users/gilbertd/bio/perlib';}; } # use lib '/Users/gilbertd/bio/perlib'; $VERSION = "1.0"; sub Version { $VERSION; } use URI::URL; # use vars qw( # $kFromSource $kFromMirror $kFromArchive $kSuperSource $kDoSrsIndex # ); # some data class flags $kFromSource= 1; ## get from remote source (?not a mirrored or archived data set) $kFromMirror= 2; ## get from mirror ftp/http server $kFromArchive= 4; ## get from local archive (.Z/.gz) copy $kSuperSource= 8; ## source included in superclass (don't mirror separately) $kDoSrsIndex= 16; ## want to srsindex this data sub isBioMirrorDataClass() { return 1; } sub getSubclasses { ## some perl tricks here %packhash= %BioMirror::; foreach $symname (sort keys %packhash ) { local *sym= $packhash{$symname}; if (defined(%sym) && $symname =~ /::$/) { my $classname= "BioMirror::$symname"; $classname =~ s/::$//; next if ($classname eq 'BioMirror::Data'); ## skip this class my $ismyclass= $classname->can('isBioMirrorDataClass'); if ($ismyclass) { ## print "new $classname \n" if $BioMirror::debug; my $ob= new $classname; push( @dataclasslist, $ob); } } } } sub elements() { ## can we get Perl to load all BioMirror::Data subclasses ? ## one way: # - stroll thru @INC, open each if is -d, # - look for BioMirror/ subdir, # - require all .pm inside, # - do something with @ISA ? or each BEGIN - call some BioMirror::Data method? ## also? - autosplit each package into BioMirror/dataclass.pm files getSubclasses() unless(defined @dataclasslist); return @dataclasslist; } sub new { my $that= shift; my $class= ref($that) || $that; my %fields = @_; # convert into associative array my $self = \%fields; bless $self, $class; $self->init(); return $self; } sub init { my $self= shift; ## print STDERR "BioMirror::Data init"; $self->{name}= 'Bio-Mirror Data' unless (exists $self->{name} ); } sub getRelease { return undef; } sub updateArchiveToExpanded { my $self = shift; my $fromdir= BioMirror::replaceVars( $self->mirror_dir ); my $fromurl= new URI::URL( $fromdir, 'file:'); ## ^^ is this enough? - replaceVars( $self->mirror_dir ) may yeild ftp:// or http:// ? if ($fromurl->local_path) { my $doupdate= (($self->makeflags & $BioMirror::Data::kFromArchive) != 0); return 0 unless($doupdate); ##? return $self->localArchiveToExpanded($fromdir); ## $fromurl->local_path } else { return $self->remoteArchiveToExpanded($fromurl); } } sub remoteArchiveToExpanded { my $self = shift; my $fromurl= shift; ##? my $error = 1; my @data = @{$self->data}; my $todir = BioMirror::replaceVars( $self->expanded_dir ); my $f; foreach $f ( @data ) { my ($fz); if ( $f =~ m/=/) { ## check if $f has a regex to match eg. "(sprot[0-9]+.dat.Z)=seq.dat" ($fz,$f) = split(/=/,$f); ## if ($fz =~ m/^\((.+)\)$/) { $fz= "($1)"; } } else { $fz= $f; } ## ?? need %{$self->source} or other remote url info before this call !? $error= BioMirror::remoteArchiveToExpandedFile( "$fromurl/$fz", "$todir/$f"); last if ($error); } return $error; } ## return list ($self->expanded_dir, @data) ## ! need also a 'getData' method that returns file names in place of regex ! sub getDataPathnames(;$ $) { my $self = shift; my ($expandedlist, $adddocs) = @_; my @data = @{$self->data}; if ($adddocs) { my @docs = @{$self->docs}; ##? do these also? push(@data,@docs) if (@docs); } my $fromdir= BioMirror::replaceVars( $self->mirror_dir ); my $todir = BioMirror::replaceVars( $self->expanded_dir ); my @files; my $f; if ($expandedlist || ! -d $fromdir) { local(*D); if (opendir(D,$todir)) { @files= grep( !/^\./, readdir(D)); closedir(D); } } else { #?? is this better than readdir($todir) ? foreach $f ( @data ) { my ($fz, $tof); my @fz; if ( $f =~ m/=/ ) { ($fz,$tof) = split(/=/,$f); } else { $fz= $f; $tof= $f; } if ( $fz =~ m/^\((.+)\)$/ ) { @fz= $self->matchfiles($fromdir,$1); $tof= '' if ($tof eq $fz); } else { @fz= ($fz); } foreach $fz (@fz) { if (!$tof) { ($tof= $fz) =~ s/(\.Z|\.gz)$//; } push(@files, $tof); $tof= ''; } } } return ($todir, @files); } ## see BioMirror::updateData() -- does this belong here or there? sub localArchiveToExpanded { my $self = shift; # my $fromdir= shift; ##? my $error = 0; my @data = @{$self->data}; ## getData() ? my @docs = @{$self->docs}; ##? do these also? push(@data,@docs) if (@docs); my $fromdir= BioMirror::replaceVars( $self->mirror_dir ); my $todir = BioMirror::replaceVars( $self->expanded_dir ); my $onlinedir = ($self->online_dir) ? BioMirror::replaceVars( $self->online_dir ) : undef; if (!-d $fromdir) { Carp::carp("Missing archive folder $fromdir\n"); return 1; ## && -d $todir -- not if doMakeDirs in main } my $f; EFILE: foreach $f ( @data ) { my ($fz, $tof, $fpat, $tpat); my @fz; if ( $f =~ m/=/ ) { ($fz,$tof) = split(/=/,$f); } else { $fz= $f; $tof= $f; } if ( $fz =~ m/^\((.+)\)$/ ) { $fpat= $1; @fz= $self->matchfiles($fromdir,$fpat); $tof= '' if ($tof eq $fz); } else { @fz= ($fz); } $tpat= $1 if ( $tof =~ m/^\((.+)\)$/ ); my $toff= $tof; foreach $fz (@fz) { if ($tpat && $fpat) { ($toff= $fz) =~ s/$fpat/$tpat/; } elsif (!$toff) { $toff= $fz ; } $toff=~ s/\.(Z|gz)$//; my $onff= (defined $onlinedir) ? "$onlinedir/$toff" : undef; $error= BioMirror::copyArchiveToExpandedFile( "$fromdir/$fz", "$todir/$toff", $onff); last EFILE if ($error); $toff= ''; } } return $error; } sub matchfiles( $ $) { ## where $matchfile is regex like 'sprot[0-9]+.dat' or '\.seq' my $self = shift; local($path,$matchfile) = @_; local(*D); if (opendir(D,$path)) { my @fs= grep( /$matchfile/, readdir(D)); closedir(D); print STDERR "$path matchfile: $matchfile = @fs\n" if $BioMirror::debug; return @fs; } return ($matchfile); ##? or undef or () } sub print { my $self= shift; foreach (sort keys %$self) { print "$_ => "; my $val= $self->{$_}; if (ref($val) =~ /HASH/) { foreach $k (sort keys %$val) { print "$k=>$$val{$k}\n"; } } elsif (ref($val) =~ /ARRAY/ ) { print join(', ',@$val) ."\n"; } else { print "$val\n"; } } } sub toString { ## java favorite my $self= shift; my $s= '%' . ref($self) . '= { '; foreach (sort keys %$self) { $s .= "$_ => "; my $val= $self->{$_}; if (ref($val) =~ /HASH/) { $s .= '{'; foreach $k (sort keys %$val) { $s .= "$k=>\'$$val{$k}\', "; } $s .= '}, '; } elsif (ref($val) =~ /ARRAY/ ) { $s .= 'qw[' . join(', ',@$val) .'], '; } else { $s .= "\'$val\', "; } } $s .= ' }; '; return $s; } sub readChunk { my $self= shift; local($filename,$chunksize,$offset)= @_; $chunksize= 2048 if ($chunksize<=0); my $buf= ''; local(*D); if (open(D,"<$filename")) { read(D, $buf, $chunksize, $offset); close D; } return $buf; } ## packname ?? name or srsdb or check %source ? - name can have spaces! sub packname { my $self= shift; return lc($self->{srsdb}); } sub comment { my $self= shift; return ${$self->{source}}{comment}; } sub getFtpMirrorPackage { my $self= shift; local($mirrorurl)= @_; ##? optionally pack from BM ftp site? return undef if ( ($self->sourceflags & $BioMirror::Data::kSuperSource) != 0); if ($mirrorurl) { my $url= new URI::URL($mirrorurl); my $remotepath= $url->path; if (length($remotepath)<2) { $remotepath= $self->{mirror_dir}; ## strip leading localpath variable ?? -- no, remote-biomirror may need leading path!? $remotepath =~ s+^\$zpath+/biomirror+; ##? hack } my $localpath= BioMirror::replaceVars( $self->{mirror_dir} ); ## or $self->expanded_dir ? my $pack= "package=".$self->packname()."\n"; $pack .= " site=".$url->host."\n"; if ($url->user && $url->user ne 'anonymous') { $pack .= " remote_user=".$url->user."\n"; $pack .= " remote_password=".$url->password."\n" if ($url->password); } $pack .= " remote_dir=$remotepath\n"; $pack .= " local_dir=$localpath\n"; $pack .= " comment=Mirror of ".$self->comment()."\n\n"; return $pack; } else { ## from source my $url= new URI::URL($self->{source}{url}); if ($url->scheme eq 'ftp') { my $localpath= BioMirror::replaceVars( $self->{mirror_dir} ); ## or $self->expanded_dir ? my %shash= %{$self->{source}}; $shash{site}= $url->host unless($shash{site}); $shash{remote_dir}= $url->path unless($shash{remote_dir}); if ($url->user && $url->user ne 'anonymous') { $shash{remote_user}= $url->user; $shash{remote_password}= $url->password if ($url->password); ## ^^ bad password for anon ftp - user@host - no domainname! } $shash{local_dir}= $localpath unless($shash{local_dir}); my $pack= "package=".$self->packname()."\n"; ## need to screen out 'url', ? other non-mirror.pl keys my $k; foreach $k (sort keys %shash) { next if ($k =~ /url|web|home/); my $val= $shash{$k}; if ($val =~ /^\+/) { $pack .= " $k$val\n"; } ## for 'exclude_patt+|fasta' else { $pack .= " $k=$val\n"; } } $pack .= "\n"; return $pack; } } return undef; } sub getPackageLDIF { my $self= shift; my $dn= shift; return undef if ( ($self->sourceflags & $BioMirror::Data::kSuperSource) != 0); my $pn= $self->packname(); my $name= $self->name(); $dn= 'bc=BioMirror Catalog,ou=gridtest.bio.indiana.edu,o=Grid' unless($dn); $dn= "pkg=$pn, $dn"; my $pack =< site_ comment => description, true/false => TRUE/FALSE my %dropkeys= map { $_,1; } qw(local_dir max_delete_files max_delete_dirs do_deletes algorithm mode_copy umask) ; my %renamekeys= qw(comment description); # ... remote_dir site_dir ... my $url= new URI::URL($self->{source}{url}); if ($url->scheme eq 'ftp') { my $localpath= BioMirror::replaceVars( $self->{mirror_dir} ); ## or $self->expanded_dir ? my %shash= %{$self->{source}}; $shash{site}= $url->host unless($shash{site}); $shash{remote_dir}= $url->path unless($shash{remote_dir}); if ($url->user && $url->user ne 'anonymous') { $shash{remote_user}= $url->user; $shash{remote_password}= $url->password if ($url->password); ## ^^ bad password for anon ftp - user@host - no domainname! } $shash{local_dir}= $localpath unless($shash{local_dir}); ## need to screen out 'url', ? other non-mirror.pl keys my $k; foreach $k (sort keys %shash) { # next if ($k =~ /url|web|home/); next if ($dropkeys{$k}); my $val= $shash{$k}; $k = $renamekeys{$k} || $k; $k =~ s/^remote_/site_/; $k =~ s/_/-/g; # LDAP is picky about _ in keys, - is ok though !! $pack .= "$k: $val\n"; } $pack .= "\n"; return $pack; } return undef; } sub getSummaryHtml { my $self= shift; my $mirrorurl= shift; return undef unless ($self->dosummary); my %shash= %{$self->{source}}; my $localurl = $self->{mirror_dir}; ## $localurl =~ s+^\$zpath+/biomirror+; ##? hack -- FIXME $localurl =~ s+^\$zpath++; ##? hack -- FIXME $localurl = $mirrorurl . $localurl; my $homeurl= $shash{homeurl} || $shash{url}; my $name= $self->{name}; my $tabrow = < $name $shash{comment} $shash{home} TEOF return $tabrow; } ## AUTOLOAD -- supply set/get methods for fields ## change names to cleaner 'get'/'set' -- so can do getVariable(2) for array ref ? sub AUTOLOAD { my $self= shift; my $type= ref($self); my $name= $AUTOLOAD; $name =~ s/.*://; ## drop package prefix ## unless (exists $self->{$name} ) { warn "Can't access $name field in class $type"; } ## call now $obj->get_varname($x), $obj->set_varname($x) ## OR $obj->getVarname($x), $obj->setVarname($x) if ($name =~ /^get_?(.+)/) { my $var= lcfirst($1); my $rvar= $self->{$var}; if (@_ && ref($rvar) =~ /ARRAY/) { return ${$rvar}[shift]; } else { return $rvar; } } elsif ($name =~ /^set_?(.+)/) { my $var= lcfirst($1); if (@_) { return $self->{$var}= shift;} else { return $self->{$var}= undef; } } ## call was $obj->varname() -- this is perl semi-standard - support? elsif (@_) { return $self->{$name}= shift; } else { return $self->{$name}; } } sub DESTROY { my $self= shift; ##warn "DESTROY $self"; } #------------- 1; ## perly