SUPERFAMILY 1.75 HMM library and genome assignments server

Evaluation Ruleset for SCOP 1.75 Benchmarks

For more information see the more detailed page on the outdated ruleset for SCOP 1.61 here.


sub Criteria{
#1.75 version
#Takes as an input two SCOP classifications, and returns a flag.
#1 if they're the same, 0 if it's ambiguous, and -1 if they're different
#
#Arguments: the two classifications are read from $_[1] and $_[2]; $_[0] is
#never looked at.
#NOTE(review): presumably callers pass a leading argument (or use method-call
#syntax) -- confirm against the call sites before changing these indices.
#
#Each classification must start with class.fold.superfamily.family, e.g.
#'c.1.2.3'. From it three levels are derived:
#  cf = fold ('c.1'), sf = superfamily ('c.1.2'), fa = family ('c.1.2.3').
#
#A classification that does not parse is reported on STDERR and the pair is
#scored -1. Without this guard two unparseable inputs compare equal (undef eq
#undef) and would be scored 1.
#
#The rules below are applied in order; a later rule overrides an earlier one.
    my $one = $_[1];
    my $two = $_[2];

    #The lookup tables live inside the sub so that they are populated no
    #matter where the sub sits in the file relative to the code calling it.

    #Rossmann-like folds. These all have notes in SCOP.
    my %rossmann = map { $_ => 0 } qw(c.2 c.3 c.4 c.5 c.27 c.28 c.30 c.31);

    #Folds scored ambiguous against any Rossmann-like fold.
    my %rossmann_partner_fold = (
        #Old note -correspondence checked-
        #1ykf, residues 151-314 superpose with 1eiz: 2.26382 ANGSTROMS/ATOM over 98 residues
        'c.66'  => 0,
        #1lvh superposes with 1ek6 to 2.19488 ANGSTROMS/ATOM over 88 residues (same topology), BUT is VERY different
        'c.108' => 0,
    );
    #Superfamily scored ambiguous against any Rossmann-like fold.
    #note: the ATP nucleotide-binding site is similar to that of the NAD-binding Rossmann-folds
    my $rossmann_partner_sf = 'c.111.1';
    #Retired Rossmann rules, kept for history:
    # - c.23.12 scored 1 against Rossmann folds (as of 1.57 it looked like it superposes OK);
    #   dropped: "Julian thinks now it's a grower, since 1.63".
    # - c.32 scored 0 against Rossmann folds (old note, correspondence checked);
    #   dropped: can't find it and there are no cross-hits anyway.

    #TIM barrels: first 7 superfamilies are similar -note in SCOP-
    my %tim_similar = map { $_ => 0 } qw(c.1.1 c.1.2 c.1.3 c.1.4 c.1.5 c.1.6 c.1.7);

    #beta propellers 4-8 blades
    my %propeller = map { $_ => 0 } qw(b.66 b.67 b.68 b.69 b.70);

    #Pairs of folds that are scored ambiguous (either order).
    my @ambiguous_fold_pairs = (
        #Note in SCOP, Similar in architecture but partly differs in topology Periplasmic binding protein-like I/II
        ['c.94', 'c.93'],
        #Note in SCOP: variant of beta/alpha barrel
        ['c.6', 'c.1'],
    );

    #Pairs of superfamilies that are scored ambiguous (either order).
    my @ambiguous_sf_pairs = (
        #Very similar alpha super-helix
        ['a.118.8', 'a.118.6'],
        #Similar motif sulphur binding
        ['d.58.1', 'a.1.2'],
        #OK note in SCOP one of the previous cases
        ['a.137.4', 'c.96.1'],
        #OK same fold and general look
        ['b.42.5', 'b.42.1'],
        #Note in SCOP. Leucine rich repeats both of them, structures look the same OK
        ['c.10.1', 'c.10.2'],
        #Note in SCOP, contains P-loop
        ['c.91.1', 'c.37.1'],
        #Note in SCOP, shared motif
        ['d.51.1', 'd.52.3'],
        #Note in SCOP, possible link
        ['a.24.1', 'a.63.1'],

        ####New to 1.69
        #Shared helix no note in SCOP, sequence aligns, structure looks same (domain boundary definition problem) d1t3ta1 hit d1vk3a1
        ['a.5.10', 'd.79.4'],
        #Shared helix no note in SCOP, sequence alignment very bad, but includes a hyper-variable linker region.
        #Looks like a misclassified? part of a strange chain d1vf5b hit d1bccc3
        #note in SCOP but not exactly clear if the note means this or something else
        ['f.21.1', 'f.32.1'],

        ####New to 1.73
        #Note in SCOP similar sequence repeat, but also similar assembly
        ['a.118.24', 'd.211.1'],

        ####New to coiled coils in 1.73 --based on comments made by Owen--
        #Note in SCOP for a.38.1 that reads "Contains a leucine zipper motif" and the h.1.3 class is the leucine zipper family.
        ['h.1.3', 'a.38.1'],
        #There is a note in SCOP for a.24.1 that reads "Can exist in a coiled coil oligomeric state, see PDB entry 1av1"
        #which is the pdb which is making the incorrect hit.
        ['h.5.1', 'a.24.1'],
        #Note in SCOP: a globular structure of a larger fragment containing this region is available; (1dn1), chain B
        #where 1dn1 is in the a.47.2 class.
        ['h.1.15', 'a.47.2'],
        #These are fusion proteins that have a unit from both the h.1.3 and h.3.2 proteins.
        #Also in terms of straightforward yes/no coiled coil prediction it makes no real difference!
        ['h.1.3', 'h.3.2'],

        ####New to 1.75
        #d2duya1 seems to hit d2i5ha1, gets structural hit with GANGSTA, superposes nicely with 2duy
        #Alexey will probably change this in 1.77
        ['a.60.2', 'e.71.1'],
        #note in SCOP: related to domain 3 of the Polypeptide chain release factors RF1 and RF2 (scop_fa 75621)
        ['e.38.1', 'd.50.4'],
        #note in SCOP: The active site is formed by the toprim and "winged helix" domains (domains 1 and 4);
        #these two domains are also found in the type II topoisomerase (DNA gyrase A) and in the alpha subunit of topoisomerase IV
        ['e.10.1', 'a.4.5'],
    );
    #Retired pair rules, kept for history:
    # - f.2.1 membrane all-alpha scored -1 across families: fixed as of SCOP 1.63.
    # - c.23.9 / c.69.1 (Cutinase-like) scored 0: re-classified in 1.67.
    # - f.2.1.10 / c.108.1 scored 1: re-classified in 1.63.
    # - c.11.1 / c.10.2 and c.11.1 / c.10.1 scored 0 (obvious sequence homology with blast, one is
    #   beta-beta-alpha superhelix, and one is beta-alpha, together in PFAM): re-classified in SCOP 1.67.
    # - a.8.4 / b.130.1 scored 0 (shared helix, d1dkza1 hit d1bpr__): seems to have been fixed (note).

    #Split each classification into its fold / superfamily / family prefixes.
    my @parsed;
    foreach my $classification ($one, $two) {
        #An undefined argument is reported like an empty classification.
        my $text = defined($classification) ? $classification : '';
        if ($text =~ /^(\w\.\d+)(\.\d+)(\.\d+)/) {
            push @parsed, ["$1", "$1$2", "$1$2$3"];
        }
        else {
            print STDERR "Error parsing classification: $text\n";
        }
    }
    #Nothing can be said about a pair unless both sides parsed: call it different.
    return (-1) if @parsed < 2;

    my ($cf1, $sf1, $fa1) = @{ $parsed[0] };
    my ($cf2, $sf2, $fa2) = @{ $parsed[1] };

    my $flag = -1;

    #Same fold ambiguous
    $flag = 0 if $cf1 eq $cf2;
    #plain right
    $flag = 1 if $sf1 eq $sf2;

    #TIM barrels
    if (exists($tim_similar{$sf1}) and exists($tim_similar{$sf2})) {
        $flag = 1;
    }
    elsif ($cf1 eq 'c.1' and $cf2 eq 'c.1' and $sf1 ne $sf2) {
        $flag = 0;
    }

    #Rossmann-like
    my $is_rossmann1 = exists($rossmann{$cf1});
    my $is_rossmann2 = exists($rossmann{$cf2});
    if ($is_rossmann1 and $is_rossmann2) {
        $flag = ($cf1 eq $cf2) ? 1 : 0;
    }
    my $is_partner1 = (exists($rossmann_partner_fold{$cf1}) or $sf1 eq $rossmann_partner_sf);
    my $is_partner2 = (exists($rossmann_partner_fold{$cf2}) or $sf2 eq $rossmann_partner_sf);
    if (($is_rossmann1 and $is_partner2) or ($is_rossmann2 and $is_partner1)) {
        $flag = 0;
    }

    #Other rules
    #beta propellers 4-8 blades
    if (exists($propeller{$cf1}) and exists($propeller{$cf2})) {
        $flag = ($cf1 eq $cf2) ? 1 : 0;
    }

    #Ambiguous fold pairs
    foreach my $pair (@ambiguous_fold_pairs) {
        my ($left, $right) = @{$pair};
        if (($cf1 eq $left and $cf2 eq $right) or ($cf2 eq $left and $cf1 eq $right)) {
            $flag = 0;
            last;
        }
    }

    #Ambiguous superfamily pairs
    foreach my $pair (@ambiguous_sf_pairs) {
        my ($left, $right) = @{$pair};
        if (($sf1 eq $left and $sf2 eq $right) or ($sf2 eq $left and $sf1 eq $right)) {
            $flag = 0;
            last;
        }
    }

    #Family against superfamily (new to 1.75)
    #note in SCOP: insertion of a zinc-ribbon subdomain in the beta-hairpin "wing"
    if (($fa1 eq 'a.4.5.1' and $sf2 eq 'g.41.5') or ($fa2 eq 'a.4.5.1' and $sf1 eq 'g.41.5')) {
        $flag = 0;
    }

    return ($flag);
}