3232import java .text .SimpleDateFormat ;
3333import java .util .ArrayList ;
3434import java .util .Arrays ;
35+ import java .util .Collections ;
3536import java .util .Date ;
3637import java .util .HashMap ;
3738import java .util .Iterator ;
@@ -2107,7 +2108,7 @@ private void pdb_CONECT_Handler(String line) {
21072108 }
21082109 }
21092110
2110- /*
2111+ /**
21112112 Handler for
21122113 MODEL Record Format
21132114
@@ -2116,7 +2117,6 @@ private void pdb_CONECT_Handler(String line) {
21162117 1 - 6 Record name "MODEL "
21172118 11 - 14 Integer serial Model serial number.
21182119 */
2119-
21202120 private void pdb_MODEL_Handler (String line ) {
21212121
21222122 if (params .isHeaderOnly ()) return ;
@@ -2127,24 +2127,12 @@ private void pdb_MODEL_Handler(String line) {
21272127 current_chain .addGroup (current_group );
21282128 current_group .trimToSize ();
21292129 }
2130- //System.out.println("starting new model "+(structure.nrModels()+1));
21312130
21322131 Chain ch = isKnownChain (current_chain .getChainID (),current_model ) ;
21332132 if ( ch == null ) {
21342133 current_model .add (current_chain );
21352134 }
2136- // removing water-only chains, they don't follow the standard data modeling practices.
2137- // We have to remove them or otherwise they can cause problems down the line,
2138- // e.g. 3o6j has chain Z with a single water molecule
2139- Iterator <Chain > it = current_model .iterator ();
2140- while (it .hasNext ()) {
2141- Chain c = it .next ();
2142- if (StructureTools .isChainWaterOnly (c )) {
2143- logger .warn ("Chain {} ({} atom groups) is composed of water molecules only. Removing it." ,
2144- c .getChainID (), c .getAtomGroups ().size ());
2145- it .remove ();
2146- }
2147- }
2135+
21482136 structure .addModel (current_model );
21492137 current_model = new ArrayList <Chain >();
21502138 current_chain = null ;
@@ -2939,18 +2927,7 @@ private void triggerEndFileChecks(){
29392927 pdbHeader .setJournalArticle (journalArticle );
29402928 }
29412929
2942- // removing water-only chains, they don't follow the standard data modeling practices.
2943- // We have to remove them or otherwise they can cause problems down the line,
2944- // e.g. 3o6j has chain Z with a single water molecule
2945- Iterator <Chain > it = current_model .iterator ();
2946- while (it .hasNext ()) {
2947- Chain c = it .next ();
2948- if (StructureTools .isChainWaterOnly (c )) {
2949- logger .warn ("Chain {} ({} atom groups) is composed of water molecules only. Removing it." ,
2950- c .getChainID (), c .getAtomGroups ().size ());
2951- it .remove ();
2952- }
2953- }
2930+
29542931 structure .addModel (current_model );
29552932 structure .setPDBHeader (pdbHeader );
29562933 structure .setCrystallographicInfo (crystallographicInfo );
@@ -3156,6 +3133,32 @@ public void linkChains2Compound(Structure s){
31563133 }
31573134 }
31583135
3136+ // in rare cases where a purely non-polymer or purely water chain is present we have missed it above
3137+ // we need now to assign a new compound to it so that at least the structure is consistent
3138+ // see https://github.com/biojava/biojava/pull/394
3139+
3140+ if (compounds !=null && !compounds .isEmpty ()) {
3141+ for (Chain c : s .getChains ()) {
3142+ if (c .getCompound () == null ) {
3143+
3144+ Compound compound = new Compound ();
3145+ compound .addChain (c );
3146+ compound .setMolId (findMaxCompoundId (compounds )+1 );
3147+ c .setCompound (compound );
3148+ compounds .add (compound );
3149+
3150+ logger .warn ("No compound (entity) found in file for chain {}. Creating new compound {} for it." , c .getChainID (), compound .getMolId ());
3151+ }
3152+ }
3153+ }
3154+ }
3155+
3156+ private static int findMaxCompoundId (List <Compound > compounds ) {
3157+ List <Integer > allIds = new ArrayList <Integer >(compounds .size ());
3158+ for (Compound compound : compounds ) {
3159+ allIds .add (compound .getMolId ());
3160+ }
3161+ return Collections .max (allIds );
31593162 }
31603163
31613164 /**
0 commit comments