@@ -797,26 +797,21 @@ public void documentEnd() {
797797 continue ;
798798 }
799799 int eId = Integer .parseInt (entityId );
800- // We didn't add above compounds for nonpolymeric entities, thus here if a chain is nonpolymeric
801- // its compound won't be found. In biojava Structure data model a nonpolymeric chain does not really
802- // make much sense, since all small molecules are associated to a polymeric chain (the same data
803- // model as PDB files).
804- // In any case it happens in rare cases that a non-polymeric chain is not associated to any polymeric
805- // chain, e.g.
806- // - 2uub: asym_id X, chainId Z, entity_id 24: fully non-polymeric but still with its own chainId
807- // - 3o6j: asym_id K, chainId Z, entity_id 6 : a single water molecule
808- // - 1dz9: asym_id K, chainId K, entity_id 6 : a potassium ion alone
809- // We will discard those chains here, because they don't fit into the current data model and thus
810- // can cause problems, e.g.
811- // a) they would not be linked to a compound and give null pointers
812- // b) StructureTools.getAllAtoms() methods that return all atoms except waters would have
813- // empty lists for water-only chains
800+
801+ // Compounds are not added for non-polymeric entities, so if a chain is non-polymeric its compound won't be found.
802+ // TODO: add all entities and unique compounds, and add methods to directly get polymer or non-polymer
803+ // asyms (chains). Either create a unique StructureImpl or modify the existing one so that it better represents
804+ // the mmCIF internal data structures while remaining compatible with the Structure interface.
814805 Compound compound = structure .getCompoundById (eId );
815806 if (compound ==null ) {
816- logger .warn ("Could not find a compound for entity_id {} corresponding to chain id {} (asym id {})."
817- + " Most likely it is a purely non-polymeric chain ({} groups). Removing it from this structure." ,
818- eId ,chain .getChainID (),chain .getInternalChainID (),chain .getAtomGroups ().size ());
819- it .remove ();
807+ // Supports the case where the only chain members come from a non-polymeric entity for which no compound exists.
808+ // Solved by creating a new Compound (entity) to which this chain will belong.
809+ logger .warn ("Could not find a compound for entity_id {}, for chain id {}, creating a new compound." ,
810+ eId , chain .getChainID ());
811+ compound = new Compound ();
812+ compound .setId ((long )eId );
813+ compound .addChain (chain );
814+ structure .addCompound (compound );
820815 } else {
821816 logger .debug ("Adding chain with chain id {} (asym id {}) to compound with entity_id {}" ,
822817 chain .getChainID (), chain .getInternalChainID (), eId );
@@ -1109,12 +1104,20 @@ private void addCompounds(StructAsym asym) {
11091104 // get the corresponding Entity
11101105 Compound c = structure .getCompoundById (eId );
11111106 if ( c == null ){
1112- if (e !=null && e .getType ().equals ("polymer" )) {
1113- c = createNewCompoundFromESG (esg , eId );
1114- c .setMolName (e .getPdbx_description ());
1115- structure .addCompound (c );
1116- logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
1117- }
1107+ if (e !=null ) {
1108+ if (e .getType ().equals ("polymer" )) {
1109+ c = createNewCompoundFromESG (esg , eId );
1110+ c .setMolName (e .getPdbx_description ());
1111+ structure .addCompound (c );
1112+ logger .debug ("Adding Compound with entity id {} from _entity_src_syn, with name: {}" ,eId ,c .getMolName ());
1113+ } else if (e .getType ().equals ("non-solvent" )) {
1114+ // TODO handle non-polymer compounds.
1115+ } else if (e .getType ().equals ("water" )) {
1116+ // TODO handle solvent entity.
1117+ } else {
1118+ logger .warn ("Could not add entity id " + esg .getEntity_id () + " that has unknown _entity.type" );
1119+ }
1120+ }
11181121 }
11191122
11201123 }
0 commit comments