@@ -804,7 +804,7 @@ public void documentEnd() {
804804 // compounds (entities)
805805 // In addCompounds above we created the compounds if they were present in the file
806806 // Now we need to make sure that they are linked to chains and also that if they are not present in the file we need to add them now
807- linkCompounds ();
807+ linkEntities ();
808808
809809 if (!params .isHeaderOnly ()) {
810810
@@ -922,10 +922,10 @@ public void documentEnd() {
922922 }
923923
924924 /**
925- * Here we link compounds ( entities) to chains.
926- * Also if compounds are not present in file, this initialises the compounds with some heuristics, see {@link CompoundFinder }
925+ * Here we link entities to chains.
926+ * Also if entities are not present in file, this initialises the entities with some heuristics, see {@link EntityFinder }
927927 */
928- private void linkCompounds () {
928+ private void linkEntities () {
929929
930930
931931 for (int i =0 ; i < structure .nrModels () ; i ++){
@@ -945,7 +945,7 @@ private void linkCompounds() {
945945 }
946946 int eId = Integer .parseInt (entityId );
947947
948- // Compounds are not added for non-polymeric entities, if a chain is non-polymeric its compound won't be found.
948+ // Entities are not added for non-polymeric entities, if a chain is non-polymeric its entity won't be found.
949949 // TODO: add all entities and unique compounds and add methods to directly get polymer or non-polymer
950950 // asyms (chains). Either create a unique StructureImpl or modify existing for a better representation of the
951951 // mmCIF internal data structures but is compatible with Structure interface.
@@ -954,39 +954,44 @@ private void linkCompounds() {
954954 // - 3o6j: asym_id K, chainId Z, entity_id 6 : a single water molecule
955955 // - 1dz9: asym_id K, chainId K, entity_id 6 : a potassium ion alone
956956
957- EntityInfo compound = structure .getCompoundById (eId );
958- if (compound ==null ) {
957+ EntityInfo e = structure .getEntityById (eId );
958+ if (e ==null ) {
959959 // Supports the case where the only chain members were from non-polymeric entity that is missing.
960960 // Solved by creating a new EntityInfo (entity) to which this chain will belong.
961- logger .warn ("Could not find a compound for entity_id {}, for chain id {}, creating a new compound ." ,
961+ logger .warn ("Could not find an Entity for entity_id {}, for chain id {}, creating a new Entity ." ,
962962 eId , chain .getChainID ());
963- compound = new EntityInfo ();
964- compound .setMolId (eId );
965- compound .addChain (chain );
966- chain .setEntityInfo (compound );
967- structure .addEntityInfo (compound );
963+ e = new EntityInfo ();
964+ e .setMolId (eId );
965+ e .addChain (chain );
966+ if (StructureTools .isChainWaterOnly (chain )) {
967+ e .setType (EntityType .WATER );
968+ } else {
969+ e .setType (EntityType .NONPOLYMER );
970+ }
971+ chain .setEntityInfo (e );
972+ structure .addEntityInfo (e );
968973 } else {
969- logger .debug ("Adding chain with chain id {} (asym id {}) to compound with entity_id {}" ,
974+ logger .debug ("Adding chain with chain id {} (asym id {}) to Entity with entity_id {}" ,
970975 chain .getChainID (), chain .getInternalChainID (), eId );
971- compound .addChain (chain );
972- chain .setEntityInfo (compound );
976+ e .addChain (chain );
977+ chain .setEntityInfo (e );
973978 }
974979
975980 }
976981
977982 }
978983
979- // to make sure we have Compounds linked to chains, we call getCompounds () which will lazily initialise the
980- // compounds using heuristics (see CompoundFinder ) in the case that they were not explicitly present in the file
981- List <EntityInfo > compounds = structure .getEntityInfos ();
984+ // to make sure we have Entities linked to chains, we call getEntityInfos () which will lazily initialise the
985+ // entities using heuristics (see EntityFinder ) in the case that they were not explicitly present in the file
986+ List <EntityInfo > entities = structure .getEntityInfos ();
982987
983- // final sanity check: it can happen that from the annotated compounds some are not linked to any chains
988+ // final sanity check: it can happen that from the annotated entities some are not linked to any chains
984989 // e.g. 3s26: a sugar entity does not have any chains associated to it (it seems to be happening with many sugar compounds)
985- // we simply log it, this can sign some other problems if the compounds are used down the line
986- for (EntityInfo compound : compounds ) {
987- if (compound .getChains ().isEmpty ()) {
988- logger .info ("Compound {} '{}' has no chains associated to it" ,
989- compound .getId ()==null ?"with no entity id" :compound .getId (), compound .getDescription ());
990+ // we simply log it, this can signal other problems if the entities are used down the line
991+ for (EntityInfo e : entities ) {
992+ if (e .getChains ().isEmpty ()) {
993+ logger .info ("Entity {} '{}' has no chains associated to it" ,
994+ e .getId ()==null ?"with no entity id" :e .getId (), e .getDescription ());
990995 }
991996 }
992997
@@ -1111,25 +1116,30 @@ private void addCompounds(StructAsym asym) {
11111116 try {
11121117 eId = Integer .parseInt (asym .getEntity_id ());
11131118 } catch (NumberFormatException e ) {
1114- logger .warn ("Could not parse mol_id from string {}. Will use 0 for creating Compound " ,asym .getEntity_id ());
1119+ logger .warn ("Could not parse mol_id from string {}. Will use 0 for creating Entity " ,asym .getEntity_id ());
11151120 }
11161121 Entity e = getEntity (eId );
11171122
11181123 // for some mmCIF files like 1yrm all 3 of _entity_src_gen, _entity_src_nat and _pdbx_entity_src_syn are missing
11191124 // we need to fill the EntityInfos in some other way:
11201125
1121- EntityInfo c = structure .getCompoundById (eId );
1126+ EntityInfo c = structure .getEntityById (eId );
11221127
11231128 if (c ==null ) {
11241129 c = new EntityInfo ();
11251130 c .setMolId (eId );
11261131 // we only add the compound if a polymeric one (to match what the PDB parser does)
11271132 if (e !=null ) {
11281133 c .setDescription (e .getPdbx_description ());
1129- c .setType (EntityType .entityTypeFromString (e .getType ()));
1130- addAnicilliaryEntityData (asym , eId , e , c );
1134+ EntityType eType = EntityType .entityTypeFromString (e .getType ());
1135+ if (eType !=null ) {
1136+ c .setType (eType );
1137+ } else {
1138+ logger .warn ("Type '{}' is not recognised as a valid entity type for entity {}" , e .getType (), eId );
1139+ }
1140+ addAncilliaryEntityData (asym , eId , e , c );
11311141 structure .addEntityInfo (c );
1132- logger .debug ("Adding Compound with entity id {} from _entity, with name: {}" ,eId , c .getDescription ());
1142+ logger .debug ("Adding Entity with entity id {} from _entity, with name: {}" ,eId , c .getDescription ());
11331143 }
11341144 }
11351145 }
@@ -1142,7 +1152,7 @@ private void addCompounds(StructAsym asym) {
11421152 * @param entity
11431153 * @param entityInfo
11441154 */
1145- private void addAnicilliaryEntityData (StructAsym asym , int entityId , Entity entity , EntityInfo entityInfo ) {
1155+ private void addAncilliaryEntityData (StructAsym asym , int entityId , Entity entity , EntityInfo entityInfo ) {
11461156 // Loop through each of the entity types and add the corresponding data
11471157 // We're assuming if data is duplicated between sources it is consistent
11481158 // This is a potentially huge assumption...
@@ -1629,7 +1639,7 @@ public void newEntitySrcSyn(EntitySrcSyn entitySrcSyn){
16291639
16301640 /**
16311641 * The EntityPolySeq object provide the amino acid sequence objects for the Entities.
1632- * Later on the entities are mapped to the BioJava Chain and Compound objects.
1642+ * Later on the entities are mapped to the BioJava {@link Chain} and {@link EntityInfo} objects.
16331643 * @param epolseq the EntityPolySeq record for one amino acid
16341644 */
16351645 @ Override
0 commit comments