@@ -153,17 +153,18 @@ public class PDBFileParser {
153153 private Map <String , Site > siteMap = new LinkedHashMap <String , Site >();
154154 private Map <String , List <ResidueNumber >> siteToResidueMap = new LinkedHashMap <String , List <ResidueNumber >>();
155155
156- private Matrix4d currentNcsOp = null ;
156+ private Matrix4d currentNcsOp ;
157157 private List <Matrix4d > ncsOperators ;
158158
159159 // for storing LINK until we have all the atoms parsed
160160 private List <LinkRecord > linkRecords ;
161161
162162 // for parsing COMPOUND and SOURCE Header lines
163- private int molTypeCounter = 1 ;
164- // private int continuationNo ;
163+ private int prevMolId ;
164+ private String previousContinuationField ;
165165 private String continuationField ;
166- private String continuationString = "" ;
166+ private String continuationString ;
167+
167168 private DateFormat dateFormat ;
168169
169170 // for rfree parsing
@@ -205,9 +206,6 @@ public class PDBFileParser {
205206
206207
207208
208-
209- private String previousContinuationField = "" ;
210-
211209 /** Secondary strucuture assigned by the PDB author/
212210 *
213211 */
@@ -264,7 +262,7 @@ public PDBFileParser() {
264262 helixList = new ArrayList <Map <String ,String >>();
265263 strandList = new ArrayList <Map <String ,String >>();
266264 turnList = new ArrayList <Map <String ,String >>();
267- current_compound = new Compound () ;
265+ current_compound = null ;
268266 dbrefs = new ArrayList <DBRef >();
269267 siteMap = null ;
270268 dateFormat = new SimpleDateFormat ("dd-MMM-yy" , Locale .US );
@@ -739,23 +737,7 @@ private void pdb_REVDAT_Handler(String line) {
739737 */
740738 private void pdb_SEQRES_Handler (String line ) {
741739
742- // System.out.println("PDBFileParser.pdb_SEQRES_Handler: BEGIN");
743- // System.out.println(line);
744-
745- //TODO: treat the following residues as amino acids?
746740 /*
747- MSE Selenomethionine
748- CSE Selenocysteine
749- PTR Phosphotyrosine
750- SEP Phosphoserine
751- TPO Phosphothreonine
752- HYP 4-hydroxyproline
753- 5HP Pyroglutamic acid; 5-hydroxyproline
754- PCA Pyroglutamic Acid
755- LYZ 5-hydroxylysine
756- GLX Glu or Gln
757- ASX Asp or Asn
758- GLA gamma-carboxy-glutamic acid
759741 1 2 3 4 5 6 7
760742 1234567890123456789012345678901234567890123456789012345678901234567890
761743 SEQRES 1 A 376 LYS PRO VAL THR VAL LYS LEU VAL ASP SER GLN ALA THR
@@ -916,10 +898,6 @@ private void pdb_JRNL_Handler(String line) {
916898 */
917899 private void pdb_COMPND_Handler (String line ) {
918900
919- String continuationNr = line .substring (9 , 10 ).trim ();
920-
921- logger .debug ("current continuationNo is "
922- + continuationNr );
923901 logger .debug ("previousContinuationField is "
924902 + previousContinuationField );
925903 logger .debug ("current continuationField is "
@@ -941,41 +919,29 @@ private void pdb_COMPND_Handler(String line) {
941919 line = line .substring (0 , 72 );
942920 }
943921
944- //String beginningOfLine = line.substring(0, 10);
945- //line = line.replace(beginningOfLine, "");
946922 line = line .substring (10 , line .length ());
947923
948-
949- logger .debug ("LINE: >" + line + "<" );
950-
951- String [] fieldList = line .split ("\\ s+" );
924+
925+ String [] fieldList = line .trim ().split ("\\ s+" );
952926 int fl = fieldList .length ;
953- if ((fl >0 ) && (!fieldList [0 ].equals ("" ))
954- && compndFieldValues .contains (fieldList [0 ])) {
955- // System.out.println("[PDBFileParser.pdb_COMPND_Handler] Setting continuationField to '" + fieldList[0] + "'");
927+ if ((fl >0 ) && compndFieldValues .contains (fieldList [0 ])) {
928+
956929 continuationField = fieldList [0 ];
957930 if (previousContinuationField .equals ("" )) {
958931 previousContinuationField = continuationField ;
959932 }
960-
961- } else if ((fl >1 ) && compndFieldValues .contains (fieldList [1 ])) {
962- // System.out.println("[PDBFileParser.pdb_COMPND_Handler] Setting continuationField to '" + fieldList[1] + "'");
963- continuationField = fieldList [1 ];
964- if (previousContinuationField .equals ("" )) {
965- previousContinuationField = continuationField ;
933+
934+ } else if (fl >0 ) {
935+ // the ':' character indicates the end of a field name and should be invalid as part of the first data token
936+ // e.g. obsolete file 1hhb has a malformed COMPND line that can only be caught with this kind of check
937+ if (fieldList [0 ].contains (":" ) ) {
938+ logger .info ("COMPND line does not follow the PDB 3.0 format. Note that COMPND parsing is not supported any longer in format 2.3 or earlier" );
939+ return ;
966940 }
967941
968942 } else {
969- if (continuationNr .equals ("" )) {
970-
971- logger .debug ("looks like an old PDB file" );
972-
973- continuationField = "MOLECULE:" ;
974- if (previousContinuationField .equals ("" )) {
975- previousContinuationField = continuationField ;
976- }
977- }
978-
943+
944+ // the line will be added as data to the previous field
979945 }
980946
981947 line = line .replace (continuationField , "" ).trim ();
@@ -1025,41 +991,46 @@ private void pdb_COMPND_Handler(String line) {
1025991 // System.out.println("[pdb_COMPND_Handler] Final COMPND line - Finishing off final MolID header.");
1026992 compndValueSetter (continuationField , continuationString );
1027993 continuationString = "" ;
1028- compounds .add (current_compound );
994+ if ( current_compound != null ) compounds .add (current_compound );
1029995 }
1030996 }
1031997
1032- /** set the value in the currrent molId object
1033- *
998+ /**
999+ * Set the value in the current molId object
10341000 * @param field
10351001 * @param value
10361002 */
10371003 private void compndValueSetter (String field , String value ) {
10381004
10391005 value = value .trim ().replace (";" , "" );
10401006 if (field .equals ("MOL_ID:" )) {
1041-
1042- //TODO: find out why an extra mol or chain gets added and why 1H1J, 1J1H ATOM records are missing, but not 1H1H....
1043-
1044- logger .debug ("molTypeCounter " + molTypeCounter + " "
1045- + value );
1007+
10461008 int i = -1 ;
10471009 try {
10481010 i = Integer .valueOf (value );
10491011 } catch (NumberFormatException e ){
1050- logger .warn (e . getMessage () + " while trying to parse COMPND MOL_ID line." );
1012+ logger .warn ("Value '{}' does not look like a number, while trying to parse COMPND MOL_ID line.", value );
10511013 }
1052- if (molTypeCounter != i ) {
1053- molTypeCounter ++;
1014+ if (i >0 && prevMolId !=i ) {
1015+
1016+ if (current_compound !=null ) compounds .add (current_compound );
10541017
1055- compounds . add ( current_compound );
1056- current_compound = null ;
1018+ logger . debug ( "Initialising new Compound with mol_id {}" , i );
1019+
10571020 current_compound = new Compound ();
1058-
1021+
1022+ current_compound .setMolId (i );
1023+
1024+ prevMolId = i ;
10591025 }
10601026
1061- current_compound .setMolId (i );
10621027 }
1028+
1029+ // if for some reason (e.g. missing mol_id line) the current_compound is null we can't add anything to it, return
1030+ if (current_compound ==null ) {
1031+ return ;
1032+ }
1033+
10631034 if (field .equals ("MOLECULE:" )) {
10641035 current_compound .setMolName (value );
10651036
@@ -2665,14 +2636,15 @@ public Structure parsePDBFile(BufferedReader buf)
26652636 current_group = null ;
26662637 pdbHeader = new PDBHeader ();
26672638 connects = new ArrayList <Map <String ,Integer >>();
2639+ previousContinuationField = "" ;
26682640 continuationField = "" ;
26692641 continuationString = "" ;
2670- current_compound = new Compound () ;
2642+ current_compound = null ;
26712643 sourceLines .clear ();
26722644 compndLines .clear ();
26732645 isLastCompndLine = false ;
26742646 isLastSourceLine = false ;
2675- molTypeCounter = 1 ;
2647+ prevMolId = - 1 ;
26762648 compounds .clear ();
26772649 helixList .clear ();
26782650 strandList .clear ();
0 commit comments