diff --git a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java index 10cade8e16..d039c49aa1 100644 --- a/biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java +++ b/biojava-structure/src/main/java/org/biojava/nbio/structure/io/PDBFileParser.java @@ -2017,6 +2017,8 @@ private void switchCAOnly(){ /** safes repeating a few lines ... */ private Integer conect_helper (String line,int start,int end) { + if (line.length() < end) return null; + String sbond = line.substring(start,end).trim(); int bond = -1 ; Integer b = null ; @@ -2343,7 +2345,15 @@ private void pdb_SSBOND_Handler(String line){ private void pdb_LINK_Handler(String line) { if (params.isHeaderOnly()) return; + + // Check for the minimal set of fields. + if (line.length()<56) { + logger.info("LINK line has length under 56. Ignoring it."); + return; + } + int len = line.length(); + String name1 = line.substring(12, 16).trim(); String altLoc1 = line.substring(16, 17).trim(); String resName1 = line.substring(17, 20).trim(); @@ -2356,10 +2366,13 @@ private void pdb_LINK_Handler(String line) { String resName2 = line.substring(47, 50).trim(); String chainID2 = line.substring(51, 52).trim(); String resSeq2 = line.substring(52, 56).trim(); - String iCode2 = line.substring(56, 57).trim(); + String iCode2 = null; // Might get trimmed if blank. 
+ if (len > 56) iCode2 = line.substring(56, 57).trim(); - String sym1 = line.substring(59, 65).trim(); - String sym2 = line.substring(66, 72).trim(); + String sym1 = null; + if (len > 64) sym1 = line.substring(59, 65).trim(); + String sym2 = null; + if (len > 71) sym2 = line.substring(66, 72).trim(); // System.err.println("LINK"); // System.err.println("\tName: " + name1); @@ -2690,54 +2703,54 @@ public Structure parsePDBFile(BufferedReader buf) String recordName = line.substring (0, 6).trim (); - if (recordName.equals("ATOM")) - pdb_ATOM_Handler(line); - else if (recordName.equals("SEQRES")) - pdb_SEQRES_Handler(line); - else if (recordName.equals("HETATM")) - pdb_ATOM_Handler(line); - else if (recordName.equals("MODEL")) - pdb_MODEL_Handler(line); - else if (recordName.equals("HEADER")) - pdb_HEADER_Handler(line); - else if (recordName.equals("AUTHOR")) - pdb_AUTHOR_Handler(line); - else if (recordName.equals("TITLE")) - pdb_TITLE_Handler(line); - else if (recordName.equals("SOURCE")) - sourceLines.add(line); //pdb_SOURCE_Handler - else if (recordName.equals("COMPND")) - compndLines.add(line); //pdb_COMPND_Handler - else if (recordName.equals("JRNL")) - pdb_JRNL_Handler(line); - else if (recordName.equals("EXPDTA")) - pdb_EXPDTA_Handler(line); - else if (recordName.equals("CRYST1")) - pdb_CRYST1_Handler(line); - else if (recordName.startsWith("MTRIX")) - pdb_MTRIXn_Handler(line); - else if (recordName.equals("REMARK")) - pdb_REMARK_Handler(line); - else if (recordName.equals("CONECT")) - pdb_CONECT_Handler(line); - else if (recordName.equals("REVDAT")) - pdb_REVDAT_Handler(line); - else if (recordName.equals("DBREF")) - pdb_DBREF_Handler(line); - else if (recordName.equals("SITE")) - pdb_SITE_Handler(line); - else if (recordName.equals("SSBOND")) - pdb_SSBOND_Handler(line); - else if (recordName.equals("LINK")) - pdb_LINK_Handler(line); - else if ( params.isParseSecStruc()) { - if ( recordName.equals("HELIX") ) pdb_HELIX_Handler ( line ) ; - else if 
(recordName.equals("SHEET")) pdb_SHEET_Handler(line ) ; - else if (recordName.equals("TURN")) pdb_TURN_Handler( line ) ; - } - else { - // this line type is not supported, yet. - // we ignore it + try { + if (recordName.equals("ATOM")) + pdb_ATOM_Handler(line); + else if (recordName.equals("SEQRES")) + pdb_SEQRES_Handler(line); + else if (recordName.equals("HETATM")) + pdb_ATOM_Handler(line); + else if (recordName.equals("MODEL")) + pdb_MODEL_Handler(line); + else if (recordName.equals("HEADER")) + pdb_HEADER_Handler(line); + else if (recordName.equals("AUTHOR")) + pdb_AUTHOR_Handler(line); + else if (recordName.equals("TITLE")) + pdb_TITLE_Handler(line); + else if (recordName.equals("SOURCE")) + sourceLines.add(line); //pdb_SOURCE_Handler + else if (recordName.equals("COMPND")) + compndLines.add(line); //pdb_COMPND_Handler + else if (recordName.equals("JRNL")) + pdb_JRNL_Handler(line); + else if (recordName.equals("EXPDTA")) + pdb_EXPDTA_Handler(line); + else if (recordName.equals("CRYST1")) + pdb_CRYST1_Handler(line); + else if (recordName.startsWith("MTRIX")) + pdb_MTRIXn_Handler(line); + else if (recordName.equals("REMARK")) + pdb_REMARK_Handler(line); + else if (recordName.equals("CONECT")) + pdb_CONECT_Handler(line); + else if (recordName.equals("REVDAT")) + pdb_REVDAT_Handler(line); + else if (recordName.equals("DBREF")) + pdb_DBREF_Handler(line); + else if (recordName.equals("SITE")) + pdb_SITE_Handler(line); + else if (recordName.equals("SSBOND")) + pdb_SSBOND_Handler(line); + else if (recordName.equals("LINK")) + pdb_LINK_Handler(line); + else if ( params.isParseSecStruc()) { + if ( recordName.equals("HELIX") ) pdb_HELIX_Handler ( line ) ; + else if (recordName.equals("SHEET")) pdb_SHEET_Handler(line ) ; + else if (recordName.equals("TURN")) pdb_TURN_Handler( line ) ; + } + } catch (StringIndexOutOfBoundsException ex) { + logger.warn("Unable to parse [" + line + "]"); } diff --git 
a/biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestShortLines.java b/biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestShortLines.java new file mode 100644 index 0000000000..04312721a0 --- /dev/null +++ b/biojava-structure/src/test/java/org/biojava/nbio/structure/io/TestShortLines.java @@ -0,0 +1,71 @@ +package org.biojava.nbio.structure.io; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.biojava.nbio.structure.Atom; +import org.biojava.nbio.structure.Chain; +import org.biojava.nbio.structure.Group; +import org.biojava.nbio.structure.Structure; +import org.junit.Test; + +/** + * Tests that PDBFileParser handles CONECT and LINK lines that are shorter than the full record (trailing fields omitted). + */ +public class TestShortLines { + + @Test + public void testConect() throws IOException { + PDBFileParser pdbPars = new PDBFileParser(); + FileParsingParameters params = pdbPars.getFileParsingParameters(); + params.setCreateAtomBonds(true); + + // CONECTS will be deprecated, but will we create bonds? + // Like the LINK records, should BioJava create BondImpl when params.setCreateAtomBonds(true)? + + StringBuilder sb = new StringBuilder(); + sb.append("HETATM 2398 P FAD A 500 8.398 46.448 73.490 1.00 13.51 P \n"); + sb.append("HETATM 2399 PA FAD A 500 6.089 45.580 75.235 1.00 15.88 P \n"); + sb.append("HETATM 2400 O1P FAD A 500 7.908 47.684 72.869 1.00 4.19 O \n"); + sb.append("CONECT 2400 2398\n"); + String shortLine = sb.toString(); + Structure s; + // Parse the in-memory input whose CONECT line lists a single bonded atom and ends right after it + try(InputStream is = new ByteArrayInputStream(shortLine.getBytes())) { + s = pdbPars.parsePDBFile(is); + } + + // After 4.2, CONECTS are deprecated, but there is not yet an implementation + // describing how CONECTS will be replaced - will Bonds be created? 
+ assertEquals(1, s.getConnections().size()); + } + + @Test + public void testLINK() throws IOException { + Structure s; + PDBFileParser pdbPars = new PDBFileParser(); + FileParsingParameters params = pdbPars.getFileParsingParameters(); + params.setCreateAtomBonds(true); + + StringBuilder sb = new StringBuilder(); + sb.append("ATOM 2412 C21 2EG A 7 0.888 44.973 72.238 1.00 29.17 C \n"); + sb.append("ATOM 2413 C22 2EG B 19 0.888 44.973 72.238 1.00 29.17 C \n"); + //sb.append("LINK C21 2EG A 7 C22 2EG B 19 1555 1555 1.56 "); + sb.append("LINK C21 2EG A 7 C22 2EG B 19\n"); + String shortLine = sb.toString(); + + // Parse the in-memory input whose LINK line stops after the second residue number (compare the longer commented-out variant above) + try(InputStream is = new ByteArrayInputStream(shortLine.getBytes())) { + s = pdbPars.parsePDBFile(is); + } + + // The first atom of the first group in chain (0, 0) is expected to carry exactly one bond. + Chain c = s.getChain(0, 0); + Group g = c.getAtomGroups().get(0); + Atom a = g.getAtom(0); + assertEquals(1, a.getBonds().size()); + } +}