|
52 | 52 | import org.biojava.nbio.structure.Author; |
53 | 53 | import org.biojava.nbio.structure.Chain; |
54 | 54 | import org.biojava.nbio.structure.ChainImpl; |
55 | | -import org.biojava.nbio.structure.EntityInfo; |
56 | | -import org.biojava.nbio.structure.EntityType; |
57 | 55 | import org.biojava.nbio.structure.DBRef; |
58 | 56 | import org.biojava.nbio.structure.Element; |
| 57 | +import org.biojava.nbio.structure.EntityInfo; |
| 58 | +import org.biojava.nbio.structure.EntityType; |
59 | 59 | import org.biojava.nbio.structure.Group; |
60 | 60 | import org.biojava.nbio.structure.GroupIterator; |
61 | 61 | import org.biojava.nbio.structure.HetatomImpl; |
|
71 | 71 | import org.biojava.nbio.structure.StructureTools; |
72 | 72 | import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; |
73 | 73 | import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom; |
| 74 | +import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord; |
74 | 75 | import org.biojava.nbio.structure.secstruc.SecStrucInfo; |
75 | 76 | import org.biojava.nbio.structure.secstruc.SecStrucType; |
76 | 77 | import org.biojava.nbio.structure.xtal.CrystalCell; |
@@ -186,6 +187,9 @@ public class PDBFileParser { |
186 | 187 | private Map<String, List<ResidueNumber>> siteToResidueMap = new LinkedHashMap<String, List<ResidueNumber>>(); |
187 | 188 |
|
188 | 189 | private List<SSBondImpl> ssbonds = new ArrayList<>(); |
| 190 | + |
| 191 | + // for storing LINK records until all the atoms have been parsed |
| 192 | + private List<LinkRecord> linkRecords; |
189 | 193 |
|
190 | 194 | private Matrix4d currentNcsOp; |
191 | 195 | private List<Matrix4d> ncsOperators; |
@@ -288,6 +292,8 @@ public PDBFileParser() { |
288 | 292 | // set the correct max values for parsing... |
289 | 293 | loadMaxAtoms = params.getMaxAtoms(); |
290 | 294 | atomCAThreshold = params.getAtomCaThreshold(); |
| 295 | + |
| 296 | + linkRecords = new ArrayList<LinkRecord>(); |
291 | 297 |
|
292 | 298 | blankChainIdsPresent = false; |
293 | 299 |
|
@@ -1984,6 +1990,8 @@ private void switchCAOnly(){ |
1984 | 1990 |
|
1985 | 1991 | /** saves repeating a few lines ... */ |
1986 | 1992 | private Integer conect_helper (String line,int start,int end) { |
| 1993 | + if (line.length() < end) return null; |
| 1994 | + |
1987 | 1995 | String sbond = line.substring(start,end).trim(); |
1988 | 1996 | int bond = -1 ; |
1989 | 1997 | Integer b = null ; |
@@ -2236,8 +2244,73 @@ private void pdb_SSBOND_Handler(String line){ |
2236 | 2244 | } |
2237 | 2245 |
|
2238 | 2246 |
|
| 2247 | + /** |
| 2248 | + * Takes care of LINK records. These take the format of: |
| 2249 | + * |
| 2250 | + * <pre> |
| 2251 | + * COLUMNS DATA TYPE FIELD DEFINITION |
| 2252 | + * -------------------------------------------------------------------------------- |
| 2253 | + * 1 - 6 Record name "LINK " |
| 2254 | + * 13 - 16 Atom name1 Atom name. |
| 2255 | + * 17 Character altLoc1 Alternate location indicator. |
| 2256 | + * 18 - 20 Residue name resName1 Residue name. |
| 2257 | + * 22 Character chainID1 Chain identifier. |
| 2258 | + * 23 - 26 Integer resSeq1 Residue sequence number. |
| 2259 | + * 27 AChar iCode1 Insertion code. |
| 2260 | + * 43 - 46 Atom name2 Atom name. |
| 2261 | + * 47 Character altLoc2 Alternate location indicator. |
| 2262 | + * 48 - 50 Residue name resName2 Residue name. |
| 2263 | + * 52 Character chainID2 Chain identifier. |
| 2264 | + * 53 - 56 Integer resSeq2 Residue sequence number. |
| 2265 | + * 57 AChar iCode2 Insertion code. |
| 2266 | + * 60 - 65 SymOP sym1 Symmetry operator for 1st atom. |
| 2267 | + * 67 - 72 SymOP sym2 Symmetry operator for 2nd atom. |
| 2268 | + * </pre> |
| 2269 | + * |
| 2270 | + * (From http://www.wwpdb.org/documentation/format32/sect6.html#LINK) |
| 2271 | + * |
| 2272 | + * @param line the LINK record line to parse. |
| 2273 | + */ |
| 2274 | + private void pdb_LINK_Handler(String line) { |
| 2275 | + |
| 2276 | + if (params.isHeaderOnly()) return; |
| 2277 | + |
| 2278 | + // Check for the minimal set of fields. |
| 2279 | + if (line.length()<56) { |
| 2280 | + logger.info("LINK line has length under 56. Ignoring it."); |
| 2281 | + return; |
| 2282 | + } |
| 2283 | + |
| 2284 | + int len = line.length(); |
| 2285 | + |
| 2286 | + String name1 = line.substring(12, 16).trim(); |
| 2287 | + String altLoc1 = line.substring(16, 17).trim(); |
| 2288 | + String resName1 = line.substring(17, 20).trim(); |
| 2289 | + String chainID1 = line.substring(21, 22).trim(); |
| 2290 | + String resSeq1 = line.substring(22, 26).trim(); |
| 2291 | + String iCode1 = line.substring(26, 27).trim(); |
| 2292 | + |
| 2293 | + String name2 = line.substring(42, 46).trim(); |
| 2294 | + String altLoc2 = line.substring(46, 47).trim(); |
| 2295 | + String resName2 = line.substring(47, 50).trim(); |
| 2296 | + String chainID2 = line.substring(51, 52).trim(); |
| 2297 | + String resSeq2 = line.substring(52, 56).trim(); |
| 2298 | + String iCode2 = null; // Might get trimmed if blank. |
| 2299 | + if (len > 56) iCode2 = line.substring(56, 57).trim(); |
| 2300 | + |
| 2301 | + String sym1 = null; |
| 2302 | + if (len > 64) sym1 = line.substring(59, 65).trim(); |
| 2303 | + String sym2 = null; |
| 2304 | + if (len > 71) sym2 = line.substring(66, 72).trim(); |
| 2305 | + |
| 2306 | + linkRecords.add(new LinkRecord( |
| 2307 | + name1, altLoc1, resName1, chainID1, resSeq1, iCode1, |
| 2308 | + name2, altLoc2, resName2, chainID2, resSeq2, iCode2, |
| 2309 | + sym1, sym2)); |
| 2310 | + } |
| 2311 | + |
2239 | 2312 | /** |
2240 | | - * Handler for the SITE records. |
| 2313 | + * Handler for the SITE records. <br> |
2241 | 2314 | * |
2242 | 2315 | * <pre> |
2243 | 2316 | * |
@@ -2520,6 +2593,7 @@ public Structure parsePDBFile(BufferedReader buf) |
2520 | 2593 | lengthCheck = -1; |
2521 | 2594 | atomCount = 0; |
2522 | 2595 | atomOverflow = false; |
| 2596 | + linkRecords = new ArrayList<LinkRecord>(); |
2523 | 2597 | siteToResidueMap.clear(); |
2524 | 2598 |
|
2525 | 2599 | blankChainIdsPresent = false; |
@@ -2552,58 +2626,58 @@ public Structure parsePDBFile(BufferedReader buf) |
2552 | 2626 | recordName = line.trim(); |
2553 | 2627 | else |
2554 | 2628 | recordName = line.substring (0, 6).trim (); |
2555 | | - |
2556 | | - if (recordName.equals("ATOM")) |
2557 | | - pdb_ATOM_Handler(line); |
2558 | | - else if (recordName.equals("SEQRES")) |
2559 | | - pdb_SEQRES_Handler(line); |
2560 | | - else if (recordName.equals("HETATM")) |
2561 | | - pdb_ATOM_Handler(line); |
2562 | | - else if (recordName.equals("MODEL")) |
2563 | | - pdb_MODEL_Handler(line); |
2564 | | - else if (recordName.equals("TER")) |
2565 | | - pdb_TER_Handler(); |
2566 | | - else if (recordName.equals("HEADER")) |
2567 | | - pdb_HEADER_Handler(line); |
2568 | | - else if (recordName.equals("AUTHOR")) |
2569 | | - pdb_AUTHOR_Handler(line); |
2570 | | - else if (recordName.equals("TITLE")) |
2571 | | - pdb_TITLE_Handler(line); |
2572 | | - else if (recordName.equals("SOURCE")) |
2573 | | - sourceLines.add(line); //pdb_SOURCE_Handler |
2574 | | - else if (recordName.equals("COMPND")) |
2575 | | - compndLines.add(line); //pdb_COMPND_Handler |
2576 | | - else if (recordName.equals("JRNL")) |
2577 | | - pdb_JRNL_Handler(line); |
2578 | | - else if (recordName.equals("EXPDTA")) |
2579 | | - pdb_EXPDTA_Handler(line); |
2580 | | - else if (recordName.equals("CRYST1")) |
2581 | | - pdb_CRYST1_Handler(line); |
2582 | | - else if (recordName.startsWith("MTRIX")) |
2583 | | - pdb_MTRIXn_Handler(line); |
2584 | | - else if (recordName.equals("REMARK")) |
2585 | | - pdb_REMARK_Handler(line); |
2586 | | - else if (recordName.equals("CONECT")) |
2587 | | - pdb_CONECT_Handler(line); |
2588 | | - else if (recordName.equals("REVDAT")) |
2589 | | - pdb_REVDAT_Handler(line); |
2590 | | - else if (recordName.equals("DBREF")) |
2591 | | - pdb_DBREF_Handler(line); |
2592 | | - else if (recordName.equals("SITE")) |
2593 | | - pdb_SITE_Handler(line); |
2594 | | - else if (recordName.equals("SSBOND")) |
2595 | | - pdb_SSBOND_Handler(line); |
2596 | | - else if ( params.isParseSecStruc()) { |
2597 | | - if ( recordName.equals("HELIX") ) pdb_HELIX_Handler ( line ) ; |
2598 | | - else if (recordName.equals("SHEET")) pdb_SHEET_Handler(line ) ; |
2599 | | - else if (recordName.equals("TURN")) pdb_TURN_Handler( line ) ; |
2600 | | - } |
2601 | | - else { |
2602 | | - // this line type is not supported, yet. |
2603 | | - // we ignore it |
2604 | | - } |
2605 | | - |
2606 | | - |
| 2629 | + |
| 2630 | + try { |
| 2631 | + if (recordName.equals("ATOM")) |
| 2632 | + pdb_ATOM_Handler(line); |
| 2633 | + else if (recordName.equals("SEQRES")) |
| 2634 | + pdb_SEQRES_Handler(line); |
| 2635 | + else if (recordName.equals("HETATM")) |
| 2636 | + pdb_ATOM_Handler(line); |
| 2637 | + else if (recordName.equals("MODEL")) |
| 2638 | + pdb_MODEL_Handler(line); |
| 2639 | + else if (recordName.equals("TER")) |
| 2640 | + pdb_TER_Handler(); |
| 2641 | + else if (recordName.equals("HEADER")) |
| 2642 | + pdb_HEADER_Handler(line); |
| 2643 | + else if (recordName.equals("AUTHOR")) |
| 2644 | + pdb_AUTHOR_Handler(line); |
| 2645 | + else if (recordName.equals("TITLE")) |
| 2646 | + pdb_TITLE_Handler(line); |
| 2647 | + else if (recordName.equals("SOURCE")) |
| 2648 | + sourceLines.add(line); //pdb_SOURCE_Handler |
| 2649 | + else if (recordName.equals("COMPND")) |
| 2650 | + compndLines.add(line); //pdb_COMPND_Handler |
| 2651 | + else if (recordName.equals("JRNL")) |
| 2652 | + pdb_JRNL_Handler(line); |
| 2653 | + else if (recordName.equals("EXPDTA")) |
| 2654 | + pdb_EXPDTA_Handler(line); |
| 2655 | + else if (recordName.equals("CRYST1")) |
| 2656 | + pdb_CRYST1_Handler(line); |
| 2657 | + else if (recordName.startsWith("MTRIX")) |
| 2658 | + pdb_MTRIXn_Handler(line); |
| 2659 | + else if (recordName.equals("REMARK")) |
| 2660 | + pdb_REMARK_Handler(line); |
| 2661 | + else if (recordName.equals("CONECT")) |
| 2662 | + pdb_CONECT_Handler(line); |
| 2663 | + else if (recordName.equals("REVDAT")) |
| 2664 | + pdb_REVDAT_Handler(line); |
| 2665 | + else if (recordName.equals("DBREF")) |
| 2666 | + pdb_DBREF_Handler(line); |
| 2667 | + else if (recordName.equals("SITE")) |
| 2668 | + pdb_SITE_Handler(line); |
| 2669 | + else if (recordName.equals("SSBOND")) |
| 2670 | + pdb_SSBOND_Handler(line); |
| 2671 | + else if (recordName.equals("LINK")) |
| 2672 | + pdb_LINK_Handler(line); |
| 2673 | + else if ( params.isParseSecStruc()) { |
| 2674 | + if ( recordName.equals("HELIX") ) pdb_HELIX_Handler ( line ) ; |
| 2675 | + else if (recordName.equals("SHEET")) pdb_SHEET_Handler(line ) ; |
| 2676 | + else if (recordName.equals("TURN")) pdb_TURN_Handler( line ) ; |
| 2677 | + } |
| 2678 | + } catch (StringIndexOutOfBoundsException | NullPointerException ex) { |
| 2679 | + logger.info("Unable to parse [" + line + "]"); |
| 2680 | + } |
2607 | 2681 | } |
2608 | 2682 |
|
2609 | 2683 | makeCompounds(compndLines, sourceLines); |
@@ -2681,6 +2755,13 @@ private void makeCompounds(List<String> compoundList, |
2681 | 2755 | private void formBonds() { |
2682 | 2756 |
|
2683 | 2757 | BondMaker maker = new BondMaker(structure, params); |
| 2758 | + |
| 2759 | + // LINK records should be preserved: they are the way that |
| 2760 | + // inter-residue bonds are created for ligands such as trisaccharides and unusual polymers. |
| 2761 | + // The analogue in mmCIF is the _struct_conn record. |
| 2762 | + for (LinkRecord linkRecord : linkRecords) { |
| 2763 | + maker.formLinkRecordBond(linkRecord); |
| 2764 | + } |
2684 | 2765 |
|
2685 | 2766 | maker.formDisulfideBonds(ssbonds); |
2686 | 2767 |
|
|
0 commit comments