Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
6c22796
Support for extended PDBID PDB_nnnnNXXX
aalhossary Jul 27, 2021
dc12461
Some deprecated functions removal
aalhossary Jul 31, 2021
7b08387
Fix typo
aalhossary Aug 3, 2021
c39632c
Targeted most of Spencer's comments
aalhossary Aug 4, 2021
7dd8528
Fixed a bug in PDBId.equals() method
aalhossary Aug 4, 2021
e8466b9
Changing PDBID in SCOP record to lower case
aalhossary Aug 4, 2021
12efad9
public static PDBId field XXXX
aalhossary Sep 22, 2021
d4dfc95
Behavior is now an enum
aalhossary Oct 1, 2021
22e9158
Expose Pattern objects instead of Strings
aalhossary Oct 1, 2021
5c6bcdc
ID stored internally in a reduced format (without initial "PDB_" prefix)
aalhossary Oct 1, 2021
76112f0
Enhancements and bug fix
aalhossary Oct 2, 2021
d95e561
Added BDBId Unit test
aalhossary Oct 2, 2021
f54dd7d
Merge branch 'master' into Support_Extended_PDBID
aalhossary Oct 2, 2021
727aff6
Added @Deprecated codeTag
aalhossary Oct 2, 2021
960a1e6
Adding more TODOs
aalhossary Oct 2, 2021
605c6f8
more replacement of deprecated methods
aalhossary Oct 2, 2021
ec53cee
Removed PDBIdException
aalhossary Oct 9, 2021
fd48eba
review and untouch regular expressions
aalhossary Oct 9, 2021
84d9654
Un-deprecate, copy javadoc, and remove @author on methods
aalhossary Oct 10, 2021
e900fe5
Remove NullPointerException
aalhossary Oct 10, 2021
711fb8a
Bug fix
aalhossary Oct 10, 2021
f92db27
clean commented out code
aalhossary Oct 10, 2021
d9e15e0
Update CHANGELOG
aalhossary Oct 10, 2021
b410f16
Fix unit test
aalhossary Oct 13, 2021
6c8d267
PdbPair does not accept null
aalhossary Oct 12, 2021
a03a657
Addressing reviewer's comments
aalhossary Oct 13, 2021
a55aa95
Change capitalization state
aalhossary Oct 14, 2021
732a2c2
Fix probable NPE + keep consistent PdbId method naming convention
aalhossary Oct 14, 2021
34cb49e
Addressed some of the reviewer's comments.
aalhossary Oct 14, 2021
40a55d2
Use JUnit 5
aalhossary Oct 14, 2021
141d667
PdbId class Documentation
aalhossary Oct 15, 2021
c91a4dc
JavaDoc style update
aalhossary Oct 15, 2021
9ce50fe
Minor updates
aalhossary Oct 15, 2021
0fb6742
Reverting a wrong optimization
aalhossary Oct 15, 2021
02bfee4
Merge branch 'biojavaorigin/master' into Support_Extended_PDBID
aalhossary Oct 16, 2021
0d96963
Adding BioJava development code
aalhossary Oct 16, 2021
87fbc10
Javadoc
aalhossary Oct 21, 2021
17f366e
XXXX PdbId objects are not equal unless they are the same object
aalhossary Oct 22, 2021
ef9cf3f
No more XXXX. ANY malformed PdbId gracefully set to null
aalhossary Oct 23, 2021
02974cd
Addressing Reviewer's comments
aalhossary Oct 26, 2021
108fe97
Removing TODOs
aalhossary Oct 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Addressing Reviewer's comments
  • Loading branch information
aalhossary committed Oct 26, 2021
commit 02974cd8ee1bedba68d09fc08b2b09de00af82c0
4 changes: 1 addition & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,13 @@ BioJava 6.0.0 (future release)
* Fixed `CRC64Checksum#public void update(byte[] b, int offset, int length)` to use the `length` argument correctly as specified in `java.util.zip.Checksum` interface.
* In `SubstructureIdentifier`, `StructureName`, `EcodDomain`, `ScopDomain` : `getPdbId()` returns `PdbId` object instead of `String`.
* Removed `DownloadChemCompProvider.useDefaultUrlLayout` with a more flexible system to provide templated URLs `DownloadChemCompProvider.setChemCompPathUrlTemplate()` and `DownloadChemCompProvider.setServerBaseUrl()`
* In `Structure` (and `StructureImpl`), the accessor methods `String getPdbId()` and `setPdbId(String)` were previously deprecated. They were revived in BioJava 6.0.0 but as `PdbId getPdbId()` and `setPdbId(PdbId)` instead.

### Added
* New `keywords` field in `PDBHeader` class, populated by PDB and mmCIF parsers #946
* OBO parsing now supports multiple altids, #960
* New class `PdbId` that wraps a PDB Identifier and handles conversion between current short PDBID format and upcoming extended PDBID format #930

### revived
* In `Structure` (and `StructureImpl`), the accessor methods `String getPdbId()` and `setPdbId(String)` were previously deprecated. They were revived in BioJava 6.0.0 but as `PdbId getPdbId()` and `setPdbId(PdbId)` instead.

### Fixed
* Correct chain assignment to entities when parsing PDB/mmCIF without entity information (in cases with more than 3 chains per entity) #931
* Dealing with chain ids correctly when parsing bonds in PDB-format files #943 #929
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,10 @@ public class PdbId implements Comparable<PdbId>, Serializable{
private static final String PDB_0000 = PREFIX_PDB_ + STRING_0000;

/**
* How the PDB ID output/conversion should go, if possible.
* Controls how the PDB ID output/conversion should go, if possible.
* The default is to try to produce short PDB ID. If failed, produce extended PDB ID.
*/
public enum Behavior{
/**
* Try to produce short PDB ID. If failed, produce extended PDB ID.
*/
PREFER_SHORT,
/**
* Always produce Extended PDB ID.
*/
PREFER_EXTENDED
}
private static Behavior defaultShorteningBehaviour = Behavior.PREFER_SHORT;
private static final boolean defaultShorteningBehaviour = true;


/**
Expand All @@ -66,7 +57,11 @@ public enum Behavior{
/**
* A regular expression that matches a PDB ID in the extended format.
*/
public static final Pattern PATTERN_EXTENDED_PDBID = Pattern.compile("PDB_\\d{4}[1-9]\\p{Alnum}{3}");
public static final Pattern PATTERN_EXTENDED_PDBID = Pattern.compile("(pdb|PDB)_\\p{Alnum}{8}");
/**
* A regular expression that matches an extended PDB ID that is compatible with the short format.
*/
public static final Pattern PATTERN_SHORTABLE_EXTENDED_PDBID = Pattern.compile("(pdb|PDB)_0000[1-9]\\p{Alnum}{3}");

/**
* Keeps the ID in <b>UPPER CASE</b>, in a <em>reduced</em> form (without the <code>PDB_</code> prefix).
Expand Down Expand Up @@ -109,13 +104,13 @@ public static boolean isValidExtendedPdbId(String id) {

/**
* Checks whether an Extended PDB ID is shortable, <i>assuming it is a valid extended PDB ID</i>.
* If you are not sure the String represents a valid extended PdbId, use {@link #isValidExtendedPdbId(String)} first.
* @see #isValidExtendedPdbId(String)
* @param extendedId the supposedly valid extended PDB ID.
* @return <code>true</code> if <code>extendedId</code> starts with "PDB_0000", <code>false</code> otherwise.
* @return <code>true</code> if <code>extendedId</code> can be shortened
* (i.e. it matches the regular expression "(pdb|PDB)_0000[1-9][a-zA-Z0-9]{3}"), <code>false</code> otherwise.
*/
public static boolean isShortCompatible(String extendedId) {
return extendedId.length() >= 8 && extendedId.substring(0, 8).equals/*IgnoreCase*/(PDB_0000);
return PATTERN_SHORTABLE_EXTENDED_PDBID.matcher(extendedId).matches();
}

@Override
Expand Down Expand Up @@ -158,17 +153,17 @@ public String getId() {

/**
* Get a <code>String</code> representation of this PdbId instance, using the <i>passed in</i> behavior.<br>
* @param b when it equals <code>Behavior.PREFER_SHORT</code>, the class will try to produce the short ID whenever possible.
* @return The PdbId in short format if possible and <code>b</code> equals <code>Behavior.PREFER_SHORT</code>, the extended PDB ID form otherwise.
* @param prefereShort when it is <code>true</code>, the class will try to produce the short ID whenever possible.
* @return The PdbId in short format if possible and <code>prefereShort</code> is <code>true</code>, the extended PDB ID form otherwise.
*/
public String getId(Behavior b) {
if (b == Behavior.PREFER_SHORT && isInternalShortCompatible(idCode))
public String getId(boolean prefereShort) {
if (prefereShort && isInternalShortCompatible(idCode))
return internalToShortNoCheck(idCode);
return PREFIX_PDB_ + idCode;
}

/**
* Get the PDB Id in the sort format. Throws an exception if the conversion is not possible.<br>
* Get the PDB Id in the short format. Throws an exception if the conversion is not possible.<br>
* Use this method only if you know that this PDB ID is shortable.
* @return the PDB ID in the short format.
* @throws StructureException if the conversion was not possible.
Expand All @@ -183,7 +178,7 @@ public String getShortId() throws StructureException{

/**
* Converts <code>shortId</code> to the PDB ID extended format.
* If <code>shortId</code> is a valid short PDB ID (or XXXX), it would be converted to an extended ID,
* If <code>shortId</code> is a valid short PDB ID, it would be converted to an extended ID,
* if <code>shortId</code> is a valid extended PDB ID, it would be returned in UPPER CASE,
* a {@link StructureException} is thrown otherwise.
* @param shortId the PDB ID to convert to extended format
Expand All @@ -210,7 +205,7 @@ public static String toExtendedId(String shortId) throws StructureException{
* @throws StructureException if the conversion was not possible.
*/
public static String toShortId(String extendedId) throws StructureException{
if (isValidExtendedPdbId(extendedId) && isShortCompatible(extendedId)) {
if (isShortCompatible(extendedId)) {
return extendedId.substring(8).toUpperCase();
} else if (isValidShortPdbId(extendedId)) {
return extendedId.toUpperCase();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public class StructureIO {
| chainID
| chainID '_' resNum '-' resNum
pdbID := [1-9][a-zA-Z0-9]{3}
| PDB_[0-9]{4}[1-9][a-zA-Z0-9]{3}
| PDB_[a-zA-Z0-9]{8}
chainID := [a-zA-Z0-9]
scopID := 'd' pdbID [a-z_][0-9_]
biol := 'BIO:' pdbID [:]? [0-9]+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
* | chainID
* | chainID '_' resNum '-' resNum
* pdbID := [1-9][a-zA-Z0-9]{3}
* | PDB_[0-9]{4}[1-9][a-zA-Z0-9]{3}
* | PDB_[a-zA-Z0-9]{8}
* chainID := [a-zA-Z0-9]+
* resNum := [-+]?[0-9]+[A-Za-z]?
* </pre>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ public String getPath() {
* | chainID
* | chainID '_' resNum '-' resNum
* pdbID := [1-9][a-zA-Z0-9]{3}
* | PDB_[0-9]{4}[1-9][a-zA-Z0-9]{3}
* | PDB_[a-zA-Z0-9]{8}
* chainID := [a-zA-Z0-9]
* scopID := 'd' pdbID [a-z_][0-9_]
* resNum := [-+]?[0-9]+[A-Za-z]?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ private void pdb_HEADER_Handler(String line) {
try {
pdbIdToSet = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.info("Malformed (or null) PDB ID {}. setting PdbId to null", pdbCode);
pdbIdToSet = null;
Comment thread
aalhossary marked this conversation as resolved.
}
structure.setPdbId(pdbIdToSet);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -870,9 +870,11 @@ public void consumeStruct(Struct struct) {

if (struct.isDefined() && struct.getEntryId().isDefined()) {
PdbId pdbId;
String pdbCode = struct.getEntryId().get(0);
try {
pdbId = new PdbId(struct.getEntryId().get(0));
pdbId = new PdbId(pdbCode);
} catch (IllegalArgumentException e) {
logger.info("Malformed (or null) PDB ID {}. setting PdbId to null", pdbCode);
pdbId = null;
Comment thread
aalhossary marked this conversation as resolved.
}
pdbHeader.setPdbId(pdbId);
Expand Down Expand Up @@ -940,7 +942,7 @@ public void consumeStructRefSeq(StructRefSeq structRefSeq) {

DBRef dbRef = new DBRef();

dbRef.setIdCode(structRefSeq.getPdbxPDBIdCode().get(rowIndex)); //TODO Shall we change this as well?
dbRef.setIdCode(structRefSeq.getPdbxPDBIdCode().get(rowIndex));
dbRef.setDbAccession(structRefSeq.getPdbxDbAccession().get(rowIndex));
dbRef.setDbIdCode(structRefSeq.getPdbxDbAccession().get(rowIndex));
dbRef.setChainName(structRefSeq.getPdbxStrandId().get(rowIndex));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ public void testGetIdPrefereShortFormat() {
String id;

pdbId = new PdbId("1abc");
id = pdbId.getId(PdbId.Behavior.PREFER_SHORT);
id = pdbId.getId(true);
assertEquals(id, "1ABC");

pdbId = new PdbId("PDB_55551abc");
id = pdbId.getId(PdbId.Behavior.PREFER_SHORT);
id = pdbId.getId(true);
assertEquals(id, "PDB_55551ABC");
}

Expand All @@ -77,11 +77,11 @@ public void testGetIdPrefereExtendedFormat() {
String id;

pdbId = new PdbId("1abc");
id = pdbId.getId(PdbId.Behavior.PREFER_EXTENDED);
id = pdbId.getId(false);
assertEquals(id, "PDB_00001ABC");

pdbId = new PdbId("PDB_55551abc");
id = pdbId.getId(PdbId.Behavior.PREFER_EXTENDED);
id = pdbId.getId(false);
assertEquals(id, "PDB_55551ABC");
}

Expand Down Expand Up @@ -113,7 +113,7 @@ public void testIsExtendedPDBID() {
assertTrue(PdbId.isValidExtendedPdbId("PDB_00001abc"), "Didn't accept lower case");
assertTrue(PdbId.isValidExtendedPdbId("PDB_00004HHB"), "Didn't accept upper case");
assertTrue(PdbId.isValidExtendedPdbId("PDB_22224HHB"), "Didn't accept upper case");
assertFalse(PdbId.isValidExtendedPdbId("PDB_AAAA4HHB"), "Accepted wrong format");
assertTrue(PdbId.isValidExtendedPdbId("PDB_AAAA4HHB"), "It should accept any 8 alphanumeric values");
assertFalse(PdbId.isValidExtendedPdbId("1ABC"), "Accepted short format");
}

Expand All @@ -123,10 +123,8 @@ public void testIsShortCompatible() {
assertTrue(PdbId.isShortCompatible("PDB_00004HHB"), "Didn't accept upper case");
assertFalse(PdbId.isShortCompatible("1ABC"), "Accepted short format");
assertFalse(PdbId.isShortCompatible("PDB_AAAA4HHB"), "Accepted wrong format");

//Although this is wrong, returning true is the expected behavior of
// this method; because it does NOT validate the passed in string.
assertTrue(PdbId.isShortCompatible("PDB_0000XXXXXXXXXXXXX"), "Accepted wrong format");
assertFalse(PdbId.isShortCompatible("PDB_0000AHHB"), "Accepted letter (1HHB should pass but AHHB should not pass");
assertFalse(PdbId.isShortCompatible("PDB_0000AHHBBBBB"), "should be a valid extended PDB ID");
}

@Test
Expand All @@ -139,9 +137,9 @@ public void testToExtendedFormat() {
assertEquals(PdbId.toExtendedId("PDB_00001abc"), "PDB_00001ABC");
}, "Didn't recognize extended format");

assertThrows(StructureException.class, () -> {
assertDoesNotThrow(() -> {
PdbId.toExtendedId("PDB_aaaa1abc");
}, "Accepted wrong format");
}, "Should accept any 8 alphanumeric values");
}

@Test
Expand Down