Skip to content

Commit cb9ebab

Browse files
committed
Fixing issue introduced in 543d730: mmcif parser was not doing
nucleotide parsing correctly. Now TestLongPdbVsMmCifParsing passes. Also: - removed the UnknownPdbAminoAcidException to make things more consistent - undeprecating StructureTools Calpha methods because they can still be valid for some purposes - deprecating some inconsistently named methods in StructureTools
1 parent 61ef1e4 commit cb9ebab

File tree

8 files changed

+137
-187
lines changed

8 files changed

+137
-187
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/io/TestLongPdbVsMmCifParsing.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public void testVeryLongPdbVsMmCif() throws IOException, StructureException {
124124

125125
@Test
126126
public void testSingle() throws IOException, StructureException {
127-
testAll(Arrays.asList("3zyb"));
127+
testAll(Arrays.asList("4mml"));
128128
}
129129

130130
@After

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java

Lines changed: 52 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ public class StructureTools {
135135
* The atom name of the backbone phosphate in RNA
136136
*/
137137
public static final String P_ATOM_NAME = "P";
138+
139+
/**
140+
* The atom used as representative for nucleotides, equivalent to {@link #CA_ATOM_NAME} for proteins
141+
*/
138142
public static final String NUCLEOTIDE_REPRESENTATIVE = C4_ATOM_NAME;
139143

140144
/**
@@ -151,21 +155,14 @@ public class StructureTools {
151155

152156

153157

154-
//private static final String insertionCodeRegExp = "([0-9]+)([a-zA-Z]*)";
155-
//private static final Pattern insertionCodePattern = Pattern.compile(insertionCodeRegExp);
156-
157-
158158
// there is a file format change in PDB 3.0 and nucleotides are being renamed
159-
static private Map<String, Character> nucleotides30 ;
160-
static private Map<String, Character> nucleotides23 ;
159+
private static final Map<String, Character> nucleotides30 ;
160+
private static final Map<String, Character> nucleotides23 ;
161161

162162
//amino acid 3 and 1 letter code definitions
163163
private static final Map<String, Character> aminoAcids;
164164

165165
private static final Set<Element> hBondDonorAcceptors;
166-
// // for conversion 3code 1code
167-
// private static SymbolTokenization threeLetter ;
168-
// private static SymbolTokenization oneLetter ;
169166

170167

171168
static {
@@ -181,7 +178,6 @@ public class StructureTools {
181178
nucleotides30.put("U", 'U');
182179
nucleotides30.put("I", 'I');
183180

184-
//TODO: check if they are always HETATMs, in that case this will not be necessary
185181
// the DNA linkers - the +C , +G, +A +T +U and +I have been replaced with these:
186182
nucleotides30.put("TAF",UNKNOWN_GROUP_LABEL); // Fluorinated Thymine
187183
nucleotides30.put("TC1",UNKNOWN_GROUP_LABEL); // Furanosyl
@@ -250,7 +246,7 @@ public class StructureTools {
250246
}
251247

252248

253-
/** Count how many number of Atoms are contained within a Structure object.
249+
/** Count how many Atoms are contained within a Structure object.
254250
*
255251
* @param s the structure object
256252
* @return the number of Atoms in this Structure
@@ -567,9 +563,7 @@ public static final Atom[] getAtomArray(Chain c, String[] atomNames){
567563
* @param c the chain object
568564
* @return an Atom[] array
569565
* @see #getRepresentativeAtomArray(Chain)
570-
* @deprecated Use the more generic {@link #getRepresentativeAtomArray(Chain)} instead
571566
*/
572-
@Deprecated
573567
public static final Atom[] getAtomCAArray(Chain c){
574568
List<Atom> atoms = new ArrayList<Atom>();
575569

@@ -772,9 +766,8 @@ public static Atom[] duplicateCA2(Atom[] ca2) throws StructureException{
772766
* Return an Atom array of the C-alpha atoms. Any atom that is a carbon and has CA name will be returned.
773767
* @param s the structure object
774768
* @return an Atom[] array
775-
* @deprecated Use the more generic {@link #getRepresentativeAtomArray(Structure)} instead
769+
* @see #getRepresentativeAtomArray(Structure)
776770
*/
777-
@Deprecated
778771
public static Atom[] getAtomCAArray(Structure s){
779772

780773
List<Atom> atoms = new ArrayList<Atom>();
@@ -877,28 +870,32 @@ public static Atom[] getBackboneAtomArray(Structure s){
877870

878871
/**
879872
* Convert three character amino acid codes into single character
880-
* e.g. convert CYS to C
881-
* @return a character
882-
* @param code3 a three character amino acid representation String
883-
* @throws UnknownPdbAminoAcidException
873+
* e.g. convert CYS to C.
874+
* Valid 3-letter codes will be those of the standard 20 amino acids plus
875+
* MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids} map)
876+
* @return the 1 letter code, or null if the given 3 letter code does not correspond to
877+
* an amino acid code
878+
* @param groupCode3 a three character amino acid representation String
884879
* @see #get1LetterCode(String)
885880
*/
886-
public static final Character convert_3code_1code(String code3)
887-
throws UnknownPdbAminoAcidException {
888-
Character code1 = null;
889-
code1 = aminoAcids.get(code3);
890-
891-
if (code1 == null) {
892-
throw new UnknownPdbAminoAcidException(code3 + " not a standard amino acid");
893-
} else {
894-
return code1;
895-
}
881+
public static final Character get1LetterCodeAmino(String groupCode3) {
882+
return aminoAcids.get(groupCode3);
883+
}
896884

885+
/**
886+
* Convert a three character amino acid code into a single character, e.g. convert CYS to C.
887+
* @param code3 a three character amino acid representation String
888+
* @return the 1 letter code, or null if the given 3 letter code does not correspond to an amino acid code
889+
* @deprecated Use {@link #get1LetterCodeAmino(String)} instead
890+
*/
891+
@Deprecated
892+
public static final Character convert_3code_1code(String code3) {
893+
return get1LetterCodeAmino(code3);
897894
}
898895

899896
/**
900-
* Convert a three letter aminoacid code into a single character code.
901-
* If the code does not correspond to a amino acid or nucleotide, returns
897+
* Convert a three letter amino acid or nucleotide code into a single character code.
898+
* If the code does not correspond to an amino acid or nucleotide, returns
902899
* {@link #UNKNOWN_GROUP_LABEL}.
903900
*
904901
* Returned null for nucleotides prior to version 4.0.1.
@@ -907,37 +904,38 @@ public static final Character convert_3code_1code(String code3)
907904
*/
908905
public static final Character get1LetterCode(String groupCode3){
909906

910-
Character aminoCode1;
911-
try {
912-
// is it a standard amino acid ?
913-
aminoCode1 = convert_3code_1code(groupCode3);
914-
} catch (UnknownPdbAminoAcidException e){
907+
Character code1;
908+
909+
// is it a standard amino acid ?
910+
code1 = get1LetterCodeAmino(groupCode3);
911+
912+
if (code1 == null) {
915913
// hm groupCode3 is not standard
916-
// perhaps it is an nucleotide?
914+
// perhaps it is a nucleotide?
917915
groupCode3 = groupCode3.trim();
918916
if ( isNucleotide(groupCode3) ) {
919-
//System.out.println("nucleotide, aminoCode1:"+aminoCode1);
920-
aminoCode1= nucleotides30.get(groupCode3);
921-
if(aminoCode1 == null) {
922-
aminoCode1 = nucleotides23.get(groupCode3);
917+
code1= nucleotides30.get(groupCode3);
918+
if(code1 == null) {
919+
code1 = nucleotides23.get(groupCode3);
923920
}
924-
if(aminoCode1 == null) {
925-
aminoCode1 = UNKNOWN_GROUP_LABEL;
921+
if(code1 == null) {
922+
code1 = UNKNOWN_GROUP_LABEL;
926923
}
927924
} else {
928925
// does not seem to be so let's assume it is
929926
// nonstandard aminoacid and label it "X"
930927
//logger.warning("unknown group name "+groupCode3 );
931-
aminoCode1 = UNKNOWN_GROUP_LABEL;
932-
}
928+
code1 = UNKNOWN_GROUP_LABEL;
929+
}
933930
}
934-
935-
return aminoCode1;
931+
932+
return code1;
936933

937934
}
938935

939936

940-
/* Test if the threelettercode of an ATOM entry corresponds to a
937+
/**
938+
* Test if the three-letter code of an ATOM entry corresponds to a
941939
* nucleotide or to an amino acid.
942940
* @param a 3-character code for a group.
943941
*
@@ -1230,11 +1228,11 @@ public static final String convertAtomsToSeq(Atom[] atoms) {
12301228
}
12311229
}
12321230
String code3 = g.getPDBName();
1233-
try {
1234-
buf.append(convert_3code_1code(code3) );
1235-
} catch (UnknownPdbAminoAcidException e){
1236-
buf.append('X');
1237-
}
1231+
Character code1 = get1LetterCodeAmino(code3);
1232+
if (code1 == null) code1 = UNKNOWN_GROUP_LABEL;
1233+
1234+
buf.append(code1);
1235+
12381236
prevGroup = g;
12391237

12401238
}
@@ -1307,9 +1305,8 @@ public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
13071305
* @param chain
13081306
* @param cutoff
13091307
* @return
1310-
* @deprecated Use the more generic {@link #getRepresentativeAtomsInContact(Chain, double)} instead
1308+
* @see #getRepresentativeAtomsInContact(Chain, double)
13111309
*/
1312-
@Deprecated
13131310
public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
13141311
Grid grid = new Grid(cutoff);
13151312

biojava-structure/src/main/java/org/biojava/nbio/structure/UnknownPdbAminoAcidException.java

Lines changed: 0 additions & 58 deletions
This file was deleted.

biojava-structure/src/main/java/org/biojava/nbio/structure/align/util/AlignmentTools.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ public static AFPChain replaceOptAln(int[][][] newAlgn, AFPChain afpChain, Atom[
715715
copyAFP.setBlockGap(calculateBlockGap(newAlgn));
716716

717717
//Recalculate properties: superposition, tm-score, etc
718-
Atom[] ca2clone = StructureTools.cloneCAArray(ca2); // don't modify ca1 positions
718+
Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); // don't modify ca1 positions
719719
AlignmentTools.updateSuperposition(copyAFP, ca1, ca2clone);
720720

721721
//It re-does the sequence alignment strings from the OptAlgn information only

0 commit comments

Comments
 (0)