|
| 1 | +/** |
| 2 | + * OWASP Enterprise Security API (ESAPI) |
| 3 | + * |
| 4 | + * This file is part of the Open Web Application Security Project (OWASP) |
| 5 | + * Enterprise Security API (ESAPI) project. For details, please see |
| 6 | + * <a href="http://www.owasp.org/index.php/ESAPI">http://www.owasp.org/index.php/ESAPI</a>. |
| 7 | + * |
| 8 | + * Copyright (c) 2022 - The OWASP Foundation |
| 9 | + * |
| 10 | + * The ESAPI is published by OWASP under the BSD license. You should read and accept the |
| 11 | + * LICENSE before you use, modify, and/or redistribute this software. |
| 12 | + * |
| 13 | + * @author Jeffrey Walton (noloader .at. gmail.com) |
| 14 | + * @author Kevin Wall (kevin.w.wall .at. gmail.com) |
| 15 | + * @author Matt Seil (matt.seil .at. owasp.org) |
| 16 | + * @created 2022 |
| 17 | + */ |
| 18 | +package org.owasp.esapi.codecs; |
| 19 | + |
| 20 | +/** |
| 21 | + * Implementation of the Codec interface for JSON strings. |
| 22 | + * This class performs <a |
| 23 | + * href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">String escaping</a> |
| 24 | + * on the entire string according to RFC 8259, Section 7. |
| 25 | + * |
| 26 | + * RFC 8259 requires conforming implementations use UTF-8. However, the ESAPI interfaces |
| 27 | + * utilize Java strings, which are UTF-16. This may cause problems during encoding and |
| 28 | + * decoding operations. To avoid some of the problems, convert the string to UTF-8 before |
| 29 | + * encoding and from UTF-8 after decoding. Ultimately the ESAPI encoder interfaces will |
| 30 | + * need modification to provide byte array arguments and return values. |
| 31 | + * |
| 32 | + * @see <a href="https://datatracker.ietf.org/doc/html/rfc8259#section-7">RFC 8259, |
| 33 | + * The JavaScript Object Notation (JSON) Data Interchange Format, Section 7</a> |
| 34 | + * |
| 35 | + * @author Jeffrey Walton (noloader .at. gmail.com) |
| 36 | + * @author Kevin Wall (kevin.w.wall .at. gmail.com) |
| 37 | + * @author Matt Seil (matt.seil .at. owasp.org) |
| 38 | + * @since July 31, 2022 |
| 39 | + * @see org.owasp.esapi.Encoder |
| 40 | + */ |
| 41 | +public class JSONCodec extends AbstractIntegerCodec { |
| 42 | + |
| 43 | + |
| 44 | + /** |
| 45 | + * {@inheritDoc} |
| 46 | + * |
| 47 | + * Escape special characters in JSON strings. |
| 48 | + * |
| 49 | + * encodeCharacter will escape the characters Backspace (\b), Form Feed (\f), |
| 50 | + * Carriage Return (\r), Line Feed (\n), Tab (\t), Double Quote (") and Backslash (\). |
| 51 | + * If the character is a control character (U+0000 through U+001f), then it will be |
| 52 | + * Unicode encoded (\u0000 through \u001f). If the character is not special or in the |
| 53 | + * user supplied immune list, then the character is returned unescaped. If the |
| 54 | + * character is null then an empty string is returned. |
| 55 | + * |
| 56 | + * @param immune character array of whitelist characters which should not be encoded |
| 57 | + * @param c the character to encode if not in the immune list |
| 58 | + * @return encoded character if the character is special, and the character otherwise. |
| 59 | + */ |
| 60 | + public String encodeCharacter( char[] immune, Character c ) { |
| 61 | + if ( c == null ) { |
| 62 | + return ""; |
| 63 | + } |
| 64 | + |
| 65 | + return encodeCharacter(immune, charToCodepoint( c )); |
| 66 | + } |
| 67 | + |
| 68 | + /** |
| 69 | + * {@inheritDoc} |
| 70 | + * |
| 71 | + * Escape special characters in JSON strings. |
| 72 | + * |
| 73 | + * encodeCharacter will escape the characters Backspace (\b), Form Feed (\f), |
| 74 | + * Carriage Return (\r), Line Feed (\n), Tab (\t), Double Quote (") and Backslash (\). |
| 75 | + * If the character is a control character (U+0000 through U+001f), then it will be |
| 76 | + * Unicode encoded (\u0000 through \u001f). If the character is not special or in the |
| 77 | + * user supplied immune list, then the character is returned unescaped. If the |
| 78 | + * character is null then an empty string is returned. |
| 79 | + * |
| 80 | + * @param immune character array of whitelist characters which should not be encoded |
| 81 | + * @param c the character to encode if not in the immune list |
| 82 | + * @return encoded character if the character is special, and the character otherwise. |
| 83 | + */ |
| 84 | + public String encodeCharacter( char[] immune, int codePoint ) |
| 85 | + throws IllegalArgumentException { |
| 86 | + |
| 87 | + if ( Character.isValidCodePoint(codePoint) == false ) { |
| 88 | + throw new IllegalArgumentException( "Invalid codepoint '" + codePoint + "'." ); |
| 89 | + } |
| 90 | + |
| 91 | + if ( immune != null ) { |
| 92 | + // More efficient than sort and binary search. If the immune array |
| 93 | + // was presorted, then this could be O(log n). But we can't add the |
| 94 | + // precondition now. It is too late in the game. |
| 95 | + for ( Character ch : immune ) { |
| 96 | + if ( charToCodepoint( ch ) == codePoint ) { |
| 97 | + return new String(Character.toChars(codePoint)); |
| 98 | + } |
| 99 | + } |
| 100 | + } |
| 101 | + |
| 102 | + // Per the RFC... Two-character sequence escape representations of some |
| 103 | + // popular characters |
| 104 | + switch ( codePoint ) { |
| 105 | + case '\b': return "\\b"; |
| 106 | + case '\f': return "\\f"; |
| 107 | + case '\r': return "\\r"; |
| 108 | + case '\n': return "\\n"; |
| 109 | + case '\t': return "\\t"; |
| 110 | + case '"': return "\\\""; |
| 111 | + case '/': return "\\/"; |
| 112 | + case '\\': return "\\\\"; |
| 113 | + } |
| 114 | + |
| 115 | + // Per the RFC... All Unicode characters may be placed within the |
| 116 | + // quotation marks, except for the characters that MUST be escaped: |
| 117 | + // quotation mark, reverse solidus, and the control characters |
| 118 | + // (U+0000 through U+001F). |
| 119 | + if ( codePoint <= 0x1f ) { |
| 120 | + |
| 121 | + return String.format("\\u%04x", codePoint); |
| 122 | + } |
| 123 | + |
| 124 | + return new String(Character.toChars(codePoint)); |
| 125 | + } |
| 126 | + |
| 127 | + |
| 128 | + /** |
| 129 | + * {@inheritDoc} |
| 130 | + * |
| 131 | + * Decodes special characters in encoded JSON strings. |
| 132 | + * |
| 133 | + * decodeCharacter will decode the encoded character sequences for popular characters |
| 134 | + * Backspace (\b), Form Feed (\f), Carriage Return (\r), Line Feed (\n), Tab (\t), |
| 135 | + * Double Quote ("), Forward slash (/) and Backslash (\). The function will also decode |
| 136 | + * six-character sequences of \u0000 - \uffff. If the character is not encoded then a |
| 137 | + * null character is returned. |
| 138 | + * |
| 139 | + * decodeCharacter does not handle all Unicode codepoints properly. If a codepoint is |
| 140 | + * encountered with a surrogate pair, then null is returned. This will happen with |
| 141 | + * codepoints greater than 64k. In this case we need to return two characters, not one. |
| 142 | + * |
| 143 | + * @param input a character sequence to decode |
| 144 | + * @return the decoded version of the encoded character starting at index, |
| 145 | + * or null otherwise |
| 146 | + * |
| 147 | + * @throws IllegalArgumentException |
| 148 | + * if an invalid character sequence is encountered |
| 149 | + */ |
| 150 | + public Integer decodeCharacter( PushbackSequence<Integer> input ) |
| 151 | + throws IllegalArgumentException { |
| 152 | + |
| 153 | + input.mark(); |
| 154 | + |
| 155 | + Integer first = input.next(), second = null; |
| 156 | + if ( first == null || first.intValue() != '\\' ) { |
| 157 | + input.reset(); |
| 158 | + return null; |
| 159 | + } |
| 160 | + |
| 161 | + String errorMessage = null; |
| 162 | + |
| 163 | + try |
| 164 | + { |
| 165 | + errorMessage = "Invalid JSON escape representation"; |
| 166 | + |
| 167 | + if ( (second = input.next()) == null ) { |
| 168 | + throw new IllegalArgumentException(); |
| 169 | + } |
| 170 | + |
| 171 | + // Per the RFC... Two-character sequence escape representations of some popular characters |
| 172 | + switch ( second.intValue() ) { |
| 173 | + case 'b': return (int)'\b'; |
| 174 | + case 'f': return (int)'\f'; |
| 175 | + case 'r': return (int)'\r'; |
| 176 | + case 'n': return (int)'\n'; |
| 177 | + case 't': return (int)'\t'; |
| 178 | + case '"': return (int)'\"'; |
| 179 | + case '/': return (int)'/'; |
| 180 | + case '\\': return (int)'\\'; |
| 181 | + } |
| 182 | + |
| 183 | + errorMessage = "Invalid JSON two-character escape representation"; |
| 184 | + |
| 185 | + // Per the RFC... All characters may be escaped as a six-character sequence: a reverse solidus, |
| 186 | + // followed by the lowercase letter u, followed by four hexadecimal digits that encode the |
| 187 | + // character's code point. The hexadecimal letters A through F can be uppercase or lowercase. |
| 188 | + // So, for example, a string containing only a single reverse solidus character may be represented |
| 189 | + // as "\u005C". |
| 190 | + if ( second.intValue() == 'u' ) { |
| 191 | + |
| 192 | + errorMessage = "Invalid JSON six-character escape representation"; |
| 193 | + |
| 194 | + return (convertToInt( input.next() ) << 12) + |
| 195 | + (convertToInt( input.next() ) << 8) + |
| 196 | + (convertToInt( input.next() ) << 4) + |
| 197 | + (convertToInt( input.next() ) << 0); |
| 198 | + } |
| 199 | + |
| 200 | + // Do nothing. Fall into throw below. |
| 201 | + } |
| 202 | + catch (IllegalArgumentException e) |
| 203 | + { |
| 204 | + // Do nothing. Fall into throw below. |
| 205 | + } |
| 206 | + |
| 207 | + // Catch all. The escaped character sequence was invalid. |
| 208 | + input.reset(); |
| 209 | + throw new IllegalArgumentException( errorMessage ); |
| 210 | + } |
| 211 | + |
| 212 | + protected int charToCodepoint( Character ch ) { |
| 213 | + |
| 214 | + final String s = Character.toString(ch); |
| 215 | + assert (s.length() == 1) : "Ooops"; |
| 216 | + |
| 217 | + return s.codePointAt(0); |
| 218 | + } |
| 219 | + |
| 220 | + protected int convertToInt( Integer hexDigit ) { |
| 221 | + |
| 222 | + if ( hexDigit == null ) { |
| 223 | + throw new IllegalArgumentException( "Cannot convert from '<null>' to int." ); |
| 224 | + } |
| 225 | + |
| 226 | + final int value = Character.digit( hexDigit.intValue(), 16 ); |
| 227 | + |
| 228 | + if ( value < 0 || value >= 16 ) { |
| 229 | + throw new IllegalArgumentException( "Cannot convert from hexadecimal '" + hexDigit.toString() + "' to int." ); |
| 230 | + } |
| 231 | + |
| 232 | + return value; |
| 233 | + } |
| 234 | + |
| 235 | +} |
0 commit comments