-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathArgumentTokenizer.java
More file actions
229 lines (219 loc) · 8.3 KB
/
ArgumentTokenizer.java
File metadata and controls
229 lines (219 loc) · 8.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
/*BEGIN_COPYRIGHT_BLOCK
*
* Copyright (c) 2001-2010, JavaPLT group at Rice University (drjava@rice.edu)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the names of DrJava, the JavaPLT group, Rice University, nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This software is Open Source Initiative approved Open Source Software.
* Open Source Initative Approved is a trademark of the Open Source Initiative.
*
* This file is part of DrJava. Download the current version of this project
* from http://www.drjava.org/ or http://sourceforge.net/projects/drjava/
*
* END_COPYRIGHT_BLOCK*/
package edu.rice.cs.util;
import java.util.List;
import java.util.LinkedList;
/**
 * Utility class which can tokenize a String into a list of String arguments,
 * with behavior similar to parsing command line arguments to a program.
 * Quoted Strings are treated as single arguments, and escaped characters
 * are translated so that the tokenized arguments have the same meaning.
 * Since all methods are static, the class is declared abstract to prevent
 * instantiation.
 *
 * <p>Quoting rules implemented by {@link #tokenize(String, boolean)}:
 * <ul>
 *   <li>Outside quotes, a backslash makes the following character literal
 *       and unquoted whitespace separates tokens.</li>
 *   <li>Inside single quotes every character up to the closing quote is
 *       taken literally.</li>
 *   <li>Inside double quotes a backslash only escapes a double quote or
 *       another backslash; before any other character the backslash is kept.</li>
 *   <li>A closing quote does not end the token; the token continues until
 *       unquoted whitespace or the end of the input.</li>
 * </ul>
 * @version $Id$
 */
public abstract class ArgumentTokenizer {
  /** Between tokens: no characters of a token have been seen yet. */
  private static final int NO_TOKEN_STATE = 0;
  /** Inside an unquoted portion of a token. */
  private static final int NORMAL_TOKEN_STATE = 1;
  /** Inside a single-quoted portion of a token. */
  private static final int SINGLE_QUOTE_STATE = 2;
  /** Inside a double-quoted portion of a token. */
  private static final int DOUBLE_QUOTE_STATE = 3;

  /** Tokenizes the given String into String tokens without re-quoting them.
   * Equivalent to {@code tokenize(arguments, false)}.
   * @param arguments A String containing one or more command-line style arguments to be tokenized.
   * @return A list of parsed arguments with quotes and escapes resolved.
   * @throws NullPointerException if {@code arguments} is null
   */
  public static List<String> tokenize(String arguments) {
    return tokenize(arguments, false);
  }

  /** Tokenizes the given String into String tokens.
   * @param arguments A String containing one or more command-line style arguments to be tokenized.
   * @param stringify if true, each resulting token is wrapped in double quotes and has its
   *                  quotes, backslashes and the control characters \n, \t, \r, \b, \f replaced
   *                  by backslash escape sequences; if false the raw tokens are returned
   * @return A list of parsed (and, if requested, quoted and escaped) arguments.
   * @throws NullPointerException if {@code arguments} is null
   */
  public static List<String> tokenize(String arguments, boolean stringify) {
    LinkedList<String> argList = new LinkedList<String>();
    StringBuilder currArg = new StringBuilder();
    boolean escaped = false;
    int state = NO_TOKEN_STATE;  // start in the NO_TOKEN_STATE
    int len = arguments.length();

    // Loop over each character in the string
    for (int i = 0; i < len; i++) {
      char c = arguments.charAt(i);
      if (escaped) {
        // Escaped state: just append the next character to the current arg.
        escaped = false;
        currArg.append(c);
        continue;
      }
      switch (state) {
        case SINGLE_QUOTE_STATE:
          if (c == '\'') {
            // Seen the close quote; continue this arg until whitespace is seen
            state = NORMAL_TOKEN_STATE;
          }
          else {
            currArg.append(c);
          }
          break;
        case DOUBLE_QUOTE_STATE:
          if (c == '"') {
            // Seen the close quote; continue this arg until whitespace is seen
            state = NORMAL_TOKEN_STATE;
          }
          else if (c == '\\') {
            if (i + 1 < len) {
              // Look ahead, and only escape quotes or backslashes
              i++;
              char next = arguments.charAt(i);
              if (next == '"' || next == '\\') {
                currArg.append(next);
              }
              else {
                currArg.append(c);
                currArg.append(next);
              }
            }
            else {
              // Backslash is the last character of the input: there is nothing to
              // look ahead to, so keep it literally instead of reading past the end.
              currArg.append(c);
            }
          }
          else {
            currArg.append(c);
          }
          break;
        case NO_TOKEN_STATE:
        case NORMAL_TOKEN_STATE:
          switch (c) {
            case '\\':
              // Backslash outside quotes: the next character is taken literally
              escaped = true;
              state = NORMAL_TOKEN_STATE;
              break;
            case '\'':
              state = SINGLE_QUOTE_STATE;
              break;
            case '"':
              state = DOUBLE_QUOTE_STATE;
              break;
            default:
              if (!Character.isWhitespace(c)) {
                currArg.append(c);
                state = NORMAL_TOKEN_STATE;
              }
              else if (state == NORMAL_TOKEN_STATE) {
                // Whitespace ends the token; start a new one
                argList.add(currArg.toString());
                currArg = new StringBuilder();
                state = NO_TOKEN_STATE;
              }
              // Whitespace while in NO_TOKEN_STATE is simply skipped.
          }
          break;
        default:
          throw new IllegalStateException("ArgumentTokenizer state " + state + " is invalid!");
      }
    }

    // If we're still escaped, put in the backslash
    if (escaped) {
      currArg.append('\\');
      argList.add(currArg.toString());
    }
    // Close the last argument if we haven't yet
    else if (state != NO_TOKEN_STATE) {
      argList.add(currArg.toString());
    }

    // Format each argument if we've been told to stringify them
    if (stringify) {
      // Build a fresh list in one pass; indexed get/set on a linked list is quadratic.
      LinkedList<String> quoted = new LinkedList<String>();
      for (String arg : argList) {
        quoted.add("\"" + _escapeQuotesAndBackslashes(arg) + "\"");
      }
      return quoted;
    }
    return argList;
  }

  /** Inserts backslashes before any occurrences of a backslash or
   * double quote in the given string. Also replaces the newline, tab,
   * carriage return, backspace and form feed characters with their
   * two-character backslash escape sequences.
   * @param s the string to escape
   * @return the escaped copy of {@code s}
   */
  protected static String _escapeQuotesAndBackslashes(String s) {
    final int n = s.length();
    final StringBuilder buf = new StringBuilder(n + 8);
    for (int i = 0; i < n; i++) {
      char c = s.charAt(i);
      switch (c) {
        case '\\':
        case '"':
          buf.append('\\').append(c);
          break;
        case '\n':
          buf.append("\\n");
          break;
        case '\t':
          buf.append("\\t");
          break;
        case '\r':
          buf.append("\\r");
          break;
        case '\b':
          buf.append("\\b");
          break;
        case '\f':
          buf.append("\\f");
          break;
        default:
          buf.append(c);
      }
    }
    return buf.toString();
  }
}