Skip to content

Commit a585ffb

Browse files
committed
- Added new readLine() method and new methods for processing byte order mark in the CSV class
git-svn-id: svn://192.168.0.80/JavaXT/javaxt-express@1437 2c7b0aa6-e0b2-3c4e-bb4a-8b65b6c465ff
1 parent 4d4fdb4 commit a585ffb

File tree

1 file changed

+100
-1
lines changed

1 file changed

+100
-1
lines changed

src/javaxt/express/utils/CSV.java

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
public class CSV {
1313

14-
public static final String UTF8_BOM = "\uFEFF";
14+
//public static final String UTF8_BOM = "\uFEFF";
1515

1616
//**************************************************************************
1717
//** Columns
@@ -105,6 +105,105 @@ public static String readLine(String data){
105105
}
106106

107107

108+
//**************************************************************************
109+
//** readLine
110+
//**************************************************************************
111+
/** Returns a substring for the given data, ending at the first line break
112+
* that is not inside a quote. Example usage:
113+
<pre>
114+
115+
//Get input stream
116+
javaxt.io.File file; //create file!
117+
java.io.InputStream is = file.getInputStream();
118+
119+
//Read header
120+
String header = CSV.readLine(is);
121+
int bom = CSV.getByteOrderMark(header);
122+
if (bom>-1) header = header.substring(bom);
123+
console.log(header);
124+
125+
//Read rows
126+
String row;
127+
while (!(row=CSV.readLine(is)).isEmpty()){
128+
console.log(row);
129+
}
130+
131+
//Close input stream
132+
is.close();
133+
</pre>
134+
*/
135+
public static String readLine(java.io.InputStream is) throws java.io.IOException {
136+
137+
StringBuilder str = new StringBuilder();
138+
boolean insideDoubleQuotes = false;
139+
int i = 0;
140+
while((i=is.read())!=-1) {
141+
char c = (char) i;
142+
143+
if ((c=='\r' || c=='\n') && str.length()==0) continue;
144+
145+
if (c=='"'){
146+
if (insideDoubleQuotes) insideDoubleQuotes = false;
147+
else insideDoubleQuotes = true;
148+
}
149+
150+
if (c=='\r' || c=='\n'){
151+
if (!insideDoubleQuotes) break;
152+
}
153+
str.append(c);
154+
}
155+
return str.toString();
156+
}
157+
158+
159+
//**************************************************************************
160+
//** getByteOrderMark
161+
//**************************************************************************
162+
/** Returns end position of the Byte Order Mark (BOM). Example usage:
163+
<pre>
164+
int bom = CSV.getByteOrderMark(header);
165+
if (bom>-1) header = header.substring(bom);
166+
</pre>
167+
*/
168+
public static int getByteOrderMark(String str){
169+
170+
if (str.length()<2) return -1;
171+
172+
int a=-1, b=-1, c=-1, d=-1;
173+
if (str.length()>1){
174+
a = (int) str.charAt(0);
175+
b = (int) str.charAt(1);
176+
if (a==254 && b==255) return 2; //UTF-16 (BE)
177+
if (b==255 && b==254) return 2; //UTF-16 (LE)
178+
}
179+
180+
if (str.length()>2){
181+
c = (int) str.charAt(2);
182+
if (a==239 && b==187 && c==191) return 3; //UTF-8
183+
if (a==43 && b==47 && c==118) return 3; //UTF-7
184+
if (a==247 && b==100 && c==76) return 3; //UTF-1
185+
}
186+
187+
if (str.length()>3){
188+
d = (int) str.charAt(3);
189+
if (a==0 && b==0 && c==254 && d==255) return 4; //UTF-32 (BE)
190+
if (a==255 && b==254 && c==0 && d==0) return 4; //UTF-32 (LE)
191+
}
192+
193+
return -1;
194+
}
195+
196+
197+
//**************************************************************************
198+
//** startsWithByteOrderMark
199+
//**************************************************************************
200+
/** Returns true if the given string starts with a Byte Order Mark (BOM)
201+
*/
202+
public static boolean startsWithByteOrderMark(String str){
203+
return getByteOrderMark(str)>-1;
204+
}
205+
206+
108207
//**************************************************************************
109208
//** getValue
110209
//**************************************************************************

0 commit comments

Comments
 (0)