Skip to content

Commit 16842dd

Browse files
olim7tAlexandre Dutra
authored andcommitted
JAVA-1150: Add example and FAQ entry about ByteBuffer/BLOB.
1 parent dbbc40b commit 16842dd

4 files changed

Lines changed: 273 additions & 0 deletions

File tree

changelog/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
- [improvement] JAVA-1164: Clarify documentation on Host.listenAddress and broadcastAddress.
2222
- [improvement] JAVA-1171: Add Host method to determine if DSE Graph is enabled.
2323
- [improvement]: JAVA-1069: Bootstrap driver-examples module.
24+
- [documentation] JAVA-1150: Add example and FAQ entry about ByteBuffer/BLOB.
2425

2526
Merged from 2.1 branch:
2627

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
/*
2+
* Copyright (C) 2012-2015 DataStax Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.datastax.driver.examples.datatypes;
17+
18+
import com.datastax.driver.core.*;
19+
import com.datastax.driver.core.utils.Bytes;
20+
import com.google.common.collect.ImmutableMap;
21+
22+
import java.io.*;
23+
import java.nio.ByteBuffer;
24+
import java.nio.channels.FileChannel;
25+
import java.util.Map;
26+
27+
/**
28+
* Inserts and retrieves values in BLOB columns.
29+
* <p/>
30+
* By default, the Java driver maps this type to {@link java.nio.ByteBuffer}. The ByteBuffer API is a bit tricky to use
31+
* at times, so we will show common pitfalls as well. We strongly recommend that you read the {@link java.nio.Buffer}
32+
* and {@link ByteBuffer} API docs and become familiar with the capacity, limit and position properties.
33+
* <a href="http://tutorials.jenkov.com/java-nio/buffers.html">This tutorial</a> might also help.
34+
* <p/>
35+
* Preconditions:
36+
* - a Cassandra cluster is running and accessible through the contact points identified by CONTACT_POINTS and PORT;
37+
* - FILE references an existing file.
38+
* <p/>
39+
* Side effects:
40+
* - creates a new keyspace "examples" in the cluster. If a keyspace with this name already exists, it will be reused;
41+
* - creates a table "examples.blobs". If it already exists, it will be reused;
42+
* - inserts data in the table.
43+
*/
44+
public class Blobs {
45+
46+
static String[] CONTACT_POINTS = {"127.0.0.1"};
47+
static int PORT = 9042;
48+
49+
static File FILE = new File(Blobs.class.getResource("/cassandra_logo.png").getFile());
50+
51+
public static void main(String[] args) throws IOException {
52+
Cluster cluster = null;
53+
try {
54+
cluster = Cluster.builder()
55+
.addContactPoints(CONTACT_POINTS).withPort(PORT)
56+
.build();
57+
Session session = cluster.connect();
58+
59+
createSchema(session);
60+
allocateAndInsert(session);
61+
retrieveSimpleColumn(session);
62+
retrieveMapColumn(session);
63+
insertConcurrent(session);
64+
insertFromAndRetrieveToFile(session);
65+
} finally {
66+
if (cluster != null) cluster.close();
67+
}
68+
}
69+
70+
private static void createSchema(Session session) {
71+
session.execute("CREATE KEYSPACE IF NOT EXISTS examples " +
72+
"WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}");
73+
session.execute("CREATE TABLE IF NOT EXISTS examples.blobs(k int PRIMARY KEY, b blob, m map<text, blob>)");
74+
}
75+
76+
private static void allocateAndInsert(Session session) {
77+
// One way to get a byte buffer is to allocate it and fill it yourself:
78+
ByteBuffer buffer = ByteBuffer.allocate(16);
79+
while (buffer.hasRemaining())
80+
buffer.put((byte) 0xFF);
81+
82+
// Don't forget to flip! The driver expects a buffer that is ready for reading. That is, it will consider all
83+
// the data between buffer.position() and buffer.limit().
84+
// Right now we are positioned at the end because we just finished writing, so if we passed the buffer as-is it
85+
// would appear to be empty:
86+
assert buffer.limit() - buffer.position() == 0;
87+
88+
buffer.flip();
89+
// Now position is back to the beginning, so the driver will see all 16 bytes.
90+
assert buffer.limit() - buffer.position() == 16;
91+
92+
session.execute("INSERT INTO examples.blobs (k, b, m) VALUES (1, ?, ?)",
93+
buffer, ImmutableMap.of("test", buffer));
94+
}
95+
96+
private static void retrieveSimpleColumn(Session session) {
97+
Row row = session.execute("SELECT b, m FROM examples.blobs WHERE k = 1").one();
98+
99+
ByteBuffer buffer = row.getBytes("b");
100+
101+
// The driver always returns buffers that are ready for reading.
102+
assert buffer.limit() - buffer.position() == 16;
103+
104+
// One way to read from the buffer is to use absolute getters. Do NOT start reading at index 0, as the buffer
105+
// might start at a different position (we'll see an example of that later).
106+
for (int i = buffer.position(); i < buffer.limit(); i++) {
107+
byte b = buffer.get(i);
108+
assert b == (byte) 0xFF;
109+
}
110+
111+
// Another way is to use relative getters.
112+
while (buffer.hasRemaining()) {
113+
byte b = buffer.get();
114+
assert b == (byte) 0xFF;
115+
}
116+
// Note that relative getters change the position, so when we're done reading we're at the end again.
117+
assert buffer.position() == buffer.limit();
118+
119+
// Reset the position for the next operation.
120+
buffer.flip();
121+
122+
// Yet another way is to convert the buffer to a byte array. Do NOT use buffer.array(), because it returns the
123+
// buffer's *backing array*, which is not the same thing as its contents:
124+
// - not all byte buffers have backing arrays
125+
// - even then, the backing array might be larger than the buffer's contents
126+
//
127+
// The driver provides a utility method that handles those details for you:
128+
byte[] array = Bytes.getArray(buffer);
129+
assert array.length == 16;
130+
for (byte b : array) {
131+
assert b == (byte) 0xFF;
132+
}
133+
}
134+
135+
private static void retrieveMapColumn(Session session) {
136+
Row row = session.execute("SELECT b, m FROM examples.blobs WHERE k = 1").one();
137+
138+
// The map columns illustrates the pitfalls with position() and array().
139+
Map<String, ByteBuffer> m = row.getMap("m", String.class, ByteBuffer.class);
140+
ByteBuffer buffer = m.get("test");
141+
142+
// We did get back a buffer that contains 16 bytes as expected.
143+
assert buffer.limit() - buffer.position() == 16;
144+
// However, it is not positioned at 0. And you can also see that its backing array contains more than 16 bytes.
145+
// What happens is that the buffer is a "view" of the last 16 of a 32-byte array.
146+
// This is an implementation detail and you shouldn't have to worry about it if you process the buffer correctly
147+
// (don't iterate from 0, use Bytes.getArray()).
148+
assert buffer.position() == 16;
149+
assert buffer.array().length == 32;
150+
}
151+
152+
private static void insertConcurrent(Session session) {
153+
PreparedStatement preparedStatement = session.prepare("INSERT INTO examples.blobs (k, b) VALUES (1, :b)");
154+
155+
// This is another convenient utility provided by the driver. It's useful for tests.
156+
ByteBuffer buffer = Bytes.fromHexString("0xffffff");
157+
158+
// When you pass a byte buffer to a bound statement, it creates a shallow copy internally with the
159+
// buffer.duplicate() method.
160+
BoundStatement boundStatement = preparedStatement.bind();
161+
boundStatement.setBytes("b", buffer);
162+
163+
// This means you can now move in the original buffer, without affecting the insertion if it happens later.
164+
buffer.position(buffer.limit());
165+
166+
session.execute(boundStatement);
167+
Row row = session.execute("SELECT b FROM examples.blobs WHERE k = 1").one();
168+
assert Bytes.toHexString(row.getBytes("b")).equals("0xffffff");
169+
170+
buffer.flip();
171+
172+
// HOWEVER duplicate() only performs a shallow copy. The two buffers still share the same contents. So if you
173+
// modify the contents of the original buffer, this will affect another execution of the bound statement.
174+
buffer.put(0, (byte) 0xaa);
175+
session.execute(boundStatement);
176+
row = session.execute("SELECT b FROM examples.blobs WHERE k = 1").one();
177+
assert Bytes.toHexString(row.getBytes("b")).equals("0xaaffff");
178+
179+
// This will also happen if you use the async API, e.g. create the bound statement, call executeAsync() on it
180+
// and reuse the buffer immediately.
181+
182+
// If you reuse buffers concurrently and want to avoid those issues, perform a deep copy of the buffer before
183+
// passing it to the bound statement.
184+
int startPosition = buffer.position();
185+
ByteBuffer buffer2 = ByteBuffer.allocate(buffer.limit() - startPosition);
186+
buffer2.put(buffer);
187+
buffer.position(startPosition);
188+
buffer2.flip();
189+
boundStatement.setBytes("b", buffer2);
190+
session.execute(boundStatement);
191+
192+
// Note: unlike BoundStatement, SimpleStatement does not duplicate its arguments, so even the position will be
193+
// affected if you change it before executing the statement. Again, resort to deep copies if required.
194+
}
195+
196+
private static void insertFromAndRetrieveToFile(Session session) throws IOException {
197+
ByteBuffer buffer = readAll(FILE);
198+
session.execute("INSERT INTO examples.blobs (k, b) VALUES (1, ?)", buffer);
199+
200+
File tmpFile = File.createTempFile("blob", ".png");
201+
System.out.printf("Writing retrieved buffer to %s%n", tmpFile.getAbsoluteFile());
202+
203+
Row row = session.execute("SELECT b FROM examples.blobs WHERE k = 1").one();
204+
writeAll(row.getBytes("b"), tmpFile);
205+
}
206+
207+
// Note:
208+
// - this is written with Java 6 APIs; if you're on a more recent version this can be improved (try-with-resources,
209+
// new-new io...)
210+
// - this reads the whole file in memory in one go. If your file does not fit in memory you should probably not
211+
// insert it into Cassandra either ;)
212+
private static ByteBuffer readAll(File file) throws IOException {
213+
FileInputStream inputStream = null;
214+
boolean threw = false;
215+
try {
216+
inputStream = new FileInputStream(file);
217+
FileChannel channel = inputStream.getChannel();
218+
ByteBuffer buffer = ByteBuffer.allocate((int) channel.size());
219+
channel.read(buffer);
220+
buffer.flip();
221+
return buffer;
222+
} catch (IOException e) {
223+
threw = true;
224+
throw e;
225+
} finally {
226+
close(inputStream, threw);
227+
}
228+
}
229+
230+
private static void writeAll(ByteBuffer buffer, File file) throws IOException {
231+
FileOutputStream outputStream = null;
232+
boolean threw = false;
233+
try {
234+
outputStream = new FileOutputStream(file);
235+
FileChannel channel = outputStream.getChannel();
236+
channel.write(buffer);
237+
} catch (IOException e) {
238+
threw = true;
239+
throw e;
240+
} finally {
241+
close(outputStream, threw);
242+
}
243+
}
244+
245+
private static void close(Closeable inputStream, boolean threw) throws IOException {
246+
if (inputStream != null)
247+
try {
248+
inputStream.close();
249+
} catch (IOException e) {
250+
if (!threw) throw e; // else preserve original exception
251+
}
252+
}
253+
}
12.3 KB
Loading

faq/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Native protocol v1 does not support paging, but you can emulate it in
1111
CQL with `LIMIT` and the `token()` function. See
1212
[this conversation](https://groups.google.com/a/lists.datastax.com/d/msg/java-driver-user/U2KzAHruWO4/6vDmUVDDkOwJ) on the mailing list.
1313

14+
1415
### Can I check if a conditional statement (lightweight transaction) was successful?
1516

1617
When executing a conditional statement, the `ResultSet` will contain a single `Row` with a
@@ -36,6 +37,7 @@ Note that, unlike manual inspection, `wasApplied` does not consume the first row
3637

3738
[wasApplied]: http://docs.datastax.com/en/drivers/java/2.1/com/datastax/driver/core/ResultSet.html#wasApplied--
3839

40+
3941
### What is a parameterized statement and how can I use it?
4042

4143
Starting with Cassandra 2.0, normal statements (that is non-prepared statements) do
@@ -49,12 +51,14 @@ session.execute( "INSERT INTO contacts (email, firstname, lastname)
4951

5052
See [Simple statements](../manual/statements/simple/) for more information.
5153

54+
5255
### Does a parameterized statement escape parameters?
5356

5457
A parameterized statement sends the values of parameters separate from the query
5558
(similar to the way a prepared statement does) as bytes so there is no need to escape
5659
parameters.
5760

61+
5862
### What's the difference between a parameterized statement and a Prepared statement?
5963

6064
The only similarity between a parameterized statement and a prepared statement is in
@@ -67,6 +71,7 @@ the way that the parameters are sent. The difference is that a prepared statemen
6771

6872
See [Prepared statements](../manual/statements/prepared/) for more information.
6973

74+
7075
### Can I combine `PreparedStatements` and normal statements in a batch?
7176

7277
Yes. A batch can include both bound statements and simple statements:
@@ -82,6 +87,7 @@ batch.add(new SimpleStatement( "INSERT INTO contacts (email, firstname, lastname
8287
session.execute(batch);
8388
```
8489

90+
8591
### Can I get the raw bytes of a text column?
8692

8793
If you need to access the raw bytes of a text column, call the
@@ -90,6 +96,7 @@ If you need to access the raw bytes of a text column, call the
9096
Trying to use `Row.getBytes("columnName")` for the same purpose results in an
9197
exception, as the `getBytes` method can only be used if the column has the CQL type `BLOB`.
9298

99+
93100
### How do I increment counters with `QueryBuilder`?
94101

95102
Considering the following query:
@@ -106,6 +113,7 @@ Statement query = QueryBuilder.update("clickstream")
106113
.where(eq("userid", id));
107114
```
108115

116+
109117
### Is there a way to control the batch size of the results returned from a query?
110118

111119
Use the `setFetchSize()` method on your `Statement` object. The fetch size controls
@@ -118,6 +126,7 @@ only affects what is retrieved at a time, not the overall number of rows.
118126

119127
See [Paging](../manual/paging/) for more information.
120128

129+
121130
### What's the difference between using `setFetchSize()` and `LIMIT`?
122131

123132
Basically, `LIMIT` controls the maximum number of results returned by the query,
@@ -128,3 +137,13 @@ For example, if your limit is 30 and your fetch size is 10, the `ResultSet` will
128137
rows each.
129138

130139
See [Paging](../manual/paging/) for more information.
140+
141+
142+
### I'm reading a BLOB column and the driver returns incorrect data.
143+
144+
Check your code to ensure that you read the returned `ByteBuffer` correctly. `ByteBuffer` is a very error-prone API,
145+
and we've had many reports where the problem turned out to be in user code.
146+
147+
See [Blobs.java] in the `driver-examples` module for some examples and explanations.
148+
149+
[Blobs.java]: https://github.com/datastax/java-driver/tree/3.0.x/driver-examples/src/main/java/com/datastax/driver/examples/datatypes/Blobs.java

0 commit comments

Comments
 (0)