forked from heavyai/heavydb
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStreamInsertSimple.cpp
More file actions
133 lines (121 loc) · 4.36 KB
/
StreamInsertSimple.cpp
File metadata and controls
133 lines (121 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*
* Copyright 2017 MapD Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file StreamInsertSimple.cpp
* @author Wei Hong <wei@mapd.com>
* @brief Sample MapD Client code for inserting a stream of rows from stdin
* to a MapD table.
*
* Copyright (c) 2014 MapD Technologies, Inc. All rights reserved.
**/
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <boost/tokenizer.hpp>
// include files for Thrift and MapD Thrift Services
#include "gen-cpp/MapD.h"
#include <thrift/transport/TSocket.h>
#include <thrift/protocol/TBinaryProtocol.h>
#include <thrift/transport/TBufferTransports.h>
using namespace ::apache::thrift;
using namespace ::apache::thrift::protocol;
using namespace ::apache::thrift::transport;
// Thrift uses boost::shared_ptr instead of std::shared_ptr
using boost::shared_ptr;
namespace {
// anonymous namespace for private functions
const size_t INSERT_BATCH_SIZE = 10000;
// reads tab-delimited rows from std::cin and load them to
// table_name in batches of size INSERT_BATCH_SIZE until done
void stream_insert(MapDClient& client,
const TSessionId session,
const std::string& table_name,
const TRowDescriptor& row_desc,
const char* delimiter) {
std::string line;
std::vector<TStringRow> input_rows;
TStringRow row;
boost::char_separator<char> sep{delimiter, "", boost::keep_empty_tokens};
while (std::getline(std::cin, line)) {
row.cols.clear();
boost::tokenizer<boost::char_separator<char>> tok{line, sep};
for (const auto& s : tok) {
TStringValue ts;
ts.str_val = s;
ts.is_null = s.empty();
row.cols.push_back(ts);
}
if (row.cols.size() != row_desc.size()) {
std::cerr << "Incorrect number of columns: (" << row.cols.size() << " vs " << row_desc.size() << ") " << line
<< std::endl;
continue;
}
input_rows.push_back(row);
if (input_rows.size() >= INSERT_BATCH_SIZE) {
try {
client.load_table(session, table_name, input_rows);
} catch (TMapDException& e) {
std::cerr << e.error_msg << std::endl;
}
input_rows.clear();
}
}
// load remaining rowset if any
if (input_rows.size() > 0)
client.load_table(session, table_name, input_rows);
}
}
int main(int argc, char** argv) {
std::string server_host("localhost"); // default to localohost
int port = 9091; // default port number
const char* delimiter = "\t"; // only support tab delimiter for now
if (argc < 5) {
std::cout << "Usage: <table> <database> <user> <password> [hostname[:port]]" << std::endl;
return 1;
}
std::string table_name(argv[1]);
std::string db_name(argv[2]);
std::string user_name(argv[3]);
std::string passwd(argv[4]);
if (argc >= 6) {
char* host = strtok(argv[5], ":");
char* portno = strtok(NULL, ":");
server_host = host;
if (portno != NULL)
port = atoi(portno);
}
shared_ptr<TTransport> socket(new TSocket(server_host, port));
shared_ptr<TTransport> transport(new TBufferedTransport(socket));
shared_ptr<TProtocol> protocol(new TBinaryProtocol(transport));
MapDClient client(protocol);
TSessionId session;
try {
transport->open(); // open transport
client.connect(session, user_name, passwd, db_name); // connect to mapd_server
TTableDetails table_details;
client.get_table_details(table_details, session, table_name);
stream_insert(client, session, table_name, table_details.row_desc, delimiter);
client.disconnect(session); // disconnect from mapd_server
transport->close(); // close transport
} catch (TMapDException& e) {
std::cerr << e.error_msg << std::endl;
return 1;
} catch (TException& te) {
std::cerr << "Thrift error: " << te.what() << std::endl;
return 1;
}
return 0;
}