/*
 * Copyright 2017 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file DataGen.cpp
 * @brief Sample MapD Client code for generating random data that can be
 * inserted into a given MapD table.
 *
 * Usage: <table> <database> <user> <passwd> [<num rows>] [hostname[:port]]
 * The program executes the following:
 * 1. connect to mapd_server at hostname:port (default: localhost:9091)
 *    with <database>/<user>/<passwd>
 * 2. get the table descriptor of <table>
 * 3. randomly generate tab-delimited data that can be imported to <table>
* 4. disconnect from mapd_server * * Copyright (c) 2014 MapD Technologies, Inc. All rights reserved. **/ #include #include #include #include #include #include #include #include // include files for Thrift and MapD Thrift Services #include "gen-cpp/MapD.h" #include #include #include using namespace ::apache::thrift; using namespace ::apache::thrift::protocol; using namespace ::apache::thrift::transport; // Thrift uses boost::shared_ptr instead of std::shared_ptr using boost::shared_ptr; namespace { // anonymous namespace for private functions std::default_random_engine random_gen(std::random_device{}()); // returns a random int as string std::string gen_int() { std::uniform_int_distribution dist(INT_MIN, INT_MAX); return std::to_string(dist(random_gen)); } // returns a random float as string std::string gen_real() { std::uniform_real_distribution dist(0.0, 1.0); return std::to_string(dist(random_gen)); } const int max_str_len = 100; // returns a random string of length up to max_str_len std::string gen_string() { std::string chars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890"); std::uniform_int_distribution<> char_dist(0, chars.size() - 1); std::uniform_int_distribution<> len_dist(0, max_str_len); int len = len_dist(random_gen); std::string s(len, ' '); for (int i = 0; i < len; i++) s[i] = chars[char_dist(random_gen)]; return s; } // returns a random boolean as string std::string gen_bool() { std::uniform_int_distribution dist(0, 1); if (dist(random_gen) == 1) return "t"; return "f"; } // returns a random time as string std::string gen_time() { std::uniform_int_distribution dist(0, INT32_MAX); time_t t = dist(random_gen); std::tm* tm_ptr = gmtime(&t); char buf[9]; strftime(buf, 9, "%T", tm_ptr); return buf; } // returns a random timestamp as string std::string gen_timestamp() { std::uniform_int_distribution dist(0, INT32_MAX); time_t t = dist(random_gen); std::tm* tm_ptr = gmtime(&t); char buf[20]; strftime(buf, 20, "%F %T", tm_ptr); return buf; } 
// returns a random date as string std::string gen_date() { std::uniform_int_distribution dist(0, INT32_MAX); time_t t = dist(random_gen); std::tm* tm_ptr = gmtime(&t); char buf[11]; strftime(buf, 11, "%F", tm_ptr); return buf; } // output to std::cout num_rows number of rows conforming to row_desc. // each column value is separated by delimiter. void data_gen(const TRowDescriptor& row_desc, const char* delimiter, int num_rows) { for (int i = 0; i < num_rows; i++) { bool not_first = false; for (auto p = row_desc.begin(); p != row_desc.end(); ++p) { if (not_first) std::cout << delimiter; else not_first = true; switch (p->col_type.type) { case TDatumType::SMALLINT: case TDatumType::INT: case TDatumType::BIGINT: std::cout << gen_int(); break; case TDatumType::FLOAT: case TDatumType::DOUBLE: case TDatumType::DECIMAL: std::cout << gen_real(); break; case TDatumType::STR: std::cout << gen_string(); break; case TDatumType::TIME: std::cout << gen_time(); break; case TDatumType::TIMESTAMP: case TDatumType::INTERVAL_DAY_TIME: case TDatumType::INTERVAL_YEAR_MONTH: std::cout << gen_timestamp(); break; case TDatumType::DATE: std::cout << gen_date(); break; case TDatumType::BOOL: std::cout << gen_bool(); break; } } std::cout << std::endl; } } } int main(int argc, char** argv) { std::string server_host("localhost"); // default to localhost int port = 9091; // default port number int num_rows = 1000000; // default number of rows to generate const char* delimiter = "\t"; // only support tab delimiter for now if (argc < 5) { std::cout << "Usage:
[] [hostname[:port]]" << std::endl; return 1; } std::string table_name(argv[1]); std::string db_name(argv[2]); std::string user_name(argv[3]); std::string passwd(argv[4]); if (argc >= 6) { num_rows = atoi(argv[5]); if (argc >= 7) { char* host = strtok(argv[6], ":"); char* portno = strtok(NULL, ":"); server_host = host; if (portno != NULL) port = atoi(portno); } } shared_ptr socket(new TSocket(server_host, port)); shared_ptr transport(new TBufferedTransport(socket)); shared_ptr protocol(new TBinaryProtocol(transport)); MapDClient client(protocol); TSessionId session; try { transport->open(); // open transport client.connect(session, user_name, passwd, db_name); // connect to mapd_server TTableDetails table_details; client.get_table_details(table_details, session, table_name); data_gen(table_details.row_desc, delimiter, num_rows); client.disconnect(session); // disconnect from mapd_server transport->close(); // close transport } catch (TMapDException& e) { std::cerr << e.error_msg << std::endl; return 1; } catch (TException& te) { std::cerr << "Thrift error: " << te.what() << std::endl; return 1; } return 0; }