From 59033bdefcf9941fd26b682528a110784a5e821f Mon Sep 17 00:00:00 2001 From: cclauss Date: Fri, 16 Nov 2018 07:09:05 +0100 Subject: [PATCH 001/111] Travis CI: Drop Python 2.6 and add Python 3.6 Python 2.6 end of life was more than five years ago. --- .travis.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 60d000eb..77becd8d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,10 @@ language: python python: - - "2.6" - "2.7" + - "3.6" +matrix: + allow_failures: + - python: "3.6" +install: pip install flake8 +before_script: flake8 ./bin/q --count --select=E901,E999,F821,F822,F823 --show-source --statistics script: test/test-all From ebba4f8bb34a6f550d51a2960b96b35fe9cafada Mon Sep 17 00:00:00 2001 From: cmpt376Kor <45115872+cmpt376Kor@users.noreply.github.com> Date: Sat, 17 Nov 2018 18:24:32 -0800 Subject: [PATCH 002/111] Fixes some confusion in README.markdown --- README.markdown | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.markdown b/README.markdown index 8106c9f2..85863af7 100644 --- a/README.markdown +++ b/README.markdown @@ -3,9 +3,9 @@ # q - Text as Data q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). -q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and column type detection, and provides full support for multiple encodings. +q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and type detection, and q provides full support for multiple character encodings. -q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It contains everything you need to download and use q in no time. +q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). 
It contains everything you need to download and use q immediately. ## Installation. Extremely simple. From 167abe37838274add48c9fe1b3cbad6a9d812bcf Mon Sep 17 00:00:00 2001 From: cclauss Date: Sat, 1 Dec 2018 00:59:40 +0100 Subject: [PATCH 003/111] Old style exceptions --> new style for Python 3 --- bin/q | 62 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/bin/q b/bin/q index 0e3c2dfe..721375dd 100755 --- a/bin/q +++ b/bin/q @@ -136,7 +136,7 @@ class Sqlite3DB(object): def store_db_to_disk_fast(self,sqlite_db_filename,table_names_mapping): try: import sqlitebck - except ImportError, e: + except ImportError as e: msg = "sqlitebck python module cannot be found - fast store to disk cannot be performed. Note that for now, sqlitebck is not packaged as part of q. In order to use the fast method, you need to manually `pip install sqlitebck` into your python environment. We obviously consider this as a bug and it will be fixed once proper packaging will be done, making the fast method the standard one." raise MissingSqliteBckModuleException(msg) @@ -701,12 +701,12 @@ def encoded_csv_reader(encoding, f, dialect, **kwargs): else: for row in csv_reader: yield row - except ValueError,e: + except ValueError as e: if e.message is not None and e.message.startswith('could not convert string to'): raise CouldNotConvertStringToNumericValueException(e.message) else: raise CouldNotParseInputException(str(e)) - except Exception,e: + except Exception as e: if str(e).startswith("field larger than field limit"): raise ColumnMaxLengthLimitExceededException(str(e)) elif 'universal-newline' in str(e): @@ -745,17 +745,17 @@ class MaterializedFileState(object): BOM = self.f.read(3) if BOM != '\xef\xbb\xbf': raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM)) - except Exception,e: + except Exception as e: raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. 
Error message is ' + str(e)) csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect) try: for col_vals in csv_reader: self.lines_read += 1 yield col_vals - except ColumnMaxLengthLimitExceededException,e: + except ColumnMaxLengthLimitExceededException as e: msg = "Column length is larger than the maximum. Offending file is '%s' - Line is %s, counting from 1 (encoding %s). The line number is the raw line number of the file, ignoring whether there's a header or not" % (self.filename,self.lines_read + 1,self.encoding) raise ColumnMaxLengthLimitExceededException(msg) - except UniversalNewlinesExistException,e2: + except UniversalNewlinesExistException as e2: # No need to translate the exception, but we want it to be explicitly defined here for clarity raise UniversalNewlinesExistException() @@ -876,11 +876,11 @@ class TableCreator(object): raise MissingHeaderException("Header line is expected but missing in file %s" % filename) total_data_lines_read += mfs.lines_read - (1 if self.skip_header else 0) - except StrictModeColumnCountMismatchException,e: + except StrictModeColumnCountMismatchException as e: raise ColumnCountMismatchException( 'Strict mode - Expected %s columns instead of %s columns in file %s row %s. Either use relaxed/fluffy modes or check your delimiter' % ( e.expected_col_count, e.actual_col_count, normalized_filename(mfs.filename), mfs.lines_read)) - except FluffyModeColumnCountMismatchException,e: + except FluffyModeColumnCountMismatchException as e: raise ColumnCountMismatchException( 'Deprecated fluffy mode - Too many columns in file %s row %s (%s fields instead of %s fields). 
Consider moving to either relaxed or strict mode' % ( normalized_filename(mfs.filename), mfs.lines_read, e.actual_col_count, e.expected_col_count)) @@ -1346,40 +1346,40 @@ class QTextAsData(object): warnings = warnings, error = error) - except EmptyDataException,e: + except EmptyDataException as e: warnings.append(QWarning(e,"Warning - data is empty")) - except MissingHeaderException,e: + except MissingHeaderException as e: error = QError(e,e.msg,117) - except FileNotFoundException, e: + except FileNotFoundException as e: error = QError(e,e.msg,30) - except sqlite3.OperationalError, e: + except sqlite3.OperationalError as e: msg = str(e) error = QError(e,"query error: %s" % msg,1) if "no such column" in msg and effective_input_params.skip_header: warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')) - except ColumnCountMismatchException, e: + except ColumnCountMismatchException as e: error = QError(e,e.msg,2) - except (UnicodeDecodeError, UnicodeError), e: + except (UnicodeDecodeError, UnicodeError) as e: error = QError(e,"Cannot decode data. Try to change the encoding by setting it using the -e parameter. Error:%s" % e,3) - except BadHeaderException, e: + except BadHeaderException as e: error = QError(e,"Bad header row: %s" % e.msg,35) - except CannotUnzipStdInException,e: + except CannotUnzipStdInException as e: error = QError(e,"Cannot decompress standard input. Pipe the input through zcat in order to decompress.",36) - except UniversalNewlinesExistException,e: + except UniversalNewlinesExistException as e: error = QError(e,"Data contains universal newlines. Run q with -U to use universal newlines. Please note that q still doesn't support universal newlines for .gz files or for stdin. 
Route the data through a regular file to use -U.",103) - except UnprovidedStdInException,e: + except UnprovidedStdInException as e: error = QError(e,"Standard Input must be provided in order to use it as a table",61) - except CouldNotConvertStringToNumericValueException,e: + except CouldNotConvertStringToNumericValueException as e: error = QError(e,"Could not convert string to a numeric value. Did you use `-w nonnumeric` with unquoted string values? Error: %s" % e.msg,58) - except CouldNotParseInputException,e: + except CouldNotParseInputException as e: error = QError(e,"Could not parse the input. Please make sure to set the proper -w input-wrapping parameter for your input, and that you use the proper input encoding (-e). Error: %s" % e.msg,59) - except ColumnMaxLengthLimitExceededException,e: + except ColumnMaxLengthLimitExceededException as e: error = QError(e,e.msg,31) - except MissingSqliteBckModuleException, e: + except MissingSqliteBckModuleException as e: error = QError(e,e.msg,79) - except KeyboardInterrupt,e: + except KeyboardInterrupt as e: warnings.append(QWarning(e,"Interrupted")) - except Exception, e: + except Exception as e: error = QError(e,repr(e),199) return QOutput(warnings = warnings,error = error , metadata=QMetadata(table_structures=table_structures,data_loads = data_loads)) @@ -1505,10 +1505,10 @@ class QOutputPrinter(object): def print_output(self,f_out,f_err,results): try: self._print_output(f_out,f_err,results) - except (UnicodeEncodeError, UnicodeError), e: + except (UnicodeEncodeError, UnicodeError) as e: print >>f_err, "Cannot encode data. Error:%s" % e sys.exit(3) - except IOError, e: + except IOError as e: if e.errno == 32: # broken pipe, that's ok pass @@ -1561,13 +1561,13 @@ class QOutputPrinter(object): row_str.append(fmt_str % "") f_out.write(self.output_params.delimiter.join(row_str) + "\n") - except (UnicodeEncodeError, UnicodeError), e: + except (UnicodeEncodeError, UnicodeError) as e: print >>sys.stderr, "Cannot encode data. 
Error:%s" % e sys.exit(3) - except TypeError,e: + except TypeError as e: print >>sys.stderr, "Error while formatting output: %s" % e sys.exit(4) - except IOError, e: + except IOError as e: if e.errno == 32: # broken pipe, that's ok pass @@ -1580,7 +1580,7 @@ class QOutputPrinter(object): try: # Prevent python bug when order of pipe shutdowns is reversed f_out.flush() - except IOError, e: + except IOError as e: pass def run_standalone(): @@ -1744,7 +1744,7 @@ def run_standalone(): print >>sys.stderr,"Query cannot be empty (query number %s)" % (idx+1) sys.exit(1) - except Exception,e: + except Exception as e: print >>sys.stderr,"Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding) sys.exit(3) From 881837b3d9f60f56b39f82edfb3faa1752a071f7 Mon Sep 17 00:00:00 2001 From: cclauss Date: Sat, 1 Dec 2018 22:39:19 +0100 Subject: [PATCH 004/111] Use print() function in both Python 2 and Python 3 --- bin/q | 73 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/bin/q b/bin/q index 721375dd..4a555c91 100755 --- a/bin/q +++ b/bin/q @@ -27,6 +27,7 @@ # # Run with --help for command line details # +from __future__ import print_function q_version = "1.8" __all__ = [ 'QTextAsData' ] @@ -166,7 +167,7 @@ class Sqlite3DB(object): def update_many(self, sql, params): try: if self.show_sql: - print sql, " params: " + str(params) + print(sql, " params: " + str(params)) self.cursor.executemany(sql, params) finally: pass # cursor.close() @@ -174,7 +175,7 @@ class Sqlite3DB(object): def execute_and_fetch(self, q): try: if self.show_sql: - print repr(q) + print(repr(q)) self.cursor.execute(q) if self.cursor.description is not None: # we decode the column names, so they can be encoded to any output format later on @@ -542,7 +543,7 @@ class TableColumnInferer(object): raise Exception('Unknown parsing mode %s' % self.mode) if self.column_count == 1 and 
self.expected_column_count != 1: - print >>sys.stderr, "Warning: column count is one - did you provide the correct delimiter?" + print("Warning: column count is one - did you provide the correct delimiter?", file=sys.stderr) self.infer_column_types() @@ -677,7 +678,7 @@ class TableColumnInferer(object): if False in comparison and not self.skip_header: number_of_column_types = len(set(self.column_types)) if number_of_column_types == 1 and list(set(self.column_types))[0] == str: - print >>sys.stderr, 'Warning - There seems to be header line in the file, but -H has not been specified. All fields will be detected as text fields, and the header line will appear as part of the data' + print('Warning - There seems to be header line in the file, but -H has not been specified. All fields will be detected as text fields, and the header line will appear as part of the data', file=sys.stderr) def get_column_dict(self): return dict(zip(self.column_names, self.column_types)) @@ -1074,10 +1075,10 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): return max_lengths def print_credentials(): - print >>sys.stderr,"q version %s" % q_version - print >>sys.stderr,"Copyright (C) 2012-2017 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)" - print >>sys.stderr,"http://harelba.github.io/q/" - print >>sys.stderr + print("q version %s" % q_version, file=sys.stderr) + print("Copyright (C) 2012-2017 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) + print("http://harelba.github.io/q/", file=sys.stderr) + print(file=sys.stderr) class QWarning(object): def __init__(self,exception,msg): @@ -1325,11 +1326,11 @@ class QTextAsData(object): if save_db_to_disk_filename is not None: self.db.done() dump_start_time = time.time() - print >>sys.stderr,"Data has been loaded in %4.3f seconds" % (dump_start_time - load_start_time) - print >>sys.stderr,"Saving data to db file %s" % save_db_to_disk_filename + print("Data has been loaded in %4.3f 
seconds" % (dump_start_time - load_start_time), file=sys.stderr) + print("Saving data to db file %s" % save_db_to_disk_filename, file=sys.stderr) self.db.store_db_to_disk(save_db_to_disk_filename,sql_object.get_qtable_name_effective_table_names(),save_db_to_disk_method) - print >>sys.stderr,"Data has been saved into %s . Saving has taken %4.3f seconds" % (save_db_to_disk_filename,time.time()-dump_start_time) - print >>sys.stderr,"Query to run on the database: %s;" % sql_object.get_effective_sql(True) + print("Data has been saved into %s . Saving has taken %4.3f seconds" % (save_db_to_disk_filename,time.time()-dump_start_time), file=sys.stderr) + print("Query to run on the database: %s;" % sql_object.get_effective_sql(True), file=sys.stderr) # TODO Propagate dump results using a different output class instead of an empty one return QOutput() @@ -1481,12 +1482,12 @@ class QOutputPrinter(object): def print_errors_and_warnings(self,f,results): if results.status == 'error': error = results.error - print >>f,error.msg + print(error.msg, file=f) if self.show_tracebacks: - print >>f,error.traceback + print(error.traceback, file=f) for warning in results.warnings: - print >>f,"%s" % warning.msg + print("%s" % warning.msg, file=f) def print_analysis(self,f_out,f_err,results): self.print_errors_and_warnings(f_err,results) @@ -1498,15 +1499,15 @@ class QOutputPrinter(object): return for table_structure in results.metadata.table_structures: - print >>f_out,"Table for file: %s" % normalized_filename(table_structure.filenames_str) + print("Table for file: %s" % normalized_filename(table_structure.filenames_str), file=f_out) for n,t in zip(table_structure.column_names,table_structure.column_types): - print >>f_out," `%s` - %s" % (n,t) + print(" `%s` - %s" % (n,t), file=f_out) def print_output(self,f_out,f_err,results): try: self._print_output(f_out,f_err,results) except (UnicodeEncodeError, UnicodeError) as e: - print >>f_err, "Cannot encode data. 
Error:%s" % e + print("Cannot encode data. Error:%s" % e, file=f_err) sys.exit(3) except IOError as e: if e.errno == 32: @@ -1562,10 +1563,10 @@ class QOutputPrinter(object): f_out.write(self.output_params.delimiter.join(row_str) + "\n") except (UnicodeEncodeError, UnicodeError) as e: - print >>sys.stderr, "Cannot encode data. Error:%s" % e + print("Cannot encode data. Error:%s" % e, file=sys.stderr) sys.exit(3) except TypeError as e: - print >>sys.stderr, "Error while formatting output: %s" % e + print("Error while formatting output: %s" % e, file=sys.stderr) sys.exit(4) except IOError as e: if e.errno == 32: @@ -1718,19 +1719,19 @@ def run_standalone(): if len(args) == 0 and options.query_filename is None: print_credentials() - print >>sys.stderr,"Must provide at least one query in the command line, or through a file with the -q parameter" + print("Must provide at least one query in the command line, or through a file with the -q parameter", file=sys.stderr) sys.exit(1) if options.query_filename is not None: if len(args) != 0: - print >>sys.stderr,"Can't provide both a query file and a query on the command line" + print("Can't provide both a query file and a query on the command line", file=sys.stderr) sys.exit(1) try: f = file(options.query_filename) query_strs = [f.read()] f.close() except: - print >>sys.stderr,"Could not read query from file %s" % options.query_filename + print("Could not read query from file %s" % options.query_filename, file=sys.stderr) sys.exit(1) else: query_strs = args @@ -1741,22 +1742,22 @@ def run_standalone(): query_strs[idx] = query_strs[idx].decode(options.query_encoding).strip() if len(query_strs[idx]) == 0: - print >>sys.stderr,"Query cannot be empty (query number %s)" % (idx+1) + print("Query cannot be empty (query number %s)" % (idx+1), file=sys.stderr) sys.exit(1) except Exception as e: - print >>sys.stderr,"Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding) + print("Could 
not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding), file=sys.stderr) sys.exit(3) if options.mode not in ['fluffy', 'relaxed', 'strict']: - print >>sys.stderr, "Parsing mode can be one of fluffy, relaxed or strict" + print("Parsing mode can be one of fluffy, relaxed or strict", file=sys.stderr) sys.exit(13) output_encoding = get_stdout_encoding(options.output_encoding) try: STDOUT = codecs.getwriter(output_encoding)(sys.stdout) except: - print >>sys.stderr,"Could not create output stream using output encoding %s" % (output_encoding) + print("Could not create output stream using output encoding %s" % (output_encoding), file=sys.stderr) sys.exit(200) # If the user flagged for a tab-delimited file then set the delimiter to tab @@ -1769,15 +1770,15 @@ def run_standalone(): if options.delimiter is None: options.delimiter = ' ' elif len(options.delimiter) != 1: - print >>sys.stderr, "Delimiter must be one character only" + print("Delimiter must be one character only", file=sys.stderr) sys.exit(5) if options.input_quoting_mode not in QTextAsData.input_quoting_modes.keys(): - print >>sys.stderr,"Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QTextAsData.input_quoting_modes.keys()),options.input_quoting_mode) + print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QTextAsData.input_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) sys.exit(55) if options.output_quoting_mode not in QOutputPrinter.output_quoting_modes.keys(): - print >>sys.stderr,"Output quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode) + print("Output quoting mode can only be one of %s. 
It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) sys.exit(56) if options.column_count is not None: @@ -1790,7 +1791,7 @@ def run_standalone(): try: codecs.lookup(options.encoding) except LookupError: - print >>sys.stderr, "Encoding %s could not be found" % options.encoding + print("Encoding %s could not be found" % options.encoding, file=sys.stderr) sys.exit(10) if options.output_delimiter: @@ -1812,22 +1813,22 @@ def run_standalone(): if max_column_length_limit < 1: raise Exception() except: - print >> sys.stderr, "Max column length limit must be a positive integer (%s)" % max_column_length_limit + print("Max column length limit must be a positive integer (%s)" % max_column_length_limit, file=sys.stderr) sys.exit(31) if options.save_db_to_disk_filename is not None: if options.analyze_only: - print >>sys.stderr,"Cannot save database to disk when running with -A (analyze-only) option." + print("Cannot save database to disk when running with -A (analyze-only) option.", file=sys.stderr) sys.exit(119) - print >>sys.stderr,"Going to save data into a disk database: %s" % options.save_db_to_disk_filename + print("Going to save data into a disk database: %s" % options.save_db_to_disk_filename, file=sys.stderr) if os.path.exists(options.save_db_to_disk_filename): - print >> sys.stderr, "Disk database file %s already exists." % options.save_db_to_disk_filename + print("Disk database file %s already exists." 
% options.save_db_to_disk_filename, file=sys.stderr) sys.exit(77) if options.save_db_to_disk_method is not None: if options.save_db_to_disk_method not in ['standard','fast']: - print >>sys.stderr,"save-db-to-disk method should be either standard or fast (%s)" % options.save_db_to_disk_method + print("save-db-to-disk method should be either standard or fast (%s)" % options.save_db_to_disk_method, file=sys.stderr) sys.exit(78) default_input_params = QInputParams(skip_header=options.skip_header, From ccccf2d50177708aa4bd649fea9d57d30a4322a7 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 7 Dec 2018 17:12:03 +0200 Subject: [PATCH 005/111] wip --- bin/q | 105 +++++++++++++++++++++++++++++++++--------------- test/test-all | 2 +- test/test-suite | 36 +++++++++++++++-- 3 files changed, 106 insertions(+), 37 deletions(-) diff --git a/bin/q b/bin/q index 4a555c91..0d6cf8db 100755 --- a/bin/q +++ b/bin/q @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python # Copyright (C) 2012-2018 Harel Ben-Attia # @@ -43,13 +43,17 @@ import codecs import locale import time import re -from ConfigParser import ConfigParser +from six.moves import configparser, range, filter import traceback import csv import hashlib import uuid -import cStringIO import math +import six + +if six.PY3: + long = int + unicode = six.text_type DEBUG = False @@ -179,7 +183,7 @@ class Sqlite3DB(object): self.cursor.execute(q) if self.cursor.description is not None: # we decode the column names, so they can be encoded to any output format later on - query_column_names = [c[0].decode('utf-8') for c in self.cursor.description] + query_column_names = [c[0] for c in self.cursor.description] else: query_column_names = None result = self.cursor.fetchall() @@ -222,7 +226,7 @@ class Sqlite3DB(object): def generate_create_table(self, table_name, column_names, column_dict): # Convert dict from python types to db types column_name_to_db_type = dict( - (n, self.type_names[t]) for n, t in 
column_dict.iteritems()) + (n, self.type_names[t]) for n, t in six.iteritems(column_dict)) column_defs = ','.join(['"%s" %s' % ( n.replace('"', '""'), column_name_to_db_type[n]) for n in column_names]) return 'CREATE TABLE %s (%s)' % (table_name, column_defs) @@ -418,12 +422,12 @@ class Sql(object): qtable_name] = effective_table_name def get_effective_sql(self,original_names=False): - if len(filter(lambda x: x is None, self.qtable_name_effective_table_names)) != 0: + if len(list(filter(lambda x: x is None, self.qtable_name_effective_table_names))) != 0: raise Exception('There are qtables without effective tables') effective_sql = [x for x in self.sql_parts] - for qtable_name, positions in self.qtable_name_positions.iteritems(): + for qtable_name, positions in six.iteritems(self.qtable_name_positions): for pos in positions: if not original_names: effective_sql[pos] = self.qtable_name_effective_table_names[ @@ -525,8 +529,8 @@ class TableColumnInferer(object): return type_list[0] else: # check for the number of types without nulls, - type_list_without_nulls = filter( - lambda x: x is not None, type_list) + type_list_without_nulls = list(filter( + lambda x: x is not None, type_list)) # If all the sample lines are of the same type, if len(set(type_list_without_nulls)) == 1: # return it @@ -579,7 +583,7 @@ class TableColumnInferer(object): (v, "Column name must be UTF-8 Compatible")) continue # We're checking for column duplication for each field in order to be able to still provide it along with other errors - if len(filter(lambda x: x == v,value_list)) > 1: + if len(list(filter(lambda x: x == v,value_list))) > 1: entry = (v, "Column name is duplicated") # Don't duplicate the error report itself if entry not in column_name_errors: @@ -611,7 +615,7 @@ class TableColumnInferer(object): # in relaxed mode, add columns to fill the missing ones self.header_row = self.header_row + \ ['c%s' % (x + len(self.header_row) + 1) - for x in xrange(self.column_count - 
len(self.header_row))] + for x in range(self.column_count - len(self.header_row))] elif len(self.header_row) > self.column_count: if self.mode == 'strict': raise ColumnCountMismatchException("Strict mode. Header row contains more columns than expected column count (%s vs %s)" % ( @@ -661,7 +665,7 @@ class TableColumnInferer(object): def infer_column_types(self): self.column_types = [] self.column_types2 = [] - for column_number in xrange(self.column_count): + for column_number in range(self.column_count): column_value_list = [ row[column_number] if column_number < len(row) else None for row in self.rows] column_type = self.determine_type_of_value_list(column_value_list) @@ -693,7 +697,30 @@ class TableColumnInferer(object): return self.column_types -def encoded_csv_reader(encoding, f, dialect, **kwargs): +def py3_encoded_csv_reader(encoding, f, dialect, is_stdin,**kwargs): + try: + if not is_stdin: + csv_reader = csv.reader(codecs.iterdecode(f,encoding), dialect,**kwargs) + else: + csv_reader = csv.reader(f, dialect, **kwargs) + + for row in csv_reader: + yield row + except ValueError as e: + if e.message is not None and e.message.startswith('could not convert string to'): + raise CouldNotConvertStringToNumericValueException(e.message) + else: + raise CouldNotParseInputException(str(e)) + except Exception as e: + if str(e).startswith("field larger than field limit"): + raise ColumnMaxLengthLimitExceededException(str(e)) + elif 'universal-newline' in str(e): + raise UniversalNewlinesExistException() + else: + raise + + +def py2_encoded_csv_reader(encoding, f, dialect, is_stdin, **kwargs): try: csv_reader = csv.reader(f, dialect, **kwargs) if encoding is not None and encoding != 'none': @@ -715,6 +742,12 @@ def encoded_csv_reader(encoding, f, dialect, **kwargs): else: raise +if six.PY2: + encoded_csv_reader = py2_encoded_csv_reader +else: + encoded_csv_reader = py3_encoded_csv_reader + + def normalized_filename(filename): if filename == '-': return 'stdin' @@ -748,7 
+781,7 @@ class MaterializedFileState(object): raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM)) except Exception as e: raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e)) - csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect) + csv_reader = encoded_csv_reader(self.encoding, self.f, is_stdin=self.is_stdin,dialect=self.dialect) try: for col_vals in csv_reader: self.lines_read += 1 @@ -838,13 +871,13 @@ class TableCreator(object): raise CannotUnzipStdInException() else: if self.gzipped or filename.endswith('.gz'): - f = gzip.GzipFile(fileobj=file(filename,'rb')) + f = gzip.GzipFile(fileobj=open(filename,'rb')) else: if self.with_universal_newlines: file_opening_mode = 'rbU' else: file_opening_mode = 'rb' - f = file(filename,file_opening_mode) + f = open(filename,file_opening_mode) return f def _pre_populate(self,dialect): @@ -975,7 +1008,7 @@ class TableCreator(object): if actual_col_count < expected_col_count: col_vals = col_vals + \ - [None for x in xrange(expected_col_count - actual_col_count)] + [None for x in range(expected_col_count - actual_col_count)] # in relaxed mode, we merge all extra columns to the last column value if self.mode == 'relaxed': @@ -1066,9 +1099,9 @@ class TableCreator(object): def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): if len(m) == 0: return [] - max_lengths = [0 for x in xrange(0, len(m[0]))] - for row_index in xrange(0, len(m)): - for col_index in xrange(0, len(m[0])): + max_lengths = [0 for x in range(0, len(m[0]))] + for row_index in range(0, len(m)): + for col_index in range(0, len(m[0])): new_len = len(unicode(output_field_quoting_func(output_delimiter,m[row_index][col_index]))) if new_len > max_lengths[col_index]: max_lengths[col_index] = new_len @@ -1272,7 +1305,7 @@ class QTextAsData(object): self._load_data(filename,input_params,stop_after_analysis=stop_after_analysis) def 
load_data_from_string(self,filename,str_data,input_params=QInputParams(),stop_after_analysis=False): - sf = cStringIO.StringIO(str_data) + sf = six.StringIO(str_data) try: self._load_data(filename,input_params,stdin_file=sf,stdin_filename=filename,stop_after_analysis=stop_after_analysis) finally: @@ -1390,7 +1423,7 @@ class QTextAsData(object): def unload(self): - for filename,table_creator in self.table_creators.iteritems(): + for filename,table_creator in six.iteritems(self.table_creators): try: table_creator.drop_table() except: @@ -1401,13 +1434,13 @@ class QTextAsData(object): def _create_materialized_files(self,table_creator): d = table_creator.materialized_file_dict m = {} - for filename,mfs in d.iteritems(): + for filename,mfs in six.iteritems(d): m[filename] = QMaterializedFile(filename,mfs.is_stdin) return m def _create_table_structures_list(self): table_structures = [] - for filename,table_creator in self.table_creators.iteritems(): + for filename,table_creator in six.iteritems(self.table_creators): column_names = table_creator.column_inferer.get_column_names() column_types = [self.db.type_names[table_creator.column_inferer.get_column_dict()[k]].lower() for k in column_names] materialized_files = self._create_materialized_files(table_creator) @@ -1431,7 +1464,7 @@ def quote_minimal_func(output_delimiter,v): if v is None: return v t = type(v) - if t == str or t == unicode and ((output_delimiter in v) or ('"' in v)): + if (t == str or t == unicode) and ((output_delimiter in v) or ('"' in v)): return '"%s"' % (escape_double_quotes_if_needed(v)) return v; @@ -1561,11 +1594,13 @@ class QOutputPrinter(object): else: row_str.append(fmt_str % "") - f_out.write(self.output_params.delimiter.join(row_str) + "\n") + xxxx = self.output_params.delimiter.join(row_str) + "\n" + f_out.write(xxxx) except (UnicodeEncodeError, UnicodeError) as e: print("Cannot encode data. 
Error:%s" % e, file=sys.stderr) sys.exit(3) except TypeError as e: + print(traceback.format_exc()) print("Error while formatting output: %s" % e, file=sys.stderr) sys.exit(4) except IOError as e: @@ -1585,7 +1620,7 @@ class QOutputPrinter(object): pass def run_standalone(): - p = ConfigParser() + p = configparser.ConfigParser() p.read([os.path.expanduser('~/.qrc'), '.qrc']) def get_option_with_default(p, option_type, option, default): @@ -1727,7 +1762,7 @@ def run_standalone(): print("Can't provide both a query file and a query on the command line", file=sys.stderr) sys.exit(1) try: - f = file(options.query_filename) + f = open(options.query_filename) query_strs = [f.read()] f.close() except: @@ -1736,16 +1771,19 @@ def run_standalone(): else: query_strs = args + SYSTEM_ENCODING = locale.getpreferredencoding() + if options.query_encoding is not None and options.query_encoding != 'none': try: for idx in range(len(query_strs)): - query_strs[idx] = query_strs[idx].decode(options.query_encoding).strip() + query_strs[idx] = query_strs[idx].encode(SYSTEM_ENCODING).decode(options.query_encoding).strip() if len(query_strs[idx]) == 0: print("Query cannot be empty (query number %s)" % (idx+1), file=sys.stderr) sys.exit(1) except Exception as e: + print(traceback.format_exc()) print("Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding), file=sys.stderr) sys.exit(3) @@ -1755,7 +1793,10 @@ def run_standalone(): output_encoding = get_stdout_encoding(options.output_encoding) try: - STDOUT = codecs.getwriter(output_encoding)(sys.stdout) + if six.PY2: + STDOUT = codecs.getwriter(output_encoding)(sys.stdout) + else: + STDOUT = sys.stdout except: print("Could not create output stream using output encoding %s" % (output_encoding), file=sys.stderr) sys.exit(200) @@ -1773,11 +1814,11 @@ def run_standalone(): print("Delimiter must be one character only", file=sys.stderr) sys.exit(5) - if options.input_quoting_mode not in 
QTextAsData.input_quoting_modes.keys(): + if options.input_quoting_mode not in list(QTextAsData.input_quoting_modes.keys()): print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QTextAsData.input_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) sys.exit(55) - if options.output_quoting_mode not in QOutputPrinter.output_quoting_modes.keys(): + if options.output_quoting_mode not in list(QOutputPrinter.output_quoting_modes.keys()): print("Output quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) sys.exit(56) diff --git a/test/test-all b/test/test-all index 94d1f5ab..d3dcb022 100755 --- a/test/test-all +++ b/test/test-all @@ -9,6 +9,6 @@ trap return_to_original_folder EXIT pushd $(dirname $0)/ -./test-suite +./test-suite "$@" set +e diff --git a/test/test-suite b/test/test-suite index b5c05956..8ebcdf8d 100755 --- a/test/test-suite +++ b/test/test-suite @@ -20,6 +20,7 @@ import time from tempfile import NamedTemporaryFile import locale import pprint +import six sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams @@ -28,9 +29,21 @@ from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams # make sure that the output is correctly encoded SYSTEM_ENCODING = locale.getpreferredencoding() + +DEBUG = False +if len(sys.argv) > 2 and sys.argv[2] == '-v': + DEBUG = True + def run_command(cmd_to_run): + global DEBUG + if DEBUG: + print(cmd_to_run) + p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True) o, e = p.communicate() + if six.PY3: + o = o.decode(SYSTEM_ENCODING) + e = e.decode(SYSTEM_ENCODING) # remove last newline o = o.rstrip() e = e.strip() @@ -43,7 +56,8 @@ def run_command(cmd_to_run): e = e.split(os.linesep) else: e = [] - return (p.returncode, o, e) + + return (p.returncode, 
[x.encode(SYSTEM_ENCODING) for x in o], [x.encode(SYSTEM_ENCODING) for x in e]) uneven_ls_output = """drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt @@ -132,7 +146,9 @@ class AbstractQTestCase(unittest.TestCase): return tmpfile def cleanup(self, tmpfile): - os.remove(tmpfile.name) + global DEBUG + if not DEBUG: + os.remove(tmpfile.name) def random_tmp_filename(self,prefix,postfix): # TODO Use more robust method for this @@ -738,7 +754,7 @@ class BasicTests(AbstractQTestCase): def test_output_header_with_non_ascii_names(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name,encoding=None) + tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name),encoding=None) cmd = '../bin/q -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name retcode, o, e = run_command(cmd) @@ -2289,6 +2305,18 @@ def suite(): return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests]) if __name__ == '__main__': + if len(sys.argv) > 1: + suite = unittest.TestSuite() + if '.' 
in sys.argv[1]: + c,m = sys.argv[1].split(".") + suite.addTest(globals()[c](m)) + else: + tl = unittest.TestLoader() + tc = tl.loadTestsFromTestCase(globals()[sys.argv[1]]) + suite = unittest.TestSuite([tc]) + else: + suite = suite() + test_runner = unittest.TextTestRunner(verbosity=2) - result = test_runner.run(suite()) + result = test_runner.run(suite) sys.exit(not result.wasSuccessful()) From 2992f416650457df69c174054ba11b0a7e68e18b Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 7 Dec 2018 17:39:34 +0200 Subject: [PATCH 006/111] lots of test six.b() changes - not sure if query_encoding changes are good --- bin/q | 21 +++---------- test/test-suite | 78 ++++++++++++++++++++++++++----------------------- 2 files changed, 45 insertions(+), 54 deletions(-) diff --git a/bin/q b/bin/q index 0d6cf8db..09acb536 100755 --- a/bin/q +++ b/bin/q @@ -1752,6 +1752,8 @@ def run_standalone(): print_credentials() sys.exit(0) + SYSTEM_ENCODING = locale.getpreferredencoding() + if len(args) == 0 and options.query_filename is None: print_credentials() print("Must provide at least one query in the command line, or through a file with the -q parameter", file=sys.stderr) @@ -1762,7 +1764,7 @@ def run_standalone(): print("Can't provide both a query file and a query on the command line", file=sys.stderr) sys.exit(1) try: - f = open(options.query_filename) + f = open(options.query_filename,encoding=options.query_encoding or SYSTEM_ENCODING) query_strs = [f.read()] f.close() except: @@ -1771,21 +1773,6 @@ def run_standalone(): else: query_strs = args - SYSTEM_ENCODING = locale.getpreferredencoding() - - if options.query_encoding is not None and options.query_encoding != 'none': - try: - for idx in range(len(query_strs)): - query_strs[idx] = query_strs[idx].encode(SYSTEM_ENCODING).decode(options.query_encoding).strip() - - if len(query_strs[idx]) == 0: - print("Query cannot be empty (query number %s)" % (idx+1), file=sys.stderr) - sys.exit(1) - - except Exception as e: - 
print(traceback.format_exc()) - print("Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding), file=sys.stderr) - sys.exit(3) if options.mode not in ['fluffy', 'relaxed', 'strict']: print("Parsing mode can be one of fluffy, relaxed or strict", file=sys.stderr) @@ -1815,7 +1802,7 @@ def run_standalone(): sys.exit(5) if options.input_quoting_mode not in list(QTextAsData.input_quoting_modes.keys()): - print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QTextAsData.input_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) + print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(sorted(QTextAsData.input_quoting_modes.keys())),options.input_quoting_mode), file=sys.stderr) sys.exit(55) if options.output_quoting_mode not in list(QOutputPrinter.output_quoting_modes.keys()): diff --git a/test/test-suite b/test/test-suite index 8ebcdf8d..35be12d9 100755 --- a/test/test-suite +++ b/test/test-suite @@ -21,6 +21,7 @@ from tempfile import NamedTemporaryFile import locale import pprint import six +from six.moves import range sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams @@ -57,7 +58,11 @@ def run_command(cmd_to_run): else: e = [] - return (p.returncode, [x.encode(SYSTEM_ENCODING) for x in o], [x.encode(SYSTEM_ENCODING) for x in e]) + + res = (p.returncode, [x.encode(SYSTEM_ENCODING) for x in o], [x.encode(SYSTEM_ENCODING) for x in e]) + if DEBUG: + print("RESULT:{}".format(res)) + return res uneven_ls_output = """drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt @@ -82,14 +87,14 @@ find_output = """8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 1 8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version """ 
-header_row = 'name,value1,value2' -sample_data_rows = ['a,1,0', 'b,2,0', 'c,,0'] -sample_data_rows_with_empty_string = ['a,aaa,0', 'b,bbb,0', 'c,,0'] -sample_data_no_header = "\n".join(sample_data_rows) + "\n" -sample_data_with_empty_string_no_header = "\n".join( - sample_data_rows_with_empty_string) + "\n" -sample_data_with_header = header_row + "\n" + sample_data_no_header -sample_data_with_missing_header_names = "name,value1\n" + sample_data_no_header +header_row = six.b('name,value1,value2') +sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')] +sample_data_rows_with_empty_string = [six.b('a,aaa,0'), six.b('b,bbb,0'), six.b('c,,0')] +sample_data_no_header = six.b("\n").join(sample_data_rows) + six.b("\n") +sample_data_with_empty_string_no_header = six.b("\n").join( + sample_data_rows_with_empty_string) + six.b("\n") +sample_data_with_header = header_row + six.b("\n") + sample_data_no_header +sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header sample_quoted_data = '''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" @@ -131,17 +136,16 @@ int_value = "2328372328373" sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value) def one_column_warning(e): - return e[0].startswith('Warning: column count is one') + return e[0].startswith(six.b('Warning: column count is one')) class AbstractQTestCase(unittest.TestCase): - def create_file_with_data(self, data, encoding='utf-8'): + def create_file_with_data(self, data, encoding=None): + if encoding is not None: + raise Exception('Deprecated: Encoding must be none') tmpfile = NamedTemporaryFile(delete=False) - if encoding != 'none' and encoding is not None: - tmpfile.write(data.encode(encoding)) - else: - tmpfile.write(data) + tmpfile.write(data) tmpfile.close() 
return tmpfile @@ -209,8 +213,8 @@ class BasicTests(AbstractQTestCase): self.assertTrue(len(o) == 1) self.assertTrue(len(e) == 1) - s = sum(xrange(1, 11)) - self.assertTrue(o[0] == '%s %s' % (s, s / 10.0)) + s = sum(range(1, 11)) + self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) self.assertTrue(one_column_warning(e)) def test_gzipped_file(self): @@ -224,7 +228,7 @@ class BasicTests(AbstractQTestCase): self.assertTrue(len(o) == 1) self.assertTrue(len(e) == 1) - s = sum(xrange(1, 11)) + s = sum(range(1, 11)) self.assertTrue(o[0] == '%s %s' % (s, s / 10.0)) self.assertTrue(one_column_warning(e)) @@ -380,7 +384,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) def test_stdin_input(self): - cmd = 'printf "%s" | ../bin/q -d , "select c1,c2,c3 from -"' % sample_data_no_header + cmd = six.b('printf "%s" | ../bin/q -d , "select c1,c2,c3 from -"') % sample_data_no_header retcode, o, e = run_command(cmd) self.assertEquals(retcode, 0) @@ -427,10 +431,10 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertEquals(retcode, 0) - self.assertEquals(o[0], 'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1].strip(), '`c1` - text') - self.assertEquals(o[2].strip(), '`c2` - int') - self.assertEquals(o[3].strip(), '`c3` - int') + self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1].strip(), six.b('`c1` - text')) + self.assertEquals(o[2].strip(), six.b('`c2` - int')) + self.assertEquals(o[3].strip(), six.b('`c3` - int')) def test_header_exception_on_numeric_header_data(self): tmpfile = self.create_file_with_data(sample_data_no_header) @@ -826,7 +830,7 @@ class BasicTests(AbstractQTestCase): def test_select_failed_output_encoding(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None) + tmp_query_file = self.create_file_with_data(six.b("select 
'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) cmd = '../bin/q -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name retcode, o, e = run_command(cmd) @@ -973,8 +977,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'quoted data,23') - self.assertEquals(o[1],'unquoted-data,54') + self.assertEquals(o[0],six.b('quoted data,23')) + self.assertEquals(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -988,8 +992,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'quoted data,23') - self.assertEquals(o[1],'unquoted-data,54') + self.assertEquals(o[0],six.b('quoted data,23')) + self.assertEquals(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -1003,8 +1007,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),1) self.assertEquals(len(o),0) - self.assertTrue(e[0].startswith('Input quoting mode can only be one of all,none,minimal')) - self.assertTrue('unknown_wrapping_mode' in e[0]) + self.assertTrue(e[0].startswith(six.b('Input quoting mode can only be one of all,minimal,none'))) + self.assertTrue(six.b('unknown_wrapping_mode') in e[0]) self.cleanup(tmp_data_file) @@ -1064,8 +1068,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'"quoted data",23') - self.assertEquals(o[1],'"unquoted-data",54') + self.assertEquals(o[0],six.b('"quoted data",23')) + self.assertEquals(o[1],six.b('"unquoted-data",54')) self.cleanup(tmp_data_file) @@ -1079,8 +1083,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'"quoted data","23"') - self.assertEquals(o[1],'"unquoted-data","54"') + self.assertEquals(o[0],six.b('"quoted data","23"')) + self.assertEquals(o[1],six.b('"unquoted-data","54"')) self.cleanup(tmp_data_file) @@ -1322,7 +1326,7 @@ class 
BasicTests(AbstractQTestCase): self.assertEquals(len(o),0) self.assertEquals(len(e),1) - self.assertEquals(e[0],"No files matching 'non-existent-file' have been found") + self.assertEquals(e[0],six.b("No files matching 'non-existent-file' have been found")) def test_default_column_max_length_parameter__short_enough(self): huge_text = "x" * 131000 @@ -1570,7 +1574,7 @@ class ParsingModeTests(AbstractQTestCase): column_names = [x[0] for x in column_tuples] column_types = [x[2] for x in column_tuples] - self.assertEquals(column_names, ['`c%s`' % x for x in xrange(1, 12)]) + self.assertEquals(column_names, ['`c%s`' % x for x in range(1, 12)]) self.assertEquals(column_types, [ 'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text']) @@ -1594,7 +1598,7 @@ class ParsingModeTests(AbstractQTestCase): column_names = [x[0] for x in column_tuples] column_types = [x[2] for x in column_tuples] - self.assertEquals(column_names, ['`c%s`' % x for x in xrange(1, 10)]) + self.assertEquals(column_names, ['`c%s`' % x for x in range(1, 10)]) self.assertEquals( column_types, ['text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text']) From a2507d16b4418f004a1c9efc605bc27a0d09ea90 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 10:46:14 +0200 Subject: [PATCH 007/111] wip --- bin/q | 45 +++++++---- test/test-suite | 194 ++++++++++++++++++++++++------------------------ 2 files changed, 124 insertions(+), 115 deletions(-) diff --git a/bin/q b/bin/q index 09acb536..adc92c9a 100755 --- a/bin/q +++ b/bin/q @@ -183,7 +183,7 @@ class Sqlite3DB(object): self.cursor.execute(q) if self.cursor.description is not None: # we decode the column names, so they can be encoded to any output format later on - query_column_names = [c[0] for c in self.cursor.description] + query_column_names = [c[0].decode('utf-8') for c in self.cursor.description] else: query_column_names = None result = self.cursor.fetchall() @@ -747,7 +747,6 @@ if six.PY2: else: 
encoded_csv_reader = py3_encoded_csv_reader - def normalized_filename(filename): if filename == '-': return 'stdin' @@ -1454,7 +1453,7 @@ class QTextAsData(object): return q_output def escape_double_quotes_if_needed(v): - x = v.replace('"','""') + x = v.replace(six.u('"'), six.u('""')) return x def quote_none_func(output_delimiter,v): @@ -1464,22 +1463,22 @@ def quote_minimal_func(output_delimiter,v): if v is None: return v t = type(v) - if (t == str or t == unicode) and ((output_delimiter in v) or ('"' in v)): - return '"%s"' % (escape_double_quotes_if_needed(v)) + if (t == str or t == unicode) and ((output_delimiter in v) or (six.u('"') in v)): + return six.u('"{}"').format(escape_double_quotes_if_needed(v)) return v; def quote_nonnumeric_func(output_delimiter,v): if v is None: return v if type(v) == str or type(v) == unicode: - return '"%s"' % (escape_double_quotes_if_needed(v)) + return six.u('"{}"').format(escape_double_quotes_if_needed(v)) return v; def quote_all_func(output_delimiter,v): if type(v) == str or type(v) == unicode: - return '"%s"' % (escape_double_quotes_if_needed(v)) + return six.u('"{}"').format(escape_double_quotes_if_needed(v)) else: - return '"%s"' % v + return six.u('"{}"').format(v) class QOutputParams(object): def __init__(self, @@ -1487,12 +1486,14 @@ class QOutputParams(object): beautify=False, output_quoting_mode='minimal', formatting=None, - output_header=False): + output_header=False, + encoding=None): self.delimiter = delimiter self.beautify = beautify self.output_quoting_mode = output_quoting_mode self.formatting = formatting self.output_header = output_header + self.encoding = encoding def __str__(self): return "QOutputParams<%s>" % str(self.__dict__) @@ -1597,6 +1598,7 @@ class QOutputPrinter(object): xxxx = self.output_params.delimiter.join(row_str) + "\n" f_out.write(xxxx) except (UnicodeEncodeError, UnicodeError) as e: + print(traceback.format_exc()) print("Cannot encode data. 
Error:%s" % e, file=sys.stderr) sys.exit(3) except TypeError as e: @@ -1752,8 +1754,8 @@ def run_standalone(): print_credentials() sys.exit(0) - SYSTEM_ENCODING = locale.getpreferredencoding() +### if len(args) == 0 and options.query_filename is None: print_credentials() print("Must provide at least one query in the command line, or through a file with the -q parameter", file=sys.stderr) @@ -1764,7 +1766,7 @@ def run_standalone(): print("Can't provide both a query file and a query on the command line", file=sys.stderr) sys.exit(1) try: - f = open(options.query_filename,encoding=options.query_encoding or SYSTEM_ENCODING) + f = file(options.query_filename) query_strs = [f.read()] f.close() except: @@ -1773,6 +1775,19 @@ def run_standalone(): else: query_strs = args + if options.query_encoding is not None and options.query_encoding != 'none': + try: + for idx in range(len(query_strs)): + query_strs[idx] = query_strs[idx].decode(options.query_encoding).strip() + + if len(query_strs[idx]) == 0: + print("Query cannot be empty (query number %s)" % (idx+1), file=sys.stderr) + sys.exit(1) + + except Exception as e: + print("Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding), file=sys.stderr) + sys.exit(3) +### if options.mode not in ['fluffy', 'relaxed', 'strict']: print("Parsing mode can be one of fluffy, relaxed or strict", file=sys.stderr) @@ -1780,10 +1795,7 @@ def run_standalone(): output_encoding = get_stdout_encoding(options.output_encoding) try: - if six.PY2: - STDOUT = codecs.getwriter(output_encoding)(sys.stdout) - else: - STDOUT = sys.stdout + STDOUT = codecs.getwriter(output_encoding)(sys.stdout) except: print("Could not create output stream using output encoding %s" % (output_encoding), file=sys.stderr) sys.exit(200) @@ -1879,7 +1891,8 @@ def run_standalone(): beautify=options.beautify, output_quoting_mode=options.output_quoting_mode, formatting=options.formatting, - output_header=options.output_header) + 
output_header=options.output_header, + encoding=output_encoding) q_output_printer = QOutputPrinter(output_params,show_tracebacks=options.verbose) for query_str in query_strs: diff --git a/test/test-suite b/test/test-suite index 35be12d9..517bc7e4 100755 --- a/test/test-suite +++ b/test/test-suite @@ -42,24 +42,20 @@ def run_command(cmd_to_run): p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True) o, e = p.communicate() - if six.PY3: - o = o.decode(SYSTEM_ENCODING) - e = e.decode(SYSTEM_ENCODING) # remove last newline o = o.rstrip() e = e.strip() # split rows if o != '': - o = o.split(os.linesep) + o = o.split(six.b(os.linesep)) else: o = [] if e != '': - e = e.split(os.linesep) + e = e.split(six.b(os.linesep)) else: e = [] - - res = (p.returncode, [x.encode(SYSTEM_ENCODING) for x in o], [x.encode(SYSTEM_ENCODING) for x in e]) + res = (p.returncode, o, e) if DEBUG: print("RESULT:{}".format(res)) return res @@ -96,40 +92,40 @@ sample_data_with_empty_string_no_header = six.b("\n").join( sample_data_with_header = header_row + six.b("\n") + sample_data_no_header sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header -sample_quoted_data = '''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted +sample_quoted_data = six.b('''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline value""." "this is an escaped \\"multiline value\\"." 
control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" -''' +''') -double_double_quoted_data = '''regular_double_quoted double_double_quoted +double_double_quoted_data = six.b('''regular_double_quoted double_double_quoted "this is a quoted value" "this is a quoted value with ""double double quotes""" -''' +''') -escaped_double_quoted_data = '''regular_double_quoted escaped_double_quoted +escaped_double_quoted_data = six.b('''regular_double_quoted escaped_double_quoted "this is a quoted value" "this is a quoted value with \\"escaped double quotes\\"" -''' +''') -combined_quoted_data = '''regular_double_quoted double_double_quoted escaped_double_quoted +combined_quoted_data = six.b('''regular_double_quoted double_double_quoted escaped_double_quoted "this is a quoted value" "this is a quoted value with ""double double quotes""" "this is a quoted value with \\"escaped double quotes\\"" -''' +''') -sample_quoted_data2 = '"quoted data" 23\nunquoted-data 54' +sample_quoted_data2 = six.b('"quoted data" 23\nunquoted-data 54') -one_column_data = '''data without commas 1 +one_column_data = six.b('''data without commas 1 data without commas 2 -''' +''') # Values with leading whitespace -sample_data_rows_with_spaces = ['a,1,0', ' b, 2,0', 'c,,0'] -sample_data_with_spaces_no_header = "\n".join( - sample_data_rows_with_spaces) + "\n" +sample_data_rows_with_spaces = [six.b('a,1,0'), six.b(' b, 2,0'), six.b('c,,0')] +sample_data_with_spaces_no_header = six.b("\n").join( + sample_data_rows_with_spaces) + six.b("\n") -header_row_with_spaces = 'name,value 1,value2' +header_row_with_spaces = six.b('name,value 1,value2') sample_data_with_spaces_with_header = header_row_with_spaces + \ - "\n" + sample_data_with_spaces_no_header + six.b("\n") + sample_data_with_spaces_no_header long_value1 = "23683289372328372328373" int_value = "2328372328373" @@ -219,7 +215,7 @@ class BasicTests(AbstractQTestCase): def test_gzipped_file(self): tmpfile = 
self.create_file_with_data( - '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none') + six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) cmd = '../bin/q -z "select sum(c1),avg(c1) from %s"' % tmpfile.name @@ -236,7 +232,7 @@ class BasicTests(AbstractQTestCase): def test_attempt_to_unzip_stdin(self): tmpfile = self.create_file_with_data( - '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none') + six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) cmd = 'cat %s | ../bin/q -z "select sum(c1),avg(c1) from -"' % tmpfile.name @@ -276,7 +272,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0],"1") + self.assertEquals(o[0],six.b("1")) self.cleanup(tmpfile) @@ -290,7 +286,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0],"2") + self.assertEquals(o[0],six.b("2")) self.cleanup(tmpfile) @@ -417,10 +413,10 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertEquals(retcode, 0) - self.assertEquals(o[0], 'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1].strip(), '`c1` - text') - self.assertEquals(o[2].strip(), '`c2` - int') - self.assertEquals(o[3].strip(), '`c3` - int') + self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1].strip(), six.b('`c1` - text')) + self.assertEquals(o[2].strip(), six.b('`c2` - int')) + self.assertEquals(o[3].strip(), six.b('`c3` - int')) self.cleanup(tmpfile) @@ -445,9 +441,9 @@ class 
BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 3) self.assertTrue( - 'Bad header row: Header must contain only strings' in e[0]) - self.assertTrue("Column name must be a string" in e[1]) - self.assertTrue("Column name must be a string" in e[2]) + six.b('Bad header row: Header must contain only strings') in e[0]) + self.assertTrue(six.b("Column name must be a string") in e[1]) + self.assertTrue(six.b("Column name must be a string") in e[2]) self.cleanup(tmpfile) @@ -459,12 +455,12 @@ class BasicTests(AbstractQTestCase): self.assertNotEquals(retcode, 0) self.assertEquals(len(o),4) self.assertEquals(len(e),2) - self.assertEquals(o[0], 'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1].strip(), '`name` - text') - self.assertEquals(o[2].strip(), '`value1` - int') - self.assertEquals(o[3].strip(), '`value2` - int') - self.assertEquals(e[0].strip(),'query error: no such column: c1') - self.assertTrue(e[1].startswith('Warning - There seems to be a ')) + self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1].strip(), six.b('`name` - text')) + self.assertEquals(o[2].strip(), six.b('`value1` - int')) + self.assertEquals(o[3].strip(), six.b('`value2` - int')) + self.assertEquals(e[0].strip(),six.b('query error: no such column: c1')) + self.assertTrue(e[1].startswith(six.b('Warning - There seems to be a '))) self.cleanup(tmpfile) @@ -475,7 +471,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(retcode, 0) self.assertEquals(len(o), 3) - self.assertEquals(" ".join(o), "a b c") + self.assertEquals(six.b(" ").join(o), six.b("a b c")) self.cleanup(tmpfile) @@ -486,10 +482,10 @@ class BasicTests(AbstractQTestCase): self.assertEquals(retcode, 0) self.assertEquals(len(o), 4) - self.assertEquals(o[0],'name') - self.assertEquals(o[1],'a') - self.assertEquals(o[2],'b') - self.assertEquals(o[3],'c') + self.assertEquals(o[0],six.b('name')) + self.assertEquals(o[1],six.b('a')) + 
self.assertEquals(o[2],six.b('b')) + self.assertEquals(o[3],six.b('c')) self.cleanup(tmpfile) @@ -502,9 +498,9 @@ class BasicTests(AbstractQTestCase): self.assertNotEquals(retcode, 0) self.assertEquals(len(o), 0) self.assertEquals(len(e), 2) - self.assertTrue('no such column: c3' in e[0]) + self.assertTrue(six.b('no such column: c3') in e[0]) self.assertEquals( - e[1], 'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names') + e[1], six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')) self.cleanup(tmpfile) @@ -517,13 +513,13 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 4) self.assertEquals(len(e), 1) - self.assertEquals(o[0], 'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1].strip(), '`c1` - text') - self.assertEquals(o[2].strip(), '`c2` - text') - self.assertEquals(o[3].strip(), '`c3` - text') + self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1].strip(), six.b('`c1` - text')) + self.assertEquals(o[2].strip(), six.b('`c2` - text')) + self.assertEquals(o[3].strip(), six.b('`c3` - text')) self.assertEquals( - e[0], 'Warning - There seems to be header line in the file, but -H has not been specified. All fields will be detected as text fields, and the header line will appear as part of the data') + e[0], six.b('Warning - There seems to be header line in the file, but -H has not been specified. 
All fields will be detected as text fields, and the header line will appear as part of the data')) self.cleanup(tmpfile) @@ -563,7 +559,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0], 'value1') + self.assertEquals(o[0], six.b('value1')) self.cleanup(tmpfile) @@ -589,9 +585,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 3) - self.assertEquals(o[0], 'a') - self.assertEquals(o[1], 'b') - self.assertEquals(o[2], 'c') + self.assertEquals(o[0], six.b('a')) + self.assertEquals(o[1], six.b('b')) + self.assertEquals(o[2], six.b('c')) self.cleanup(tmpfile) @@ -604,9 +600,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 3) - self.assertEquals(o[0], 'a') - self.assertEquals(o[1], ' b') - self.assertEquals(o[2], 'c') + self.assertEquals(o[0], six.b('a')) + self.assertEquals(o[1], six.b(' b')) + self.assertEquals(o[2], six.b('c')) self.cleanup(tmpfile) @@ -619,10 +615,10 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 4) - self.assertEquals(o[0], 'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1].strip(), '`c1` - text') - self.assertEquals(o[2].strip(), '`c2` - int') - self.assertEquals(o[3].strip(), '`c3` - int') + self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1].strip(), six.b('`c1` - text')) + self.assertEquals(o[2].strip(), six.b('`c2` - int')) + self.assertEquals(o[3].strip(), six.b('`c3` - int')) self.cleanup(tmpfile) @@ -677,7 +673,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertEquals(e[0],'Query cannot be empty (query number 1)') + self.assertEquals(e[0],six.b('Query cannot be empty (query number 1)')) def test_failure_in_query_stops_processing_queries(self): cmd = '../bin/q -d , "select 500" "select 300" "wrong-query" "select 8000"' 
@@ -686,8 +682,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(retcode, 1) self.assertEquals(len(e), 1) self.assertEquals(len(o), 2) - self.assertEquals(o[0],'500') - self.assertEquals(o[1],'300') + self.assertEquals(o[0],six.b('500')) + self.assertEquals(o[1],six.b('300')) def test_multiple_queries_in_command_line(self): cmd = '../bin/q -d , "select 500" "select 300+100" "select 300" "select 200"' @@ -697,10 +693,10 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 4) - self.assertEquals(o[0],'500') - self.assertEquals(o[1],'400') - self.assertEquals(o[2],'300') - self.assertEquals(o[3],'200') + self.assertEquals(o[0],six.b('500')) + self.assertEquals(o[1],six.b('400')) + self.assertEquals(o[2],six.b('300')) + self.assertEquals(o[3],six.b('200')) def test_literal_calculation_query(self): cmd = '../bin/q -d , "select 1+40/6"' @@ -710,7 +706,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 1) - self.assertEquals(o[0],'7') + self.assertEquals(o[0],six.b('7')) def test_literal_calculation_query_float_result(self): cmd = '../bin/q -d , "select 1+40/6.0"' @@ -733,9 +729,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 3) - self.assertEquals(o[0], 'a') - self.assertEquals(o[1], 'b') - self.assertEquals(o[2], 'c') + self.assertEquals(o[0], six.b('a')) + self.assertEquals(o[1], six.b('b')) + self.assertEquals(o[2], six.b('c')) self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -751,7 +747,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o),0) self.assertEquals(len(e),1) - self.assertTrue(e[0].startswith('Could not decode query number 1 using the provided query encoding (ascii)')) + self.assertTrue(e[0].startswith(six.b('Could not decode query number 1 using the provided query encoding (ascii)'))) self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -777,7 +773,7 @@ class 
BasicTests(AbstractQTestCase): def test_use_query_file_with_query_encoding(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None) + tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) cmd = '../bin/q -d , -q %s -H -Q utf-8' % tmp_query_file.name retcode, o, e = run_command(cmd) @@ -795,7 +791,7 @@ class BasicTests(AbstractQTestCase): def test_use_query_file_and_command_line(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data("select name from %s" % tmp_data_file.name) + tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) cmd = '../bin/q -d , -q %s -H "select * from ppp"' % tmp_query_file.name retcode, o, e = run_command(cmd) @@ -804,14 +800,14 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertTrue(e[0].startswith("Can't provide both a query file and a query on the command line")) + self.assertTrue(e[0].startswith(six.b("Can't provide both a query file and a query on the command line"))) self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) def test_select_output_encoding(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None) + tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) for target_encoding in ['utf-8','ibm852']: cmd = '../bin/q -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding) @@ -839,7 +835,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertTrue(e[0].startswith('Cannot encode 
data')) + self.assertTrue(e[0].startswith(six.b('Cannot encode data'))) self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -948,8 +944,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'"quoted,data",23') - self.assertEquals(o[1],'unquoted-data,54,') + self.assertEquals(o[0],six.b('"quoted,data",23')) + self.assertEquals(o[1],six.b('unquoted-data,54,')) self.cleanup(tmp_data_file) @@ -1022,8 +1018,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'quoted data,23') - self.assertEquals(o[1],'unquoted-data,54') + self.assertEquals(o[0],six.b('quoted data,23')) + self.assertEquals(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -1037,8 +1033,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'quoted data,23') - self.assertEquals(o[1],'unquoted-data,54') + self.assertEquals(o[0],six.b('quoted data,23')) + self.assertEquals(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -1053,8 +1049,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'"quoted data" 23') - self.assertEquals(o[1],'unquoted-data 54') + self.assertEquals(o[0],six.b('"quoted data" 23')) + self.assertEquals(o[1],six.b('unquoted-data 54')) self.cleanup(tmp_data_file) @@ -1133,8 +1129,8 @@ class BasicTests(AbstractQTestCase): def test_input_field_quoting_and_data_types_with_encoding(self): # Checks combination of minimal input field quoting, with special characters that need to be decoded - # Both content and proper data types are verified - data = '111,22.22,"testing text with special characters - citt\xc3\xa0 ",http://somekindofurl.com,12.13.14.15,12.1\n' - tmp_data_file = self.create_file_with_data(data,encoding='none') + data = six.b('111,22.22,"testing text with special characters - 
citt\xc3\xa0 ",http://somekindofurl.com,12.13.14.15,12.1\n') + tmp_data_file = self.create_file_with_data(data) cmd = '../bin/q -d , "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) @@ -1249,8 +1245,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'escaped_double_quoted') - self.assertEquals(o[1],'this is a quoted value with \\escaped') + self.assertEquals(o[0],six.b('escaped_double_quoted')) + self.assertEquals(o[1],six.b('this is a quoted value with \\escaped')) cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) @@ -1367,7 +1363,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) def test_column_max_length_parameter(self): - file_data = "a,b,c\nvery-long-text,2,3\n" + file_data = six.b("a,b,c\nvery-long-text,2,3\n") tmpfile = self.create_file_with_data(file_data) cmd = '../bin/q -H -d , -M 3 "select a from %s"' % tmpfile.name @@ -1377,9 +1373,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 1) - self.assertTrue(e[0].startswith("Column length is larger than the maximum")) - self.assertTrue(("Offending file is '%s'" % tmpfile.name) in e[0]) - self.assertTrue('Line is 2' in e[0]) + self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) + self.assertTrue((six.b("Offending file is '%s'" % tmpfile.name)) in e[0]) + self.assertTrue(six.b('Line is 2') in e[0]) cmd2 = '../bin/q -H -d , -M 300 -H "select a from %s"' % tmpfile.name retcode2, o2, e2 = run_command(cmd2) @@ -1388,12 +1384,12 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o2), 1) self.assertEquals(len(e2), 0) - self.assertEquals(o2[0],'very-long-text') + self.assertEquals(o2[0],six.b('very-long-text')) self.cleanup(tmpfile) def test_invalid_column_max_length_parameter(self): - file_data = "a,b,c\nvery-long-text,2,3\n" + 
file_data = six.b("a,b,c\nvery-long-text,2,3\n") tmpfile = self.create_file_with_data(file_data) cmd = '../bin/q -H -d , -M 0 "select a from %s"' % tmpfile.name @@ -1403,7 +1399,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 1) - self.assertTrue(e[0].startswith('Max column length limit must be a positive integer')) + self.assertTrue(e[0].startswith(six.b('Max column length limit must be a positive integer'))) self.cleanup(tmpfile) From e6640d1c475e1a6d3dd38f5b97d6594cda9fd491 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 10:48:31 +0200 Subject: [PATCH 008/111] wip --- bin/q | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/q b/bin/q index adc92c9a..048d2272 100755 --- a/bin/q +++ b/bin/q @@ -1598,7 +1598,6 @@ class QOutputPrinter(object): xxxx = self.output_params.delimiter.join(row_str) + "\n" f_out.write(xxxx) except (UnicodeEncodeError, UnicodeError) as e: - print(traceback.format_exc()) print("Cannot encode data. 
Error:%s" % e, file=sys.stderr) sys.exit(3) except TypeError as e: From c478adb034f5ed43ceaf02c5425891e602c9e041 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 11:47:47 +0200 Subject: [PATCH 009/111] wip --- bin/q | 30 ++++++++++++------- test/test-suite | 80 ++++++++++++++++++++++++++----------------------- 2 files changed, 62 insertions(+), 48 deletions(-) diff --git a/bin/q b/bin/q index 048d2272..61f1d1ee 100755 --- a/bin/q +++ b/bin/q @@ -183,7 +183,10 @@ class Sqlite3DB(object): self.cursor.execute(q) if self.cursor.description is not None: # we decode the column names, so they can be encoded to any output format later on - query_column_names = [c[0].decode('utf-8') for c in self.cursor.description] + if six.PY2: + query_column_names = [unicode(c[0],'utf-8') for c in self.cursor.description] + else: + query_column_names = [c[0] for c in self.cursor.description] else: query_column_names = None result = self.cursor.fetchall() @@ -1586,16 +1589,18 @@ class QOutputPrinter(object): fmt_str = formatting_dict[str(i + 1)] else: if self.output_params.beautify: - fmt_str = "%%-%ss" % max_lengths[i] + fmt_str = six.u("{{0:<{}}}") % max_lengths[i] else: - fmt_str = "%s" + fmt_str = six.u("{}") if col is not None: - row_str.append(fmt_str % self.output_field_quoting_func(self.output_params.delimiter,col)) + xx = self.output_field_quoting_func(self.output_params.delimiter,col) + row_str.append(fmt_str.format(xx)) else: - row_str.append(fmt_str % "") + row_str.append(fmt_str.format("")) + - xxxx = self.output_params.delimiter.join(row_str) + "\n" + xxxx = six.u(self.output_params.delimiter).join(row_str) + six.u("\n") f_out.write(xxxx) except (UnicodeEncodeError, UnicodeError) as e: print("Cannot encode data. 
Error:%s" % e, file=sys.stderr) @@ -1753,7 +1758,6 @@ def run_standalone(): print_credentials() sys.exit(0) - ### if len(args) == 0 and options.query_filename is None: print_credentials() @@ -1765,14 +1769,17 @@ def run_standalone(): print("Can't provide both a query file and a query on the command line", file=sys.stderr) sys.exit(1) try: - f = file(options.query_filename) + f = open(options.query_filename,'rb') query_strs = [f.read()] f.close() except: print("Could not read query from file %s" % options.query_filename, file=sys.stderr) sys.exit(1) else: - query_strs = args + if sys.stdin.encoding is not None: + query_strs = [x.encode(sys.stdin.encoding) for x in args] + else: + query_strs = args if options.query_encoding is not None and options.query_encoding != 'none': try: @@ -1794,7 +1801,10 @@ def run_standalone(): output_encoding = get_stdout_encoding(options.output_encoding) try: - STDOUT = codecs.getwriter(output_encoding)(sys.stdout) + if six.PY3: + STDOUT = codecs.getwriter(output_encoding)(sys.stdout.buffer) + else: + STDOUT = codecs.getwriter(output_encoding)(sys.stdout) except: print("Could not create output stream using output encoding %s" % (output_encoding), file=sys.stderr) sys.exit(200) diff --git a/test/test-suite b/test/test-suite index 517bc7e4..f6299734 100755 --- a/test/test-suite +++ b/test/test-suite @@ -46,11 +46,11 @@ def run_command(cmd_to_run): o = o.rstrip() e = e.strip() # split rows - if o != '': + if o != six.b(''): o = o.split(six.b(os.linesep)) else: o = [] - if e != '': + if e != six.b(''): e = e.split(six.b(os.linesep)) else: e = [] @@ -321,16 +321,16 @@ class BasicTests(AbstractQTestCase): def test_tab_delimition_parameter__with_manual_override_attempt(self): tmpfile = self.create_file_with_data( - sample_data_no_header.replace(",", "\t")) + sample_data_no_header.replace(six.b(","), six.b("\t"))) cmd = '../bin/q -t -d , "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEquals(retcode, 0) 
self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(",", "\t")) - self.assertEquals(o[1], sample_data_rows[1].replace(",", "\t")) - self.assertEquals(o[2], sample_data_rows[2].replace(",", "\t")) + self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) self.cleanup(tmpfile) @@ -343,9 +343,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(",", "|")) - self.assertEquals(o[1], sample_data_rows[1].replace(",", "|")) - self.assertEquals(o[2], sample_data_rows[2].replace(",", "|")) + self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) self.cleanup(tmpfile) @@ -524,7 +524,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) def test_empty_data(self): - tmpfile = self.create_file_with_data('') + tmpfile = self.create_file_with_data(six.b('')) cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) @@ -532,12 +532,12 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 1) - self.assertTrue('Warning - data is empty' in e[0]) + self.assertTrue(six.b('Warning - data is empty') in e[0]) self.cleanup(tmpfile) def test_empty_data_with_header_param(self): - tmpfile = self.create_file_with_data('') + tmpfile = self.create_file_with_data(six.b('')) cmd = '../bin/q -d , "select c1 from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) @@ -545,7 +545,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 1) - m = "Header line is 
expected but missing in file %s" % tmpfile.name + m = six.b("Header line is expected but missing in file %s" % tmpfile.name) self.assertTrue(m in e[0]) self.cleanup(tmpfile) @@ -640,7 +640,7 @@ class BasicTests(AbstractQTestCase): def test_column_analysis_for_spaces_in_header_row(self): tmpfile = self.create_file_with_data( - header_row_with_spaces + "\n" + sample_data_no_header) + header_row_with_spaces + six.b("\n") + sample_data_no_header) cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H -A' % tmpfile.name retcode, o, e = run_command(cmd) @@ -648,10 +648,10 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 4) - self.assertEquals(o[0], 'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1].strip(), '`name` - text') - self.assertEquals(o[2].strip(), '`value 1` - int') - self.assertEquals(o[3].strip(), '`value2` - int') + self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1].strip(), six.b('`name` - text')) + self.assertEquals(o[2].strip(), six.b('`value 1` - int')) + self.assertEquals(o[3].strip(), six.b('`value2` - int')) self.cleanup(tmpfile) @@ -663,7 +663,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertEquals(e[0],'Query cannot be empty (query number 1)') + self.assertEquals(e[0],six.b('Query cannot be empty (query number 1)')) def test_empty_query_in_command_line(self): cmd = '../bin/q -d , " "' @@ -738,7 +738,7 @@ class BasicTests(AbstractQTestCase): def test_use_query_file_with_incorrect_query_encoding(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None) + tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) cmd = '../bin/q -d , -q %s -H -Q ascii' % tmp_query_file.name 
retcode, o, e = run_command(cmd) @@ -842,7 +842,7 @@ class BasicTests(AbstractQTestCase): def test_use_query_file_with_empty_query(self): - tmp_query_file = self.create_file_with_data(" ") + tmp_query_file = self.create_file_with_data(six.b(" ")) cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name retcode, o, e = run_command(cmd) @@ -851,7 +851,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertTrue(e[0].startswith("Query cannot be empty")) + self.assertTrue(e[0].startswith(six.b("Query cannot be empty"))) self.cleanup(tmp_query_file) @@ -863,7 +863,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertTrue(e[0].startswith("Could not read query from file")) + self.assertTrue(e[0].startswith(six.b("Could not read query from file"))) def test_non_quoted_values_in_quoted_data(self): tmp_data_file = self.create_file_with_data(sample_quoted_data) @@ -1207,8 +1207,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'double_double_quoted') - self.assertEquals(o[1],'this is a quoted value with "double') + self.assertEquals(o[0],six.b('double_double_quoted')) + self.assertEquals(o[1],six.b('this is a quoted value with "double')) cmd = '../bin/q -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) @@ -1217,8 +1217,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'') - self.assertEquals(o[1],'double') + self.assertEquals(o[0],six.b('')) + self.assertEquals(o[1],six.b('double')) cmd = '../bin/q -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) @@ -1227,8 +1227,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - 
self.assertEquals(o[0],'') - self.assertEquals(o[1],'quotes"""') + self.assertEquals(o[0],six.b('')) + self.assertEquals(o[1],six.b('quotes"""')) self.cleanup(tmp_data_file) @@ -1255,8 +1255,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'') - self.assertEquals(o[1],'double') + self.assertEquals(o[0],six.b('')) + self.assertEquals(o[1],six.b('double')) cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) @@ -1265,8 +1265,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'') - self.assertEquals(o[1],'quotes\\""') + self.assertEquals(o[0],six.b('')) + self.assertEquals(o[1],six.b('quotes\\""')) self.cleanup(tmp_data_file) @@ -1405,7 +1405,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) def test_duplicate_column_name_detection(self): - file_data = "a,b,a\n10,20,30\n30,40,50" + file_data = six.b("a,b,a\n10,20,30\n30,40,50") tmpfile = self.create_file_with_data(file_data) cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name @@ -1415,8 +1415,8 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 2) - self.assertTrue(e[0].startswith('Bad header row:')) - self.assertEquals(e[1],"'a': Column name is duplicated") + self.assertTrue(e[0].startswith(six.b('Bad header row:'))) + self.assertEquals(e[1],six.b("'a': Column name is duplicated")) self.cleanup(tmpfile) @@ -1747,7 +1747,9 @@ class ParsingModeTests(AbstractQTestCase): class FormattingTests(AbstractQTestCase): def test_column_formatting(self): - cmd = 'seq 1 10 | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -" -c 1' + # TODO Decide if this breaking change is reasonable + #cmd = 'seq 1 10 | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -" -c 1' + cmd = 'seq 1 10 | ../bin/q -f 1={:4.3f},2={:4.3f} "select 
sum(c1),avg(c1) from -" -c 1' retcode, o, e = run_command(cmd) @@ -1759,7 +1761,9 @@ class FormattingTests(AbstractQTestCase): def test_column_formatting_with_output_header(self): perl_regex = "'s/1\n/column_name\n1\n/;'" - cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' + # TODO Decide if this breaking change is reasonable + #cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' + cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ../bin/q -f 1={:4.3f},2={:4.3f} "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' retcode, o, e = run_command(cmd) From ceed0e7af07bbdad98b2b9e3ef18b2097f44fc96 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 12:18:37 +0200 Subject: [PATCH 010/111] wip --- bin/q | 2 +- test/test-suite | 44 ++++++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/bin/q b/bin/q index 61f1d1ee..fc67361f 100755 --- a/bin/q +++ b/bin/q @@ -648,7 +648,7 @@ class TableColumnInferer(object): counts = {} for column_count in column_count_list: counts[column_count] = counts.get(column_count, 0) + 1 - return ", ".join(["%s rows with %s columns" % (v, k) for k, v in counts.iteritems()]) + return six.u(", ").join([six.u("{} rows with {} columns".format(v, k)) for k, v in six.iteritems(counts)]) def _do_strict_analysis(self): column_count_list = [len(col_vals) for col_vals in self.rows] diff --git a/test/test-suite b/test/test-suite index f6299734..d1e74dad 100755 --- a/test/test-suite +++ b/test/test-suite @@ -225,7 +225,7 @@ class BasicTests(AbstractQTestCase): self.assertTrue(len(e) == 1) s = sum(range(1, 11)) - self.assertTrue(o[0] == '%s %s' % (s, s / 10.0)) + self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) self.assertTrue(one_column_warning(e)) self.cleanup(tmpfile) @@ 
-241,7 +241,7 @@ class BasicTests(AbstractQTestCase): self.assertTrue(len(o) == 0) self.assertTrue(len(e) == 1) - self.assertEquals(e[0],'Cannot decompress standard input. Pipe the input through zcat in order to decompress.') + self.assertEquals(e[0],six.b('Cannot decompress standard input. Pipe the input through zcat in order to decompress.')) self.cleanup(tmpfile) @@ -300,22 +300,22 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(" ".join(o), 'a b c') + self.assertEquals(six.b(" ").join(o), six.b('a b c')) self.cleanup(tmpfile) def test_tab_delimition_parameter(self): tmpfile = self.create_file_with_data( - sample_data_no_header.replace(",", "\t")) + sample_data_no_header.replace(six.b(","), six.b("\t"))) cmd = '../bin/q -t "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEquals(retcode, 0) self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(",", "\t")) - self.assertEquals(o[1], sample_data_rows[1].replace(",", "\t")) - self.assertEquals(o[2], sample_data_rows[2].replace(",", "\t")) + self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) self.cleanup(tmpfile) @@ -358,9 +358,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(",", "\t")) - self.assertEquals(o[1], sample_data_rows[1].replace(",", "\t")) - self.assertEquals(o[2], sample_data_rows[2].replace(",", "\t")) + self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) 
self.cleanup(tmpfile) @@ -572,7 +572,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 1) - self.assertTrue('Warning - data is empty' in e[0]) + self.assertTrue(six.b('Warning - data is empty') in e[0]) self.cleanup(tmpfile) @@ -624,7 +624,7 @@ class BasicTests(AbstractQTestCase): def test_spaces_in_header_row(self): tmpfile = self.create_file_with_data( - header_row_with_spaces + "\n" + sample_data_no_header) + header_row_with_spaces + six.b("\n") + sample_data_no_header) cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) @@ -632,9 +632,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 3) - self.assertEquals(o[0], 'a,1') - self.assertEquals(o[1], 'b,2') - self.assertEquals(o[2], 'c,') + self.assertEquals(o[0], six.b('a,1')) + self.assertEquals(o[1], six.b('b,2')) + self.assertEquals(o[2], six.b('c,')) self.cleanup(tmpfile) @@ -720,7 +720,7 @@ class BasicTests(AbstractQTestCase): def test_use_query_file(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data("select name from %s" % tmp_data_file.name) + tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name retcode, o, e = run_command(cmd) @@ -959,7 +959,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),1) self.assertEquals(len(o),0) - self.assertTrue(e[0].startswith('Strict mode. Column Count is expected to identical')) + self.assertTrue(e[0].startswith(six.b('Strict mode. 
Column Count is expected to identical'))) self.cleanup(tmp_data_file) @@ -1343,9 +1343,9 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) def test_default_column_max_length_parameter__too_long(self): - huge_text = "x" * 132000 + huge_text = six.b("x") * 132000 - file_data = "a,b,c\n1,%s,3\n" % huge_text + file_data = six.b("a,b,c\n1,{},3\n".format(huge_text)) tmpfile = self.create_file_with_data(file_data) @@ -1356,9 +1356,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 1) - self.assertTrue(e[0].startswith("Column length is larger than the maximum")) - self.assertTrue(("Offending file is '%s'" % tmpfile.name) in e[0]) - self.assertTrue('Line is 2' in e[0]) + self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) + self.assertTrue(six.b("Offending file is '{}'".format(tmpfile.name)) in e[0]) + self.assertTrue(six.b('Line is 2') in e[0]) self.cleanup(tmpfile) From c61b4f03527131b22ab4bab02d2c3bc5e73e1ce1 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 12:39:46 +0200 Subject: [PATCH 011/111] wip --- test/test-suite | 125 ++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 62 deletions(-) diff --git a/test/test-suite b/test/test-suite index d1e74dad..dfc1e007 100755 --- a/test/test-suite +++ b/test/test-suite @@ -22,6 +22,7 @@ import locale import pprint import six from six.moves import range +import codecs sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams @@ -60,7 +61,7 @@ def run_command(cmd_to_run): print("RESULT:{}".format(res)) return res -uneven_ls_output = """drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux +uneven_ls_output = six.b("""drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt drwxr-xr-x 2 root root 4096 Apr 24 2013 /srv drwx------ 2 root root 16384 Jun 21 2013 
/lost+found @@ -69,9 +70,9 @@ drwxr-xr-x 2 root root 4096 Jun 21 2013 /cdrom drwxr-xr-x 3 root root 4096 Jun 21 2013 /home lrwxrwxrwx 1 root root 29 Jun 21 2013 /vmlinuz -> boot/vmlinuz-3.8.0-19-generic lrwxrwxrwx 1 root root 32 Jun 21 2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic -""" +""") -find_output = """8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp +find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp 8299123 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576 8263229 964 -rw-rw-r-- 1 mapred mapred 984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser 8263230 4 -rw-rw-r-- 1 harel harel 1223 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormconf.ser @@ -81,7 +82,7 @@ find_output = """8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 1 8263607 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514169735.version 8263533 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514172733.version 8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version -""" +""") header_row = six.b('name,value1,value2') sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')] @@ -1771,11 +1772,11 @@ class FormattingTests(AbstractQTestCase): self.assertEquals(len(o), 2) self.assertEquals(len(e), 0) - self.assertEquals(o[0], 'mysum myavg') - self.assertEquals(o[1], '55.000 5.500') + self.assertEquals(o[0], six.b('mysum myavg')) + self.assertEquals(o[1], six.b('55.000 5.500')) def 
test_failure_to_parse_universal_newlines_without_explicit_flag(self): - data = 'permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a' + data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') tmp_data_file = self.create_file_with_data(data) cmd = '../bin/q -d , -H "select * from %s"' % tmp_data_file.name @@ -1785,7 +1786,7 @@ class FormattingTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertTrue(e[0].startswith('Data contains universal newlines')) + self.assertTrue(e[0].startswith(six.b('Data contains universal newlines'))) self.cleanup(tmp_data_file) @@ -1838,8 +1839,8 @@ class SqlTests(AbstractQTestCase): self.assertEquals(retcode, 0) self.assertEquals(len(o), 2) - self.assertEquals(o[0], 'ppp dip.1@otherdomain.com') - self.assertEquals(o[1], 'ppp dip.2@otherdomain.com') + self.assertEquals(o[0], six.b('ppp dip.1@otherdomain.com')) + self.assertEquals(o[1], six.b('ppp dip.2@otherdomain.com')) def test_join_example_with_output_header(self): cmd = '../bin/q -O "select myfiles.c8 aaa,emails.c2 bbb from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) 
where myfiles.c8 = \'ppp\'"' @@ -1848,12 +1849,12 @@ class SqlTests(AbstractQTestCase): self.assertEquals(retcode, 0) self.assertEquals(len(o), 3) - self.assertEquals(o[0], 'aaa bbb') - self.assertEquals(o[1], 'ppp dip.1@otherdomain.com') - self.assertEquals(o[2], 'ppp dip.2@otherdomain.com') + self.assertEquals(o[0], six.b('aaa bbb')) + self.assertEquals(o[1], six.b('ppp dip.1@otherdomain.com')) + self.assertEquals(o[2], six.b('ppp dip.2@otherdomain.com')) def test_self_join1(self): - tmpfile = self.create_file_with_data("\n".join(["%s 9000" % i for i in range(0,10)])) + tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name) retcode, o, e = run_command(cmd) @@ -1864,7 +1865,7 @@ class SqlTests(AbstractQTestCase): self.cleanup(tmpfile) def test_self_join_reuses_table(self): - tmpfile = self.create_file_with_data("\n".join(["%s 9000" % i for i in range(0,10)])) + tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name) retcode, o, e = run_command(cmd) @@ -1872,14 +1873,14 @@ class SqlTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 3) - self.assertEquals(o[0],'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1],' `c1` - int') - self.assertEquals(o[2],' `c2` - int') + self.assertEquals(o[0],six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1],six.b(' `c1` - int')) + self.assertEquals(o[2],six.b(' `c2` - int')) self.cleanup(tmpfile) def test_self_join2(self): - tmpfile1 = self.create_file_with_data("\n".join(["%s 9000" % i for i in range(0,10)])) + tmpfile1 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = 
a2.c2)"' % (tmpfile1.name,tmpfile1.name) retcode, o, e = run_command(cmd) @@ -1889,7 +1890,7 @@ class SqlTests(AbstractQTestCase): self.cleanup(tmpfile1) - tmpfile2 = self.create_file_with_data("\n".join(["%s 9000" % i for i in range(0,10)])) + tmpfile2 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name) retcode, o, e = run_command(cmd) @@ -1900,12 +1901,12 @@ class SqlTests(AbstractQTestCase): self.cleanup(tmpfile2) def test_disable_column_type_detection(self): - tmpfile = self.create_file_with_data('''regular_text,text_with_digits1,text_with_digits2,float_number + tmpfile = self.create_file_with_data(six.b('''regular_text,text_with_digits1,text_with_digits2,float_number "regular text 1",67,"67",12.3 "regular text 2",067,"067",22.3 "regular text 3",123,"123",33.4 "regular text 4",-123,"-123",0122.2 -''') +''')) # Check original column type detection cmd = '../bin/q -A -d , -H "select * from %s"' % (tmpfile.name) @@ -1917,11 +1918,11 @@ class SqlTests(AbstractQTestCase): self.assertEquals(len(o), 5) - self.assertEquals(o[0],'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1],' `regular_text` - text') - self.assertEquals(o[2],' `text_with_digits1` - int') - self.assertEquals(o[3],' `text_with_digits2` - int') - self.assertEquals(o[4],' `float_number` - float') + self.assertEquals(o[0],six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1],six.b(' `regular_text` - text')) + self.assertEquals(o[2],six.b(' `text_with_digits1` - int')) + self.assertEquals(o[3],six.b(' `text_with_digits2` - int')) + self.assertEquals(o[4],six.b(' `float_number` - float')) # Check column types detected when actual detection is disabled cmd = '../bin/q -A -d , -H --as-text "select * from %s"' % (tmpfile.name) @@ -1932,11 +1933,11 @@ class SqlTests(AbstractQTestCase): 
self.assertEquals(len(e), 0) self.assertEquals(len(o), 5) - self.assertEquals(o[0],'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1],' `regular_text` - text') - self.assertEquals(o[2],' `text_with_digits1` - text') - self.assertEquals(o[3],' `text_with_digits2` - text') - self.assertEquals(o[4],' `float_number` - text') + self.assertEquals(o[0],six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1],six.b(' `regular_text` - text')) + self.assertEquals(o[2],six.b(' `text_with_digits1` - text')) + self.assertEquals(o[3],six.b(' `text_with_digits2` - text')) + self.assertEquals(o[4],six.b(' `float_number` - text')) # Get actual data with regular detection cmd = '../bin/q -d , -H "select * from %s"' % (tmpfile.name) @@ -1947,10 +1948,10 @@ class SqlTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 4) - self.assertEquals(o[0],"regular text 1,67,67,12.3"); - self.assertEquals(o[1],"regular text 2,67,67,22.3"); - self.assertEquals(o[2],"regular text 3,123,123,33.4"); - self.assertEquals(o[3],"regular text 4,-123,-123,122.2"); + self.assertEquals(o[0],six.b("regular text 1,67,67,12.3")) + self.assertEquals(o[1],six.b("regular text 2,67,67,22.3")) + self.assertEquals(o[2],six.b("regular text 3,123,123,33.4")) + self.assertEquals(o[3],six.b("regular text 4,-123,-123,122.2")) # Get actual data without detection cmd = '../bin/q -d , -H --as-text "select * from %s"' % (tmpfile.name) @@ -1961,10 +1962,10 @@ class SqlTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 4) - self.assertEquals(o[0],"regular text 1,67,67,12.3"); - self.assertEquals(o[1],"regular text 2,067,067,22.3"); - self.assertEquals(o[2],"regular text 3,123,123,33.4"); - self.assertEquals(o[3],"regular text 4,-123,-123,0122.2"); + self.assertEquals(o[0],six.b("regular text 1,67,67,12.3")) + self.assertEquals(o[1],six.b("regular text 2,067,067,22.3")) + self.assertEquals(o[2],six.b("regular text 3,123,123,33.4")) + 
self.assertEquals(o[3],six.b("regular text 4,-123,-123,0122.2")) self.cleanup(tmpfile) @@ -1972,7 +1973,7 @@ class SqlTests(AbstractQTestCase): class BasicModuleTests(AbstractQTestCase): def test_simple_query(self): - tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) r = q.execute('select * from %s' % tmpfile.name) @@ -1988,7 +1989,7 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile) def test_loaded_data_reuse(self): - tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) r1 = q.execute('select * from %s' % tmpfile.name) @@ -2013,10 +2014,10 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile) def test_stdin_injection(self): - tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r = q.execute('select * from -',stdin_file=file(tmpfile.name,'rb')) + r = q.execute('select * from -',stdin_file=codecs.open(tmpfile.name,'rb',encoding='utf-8')) self.assertTrue(r.status == 'ok') self.assertEquals(len(r.warnings),0) @@ -2029,10 +2030,10 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile) def test_named_stdin_injection(self): - tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r = q.execute('select a from my_stdin_data',stdin_file=file(tmpfile.name,'rb'),stdin_filename='my_stdin_data') + r = q.execute('select a from my_stdin_data',stdin_file=codecs.open(tmpfile.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data') self.assertTrue(r.status == 'ok') 
self.assertEquals(len(r.warnings),0) @@ -2045,11 +2046,11 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile) def test_stdin_injection_isolation(self): - tmpfile1 = self.create_file_with_data("a b c\n1 2 3\n4 5 6") - tmpfile2 = self.create_file_with_data("d e f\n7 8 9\n10 11 12") + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("d e f\n7 8 9\n10 11 12")) q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r1 = q.execute('select * from -',stdin_file=file(tmpfile1.name,'rb')) + r1 = q.execute('select * from -',stdin_file=codecs.open(tmpfile1.name,'rb',encoding='utf-8')) self.assertTrue(r1.status == 'ok') self.assertEquals(len(r1.warnings),0) @@ -2059,7 +2060,7 @@ class BasicModuleTests(AbstractQTestCase): self.assertEquals(len(r1.metadata.data_loads),1) self.assertEquals(r1.metadata.data_loads[0].filename,'-') - r2 = q.execute('select * from -',stdin_file=file(tmpfile2.name,'rb')) + r2 = q.execute('select * from -',stdin_file=codecs.open(tmpfile2.name,'rb',encoding='utf-8')) self.assertTrue(r2.status == 'ok') self.assertEquals(len(r2.warnings),0) @@ -2074,11 +2075,11 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile2) def test_multiple_stdin_injection(self): - tmpfile1 = self.create_file_with_data("a b c\n1 2 3\n4 5 6") - tmpfile2 = self.create_file_with_data("d e f\n7 8 9\n10 11 12") + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("d e f\n7 8 9\n10 11 12")) q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r1 = q.execute('select * from my_stdin_data1',stdin_file=file(tmpfile1.name,'rb'),stdin_filename='my_stdin_data1') + r1 = q.execute('select * from my_stdin_data1',stdin_file=codecs.open(tmpfile1.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data1') self.assertTrue(r1.status == 'ok') self.assertEquals(len(r1.warnings),0) @@ -2088,7 +2089,7 @@ class 
BasicModuleTests(AbstractQTestCase): self.assertEquals(len(r1.metadata.data_loads),1) self.assertEquals(r1.metadata.data_loads[0].filename,'my_stdin_data1') - r2 = q.execute('select * from my_stdin_data2',stdin_file=file(tmpfile2.name,'rb'),stdin_filename='my_stdin_data2') + r2 = q.execute('select * from my_stdin_data2',stdin_file=codecs.open(tmpfile2.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data2') self.assertTrue(r2.status == 'ok') self.assertEquals(len(r2.warnings),0) @@ -2112,8 +2113,8 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile2) def test_different_input_params_for_different_files(self): - tmpfile1 = self.create_file_with_data("a b c\n1 2 3\n4 5 6") - tmpfile2 = self.create_file_with_data("7\t8\t9\n10\t11\t12") + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("7\t8\t9\n10\t11\t12")) q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) @@ -2133,8 +2134,8 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile2) def test_different_input_params_for_different_files(self): - tmpfile1 = self.create_file_with_data("a b c\n1 2 3\n4 5 6") - tmpfile2 = self.create_file_with_data("7\t8\t9\n10\t11\t12") + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("7\t8\t9\n10\t11\t12")) q = QTextAsData() @@ -2154,7 +2155,7 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile2) def test_input_params_override(self): - tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) default_input_params = QInputParams() @@ -2205,7 +2206,7 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(q_output.error.msg.startswith('query error')) def test_execute_response(self): - tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) q = QTextAsData() 
@@ -2236,7 +2237,7 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile) def test_analyze_response(self): - tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) q = QTextAsData() @@ -2267,7 +2268,7 @@ class BasicModuleTests(AbstractQTestCase): self.cleanup(tmpfile) def test_load_data_from_string(self): - input_str = 'column1,column2,column3\n' + '\n'.join(['value1,2.5,value3'] * 1000) + input_str = six.u('column1,column2,column3\n') + six.u('\n').join([six.u('value1,2.5,value3')] * 1000) q = QTextAsData() From 8dce1eb99b7745163310023865e46b1274a5ed5a Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 13:27:55 +0200 Subject: [PATCH 012/111] wip --- .travis.yml | 4 +- bin/q | 3 +- requirements.txt | 2 + test/test-suite | 185 ++++++++++++++++++++++++----------------------- 4 files changed, 100 insertions(+), 94 deletions(-) create mode 100644 requirements.txt diff --git a/.travis.yml b/.travis.yml index 77becd8d..b6c29f1a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,11 @@ language: python python: - "2.7" - "3.6" + - "3.7" matrix: allow_failures: - python: "3.6" -install: pip install flake8 + - python: "3.7" +install: pip install -r requirements before_script: flake8 ./bin/q --count --select=E901,E999,F821,F822,F823 --show-source --statistics script: test/test-all diff --git a/bin/q b/bin/q index fc67361f..d9cc1267 100755 --- a/bin/q +++ b/bin/q @@ -133,8 +133,7 @@ class Sqlite3DB(object): for s in self.conn.iterdump(): c.execute(s) results = c.fetchall() - #print "executed %s results %s " % (s,results) - for source_filename_str,tn in table_names_mapping.iteritems(): + for source_filename_str,tn in six.iteritems(table_names_mapping): c.execute('alter table `%s` rename to `%s`' % (tn, source_filename_str)) new_db.close() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..6c4193ae --- /dev/null +++ b/requirements.txt @@ 
-0,0 +1,2 @@ +six==1.11.0 +flake8==3.6.0 diff --git a/test/test-suite b/test/test-suite index dfc1e007..3ef9ade3 100755 --- a/test/test-suite +++ b/test/test-suite @@ -39,7 +39,7 @@ if len(sys.argv) > 2 and sys.argv[2] == '-v': def run_command(cmd_to_run): global DEBUG if DEBUG: - print(cmd_to_run) + print("CMD: {}".format(cmd_to_run)) p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True) o, e = p.communicate() @@ -163,16 +163,16 @@ class SaveDbToDiskTests(AbstractQTestCase): self.assertFalse(os.path.exists(db_filename)) retcode, o, e = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename) - + self.assertTrue(retcode == 0) self.assertTrue(len(o) == 0) self.assertTrue(len(e) == 5) - self.assertTrue(e[0].startswith('Going to save data')) - self.assertTrue(db_filename in e[0]) - self.assertTrue(e[1].startswith('Data has been loaded in')) - self.assertTrue(e[2].startswith('Saving data to db file')) - self.assertTrue(e[3].startswith('Data has been saved into')) - self.assertTrue(e[4] == 'Query to run on the database: select count(*) from `-`;') + self.assertTrue(e[0].startswith(six.b('Going to save data'))) + self.assertTrue(db_filename.encode(sys.stdout.encoding) in e[0]) + self.assertTrue(e[1].startswith(six.b('Data has been loaded in'))) + self.assertTrue(e[2].startswith(six.b('Saving data to db file'))) + self.assertTrue(e[3].startswith(six.b('Data has been saved into'))) + self.assertTrue(e[4] == six.b('Query to run on the database: select count(*) from `-`;')) self.assertTrue(os.path.exists(db_filename)) @@ -195,8 +195,8 @@ class SaveDbToDiskTests(AbstractQTestCase): retcode2, o2, e2 = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename) self.assertTrue(retcode2 != 0) - self.assertTrue(e2[0].startswith('Going to save data into a disk database')) - self.assertTrue(e2[1] == 'Disk database file %s already exists.' 
% db_filename) + self.assertTrue(e2[0].startswith(six.b('Going to save data into a disk database'))) + self.assertTrue(e2[1] == six.b('Disk database file {} already exists.'.format(db_filename))) os.remove(db_filename) @@ -257,9 +257,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 3) self.assertTrue(e[0].startswith( - "Warning: column count is one - did you provide the correct delimiter")) - self.assertTrue(e[1].startswith("Bad header row")) - self.assertTrue("Column name cannot contain commas" in e[2]) + six.b("Warning: column count is one - did you provide the correct delimiter"))) + self.assertTrue(e[1].startswith(six.b("Bad header row"))) + self.assertTrue(six.b("Column name cannot contain commas") in e[2]) self.cleanup(tmpfile) @@ -374,9 +374,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(",", "\t")) - self.assertEquals(o[1], sample_data_rows[1].replace(",", "\t")) - self.assertEquals(o[2], sample_data_rows[2].replace(",", "\t")) + self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) self.cleanup(tmpfile) @@ -1098,20 +1098,20 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),2) - self.assertEquals("\n".join(o),input_data) + self.assertEquals(six.b("\n").join(o),input_data) self.cleanup(tmp_data_file) def test_consistency_of_chaining_minimal_wrapping_to_minimal_wrapping(self): - input_data = '"quoted data" 23\nunquoted-data 54' + input_data = six.b('"quoted data" 23\nunquoted-data 54') self._internal_test_consistency_of_chaining_output_to_input(input_data,'minimal','minimal') def test_consistency_of_chaining_all_wrapping_to_all_wrapping(self): - input_data = '"quoted data" "23"\n"unquoted-data" "54"' + input_data 
= six.b('"quoted data" "23"\n"unquoted-data" "54"') self._internal_test_consistency_of_chaining_output_to_input(input_data,'all','all') def test_utf8_with_bom_encoding(self): - utf_8_data_with_bom = '\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n' + utf_8_data_with_bom = six.b('\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n') tmp_data_file = self.create_file_with_data(utf_8_data_with_bom,encoding=None) cmd = '../bin/q -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name @@ -1121,9 +1121,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),3) - self.assertEquals(o[0],'typeid,limit,apcost,date,checkpointId') - self.assertEquals(o[1],'1,2,5,"1,2,3,4,5,6,7","3000,3001,3002"') - self.assertEquals(o[2],'2,2,5,"1,2,3,4,5,6,7","3003,3004,3005"') + self.assertEquals(o[0],six.b('typeid,limit,apcost,date,checkpointId')) + self.assertEquals(o[1],six.b('1,2,5,"1,2,3,4,5,6,7","3000,3001,3002"')) + self.assertEquals(o[2],six.b('2,2,5,"1,2,3,4,5,6,7","3003,3004,3005"')) self.cleanup(tmp_data_file) @@ -1149,13 +1149,13 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),7) - self.assertTrue(o[0].startswith('Table for file')) - self.assertEquals(o[1].strip(),'`c1` - int') - self.assertEquals(o[2].strip(),'`c2` - float') - self.assertEquals(o[3].strip(),'`c3` - text') - self.assertEquals(o[4].strip(),'`c4` - text') - self.assertEquals(o[5].strip(),'`c5` - text') - self.assertEquals(o[6].strip(),'`c6` - float') + self.assertTrue(o[0].startswith(six.b('Table for file'))) + self.assertEquals(o[1].strip(),six.b('`c1` - int')) + self.assertEquals(o[2].strip(),six.b('`c2` - float')) + self.assertEquals(o[3].strip(),six.b('`c3` - text')) + self.assertEquals(o[4].strip(),six.b('`c4` - 
text')) + self.assertEquals(o[5].strip(),six.b('`c5` - text')) + self.assertEquals(o[6].strip(),six.b('`c6` - float')) self.cleanup(tmp_data_file) @@ -1170,10 +1170,10 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e),0) self.assertEquals(len(o),4) - self.assertTrue(o[0],'multiline_double_double_quoted') - self.assertTrue(o[1],'control-value-5') - self.assertTrue(o[2],'this is a double double quoted "multiline\n value".') - self.assertTrue(o[3],'control-value-5') + self.assertTrue(o[0],six.b('multiline_double_double_quoted')) + self.assertTrue(o[1],six.b('control-value-5')) + self.assertTrue(o[2],six.b('this is a double double quoted "multiline\n value".')) + self.assertTrue(o[3],six.b('control-value-5')) self.cleanup(tmp_data_file) @@ -1326,9 +1326,9 @@ class BasicTests(AbstractQTestCase): self.assertEquals(e[0],six.b("No files matching 'non-existent-file' have been found")) def test_default_column_max_length_parameter__short_enough(self): - huge_text = "x" * 131000 + huge_text = six.b("x" * 131000) - file_data = "a,b,c\n1,%s,3\n" % huge_text + file_data = six.b("a,b,c\n1,{},3\n".format(huge_text)) tmpfile = self.create_file_with_data(file_data) @@ -1339,7 +1339,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0],'1') + self.assertEquals(o[0],six.b('1')) self.cleanup(tmpfile) @@ -1433,7 +1433,7 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 0) self.assertEquals(len(e), 1) - self.assertTrue("Column Count is expected to identical" in e[0]) + self.assertTrue(six.b("Column Count is expected to identical") in e[0]) self.cleanup(tmpfile) @@ -1447,7 +1447,7 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals( - e[0], "Strict mode. Column count is expected to be 4 but is 3") + e[0], six.b("Strict mode. 
Column count is expected to be 4 but is 3")) self.cleanup(tmpfile) @@ -1461,7 +1461,7 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals( - e[0], "Strict mode. Column count is expected to be 2 but is 3") + e[0], six.b("Strict mode. Column count is expected to be 2 but is 3")) self.cleanup(tmpfile) @@ -1475,10 +1475,10 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 4) self.assertEquals(len(e), 0) - self.assertEquals(o[0], 'Table for file: %s' % tmpfile.name) - self.assertEquals(o[1].strip(), '`name` - text') - self.assertEquals(o[2].strip(), '`value1` - int') - self.assertEquals(o[3].strip(), '`c3` - int') + self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEquals(o[1].strip(), six.b('`name` - text')) + self.assertEquals(o[2].strip(), six.b('`value1` - int')) + self.assertEquals(o[3].strip(), six.b('`c3` - int')) self.cleanup(tmpfile) @@ -1493,7 +1493,7 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals( - e[0], 'Strict mode. Header row contains less columns than expected column count(2 vs 3)') + e[0], six.b('Strict mode. 
Header row contains less columns than expected column count(2 vs 3)')) self.cleanup(tmpfile) @@ -1506,9 +1506,9 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(o[0], 'a;1;0') - self.assertEquals(o[1], 'b;2;0') - self.assertEquals(o[2], 'c;;0') + self.assertEquals(o[0], six.b('a;1;0')) + self.assertEquals(o[1], six.b('b;2;0')) + self.assertEquals(o[2], six.b('c;;0')) self.cleanup(tmpfile) @@ -1521,7 +1521,7 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0], '1.5') + self.assertEquals(o[0], six.b('1.5')) self.cleanup(tmpfile) @@ -1534,7 +1534,7 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0], 'c,,0') + self.assertEquals(o[0], six.b('c,,0')) self.cleanup(tmpfile) @@ -1549,7 +1549,7 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0], 'c,,0') + self.assertEquals(o[0], six.b('c,,0')) self.cleanup(tmpfile) @@ -1566,14 +1566,14 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(column_rows), 11) - column_tuples = [x.strip().split(" ") for x in column_rows] + column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] column_info = [(x[0], x[2]) for x in column_tuples] column_names = [x[0] for x in column_tuples] column_types = [x[2] for x in column_tuples] - self.assertEquals(column_names, ['`c%s`' % x for x in range(1, 12)]) - self.assertEquals(column_types, [ - 'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text']) + self.assertEquals(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 12)]) + self.assertEquals(column_types, list(map(lambda x:six.b(x),[ + 'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text']))) self.cleanup(tmpfile) @@ -1590,14 +1590,14 @@ class 
ParsingModeTests(AbstractQTestCase): self.assertEquals(len(column_rows), 9) - column_tuples = [x.strip().split(" ") for x in column_rows] + column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] column_info = [(x[0], x[2]) for x in column_tuples] column_names = [x[0] for x in column_tuples] column_types = [x[2] for x in column_tuples] - self.assertEquals(column_names, ['`c%s`' % x for x in range(1, 10)]) + self.assertEquals(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 10)]) self.assertEquals( - column_types, ['text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text']) + column_types, list(map(lambda x:six.b(x),['text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text']))) self.cleanup(tmpfile) @@ -1610,8 +1610,8 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 9) self.assertEquals(len(e), 0) - expected_output = ["/selinux", "/mnt", "/srv", "/lost+found", '"/initrd.img.old -> /boot/initrd.img-3.8.0-19-generic"', - "/cdrom", "/home", '"/vmlinuz -> boot/vmlinuz-3.8.0-19-generic"', '"/initrd.img -> boot/initrd.img-3.8.0-19-generic"'] + expected_output = list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", '"/initrd.img.old -> /boot/initrd.img-3.8.0-19-generic"', + "/cdrom", "/home", '"/vmlinuz -> boot/vmlinuz-3.8.0-19-generic"', '"/initrd.img -> boot/initrd.img-3.8.0-19-generic"'])) self.assertEquals(o, expected_output) @@ -1626,9 +1626,9 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o),2) - self.assertEquals(e[0],"Warning: column count is one - did you provide the correct delimiter?") - self.assertEquals(o[0],'data without commas 1') - self.assertEquals(o[1],'data without commas 2') + self.assertEquals(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) + self.assertEquals(o[0],six.b('data without commas 1')) + self.assertEquals(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1641,9 
+1641,9 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o),2) - self.assertEquals(e[0],"Warning: column count is one - did you provide the correct delimiter?") - self.assertEquals(o[0],'data without commas 1') - self.assertEquals(o[1],'data without commas 2') + self.assertEquals(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) + self.assertEquals(o[0],six.b('data without commas 1')) + self.assertEquals(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1657,8 +1657,8 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'data without commas 1') - self.assertEquals(o[1],'data without commas 2') + self.assertEquals(o[0],six.b('data without commas 1')) + self.assertEquals(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1671,8 +1671,8 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o),2) - self.assertEquals(o[0],'data without commas 1') - self.assertEquals(o[1],'data without commas 2') + self.assertEquals(o[0],six.b('data without commas 1')) + self.assertEquals(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1685,18 +1685,18 @@ class ParsingModeTests(AbstractQTestCase): self.assertEquals(len(o), 9) self.assertEquals(len(e), 0) - expected_output = ["/selinux", "/mnt", "/srv", "/lost+found", - "/initrd.img.old", "/cdrom", "/home", "/vmlinuz", "/initrd.img"] + expected_output = list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", + "/initrd.img.old", "/cdrom", "/home", "/vmlinuz", "/initrd.img"])) self.assertEquals(o, expected_output) self.cleanup(tmpfile) def test_fluffy_mode_column_count_mismatch(self): - data_row = "column1 column2 column3 column4" + data_row = six.b("column1 column2 column3 column4") data_list = [data_row] * 1000 - data_list[950] = "column1 column2 column3 column4 column5" - tmpfile = 
self.create_file_with_data("\n".join(data_list)) + data_list[950] = six.b("column1 column2 column3 column4 column5") + tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) cmd = '../bin/q -m fluffy "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) @@ -1704,16 +1704,16 @@ class ParsingModeTests(AbstractQTestCase): self.assertNotEquals(retcode,0) self.assertEquals(len(o),0) self.assertEquals(len(e),1) - self.assertTrue(e[0].startswith("Deprecated fluffy mode")) - self.assertTrue(' row 951 ' in e[0]) + self.assertTrue(e[0].startswith(six.b("Deprecated fluffy mode"))) + self.assertTrue(six.b(' row 951 ') in e[0]) self.cleanup(tmpfile) def test_strict_mode_column_count_mismatch__less_columns(self): - data_row = "column1 column2 column3 column4" + data_row = six.b("column1 column2 column3 column4") data_list = [data_row] * 1000 - data_list[750] = "column1 column3 column4" - tmpfile = self.create_file_with_data("\n".join(data_list)) + data_list[750] = six.b("column1 column3 column4") + tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) @@ -1721,16 +1721,16 @@ class ParsingModeTests(AbstractQTestCase): self.assertNotEquals(retcode,0) self.assertEquals(len(o),0) self.assertEquals(len(e),1) - self.assertTrue(e[0].startswith("Strict mode - Expected 4 columns instead of 3 columns")) - self.assertTrue(' row 751.' 
in e[0]) + self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 3 columns"))) + self.assertTrue(six.b(' row 751.') in e[0]) self.cleanup(tmpfile) def test_strict_mode_column_count_mismatch__more_columns(self): - data_row = "column1 column2 column3 column4" + data_row = six.b("column1 column2 column3 column4") data_list = [data_row] * 1000 - data_list[750] = "column1 column2 column3 column4 column5" - tmpfile = self.create_file_with_data("\n".join(data_list)) + data_list[750] = six.b("column1 column2 column3 column4 column5") + tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) @@ -1738,8 +1738,8 @@ class ParsingModeTests(AbstractQTestCase): self.assertNotEquals(retcode,0) self.assertEquals(len(o),0) self.assertEquals(len(e),1) - self.assertTrue(e[0].startswith("Strict mode - Expected 4 columns instead of 5 columns")) - self.assertTrue(' row 751.' in e[0]) + self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 5 columns"))) + self.assertTrue(six.b(' row 751.') in e[0]) self.cleanup(tmpfile) @@ -1758,7 +1758,7 @@ class FormattingTests(AbstractQTestCase): self.assertEquals(len(o), 1) self.assertEquals(len(e), 0) - self.assertEquals(o[0], '55.000 5.500') + self.assertEquals(o[0], six.b('55.000 5.500')) def test_column_formatting_with_output_header(self): perl_regex = "'s/1\n/column_name\n1\n/;'" @@ -1791,14 +1791,17 @@ class FormattingTests(AbstractQTestCase): self.cleanup(tmp_data_file) def test_universal_newlines_parsing_flag(self): - expected_output = [['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-May-07', '6850000', 'USD', 'b'], + def list_as_byte_list(l): + return list(map(lambda x:six.b(x),l)) + + expected_output = list(map(lambda x:list_as_byte_list(x),[['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-May-07', '6850000', 'USD', 'b'], ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', 
'1-Oct-06', '6000000', 'USD', 'a'], ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Jan-08', '25000000', 'USD', 'c'], ['mycityfaces', 'MyCityFaces', '7', 'web', 'Scottsdale', 'AZ', '1-Jan-08', '50000', 'USD', 'seed'], ['flypaper', 'Flypaper', '', 'web', 'Phoenix', 'AZ', '1-Feb-08', '3000000', 'USD', 'a'], - ['infusionsoft', 'Infusionsoft', '105', 'software', 'Gilbert', 'AZ', '1-Oct-07', '9000000', 'USD', 'a']] + ['infusionsoft', 'Infusionsoft', '105', 'software', 'Gilbert', 'AZ', '1-Oct-07', '9000000', 'USD', 'a']])) - data = 'permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a' + data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') tmp_data_file = self.create_file_with_data(data) cmd = '../bin/q -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name From c38d4e009597bb1985d4b518c69bec3d6c2410c0 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 13:59:28 +0200 Subject: [PATCH 013/111] fixed pip command line --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b6c29f1a..01940dd3 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,6 @@ matrix: allow_failures: - python: "3.6" - python: "3.7" -install: pip install -r requirements +install: pip install -r requirements.txt before_script: flake8 ./bin/q --count --select=E901,E999,F821,F822,F823 --show-source --statistics script: test/test-all From 69d140fde47855cda59e05aaba150d813b9e9a27 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 14:17:40 +0200 Subject: [PATCH 014/111] python 3.7 support for travis --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 01940dd3..20d00a8a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,9 @@ python: - "3.6" - "3.7" matrix: + include: + - python: "3.7" + dist: xenial # Need for python 3.7 allow_failures: - python: "3.6" - python: "3.7" From 820e06f79fb1e46b70eefba65bbfe1b18cca0223 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Dec 2018 14:21:36 +0200 Subject: [PATCH 015/111] remove old non-working 3.7 definition for travis --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 20d00a8a..ae4f1dfd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,6 @@ language: python python: - "2.7" - "3.6" - - "3.7" matrix: include: - python: "3.7" From f83f681fafb293fc6867ed93a09cad54b5a5ebf7 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Mon, 10 Dec 2018 17:40:56 +0200 Subject: [PATCH 016/111] Fixed all tests except universal newlines in py3 --- bin/q | 16 +++++++++++----- test/test-suite | 8 ++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/bin/q b/bin/q index d9cc1267..a0cd3f45 100755 --- a/bin/q +++ b/bin/q @@ -27,7 +27,10 @@ # # Run with --help for command line details # +from __future__ import absolute_import +from __future__ import division from __future__ import print_function + q_version = "1.8" __all__ = [ 'QTextAsData' ] @@ -778,7 +781,7 @@ class MaterializedFileState(object): if self.encoding == 'utf-8-sig' and 
self.lines_read == 0 and not self.skipped_bom: try: BOM = self.f.read(3) - if BOM != '\xef\xbb\xbf': + if BOM != six.b('\xef\xbb\xbf'): raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM)) except Exception as e: raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e)) @@ -1467,14 +1470,14 @@ def quote_minimal_func(output_delimiter,v): t = type(v) if (t == str or t == unicode) and ((output_delimiter in v) or (six.u('"') in v)): return six.u('"{}"').format(escape_double_quotes_if_needed(v)) - return v; + return v def quote_nonnumeric_func(output_delimiter,v): if v is None: return v if type(v) == str or type(v) == unicode: return six.u('"{}"').format(escape_double_quotes_if_needed(v)) - return v; + return v def quote_all_func(output_delimiter,v): if type(v) == str or type(v) == unicode: @@ -1575,7 +1578,7 @@ class QOutputPrinter(object): formatting_dict = dict( [(x.split("=")[0], x.split("=")[1]) for x in self.output_params.formatting.split(",")]) else: - formatting_dict = None + formatting_dict = {} try: if self.output_params.output_header and results.metadata.output_column_name_list is not None: @@ -1584,7 +1587,7 @@ class QOutputPrinter(object): row_str = [] skip_formatting = rownum == 0 and self.output_params.output_header for i, col in enumerate(row): - if formatting_dict is not None and str(i + 1) in formatting_dict.keys() and not skip_formatting: + if str(i + 1) in formatting_dict.keys() and not skip_formatting: fmt_str = formatting_dict[str(i + 1)] else: if self.output_params.beautify: @@ -1593,6 +1596,9 @@ class QOutputPrinter(object): fmt_str = six.u("{}") if col is not None: + # Hack for python2 - The defaulting rendering of a float to string is losing precision. 
This hack works around it by using repr() + if six.PY2 and isinstance(col, float) and str(i+1) not in formatting_dict: + col = repr(col) xx = self.output_field_quoting_func(self.output_params.delimiter,col) row_str.append(fmt_str.format(xx)) else: diff --git a/test/test-suite b/test/test-suite index 3ef9ade3..49f28829 100755 --- a/test/test-suite +++ b/test/test-suite @@ -717,7 +717,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 0) self.assertEquals(len(o), 1) - self.assertEquals(o[0],'7.66666666667') + self.assertEquals(o[0],six.b('7.666666666666667')) def test_use_query_file(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) @@ -1829,9 +1829,9 @@ class SqlTests(AbstractQTestCase): self.assertEquals(len(o), 3) self.assertEquals(len(e), 0) - self.assertEquals(o[0], 'mapred mapred 0.93895816803') - self.assertEquals(o[1], 'root root 0.02734375') - self.assertEquals(o[2], 'harel harel 0.0108880996704') + self.assertEquals(o[0], six.b('mapred mapred 0.9389581680297852')) + self.assertEquals(o[1], six.b('root root 0.02734375')) + self.assertEquals(o[2], six.b('harel harel 0.010888099670410156')) self.cleanup(tmpfile) From 1319aeb6201c1a11bc4cfa369843aa2464ef8bcf Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Mon, 10 Dec 2018 18:43:49 +0200 Subject: [PATCH 017/111] some fixes after review --- .travis.yml | 2 +- bin/q | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index ae4f1dfd..402d9e12 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,5 +10,5 @@ matrix: - python: "3.6" - python: "3.7" install: pip install -r requirements.txt -before_script: flake8 ./bin/q --count --select=E901,E999,F821,F822,F823 --show-source --statistics +before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics script: test/test-all diff --git a/bin/q b/bin/q index a0cd3f45..4a6c49b0 100755 --- a/bin/q +++ b/bin/q @@ -1106,7 +1106,8 @@ def 
determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): max_lengths = [0 for x in range(0, len(m[0]))] for row_index in range(0, len(m)): for col_index in range(0, len(m[0])): - new_len = len(unicode(output_field_quoting_func(output_delimiter,m[row_index][col_index]))) + # TODO Optimize this and make sure that py2 hack of float precision is applied here as well + new_len = len("{}".format(output_field_quoting_func(output_delimiter,m[row_index][col_index]))) if new_len > max_lengths[col_index]: max_lengths[col_index] = new_len return max_lengths @@ -1591,7 +1592,7 @@ class QOutputPrinter(object): fmt_str = formatting_dict[str(i + 1)] else: if self.output_params.beautify: - fmt_str = six.u("{{0:<{}}}") % max_lengths[i] + fmt_str = six.u("{{0:<{}}}").format(max_lengths[i]) else: fmt_str = six.u("{}") From a1de801ce60a5f9cfb83d7e5aedd018418eb92e6 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 11 Dec 2018 16:02:48 +0200 Subject: [PATCH 018/111] remove deprecation warnings --- test/test-suite | 1230 +++++++++++++++++++++++------------------------ 1 file changed, 615 insertions(+), 615 deletions(-) diff --git a/test/test-suite b/test/test-suite index 49f28829..42c7a4cf 100755 --- a/test/test-suite +++ b/test/test-suite @@ -242,7 +242,7 @@ class BasicTests(AbstractQTestCase): self.assertTrue(len(o) == 0) self.assertTrue(len(e) == 1) - self.assertEquals(e[0],six.b('Cannot decompress standard input. Pipe the input through zcat in order to decompress.')) + self.assertEqual(e[0],six.b('Cannot decompress standard input. 
Pipe the input through zcat in order to decompress.')) self.cleanup(tmpfile) @@ -253,8 +253,8 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 3) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 3) self.assertTrue(e[0].startswith( six.b("Warning: column count is one - did you provide the correct delimiter"))) @@ -269,11 +269,11 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0],six.b("1")) + self.assertEqual(o[0],six.b("1")) self.cleanup(tmpfile) @@ -283,11 +283,11 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0],six.b("2")) + self.assertEqual(o[0],six.b("2")) self.cleanup(tmpfile) @@ -297,11 +297,11 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(six.b(" ").join(o), six.b('a b c')) + self.assertEqual(six.b(" ").join(o), six.b('a b c')) self.cleanup(tmpfile) @@ -311,12 +311,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -t "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - 
self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) self.cleanup(tmpfile) @@ -326,12 +326,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -t -d , "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) self.cleanup(tmpfile) @@ -340,13 +340,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) - self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) - 
self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) self.cleanup(tmpfile) @@ -355,13 +355,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -T "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) self.cleanup(tmpfile) @@ -370,13 +370,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - self.assertEquals(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), 
six.b("\t"))) self.cleanup(tmpfile) @@ -384,26 +384,26 @@ class BasicTests(AbstractQTestCase): cmd = six.b('printf "%s" | ../bin/q -d , "select c1,c2,c3 from -"') % sample_data_no_header retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0]) - self.assertEquals(o[1], sample_data_rows[1]) - self.assertEquals(o[2], sample_data_rows[2]) + self.assertEqual(o[0], sample_data_rows[0]) + self.assertEqual(o[1], sample_data_rows[1]) + self.assertEqual(o[2], sample_data_rows[2]) def test_column_separation(self): tmpfile = self.create_file_with_data(sample_data_no_header) cmd = '../bin/q -d , "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], sample_data_rows[0]) - self.assertEquals(o[1], sample_data_rows[1]) - self.assertEquals(o[2], sample_data_rows[2]) + self.assertEqual(o[0], sample_data_rows[0]) + self.assertEqual(o[1], sample_data_rows[1]) + self.assertEqual(o[2], sample_data_rows[2]) self.cleanup(tmpfile) @@ -413,11 +413,11 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1].strip(), six.b('`c1` - text')) - self.assertEquals(o[2].strip(), six.b('`c2` - int')) - self.assertEquals(o[3].strip(), six.b('`c3` - int')) + self.assertEqual(retcode, 0) + self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1].strip(), six.b('`c1` - text')) + self.assertEqual(o[2].strip(), six.b('`c2` - int')) + 
self.assertEqual(o[3].strip(), six.b('`c3` - int')) self.cleanup(tmpfile) @@ -427,11 +427,11 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1].strip(), six.b('`c1` - text')) - self.assertEquals(o[2].strip(), six.b('`c2` - int')) - self.assertEquals(o[3].strip(), six.b('`c3` - int')) + self.assertEqual(retcode, 0) + self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1].strip(), six.b('`c1` - text')) + self.assertEqual(o[2].strip(), six.b('`c2` - int')) + self.assertEqual(o[3].strip(), six.b('`c3` - int')) def test_header_exception_on_numeric_header_data(self): tmpfile = self.create_file_with_data(sample_data_no_header) @@ -439,8 +439,8 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 3) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 3) self.assertTrue( six.b('Bad header row: Header must contain only strings') in e[0]) self.assertTrue(six.b("Column name must be a string") in e[1]) @@ -454,13 +454,13 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o),4) - self.assertEquals(len(e),2) - self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1].strip(), six.b('`name` - text')) - self.assertEquals(o[2].strip(), six.b('`value1` - int')) - self.assertEquals(o[3].strip(), six.b('`value2` - int')) - self.assertEquals(e[0].strip(),six.b('query error: no such column: c1')) + self.assertEqual(len(o),4) + self.assertEqual(len(e),2) + self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1].strip(), six.b('`name` - text')) + self.assertEqual(o[2].strip(), 
six.b('`value1` - int')) + self.assertEqual(o[3].strip(), six.b('`value2` - int')) + self.assertEqual(e[0].strip(),six.b('query error: no such column: c1')) self.assertTrue(e[1].startswith(six.b('Warning - There seems to be a '))) self.cleanup(tmpfile) @@ -470,9 +470,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select name from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(six.b(" ").join(o), six.b("a b c")) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(six.b(" ").join(o), six.b("a b c")) self.cleanup(tmpfile) @@ -481,12 +481,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select name from %s" -H -O' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 4) - self.assertEquals(o[0],six.b('name')) - self.assertEquals(o[1],six.b('a')) - self.assertEquals(o[2],six.b('b')) - self.assertEquals(o[3],six.b('c')) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 4) + self.assertEqual(o[0],six.b('name')) + self.assertEqual(o[1],six.b('a')) + self.assertEqual(o[2],six.b('b')) + self.assertEqual(o[3],six.b('c')) self.cleanup(tmpfile) @@ -497,10 +497,10 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 2) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 2) self.assertTrue(six.b('no such column: c3') in e[0]) - self.assertEquals( + self.assertEqual( e[1], six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. 
Please make sure that you are using the column names from the header line and not the default (cXX) column names')) self.cleanup(tmpfile) @@ -510,16 +510,16 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 4) - self.assertEquals(len(e), 1) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 4) + self.assertEqual(len(e), 1) - self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1].strip(), six.b('`c1` - text')) - self.assertEquals(o[2].strip(), six.b('`c2` - text')) - self.assertEquals(o[3].strip(), six.b('`c3` - text')) + self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1].strip(), six.b('`c1` - text')) + self.assertEqual(o[2].strip(), six.b('`c2` - text')) + self.assertEqual(o[3].strip(), six.b('`c3` - text')) - self.assertEquals( + self.assertEqual( e[0], six.b('Warning - There seems to be header line in the file, but -H has not been specified. 
All fields will be detected as text fields, and the header line will appear as part of the data')) self.cleanup(tmpfile) @@ -529,9 +529,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) self.assertTrue(six.b('Warning - data is empty') in e[0]) @@ -543,8 +543,8 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) m = six.b("Header line is expected but missing in file %s" % tmpfile.name) self.assertTrue(m in e[0]) @@ -556,11 +556,11 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c2 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('value1')) + self.assertEqual(o[0], six.b('value1')) self.cleanup(tmpfile) @@ -569,9 +569,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c2 from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) self.assertTrue(six.b('Warning - data is empty') in e[0]) @@ -582,13 +582,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + 
self.assertEqual(len(o), 3) - self.assertEquals(o[0], six.b('a')) - self.assertEquals(o[1], six.b('b')) - self.assertEquals(o[2], six.b('c')) + self.assertEqual(o[0], six.b('a')) + self.assertEqual(o[1], six.b('b')) + self.assertEqual(o[2], six.b('c')) self.cleanup(tmpfile) @@ -597,13 +597,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s" -k' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) - self.assertEquals(o[0], six.b('a')) - self.assertEquals(o[1], six.b(' b')) - self.assertEquals(o[2], six.b('c')) + self.assertEqual(o[0], six.b('a')) + self.assertEqual(o[1], six.b(' b')) + self.assertEqual(o[2], six.b('c')) self.cleanup(tmpfile) @@ -612,14 +612,14 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c2 from %s" -k -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 4) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) - self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1].strip(), six.b('`c1` - text')) - self.assertEquals(o[2].strip(), six.b('`c2` - int')) - self.assertEquals(o[3].strip(), six.b('`c3` - int')) + self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1].strip(), six.b('`c1` - text')) + self.assertEqual(o[2].strip(), six.b('`c2` - int')) + self.assertEqual(o[3].strip(), six.b('`c3` - int')) self.cleanup(tmpfile) @@ -629,13 +629,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + 
self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) - self.assertEquals(o[0], six.b('a,1')) - self.assertEquals(o[1], six.b('b,2')) - self.assertEquals(o[2], six.b('c,')) + self.assertEqual(o[0], six.b('a,1')) + self.assertEqual(o[1], six.b('b,2')) + self.assertEqual(o[2], six.b('c,')) self.cleanup(tmpfile) @@ -645,14 +645,14 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 4) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) - self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1].strip(), six.b('`name` - text')) - self.assertEquals(o[2].strip(), six.b('`value 1` - int')) - self.assertEquals(o[3].strip(), six.b('`value2` - int')) + self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1].strip(), six.b('`name` - text')) + self.assertEqual(o[2].strip(), six.b('`value 1` - int')) + self.assertEqual(o[3].strip(), six.b('`value2` - int')) self.cleanup(tmpfile) @@ -660,64 +660,64 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , ""' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 1) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) - self.assertEquals(e[0],six.b('Query cannot be empty (query number 1)')) + self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) def test_empty_query_in_command_line(self): cmd = '../bin/q -d , " "' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 1) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) - self.assertEquals(e[0],six.b('Query cannot be empty (query number 1)')) + 
self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) def test_failure_in_query_stops_processing_queries(self): cmd = '../bin/q -d , "select 500" "select 300" "wrong-query" "select 8000"' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 1) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 2) - self.assertEquals(o[0],six.b('500')) - self.assertEquals(o[1],six.b('300')) + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 2) + self.assertEqual(o[0],six.b('500')) + self.assertEqual(o[1],six.b('300')) def test_multiple_queries_in_command_line(self): cmd = '../bin/q -d , "select 500" "select 300+100" "select 300" "select 200"' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 4) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) - self.assertEquals(o[0],six.b('500')) - self.assertEquals(o[1],six.b('400')) - self.assertEquals(o[2],six.b('300')) - self.assertEquals(o[3],six.b('200')) + self.assertEqual(o[0],six.b('500')) + self.assertEqual(o[1],six.b('400')) + self.assertEqual(o[2],six.b('300')) + self.assertEqual(o[3],six.b('200')) def test_literal_calculation_query(self): cmd = '../bin/q -d , "select 1+40/6"' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 1) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 1) - self.assertEquals(o[0],six.b('7')) + self.assertEqual(o[0],six.b('7')) def test_literal_calculation_query_float_result(self): cmd = '../bin/q -d , "select 1+40/6.0"' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 1) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 1) - self.assertEquals(o[0],six.b('7.666666666666667')) + 
self.assertEqual(o[0],six.b('7.666666666666667')) def test_use_query_file(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) @@ -726,13 +726,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) - self.assertEquals(o[0], six.b('a')) - self.assertEquals(o[1], six.b('b')) - self.assertEquals(o[2], six.b('c')) + self.assertEqual(o[0], six.b('a')) + self.assertEqual(o[1], six.b('b')) + self.assertEqual(o[2], six.b('c')) self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -744,9 +744,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H -Q ascii' % tmp_query_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,3) - self.assertEquals(len(o),0) - self.assertEquals(len(e),1) + self.assertEqual(retcode,3) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) self.assertTrue(e[0].startswith(six.b('Could not decode query number 1 using the provided query encoding (ascii)'))) @@ -760,14 +760,14 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(o),4) - self.assertEquals(len(e),0) + self.assertEqual(retcode,0) + self.assertEqual(len(o),4) + self.assertEqual(len(e),0) - self.assertEquals(o[0].decode(SYSTEM_ENCODING), u'name,Hr\xe1\u010d') - self.assertEquals(o[1].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d') - self.assertEquals(o[2].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d') - self.assertEquals(o[3].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d') + self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'name,Hr\xe1\u010d') + self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d') + 
self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d') + self.assertEqual(o[3].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d') self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -779,13 +779,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H -Q utf-8' % tmp_query_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) - self.assertEquals(o[0].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d') - self.assertEquals(o[1].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d') - self.assertEquals(o[2].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d') + self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d') + self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d') + self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d') self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -797,9 +797,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H "select * from ppp"' % tmp_query_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 1) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) self.assertTrue(e[0].startswith(six.b("Can't provide both a query file and a query on the command line"))) @@ -814,13 +814,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) - self.assertEquals(o[0].decode(target_encoding), u'Hr\xe1\u010d') - self.assertEquals(o[1].decode(target_encoding), u'Hr\xe1\u010d') - self.assertEquals(o[2].decode(target_encoding), 
u'Hr\xe1\u010d') + self.assertEqual(o[0].decode(target_encoding), u'Hr\xe1\u010d') + self.assertEqual(o[1].decode(target_encoding), u'Hr\xe1\u010d') + self.assertEqual(o[2].decode(target_encoding), u'Hr\xe1\u010d') self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -832,9 +832,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 3) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEqual(retcode, 3) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) self.assertTrue(e[0].startswith(six.b('Cannot encode data'))) @@ -848,9 +848,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 1) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) self.assertTrue(e[0].startswith(six.b("Query cannot be empty"))) @@ -860,9 +860,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -q non-existent-query-file -H' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 1) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) self.assertTrue(e[0].startswith(six.b("Could not read query from file"))) @@ -873,9 +873,9 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),4) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) self.assertTrue(o[0],'non_quoted') self.assertTrue(o[1],'control-value-1') @@ -890,9 +890,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " "select c2 from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - 
self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),4) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) self.assertTrue(o[0],'regular_double_quoted') self.assertTrue(o[1],'control-value-2') @@ -907,9 +907,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " "select c3 from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),4) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) self.assertTrue(o[0],'double_double_quoted') self.assertTrue(o[1],'control-value-3') @@ -924,9 +924,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " "select c4 from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),4) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) self.assertTrue(o[0],'escaped_double_quoted') self.assertTrue(o[1],'control-value-4') @@ -941,12 +941,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -m relaxed -D , -w none -W none "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('"quoted,data",23')) - self.assertEquals(o[1],six.b('unquoted-data,54,')) + self.assertEqual(o[0],six.b('"quoted,data",23')) + self.assertEqual(o[1],six.b('unquoted-data,54,')) self.cleanup(tmp_data_file) @@ -957,8 +957,8 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode,0) - self.assertEquals(len(e),1) - self.assertEquals(len(o),0) + self.assertEqual(len(e),1) + self.assertEqual(len(o),0) self.assertTrue(e[0].startswith(six.b('Strict mode. 
Column Count is expected to identical'))) @@ -970,12 +970,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D , -w minimal "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('quoted data,23')) - self.assertEquals(o[1],six.b('unquoted-data,54')) + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -985,12 +985,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D , -w all "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('quoted data,23')) - self.assertEquals(o[1],six.b('unquoted-data,54')) + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -1001,8 +1001,8 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode,0) - self.assertEquals(len(e),1) - self.assertEquals(len(o),0) + self.assertEqual(len(e),1) + self.assertEqual(len(o),0) self.assertTrue(e[0].startswith(six.b('Input quoting mode can only be one of all,minimal,none'))) self.assertTrue(six.b('unknown_wrapping_mode') in e[0]) @@ -1015,12 +1015,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D , -w all -W none "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('quoted 
data,23')) - self.assertEquals(o[1],six.b('unquoted-data,54')) + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -1030,12 +1030,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D , -w all -W minimal "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('quoted data,23')) - self.assertEquals(o[1],six.b('unquoted-data,54')) + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) self.cleanup(tmp_data_file) @@ -1046,12 +1046,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D " " -w all -W minimal "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('"quoted data" 23')) - self.assertEquals(o[1],six.b('unquoted-data 54')) + self.assertEqual(o[0],six.b('"quoted data" 23')) + self.assertEqual(o[1],six.b('unquoted-data 54')) self.cleanup(tmp_data_file) @@ -1061,12 +1061,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D , -w all -W nonnumeric "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('"quoted data",23')) - self.assertEquals(o[1],six.b('"unquoted-data",54')) + self.assertEqual(o[0],six.b('"quoted data",23')) + self.assertEqual(o[1],six.b('"unquoted-data",54')) self.cleanup(tmp_data_file) @@ -1076,12 +1076,12 
@@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D , -w all -W all "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('"quoted data","23"')) - self.assertEquals(o[1],six.b('"unquoted-data","54"')) + self.assertEqual(o[0],six.b('"quoted data","23"')) + self.assertEqual(o[1],six.b('"unquoted-data","54"')) self.cleanup(tmp_data_file) @@ -1094,11 +1094,11 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(chained_cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(six.b("\n").join(o),input_data) + self.assertEqual(six.b("\n").join(o),input_data) self.cleanup(tmp_data_file) @@ -1117,13 +1117,13 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),3) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),3) - self.assertEquals(o[0],six.b('typeid,limit,apcost,date,checkpointId')) - self.assertEquals(o[1],six.b('1,2,5,"1,2,3,4,5,6,7","3000,3001,3002"')) - self.assertEquals(o[2],six.b('2,2,5,"1,2,3,4,5,6,7","3003,3004,3005"')) + self.assertEqual(o[0],six.b('typeid,limit,apcost,date,checkpointId')) + self.assertEqual(o[1],six.b('1,2,5,"1,2,3,4,5,6,7","3000,3001,3002"')) + self.assertEqual(o[2],six.b('2,2,5,"1,2,3,4,5,6,7","3003,3004,3005"')) self.cleanup(tmp_data_file) @@ -1136,26 +1136,26 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - 
self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),1) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),1) - self.assertEquals(o[0].decode('utf-8'),u'111,22.22,testing text with special characters - citt\xe0 ,http://somekindofurl.com,12.13.14.15,12.1') + self.assertEqual(o[0].decode('utf-8'),u'111,22.22,testing text with special characters - citt\xe0 ,http://somekindofurl.com,12.13.14.15,12.1') cmd = '../bin/q -d , "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),7) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),7) self.assertTrue(o[0].startswith(six.b('Table for file'))) - self.assertEquals(o[1].strip(),six.b('`c1` - int')) - self.assertEquals(o[2].strip(),six.b('`c2` - float')) - self.assertEquals(o[3].strip(),six.b('`c3` - text')) - self.assertEquals(o[4].strip(),six.b('`c4` - text')) - self.assertEquals(o[5].strip(),six.b('`c5` - text')) - self.assertEquals(o[6].strip(),six.b('`c6` - float')) + self.assertEqual(o[1].strip(),six.b('`c1` - int')) + self.assertEqual(o[2].strip(),six.b('`c2` - float')) + self.assertEqual(o[3].strip(),six.b('`c3` - text')) + self.assertEqual(o[4].strip(),six.b('`c4` - text')) + self.assertEqual(o[5].strip(),six.b('`c5` - text')) + self.assertEqual(o[6].strip(),six.b('`c6` - float')) self.cleanup(tmp_data_file) @@ -1166,9 +1166,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),4) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) self.assertTrue(o[0],six.b('multiline_double_double_quoted')) self.assertTrue(o[1],six.b('control-value-5')) @@ -1184,9 +1184,9 @@ class 
BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),4) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) self.assertTrue(o[0],'multiline_escaped_double_quoted') self.assertTrue(o[1],'control-value-6') @@ -1204,32 +1204,32 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " --disable-double-double-quoting "select c2 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('double_double_quoted')) - self.assertEquals(o[1],six.b('this is a quoted value with "double')) + self.assertEqual(o[0],six.b('double_double_quoted')) + self.assertEqual(o[1],six.b('this is a quoted value with "double')) cmd = '../bin/q -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('')) - self.assertEquals(o[1],six.b('double')) + self.assertEqual(o[0],six.b('')) + self.assertEqual(o[1],six.b('double')) cmd = '../bin/q -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('')) - self.assertEquals(o[1],six.b('quotes"""')) + self.assertEqual(o[0],six.b('')) + 
self.assertEqual(o[1],six.b('quotes"""')) self.cleanup(tmp_data_file) @@ -1242,32 +1242,32 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c2 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('escaped_double_quoted')) - self.assertEquals(o[1],six.b('this is a quoted value with \\escaped')) + self.assertEqual(o[0],six.b('escaped_double_quoted')) + self.assertEqual(o[1],six.b('this is a quoted value with \\escaped')) cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('')) - self.assertEquals(o[1],six.b('double')) + self.assertEqual(o[0],six.b('')) + self.assertEqual(o[1],six.b('double')) cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) - self.assertEquals(len(o),2) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('')) - self.assertEquals(o[1],six.b('quotes\\""')) + self.assertEqual(o[0],six.b('')) + self.assertEqual(o[1],six.b('quotes\\""')) self.cleanup(tmp_data_file) @@ -1279,38 +1279,38 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) + 
self.assertEqual(retcode,0) + self.assertEqual(len(e),0) o = o[1:] # remove the first "Table for file..." line in the output - self.assertEquals(len(o),7) # found 7 fields + self.assertEqual(len(o),7) # found 7 fields cmd = '../bin/q -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) o = o[1:] # remove the first "Table for file..." line in the output - self.assertEquals(len(o),5) # found 5 fields + self.assertEqual(len(o),5) # found 5 fields cmd = '../bin/q -d " " --disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) o = o[1:] # remove the first "Table for file..." line in the output - self.assertEquals(len(o),5) # found 5 fields + self.assertEqual(len(o),5) # found 5 fields cmd = '../bin/q -d " " "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e),0) + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) o = o[1:] # remove the first "Table for file..." 
line in the output - self.assertEquals(len(o),3) # found only 3 fields, which is the correct amount + self.assertEqual(len(o),3) # found only 3 fields, which is the correct amount self.cleanup(tmp_data_file) @@ -1320,10 +1320,10 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode,0) - self.assertEquals(len(o),0) - self.assertEquals(len(e),1) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) - self.assertEquals(e[0],six.b("No files matching 'non-existent-file' have been found")) + self.assertEqual(e[0],six.b("No files matching 'non-existent-file' have been found")) def test_default_column_max_length_parameter__short_enough(self): huge_text = six.b("x" * 131000) @@ -1335,11 +1335,11 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0],six.b('1')) + self.assertEqual(o[0],six.b('1')) self.cleanup(tmpfile) @@ -1353,9 +1353,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 31) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(retcode, 31) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) self.assertTrue(six.b("Offending file is '{}'".format(tmpfile.name)) in e[0]) @@ -1370,9 +1370,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -H -d , -M 3 "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 31) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(retcode, 31) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 
1) self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) self.assertTrue((six.b("Offending file is '%s'" % tmpfile.name)) in e[0]) @@ -1381,11 +1381,11 @@ class BasicTests(AbstractQTestCase): cmd2 = '../bin/q -H -d , -M 300 -H "select a from %s"' % tmpfile.name retcode2, o2, e2 = run_command(cmd2) - self.assertEquals(retcode2, 0) - self.assertEquals(len(o2), 1) - self.assertEquals(len(e2), 0) + self.assertEqual(retcode2, 0) + self.assertEqual(len(o2), 1) + self.assertEqual(len(e2), 0) - self.assertEquals(o2[0],six.b('very-long-text')) + self.assertEqual(o2[0],six.b('very-long-text')) self.cleanup(tmpfile) @@ -1396,9 +1396,9 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -H -d , -M 0 "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 31) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(retcode, 31) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) self.assertTrue(e[0].startswith(six.b('Max column length limit must be a positive integer'))) @@ -1412,12 +1412,12 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 35) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 2) + self.assertEqual(retcode, 35) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 2) self.assertTrue(e[0].startswith(six.b('Bad header row:'))) - self.assertEquals(e[1],six.b("'a': Column name is duplicated")) + self.assertEqual(e[1],six.b("'a': Column name is duplicated")) self.cleanup(tmpfile) @@ -1430,8 +1430,8 @@ class ParsingModeTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) self.assertTrue(six.b("Column Count is expected to identical") in e[0]) @@ -1443,10 +1443,10 @@ class 
ParsingModeTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) - self.assertEquals( + self.assertEqual( e[0], six.b("Strict mode. Column count is expected to be 4 but is 3")) self.cleanup(tmpfile) @@ -1457,10 +1457,10 @@ class ParsingModeTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) - self.assertEquals( + self.assertEqual( e[0], six.b("Strict mode. Column count is expected to be 2 but is 3")) self.cleanup(tmpfile) @@ -1471,14 +1471,14 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 4) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 4) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1].strip(), six.b('`name` - text')) - self.assertEquals(o[2].strip(), six.b('`value1` - int')) - self.assertEquals(o[3].strip(), six.b('`c3` - int')) + self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1].strip(), six.b('`name` - text')) + self.assertEqual(o[2].strip(), six.b('`value1` - int')) + self.assertEqual(o[3].strip(), six.b('`c3` - int')) self.cleanup(tmpfile) @@ -1489,10 +1489,10 @@ class ParsingModeTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(o), 0) - self.assertEquals(len(e), 1) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) - self.assertEquals( + self.assertEqual( e[0], six.b('Strict mode. 
Header row contains less columns than expected column count(2 vs 3)')) self.cleanup(tmpfile) @@ -1502,13 +1502,13 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -d , "select * from %s" -D ";"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('a;1;0')) - self.assertEquals(o[1], six.b('b;2;0')) - self.assertEquals(o[2], six.b('c;;0')) + self.assertEqual(o[0], six.b('a;1;0')) + self.assertEqual(o[1], six.b('b;2;0')) + self.assertEqual(o[2], six.b('c;;0')) self.cleanup(tmpfile) @@ -1517,11 +1517,11 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -d , "select avg(c2) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('1.5')) + self.assertEqual(o[0], six.b('1.5')) self.cleanup(tmpfile) @@ -1530,11 +1530,11 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -d , "select * from %s where c2 is null"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('c,,0')) + self.assertEqual(o[0], six.b('c,,0')) self.cleanup(tmpfile) @@ -1545,11 +1545,11 @@ class ParsingModeTests(AbstractQTestCase): tmpfile.name, "''") retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('c,,0')) + self.assertEqual(o[0], 
six.b('c,,0')) self.cleanup(tmpfile) @@ -1558,21 +1558,21 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m relaxed "select count(*) from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) table_name_row = o[0] column_rows = o[1:] - self.assertEquals(len(column_rows), 11) + self.assertEqual(len(column_rows), 11) column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] column_info = [(x[0], x[2]) for x in column_tuples] column_names = [x[0] for x in column_tuples] column_types = [x[2] for x in column_tuples] - self.assertEquals(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 12)]) - self.assertEquals(column_types, list(map(lambda x:six.b(x),[ + self.assertEqual(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 12)]) + self.assertEqual(column_types, list(map(lambda x:six.b(x),[ 'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text']))) self.cleanup(tmpfile) @@ -1582,21 +1582,21 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) table_name_row = o[0] column_rows = o[1:] - self.assertEquals(len(column_rows), 9) + self.assertEqual(len(column_rows), 9) column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] column_info = [(x[0], x[2]) for x in column_tuples] column_names = [x[0] for x in column_tuples] column_types = [x[2] for x in column_tuples] - self.assertEquals(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 10)]) - self.assertEquals( + self.assertEqual(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 10)]) + self.assertEqual( column_types, list(map(lambda x:six.b(x),['text', 'int', 'text', 'text', 'int', 'text', 
'int', 'int', 'text']))) self.cleanup(tmpfile) @@ -1606,14 +1606,14 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m relaxed "select c9 from %s" -c 9' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 9) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 9) + self.assertEqual(len(e), 0) expected_output = list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", '"/initrd.img.old -> /boot/initrd.img-3.8.0-19-generic"', "/cdrom", "/home", '"/vmlinuz -> boot/vmlinuz-3.8.0-19-generic"', '"/initrd.img -> boot/initrd.img-3.8.0-19-generic"'])) - self.assertEquals(o, expected_output) + self.assertEqual(o, expected_output) self.cleanup(tmpfile) @@ -1622,13 +1622,13 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m relaxed "select c1 from %s" -d ,' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 1) - self.assertEquals(len(o),2) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 1) + self.assertEqual(len(o),2) - self.assertEquals(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) - self.assertEquals(o[0],six.b('data without commas 1')) - self.assertEquals(o[1],six.b('data without commas 2')) + self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) + self.assertEqual(o[0],six.b('data without commas 1')) + self.assertEqual(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1637,13 +1637,13 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m strict' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 1) - self.assertEquals(len(o),2) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 1) + self.assertEqual(len(o),2) - self.assertEquals(e[0],six.b("Warning: column count is one 
- did you provide the correct delimiter?")) - self.assertEquals(o[0],six.b('data without commas 1')) - self.assertEquals(o[1],six.b('data without commas 2')) + self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) + self.assertEqual(o[0],six.b('data without commas 1')) + self.assertEqual(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1653,12 +1653,12 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m relaxed -c 1' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o),2) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('data without commas 1')) - self.assertEquals(o[1],six.b('data without commas 2')) + self.assertEqual(o[0],six.b('data without commas 1')) + self.assertEqual(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1667,12 +1667,12 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m strict -c 1' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o),2) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o),2) - self.assertEquals(o[0],six.b('data without commas 1')) - self.assertEquals(o[1],six.b('data without commas 2')) + self.assertEqual(o[0],six.b('data without commas 1')) + self.assertEqual(o[1],six.b('data without commas 2')) self.cleanup(tmpfile) @@ -1681,14 +1681,14 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m fluffy "select c9 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 9) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 9) + self.assertEqual(len(e), 0) expected_output = 
list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", "/initrd.img.old", "/cdrom", "/home", "/vmlinuz", "/initrd.img"])) - self.assertEquals(o, expected_output) + self.assertEqual(o, expected_output) self.cleanup(tmpfile) @@ -1702,8 +1702,8 @@ class ParsingModeTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode,0) - self.assertEquals(len(o),0) - self.assertEquals(len(e),1) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) self.assertTrue(e[0].startswith(six.b("Deprecated fluffy mode"))) self.assertTrue(six.b(' row 951 ') in e[0]) @@ -1719,8 +1719,8 @@ class ParsingModeTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode,0) - self.assertEquals(len(o),0) - self.assertEquals(len(e),1) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 3 columns"))) self.assertTrue(six.b(' row 751.') in e[0]) @@ -1736,8 +1736,8 @@ class ParsingModeTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode,0) - self.assertEquals(len(o),0) - self.assertEquals(len(e),1) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 5 columns"))) self.assertTrue(six.b(' row 751.') in e[0]) @@ -1754,11 +1754,11 @@ class FormattingTests(AbstractQTestCase): retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 1) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('55.000 5.500')) + self.assertEqual(o[0], six.b('55.000 5.500')) def test_column_formatting_with_output_header(self): perl_regex = "'s/1\n/column_name\n1\n/;'" @@ -1768,12 +1768,12 @@ class FormattingTests(AbstractQTestCase): retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - 
self.assertEquals(len(o), 2) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 2) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('mysum myavg')) - self.assertEquals(o[1], six.b('55.000 5.500')) + self.assertEqual(o[0], six.b('mysum myavg')) + self.assertEqual(o[1], six.b('55.000 5.500')) def test_failure_to_parse_universal_newlines_without_explicit_flag(self): data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') @@ -1783,8 +1783,8 @@ class FormattingTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertNotEquals(retcode, 0) - self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) self.assertTrue(e[0].startswith(six.b('Data contains universal newlines'))) @@ -1807,13 +1807,13 @@ class FormattingTests(AbstractQTestCase): cmd = '../bin/q -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode,0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 6) + self.assertEqual(retcode,0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 6) actual_output = map(lambda row: row.split(","),o) - self.assertEquals(actual_output,expected_output) + self.assertEqual(actual_output,expected_output) self.cleanup(tmp_data_file) @@ -1825,13 +1825,13 @@ class SqlTests(AbstractQTestCase): cmd = '../bin/q "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % 
tmpfile.name retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) - self.assertEquals(len(e), 0) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) - self.assertEquals(o[0], six.b('mapred mapred 0.9389581680297852')) - self.assertEquals(o[1], six.b('root root 0.02734375')) - self.assertEquals(o[2], six.b('harel harel 0.010888099670410156')) + self.assertEqual(o[0], six.b('mapred mapred 0.9389581680297852')) + self.assertEqual(o[1], six.b('root root 0.02734375')) + self.assertEqual(o[2], six.b('harel harel 0.010888099670410156')) self.cleanup(tmpfile) @@ -1839,31 +1839,31 @@ class SqlTests(AbstractQTestCase): cmd = '../bin/q "select myfiles.c8,emails.c2 from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 2) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 2) - self.assertEquals(o[0], six.b('ppp dip.1@otherdomain.com')) - self.assertEquals(o[1], six.b('ppp dip.2@otherdomain.com')) + self.assertEqual(o[0], six.b('ppp dip.1@otherdomain.com')) + self.assertEqual(o[1], six.b('ppp dip.2@otherdomain.com')) def test_join_example_with_output_header(self): cmd = '../bin/q -O "select myfiles.c8 aaa,emails.c2 bbb from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"' retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) - self.assertEquals(o[0], six.b('aaa bbb')) - self.assertEquals(o[1], six.b('ppp dip.1@otherdomain.com')) - self.assertEquals(o[2], six.b('ppp dip.2@otherdomain.com')) + self.assertEqual(o[0], six.b('aaa bbb')) + self.assertEqual(o[1], six.b('ppp dip.1@otherdomain.com')) + self.assertEqual(o[2], 
six.b('ppp dip.2@otherdomain.com')) def test_self_join1(self): tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 10) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 10) self.cleanup(tmpfile) @@ -1872,13 +1872,13 @@ class SqlTests(AbstractQTestCase): cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 3) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) - self.assertEquals(o[0],six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1],six.b(' `c1` - int')) - self.assertEquals(o[2],six.b(' `c2` - int')) + self.assertEqual(o[0],six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' `c1` - int')) + self.assertEqual(o[2],six.b(' `c2` - int')) self.cleanup(tmpfile) @@ -1887,9 +1887,9 @@ class SqlTests(AbstractQTestCase): cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name,tmpfile1.name) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 10*10) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 10*10) self.cleanup(tmpfile1) @@ -1897,9 +1897,9 @@ class SqlTests(AbstractQTestCase): cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 10*10*10) + 
self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 10*10*10) self.cleanup(tmpfile2) @@ -1916,59 +1916,59 @@ class SqlTests(AbstractQTestCase): retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 5) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 5) - self.assertEquals(o[0],six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1],six.b(' `regular_text` - text')) - self.assertEquals(o[2],six.b(' `text_with_digits1` - int')) - self.assertEquals(o[3],six.b(' `text_with_digits2` - int')) - self.assertEquals(o[4],six.b(' `float_number` - float')) + self.assertEqual(o[0],six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' `regular_text` - text')) + self.assertEqual(o[2],six.b(' `text_with_digits1` - int')) + self.assertEqual(o[3],six.b(' `text_with_digits2` - int')) + self.assertEqual(o[4],six.b(' `float_number` - float')) # Check column types detected when actual detection is disabled cmd = '../bin/q -A -d , -H --as-text "select * from %s"' % (tmpfile.name) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 5) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 5) - self.assertEquals(o[0],six.b('Table for file: %s' % tmpfile.name)) - self.assertEquals(o[1],six.b(' `regular_text` - text')) - self.assertEquals(o[2],six.b(' `text_with_digits1` - text')) - self.assertEquals(o[3],six.b(' `text_with_digits2` - text')) - self.assertEquals(o[4],six.b(' `float_number` - text')) + self.assertEqual(o[0],six.b('Table for file: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' `regular_text` - text')) + self.assertEqual(o[2],six.b(' `text_with_digits1` - text')) + self.assertEqual(o[3],six.b(' `text_with_digits2` - text')) + self.assertEqual(o[4],six.b(' `float_number` - text')) # Get actual 
data with regular detection cmd = '../bin/q -d , -H "select * from %s"' % (tmpfile.name) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 4) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) - self.assertEquals(o[0],six.b("regular text 1,67,67,12.3")) - self.assertEquals(o[1],six.b("regular text 2,67,67,22.3")) - self.assertEquals(o[2],six.b("regular text 3,123,123,33.4")) - self.assertEquals(o[3],six.b("regular text 4,-123,-123,122.2")) + self.assertEqual(o[0],six.b("regular text 1,67,67,12.3")) + self.assertEqual(o[1],six.b("regular text 2,67,67,22.3")) + self.assertEqual(o[2],six.b("regular text 3,123,123,33.4")) + self.assertEqual(o[3],six.b("regular text 4,-123,-123,122.2")) # Get actual data without detection cmd = '../bin/q -d , -H --as-text "select * from %s"' % (tmpfile.name) retcode, o, e = run_command(cmd) - self.assertEquals(retcode, 0) - self.assertEquals(len(e), 0) - self.assertEquals(len(o), 4) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) - self.assertEquals(o[0],six.b("regular text 1,67,67,12.3")) - self.assertEquals(o[1],six.b("regular text 2,067,067,22.3")) - self.assertEquals(o[2],six.b("regular text 3,123,123,33.4")) - self.assertEquals(o[3],six.b("regular text 4,-123,-123,0122.2")) + self.assertEqual(o[0],six.b("regular text 1,67,67,12.3")) + self.assertEqual(o[1],six.b("regular text 2,067,067,22.3")) + self.assertEqual(o[2],six.b("regular text 3,123,123,33.4")) + self.assertEqual(o[3],six.b("regular text 4,-123,-123,0122.2")) self.cleanup(tmpfile) @@ -1982,12 +1982,12 @@ class BasicModuleTests(AbstractQTestCase): r = q.execute('select * from %s' % tmpfile.name) self.assertTrue(r.status == 'ok') - self.assertEquals(len(r.warnings),0) - self.assertEquals(len(r.data),2) - self.assertEquals(r.metadata.output_column_name_list,['a','b','c']) - self.assertEquals(r.data,[(1,2,3),(4,5,6)]) - 
self.assertEquals(len(r.metadata.data_loads),1) - self.assertEquals(r.metadata.data_loads[0].filename,tmpfile.name) + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),2) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r.data,[(1,2,3),(4,5,6)]) + self.assertEqual(len(r.metadata.data_loads),1) + self.assertEqual(r.metadata.data_loads[0].filename,tmpfile.name) self.cleanup(tmpfile) @@ -2000,19 +2000,19 @@ class BasicModuleTests(AbstractQTestCase): r2 = q.execute('select * from %s' % tmpfile.name) self.assertTrue(r1.status == 'ok') - self.assertEquals(len(r1.warnings),0) - self.assertEquals(len(r1.data),2) - self.assertEquals(r1.metadata.output_column_name_list,['a','b','c']) - self.assertEquals(r1.data,[(1,2,3),(4,5,6)]) - self.assertEquals(r1.metadata.data_loads[0].filename,tmpfile.name) + self.assertEqual(len(r1.warnings),0) + self.assertEqual(len(r1.data),2) + self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) + self.assertEqual(r1.metadata.data_loads[0].filename,tmpfile.name) self.assertTrue(r2.status == 'ok') - self.assertEquals(len(r1.metadata.data_loads),1) - self.assertEquals(r1.metadata.data_loads[0].filename,tmpfile.name) - self.assertEquals(len(r2.metadata.data_loads),0) - self.assertEquals(r2.data,r1.data) - self.assertEquals(r2.metadata.output_column_name_list,r2.metadata.output_column_name_list) - self.assertEquals(len(r2.warnings),0) + self.assertEqual(len(r1.metadata.data_loads),1) + self.assertEqual(r1.metadata.data_loads[0].filename,tmpfile.name) + self.assertEqual(len(r2.metadata.data_loads),0) + self.assertEqual(r2.data,r1.data) + self.assertEqual(r2.metadata.output_column_name_list,r2.metadata.output_column_name_list) + self.assertEqual(len(r2.warnings),0) self.cleanup(tmpfile) @@ -2023,12 +2023,12 @@ class BasicModuleTests(AbstractQTestCase): r = q.execute('select * from 
-',stdin_file=codecs.open(tmpfile.name,'rb',encoding='utf-8')) self.assertTrue(r.status == 'ok') - self.assertEquals(len(r.warnings),0) - self.assertEquals(len(r.data),2) - self.assertEquals(r.metadata.output_column_name_list,['a','b','c']) - self.assertEquals(r.data,[(1,2,3),(4,5,6)]) - self.assertEquals(len(r.metadata.data_loads),1) - self.assertEquals(r.metadata.data_loads[0].filename,'-') + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),2) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r.data,[(1,2,3),(4,5,6)]) + self.assertEqual(len(r.metadata.data_loads),1) + self.assertEqual(r.metadata.data_loads[0].filename,'-') self.cleanup(tmpfile) @@ -2039,12 +2039,12 @@ class BasicModuleTests(AbstractQTestCase): r = q.execute('select a from my_stdin_data',stdin_file=codecs.open(tmpfile.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data') self.assertTrue(r.status == 'ok') - self.assertEquals(len(r.warnings),0) - self.assertEquals(len(r.data),2) - self.assertEquals(r.metadata.output_column_name_list,['a']) - self.assertEquals(r.data,[(1,),(4,)]) - self.assertEquals(len(r.metadata.data_loads),1) - self.assertEquals(r.metadata.data_loads[0].filename,'my_stdin_data') + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),2) + self.assertEqual(r.metadata.output_column_name_list,['a']) + self.assertEqual(r.data,[(1,),(4,)]) + self.assertEqual(len(r.metadata.data_loads),1) + self.assertEqual(r.metadata.data_loads[0].filename,'my_stdin_data') self.cleanup(tmpfile) @@ -2056,23 +2056,23 @@ class BasicModuleTests(AbstractQTestCase): r1 = q.execute('select * from -',stdin_file=codecs.open(tmpfile1.name,'rb',encoding='utf-8')) self.assertTrue(r1.status == 'ok') - self.assertEquals(len(r1.warnings),0) - self.assertEquals(len(r1.data),2) - self.assertEquals(r1.metadata.output_column_name_list,['a','b','c']) - self.assertEquals(r1.data,[(1,2,3),(4,5,6)]) - self.assertEquals(len(r1.metadata.data_loads),1) - 
self.assertEquals(r1.metadata.data_loads[0].filename,'-') + self.assertEqual(len(r1.warnings),0) + self.assertEqual(len(r1.data),2) + self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) + self.assertEqual(len(r1.metadata.data_loads),1) + self.assertEqual(r1.metadata.data_loads[0].filename,'-') r2 = q.execute('select * from -',stdin_file=codecs.open(tmpfile2.name,'rb',encoding='utf-8')) self.assertTrue(r2.status == 'ok') - self.assertEquals(len(r2.warnings),0) - self.assertEquals(len(r2.data),2) - self.assertEquals(r2.metadata.output_column_name_list,['d','e','f']) - self.assertEquals(r2.data,[(7,8,9),(10,11,12)]) + self.assertEqual(len(r2.warnings),0) + self.assertEqual(len(r2.data),2) + self.assertEqual(r2.metadata.output_column_name_list,['d','e','f']) + self.assertEqual(r2.data,[(7,8,9),(10,11,12)]) # There should be another data load, even though it's the same 'filename' as before - self.assertEquals(len(r2.metadata.data_loads),1) - self.assertEquals(r2.metadata.data_loads[0].filename,'-') + self.assertEqual(len(r2.metadata.data_loads),1) + self.assertEqual(r2.metadata.data_loads[0].filename,'-') self.cleanup(tmpfile1) self.cleanup(tmpfile2) @@ -2085,32 +2085,32 @@ class BasicModuleTests(AbstractQTestCase): r1 = q.execute('select * from my_stdin_data1',stdin_file=codecs.open(tmpfile1.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data1') self.assertTrue(r1.status == 'ok') - self.assertEquals(len(r1.warnings),0) - self.assertEquals(len(r1.data),2) - self.assertEquals(r1.metadata.output_column_name_list,['a','b','c']) - self.assertEquals(r1.data,[(1,2,3),(4,5,6)]) - self.assertEquals(len(r1.metadata.data_loads),1) - self.assertEquals(r1.metadata.data_loads[0].filename,'my_stdin_data1') + self.assertEqual(len(r1.warnings),0) + self.assertEqual(len(r1.data),2) + self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) + 
self.assertEqual(len(r1.metadata.data_loads),1) + self.assertEqual(r1.metadata.data_loads[0].filename,'my_stdin_data1') r2 = q.execute('select * from my_stdin_data2',stdin_file=codecs.open(tmpfile2.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data2') self.assertTrue(r2.status == 'ok') - self.assertEquals(len(r2.warnings),0) - self.assertEquals(len(r2.data),2) - self.assertEquals(r2.metadata.output_column_name_list,['d','e','f']) - self.assertEquals(r2.data,[(7,8,9),(10,11,12)]) + self.assertEqual(len(r2.warnings),0) + self.assertEqual(len(r2.data),2) + self.assertEqual(r2.metadata.output_column_name_list,['d','e','f']) + self.assertEqual(r2.data,[(7,8,9),(10,11,12)]) # There should be another data load, even though it's the same 'filename' as before - self.assertEquals(len(r2.metadata.data_loads),1) - self.assertEquals(r2.metadata.data_loads[0].filename,'my_stdin_data2') + self.assertEqual(len(r2.metadata.data_loads),1) + self.assertEqual(r2.metadata.data_loads[0].filename,'my_stdin_data2') r3 = q.execute('select aa.*,bb.* from my_stdin_data1 aa join my_stdin_data2 bb') self.assertTrue(r3.status == 'ok') - self.assertEquals(len(r3.warnings),0) - self.assertEquals(len(r3.data),4) - self.assertEquals(r3.metadata.output_column_name_list,['a','b','c','d','e','f']) - self.assertEquals(r3.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) - self.assertEquals(len(r3.metadata.data_loads),0) + self.assertEqual(len(r3.warnings),0) + self.assertEqual(len(r3.data),4) + self.assertEqual(r3.metadata.output_column_name_list,['a','b','c','d','e','f']) + self.assertEqual(r3.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) + self.assertEqual(len(r3.metadata.data_loads),0) self.cleanup(tmpfile1) self.cleanup(tmpfile2) @@ -2127,11 +2127,11 @@ class BasicModuleTests(AbstractQTestCase): r = q.execute('select aa.*,bb.* from %s aa join %s bb' % (tmpfile1.name,tmpfile2.name)) self.assertTrue(r.status == 'ok') - 
self.assertEquals(len(r.warnings),0) - self.assertEquals(len(r.data),4) - self.assertEquals(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) - self.assertEquals(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) - self.assertEquals(len(r.metadata.data_loads),0) + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),4) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) + self.assertEqual(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) + self.assertEqual(len(r.metadata.data_loads),0) self.cleanup(tmpfile1) self.cleanup(tmpfile2) @@ -2148,11 +2148,11 @@ class BasicModuleTests(AbstractQTestCase): r = q.execute('select aa.*,bb.* from %s aa join %s bb' % (tmpfile1.name,tmpfile2.name)) self.assertTrue(r.status == 'ok') - self.assertEquals(len(r.warnings),0) - self.assertEquals(len(r.data),4) - self.assertEquals(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) - self.assertEquals(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) - self.assertEquals(len(r.metadata.data_loads),0) + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),4) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) + self.assertEqual(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) + self.assertEqual(len(r.metadata.data_loads),0) self.cleanup(tmpfile1) self.cleanup(tmpfile2) @@ -2176,12 +2176,12 @@ class BasicModuleTests(AbstractQTestCase): r2 = q.execute('select * from %s' % tmpfile.name,input_params=overwriting_input_params) self.assertTrue(r2.status == 'ok') - self.assertEquals(len(r2.warnings),0) - self.assertEquals(len(r2.data),2) - self.assertEquals(r2.metadata.output_column_name_list,['a','b','c']) - self.assertEquals(r2.data,[(1,2,3),(4,5,6)]) - self.assertEquals(len(r2.metadata.data_loads),1) - self.assertEquals(r2.metadata.data_loads[0].filename,tmpfile.name) + 
self.assertEqual(len(r2.warnings),0) + self.assertEqual(len(r2.data),2) + self.assertEqual(r2.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r2.data,[(1,2,3),(4,5,6)]) + self.assertEqual(len(r2.metadata.data_loads),1) + self.assertEqual(r2.metadata.data_loads[0].filename,tmpfile.name) self.cleanup(tmpfile) @@ -2217,22 +2217,22 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(q_output.status == 'ok') self.assertTrue(q_output.error is None) - self.assertEquals(len(q_output.warnings),0) - self.assertEquals(len(q_output.data),2) - self.assertEquals(q_output.data,[ (1,3),(4,6) ]) + self.assertEqual(len(q_output.warnings),0) + self.assertEqual(len(q_output.data),2) + self.assertEqual(q_output.data,[ (1,3),(4,6) ]) self.assertTrue(q_output.metadata is not None) metadata = q_output.metadata - self.assertEquals(metadata.output_column_name_list, [ 'a','c']) - self.assertEquals(len(metadata.data_loads),1) - self.assertEquals(len(metadata.table_structures),1) + self.assertEqual(metadata.output_column_name_list, [ 'a','c']) + self.assertEqual(len(metadata.data_loads),1) + self.assertEqual(len(metadata.table_structures),1) table_structure = metadata.table_structures[0] - self.assertEquals(table_structure.column_names,[ 'a','b','c']) - self.assertEquals(table_structure.column_types,[ 'int','int','int']) - self.assertEquals(table_structure.filenames_str,tmpfile.name) + self.assertEqual(table_structure.column_names,[ 'a','b','c']) + self.assertEqual(table_structure.column_types,[ 'int','int','int']) + self.assertEqual(table_structure.filenames_str,tmpfile.name) self.assertTrue(len(table_structure.materialized_files.keys()),1) self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name) self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin) @@ -2248,22 +2248,22 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(q_output.status == 'ok') self.assertTrue(q_output.error is None) - 
self.assertEquals(len(q_output.warnings),0) - self.assertEquals(len(q_output.data),2) - self.assertEquals(q_output.data,[ (1,3),(4,6) ]) + self.assertEqual(len(q_output.warnings),0) + self.assertEqual(len(q_output.data),2) + self.assertEqual(q_output.data,[ (1,3),(4,6) ]) self.assertTrue(q_output.metadata is not None) metadata = q_output.metadata - self.assertEquals(metadata.output_column_name_list, [ 'a','c']) - self.assertEquals(len(metadata.data_loads),1) - self.assertEquals(len(metadata.table_structures),1) + self.assertEqual(metadata.output_column_name_list, [ 'a','c']) + self.assertEqual(len(metadata.data_loads),1) + self.assertEqual(len(metadata.table_structures),1) table_structure = metadata.table_structures[0] - self.assertEquals(table_structure.column_names,[ 'a','b','c']) - self.assertEquals(table_structure.column_types,[ 'int','int','int']) - self.assertEquals(table_structure.filenames_str,tmpfile.name) + self.assertEqual(table_structure.column_names,[ 'a','b','c']) + self.assertEqual(table_structure.column_types,[ 'int','int','int']) + self.assertEqual(table_structure.filenames_str,tmpfile.name) self.assertTrue(len(table_structure.materialized_files.keys()),1) self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name) self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin) @@ -2281,22 +2281,22 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(q_output.status == 'ok') self.assertTrue(q_output.error is None) - self.assertEquals(len(q_output.warnings),0) + self.assertEqual(len(q_output.warnings),0) self.assertTrue(len(q_output.data),1000) - self.assertEquals(len(set(q_output.data)),1) - self.assertEquals(list(set(q_output.data))[0],(2.5,'value3')) + self.assertEqual(len(set(q_output.data)),1) + self.assertEqual(list(set(q_output.data))[0],(2.5,'value3')) metadata = q_output.metadata self.assertTrue(metadata.output_column_name_list,['column2','column3']) - 
self.assertEquals(len(metadata.data_loads),0) + self.assertEqual(len(metadata.data_loads),0) self.assertTrue(len(metadata.table_structures),1) table_structure = metadata.table_structures[0] - self.assertEquals(table_structure.column_names,['column1','column2','column3']) - self.assertEquals(table_structure.column_types,['text','float','text']) - self.assertEquals(table_structure.filenames_str,'my_data') + self.assertEqual(table_structure.column_names,['column1','column2','column3']) + self.assertEqual(table_structure.column_types,['text','float','text']) + self.assertEqual(table_structure.filenames_str,'my_data') self.assertTrue(len(table_structure.materialized_files.keys()),1) self.assertTrue(table_structure.materialized_files['my_data'].filename,'my_data') self.assertTrue(table_structure.materialized_files['my_data'].is_stdin) From b5defb0625c34b80381cebdebea52856fb3d7b21 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 11 Dec 2018 16:16:33 +0200 Subject: [PATCH 019/111] wip --- test/test-suite | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/test-suite b/test/test-suite index 42c7a4cf..7a36b247 100755 --- a/test/test-suite +++ b/test/test-suite @@ -252,7 +252,7 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " "select * from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 3) @@ -438,7 +438,7 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select * from %s" -A -H' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 3) self.assertTrue( @@ -453,7 +453,7 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s" -A -H' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + 
self.assertNotEqual(retcode, 0) self.assertEqual(len(o),4) self.assertEqual(len(e),2) self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) @@ -496,7 +496,7 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 2) self.assertTrue(six.b('no such column: c3') in e[0]) @@ -542,7 +542,7 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d , "select c1 from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 1) @@ -956,7 +956,7 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -m strict -D , -w none "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode,0) + self.assertNotEqual(retcode,0) self.assertEqual(len(e),1) self.assertEqual(len(o),0) @@ -1000,7 +1000,7 @@ class BasicTests(AbstractQTestCase): cmd = '../bin/q -d " " -D , -w unknown_wrapping_mode "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode,0) + self.assertNotEqual(retcode,0) self.assertEqual(len(e),1) self.assertEqual(len(o),0) @@ -1319,7 +1319,7 @@ class BasicTests(AbstractQTestCase): retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode,0) + self.assertNotEqual(retcode,0) self.assertEqual(len(o),0) self.assertEqual(len(e),1) @@ -1429,7 +1429,7 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m strict "select count(*) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 1) @@ -1442,7 +1442,7 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - 
self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 1) @@ -1456,7 +1456,7 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -d , -m strict -c 2 "select count(*) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 1) @@ -1488,7 +1488,7 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) self.assertEqual(len(e), 1) @@ -1701,7 +1701,7 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m fluffy "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode,0) + self.assertNotEqual(retcode,0) self.assertEqual(len(o),0) self.assertEqual(len(e),1) self.assertTrue(e[0].startswith(six.b("Deprecated fluffy mode"))) @@ -1718,7 +1718,7 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode,0) + self.assertNotEqual(retcode,0) self.assertEqual(len(o),0) self.assertEqual(len(e),1) self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 3 columns"))) @@ -1735,7 +1735,7 @@ class ParsingModeTests(AbstractQTestCase): cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) - self.assertNotEquals(retcode,0) + self.assertNotEqual(retcode,0) self.assertEqual(len(o),0) self.assertEqual(len(e),1) self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 5 columns"))) @@ -1782,7 +1782,7 @@ class FormattingTests(AbstractQTestCase): cmd = '../bin/q -d , -H "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) - 
self.assertNotEquals(retcode, 0) + self.assertNotEqual(retcode, 0) self.assertEqual(len(e), 1) self.assertEqual(len(o), 0) From f0b62b15b91583cd944ea2e8daf6f730198959fa Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 11 Dec 2018 21:38:58 +0200 Subject: [PATCH 020/111] everything running on both versions, requires more consolidation and optimization --- bin/q | 30 ++++++++++++++++++++---------- test/test-suite | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/bin/q b/bin/q index 4a6c49b0..507991da 100755 --- a/bin/q +++ b/bin/q @@ -53,6 +53,7 @@ import hashlib import uuid import math import six +import io if six.PY3: long = int @@ -704,10 +705,7 @@ class TableColumnInferer(object): def py3_encoded_csv_reader(encoding, f, dialect, is_stdin,**kwargs): try: - if not is_stdin: - csv_reader = csv.reader(codecs.iterdecode(f,encoding), dialect,**kwargs) - else: - csv_reader = csv.reader(f, dialect, **kwargs) + csv_reader = csv.reader(f, dialect, **kwargs) for row in csv_reader: yield row @@ -780,7 +778,11 @@ class MaterializedFileState(object): # multiple people. 
if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom: try: - BOM = self.f.read(3) + if six.PY2: + BOM = self.f.read(3) + else: + BOM = self.f.buffer.read(3) + if BOM != six.b('\xef\xbb\xbf'): raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM)) except Exception as e: @@ -875,13 +877,19 @@ class TableCreator(object): raise CannotUnzipStdInException() else: if self.gzipped or filename.endswith('.gz'): - f = gzip.GzipFile(fileobj=open(filename,'rb')) + f = codecs.iterdecode(gzip.GzipFile(fileobj=io.open(filename,'rb')),encoding=self.encoding) else: - if self.with_universal_newlines: - file_opening_mode = 'rbU' + if six.PY3: + if self.with_universal_newlines: + f = io.open(filename, 'rU',newline=None,encoding=self.encoding) + else: + f = io.open(filename, 'r', newline=None, encoding=self.encoding) else: - file_opening_mode = 'rb' - f = open(filename,file_opening_mode) + if self.with_universal_newlines: + file_opening_mode = 'rbU' + else: + file_opening_mode = 'rb' + f = open(filename, file_opening_mode) return f def _pre_populate(self,dialect): @@ -1419,6 +1427,8 @@ class QTextAsData(object): except KeyboardInterrupt as e: warnings.append(QWarning(e,"Interrupted")) except Exception as e: + if DEBUG: + print(traceback.format_exc()) error = QError(e,repr(e),199) return QOutput(warnings = warnings,error = error , metadata=QMetadata(table_structures=table_structures,data_loads = data_loads)) diff --git a/test/test-suite b/test/test-suite index 7a36b247..978a58bd 100755 --- a/test/test-suite +++ b/test/test-suite @@ -1775,7 +1775,7 @@ class FormattingTests(AbstractQTestCase): self.assertEqual(o[0], six.b('mysum myavg')) self.assertEqual(o[1], six.b('55.000 5.500')) - def test_failure_to_parse_universal_newlines_without_explicit_flag(self): + def py2_test_failure_to_parse_universal_newlines_without_explicit_flag(self): data = 
six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') tmp_data_file = self.create_file_with_data(data) @@ -1790,6 +1790,39 @@ class FormattingTests(AbstractQTestCase): self.cleanup(tmp_data_file) + def py3_test_successfuly_parse_universal_newlines_without_explicit_flag(self): + def list_as_byte_list(l): + return list(map(lambda x:six.b(x),l)) + + expected_output = list(map(lambda x:list_as_byte_list(x),[['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-May-07', '6850000', 'USD', 'b'], + ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Oct-06', '6000000', 'USD', 'a'], + ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Jan-08', '25000000', 'USD', 'c'], + ['mycityfaces', 'MyCityFaces', '7', 'web', 'Scottsdale', 'AZ', '1-Jan-08', '50000', 'USD', 'seed'], + ['flypaper', 'Flypaper', '', 'web', 'Phoenix', 'AZ', '1-Feb-08', '3000000', 'USD', 'a'], + ['infusionsoft', 'Infusionsoft', '105', 'software', 'Gilbert', 'AZ', '1-Oct-07', '9000000', 'USD', 'a']])) + + data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') + tmp_data_file = self.create_file_with_data(data) + + cmd = '../bin/q -d , -H "select * from %s"' % tmp_data_file.name + retcode, o, 
e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 6) + + actual_output = list(map(lambda row: row.split(six.b(",")),o)) + + self.assertEqual(actual_output,expected_output) + + self.cleanup(tmp_data_file) + + if six.PY2: + test_parsing_universal_newlines_without_explicit_flag = py2_test_failure_to_parse_universal_newlines_without_explicit_flag + else: + test_parsing_universal_newlines_without_explicit_flag = py3_test_successfuly_parse_universal_newlines_without_explicit_flag + + def test_universal_newlines_parsing_flag(self): def list_as_byte_list(l): return list(map(lambda x:six.b(x),l)) @@ -1811,7 +1844,7 @@ class FormattingTests(AbstractQTestCase): self.assertEqual(len(e), 0) self.assertEqual(len(o), 6) - actual_output = map(lambda row: row.split(","),o) + actual_output = list(map(lambda row: row.split(six.b(",")),o)) self.assertEqual(actual_output,expected_output) From 746cb9f67657c9d484befbe15862d788b18be876 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 21 Dec 2018 17:47:02 +0200 Subject: [PATCH 021/111] handle python 3.7 deprecation warning for universal file mode --- test/test-suite | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/test-suite b/test/test-suite index 978a58bd..e17afcd1 100755 --- a/test/test-suite +++ b/test/test-suite @@ -1841,7 +1841,15 @@ class FormattingTests(AbstractQTestCase): retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) - self.assertEqual(len(e), 0) + + if len(e) == 2: + # In python 3.7, there's a deprecation warning for the 'U' file opening mode, which is ok for now + self.assertEqual(len(e), 2) + self.assertTrue(b"DeprecationWarning: 'U' mode is deprecated" in e[0]) + elif len(e) != 0: + # Nothing should be output to stderr in other versions + self.assertTrue(False,msg='Unidentified output in stderr') + self.assertEqual(len(o), 6) actual_output = list(map(lambda row: row.split(six.b(",")),o)) From 
0719c6f35fd1c8e4abc80303e896b41775ceca6a Mon Sep 17 00:00:00 2001 From: Misha Brukman Date: Sun, 6 Jan 2019 14:26:37 -0500 Subject: [PATCH 022/111] Replace image code samples with text. This enables easy copy-pasting of sample code. Also added code formatting for SQL keywords. --- README.markdown | 8 ++++++-- doc/basic-examples.png | Bin 6635 -> 0 bytes 2 files changed, 6 insertions(+), 2 deletions(-) delete mode 100644 doc/basic-examples.png diff --git a/README.markdown b/README.markdown index 85863af7..9477de3d 100644 --- a/README.markdown +++ b/README.markdown @@ -3,7 +3,7 @@ # q - Text as Data q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). -q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and type detection, and q provides full support for multiple character encodings. +q treats ordinary files as database tables, and supports all SQL constructs, such as `WHERE`, `GROUP BY`, `JOIN`s, etc. It supports automatic column name and type detection, and q provides full support for multiple character encodings. q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It contains everything you need to download and use q immediately. @@ -14,7 +14,11 @@ Instructions for all OSs are [here](http://harelba.github.io/q/install.html). ## Examples -![blah](doc/basic-examples.png) +``` +q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" + +ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +``` Go [here](http://harelba.github.io/q/examples.html) for more examples. 
diff --git a/doc/basic-examples.png b/doc/basic-examples.png deleted file mode 100644 index 6943d47833761fb48a8a969dca9f287ae2e3838a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6635 zcmb`M2T+qsyT=g(6e&s(=|=^rB1#X#Jw!PeXH#6Yi5g3@U}|B%hU zaH3z#UhzUwoQoP%J?KbFGs;`g*B61Y*SwHS)^EO5qa`uNGwfRvFPRrJat3n8IFrJi zF*~TT9_T}CrUQm+AeDZ852Nf6!h9l$1dQP_4p%VpSdN#afIPdy4TKa&rRkUKWZbV9 zJt)BkX?T5-y`3x!)MPWY8Ub#Tbc#k~4aoNKpz!eUEBC1xy!!FqZYe0FGf2aoQ-A#U zK_-)-;DbI;sxa{CR-Y);%iB9KAwixlHQ!Xc#QcMufRIq+;bw1o9<^u%91j2Z@uO=d z1bejXfyz;V2rql>G`xSr%-yFCDXxz*iSt=gjS~g}V3$hvHM^YCGpO+&dL>_6d&H=| zPaefz0Yc(Ug#R)(sjiS;=9Uh%Wq+Gr&cI)$?~?XkhNnmmBK)^;<^&Rxfsb#~)#SKQ zJ@Km&aya@_Or#H))=rD9nJy>=p`J91hKg=Zcy9KUcvA*XywPqOcMq4jn;F`jnr
  • 4tlt(&%ztiw>`?mVFD(ej#i$UN*2)`Zj$U7E2 zJyCQ#e4OIl5-JD_LvNB_-*qsvY)B;e>E8cQRMD=Tc+_QS!SA7v|0i93LRDTOl3Fim zm=Zi}S9vk-=e^IbRwkx!_4{a}uoTp?f9Uw$m7_6-&w9Kge8G?rM^@WoFtO7pwT~}& zx>}c4YBArO-rnN2>eYBQZNaSqYWpaH#57Ioz_;Xh@pGKu z3g&yTvO>)rdiCegv|%V<3x7r{0M+ek8X;&RQX^9FJS0CpPyMmRx%%2utHx0w8%d5j=CluPqwrM@1dw}+t zK$B<8CVc6@&xGcDHe4v?tSP6=HuIK>Z4y|I1P}Q>Cnox|3}3bCQ%4=D&OT!ro%+zM zyCIH3GCy#*s8BfNmuB^*FTUei99w8!NA;ehR&w3@-Ut1iUzUW|g-@RF#OiJUth!7y zrPHT&_p%O$8U*M~Kl7(7%=SJkW)4aTE4^{xo<9iF8yJ}oYHV814;c!QZCNI_4S&F= z>I`i*dY`%{ihjWoY>SJ-WyCINFJ@HL{F2JKJO7BH#MzivO6R zgOK3G8m#ET8tKe2gGN_+wcm1Ah3Pnbg30)mjSrt@v&EHSKGRG3jvj|?1E##l_8rJ+ z%9+Ilw`Y^tL&PC%ELb4Wa}iv`srVU+G)5md#ZwevBTam_PHdE*8&N_CLY_PdY(6uLs?$pZlMsv?%H_m4=J! zU~+2D8@khbYjQO@@?CQ%^eyqgPAVJJpqJtKN2b$X$tqQyoTH8!>)tjuroH_2ui}H7 z<}IxJxz71rUJL1)O*nOAV6#_xg^ogquH2l?Ik%|pNfD>CUXgcGEj5uO3`!tc&2Se- zamv&D<&^7nvn_iH;Qmzq8YS+pSgak~E^?TyuEA>CWJe|8@?eo1quTQQIyXYH{zSl| z3>5~0?g^A#;vtR@S7(H51HsG*hsmO)fqENc9e9)jiX$et7>@H{qTmJ8Y;iNRM1 zn0S(vX7+Bp0l0tI$%K5vZ*7_W2Sp4%t_&Z?cc|-2SZ}K0`F&Oo!55&aiJD?cr&3IL z$-NXg^6I0oYRNiuFA#1UPYXqre^jHkSK1g8wii$)Uda)5QUaZ<>Pw`5pLAfEYW4_` z;gr)G!5|hbFarBv_klpHeClL*v8-*@ zqbiQr&g&0U7#cDwF0c&s<*ab4cEH=*6$UXFOR2D*S|Q2eV%g!9ejN}5xzhS;#Y8B z@~AalJI%8BD&7Pu6ic{Xcx!YF=&pDW5Fg{F6KQPyvFNT_=9c%ZE(3gdASMGAXqR^H zsajI^U{8j`F7;fb%n>5b&tmr4whPize-gfp5u+^o^b^B>hf2A;Uh{b9+jWFe=V#(Y0^y)xLe zQ$MPXHx9PJU>5{yYG&UK$ulo)XpV2lud!OQsIL>^{_>d<*upy7El@L7tZpCe9(;DT z{o9w)v;IqafTX4dR+159cvWCEw{GYT*nLM8J9f#A^w6D@^7VbXs!tsz(z(z6AoN`$ zX?Nzc$53$FYB~R+f^~4QpMXa#?ZfXc_%>Ab+rIlVkSW)D{V%~bcZ?^Z308{K1Bd(T zOAFb%RmLit{s;2)mY!d&=(?U0v(z?a?QpOzUepd=i`b&{^A;*MRb~6|F60=w7T}*T zCF@^*lvho^9e%<|lz@-o0iR1nCkKSB-4OCXiqiO^;hscgcOqTZ+Q}m|l+rt3a0)#1 zQ?AryRA~1KS{SL~w>Pj6QTMZf+_lEKrp28L1s&v1E+>ZWim`{Mt0rS6vQ}DM8{P8# z6ElJ}J+EF=QJms#U4p(m{Pr`X4J|;Fmwqf7VocT#vsX+dpko)c>=@Zs_5vNEcZJ)B~fF&sSoG^e$l_~7l%#o?mxjSfcV!UqzIUWbK?B|#`A zFXR&o0-iUV%hgdOWSkTcK!XTFB>n{LeJeR~BJOAGc;C_)fi3A~CY>lk=Wpt7!_VXx 
zU2pU3@h6&kbqHO*+IML&0(2Td8bu5+SgdasHHFO@#(r!ot+YL-N7dNH+q@>>&fLw@V~-$we?{i2atPGR6TOi#%F$oU8P|8ETN5Ay#v2>cKE|L-6C zPiWDqr+!F`Jn#J%Rq?E`v(%c7m7Z;OBu4K!bl8PMyh^^ zYrKifeR3^NsvN>OlS(WxddlLVeNl=X79Xj3(-kz{0 zHZj~A@Uh#`*)JwGirdp&zDMDW8%KqgX?w)bzUNry6sp=gFd4{!ug?U;kVud1t-^;sEE}6{aGymo=goq#8x@>(y${Ai84j6*AjD^_qU>Ihkeu zfwm&ZQ`oR~_0!GqgnkY4Y#TNpmS-Rb);D z0qS;`*Bc{2r-K|Wrt9z*M4CxCgpT;xWZ?8EqQf}Qw`Rb=ws$Hf7_?r)MrmHx@t$qh zEI0{Y8bxxX$!KhwW11pa&F7z%J*f}5BADk1`)S2$xtN{?HamH1dVKYIlS$JX@WACt zp`qoBI?gp{5C~&CzIU{TaS6^A-c_?%2~gwBVwRSJ3PL6sn#s?#wNhw_RQHDoCy z0Ck(?8jhi=qYMsu7$vgrwgo)iH0r!HitF89VE1Z|)+4B+ko^3oYEMyo8OP^S{F^S6 zQDiDaV`~}eWw;SQ#dm`kf557*W*EJHGW^39{ zdv&U#I#sJWp+n0yYXrjB>TaP(kobDN>Ko%7D|UUN@a2^;Qv+?)yYr;wSBmycjIT8j z6Wf+uX>8}pG+IMaC4uqN5+NWo>NOhXJ*#SIFN~~{yW^4Hl^@+wwdrIAa3_E2VN7X( z{mdS&1oo!@0px8lmVJ?)^9hfBz71#_99?gU3}(p}jV|N(F+eOF@%)-KB-EDJABc#s z*_BgAKo*Ke)S0BXGj)|s8r|X920b2@iy?}2o9FYp%l%$$i&g)CPl3UdLAPU7{mNtF zPXEc6*x{#PARep!h9Qh(gV8oqew1cyc3^p=X$B~g{Zz6?-DkaOjdReuvly*^|}NBj_~!}fZ7zbOvs)J2&Q>x5$z|2lt6XaW-h&b zT&wQMvE;yI=E$m0z|2_7{$m1A#Z0jqmJ8?F-0G|_&3P_wA!&BTvz9jCdkm(Q>ncPu zGm7YVp`UOS!Bo}35lkC-qxQ`Zq1REOC&N$!8nSN_ZA;?{TGgBgAL{x!cV_QgFi5vEAE~^$F%zEAo#pa#HQ5*LwB_C-WFR2U%G6yo<2nHGmjVwu3dzDm>N=~3jEq~p?T>Y|!?!@{x-$w$C5S3Be54uHi*Puffve(xC(Y41_{%QCG3NQK|CZgE7 zl0E!-!O2uO&y40~&a}Q5`y2G<)z_IsIAcz&0D4Wg?`hEL8?tP;4NV{6&cKi7x1+rxtz`NsG+mLN7`u%BH8G2J0KCTj8u1~DHKdZ0l*c**K zj^nfgkI0`?pMi%UiE{G$vm3gZaehdvZ#Ha>wED&pfwG_dDXy#UnE^9n{z2NQ&cxUj zV89?AeE()>X$tuGk9pkl-9s(zJ@#W}jdtm^&Zdowz0WY59vmW7MtW(ZGP7i`s4EEkQ-4$d_r4 zRW;+sSe?6Ov*&XJ#qy4dyZUFY34%~v+@44HdUD$2cX!FWe6ppFPz4^-kKMn)6WuuZ zNI?W~NJ^1VoPJ|-54PdH(-H(4sBf6n=&+A-Dfkfh@m%bM!Ehe)F6xFEiDh%7SC931 zGjrzqOP?zmY0Zlh?+Mz{DMjAgWPD})^noEl4^Sl5RPV*D-A_Fr1kFw+x0~adVBjD1 zLw-1Ao#Ez7bNAHAvDCsKN}kZJpBSV6(I$0wS52bLdX0J638 zm|6kS+G?$k3#QFyD%8$dc{#)!;_81R+kS#TY`0>TmNb4@aSYX&jM2=D#><=<%XAP! 
zn%}H2PT0jmMRl7lEVeP-ND0KYVGX()u1i9#jo;k&u%l7HxdehsA+O)=b|x|U=v43P z&ZTQ`H-WYVIwYoJ5`q^BnIffpS^g-6t!@l1uY4D3 zD*!7Q-S`=1oS@Q3rwWpp4jgGftXQ#{@=%Y;<6xbWw-1jslA5Q7N^LoB#8!cWamU+Q zCvADwLalo;5l^5cSJK-Ao2ld}qkHi#Z^NB>yi2YsRWc(yqYD@6d zay$fKxql>PNp4A(>O?@=ZWQA;)n7kc*S7JrUzx23VUtV`OOMDeUGrK}RbP$v1b|B7P~R3W(G$K_-gp?b?^O50qSsJkrH z)PYU<)^Mn6>wWbJ_ffj0nMUqxWZ=WvNNnGeAWIC%dI;Tzd_9P|$GY37r7^sl`Y$K| ze7fYK=xU6GDBfN$C)t+ok{3uyYfVW@0SXtLA8(PZQ%dd{M>p8r@N@TI&33C+fJoPb z!jC%`h2me{uSl)mj7`tL^4#@R>MsK@Zb%w%eRiom9YDtklvr`<71P~tc=YC-{L@IV zr2oTk%bwx Date: Tue, 26 Nov 2019 23:45:37 +0200 Subject: [PATCH 023/111] bump to 2.0.3 --- bin/q | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/q b/bin/q index 507991da..2a17db7f 100755 --- a/bin/q +++ b/bin/q @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (C) 2012-2018 Harel Ben-Attia +# Copyright (C) 2012-2019 Harel Ben-Attia # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -q_version = "1.8" +q_version = "2.0.3" __all__ = [ 'QTextAsData' ] From 26d04e92a53dbd7eb826c97ad174cc23ae6f455c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 29 Nov 2019 11:52:19 +0200 Subject: [PATCH 024/111] wip --- dist/create-rpm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dist/create-rpm b/dist/create-rpm index 7d86661f..e0f00a00 100755 --- a/dist/create-rpm +++ b/dist/create-rpm @@ -43,6 +43,8 @@ mkdir -p ${rpm_build_area}/SOURCES pushd ${rpm_build_area}/SOURCES >/dev/null tar xvzf ./q.tar.gz --strip-components=1 rm -vf ./q.tar.gz +wget -q -o ./q "https://github.com/harelba/q/releases/download/${VERSION}/q-x86_64-Linux" +chmod +x ./q popd >/dev/null find ${rpm_build_area}/ -ls From fa1665c4ceefa6bc1601d7587871dc4daf27cfe4 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 30 Nov 2019 18:16:54 
+0200 Subject: [PATCH 025/111] fix rpm/deb to use binary executable instead of python source --- .gitignore | 1 - dist/create-rpm | 4 ++-- dist/q-text-as-data.spec.template | 2 +- package-release | 4 ++-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index a1e04869..8f686707 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,3 @@ win_build packages .idea/ dist/windows/ -dist/ diff --git a/dist/create-rpm b/dist/create-rpm index e0f00a00..753da2de 100755 --- a/dist/create-rpm +++ b/dist/create-rpm @@ -43,8 +43,8 @@ mkdir -p ${rpm_build_area}/SOURCES pushd ${rpm_build_area}/SOURCES >/dev/null tar xvzf ./q.tar.gz --strip-components=1 rm -vf ./q.tar.gz -wget -q -o ./q "https://github.com/harelba/q/releases/download/${VERSION}/q-x86_64-Linux" -chmod +x ./q +curl -R -L -o ./bin/q "https://github.com/harelba/q/releases/download/${VERSION}/q-x86_64-Linux" +chmod +x ./bin/q popd >/dev/null find ${rpm_build_area}/ -ls diff --git a/dist/q-text-as-data.spec.template b/dist/q-text-as-data.spec.template index ad3d0c2f..8512aa2d 100644 --- a/dist/q-text-as-data.spec.template +++ b/dist/q-text-as-data.spec.template @@ -10,7 +10,7 @@ Summary: q - Text as Data Group: Applications/Text License: GPLv3 URL: https://github.com/harelba/q -BuildArch: noarch +BuildArch: x86_64 %description q allows to perform SQL-like statements on tabular text data. 
diff --git a/package-release b/package-release index 0bbe6044..3bb8197c 100755 --- a/package-release +++ b/package-release @@ -29,7 +29,7 @@ mkdir -p ${base_folder}/packages sleep 1 docker exec -it ${cid1} /bin/bash -i -c "/q/dist/create-rpm ${TAG}" -docker cp ${cid1}:/q/dist/rpm_build_area/RPMS/noarch/q-text-as-data-${TAG}-1.el6.noarch.rpm ${base_folder}/packages/q-text-as-data-${TAG}-1.noarch.rpm +docker cp ${cid1}:/q/dist/rpm_build_area/RPMS/x86_64/q-text-as-data-${TAG}-1.el6.x86_64.rpm ${base_folder}/packages/q-text-as-data-${TAG}-1.x86_64.rpm -docker exec -it ${cid2} /bin/bash -i -c "cd /q/packages && alien ./q-text-as-data-${TAG}-1.noarch.rpm" +docker exec -it ${cid2} /bin/bash -i -c "cd /q/packages && alien ./q-text-as-data-${TAG}-1.x86_64.rpm" From 1f9b576bca91fd54d385f3fe8e3f55ec29a82082 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Mon, 2 Dec 2019 01:42:21 +0200 Subject: [PATCH 026/111] Bumped version to 2.0.6 --- bin/q | 2 +- dist/create-rpm | 12 +++++++----- package-release | 13 +++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/bin/q b/bin/q index 2a17db7f..fbd58791 100755 --- a/bin/q +++ b/bin/q @@ -31,7 +31,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -q_version = "2.0.3" +q_version = "2.0.6" __all__ = [ 'QTextAsData' ] diff --git a/dist/create-rpm b/dist/create-rpm index 753da2de..db1a255b 100755 --- a/dist/create-rpm +++ b/dist/create-rpm @@ -1,13 +1,13 @@ -#!/bin/bash +#!/bin/bash -x # # Commit tag and Version number should be provided as input in the command line # # -if [ $# -ne 1 ]; +if [ $# -ne 2 ]; then - echo 'create-rpm ' + echo 'create-rpm ' exit 1 fi @@ -27,6 +27,8 @@ mkdir -p ${rpm_build_area}/{SOURCES,SPECS,BUILD,RPMS,SRPMS,BUILDROOT} echo RPM build area is in ${rpm_build_area} VERSION=$1 +BASED_ON_TAG=$2 + REAL_PACKAGE_NAME=q RPM_PACKAGE_NAME=q-text-as-data @@ -38,12 +40,12 @@ then exit 1 fi -curl -o ${rpm_build_area}/SOURCES/q.tar.gz 
-L -R "https://github.com/harelba/q/tarball/${VERSION}" +curl -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG" mkdir -p ${rpm_build_area}/SOURCES pushd ${rpm_build_area}/SOURCES >/dev/null tar xvzf ./q.tar.gz --strip-components=1 rm -vf ./q.tar.gz -curl -R -L -o ./bin/q "https://github.com/harelba/q/releases/download/${VERSION}/q-x86_64-Linux" +curl -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q" chmod +x ./bin/q popd >/dev/null find ${rpm_build_area}/ -ls diff --git a/package-release b/package-release index 3bb8197c..f27186fa 100755 --- a/package-release +++ b/package-release @@ -5,14 +5,15 @@ set -e base_folder=$(dirname $0) pushd ${base_folder} >/dev/null -if [ $# -ne 1 ]; +if [ $# -ne 2 ]; then - echo "Usage: $(dirname $0) " + echo "Usage: $(dirname $0) " echo echo "Note that the git tag must be pushed to github before doing this." exit 1 fi -TAG="$1" +VERSION="$1" +BASED_ON_TAG="$2" d=`pwd` cid1=`docker run -i -d -v ${d}:/q q-text-as-data-rpm-builder:0.1` @@ -27,9 +28,9 @@ rm -rvf ${base_folder}/packages mkdir -p ${base_folder}/packages sleep 1 -docker exec -it ${cid1} /bin/bash -i -c "/q/dist/create-rpm ${TAG}" +docker exec -it ${cid1} /bin/bash -i -c "/q/dist/create-rpm ${VERSION} ${BASED_ON_TAG}" -docker cp ${cid1}:/q/dist/rpm_build_area/RPMS/x86_64/q-text-as-data-${TAG}-1.el6.x86_64.rpm ${base_folder}/packages/q-text-as-data-${TAG}-1.x86_64.rpm +docker cp ${cid1}:/q/dist/rpm_build_area/RPMS/x86_64/q-text-as-data-${VERSION}-1.el6.x86_64.rpm ${base_folder}/packages/q-text-as-data-${VERSION}-1.x86_64.rpm -docker exec -it ${cid2} /bin/bash -i -c "cd /q/packages && alien ./q-text-as-data-${TAG}-1.x86_64.rpm" +docker exec -it ${cid2} /bin/bash -i -c "cd /q/packages && alien ./q-text-as-data-${VERSION}-1.x86_64.rpm" From 5c96ad2904b2405e4fada5fdbdcacb8d2f63eed2 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 17 Dec 2019 17:58:21 +0200 Subject: 
[PATCH 027/111] make rpm creation more robust --- dist/create-rpm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/create-rpm b/dist/create-rpm index db1a255b..8c247f99 100755 --- a/dist/create-rpm +++ b/dist/create-rpm @@ -40,12 +40,12 @@ then exit 1 fi -curl -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG" +curl -f -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG" mkdir -p ${rpm_build_area}/SOURCES pushd ${rpm_build_area}/SOURCES >/dev/null tar xvzf ./q.tar.gz --strip-components=1 rm -vf ./q.tar.gz -curl -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q" +curl -f -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q" chmod +x ./bin/q popd >/dev/null find ${rpm_build_area}/ -ls From a603ab65c560bd4deec97c7068c47740e1bce7e5 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 21 Dec 2019 15:11:48 +0200 Subject: [PATCH 028/111] Fix output header issue for multi-file tables https://github.com/harelba/q/issues/212 --- bin/q | 47 +++++++++++++++------ test/test-suite | 109 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 14 deletions(-) diff --git a/bin/q b/bin/q index fbd58791..56c6fe0e 100755 --- a/bin/q +++ b/bin/q @@ -476,16 +476,18 @@ class TableColumnInferer(object): self.rows = [] self.skip_header = skip_header self.header_row = None + self.header_row_filename = None self.expected_column_count = expected_column_count self.input_delimiter = input_delimiter self.disable_column_type_detection = disable_column_type_detection - def analyze(self, col_vals): + def analyze(self, filename, col_vals): if self.inferred: raise Exception("Already inferred columns") if self.skip_header and self.header_row is None: self.header_row = col_vals + self.header_row_filename = filename else: self.rows.append(col_vals) @@ 
-905,17 +907,36 @@ class TableCreator(object): mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin) self.materialized_file_dict[filename] = mfs + def _should_skip_extra_headers(self, filenumber, filename, mfs, col_vals): + if not self.skip_header: + return False + + if filenumber == 0: + return False + + header_already_exists = self.column_inferer.header_row is not None + + is_extra_header = self.skip_header and mfs.lines_read == 1 and header_already_exists + + if is_extra_header: + if tuple(self.column_inferer.header_row) != tuple(col_vals): + raise BadHeaderException("Extra header {} in file {} mismatches original header {} from file {}. Table name is {}".format(",".join(col_vals),mfs.filename,",".join(self.column_inferer.header_row),self.column_inferer.header_row_filename,self.filenames_str)) + + return is_extra_header + def _populate(self,dialect,stop_after_analysis=False): total_data_lines_read = 0 # For each match - for filename in self.materialized_file_list: + for filenumber,filename in enumerate(self.materialized_file_list): mfs = self.materialized_file_dict[filename] try: try: for col_vals in mfs.read_file_using_csv(): - self._insert_row(col_vals) + if self._should_skip_extra_headers(filenumber,filename,mfs,col_vals): + continue + self._insert_row(filename, col_vals) if stop_after_analysis and self.column_inferer.inferred: return if mfs.lines_read == 0 and self.skip_header: @@ -937,7 +958,7 @@ class TableCreator(object): if not self.table_created: self.column_inferer.force_analysis() - self._do_create_table() + self._do_create_table(filename) if total_data_lines_read == 0: @@ -960,20 +981,20 @@ class TableCreator(object): self.state = TableCreatorState.FULLY_READ return - def _flush_pre_creation_rows(self): + def _flush_pre_creation_rows(self, filename): for i, col_vals in enumerate(self.pre_creation_rows): if self.skip_header and i == 0: # skip header line continue - self._insert_row(col_vals) + self._insert_row(filename, col_vals) 
self._flush_inserts() self.pre_creation_rows = [] - def _insert_row(self, col_vals): + def _insert_row(self, filename, col_vals): # If table has not been created yet if not self.table_created: # Try to create it along with another "example" line of data - self.try_to_create_table(col_vals) + self.try_to_create_table(filename, col_vals) # If the table is still not created, then we don't have enough data, just # store the data and return @@ -1069,19 +1090,19 @@ class TableCreator(object): # print self.db.execute_and_fetch(self.db.generate_end_transaction()) self.buffered_inserts = [] - def try_to_create_table(self, col_vals): + def try_to_create_table(self, filename, col_vals): if self.table_created: raise Exception('Table is already created') # Add that line to the column inferer - result = self.column_inferer.analyze(col_vals) + result = self.column_inferer.analyze(filename, col_vals) # If inferer succeeded, if result: - self._do_create_table() + self._do_create_table(filename) else: pass # We don't have enough information for creating the table yet - def _do_create_table(self): + def _do_create_table(self,filename): # Then generate a temp table name self.table_name = self.db.generate_temp_table_name() # Get the column definition dict from the inferer @@ -1101,7 +1122,7 @@ class TableCreator(object): self.db.execute_and_fetch(create_table_stmt) # Mark the table as created self.table_created = True - self._flush_pre_creation_rows() + self._flush_pre_creation_rows(filename) def drop_table(self): if self.table_created: diff --git a/test/test-suite b/test/test-suite index e17afcd1..bc7fc379 100755 --- a/test/test-suite +++ b/test/test-suite @@ -93,6 +93,9 @@ sample_data_with_empty_string_no_header = six.b("\n").join( sample_data_with_header = header_row + six.b("\n") + sample_data_no_header sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header +def generate_sample_data_with_header(header): + return header + six.b("\n") + 
sample_data_no_header + sample_quoted_data = six.b('''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline @@ -1422,6 +1425,109 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) +class MultiHeaderTests(AbstractQTestCase): + def test_output_header_when_multiple_input_headers_exist(self): + TMPFILE_COUNT = 5 + tmpfiles = [self.create_file_with_data(sample_data_with_header) for x in range(TMPFILE_COUNT)] + + tmpfilenames = "+".join(map(lambda x:x.name, tmpfiles)) + + cmd = '../bin/q -d , "select name,value1,value2 from %s order by name" -H -O' % tmpfilenames + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), TMPFILE_COUNT*3+1) + self.assertEqual(o[0], six.b("name,value1,value2")) + + for i in range (TMPFILE_COUNT): + self.assertEqual(o[1+i],sample_data_rows[0]) + for i in range (TMPFILE_COUNT): + self.assertEqual(o[TMPFILE_COUNT+1+i],sample_data_rows[1]) + for i in range (TMPFILE_COUNT): + self.assertEqual(o[TMPFILE_COUNT*2+1+i],sample_data_rows[2]) + + for oi in o[1:]: + self.assertTrue(six.b('name') not in oi) + + for i in range(TMPFILE_COUNT): + self.cleanup(tmpfiles[i]) + + def test_output_header_when_extra_header_column_names_are_different(self): + tmpfile1 = self.create_file_with_data(sample_data_with_header) + tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('othername,value1,value2'))) + + cmd = '../bin/q -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 35) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) 
+ self.assertTrue(e[0].startswith(six.b("Bad header row:"))) + + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + def test_output_header_when_extra_header_has_different_number_of_columns(self): + tmpfile1 = self.create_file_with_data(sample_data_with_header) + tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('name,value1'))) + + cmd = '../bin/q -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 35) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertTrue(e[0].startswith(six.b("Bad header row:"))) + + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + def test_output_header_when_extra_header_has_different_number_of_columns2(self): + original_header = header_row + tmpfile1 = self.create_file_with_data(sample_data_with_header) + different_header = six.b('name,value1,value2,value3') + tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(different_header)) + + SELECT_table_name = '%s+%s' % (tmpfile1.name,tmpfile2.name) + cmd = '../bin/q -d , "select name,value1,value2 from %s order by name" -H -O' % (SELECT_table_name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 35) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + expected_message = six.b('Bad header row: Extra header %s in file %s mismatches original header %s from file %s. Table name is %s') % \ + (different_header,six.b(tmpfile2.name),original_header,six.b(tmpfile1.name),six.b(SELECT_table_name)) + + self.assertEqual(e[0],expected_message) + + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + # Not the best behavior, this means that if the first file in additional files contains exactly the + # same content as the original header, then q would skip this line instead of failing. + # Extremely rare case, and for any table with numeric values, this is not an issue, since column names + # cannot be numbers. 
+ def test_output_header_when_additional_files_dont_have_a_header(self): + original_header = header_row + tmpfile1 = self.create_file_with_data(sample_data_with_header) + tmpfile2 = self.create_file_with_data(sample_data_no_header) + + SELECT_table_name = '%s+%s' % (tmpfile1.name,tmpfile2.name) + cmd = '../bin/q -d , "select name,value1,value2 from %s order by name" -H -O' % (SELECT_table_name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 35) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + expected_message = six.b('Bad header row: Extra header %s in file %s mismatches original header %s from file %s. Table name is %s') % \ + (sample_data_rows[0],six.b(tmpfile2.name),original_header,six.b(tmpfile1.name),six.b(SELECT_table_name)) + + self.assertEqual(e[0],expected_message) + + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + class ParsingModeTests(AbstractQTestCase): def test_strict_mode_column_count_mismatch_error(self): @@ -2351,7 +2457,8 @@ def suite(): formatting = tl.loadTestsFromTestCase(FormattingTests) basic_module_stuff = tl.loadTestsFromTestCase(BasicModuleTests) save_db_to_disk_tests = tl.loadTestsFromTestCase(SaveDbToDiskTests) - return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests]) + multi_header_tests = tl.loadTestsFromTestCase(MultiHeaderTests) + return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests,multi_header_tests]) if __name__ == '__main__': if len(sys.argv) > 1: From ec1777334f766302e2f92d7a32f63999853bde72 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 12 Jan 2020 17:58:26 +0200 Subject: [PATCH 029/111] wip --- .travis.yml | 149 +++++++++++++++-- Makefile | 35 ++++ bin/q | 5 +- do-manual-release.sh | 29 ++++ pytest.ini | 2 + setup-pyenv.sh | 134 ++++++++++++++++ setup.py | 24 +++ test-requirements.txt | 2 + test/test-suite | 365 +++++++++++++++++------------------------- 9 files 
changed, 512 insertions(+), 233 deletions(-) create mode 100644 Makefile create mode 100755 do-manual-release.sh create mode 100644 pytest.ini create mode 100644 setup-pyenv.sh create mode 100644 setup.py create mode 100644 test-requirements.txt diff --git a/.travis.yml b/.travis.yml index 402d9e12..e8186f1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,137 @@ -language: python -python: - - "2.7" - - "3.6" -matrix: +sudo: false + +stages: + - integration + - release + +env: + global: + - CACHE_NAME=${TRAVIS_JOB_NAME} + + +_commands_provider: + + _test: &_test make test + + _lint: &_lint make lint + + _release: &_release make local-release + + _install_requirements: &_install_requirements make dep + + # https://ttcshelbyville.wordpress.com/2012/12/19/disable-remote-differential-compression-form-the-command-line/ + _disable_windows_compression: &_disable_windows_compression "powershell Disable-WindowsOptionalFeature -Online -FeatureName MSRDC-Infrastructure" + + # https://travis-ci.community/t/yarn-network-troubles/333/7 + _disable_windows_defender: &_disable_windows_defender "powershell Set-MpPreference -DisableRealtimeMonitoring \\$true" + + +_steps_provider: + + _test: &_step_test + + install: + - *_install_requirements + before_script: *_lint + script: *_test + + _release: &_step_release + + install: *_install_requirements + script: *_release + + + +jobs: include: - - python: "3.7" - dist: xenial # Need for python 3.7 - allow_failures: - - python: "3.6" - - python: "3.7" -install: pip install -r requirements.txt -before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics -script: test/test-all + + - stage: integration + name: py27-macos + os: osx + language: generic + osx_image: xcode7.3 + env: + - PYENV_VERSION=2.7.14 + before_install: source setup-pyenv.sh + <<: *_step_test + cache: + directories: + - ${HOME}/.pyenv_cache + + - stage: integration + name: py36-macos + os: osx + language: generic 
+ osx_image: xcode7.3 + env: + - PYENV_VERSION=3.6.4 + before_install: source setup-pyenv.sh + <<: *_step_test + cache: + directories: + - ${HOME}/.pyenv_cache + + - stage: integration + name: py37-macos + os: osx + language: generic + osx_image: xcode7.3 + env: + - PYENV_VERSION=3.7.3 + before_install: source setup-pyenv.sh + <<: *_step_test + cache: + directories: + - ${HOME}/.pyenv_cache + + - stage: integration + name: py27-linux + language: python + python: "2.7" + <<: *_step_test + + - stage: integration + name: py36-linux + language: python + python: "3.6" + <<: *_step_test + + - stage: integration + name: py37-linux + language: python + dist: xenial + python: "3.7" + <<: *_step_test + + - stage: release + name: macos + os: osx + language: generic + osx_image: xcode7.3 + env: + - PYENV_VERSION=3.7.3 + before_install: source setup-pyenv.sh + <<: *_step_release + cache: + directories: + - ${HOME}/.pyenv_cache + + - stage: release + name: linux + language: python + dist: xenial + python: "3.7" + <<: *_step_release + + - stage: release + name: windows + os: windows + language: shell + env: + - PATH=/c/Python37:/c/Python37/Scripts:$PATH + before_install: + - *_disable_windows_compression + - *_disable_windows_defender + - choco install make + - choco install python --version 3.7.3 + <<: *_step_release diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..1c7b9719 --- /dev/null +++ b/Makefile @@ -0,0 +1,35 @@ +SHELL := /bin/bash + +PROJECT_NAME=$(shell dirname "$0") +ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) + +.PHONY: test help +.DEFAULT_GOAL := ci + +ci: lint test ## Equivelant to 'make lint test' + +help: ## Show this help message. + + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +dep: ## Install the dependent libraries. + + pip install -r test-requirements.txt + pip install -e . + +lint: dep ## Run lint validations. 
+ + flake8 q/ --count --select=E901,E999,F821,F822,F823 --show-source --statistics + +test: dep ## Run the unit tests. + + py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir . + +release: ## Run release + pip install py-ci + pyci release --no-wheel-publish --wheel-universal + +local-release: + pip install py-ci + ./do-manual-release.sh + diff --git a/bin/q b/bin/q index 56c6fe0e..a4004dc1 100755 --- a/bin/q +++ b/bin/q @@ -31,7 +31,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -q_version = "2.0.6" +q_version = "2.0.9" __all__ = [ 'QTextAsData' ] @@ -1143,7 +1143,8 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): def print_credentials(): print("q version %s" % q_version, file=sys.stderr) - print("Copyright (C) 2012-2017 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) + print("Python: %s" % (sys.version.replace("\n","//")), file=sys.stderr) + print("Copyright (C) 2012-2019 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) print("http://harelba.github.io/q/", file=sys.stderr) print(file=sys.stderr) diff --git a/do-manual-release.sh b/do-manual-release.sh new file mode 100755 index 00000000..56b302ce --- /dev/null +++ b/do-manual-release.sh @@ -0,0 +1,29 @@ +#!/bin/bash -x + +set -e + +VERSION=2.0.9 + +echo "Packing binary for $TRAVIS_OS_NAME" + +if [[ "$TRAVIS_OS_NAME" == "osx" || "$TRAVIS_OS_NAME" == "linux" ]] +then + echo "Packing $TRAVIS_OS_NAME installer - packing binary" + pyci pack binary + echo "Packing $TRAVIS_OS_NAME installer - uploading" + pyci github upload-asset --asset q-$(uname -m)-$(uname -s) --release $VERSION +else + echo "Packing windows installer - packing binary" + pyci pack binary + echo "Packing windows installer - listing files" + find `pwd` -ls | grep -v \.git/ + echo "Packing windows installer - packing nsis" + 
BINARY_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows.exe" + pyci pack nsis --program-files-dir q-TextAsData --binary-path $BINARY_LOCATION --version ${VERSION}.0 + echo "Packing windows installer - uploading" + pyci github upload-asset --asset $BINARY_LOCATION --release $VERSION + SETUP_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows-installer.exe" + pyci github upload-asset --asset $SETUP_LOCATION --release $VERSION +fi + +echo "done" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..9d60edec --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +log_print = True diff --git a/setup-pyenv.sh b/setup-pyenv.sh new file mode 100644 index 00000000..6b29d86d --- /dev/null +++ b/setup-pyenv.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +# NOTE: This script needs to be sourced so it can modify the environment. +# +# Environment variables that can be set: +# - PYENV_VERSION +# Python to install [required] +# - PYENV_VERSION_STRING +# String to `grep -F` against the output of `python --version` to validate +# that the correct Python was installed (recommended) [default: none] +# - PYENV_ROOT +# Directory in which to install pyenv [default: ~/.travis-pyenv] +# - PYENV_RELEASE +# Release tag of pyenv to download [default: clone from master] +# - PYENV_CACHE_PATH +# Directory where full Python builds are cached (i.e., for Travis) + +# PYENV_ROOT is exported because pyenv uses it +export PYENV_ROOT="${PYENV_ROOT:-$HOME/.travis-pyenv}" +export PYTHON_CONFIGURE_OPTS="--enable-shared" +PYENV_CACHE_PATH="${PYENV_CACHE_PATH:-$HOME/.pyenv_cache}" +version_cache_path="$PYENV_CACHE_PATH/$PYENV_VERSION" +version_pyenv_path="$PYENV_ROOT/versions/$PYENV_VERSION" + +# Functions +# +# verify_python -- attempts to call the Python command or binary +# supplied in the first argument with the --version flag. If +# PYENV_VERSION_STRING is set, then it validates the returned version string +# as well (using grep -F). 
Returns whatever status code the command returns. +verify_python() { + local python_bin="$1"; shift + + if [[ -n "$PYENV_VERSION_STRING" ]]; then + "$python_bin" --version 2>&1 | grep -F "$PYENV_VERSION_STRING" &>/dev/null + else + "$python_bin" --version &>/dev/null + fi +} + +# use_cached_python -- Tries symlinking to the cached PYENV_VERSION and +# verifying that it's a working build. Returns 0 if it's found and it +# verifies, otherwise returns 1. +use_cached_python() { + if [[ -d "$version_cache_path" ]]; then + printf "Cached python found, %s. Verifying..." "$PYENV_VERSION" + ln -s "$version_cache_path" "$version_pyenv_path" + if verify_python "$version_pyenv_path/bin/python"; then + printf "success!\n" + return 0 + else + printf "FAILED.\nClearing cached version..." + rm -f "$version_pyenv_path" + rm -rf "$version_cache_path" + printf "done.\n" + return 1 + fi + else + echo "No cached python found." + return 1 + fi +} + +# output_debugging_info -- Outputs useful debugging information +output_debugging_info() { + echo "**** Debugging information" + printf "PYENV_VERSION\n%s\n" "$PYENV_VERSION" + printf "PYENV_VERSION_STRING\n%s\n" "$PYENV_VERSION_STRING" + printf "PYENV_CACHE_PATH\n%s\n" "$PYENV_CACHE_PATH" + set -x + python --version + "$version_cache_path/bin/python" --version + which python + pyenv which python + set +x +} + +# Main script begins. + +if [[ -z "$PYENV_VERSION" ]]; then + echo "PYENV_VERSION is not set. Not installing a pyenv." + return 0 +fi + +# Get out of the virtualenv we're in (if we're in one). +[[ -z "$VIRTUAL_ENV" ]] || deactivate + +# Install pyenv +echo "**** Installing pyenv." 
+if [[ -n "$PYENV_RELEASE" ]]; then + # Fetch the release archive from Github (slightly faster than cloning) + mkdir "$PYENV_ROOT" + curl -fsSL "https://github.com/yyuu/pyenv/archive/$PYENV_RELEASE.tar.gz" \ + | tar -xz -C "$PYENV_ROOT" --strip-components 1 +else + # Don't have a release to fetch, so just clone directly + git clone --depth 1 https://github.com/yyuu/pyenv.git "$PYENV_ROOT" +fi + +export PATH="$PYENV_ROOT/bin:$PATH" +eval "$(pyenv init -)" + +# Make sure the cache directory exists +mkdir -p "$PYENV_CACHE_PATH" + +# Try using an already cached PYENV_VERSION. If it fails or is not found, +# then install from scratch. +echo "**** Trying to find and use cached python $PYENV_VERSION." +if ! use_cached_python; then + echo "**** Installing python $PYENV_VERSION with pyenv now." + if pyenv install "$PYENV_VERSION"; then + if mv "$version_pyenv_path" "$PYENV_CACHE_PATH"; then + echo "Python was successfully built and moved to cache." + echo "**** Trying to find and use cached python $PYENV_VERSION." + if ! use_cached_python; then + echo "Python version $PYENV_VERSION was apparently successfully built" + echo "with pyenv, but, once cached, it could not be verified." + output_debugging_info + return 1 + fi + else + echo "**** Warning: Python was succesfully built, but moving to cache" + echo "failed. Proceeding anyway without caching." + fi + else + echo "Python version $PYENV_VERSION build FAILED." + return 1 + fi +fi + +# Now we have to reinitialize pyenv, as we need the shims etc to be created so +# the pyenv activates correctly. +echo "**** Activating python $PYENV_VERSION and generating new virtualenv." 
+eval "$(pyenv init -)" +pyenv global "$PYENV_VERSION" diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..d78de8e6 --- /dev/null +++ b/setup.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +from setuptools import setup + +setup( + name='q', + url='https://github.com/harelba/q', + license='LICENSE', + version='2.0.9', + author='Harel Ben-Attia', + description="Run SQL directly on CSV or TSV files", + author_email='harelba@gmail.com', + install_requires=[ + 'six==1.11.0' + ], + packages=[ + 'q' + ], + entry_points={ + 'console_scripts': [ + 'q = bin.q:run_standalone' + ] + } +) diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 00000000..a89474ca --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,2 @@ +pytest==4.6.2 +flake8==3.6.0 \ No newline at end of file diff --git a/test/test-suite b/test/test-suite index bc7fc379..f5f0b298 100755 --- a/test/test-suite +++ b/test/test-suite @@ -1,12 +1,13 @@ #!/usr/bin/env python # +# test suite for q. +# +# All tests must be end-to-end tests, running the actual q command and testing stdout/stderr, and the return code. +# Some utilities are provided for making that easy, see other tests for examples. # -# Simplistic test suite for q. -# -# Currently takes into account the project folder structure for running, so it needs -# to be executed from the current folder -# +# Don't forget to use the Q_EXECUTABLE instead of hardcoding the q command line. This will be used in the near future +# in order to test the resulting binary executables as well, instead of just executing the q python source code. # import unittest @@ -24,18 +25,22 @@ import six from six.moves import range import codecs -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) -from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams + +import QTextAsData,QOutput,QOutputPrinter,QInputParams # q uses this encoding as the default output encoding. 
Some of the tests use it in order to # make sure that the output is correctly encoded SYSTEM_ENCODING = locale.getpreferredencoding() +EXAMPLES = os.path.abspath(os.path.join(q.__file__, os.pardir, os.pardir, 'examples')) + +Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', '../bin/q') DEBUG = False if len(sys.argv) > 2 and sys.argv[2] == '-v': DEBUG = True + def run_command(cmd_to_run): global DEBUG if DEBUG: @@ -61,6 +66,7 @@ def run_command(cmd_to_run): print("RESULT:{}".format(res)) return res + uneven_ls_output = six.b("""drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt drwxr-xr-x 2 root root 4096 Apr 24 2013 /srv @@ -72,6 +78,7 @@ lrwxrwxrwx 1 root root 29 Jun 21 2013 /vmlinuz -> boot/vmlinuz-3.8 lrwxrwxrwx 1 root root 32 Jun 21 2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic """) + find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp 8299123 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576 8263229 964 -rw-rw-r-- 1 mapred mapred 984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser @@ -84,6 +91,7 @@ find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Ma 8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version """) + header_row = six.b('name,value1,value2') sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')] sample_data_rows_with_empty_string = [six.b('a,aaa,0'), six.b('b,bbb,0'), six.b('c,,0')] @@ -135,6 +143,7 @@ long_value1 = "23683289372328372328373" int_value = "2328372328373" sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value) + def one_column_warning(e): return e[0].startswith(six.b('Warning: column count is one')) @@ -159,19 
+168,20 @@ class AbstractQTestCase(unittest.TestCase): path = '/var/tmp' return '%s/%s-%s.%s' % (path,prefix,random.randint(0,1000000000),postfix) + class SaveDbToDiskTests(AbstractQTestCase): def test_store_to_disk(self): db_filename = self.random_tmp_filename('store-to-disk','db') self.assertFalse(os.path.exists(db_filename)) - retcode, o, e = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename) + retcode, o, e = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) self.assertTrue(retcode == 0) self.assertTrue(len(o) == 0) self.assertTrue(len(e) == 5) self.assertTrue(e[0].startswith(six.b('Going to save data'))) - self.assertTrue(db_filename.encode(sys.stdout.encoding) in e[0]) + self.assertTrue(db_filename.encode(sys.stdout.encoding or 'utf-8') in e[0]) self.assertTrue(e[1].startswith(six.b('Data has been loaded in'))) self.assertTrue(e[2].startswith(six.b('Saving data to db file'))) self.assertTrue(e[3].startswith(six.b('Data has been saved into'))) @@ -191,12 +201,12 @@ class SaveDbToDiskTests(AbstractQTestCase): db_filename = self.random_tmp_filename('store-to-disk', 'db') self.assertFalse(os.path.exists(db_filename)) - retcode, o, e = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename) + retcode, o, e = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) self.assertTrue(retcode == 0) self.assertTrue(os.path.exists(db_filename)) - retcode2, o2, e2 = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename) + retcode2, o2, e2 = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) self.assertTrue(retcode2 != 0) self.assertTrue(e2[0].startswith(six.b('Going to save data into a disk database'))) self.assertTrue(e2[1] == six.b('Disk database file {} already exists.'.format(db_filename))) @@ -208,7 +218,7 @@ class 
BasicTests(AbstractQTestCase): def test_basic_aggregation(self): retcode, o, e = run_command( - 'seq 1 10 | ../bin/q "select sum(c1),avg(c1) from -"') + 'seq 1 10 | ' + Q_EXECUTABLE + ' "select sum(c1),avg(c1) from -"') self.assertTrue(retcode == 0) self.assertTrue(len(o) == 1) self.assertTrue(len(e) == 1) @@ -221,7 +231,7 @@ class BasicTests(AbstractQTestCase): tmpfile = self.create_file_with_data( six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) - cmd = '../bin/q -z "select sum(c1),avg(c1) from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -z "select sum(c1),avg(c1) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertTrue(retcode == 0) @@ -238,7 +248,7 @@ class BasicTests(AbstractQTestCase): tmpfile = self.create_file_with_data( six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) - cmd = 'cat %s | ../bin/q -z "select sum(c1),avg(c1) from -"' % tmpfile.name + cmd = 'cat %s | ' % tmpfile.name + Q_EXECUTABLE + ' -z "select sum(c1),avg(c1) from -"' retcode, o, e = run_command(cmd) self.assertTrue(retcode != 0) @@ -252,7 +262,7 @@ class BasicTests(AbstractQTestCase): def test_delimition_mistake_with_header(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d " " "select * from %s" -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -269,7 +279,7 @@ class BasicTests(AbstractQTestCase): def test_regexp_int_data_handling(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name retcode, o, e = run_command(cmd) 
self.assertEqual(retcode, 0) @@ -283,7 +293,7 @@ class BasicTests(AbstractQTestCase): def test_regexp_null_data_handling(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -297,7 +307,7 @@ class BasicTests(AbstractQTestCase): def test_select_one_column(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -311,7 +321,7 @@ class BasicTests(AbstractQTestCase): def test_tab_delimition_parameter(self): tmpfile = self.create_file_with_data( sample_data_no_header.replace(six.b(","), six.b("\t"))) - cmd = '../bin/q -t "select c1,c2,c3 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -t "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -326,7 +336,7 @@ class BasicTests(AbstractQTestCase): def test_tab_delimition_parameter__with_manual_override_attempt(self): tmpfile = self.create_file_with_data( sample_data_no_header.replace(six.b(","), six.b("\t"))) - cmd = '../bin/q -t -d , "select c1,c2,c3 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -t -d , "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -340,7 +350,7 @@ class BasicTests(AbstractQTestCase): def test_output_delimiter(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -355,7 +365,7 @@ class 
BasicTests(AbstractQTestCase): def test_output_delimiter_tab_parameter(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , -T "select c1,c2,c3 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , -T "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -370,7 +380,7 @@ class BasicTests(AbstractQTestCase): def test_output_delimiter_tab_parameter__with_manual_override_attempt(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -384,7 +394,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) def test_stdin_input(self): - cmd = six.b('printf "%s" | ../bin/q -d , "select c1,c2,c3 from -"') % sample_data_no_header + cmd = six.b('printf "%s" | ' + Q_EXECUTABLE + ' -d , "select c1,c2,c3 from -"') % sample_data_no_header retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -397,7 +407,7 @@ class BasicTests(AbstractQTestCase): def test_column_separation(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select c1,c2,c3 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1,c2,c3 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -413,7 +423,7 @@ class BasicTests(AbstractQTestCase): def test_column_analysis(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -427,7 +437,7 @@ class BasicTests(AbstractQTestCase): def test_column_analysis_no_header(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , 
"select c1 from %s" -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -438,7 +448,7 @@ class BasicTests(AbstractQTestCase): def test_header_exception_on_numeric_header_data(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select * from %s" -A -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A -H' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -453,7 +463,7 @@ class BasicTests(AbstractQTestCase): def test_column_analysis_with_header(self): tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = '../bin/q -d , "select c1 from %s" -A -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A -H' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -470,7 +480,7 @@ class BasicTests(AbstractQTestCase): def test_data_with_header(self): tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = '../bin/q -d , "select name from %s" -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -481,7 +491,7 @@ class BasicTests(AbstractQTestCase): def test_output_header_when_input_header_exists(self): tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = '../bin/q -d , "select name from %s" -H -O' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H -O' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -495,7 +505,7 @@ class BasicTests(AbstractQTestCase): def test_generated_column_name_warning_when_header_line_exists(self): tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = '../bin/q -d , "select c3 from %s" -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c3 from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) @@ -510,7 
+520,7 @@ class BasicTests(AbstractQTestCase): def test_column_analysis_with_unexpected_header(self): tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -529,7 +539,7 @@ class BasicTests(AbstractQTestCase): def test_empty_data(self): tmpfile = self.create_file_with_data(six.b('')) - cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -542,7 +552,7 @@ class BasicTests(AbstractQTestCase): def test_empty_data_with_header_param(self): tmpfile = self.create_file_with_data(six.b('')) - cmd = '../bin/q -d , "select c1 from %s" -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -556,7 +566,7 @@ class BasicTests(AbstractQTestCase): def test_one_row_of_data_without_header_param(self): tmpfile = self.create_file_with_data(header_row) - cmd = '../bin/q -d , "select c2 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c2 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -569,7 +579,7 @@ class BasicTests(AbstractQTestCase): def test_one_row_of_data_with_header_param(self): tmpfile = self.create_file_with_data(header_row) - cmd = '../bin/q -d , "select c2 from %s" -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c2 from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -582,7 +592,7 @@ class BasicTests(AbstractQTestCase): def test_dont_leading_keep_whitespace_in_values(self): tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) - cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select 
c1 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -597,7 +607,7 @@ class BasicTests(AbstractQTestCase): def test_keep_leading_whitespace_in_values(self): tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) - cmd = '../bin/q -d , "select c1 from %s" -k' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -k' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -612,7 +622,7 @@ class BasicTests(AbstractQTestCase): def test_no_impact_of_keeping_leading_whitespace_on_integers(self): tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) - cmd = '../bin/q -d , "select c2 from %s" -k -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select c2 from %s" -k -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -629,7 +639,7 @@ class BasicTests(AbstractQTestCase): def test_spaces_in_header_row(self): tmpfile = self.create_file_with_data( header_row_with_spaces + six.b("\n") + sample_data_no_header) - cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select name,\`value 1\` from %s" -H' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -645,7 +655,7 @@ class BasicTests(AbstractQTestCase): def test_column_analysis_for_spaces_in_header_row(self): tmpfile = self.create_file_with_data( header_row_with_spaces + six.b("\n") + sample_data_no_header) - cmd = '../bin/q -d , "select name,\`value 1\` from %s" -H -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select name,\`value 1\` from %s" -H -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -660,7 +670,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) def test_no_query_in_command_line(self): - cmd = '../bin/q -d , ""' + cmd = Q_EXECUTABLE + ' -d , ""' retcode, o, e = run_command(cmd) self.assertEqual(retcode, 1) @@ -670,7 +680,7 @@ class 
BasicTests(AbstractQTestCase): self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) def test_empty_query_in_command_line(self): - cmd = '../bin/q -d , " "' + cmd = Q_EXECUTABLE + ' -d , " "' retcode, o, e = run_command(cmd) self.assertEqual(retcode, 1) @@ -680,7 +690,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) def test_failure_in_query_stops_processing_queries(self): - cmd = '../bin/q -d , "select 500" "select 300" "wrong-query" "select 8000"' + cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300" "wrong-query" "select 8000"' retcode, o, e = run_command(cmd) self.assertEqual(retcode, 1) @@ -690,7 +700,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(o[1],six.b('300')) def test_multiple_queries_in_command_line(self): - cmd = '../bin/q -d , "select 500" "select 300+100" "select 300" "select 200"' + cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300+100" "select 300" "select 200"' retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -703,7 +713,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(o[3],six.b('200')) def test_literal_calculation_query(self): - cmd = '../bin/q -d , "select 1+40/6"' + cmd = Q_EXECUTABLE + ' -d , "select 1+40/6"' retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -713,7 +723,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(o[0],six.b('7')) def test_literal_calculation_query_float_result(self): - cmd = '../bin/q -d , "select 1+40/6.0"' + cmd = Q_EXECUTABLE + ' -d , "select 1+40/6.0"' retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -726,7 +736,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_data_with_header) tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) - cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name + cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name retcode, o, e = 
run_command(cmd) self.assertEqual(retcode, 0) @@ -744,7 +754,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_data_with_header) tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) - cmd = '../bin/q -d , -q %s -H -Q ascii' % tmp_query_file.name + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q ascii' % tmp_query_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,3) @@ -760,7 +770,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_data_with_header) tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name),encoding=None) - cmd = '../bin/q -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -779,7 +789,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_data_with_header) tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) - cmd = '../bin/q -d , -q %s -H -Q utf-8' % tmp_query_file.name + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8' % tmp_query_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -797,7 +807,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_data_with_header) tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) - cmd = '../bin/q -d , -q %s -H "select * from ppp"' % tmp_query_file.name + cmd = Q_EXECUTABLE + ' -d , -q %s -H "select * from ppp"' % tmp_query_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 1) @@ -814,7 +824,7 @@ class BasicTests(AbstractQTestCase): tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' 
from %s" % tmp_data_file.name),encoding=None) for target_encoding in ['utf-8','ibm852']: - cmd = '../bin/q -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding) + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -832,7 +842,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_data_with_header) tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) - cmd = '../bin/q -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 3) @@ -848,7 +858,7 @@ class BasicTests(AbstractQTestCase): def test_use_query_file_with_empty_query(self): tmp_query_file = self.create_file_with_data(six.b(" ")) - cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name + cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 1) @@ -860,7 +870,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmp_query_file) def test_use_non_existent_query_file(self): - cmd = '../bin/q -d , -q non-existent-query-file -H' + cmd = Q_EXECUTABLE + ' -d , -q non-existent-query-file -H' retcode, o, e = run_command(cmd) self.assertEqual(retcode, 1) @@ -872,7 +882,7 @@ class BasicTests(AbstractQTestCase): def test_non_quoted_values_in_quoted_data(self): tmp_data_file = self.create_file_with_data(sample_quoted_data) - cmd = '../bin/q -d " " "select c1 from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " "select c1 from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) @@ -890,7 +900,7 @@ class BasicTests(AbstractQTestCase): def test_regular_quoted_values_in_quoted_data(self): tmp_data_file = self.create_file_with_data(sample_quoted_data) - cmd = '../bin/q -d " " "select c2 
from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " "select c2 from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -907,7 +917,7 @@ class BasicTests(AbstractQTestCase): def test_double_double_quoted_values_in_quoted_data(self): tmp_data_file = self.create_file_with_data(sample_quoted_data) - cmd = '../bin/q -d " " "select c3 from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " "select c3 from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -924,7 +934,7 @@ class BasicTests(AbstractQTestCase): def test_escaped_double_quoted_values_in_quoted_data(self): tmp_data_file = self.create_file_with_data(sample_quoted_data) - cmd = '../bin/q -d " " "select c4 from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " "select c4 from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -941,7 +951,7 @@ class BasicTests(AbstractQTestCase): def test_none_input_quoting_mode_in_relaxed_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -m relaxed -D , -w none -W none "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -m relaxed -D , -w none -W none "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -956,7 +966,7 @@ class BasicTests(AbstractQTestCase): def test_none_input_quoting_mode_in_strict_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -m strict -D , -w none "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -m strict -D , -w none "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode,0) @@ -970,7 +980,7 @@ class BasicTests(AbstractQTestCase): def test_minimal_input_quoting_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -D , -w minimal 
"select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D , -w minimal "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -985,7 +995,7 @@ class BasicTests(AbstractQTestCase): def test_all_input_quoting_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -D , -w all "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D , -w all "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1000,7 +1010,7 @@ class BasicTests(AbstractQTestCase): def test_incorrect_input_quoting_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -D , -w unknown_wrapping_mode "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D , -w unknown_wrapping_mode "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode,0) @@ -1015,7 +1025,7 @@ class BasicTests(AbstractQTestCase): def test_none_output_quoting_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -D , -w all -W none "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W none "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1030,7 +1040,7 @@ class BasicTests(AbstractQTestCase): def test_minimal_output_quoting_mode__without_need_to_quote_in_output(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -D , -w all -W minimal "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W minimal "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1046,7 +1056,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_quoted_data2) # output delimiter 
is set to space, so the output will contain it - cmd = '../bin/q -d " " -D " " -w all -W minimal "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D " " -w all -W minimal "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1061,7 +1071,7 @@ class BasicTests(AbstractQTestCase): def test_nonnumeric_output_quoting_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -D , -w all -W nonnumeric "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W nonnumeric "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1076,7 +1086,7 @@ class BasicTests(AbstractQTestCase): def test_all_output_quoting_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) - cmd = '../bin/q -d " " -D , -w all -W all "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W all "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1092,7 +1102,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(input_data) - basic_cmd = '../bin/q -w %s -W %s "select * from -"' % (input_wrapping_mode,output_wrapping_mode) + basic_cmd = Q_EXECUTABLE + ' -w %s -W %s "select * from -"' % (input_wrapping_mode,output_wrapping_mode) chained_cmd = 'cat %s | %s | %s | %s' % (tmp_data_file.name,basic_cmd,basic_cmd,basic_cmd) retcode, o, e = run_command(chained_cmd) @@ -1117,7 +1127,7 @@ class BasicTests(AbstractQTestCase): utf_8_data_with_bom = six.b('\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n') tmp_data_file = self.create_file_with_data(utf_8_data_with_bom,encoding=None) - cmd = '../bin/q -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE 
+ ' -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1136,7 +1146,7 @@ class BasicTests(AbstractQTestCase): data = six.b('111,22.22,"testing text with special characters - citt\xc3\xa0 ",http://somekindofurl.com,12.13.14.15,12.1\n') tmp_data_file = self.create_file_with_data(data) - cmd = '../bin/q -d , "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d , "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1145,7 +1155,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(o[0].decode('utf-8'),u'111,22.22,testing text with special characters - citt\xe0 ,http://somekindofurl.com,12.13.14.15,12.1') - cmd = '../bin/q -d , "select * from %s" -A' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1166,7 +1176,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_quoted_data) # FIXME Need to convert \0a to proper encoding suitable for the person running the tests. - cmd = '../bin/q -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1184,7 +1194,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(sample_quoted_data) # FIXME Need to convert \0a to proper encoding suitable for the person running the tests. 
- cmd = '../bin/q -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1204,7 +1214,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(double_double_quoted_data) - cmd = '../bin/q -d " " --disable-double-double-quoting "select c2 from %s" -W none' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c2 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1214,7 +1224,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(o[0],six.b('double_double_quoted')) self.assertEqual(o[1],six.b('this is a quoted value with "double')) - cmd = '../bin/q -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1224,7 +1234,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(o[0],six.b('')) self.assertEqual(o[1],six.b('double')) - cmd = '../bin/q -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1242,7 +1252,7 @@ class BasicTests(AbstractQTestCase): tmp_data_file = self.create_file_with_data(escaped_double_quoted_data) - cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c2 from %s" -W none' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c2 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1252,7 +1262,7 @@ class 
BasicTests(AbstractQTestCase): self.assertEqual(o[0],six.b('escaped_double_quoted')) self.assertEqual(o[1],six.b('this is a quoted value with \\escaped')) - cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1262,7 +1272,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(o[0],six.b('')) self.assertEqual(o[1],six.b('double')) - cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1279,7 +1289,7 @@ class BasicTests(AbstractQTestCase): # these flags will be removed completely in the future tmp_data_file = self.create_file_with_data(combined_quoted_data) - cmd = '../bin/q -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1288,7 +1298,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(len(o),7) # found 7 fields - cmd = '../bin/q -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1297,7 +1307,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(len(o),5) # found 5 fields - cmd = '../bin/q -d " " --disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " 
--disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1306,7 +1316,7 @@ class BasicTests(AbstractQTestCase): self.assertEqual(len(o),5) # found 5 fields - cmd = '../bin/q -d " " "select * from %s" -A' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -A' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) @@ -1318,7 +1328,7 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmp_data_file) def test_nonexistent_file(self): - cmd = '../bin/q "select * from non-existent-file"' + cmd = Q_EXECUTABLE + ' "select * from non-existent-file"' retcode, o, e = run_command(cmd) @@ -1335,7 +1345,7 @@ class BasicTests(AbstractQTestCase): tmpfile = self.create_file_with_data(file_data) - cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1353,7 +1363,7 @@ class BasicTests(AbstractQTestCase): tmpfile = self.create_file_with_data(file_data) - cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 31) @@ -1370,7 +1380,7 @@ class BasicTests(AbstractQTestCase): file_data = six.b("a,b,c\nvery-long-text,2,3\n") tmpfile = self.create_file_with_data(file_data) - cmd = '../bin/q -H -d , -M 3 "select a from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -H -d , -M 3 "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 31) @@ -1381,7 +1391,7 @@ class BasicTests(AbstractQTestCase): self.assertTrue((six.b("Offending file is '%s'" % tmpfile.name)) in e[0]) self.assertTrue(six.b('Line is 2') in e[0]) - cmd2 = '../bin/q -H -d , -M 300 -H "select a from %s"' % tmpfile.name + cmd2 = 'q -H -d , -M 300 -H "select a from %s"' % tmpfile.name retcode2, o2, e2 = 
run_command(cmd2) self.assertEqual(retcode2, 0) @@ -1396,7 +1406,7 @@ class BasicTests(AbstractQTestCase): file_data = six.b("a,b,c\nvery-long-text,2,3\n") tmpfile = self.create_file_with_data(file_data) - cmd = '../bin/q -H -d , -M 0 "select a from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -H -d , -M 0 "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 31) @@ -1412,7 +1422,7 @@ class BasicTests(AbstractQTestCase): file_data = six.b("a,b,a\n10,20,30\n30,40,50") tmpfile = self.create_file_with_data(file_data) - cmd = '../bin/q -H -d , "select a from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 35) @@ -1432,7 +1442,7 @@ class MultiHeaderTests(AbstractQTestCase): tmpfilenames = "+".join(map(lambda x:x.name, tmpfiles)) - cmd = '../bin/q -d , "select name,value1,value2 from %s order by name" -H -O' % tmpfilenames + cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s order by name" -H -O' % tmpfilenames retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1456,7 +1466,7 @@ class MultiHeaderTests(AbstractQTestCase): tmpfile1 = self.create_file_with_data(sample_data_with_header) tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('othername,value1,value2'))) - cmd = '../bin/q -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name) + cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 35) @@ -1471,68 +1481,17 @@ class MultiHeaderTests(AbstractQTestCase): tmpfile1 = self.create_file_with_data(sample_data_with_header) tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('name,value1'))) - cmd = '../bin/q -d , "select name,value1,value2 from %s+%s order by name" -H -O' % 
(tmpfile1.name,tmpfile2.name) + cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 35) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - self.assertTrue(e[0].startswith(six.b("Bad header row:"))) - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) - - def test_output_header_when_extra_header_has_different_number_of_columns2(self): - original_header = header_row - tmpfile1 = self.create_file_with_data(sample_data_with_header) - different_header = six.b('name,value1,value2,value3') - tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(different_header)) - - SELECT_table_name = '%s+%s' % (tmpfile1.name,tmpfile2.name) - cmd = '../bin/q -d , "select name,value1,value2 from %s order by name" -H -O' % (SELECT_table_name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 35) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - expected_message = six.b('Bad header row: Extra header %s in file %s mismatches original header %s from file %s. Table name is %s') % \ - (different_header,six.b(tmpfile2.name),original_header,six.b(tmpfile1.name),six.b(SELECT_table_name)) - - self.assertEqual(e[0],expected_message) - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) - - # Not the best behavior, this means that if the first file in additional files contains exactly the - # same content as the original header, then q would skip this line instead of failing. - # Extremely rare case, and for any table with numeric values, this is not an issue, since column names - # cannot be numbers. 
- def test_output_header_when_additional_files_dont_have_a_header(self): - original_header = header_row - tmpfile1 = self.create_file_with_data(sample_data_with_header) - tmpfile2 = self.create_file_with_data(sample_data_no_header) - - SELECT_table_name = '%s+%s' % (tmpfile1.name,tmpfile2.name) - cmd = '../bin/q -d , "select name,value1,value2 from %s order by name" -H -O' % (SELECT_table_name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 35) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - expected_message = six.b('Bad header row: Extra header %s in file %s mismatches original header %s from file %s. Table name is %s') % \ - (sample_data_rows[0],six.b(tmpfile2.name),original_header,six.b(tmpfile1.name),six.b(SELECT_table_name)) - - self.assertEqual(e[0],expected_message) - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) class ParsingModeTests(AbstractQTestCase): def test_strict_mode_column_count_mismatch_error(self): tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = '../bin/q -m strict "select count(*) from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m strict "select count(*) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -1545,7 +1504,7 @@ class ParsingModeTests(AbstractQTestCase): def test_strict_mode_too_large_specific_column_count(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -1559,7 +1518,7 @@ class ParsingModeTests(AbstractQTestCase): def test_strict_mode_too_small_specific_column_count(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , -m strict -c 2 "select count(*) from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , -m strict -c 2 "select count(*) from %s"' % 
tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -1574,7 +1533,7 @@ class ParsingModeTests(AbstractQTestCase): def test_relaxed_mode_missing_columns_in_header(self): tmpfile = self.create_file_with_data( sample_data_with_missing_header_names) - cmd = '../bin/q -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1591,7 +1550,7 @@ class ParsingModeTests(AbstractQTestCase): def test_strict_mode_missing_columns_in_header(self): tmpfile = self.create_file_with_data( sample_data_with_missing_header_names) - cmd = '../bin/q -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -1605,7 +1564,7 @@ class ParsingModeTests(AbstractQTestCase): def test_output_delimiter_with_missing_fields(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select * from %s" -D ";"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select * from %s" -D ";"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1620,7 +1579,7 @@ class ParsingModeTests(AbstractQTestCase): def test_handling_of_null_integers(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select avg(c2) from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select avg(c2) from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1633,7 +1592,7 @@ class ParsingModeTests(AbstractQTestCase): def test_empty_integer_values_converted_to_null(self): tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = '../bin/q -d , "select * from %s where c2 is null"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -d , "select * from %s where c2 is 
null"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1647,7 +1606,7 @@ class ParsingModeTests(AbstractQTestCase): def test_empty_string_values_not_converted_to_null(self): tmpfile = self.create_file_with_data( sample_data_with_empty_string_no_header) - cmd = '../bin/q -d , "select * from %s where c2 == %s"' % ( + cmd = Q_EXECUTABLE + ' -d , "select * from %s where c2 == %s"' % ( tmpfile.name, "''") retcode, o, e = run_command(cmd) @@ -1661,7 +1620,7 @@ class ParsingModeTests(AbstractQTestCase): def test_relaxed_mode_detected_columns(self): tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = '../bin/q -m relaxed "select count(*) from %s" -A' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1685,7 +1644,7 @@ class ParsingModeTests(AbstractQTestCase): def test_relaxed_mode_detected_columns_with_specific_column_count(self): tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = '../bin/q -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1709,7 +1668,7 @@ class ParsingModeTests(AbstractQTestCase): def test_relaxed_mode_last_column_data_with_specific_column_count(self): tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = '../bin/q -m relaxed "select c9 from %s" -c 9' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m relaxed "select c9 from %s" -c 9' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1725,7 +1684,7 @@ class ParsingModeTests(AbstractQTestCase): def test_1_column_warning_in_relaxed_mode(self): tmpfile = self.create_file_with_data(one_column_data) - cmd = '../bin/q -m relaxed "select c1 from %s" -d ,' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d ,' % tmpfile.name retcode, 
o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1740,7 +1699,7 @@ class ParsingModeTests(AbstractQTestCase): def test_1_column_warning_in_strict_mode(self): tmpfile = self.create_file_with_data(one_column_data) - cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m strict' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m strict' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1756,7 +1715,7 @@ class ParsingModeTests(AbstractQTestCase): def test_1_column_warning_suppression_in_relaxed_mode_when_column_count_is_specific(self): tmpfile = self.create_file_with_data(one_column_data) - cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m relaxed -c 1' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m relaxed -c 1' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1770,7 +1729,7 @@ class ParsingModeTests(AbstractQTestCase): def test_1_column_warning_suppression_in_strict_mode_when_column_count_is_specific(self): tmpfile = self.create_file_with_data(one_column_data) - cmd = '../bin/q -m relaxed "select c1 from %s" -d , -m strict -c 1' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m strict -c 1' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1784,7 +1743,7 @@ class ParsingModeTests(AbstractQTestCase): def test_fluffy_mode(self): tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = '../bin/q -m fluffy "select c9 from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m fluffy "select c9 from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1804,7 +1763,7 @@ class ParsingModeTests(AbstractQTestCase): data_list[950] = six.b("column1 column2 column3 column4 column5") tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) - cmd = '../bin/q -m fluffy "select * from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m fluffy "select * 
from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode,0) @@ -1821,7 +1780,7 @@ class ParsingModeTests(AbstractQTestCase): data_list[750] = six.b("column1 column3 column4") tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) - cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode,0) @@ -1838,7 +1797,7 @@ class ParsingModeTests(AbstractQTestCase): data_list[750] = six.b("column1 column2 column3 column4 column5") tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) - cmd = '../bin/q -m strict "select * from %s"' % tmpfile.name + cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode,0) @@ -1850,13 +1809,12 @@ class ParsingModeTests(AbstractQTestCase): self.cleanup(tmpfile) - class FormattingTests(AbstractQTestCase): def test_column_formatting(self): # TODO Decide if this breaking change is reasonable - #cmd = 'seq 1 10 | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -" -c 1' - cmd = 'seq 1 10 | ../bin/q -f 1={:4.3f},2={:4.3f} "select sum(c1),avg(c1) from -" -c 1' + #cmd = 'seq 1 10 | ' + Q_EXECUTABLE + ' -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -" -c 1' + cmd = 'seq 1 10 | ' + Q_EXECUTABLE + ' -f 1={:4.3f},2={:4.3f} "select sum(c1),avg(c1) from -" -c 1' retcode, o, e = run_command(cmd) @@ -1869,8 +1827,8 @@ class FormattingTests(AbstractQTestCase): def test_column_formatting_with_output_header(self): perl_regex = "'s/1\n/column_name\n1\n/;'" # TODO Decide if this breaking change is reasonable - #cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' - cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ../bin/q -f 1={:4.3f},2={:4.3f} "select sum(column_name) mysum,avg(column_name) myavg from -" 
-c 1 -H -O' + #cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ' + Q_EXECUTABLE + ' -f 1=%4.3f,2=%4.3f "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' + cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ' + Q_EXECUTABLE + ' -f 1={:4.3f},2={:4.3f} "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' retcode, o, e = run_command(cmd) @@ -1885,7 +1843,7 @@ class FormattingTests(AbstractQTestCase): data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') tmp_data_file = self.create_file_with_data(data) - cmd = '../bin/q -d , -H "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertNotEqual(retcode, 0) @@ -1910,7 +1868,7 @@ class FormattingTests(AbstractQTestCase): data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') tmp_data_file = self.create_file_with_data(data) - cmd = '../bin/q -d , -H "select * from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1943,14 +1901,14 @@ class 
FormattingTests(AbstractQTestCase): data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') tmp_data_file = self.create_file_with_data(data) - cmd = '../bin/q -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name + cmd = Q_EXECUTABLE + ' -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) - if len(e) == 2: + if len(e) == 2 or len(e) == 1: # In python 3.7, there's a deprecation warning for the 'U' file opening mode, which is ok for now - self.assertEqual(len(e), 2) + self.assertIn(len(e), [1,2]) self.assertTrue(b"DeprecationWarning: 'U' mode is deprecated" in e[0]) elif len(e) != 0: # Nothing should be output to stderr in other versions @@ -1969,7 +1927,7 @@ class SqlTests(AbstractQTestCase): def test_find_example(self): tmpfile = self.create_file_with_data(find_output) - cmd = '../bin/q "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name + cmd = Q_EXECUTABLE + ' "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1983,7 +1941,7 @@ class SqlTests(AbstractQTestCase): self.cleanup(tmpfile) def test_join_example(self): - cmd = '../bin/q "select myfiles.c8,emails.c2 from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = 
emails.c1) where myfiles.c8 = \'ppp\'"' + cmd = Q_EXECUTABLE + ' "select myfiles.c8,emails.c2 from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -1993,7 +1951,7 @@ class SqlTests(AbstractQTestCase): self.assertEqual(o[1], six.b('ppp dip.2@otherdomain.com')) def test_join_example_with_output_header(self): - cmd = '../bin/q -O "select myfiles.c8 aaa,emails.c2 bbb from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"' + cmd = Q_EXECUTABLE + ' -O "select myfiles.c8 aaa,emails.c2 bbb from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -2005,7 +1963,7 @@ class SqlTests(AbstractQTestCase): def test_self_join1(self): tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) - cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name) + cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -2016,7 +1974,7 @@ class SqlTests(AbstractQTestCase): def test_self_join_reuses_table(self): tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) - cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name) + cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -2031,7 +1989,7 @@ class SqlTests(AbstractQTestCase): def test_self_join2(self): tmpfile1 = 
self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) - cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name,tmpfile1.name) + cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name,tmpfile1.name) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -2041,7 +1999,7 @@ class SqlTests(AbstractQTestCase): self.cleanup(tmpfile1) tmpfile2 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) - cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name) + cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name) retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) @@ -2059,7 +2017,7 @@ class SqlTests(AbstractQTestCase): ''')) # Check original column type detection - cmd = '../bin/q -A -d , -H "select * from %s"' % (tmpfile.name) + cmd = Q_EXECUTABLE + ' -A -d , -H "select * from %s"' % (tmpfile.name) retcode, o, e = run_command(cmd) @@ -2075,7 +2033,7 @@ class SqlTests(AbstractQTestCase): self.assertEqual(o[4],six.b(' `float_number` - float')) # Check column types detected when actual detection is disabled - cmd = '../bin/q -A -d , -H --as-text "select * from %s"' % (tmpfile.name) + cmd = Q_EXECUTABLE + ' -A -d , -H --as-text "select * from %s"' % (tmpfile.name) retcode, o, e = run_command(cmd) @@ -2090,7 +2048,7 @@ class SqlTests(AbstractQTestCase): self.assertEqual(o[4],six.b(' `float_number` - text')) # Get actual data with regular detection - cmd = '../bin/q -d , -H "select * from %s"' % (tmpfile.name) + cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % (tmpfile.name) retcode, o, e = run_command(cmd) @@ -2104,7 +2062,7 @@ class SqlTests(AbstractQTestCase): self.assertEqual(o[3],six.b("regular text 
4,-123,-123,122.2")) # Get actual data without detection - cmd = '../bin/q -d , -H --as-text "select * from %s"' % (tmpfile.name) + cmd = Q_EXECUTABLE + ' -d , -H --as-text "select * from %s"' % (tmpfile.name) retcode, o, e = run_command(cmd) @@ -2447,32 +2405,3 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(len(table_structure.materialized_files.keys()),1) self.assertTrue(table_structure.materialized_files['my_data'].filename,'my_data') self.assertTrue(table_structure.materialized_files['my_data'].is_stdin) - - -def suite(): - tl = unittest.TestLoader() - basic_stuff = tl.loadTestsFromTestCase(BasicTests) - parsing_mode = tl.loadTestsFromTestCase(ParsingModeTests) - sql = tl.loadTestsFromTestCase(SqlTests) - formatting = tl.loadTestsFromTestCase(FormattingTests) - basic_module_stuff = tl.loadTestsFromTestCase(BasicModuleTests) - save_db_to_disk_tests = tl.loadTestsFromTestCase(SaveDbToDiskTests) - multi_header_tests = tl.loadTestsFromTestCase(MultiHeaderTests) - return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests,multi_header_tests]) - -if __name__ == '__main__': - if len(sys.argv) > 1: - suite = unittest.TestSuite() - if '.' 
in sys.argv[1]: - c,m = sys.argv[1].split(".") - suite.addTest(globals()[c](m)) - else: - tl = unittest.TestLoader() - tc = tl.loadTestsFromTestCase(globals()[sys.argv[1]]) - suite = unittest.TestSuite([tc]) - else: - suite = suite() - - test_runner = unittest.TextTestRunner(verbosity=2) - result = test_runner.run(suite) - sys.exit(not result.wasSuccessful()) From 90e79c475192e4e6212048fae5220bc30fd78dfd Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 12 Jan 2020 18:16:39 +0200 Subject: [PATCH 030/111] wip --- test/test-suite | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/test/test-suite b/test/test-suite index f5f0b298..af941688 100755 --- a/test/test-suite +++ b/test/test-suite @@ -3,7 +3,7 @@ # # test suite for q. # -# All tests must be end-to-end tests, running the actual q command and testing stdout/stderr, and the return code. +# Prefer end-to-end tests, running the actual q command and testing stdout/stderr, and the return code. # Some utilities are provided for making that easy, see other tests for examples. # # Don't forget to use the Q_EXECUTABLE instead of hardcoding the q command line. This will be used in the near future @@ -26,13 +26,14 @@ from six.moves import range import codecs -import QTextAsData,QOutput,QOutputPrinter,QInputParams +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) +from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams # q uses this encoding as the default output encoding. 
Some of the tests use it in order to # make sure that the output is correctly encoded SYSTEM_ENCODING = locale.getpreferredencoding() -EXAMPLES = os.path.abspath(os.path.join(q.__file__, os.pardir, os.pardir, 'examples')) +EXAMPLES = os.path.abspath(os.path.join(os.pardir, 'examples')) Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', '../bin/q') @@ -1391,7 +1392,7 @@ class BasicTests(AbstractQTestCase): self.assertTrue((six.b("Offending file is '%s'" % tmpfile.name)) in e[0]) self.assertTrue(six.b('Line is 2') in e[0]) - cmd2 = 'q -H -d , -M 300 -H "select a from %s"' % tmpfile.name + cmd2 = Q_EXECUTABLE + ' -H -d , -M 300 -H "select a from %s"' % tmpfile.name retcode2, o2, e2 = run_command(cmd2) self.assertEqual(retcode2, 0) @@ -2405,3 +2406,30 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(len(table_structure.materialized_files.keys()),1) self.assertTrue(table_structure.materialized_files['my_data'].filename,'my_data') self.assertTrue(table_structure.materialized_files['my_data'].is_stdin) + +def suite(): + tl = unittest.TestLoader() + basic_stuff = tl.loadTestsFromTestCase(BasicTests) + parsing_mode = tl.loadTestsFromTestCase(ParsingModeTests) + sql = tl.loadTestsFromTestCase(SqlTests) + formatting = tl.loadTestsFromTestCase(FormattingTests) + basic_module_stuff = tl.loadTestsFromTestCase(BasicModuleTests) + save_db_to_disk_tests = tl.loadTestsFromTestCase(SaveDbToDiskTests) + return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests]) + +if __name__ == '__main__': + if len(sys.argv) > 1: + suite = unittest.TestSuite() + if '.' 
in sys.argv[1]: + c,m = sys.argv[1].split(".") + suite.addTest(globals()[c](m)) + else: + tl = unittest.TestLoader() + tc = tl.loadTestsFromTestCase(globals()[sys.argv[1]]) + suite = unittest.TestSuite([tc]) + else: + suite = suite() + + test_runner = unittest.TextTestRunner(verbosity=2) + result = test_runner.run(suite) + sys.exit(not result.wasSuccessful()) From 642e4bb9479ed2b7ca36269a04df3d18b091cb57 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 12 Jan 2020 19:50:03 +0200 Subject: [PATCH 031/111] wip --- bin/q | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/q b/bin/q index a4004dc1..d6dc1fbe 100755 --- a/bin/q +++ b/bin/q @@ -1143,7 +1143,7 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): def print_credentials(): print("q version %s" % q_version, file=sys.stderr) - print("Python: %s" % (sys.version.replace("\n","//")), file=sys.stderr) + print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr) print("Copyright (C) 2012-2019 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) print("http://harelba.github.io/q/", file=sys.stderr) print(file=sys.stderr) diff --git a/setup.py b/setup.py index d78de8e6..97753d71 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ 'six==1.11.0' ], packages=[ - 'q' + 'bin' ], entry_points={ 'console_scripts': [ From 804c0d7daf277af295222f09bb89c69c34c567c3 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 12 Jan 2020 19:54:56 +0200 Subject: [PATCH 032/111] wip --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1c7b9719..e612a54a 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,9 @@ lint: dep ## Run lint validations. test: dep ## Run the unit tests. - py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir . 
+ test/test-all + ## TODO Bring back pytest + ## py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir . release: ## Run release pip install py-ci From 32e1d656469fdf50db80daabea49681bf25f6714 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 12 Jan 2020 20:38:29 +0200 Subject: [PATCH 033/111] wip --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 97753d71..2551047d 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ ], entry_points={ 'console_scripts': [ - 'q = bin.q:run_standalone' + 'q = bin.q_standalone:run' ] } ) From 6b3d9c149ec8e0e143d950a25fe54636070c083d Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Mon, 20 Jan 2020 13:11:32 +0200 Subject: [PATCH 034/111] wip --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2551047d..c170b34d 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ ], entry_points={ 'console_scripts': [ - 'q = bin.q_standalone:run' + 'q = bin.qtextasdata:run_standalone' ] } ) From 9605f982006a795c4e31b1c723b4aa98ce8d2418 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Mon, 20 Jan 2020 13:38:46 +0200 Subject: [PATCH 035/111] wip --- bin/{q => q.py} | 0 setup.py | 2 +- test/test-suite | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename bin/{q => q.py} (100%) diff --git a/bin/q b/bin/q.py similarity index 100% rename from bin/q rename to bin/q.py diff --git a/setup.py b/setup.py index c170b34d..97753d71 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ ], entry_points={ 'console_scripts': [ - 'q = bin.qtextasdata:run_standalone' + 'q = bin.q:run_standalone' ] } ) diff --git a/test/test-suite b/test/test-suite index af941688..4b8c0bef 100755 --- a/test/test-suite +++ b/test/test-suite @@ -27,7 +27,7 @@ import codecs sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) -from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams +from q import 
QTextAsData,QOutput,QOutputPrinter,QInputParams # q uses this encoding as the default output encoding. Some of the tests use it in order to # make sure that the output is correctly encoded @@ -35,7 +35,7 @@ SYSTEM_ENCODING = locale.getpreferredencoding() EXAMPLES = os.path.abspath(os.path.join(os.pardir, 'examples')) -Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', '../bin/q') +Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', '../bin/q.py') DEBUG = False if len(sys.argv) > 2 and sys.argv[2] == '-v': From 71fe8372398018a27e8f13925c5b15a30658a99a Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 23 Jan 2020 01:53:27 +0200 Subject: [PATCH 036/111] new site source code, based on mkdocs --- mkdocs/docs/about.md | 8 + mkdocs/docs/index.md | 380 ++++++++++++++++++++++++++++++ mkdocs/docs/stylesheets/extra.css | 1 + mkdocs/mkdocs.yml | 41 ++++ 4 files changed, 430 insertions(+) create mode 100644 mkdocs/docs/about.md create mode 100644 mkdocs/docs/index.md create mode 100644 mkdocs/docs/stylesheets/extra.css create mode 100644 mkdocs/mkdocs.yml diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md new file mode 100644 index 00000000..15c04df3 --- /dev/null +++ b/mkdocs/docs/about.md @@ -0,0 +1,8 @@ +# About + +### Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/) + +### Twitter [@harelba](https://twitter.com/harelba) + +### Email [harelba@gmail.com](harelba@gmail.com) + diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md new file mode 100644 index 00000000..9c672bad --- /dev/null +++ b/mkdocs/docs/index.md @@ -0,0 +1,380 @@ +# q - Text as Data + +[![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/) +[![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) 
+[![License](https://img.shields.io/github/license/harelba/q.svg?style=social&label=License&maxAge=600)](https://github.com/harelba/q/blob/master/LICENSE) + + +## Overview +q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). + +q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and column type detection, and provides full support for multiple encodings. + +``` bash +q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" +``` + +``` bash +ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +``` + +Look at the examples page for some more examples, or just download the tool using the links above or in the installation page and play with it. + +| | | +|:--------------------------------------:|:-----------------------------------------------:| +| 完全支持所有的字符编码 | すべての文字エンコーディングを完全にサポート | +| 모든 문자 인코딩이 완벽하게 지원됩니다 | все кодировки символов полностью поддерживаются | + +**Non-english users:** q fully supports all types of encoding. Use `-e data-encoding` to set the input data encoding, `-Q query-encoding` to set the query encoding, and use `-E output-encoding` to set the output encoding. Sensible defaults are in place for all three parameters. Please contact me if you encounter any issues and I'd be glad to help. + +## Installation + +| Format | Instructions | Comments | +:---|:---|:---| +|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.9/q)|Just run `brew install q` or download the executable from the link on the left, make it executable, and use it.|Make sure that you run `brew update` if needed|| +|[RPM Package](https://github.com/harelba/packages-for-q/raw/master/rpms/q-text-as-data-2.0.9-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. 
Just enter man q.| +|[DEB Package](https://github.com/harelba/packages-for-q/raw/master/deb/q-text-as-data_2.0.9-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`.| +|[Windows Installer](https://github.com/harelba/packages-for-q/raw/master/windows/setup-q-2.0.9.exe)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| +|[tar.gz](https://github.com/harelba/q/archive/2.0.9.tar.gz)|Full source file tree for latest stable version|| +|[zip](https://github.com/harelba/q/archive/2.0.9.zip)|Full source file tree for the latest stable version|| + +**Older versions can be downloaded [here](https://github.com/harelba/packages-for-q). Please let me know if you plan on using an older version, and why - I know of no reason to use any of them.** + +## Requirements +As of version `2.0.9`, there's no need for any external dependency. Python itself (3.7), and any needed libraries are self-contained inside the installation, not affecting anything but q itself. + +## Limitations +Here's the list of known limitations. Please contact me if you have a use case that needs any of those missing capabilities. + +* `FROM ` is not supported +* Common Table Expressions (CTE) are not supported +* Spaces in file names are not supported. Use stdin for piping the data into q, or rename the file +* Some rare cases of subqueries are not supported yet. + +## Usage + +``` bash +q "" + + Simplest execution is `q "SELECT * FROM myfile"` which prints the entire file. +``` + +q allows performing SQL-like statements on tabular text data. Its purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. + +Query should be an SQL-like query which contains *filenames instead of table names* (or - for stdin). 
The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). Multiple files can be used as one table by either writing them as `filename1+filename2+...` or by using shell wildcards (e.g. `my_files*.csv`). + +Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. `q "SELECT c3,c8 from ..."`). + +Use `-d` to specify the input delimiter. + +Column types are auto detected by the tool, no casting is needed. Note that there's a flag `--as-text` which forces all columns to be treated as text columns. + +Please note that column names that include spaces need to be used in the query with back-ticks, as per the sqlite standard. + +Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed. + +All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). + +### Query +q gets one parameter - An SQL-like query. + +Any standard SQL expression, condition (both WHERE and HAVING), GROUP BY, ORDER BY etc. are allowed. + +JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs. + +The SQL syntax itself is sqlite's syntax. For details look at http://www.sqlite.org/lang.html or search the net for examples. + +NOTE: Full type detection is implemented, so there is no need for any casting or anything. + +NOTE2: When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names. 
+ +### Flags + +``` text +Usage: + q allows performing SQL-like statements on tabular text data. + + Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. + + Basic usage is q "" where table names are just regular file names (Use - to read from standard input) + When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN. + + Column types are detected automatically. Use -A in order to see the column name/type analysis. + + Delimiter can be set using the -d (or -t) option. Output delimiter can be set using -D + + All sqlite3 SQL constructs are supported. + + Examples: + + Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1" + This example would print a count of each unique permission string in the current folder. + + Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -" + This example would provide the average and the sum of the numbers in the range 1 to 1000 + + Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc" + This example will output the total size in MB per user+group in the /tmp subtree + + + See the help or https://github.com/harelba/q/ for more details. + + +Options: + -h, --help show this help message and exit + -v, --version Print version + -V, --verbose Print debug info in case of problems + -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME + Save database to an sqlite database file + --save-db-to-disk-method=SAVE_DB_TO_DISK_METHOD + Method to use to save db to disk. 'standard' does not + require any deps, 'fast' currenty requires manually + running `pip install sqlitebck` on your python + installation. Once packing issues are solved, the fast + method will be the default. + + Input Data Options: + -H, --skip-header Skip header row. 
This has been changed from earlier + version - Only one header row is supported, and the + header row is used for column naming + -d DELIMITER, --delimiter=DELIMITER + Field delimiter. If none specified, then space is used + as the delimiter. + -t, --tab-delimited + Same as -d . Just a shorthand for handling + standard tab delimited file You can use $'\t' if you + want (this is how Linux expects to provide tabs in the + command line + -e ENCODING, --encoding=ENCODING + Input file encoding. Defaults to UTF-8. set to none + for not setting any encoding - faster, but at your own + risk... + -z, --gzipped Data is gzipped. Useful for reading from stdin. For + files, .gz means automatic gunzipping + -A, --analyze-only Analyze sample input and provide information about + data types + -m MODE, --mode=MODE + Data parsing mode. fluffy, relaxed and strict. In + strict mode, the -c column-count parameter must be + supplied as well + -c COLUMN_COUNT, --column-count=COLUMN_COUNT + Specific column count when using relaxed or strict + mode + -k, --keep-leading-whitespace + Keep leading whitespace in values. Default behavior + strips leading whitespace off values, in order to + provide out-of-the-box usability for simple use cases. + If you need to preserve whitespace, use this flag. + --disable-double-double-quoting + Disable support for double double-quoting for escaping + the double quote character. By default, you can use "" + inside double quoted fields to escape double quotes. + Mainly for backward compatibility. + --disable-escaped-double-quoting + Disable support for escaped double-quoting for + escaping the double quote character. By default, you + can use \" inside double quoted fields to escape + double quotes. Mainly for backward compatibility. + --as-text Don't detect column types - All columns will be + treated as text columns + -w INPUT_QUOTING_MODE, --input-quoting-mode=INPUT_QUOTING_MODE + Input quoting mode. Possible values are all, minimal + and none. 
Note the slightly misleading parameter name, + and see the matching -W parameter for output quoting. + -M MAX_COLUMN_LENGTH_LIMIT, --max-column-length-limit=MAX_COLUMN_LENGTH_LIMIT + Sets the maximum column length. + -U, --with-universal-newlines + Expect universal newlines in the data. Limitation: -U + works only with regular files for now, stdin or .gz + files are not supported yet. + + Output Options: + -D OUTPUT_DELIMITER, --output-delimiter=OUTPUT_DELIMITER + Field delimiter for output. If none specified, then + the -d delimiter is used if present, or space if no + delimiter is specified + -T, --tab-delimited-output + Same as -D . Just a shorthand for outputting tab + delimited output. You can use -D $'\t' if you want. + -O, --output-header + Output header line. Output column-names are determined + from the query itself. Use column aliases in order to + set your column names in the query. For example, + 'select name FirstName,value1/value2 MyCalculation + from ...'. This can be used even if there was no + header in the input. + -b, --beautify Beautify output according to actual values. Might be + slow... + -f FORMATTING, --formatting=FORMATTING + Output-level formatting, in the format X=fmt,Y=fmt + etc, where X,Y are output column numbers (e.g. 1 for + first SELECT column etc. + -E OUTPUT_ENCODING, --output-encoding=OUTPUT_ENCODING + Output encoding. Defaults to 'none', leading to + selecting the system/terminal encoding + -W OUTPUT_QUOTING_MODE, --output-quoting-mode=OUTPUT_QUOTING_MODE + Output quoting mode. Possible values are all, minimal, + nonnumeric and none. Note the slightly misleading + parameter name, and see the matching -w parameter for + input quoting. + + Query Related Options: + -q QUERY_FILENAME, --query-filename=QUERY_FILENAME + Read query from the provided filename instead of the + command line, possibly using the provided query + encoding (using -Q). + -Q QUERY_ENCODING, --query-encoding=QUERY_ENCODING + query text encoding. Experimental. 
Please send your + feedback on this +``` + +## Examples +The `-H` flag in the examples below signifies that the file has a header row which is used for naming columns. + +The `-t` flag is just a shortcut for saying that the file is a tab-separated file (any delimiter is supported - Use the `-d` flag). + +Queries are given using upper case for clarity, but actual query keywords such as SELECT and WHERE are not really case sensitive. + +Example List: + +* [Example 1 - COUNT DISTINCT values of specific field (uuid of clicks data)](#example-1) +* [Example 2 - Filter numeric data, controlling ORDERing and LIMITing output](#example-2) +* [Example 3 - Illustrate GROUP BY](#example-3) +* [Example 4 - More complex GROUP BY (group by time expression)](#example-4) +* [Example 5 - Read input from standard input](#example-5) +* [Example 6 - Use column names from header row](#example-6) +* [Example 7 - JOIN two files](#example-7) + +### Example 1 +Perform a COUNT DISTINCT values of specific field (uuid of clicks data). + +``` bash +q -H -t "SELECT COUNT(DISTINCT(uuid)) FROM ./clicks.csv" +``` +Output +``` text +229 +``` +### Example 2 +Filter numeric data, controlling ORDERing and LIMITing output + +Note that q understands that the column is numeric and filters according to its numeric value (real numeric value comparison, not string comparison). 
+ +``` bash +q -H -t "SELECT request_id,score FROM ./clicks.csv WHERE score > 0.7 ORDER BY score DESC LIMIT 5" +``` +Output: +``` text +2cfab5ceca922a1a2179dc4687a3b26e 1.0 +f6de737b5aa2c46a3db3208413a54d64 0.986665809568 +766025d25479b95a224bd614141feee5 0.977105183282 +2c09058a1b82c6dbcf9dc463e73eddd2 0.703255121794 +``` + +### Example 3 +Illustrate GROUP BY + +``` bash +q -t -H "SELECT hashed_source_machine,count(*) FROM ./clicks.csv GROUP BY hashed_source_machine" +``` +Output: +``` text +47d9087db433b9ba.domain.com 400000 +``` + +### Example 4 +More complex GROUP BY (group by time expression) + +``` bash +q -t -H "SELECT strftime('%H:%M',date_time) hour_and_minute,count(*) FROM ./clicks.csv GROUP BY hour_and_minute" +``` +Output: +``` text +07:00 138148 +07:01 140026 +07:02 121826 +``` + +### Example 5 +Read input from standard input + +Calculates the total size per user/group in the /tmp subtree. + +``` bash +sudo find /tmp -ls | q "SELECT c5,c6,sum(c7)/1024.0/1024 AS total FROM - GROUP BY c5,c6 ORDER BY total desc" +``` +Output: +``` text +mapred hadoop 304.00390625 +root root 8.0431451797485 +smith smith 4.34389972687 +``` + +### Example 6 +Use column names from header row + +Calculate the top 3 user ids with the largest number of owned processes, sorted in descending order. + +Note the usage of the autodetected column name UID in the query. + +``` bash +ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +``` +Output: +``` text +root 152 +harel 119 +avahi 2 +``` + +### Example 7 +JOIN two files + +The following command joins an ls output (exampledatafile) and a file containing rows of group-name,email (group-emails-example) and provides a row of filename,email for each of the emails of the group. For brevity of output, there is also a filter for a specific filename called ppp which is achieved using a WHERE clause. 
+ +``` bash +q "SELECT myfiles.c8,emails.c2 FROM exampledatafile myfiles JOIN group-emails-example emails ON (myfiles.c4 = emails.c1) WHERE myfiles.c8 = 'ppp'" +``` +Output: +``` text +ppp dip.1@otherdomain.com +ppp dip.2@otherdomain.com +``` + +You can see that the ppp filename appears twice, each time matched to one of the emails of the group dip to which it belongs. Take a look at the files `exampledatafile` and `group-emails-example` for the data. + +Column name detection is supported for JOIN scenarios as well. Just specify `-H` in the command line and make sure that the source files contain the header rows. + +## Implementation +The current implementation is written in Python using an in-memory database, in order to prevent the need for external dependencies. The implementation itself supports SELECT statements, including JOINs (Subqueries are supported only in the WHERE clause for now). If you want to do further analysis on the data, you can use the `--save-db-to-disk` option to write the resulting tables to an sqlite database file, and then use `sqlite3` in order to perform queries on the data separately from q itself. + +Please note that there are currently no checks and bounds on data size - It's up to the user to make sure things don't get too big. + +Please make sure to read the limitations section as well. + +## Development + +### Tests +The code includes a test suite runnable through test/test-all. If you're planning on sending a pull request, I'd appreciate it if you could make sure that it doesn't fail. + +## Rationale +Have you ever stared at a text file on the screen, hoping it would have been a database so you could ask anything you want about it? I had that feeling many times, and I've finally understood that it's not the database that I want. It's the language - SQL. + +SQL is a declarative language for data, and as such it allows me to define what I want without caring about how exactly it's done.
This is the reason SQL is so powerful, because it treats data as data and not as bits and bytes (and chars). + +The goal of this tool is to provide a bridge between the world of text files and of SQL. + +### Why aren't other Linux tools enough? +The standard Linux tools are amazing and I use them all the time, but the whole idea of Linux is mixing-and-matching the best tools for each part of the job. This tool adds the declarative power of SQL to the Linux toolset, without losing any of the other tools' benefits. In fact, I often use q together with other Linux tools, the same way I pipe awk/sed and grep together all the time. + +One additional thing to note is that many Linux tools treat text as text and not as data. In that sense, you can look at q as a meta-tool which provides access to all the data-related tools that SQL provides (e.g. expressions, ordering, grouping, aggregation etc.). + +### Philosophy +This tool has been designed with general Linux/Unix design principles in mind. If you're interested in these general design principles, read this amazing [book](http://catb.org/~esr/writings/taoup/) and specifically [this part](http://catb.org/~esr/writings/taoup/html/ch01s06.html). If you believe that the way this tool works goes strongly against any of the principles, I would love to hear your view about it. + +## Future + +* Expose python as a python module - Mostly implemented. Requires some internal API changes with regard to handling stdin before exposing it.
+* Allow to use a distributed backend for scaling the computations + + diff --git a/mkdocs/docs/stylesheets/extra.css b/mkdocs/docs/stylesheets/extra.css new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/mkdocs/docs/stylesheets/extra.css @@ -0,0 +1 @@ + diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml new file mode 100644 index 00000000..e6685fa4 --- /dev/null +++ b/mkdocs/mkdocs.yml @@ -0,0 +1,41 @@ +site_name: q - Text as Data +site_url: http://harelba.github.io/q/ +repo_url: http://github.com/harelba/q +edit_uri: "" +site_description: Text as Data - q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). +site_author: Harel Ben-Attia +copyright: 'Copyright © 2012-2019 Harel Ben-Attia' +google_analytics: + - "UA-48316355-1" + - "auto" +nav: + - Home: index.md + - About: about.md +theme: + name: material + language: 'en' + palette: + primary: purple + accent: amber + fonts: + text: 'Roboto' + code: 'Roboto Mono' + favicon: 'images/q-logo.png' +extra: + social: + - type: 'github' + link: 'https://github.com/harelba' + - type: 'twitter' + link: 'https://twitter.com/harelba' + - type: 'linkedin' + link: 'https://www.linkedin.com/in/harelba' +extra_css: + - 'stylesheets/extra.css' +markdown_extensions: + - meta + - toc: + permalink: true + - tables + - fenced_code + - admonition + - codehilite From 4644850b18bbc56aa260e11f36aa0f253102ffc2 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 23 Jan 2020 01:54:22 +0200 Subject: [PATCH 037/111] requirements for new site generator --- mkdocs/requirements.txt | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 mkdocs/requirements.txt diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt new file mode 100644 index 00000000..b9a11148 --- /dev/null +++ b/mkdocs/requirements.txt @@ -0,0 +1,28 @@ +Click==7.0 +Deprecated==1.2.7 +Jinja2==2.10.3 +Markdown==3.1.1 +MarkupSafe==1.1.1 +PyGithub==1.45 
+PyJWT==1.7.1 +PyYAML==5.3 +Pygments==2.5.2 +certifi==2019.11.28 +chardet==3.0.4 +htmlmin==0.1.12 +idna==2.8 +jsmin==2.2.2 +livereload==2.6.1 +mkdocs-bootstrap4==0.1.2 +mkdocs-bootswatch==1.0 +mkdocs-git-committers-plugin==0.1.8 +mkdocs-material==4.6.0 +mkdocs-minify-plugin==0.2.1 +mkdocs==1.0.4 +pep562==1.0 +pymdown-extensions==6.2.1 +requests==2.22.0 +six==1.14.0 +tornado==6.0.3 +urllib3==1.25.8 +wrapt==1.11.2 From 19df2fdfa5a0fd3757cfdbc43d466ca83a13e3fb Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 23 Jan 2020 21:41:08 +0200 Subject: [PATCH 038/111] visual fixes --- mkdocs/docs/index.md | 18 +++++++++--------- mkdocs/docs/stylesheets/extra.css | 21 +++++++++++++++++++++ mkdocs/mkdocs.yml | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 9c672bad..2b26d7ef 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -10,7 +10,7 @@ q is a command line tool that allows direct execution of SQL-like queries on CSV q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and column type detection, and provides full support for multiple encodings. -``` bash +``` q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" ``` @@ -90,7 +90,7 @@ NOTE2: When using the `-O` output header option, use column name aliases if you ### Flags -``` text +``` bash Usage: q allows performing SQL-like statements on tabular text data. @@ -252,7 +252,7 @@ Perform a COUNT DISTINCT values of specific field (uuid of clicks data). 
q -H -t "SELECT COUNT(DISTINCT(uuid)) FROM ./clicks.csv" ``` Output -``` text +``` bash 229 ``` ### Example 2 @@ -264,7 +264,7 @@ Note that q understands that the column is numeric and filters according to its q -H -t "SELECT request_id,score FROM ./clicks.csv WHERE score > 0.7 ORDER BY score DESC LIMIT 5" ``` Output: -``` text +``` bash 2cfab5ceca922a1a2179dc4687a3b26e 1.0 f6de737b5aa2c46a3db3208413a54d64 0.986665809568 766025d25479b95a224bd614141feee5 0.977105183282 @@ -278,7 +278,7 @@ Illustrate GROUP BY q -t -H "SELECT hashed_source_machine,count(*) FROM ./clicks.csv GROUP BY hashed_source_machine" ``` Output: -``` text +``` bash 47d9087db433b9ba.domain.com 400000 ``` @@ -289,7 +289,7 @@ More complex GROUP BY (group by time expression) q -t -H "SELECT strftime('%H:%M',date_time) hour_and_minute,count(*) FROM ./clicks.csv GROUP BY hour_and_minute" ``` Output: -``` text +``` bash 07:00 138148 07:01 140026 07:02 121826 @@ -304,7 +304,7 @@ Calculates the total size per user/group in the /tmp subtree. sudo find /tmp -ls | q "SELECT c5,c6,sum(c7)/1024.0/1024 AS total FROM - GROUP BY c5,c6 ORDER BY total desc" ``` Output: -``` text +``` bash mapred hadoop 304.00390625 root root 8.0431451797485 smith smith 4.34389972687 @@ -321,7 +321,7 @@ Note the usage of the autodetected column name UID in the query. 
ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" ``` Output: -``` text +``` bash root 152 harel 119 avahi 2 @@ -336,7 +336,7 @@ The following command joins an ls output (exampledatafile) and a file containing q "SELECT myfiles.c8,emails.c2 FROM exampledatafile myfiles JOIN group-emails-example emails ON (myfiles.c4 = emails.c1) WHERE myfiles.c8 = 'ppp'" ``` Output: -``` text +``` bash ppp dip.1@otherdomain.com ppp dip.2@otherdomain.com ``` diff --git a/mkdocs/docs/stylesheets/extra.css b/mkdocs/docs/stylesheets/extra.css index 8b137891..cbafc84b 100644 --- a/mkdocs/docs/stylesheets/extra.css +++ b/mkdocs/docs/stylesheets/extra.css @@ -1 +1,22 @@ +div.md-content pre { + background-color: black; + color: #41FF00; +} + +.md-typeset code pre { + background-color: black; + color: #41FF00; +} + +.md-typeset p code { + color: rgba(0,0,0,.87); +} + +.md-typeset code.bash { + color: #41FF00; +} + +.md-typeset__scrollwrap { + text-align: center; +} diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index e6685fa4..3a56fb2b 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -38,4 +38,4 @@ markdown_extensions: - tables - fenced_code - admonition - - codehilite + # - codehilite From 68bf5733f81ba0c663f66624c6c75a87ccf012bf Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 28 Jan 2020 22:44:06 +0200 Subject: [PATCH 039/111] wip --- mkdocs/docs/.DS_Store | Bin 0 -> 6148 bytes mkdocs/docs/img/bg_hr.png | Bin 0 -> 943 bytes mkdocs/docs/img/blacktocat.png | Bin 0 -> 1428 bytes mkdocs/docs/img/icon_download.png | Bin 0 -> 1162 bytes mkdocs/docs/img/q-logo.png | Bin 0 -> 25042 bytes mkdocs/docs/img/q-logo1.ico | Bin 0 -> 106665 bytes mkdocs/docs/img/q-logo1.png | Bin 0 -> 19560 bytes mkdocs/docs/img/sprite_download.png | Bin 0 -> 16799 bytes mkdocs/docs/img/sprite_download3.png | Bin 0 -> 20826 bytes mkdocs/docs/img/sprite_download4.png | Bin 0 -> 23052 bytes mkdocs/docs/index.md | 38 ++++++++++++++------------- 
mkdocs/docs/stylesheets/extra.css | 9 +++++++ mkdocs/mkdocs.yml | 3 ++- 13 files changed, 31 insertions(+), 19 deletions(-) create mode 100644 mkdocs/docs/.DS_Store create mode 100644 mkdocs/docs/img/bg_hr.png create mode 100644 mkdocs/docs/img/blacktocat.png create mode 100644 mkdocs/docs/img/icon_download.png create mode 100644 mkdocs/docs/img/q-logo.png create mode 100644 mkdocs/docs/img/q-logo1.ico create mode 100644 mkdocs/docs/img/q-logo1.png create mode 100644 mkdocs/docs/img/sprite_download.png create mode 100644 mkdocs/docs/img/sprite_download3.png create mode 100644 mkdocs/docs/img/sprite_download4.png diff --git a/mkdocs/docs/.DS_Store b/mkdocs/docs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..6f61d6dcb9996c78894bd0263e013376931928c3 GIT binary patch literal 6148 zcmeHK%}T>S5Z-O8CKe=u2L%rc9=!BWsoHC(KPN#kq6dF!LV^v%MAFnAN+I|FK7d~R zc@Om=d=c-?>~2M=-bAF#!0b0WJ2T6E3H!qsQy12A=+^hw4j0O8NmrR<5*|cVRy;^OhB5KR^ens@^ z^=d^d*H-&|(=g^1H+N4uSJyYUclQsEPtS^f4!^3DX^jJT0Yg)9>7;R##Ao0cL=U0} zi2-7O7#INt^wwsSM_?_q1Y&>~_+18Ye-NOEw#Hnc+&ZAa>m!br5K+L!w*;clXlu+B z!Uzagseme#>n8?R>EIVS&eoVKROyV%nW2uJnd=t{m$QRk$Z*DOh13!Q#K2buimGej z`M>e=`~Pba^@sstV5At}rFCb$0ZY(JuxtQDZApePuZEBr_SLmtHti%0P+s1)!E XGyrXlxkB)O&_zJfKn*eQrwqIU@W@~L literal 0 HcmV?d00001 diff --git a/mkdocs/docs/img/bg_hr.png b/mkdocs/docs/img/bg_hr.png new file mode 100644 index 0000000000000000000000000000000000000000..7973bd69888c7e10ccad1111d555ceabb7cd99b6 GIT binary patch literal 943 zcmaJ=O^ee&7!FiK7FWCot{@Ck@nrMW&tx0B-6VAbrk1u~FTzffX&bu9#AIsIdef8t z!QZfdz=K}>3m(LO;6X3qN}Y6@>cJYA%)G<%Jn!ec>9im1@7>wsIBwrMF}iHO!q%;8 zSJ@xEd~(FL18NRvkBsOXMVM>4WQc*~qcQGc17IjxRnj!O_^B1gan0x#EWT48PK->5B2>mI;LIx zC*FSw$Nfc!g)WZCEOJ=mM)}lLsOk|$ltg_(&ax_YCWMlBLPDVT%D_gB7o_$YZ`-OB z#1sV%whRq21>W;qwN$N?OUGtQQe;JvOsQrna;+v+j8dth=*?orHHb6waX>S!yXCgT zo!oR3{E&GzaOAzfZYv@_Sf{LdyJInS>TS60&R9%yCs$y>2x(*gYIJtRrYAja$Ceq} z!N&oc_K1!3-Ft`U>`CM;quEbB4KG%!MovB*9_3!QzFhqHwrbwK|Doo-y>auDJNSP6 T=d)j*_4El@X4^PFK7I8YBT*xD 
literal 0 HcmV?d00001 diff --git a/mkdocs/docs/img/blacktocat.png b/mkdocs/docs/img/blacktocat.png new file mode 100644 index 0000000000000000000000000000000000000000..6e264fe57a2e35a2855405ac7d4102c3f6ddcdae GIT binary patch literal 1428 zcmeAS@N?(olHy`uVBq!ia0vp^av;pX1|+Qw)-3{3k|nMYCBgY=CFO}lsSJ)O`AMk? zp1FzXsX?iUDV2pMQ*9U+n3Xa^B1$5BeXNr6bM+EIYV;~{3xK*A7;Nk-3KEmEQ%e+* zQqwc@Y?a>c-mj#PnPRIHZt82`Ti~3Uk?B!Ylp0*+7m{3+ootz+WN)WnQ(*-(AUCxn zQK2F?C$HG5!d3}vt`(3C64qBz04piUwpD^SD#ABF!8yMuRl!uxKsVXI%s|1+P|wiV z#N6CmN5ROz&_Lh7NZ-&%*U;R`*vQJjKmiJrfVLH-q*(>IxIyg#@@$ndN=gc>^!3Zj z%k|2Q_413-^$jg8EkR}&8R-I5=oVMzl_XZ^<`pZ$OmImpPAEg{v+u2}(t{7puX=A(aKG z`a!A1`K3k4z=%sz23b{L-^Aq1JP;qO z-q+X4Gq1QLF)umQ)5TT^Xo6m5W{Q=eg`=5?o13Glvx}*rp{t>#shg3DvyriZv5}jZ ztD`wguSMv>2~2MaLa!4}y`ZF!TL84#CABECEH%ZgC_h&L>}9J=EN(GzcCm0X zaRr%YgxxI=y(w7S0@dq`Q?EYIG5Vm0MT%&c5HR(CnDAr^T6f1avxRvmvnsN+?-j}Z~1)Zr#rqzrt`edmo44*B<0=C4>mrxHF6$p zVws~UocMfeI`gB8pYMLYTzA87`NOI2w2B*JM5L`^AkN4AFQu&S+6ULTPjv;vzl4& z-eaK_F|D4~l3hzBSF~icNT@MID=v+_X`vpuvf=8+S(|^vlRdHe0<)v-^wiVR3w=TQ)uFA9F z>vmqc-mj#PnPRIHZt82`Ti~3Uk?B!Ylp0*+7m{3+ootz+WN)WnQ(*-(AUCxn zQK2F?C$HG5!d3}vt`(3C64qBz04piUwpD^SD#ABF!8yMuRl!uxKsVXI%s|1+P|wiV z#N6CmN5ROz&_Lh7NZ-&%*U;R`*vQJjKmiJrfVLH-q*(>IxIyg#@@$ndN=gc>^!3Zj z%k|2Q_413-^$jg8EkR}&8R-I5=oVMzl_XZ^<`pZ$OmImpPAEg{v+u2}(t{7puX=A(aKG z`a!A1`K3k4z=%sz23b{L-^Aq1JP;qO z-q+X4Gq1QLF)umQ)5TT^Xo6m5W{Q=$skw`#i#v$3O_v5UEZv#YC% zp@9obuSMv>2~2MaLa!N4y`ZF!TL84#CABECEH%ZgC_h&L>}9J=+-@<(X&zK> z3U0TU;MA)Rbc{YIVv!;mCIn19ASOK70y*%6pPC0u?M1+3t#h8?05D7Z^K@|xskoK& z=l_5E!ww8;ZH!Ed#V+%1n6Rkg{=V8A2QTsNE8^> zvHmCezoM^A29GnE>#ih4F*YzTGbm`! 
V-6~#faTQcLc)I$ztaD0e0svx!CVP{;LJ!$wlO$QmURhbmUin|A zzMtR!^*Qx<#q*qV?)%*Lb-l0m`+c1tWhEIRd|G@gEG#0~`%W;eG=xVBKg2vF$Y2)IqP}N0gWfeX~JT6Q|kcx4< zR^VX=Ll)mXBh+is?u&OQq=`E2-PRZE_)Zzi@=fb(F!lw4Gv_G@&$bX&eO_xg{&Uq; zU;WeP@uy!V7RNUh-QA*SZn~y6?2R1N4eLMSF3>K1cn6o!k0kR>;Sd+T|5?Wxy5M#u zIJjc*;U_GzFS|@%j@Y~!vhwdS^X;=(-LbzD-l2=`Iv>$Fz}zQ=kwPvt!%2Z3*g5AMl5KBXp*^#8 zV91}y5%>2d#Nn#5nWocU(aeYU{QrHrPz^4N@2@UT@KuvPk?>qS9x};1 z2c8Tw4m8@Yud+o4rLn?9`Q^*-#NTBhd-2dy@LpErzc2rM-D3v#EMQXFZ1{X?Bk$i0 z|2E~nB$cJ2II_BTWF$bFcxVW7A?;39@@GmIhvj()D$s0H*_EGxx z#76RCdHb}#SZDR&mJZkToJRaRXPGijyr4>psm}ScXJ`nqga3|)<8F@X9S#a~jqbOU zHHHWrs*uNZr;`6J`7=|yn29XtjXIa*7nh=;;JCOr`ah!omQz7UNeR!7E%%-gy=&%W z13nRY3NSmbKafRqMs}GBSUxLG!TWa>3dU3==0pXn{@vp4L}e5yOoEil?*p;g+FAk2 zBjNx1*+aRUsD)1ngWn(CB>#7BAMe4viO_vBKP0Wsd&e00Zx3B`J_%*>IZA zI*53j?EPDh{mif!9cxn9Y=sjQ#<>6Y>DRSgru<0_2iP{UL6Nf;{Y=;3GQf@2X$g7Pp2@>4?uE>%b;NiYa5{x!s$^ zd4>O7-MVZ9MHb{|wq|c>x%}k6L2L)B7Wi2R6h1oQrXm=(tFr0M`upte3&XmNyf0OL zW+IBEd1G&{@V^Q3P(V@L#b^z_J@MlFZww4Dsjrpt1g#qfmRHsOJNGI@F?-tEy#K!F z68LQ{2J0#b?nm0Zf1CHUoiT<6=GAO%&!d4ixhGgW{qI_^G=9&JXIefo?N~!`LiBgG zvt>DB<&whH_-Vs_9i0T9690XA{eDY~N?jX!t%l0_pZ+~O#rsT=U!5datHy|6wm#Xb z{C%W~_h4V}C&EobXyNAOE<9oQcLJ%H?#GJ5UN>k6%i?cj{BP)bdV2ZlTuBvEi(Abw zH`l(3*dVKC;cBcLV|=&eH}ClF3w0AEha0N@PJSDsB_+l%{Px7G{1QV%9%G~J>@P+p z8GOsCRJlI4kCu-Bi~>9k(8Z#pT>53*^y0@ z(prPtJc}ubExk9ZsHkY&LrQQMB~1?N;aLfhKN?14SH(<%D=-M#EAtjS;UFpWF5MO4 zfO8V*5uH!{NPZhY%)BKkssek_va!!X>h#ZvMpu6Wvm2p|dX735BW+lwGRMb1s|sws zuy71{zyG=zLV}ipcxm~Nld`%BWhmz}+D%(k1Uud8QM&3UK)>vyJ@BaZ#Jlk) zvik4e<%pFF?qpvh}qxX8rB4cfYLXOs;abwWC9^vo0dN^vNatc zM+c_$9d0I1Z49@GDPp7!rC@Eakdb-|w&#r{_ZXTlk4qd%aj#w$Kht(bi(dseXY8N%a~n?T-%VhX>rT)lUWH% zw&oRc4vPNI&Yq;@O=RdmI{39>8xpIes35tw%)HC}O%>!#8B2mbMBhsNwz`>||99pm zAOZNb8Y#}hb60q$UA!uW4lLn_?@lT{VRJ-oPL(=y(0!$Q-Jb-#{pw-Fl+<6LRVlb+V7qbBTjzpUD!_a6woV~UR* zCDr~L&^#1wrT$xguuc`wvbcSev2xPoo%V3G8-IrtaX&{j>E!B}wVW^Pey`I=B02?Q zr%C|T3=I{V2D832TmiT)-X^;3El)O0DEs><4ybE z0qkJr!l6R5R;Fzcb~ThBA4ZF?KwWNNV|vFUu^^cwQ#7h8n+-qChdjPJr@Jmp9H736$mm5Wb 
z@Abd<83%bPa!6;X^en7=Kg=RLfA({~BAfuQ7i`q0F3-dm5_-%gb_v>5$?%2JxC|Vb%RkUd`fT; z;rB^u1&LxO6!N#Ied481J}pz$C6#N6)1mr|%rdj~=Ym0$<=IOA z7k3|CLCY#dMB(Cf&=KRnAsP=GYj(4G!IA6W#G8h&hx0N=oPd%EuHgRxdmJ8CH1U zhdyJa95Z?H+n8+Te+%5^3YwSZSeqg1-b8Ns*ct@XRYPu((E}DxR`VML#Dlj>?6fQ7Rt7(htX* zJ>RW)X_ADHPrj06A-m5)w&Pme1xcd%?nNc$wkUx7BQ|C-YxUD@ zy6MI}wF3Zm`|7)aWV0|_iEK@HT8lj;adOHvx;%eBMfGn_#5Hehx_LNd-C2tmwGk3_ z{zHro^zPGE228XdeL0xbwr^<2-|3d#-qT9zkKDmpR!)@;5Xv1HhRNP`%3`uFS;=<) z?Dl{=eWxAhL2-15y1?nKtQGlk%qqk90T-8=vi356E;J5x?hPH$8TELtYLPY#=nG4JL$;@S{^K}S5y8grOLYbf(E_Pm_vJR?2q(%zrrJD`%2E(bj*q4n0gN&ptQd#2SwpEiI1>_ zXb3-jxE3hIRJNg1#rhkYnC|=HRVN4|7_GLl$KAi0ajqCH_vdJR@T^e|v2XM~aoC%1 z9q4{f1eL3h{p4Fb9l*reIy#A!{>HMaUAz?dL-V<7vV7wo%G|fi+FgrX>gZqGe(w9i zVNRXPX#P{|{)p@GBqdr11iMbU;}B5uv}H<%m~vWX3!TFzY$AUk z)_nH+DQl{db?)1s-o-h^A0I?Gzn}m4x_6P3I6x&rs8~2a^?@=oZ{*Nh*>a545En|f zn#8{GMCjpLt&D@c<)x*jRM*L%u4r~)uPG}|wW=PoEajif+bByZj-iqrF0YPyqgL#8 z$%m6KnR}X$89H1koeLA5ye%UW@Q3Q6Y|D*aF<-{JuLPVWty%NO40+%9oc>Xe7?D|e zq?(}16W}1CJA6Zzhq_+BluMK)$H#txZd$QFrO<(}hJb%mTKJ5`y9GlYq zG`O~R#2NE*gzjs3HrGn{qIlL+?I3V-&qY6=Gq1T9@|+NHi4&Z|LLYa0XpbWEoYpcba*o9i`%z% zn6KrmwD7%4wnzk|TQ%uH&?|W=M#Zgt{?>eQyj)IZ;-NjD=9_DX=k9KYP=!xH*)g?Yj)0?0i8K!v+Iv}~ z#l^)$4sz^M7tRo-CM zClVHEy?8x4EEJ~Brb_GL=j5cab}w*G~& zflW*t;F~IqVR`iiVukZ#+lmnz-1BE!CR7dU$@<2s4m&Q}PeZI|`#DrJay7g6$JRk@ z3-@4?A@4gVPohMk?E%3+4I+OK;*qMxi7~}!xp~Fr-z!jW>Av0=igU%2jr=aPl@z*Eq6FEQl(@n)_bQ_jVDAGC69S;-MaPtsuWfK zvXkD{2U^Gn9sa7QAVwybG`SpAJ>N%8)Ry8KG8idZX}0L*@?MFM#^d?xC~34Q!Y(mH z5ZGq*S(rxZ!sX6OK~(HsnRToIP$|fW&lwl^@Z-Fr+d`uGuV1Yft#jXS_EAAp-#@e$ z^Ul)h_gA*EwCoH0sfkAM=?8sOlt|y8lHFz@WnJ{?Q@il~{d@qjLb0WW$C^W? 
z3I+5{A)a3GJvs)DB}OO}gm6_-ba~i4PlTzuax^1>I-qcS7SSFW`QGB=+m|b`Yet z%Qwuwv?U2zf2X0Hh6vrUUbj&a;tKQ&Iu9S znEiV9Vmn-nLRR?qsCDTK{Lwhp15|7G;lM1E?xw1m7Z9X=Uyki6wEIR!Kgs$U%X+~g zo^aRRz8Yf{5|^^LjqQ7;Zncb{k*91z@l)$X$xs;v_v{kw5bbsgf!XKhh?`~tu7qDX z-S7PFJgi4~+M~|ppA{jYjZru;&{1c>0|>z9eX?R;&1R<(M3j}XHqz+hV<&N0L9No8 zKk@BRhp)b$s;wp~dws#3N+3>}>)p3_pY5*Z%5zu>&_4Nk2WKY1B)mCaB}-lLA@Hw# zlanRE%4aLbIg)Rf6GP4lULMFJ@NtUiUG%4vtEhN|mc{BDwMonQTf*PT*fs!~U{$lc zBF;RRqj$`{F|2Om!)5E?65GN(1_@6QC5Z!5I_E(ZqOJvl0(IUevr?baB^wzxP8&A7 zE|#13Qh9tBuXith$DqW}sDctafAj4AHcO0ET_LawD>rLm*;WOzVL_I3Y zww$bibwwlYA)s_?Is27CLE~n}twk5*n*d)dKaCyHk!kb0v?HBRA@c{>* zsjql#lP-Kt^sTHE$~aWmJ|83IO}=`0xXv8t{mh~wf<%SflEtN+(cJOj$572+6VfzY zFA7Q&wr5zcSxl>qsH=EBywaY)XX5a)AP?WUU?x(Ap?Sh}iaS&sVlCj2N@Xv0nbkto z?>0i`eeWo-oz1Ww0p)-+?A{9T}zUy5CS>i_<#3}BK z>^&Vl$Ln?q?Nhoay;=&jJf-$p=ap?y*(3$zLBNFRuEL00Mr%yF$PiS64t`FqByxvF z2lDkQCBcstv-0K{2M6N}t^V-8@w5T+k(m2l_<=}H8?2z zY}t`@XDP>V^KX%nyX4t%Epjq=1qs;~#eO~KF^Ps!_>07mO^Vm&xvShgmi7%NW#8hdLr9PZ=%n1jx_TU1~-=}V;Jn6?Qo|BbzZdlJXmB+qz#75|N zmb$SCDuYta2uCr7&gv&~ekd$!*oJ^DXRGJJ$vbj^SM!v(o}K?=^KyHXnI81CJJoib`3iZLum!& z@}1G@rDPFPP=7PW*a%%d_LgLk&C~);RVOUs(hale5%ct+p=SWs)Rb;sXguh#_q#Yg zso(g%s>-B2m=cYjgM8>+D>1o}ZZOU9m;Vj#Z-x5R30K}ZJ6QV%;&Hei z--CKI1JFW}D#wsZp5ucUsVns-yJOXWYn`1}g*^8iblvA;Y50u&a;HoVx0l^uzWmC& z?W*P=b9F$SaXrNrc%qL4c>1OB$kOMBZUCeF%e$AIX49UnH+vtgv8UK(#tC8ozuihrao~uH zxf`BY%@$oy;2%Y13@Vdtv|J9aWj_nl7RuBcbB;9*o9)zd)Q$W8JX}?hfA;6xAVa`i zuDO^%5ZV8;*LdBujx}0#hCgX*Kg?K`9NlFhkXcbtu{@FPh$Grxo~`$-)F5JWu9LvM zyzn|fUqN14Fd%((;e&pCx0wsaGZ%eGzVMU|hakxw5FAz61jOFgGF|$mn$&00jAaco zVwCb$JYF>$zEM8B?&uLmh<$EMJqx1d`L(sR+Q0|5VDu2+!!zy%{LJ;4^ggm3cN$P^ z8KJ~I4>`dm!+JhQ3-xybgMq;MNhKG>l2K6+8_e)~*e^>{Hu-b1@Wp|=2g9C1>vw#y zjgI#>(epczzJSn3G!48e98LyG4J08Q;{XaM`ubG2!TQzsFJ!DwGSoLpG^1D`%JKX> z^>3c|0SUX!BtkrQ*%gpM_4G@EU)=_O$Q~AHXr62>=cOCNxq)^tz88l9Jk=Tr>Pq-O?NsE#EOFz^|~z=mElLUL7fip%>2GG(Ef2|GIu&bXLF-XA0o^awz>S)H*5UB#4$ 
zMliW&oDc5{q{zN+-(ES0yp=tIz;*k$@pSLhkC>w*9_rCv<4unJ1MB5Yu@qW;udvUvj#A)O^Vttx(dy1@@&zOtZ({&>9RE%Ph;9^k;SmE*?%}-E2&$*HFBfkTF-07yU1?rp{`%! z1c)mNTiOTr@xTilhCo=2@@mv3qy3uf|9Jrb-K}13_TZ%r14$z#kR$E~rCgtddR+6! z>dqwSgzs|1ItB~W{JZt*%QyBk8xJXYYL_x|FR8&8t}J{>y%wVg8G^&?b3(?=h(HUB z92CITr0r4lb!Tq+sd6u~$lI+ZSXFQp(t0P zl(Sjwfw(^ogWGM6ZU@>EGJXkAm^UAo%)*5OhXpY5oCV(uAI&WEy5Ec!f|T2T>@ zQIeH^A9$*7iu*NB9L{fcUZF1IKiNenE{BtyVUu6e&5A=05TYf9Q_Zhz*^gYQ35GAo zsdXbURQZX!T~#tW9fkaFWey79nI8w`Sy);cU0BqwY#2{F)9=RMwXHG^@HFY&kX@At zY+aF^LAB(PbcsLvCJHGcD)}wdCLN*8@QFOx4E5rN{=picqoXX*7?z8`*9&C;#pvWy ztmTA}>Q$X{Ki=#VWR4*gnZ{B`MdIM-6zwHf7Au)n7MuaGPihelOELrAhk;!vhBE-(cT{`!9y!iT`-Ks{**>7VX zj@Cm8)bf7Ts&|D0Z$%?)mtw=0C(@ zhJCgejU}1#2R)hVm?sv07r$N{BK@w$8d7eIg+$l{cv3&_rf0=*43&&c43 zOFN||t_IB>?6D@uws{AYl*`yDKs-;_ElNpa)YB17qKwEYZ=fsL{%jB{Lo25|D68NE z0fB1oGz~D}LQgWgliB*yUU6!OPD!7KnW@2&f#mqc(-V6mo7$g+MNZ^5*n%}-p?zb_ z(JJ)Gi~x?2`zuw5f6#}YZxDE(_b4hmSD$cbsnkJaoZkFl@k5MZ^V#F&2R>w#-*k9s z+7yK+{CtUTvGP({O#Z`L3G?*3?2jvn>gp9wJe6vL@7|AO29iGlk(C8JFw3!Yl8 zLURy%T7L2_WqT3AO!VKus9ynr8;buj@!pL+wFUyVIGWOI!Xc|2Ycyx%{pp-|{g%u2 z^^Ce1Uq@f^8MT~8iiA}$ti358$u#dnOGX<$+OPy6TUrE;PrSG1i6_s+rhCezf*RtE zvErk0oOY>u^vTb-z|v1ufOc)Y2x_K-twocW7JuMZTG~00SPy*KwO53fTE3k7_4!7x zA*3f`g`fW)!fD!~KE2Aw$?<=BwOw^8sB`Xq-ay$UGpHx~`&$^Ij7S&wQuf>CHrGum z0XkkFpqehOnrs0$Z`$>MjU6yGf60eKt4W2E>!lwKx$GKGTo`RY$gKu=_2j!zz;a>O z)?8pRHAhEN4f5w>+JP>ZbiiSG51HV}QHkPde!ln72Oqh~gg%{h72hw^{x6 z9-orzBz@vI0UJBP0nGo>&!0d4Y?LDGnbOJyiOXORWA!Ap)wJ6`LPS@{HcL&~`* z*$+QbEq?f!@BCKbimmN7goNV>jG0!ABkPC#S%OF66+4dbZGhbrj$pH@FfyQt6ANZR5LoSS%Rv>1%IMI)-fO-0}k=zaA&3BZijjVspGIH z_%c4{u~cgr*GGvBs-6ca8;-ZSHDn>w*E>cltf9E`4@BG>3A{$=0jj16hRecs6fm%{ zwGJpv2quXLTJ)sh1+v?#C+nkzHi@C;&TGuxudnJE21{n zGW$3>!s&}X9uLP~@Z4P;`TlI9!Fg#_(kl#Z z(`P0YtI1{(qHo6jb+&O({ETGIyS^DOzKa?%M#WSgG}DTXlFDaqfJ_G}5LOdJIfOl$ z28I%tL92aY?I@Pe%fiHsheM^`Ri?mHH3NE1hs+hPK?faBP6BT}T`+_+(&wD4$rbx5 zgf*-0qplAvnVfi722^2QwNl@hG3p^0<=Ma{8=gp5U0{iO1MY z+(HzIE8hEzz-$6;y_}TS36;k}vi&zbrn@!O%el$#9A;Wxjq$ntG6eXW$64>a>{L5b 
zQv~FaiG^Sp-dfM&8>PczdE`;4!8zKOuyew3#TYI|xKL=P#FA8I+ouJAC?=QlQklcg zpMctUs89oqn{(zIwjrQ3lujA_di5MJ!e$XwnM^mms6PF31Q@pPNT~E-5z!tOY~I{= zeOT?r7)^ASUDe)AA&)#`PO1=27v}-13jTZ@#u$>SZPZTM=%zD>fc%MV$XY-de4sP% zK6u>vber(95TWaYGxf{ntD;tyk#OGvvQ3|ZDEt7(mkA0q-gn3B{w1k&1*5F6nhIg( z8${T5^-gNcd|fd)CTtW&zZM9>t<*)sD=r@^uDt~;1Tmns^NJ5jigynoZN#9S zp^zJ!0q&s{_71IG0PrnddlLZw^o142ghz`8AxcIm(I)5s|6AJscHi?ut#MY>wK%Xs zX5Qoh8wkM@uKeo6(6UVsMgB>GZ?!A%p;DFjR;)Ap%Plnh^mws)>x<~}&;)U5ChxAJupo4nRrPZxWqk~2M)!{oOjAlGNQ&3H0)9C02p zYx}jjJSKA;VMxf4IQT){W{i1-p92->q}$jTsO1=dvl#p}Z45%GTqnJoF_ zZNdHc{LXLwozGYt71$nyYbcIsTV(a+XI{T1OJ?818Av80M!3E{;b(A^kk!W_ak#`> zzc=|TnC0nOPq;fIL4=+WwGc&~43vOGy>IOJ6CFC~T?O=Zw;gN7zxK0%c6(=nR%^^- z6E-Lc~p*_?tRD!TIwKg2Iv?LK%>qC zGKU-D*alQs56^-)Y41kQFB`=R2o%`QvgN}!eHb~ypH#_763avq=?^L^y5Cn6YBGuL z?Y5DrV6)D~0$H&YwhhedK*LJGyxQT9=m&@-_3DDt7~~HCE+R{Be81lr%lKD1QDzd4 zR1E8f00UYJXV)&~@<_BW2R*zo8$;Mu+(v1%ynm!`(v%L;z(|L~ zOPM2ak!b%`zoFLX2@OH@_Iji?I>#LjUC9f*t}0L&{mj~J7C3d>Zc zsG?Z-@^u^-C&tdsnUz18`R%J$ zpXEmsRB8VDp}ZNY*&nBs`OM=8^ybo<0B~d>{J;ar6AC1%-tVb0NJS-U;` zXWg~26*4oeT{j+k%Jbdb-Q2Yh$Q|#T&VU0)xcmZjUxRqs4wwc-ObM;7d31c>kg3sq z*cNi9w||%Ypx9rvLc1NpA;x7*`>ht{Y-(z1;T3K$q2lMH02)X?OiOZc=NH~%WJuBzwT&SYiUcH{AHdDx=Z z^z`(FR~p?L^gpMGC!;Od$AQanTPd!hwd;Im)an4~N!VEnU7>9oxJPM7>e@G4HaX~d zuF_OL{>GcS8RH-hoVnORa&W^eSyMB8n&S9z{X`khszG z@6WB*uJF|VY9W9)_w~)4Y9LROyfI%Ew}=Q+t38q$f3=X{A2H3w{d zVDIQ*ZOBybzv~hA=K+cBb;}iifCxkbC5a_k7MufSx_)Z%H#vsTrnu~A^OPspqK}Ji%J&?53j$Ajm*6}zRmhG93W%JcVRnYr zWvm>wR5NP%ij|@qHLTHH*Kv=jWX+A^)_DbR zCqIsB`nyR)`~K$K%_Y~{=H~1WO~zLIT;vdBH}2TYt~9A?mOb*hwN$LwLL(Ldc5Q6LV zuT#r>!^%dJAA7{Mk$i064%Hc8&7?>Y8jKRP^`d+b#Ugz{4Wng>ev?&P=qxU(keR8|KYPS#_#fL5is4#G^~-Z!IOCpMJ#IiM`@fsVk=#E65w}HYq${(4 z?Oqm!arkrBDKG)g^k}HRJND z5lpV=^Vq`W%0Ib@1F|w|VqDsYy^iHP;%OtTeT@s;+g1OXEZ57Gqge0^ymly|+APjb zh4PhJ-emPTXcjb3rjT}|A()Tmh8dygy(S{ZEPioe(Gi=<2Z)cNWpNAdNS}yYUT{a& z;{k;BRjQOei@^yyRD5?vd3kwnbOo4Odly?F$!F!KLUAZO<#}UHzp@n|OE45!nl#UL zCtMAH6^o>2exn63`8b@GCFam^rN>5yAX2`d*I0JBnGGE&&)q-P;@fb?ULT}Z%mA&u 
zC^;{fRbaDnBQQ_ewgE_UUZ*8x;$PsqLnuy~XJ=nvAr-!vI<-2A@1d+0oITwS7+&y4 zQvBf$u{0;}AF|T0d2L?)2uk3|{pAbrPnF05YK%HhJfb?%*0Dw7LA7=-OXbPM7O1pd z7@Qss(@l>}|9|Nc##A-oa`{~CvlpYAa+wp^R@fPXV}8*U_M9|?Rq z-r`Z+^AtTBSvVM2gU?bWmy@-MAA$q`wpv8qSz20B{;Ush@$n}L56;SFNUNwA<+INL z6^GLBy-Xb-7*wD%snEXky_gA#ADI8YPBtsHyeTCJYIqGXVygWsP3mOaAz+7r1oTRo zLtRrLeqi}{ROT3nF;1&JpvHO}*~UoanA$ncG=a+PFzLRCd3v5oew(X6hADE%xwvW+ z8&i2{2wybT2?oy=EsIH!fEFxhP@>FucDq1bRe)pC7-J#yLI*6YnwpvubAxl6p4^=> z5ZfnxPQBV!*P-;}Y1mdJUbp`R=ATQz7EpZ31%+e=7%js^T%NcDj1E0G6w~dgan7FU zDL%j^#Wr|AkRT+w&E=J>y`Bv>9*-ypvPEJR`@OayfhCk(SnHld5yD8<8t`Dbr4~DWeP#P8ZfTx zG+Kf^+iO>q2COC=ZY&u37>Pmw+OU=>xqSCwuqdI1U+eKui!t=N%)LjLjrPfT$h41x zLW{*N3XeOS(+8?=#^$n!!_|_;KOQbo*ni_bx5$OYKHwhBuV-}5^&OMCL|k+oa!74A z=8L;Mg1B8b@noDzRk`V(>zY905Of)=^S4`cL%UDa#s?&w zvrTLdp}zK8KZ<1rgbR&tMgBZ93sDlH&9Pg+)=QbryPqY(! z^+}XaXKoA7N`nK`?N2DTY-7y5C>A3-rcamZ8wB-Bt28?~A-AghI0={h<3`9JR$b7! z4M>E~WmU`O`@1{$kHFA}V3Z+MY^0wsgOWn-w~G9_PA}c+U%Qp_5KjY{JXBD9=VNuQ zCjeIk5iLnEiUS9p17&FxZWjksIjRLQtMRx{86JE4l(MVk1TW|V!LBGy>H}-58g|8z zx`P@wBK&{{&v7Vbx$faPz&CBS;ss}(b5oLRv&DFD$35qsWEHHZub{gH=-l!G^bHQ2 zNJYcilFOkW^%t#Ms}u(k?8D*DYa?ry1g$7`!DX168W@jy^b&#L=y;Js#8nGnJERlXoe1v-qF46oLDg`hXC<;3aEWfwXv>OW7gzWXvlplI@f>#mb z41-8NCn0%y{8W1K$mh?ZPm&9hH!xXz5cM?3DR$$)*g0ERR5Dx0S)TXOSk@C-?wBI) z>z(A7XAH7qpw0$Y{EFki&MO7BbFxuR6L@2zVMF9v?6Xp8#SalJ09vbh%qT-lfV%wU z(cl~_ri?}r(F)))%Y@r}3fh0*b4YI1lf+t|Dm>vtK5(9cH=grDV4?!*i7HNkdf~bn zVFN*qL{N@)Yd$LsNk7i20v~Bhka2`=%^~{4C~`%h8)1pEMXPM5hTdZaj~;3`K(kbz zyUwd8;Y_U-%6Rm>8#tn>9Y2NAZqam`?s%OW8yeEs3o{YNe%j3z2M~I8 z{=o-XTMQJUwUg3=V%M!0_egbfRF{brdgmPrS5c`-?4VBC%o&5Bt!%^d!Oi)Y+@!#{ zO%aGtlms#H(ZKH9kj;P99<>yUBL((-MzBWG|LjAdk>nZ0(%zkJ69S?qP+uSqcVC3) z`lhHzerW2gCQ7471FC7e7lXsKcf2N44m~ACB1lDV-7@u>B$c7-6ClUOdTY?gpsD-h z0`NcV^~JD>kRXn}d!d1vLf$yCdmcEvpX*}%vs4S-XrVIUfa?QM#9{~nIGB2Yt&k3X420*~UeSdFlO!`VV#h?P>p~a2 zY<)LyLkudgkl{W0pa?B2A!KZYf#4A(3~A3SWH(xo7NsNJJFuz_M8nQ6aOv4iMr;^a zU`SFeaAJ?V7T?VgD`#-@KrlkDDxiD*<=EQ8RgQ;t8wu^k%@N+)S-A-cf7LoDk&_BG 
zL@d5u{p-8B0F*8)<+5yxW!slixll0qU!x5Z`n<30o%Vr+p28nGXsmAUeFKUNfx_f# zNK@jD2W2|V{FgnakBk9LGY(50WWa+U+@&AtRUI=hT5V-=v@%gF4?UKeQ>eo3J$__z z?A^G&ctg4kd@Y*w)$3p2X1~|F-+*>x@a~&QqyU1~La|w|-4Hj13%Jy2kheNc{TBQE zk=c1d61W2(&Cdc6e?xKPq&)qE5QpB0M3R@XEfhdR({91TA|w~ZauA$93s_KbEG^eV z@;tN(St-3)gU%z^DPQ~h&&GGNZp*{~$`5I$<>Hs_oN+)Ll{?nWj=MoDvin^Ryfeon zwBK8hB0eJRT=yE-RmgQE`6-n>80;1twM3sXeS9)j{H>$e@ZEC5JLhyuc&PBm7-uQ0 zwbN4hQyLoZtu;%fWw9^Y#ace8q(lQsFrlfyCkQ2yl-n<&|d?ilp~K z$8Mo2g=Pp~D)88g(1pL+pqLk1ADH7Vg7t4*+CX*1=NQAbCJC1OCy?{EvIc^*GR>D) zR?<>3=mGFRWp`%VF^JVr{!P_=b z=eN#0A6TZ~5;0IxX#gy~QrJ7`wWkLH{-dXzb~+FKfD=SeQi5`ZFXh%p`cWf{776Zo z8nNqD2-bxVg~quSFUJa;1RT)a42TdpP*l)>Q7c&sHtIBr9vby$%OY?mcM5U4Rxf3S?DGjt zk7<$>|JYW22~Z3y-;ahF8h-FlXrrLAhw(svyrPg)f%@!|#Lv;$Fw3noU&j+&t*xym zj4i zqNXc}t_ciHErQw9qA=_%WO0bP0!Dit@gi%2DUZ;7=eg(j>mxzkyWR9FR)$$Z<2q_@MjC6+pzJlT)7h72ebw-DaG;d%zS3>^V4scvR zM0?q~3aPJ;v9uX`R1_W-LR@9t zY#q!9ktFxzeh<3K?X6x~Rr&XI2-Qq$=J1|`y(9OzV(1355NKI*A&4AG{y32Su&6?Z zPOvqAwR5&Gc6GJSf?{v{=|#kZHIg<07AM550vzc5J;`Sv=l={px6;id4j{<8?Q9@x zja`CW2s-0U9Y0;gdtt{bHA7$^gAhXa!!BFvY^BuTjmFoN?J8>zZm_XLx0X{72l#t; zdP+n9uW-2hJH_?0@T=P62^zN2L=&ZE#SIjZhK6by+|H zVhoyZ>EfUx8d{vW@EitU*>$_;F9mTWJI61&)WL?YCFhN@e+o%qkTnjW#f&i=>eD}3 z{0aEXztDjcs`_oy3GDWWjdH9(aR9fpzn$9P^O*QqqpC5WWhif^l|a+^E)QjhGz8TV zwKJYi#mzjU-WcOyfyM=`f#gE?FTu(H4GuU1=ZM9(mBn_yn|S>>g4Rs}DpXUS%$12> zu8-aso>^}^^#(mh?%N_UNbQKL0PNpYM;jn%9M4@jRo?N!A)@t*ejt(PtMyJXse4`( z6<6^PM_nFtZ@6?jJ-%JlWiY?^Dw|?V1!!Th_(x1xybAgRQ$FMHj?d%cMW6lF+UU1X z2%gXgTB+Zi{Q#W+`d&L4V&DLDUJMi~pGEepQz7q{PJNdMrB^H}aNT+ivaW3M^|t-~U4;!U7E zb`LJWBW*^kv)~ylGIIr56P!sQ?pq6-s=r)-`l<$l#@9xl)6_q8z-K*O{Gw4Y1rv3V z4ir?nrnAPpG>{Ho!u3CHnP3vWK8Xi~4LTn%2`pcZXaUp2&52OOYf#HZL8f8CtSP`TX&~+(!abBs$J)1^ry|;_App6;-4U?6 zCVi+b{0Kc0SEIW)9Mx)}H-jt)n$z%aU|Vrwf%Cwss`XK&REA(hq3Z5OXg2!tMTxCQ z{meSokGbQm-p*jkYd^ie0K9aVkJVh~{g5Y&$j^wdj*!{XAyk}k!ys4AL6cO+BlFau zm8@bY;ZWM|?^k>PZ+-*fmU;3OA9_(Ww{zbQSxH|dvB)=shY5fkj>2MhcyEDsWBN#S 
z1rI=ByZas2Ilm`vvvqxeAQAvBovz(_aJ4Q~ULIaPA#5*vEwE7KwO~ZRXrgEG8E6}ZuE?f`iI;9VvLL=FPtd>1#Ox1#&rjS8 zfJzCmvl{Z^Ms%KoGJLHPqBhhY2)}~rFnLA?pQe%Y_S8%KobJAGNyB#1wrauw+7O>K zS)B(vDZF4q>z5C_PyrSYTw9kX-dH~uo$(wXG&Ct6O1~PV0}>!nRF=qGs^ZL|G4yWj zT)16Df8Rc8))Pi80xe6<>5<;w-x*n0Td!#7_E*AWip=>d%5jT>L$QemLV+5Z8Vy%R03d*Eqe9EtvAB5 z^iZ5VcwbS0aNYg$M??%_G@zwMui>tMMMqVGgNTL~i#^5REKXrZ{^JY4qyhwf6^aoovWSTqEDO`?+DgduK^AO;Nq7tDa26b_JW9ka<#q3DmEz(Z)wFr5K1GM#LNK2;m7!Xv%)oIrjpHm8);G zMsETM2=t5P1O|MAV7MLEuy_t9p%b2T%OZKAKoxQUF}ZhyUh)ONqctSm0Cn*8QeSO^ zBq6r1$b5{NL&qiarj5O=bIPwFlmQ0=yXnXk6cbqGFDC5FJJnD-;eTJ&5hlgd_f?n5 z3SN=}Z+cmh?Z3LU*xPycqBx7ZWy~bJ$H@Vb>0en17))9hwFShP1~#kY+(u~3RTAG( zKVA@Se9<{KgY)Jputv~G9-nM(7s?rD3RUP;c(F*V+;TswHGkr?gGlYUiJ-~@Si?xe z;51O(?9ge?)W`>=AQCA=R^PSqI{vuNsJrVp^mTw1baH4cxQSeZ*xO&Y7fufWU@H>} zZ2+@Sb70%4p*We7R0}A;RPmaRsi7H=GT=h!_mIg@1E>W}w=Z(!{n0O;`G z-aq#OYj@Q8y62+kRP#meU*~}FJqu(Ix?u$Xhy(P`2s!&)f!jgsbrB2dSD^xd#{_Ri z`D6#Bz`yqIYH%7myU;+^g06iv^AV_K;88(w1pY(LpUKFqgEk2{1l~slUv(Ys1ltvW zF$g_yz?+I_puLN&ZJS7V{5c5n8Ouk)G=#Hzji=&>D}dM~_CuaFy|jw(*NW(K0j+ng zleMdwxNg7%zyX9+zR4|kb5qgK=t;m8{61z_?O8GrW}hO8>v5{a9SOkD+uorZ2?D%c zX>lJz-e`(_)!3sbKVw-j5S75S2t*J3jqm@`>cz}o<{1yN4=gLV`%Rf_PB-u|K7ou{ z##ouA&WZ;)v=#hxogEfm*?@8>ETaMmNqlkG>dR&zIP9N77+u%C4K#{Q(2~`aua!x+ zp`Vfme)C-3J?hj8(QJbq*k&OHfVQgxY?-f{^*)V!!VTy}D}R{LK0e zXe1TdYx+1=r<}5?d+D}4@G_r7c5LKzOl2liD6QuRJ!TX5|6028cqrE|juyEU5k&jLbGh{C%7umPQ-o3Jn5TaXl*BWEZ9)9N;eLnTq zyzlcq=RD^*&w0-Ieh;b@Fm(XifKv5O)x#QJ*2Ns_68W6JFY6pg1<@-Momv`yhb|k( zx||}|PKBJ91$hg_e^^vxSh0?lnP&G4SQ+D~ckRHK=PDtJggUI6?Hhcn;aO-iTWM$YN zVaK3VQ<$Y$wa-};=otsj0PCayJhI4^hiu{z6fOZ?ssKd_M)yL zKoI~h&0R7_VCzpWFLCUqLUhOtXx;h6y4|fhPGz4$@%dE%38OfL=Q@CDSv7t%_LDt~ z#Z2y1^cgFC&n<#0{eKWZf3SYS`-U>#mZA{1FM=jc8lq!^e>dL91J)%!v_Dw!_9Wk0 z|A&8vM)q190`PhC0?-xRtyob5u+GB>(P>lke6$YQrqK4Zi-sI0D);dCO}&iR?cK+s zT>nUmreLYRNJU#HPuL_5F>UP5n1-3N{9Mo z+E;Vo=2^Y@Jr_m-eYpW(m1%7Uz|Jv3+C`ID-44|&z2-djG(DA4Aa)0oIr}benc;yS zM1W*)AgX3Y-OEr;4DAti=;L4oh6SS`EK39Hl|y~Z08t3d=(r8(r0CH@$mH!~5rrOh 
zA3O#YBjBktTerKzDQ6#J(*X(4bx`7k;k_rHB2L`m(E8vSc%t4}zw3&h{DHE~bAN9} z4G?vrpy5N%Bt*Qun*Kb2>mv~=yMRBMcp->(UIqr)(T5;gh|Oq;t?<=HO zz|k5py%wR3Wd{-Y@O;p2)o7i^$e$-2?`Ftx76PtDboD2AQ7DJ1@;u!8XgxF_2!qSu zlJ2elg3_u>V((3^3$gHA)y02xF_$=DI zwWu=b9O8sJ4JsqFm9<&=Q!}TQ>Ro<^R@(X z2j}`OL-&a5vtMzH(EJL`<^l90JL@{AtQ7=FjFHlA{ccaC73fS+ba&ESfmw5J#!Gu=5)r4q23IIu_ zi9(Wq_6@)hWCNaW0ZB4)4|LSh0Zx&SA$$!T~aq_0e1PhO&)pwb$ z!q7wp4pn0MK209K;1^PB(N$Ic74W}ca(N|XS)rq8w`RDy!`zSRUpu+X%fh_K_@3SS8tmOpr5iUeYR=$*XAs` zi?gn!x7#ql+k+W7Ig}qwPGggkPV(}y*A%{t@Q4Q&?^omI;NW;OA%ekZ8yOM3JZ9$S z<vK;};-)MdIj_cNE32yXTFUyUD!cv4w6{ql(whm$wfjQN zsq9SBov`q5>$CI}Ho71&9jV&&43nkMo_5j6aR=Bu zuc@;)Z{E~xIm*NoJ2hn=e;nzcH1|dU?%K6zhmFq8PBwo20_xB2Gc$TE0dQ?7t7$rn zo)Nnfv_EUv7{eMH8hX&s{Up}3-kqkNVJI4(JJaV_Mu9darKtl83xy7ASd55BNeUgF zDL8B3o!h0&%}w3b+m)W#A)%p;7wZ-m-NwnV<7T2Ik-+KJ6KGsOBiUb6Ezwtwj*N^O z9=1BcPMaG9s>$(GPeqKRr2fo?MU7t%v7bm}|21>CeLLK{ckYa;l+?AE4X#k@Z=}qq zs3X7Lh2s;zeFLpsaD3}3kfD)ZGf)B>AESwZ-224YM0LmxikNS z(xxP%tnB=4cUDUiF+ZqxeIU36p`|Q8{(5(u!okJOEsN>1DZ1+ARo=VoLB92-zre1d z4|Dcx?12Md0?KV~a|Hx~>J-c-OGuiUj!sT`V#x?O6#3!rU!!a+;k%2sM+Dy`C7m*K z_wevA(+AtA?*@In&dmHkbWUy)o}Qlm*0BJ&EsAWGUUFmh0Me@bI#3vOTiYoVLilkJ zM^GCdYste6_eAMOsx|0=rfo=Pyuqu&&G}nLM^r9OU{m$fL8Q>FVl+uHG3~95#6{&A zz;09E)c^J2g9i`lR^1kpl{MUQbuTL~Pm)p17e{-@aukk&H~7i1F?-dzoE-7-#Pi8d zUEiah21g-$KZ}vaVn0eT8@|oYH%Z%ok4N@6;uVlCU9Eaan4tAT`DC(LQc@CevKA_= zSYeu79{DE*$dk#0ZK&$Oz`*+!>(JHH8*g+$dN79@dCR9yF-!e1F~_vEwL1d?VOyG{ zks+cwL9^Twb=#}SVq~NveobHMw{z3qzZ)be0M*nN81bgMv{B^FpHCtrlSrA{v-)Yu z*~-hL80lr|j6N5xBglxu-A3sDp+M1g`1$u!`z%6TBx`bc(-h-t*pa)Pe zy_TMB!FPj$mf6|a|4SmnapgM}d&Lxo9Kl-g*!>Ga;^Md&vayj-^ZNR`V&&wvMmKue zIs?i#e(7UrsT_6E>*u;#rY~gVI(PV_kYFMYJUy=i8WR}4xJl?TXh;24h80AUV!@03 z{<|6>F0L%?YpmcE;sggf`&A#G3$2H)$J6<|f(uKgFJ$X_HvfCQvlFP`J(p5k4ZVsy z3XrcE&VyCyt8kTQ`@Egg1?DvdsC0w)8%9PA^25m4xIWlgy_Y#O#m1_Q!*%)H5m$Ze z0jOxl9`O^b$ihr&YHD!H4B*#QR4n^^EvkL~H+{(ioxTox#tW|UMv^?FYV%YLAT}x> 
z;vpuEzFSYTCKoF!HHSg4_p(%Dz$^e-l7+H_!{JQTkjz6AgIl*!%1aarV|XthLuthO?OV%8tOe0dsgh7{v|`PHje?o*#mW6W4tSq(t+QQ9CHN?d;VNhK|uk_i&Uy(q*5*jYh1Nq zbmNAOlT&V=0{B0ij*i&{BvpEA7eJRwlRE}${7GX_AGU1WpaWHYlRa`tyB!GmrBx@h zDl2co_UUgAkBTBfE|i4dhb%NUXOb}OWfyGCS$wyH4`XIt;X(rE8GjV5gw~@+k8th) z=rHksJ@FQjAMa92Jf{s{FIjh@rM#W_onoM=8FF40yjdKCV;`w0g@w{zi}8nb4kDGv z7~hBoSYSq*?lZoN;(+jXnMBAc8@Oo7pqVFRR~En45rYmyxH)5-o*rquHKD|cri_xW z&Nz423T4lm;67w$WmTVdB)3N#CWt4aZ}Wycc_Pum^xJPs8)lx~-V(kC`vHqO23Lb$ zzXPLRTl-m#f4cHCTqk2kMyxAj*x1WgTUzoRJ^({ZUG5IDlIP^)!mK$}Rw`L|6@|VO z-1e40lnnxr*uaLKQ8aWD{sjI`PEKz9qmzrXv**@^E4T~;Q`2S!^gw0uMgMX_icW&$ zM#Bs*A76Y|mqCR}U}RWW46l+`p)!(4^|_LFK7q%(L(I?AYkrX0TQP(^5jopWj(al3 z0AuV32n%1<)(;4%0q^R&;&jn>>2=Blx4-kEV6gDWNLx2LSool`aPtNlAFjCnjKG~r zxBo)>_*2IxCtD#yZhDH2h^bG9{2KVnXN0%FIB%Jn`s+(gV{NU5rvQa0Pz%AmoB=^SYp5{*f#KjwpH0jgei^$4u0_IA`zku} zBrb?QefqQouFS7%Yj+;Wf^i6skB_sRIz`Dqn`1{D+zZ3LeIz^ZgUtZORo+Pg&jok6 zp*+E(V0f9;)hUd7GPgUH;T}G$F0KyPT5V}=4r_|+0@&noJ(s$=`Vt(a;#=jvBb($; zh>LrL&CSZX!px^U75n9^qM|tTbjVLp3D|rj3m?OO$+zHPr30yk(~C<=NCE63OKZ70 zurXLx&K+T9#<{r_nKRn$yb{IR*%h*bOmsU?Fwk*#FD|YIjE~ZP-Vlu#B>@&t1SEd5J7{cdY;gVh@T#pYg+j3dNHvy$ zuHrycbab@rK{eG->tn5B367-L>5HVogYQIh1u#$p6BA*uBrH80 z<{jAdX0-p``&&9XZlqP&HZf;5F>!{TZi?nlfPypGf+&0~4eP%)mnlkrJwie!xDk#x z#Bh+2mVTk^KT!E#320<03nf#aZHUfVAqgxQ^mFk^UtM(fG;wLk-AysIdXXl&H-xQa z4>*={UlO=y4A)%{gW{FUXAG{CdMYqfU zk2E^aaQTBjMT&}w2+7G3;o;#jP?sI~r-c5Qwr0gkCw2=XMsPz=n?CG_<4a6w*y zfwk->PpU3(NFrUvR8}hb0`^5SD`*Zl6QCbqGMQZco=*3}(ZOK;FgV@+{#)aoq^o;$(FNY~?!y+IZ!0oSESftMHy#MV`4D_#S1uPnVv?07Y8%g&Wg5a)yE)uQ2mm_}$jlmTxzptEL#V2P0U z$@E~y_5MA*y%Y%H#D)UaQnMLNLqkHY`uSBtQx8oUSy{3hH}dAL)3dWg*u7vwvA~!l zTcp5dz??sCGCDeH2N?rGN{X&J@`x{%Mi#U&o>oRX^K4Xy4`Bo`GG zb@l%J!jO=VupUR!!ovMT72q>oz8nFD^ZYp{bT&%c&&VjNOQ$mJ>ew1^FIM^_7yQ8m OI&Cd|&3p~3$NvY$o}gL) literal 0 HcmV?d00001 diff --git a/mkdocs/docs/img/q-logo1.ico b/mkdocs/docs/img/q-logo1.ico new file mode 100644 index 0000000000000000000000000000000000000000..427a0d96d176b4e43c18608ebc63bc32670a5afd GIT binary patch literal 
106665 zcmeHQ2V9M98$T^+kdY)QN|BL~QAC-^$ljx@>=lvOF*8b5q{xz>a!CxJjoP(lDw5>yp5uPhL_Qd}tfnyy(= zNwAIb?Crm$TMGoyrb>eP^}nX~mxW91k1WP zIh4{g(4?kIId*8<15bgH^jz@wY?qe;L1ksfwyk=3+tOAIGepWjd|Vz1NOZA)m_BUKL`tK99OsdxF{iRu(Uh4#D4#BOL zv+AAggyq^GdiUx2eK`gyPbgmR`nwLFrh8d*&Do|}#(2#;ll0q_M~#2h)l7JL>$Nrm zp2an6vo_wYrqX3E%e%W0ZdBS7pjm>Rt=iG^ZCqLNXLhxeF3ekh}_P?67PglRY$zd~#KR<7%QDffA{3G^Z~5l(DazIl*URlQA_S2bq}tnO#xez2&x# z%A;otI=}VAD!=QsMhBJfJez3|KkvLvQs9^hwRgEkId;u1eY*0s_zLzxmPsv#RGQ!Z z`ZUKNo7UI9>~z(%cYV2hV!FtG$LP4V1gjWM~SxF(mf$)xcg2{ zt$UTG_$TNG>%OTFZ&_|lLUOgx#JU4a567=Nx;1Wk%4^ zPI+{2($!14!GSZ*1=gzdJlQ_*=DLl(VaeLpG^&p4cWj?kpY%Hw#rs}4M;>Xrc3RFS z{p|QMegf}pLz=Z&5;6F`a<6r-lkL6MJ&a2t?nbL#9jbr!rhX&u7cZL$FCD0obK;oR z+&EK*68f$xA*vTQEb1llPNc@WmlXN$IvX?nMqph%p;71PQs%8zwp@Rt-s~glEvjF= zI{p3$C!a3&W7^EGW3cgRJzpzNlS|IiuQ|^6Gg$M>`p8wgx@LHqom?`nZuhGRx##!*^eFLe-)w59`xud`115X_0??dhJ=cwzc=d+g)mH zuNG$aM%(w}Co`MOkVh%q)|z)#8om2jy}KUoFLxPOTWhP&skfG;hhGd%_e)W+eHE)0 z<>>EuBYMcfvO$8lDHqjF-W#-akJ{*4S5hpdr;lFgI>T9fp>}`c@&g^0MR|Hv5D%GD z->}hNmHs*qVYsRMc0HHvY7T%SDV=HLuANSnD>ts6 zK}X|YH(Speix!Wmq!Fg}_Ddy=HUfhSC5}bKH#yPr=2)#W!k#bUKYMJL_O8e2s4F?D zM%lNLCwB9maZ;OHTaywqgF7p3y66_hiKQn$H0${@ht-tCAo?Y4S-QQhpYexz;P!9&6WJu~box_D>asyyL)1D{Hu-YCSLl<9fkTxoA9c=oF!ixcW(}jq8biw84-dRF>gtvB{t=B2?LOlD zA^G92! 
zI2+r)Qp+n1R*t%P+QFsqP3MjGu7@lQ@XRn53||-Oos_uTU3AM|uWF>trWawcP2)VL z=!BeeIx~9Crk;lJO*h{=bLWV+(}pH{Bf3o+x9c%x5Ao6v58ceyjZI2ziZB{<+3jg& ziO~X!}4{eY9+hBnL7Qf{?1()o9*65X;d9P;NioNPwhhc zF0(G%c8N~r(C9lC3=-X}x^{BDc4X7@er`UMPsfk+UORo`m!usD>r70A$A_kR@$K}N*ENOx!20!jBlW=&Cb3VBO_;=ic0GDe886pmT#^+uAeq<^OW+!k1w~B z)Lk)_CZ1H=lyVp29+XReY8&1(Yhm!Y0WrH)#-3a;D7>p#`!Wf~JZG)@L#%7DVbbhU zqt>aIPim;ORc~X($;KY4zt(`}%h}4^))YqotIPZFyTib3l*u3(K=ZOYW zcmA#1A;4|+&FR%=lzcnTr-72rqgt~DC*F?Mpx;ZVmN{zWoKAMECuM>Ml9Magb z%gGJ`jZd97G&PEue0jXmqUR-?tbA3@TzwqB@94_?GsBZ^c`kWqcriI*vC*Z_O(AYO zr#!EIfR@|_y*+JoTbS1g?Op1~hS;l-^EW;GlY~OT+&B7K{9_%Z7F}zC4 z=7v7Qvo^JPKPk<2$ltxT-}vmj?B>V)*Bpb|et8x@OKIAGSy#u7o;ka5n0{45rAK$x zerSH^*-W3|qep+4 zcF9E8vF7!dH+oJFUDUn)Fg`7ezZmPBU{-pFxUY5BhSOb21b?7~?wUBRmQ$OS5qs5> zoO~Z&s#R^kj!LcSxJ*_$eysK=w>=ZK40u@9QfqR>*;D-5)Qm3CGThwP%)H89Wp-@( zob)7U;!N$HacgPjs(p2&|Lnip4OJR(d)aWCxRamuXZt@_Ip1@|nfpWEiz;tQKl5o@ zc!hkb=j{VK&?yl+eWVfTH`GSReTSU*%b30o9VO9Ip z&-+#0sunx7aqRfeDk)kkwOYIwJ>ziiC8mMd(Vb_!Znp5Ruo=sT1fDoDbji4r{S7X< zln-MI>*Lzi#*eQZ2Jc*oh*lX-kTKJKisNi$6JPJo;$3>&+dKb{@mLsCQPWQ z^L&zOC6&%jz3VZHx53VpV<(1&pmPgJu@qgbl8254h>yuX=4DvC5 z8#-@5ViW(6vdW{&k5GGCL#W$ggmB^mOYKj4-VC3pb2Y1m^=2KLIpW$EpAI}b=ALc! zD;<=FH~OoKuZ`h?kDFRwI^OM*S$Fl6MAc0(9k(A@zN_`?Ijc04eb#0z3NJr%V<(kG zS8j)juXWCu-X-z*6Q{5_7B0G~&wGZ)`1|Slx16VIw|0X|MW-?=gPk9(v44I2+MEYN zgC=UFt83mk9;oEM^wF0s(f1Bmr8EllcNyUjU}_b;>aFWY)yE#5osQkO6*9$pa@wga zomAfM@|v=`&3oU;Hl7;YX2lHMvNPkQc;kVW&B7B7_E}JETE_+RdTLoX?|w4){l1TL z{)~B~?ND++ux3(b58?joftJ(tcRio8Lagylw_%fzo{g11hqfFe_8+KjHM4)?p?)4+ zwpF)G{?uotl~Vb4f5i8E_M%$U6*V2VNy4UMDr~Un{OQU43Q_w@-oDs5HB>yo- znTCfq2OVy(x6`&Ip8BU=_^-Ox#bck^o*w1)9A7)E>dR9akr&;RJKUJ?q-D&zo~eTu zl-u&Cdvf2?0i6sttX_9IV)$j>rR#hIm*$8Y&w4iG)X^6WFIp%)(N$B9%DVe@M2TxJ zPFYqHs?O~BD9(D}%_k%M9^Y>;@okG$&6bNNJ6?UeLy#5_Hc_?AkgVVa<7RK1aCAa! 
z+^z~y1NU{>u|gv`ex3EF__{TgwqJjKO#izXD^u28xO2rV=V5kO(ycM)1M8iBlO1lU_$F3QxS<)+0TtpG8!| zCyDO^QyL{qcTB7=>Rqkz^<#!M(=lh`uB-m)rQ|VeYbIv zquVrpBR{8YmTg=+x(HfEZ<*8LsPCIr6&?C^OT45V5MOWcL!~%_OCM{E&(s^+Q!LVY zw0OhaWv^^(?@xSPd9~UqlP)Wp#7^p~-*A!R2vsA|xw*DSEnCLV9+dd0!iSGF2YNr* zyQHjM$m*uk?vI=_$xdIb+t5q9yT`44=fD4CY{bC<2hWLgEgBxzm@r~u#=Qzdvx< z(Rmwcv|3QzG4)le`eUl-XiU5k&?B{}k;>ji>od)kO*_)eIdgRjQRP-Q9v;6oykoNI zgpWzXbw#Qp@AZG)D6`Urezn$YyEjy?+S362y`#@HOBgiP$33Xo?nX5-CLNk-qcksm zp;3&x!vOM8iGG^L~TjQT!t7Y2p~ z_qO?S&YJSOns`-8Te7O+=;gKUd8)5+4cc3`PSngVZT;12s=Zw~Wr2pFHD5kyPL|KepvojR=dg_uDSp7dGH_T$88H{UX}EFmrcrW~UMTX-lhV&N%}FKBeE9 z2ac%|wcKRt$RzzK@%#6$u3r1Z;}rWupRQ9gKeA=C*0I@ht~=_DZnfPOVsXlFqKy%eDtUT~%&iDdkmX z-l(c)IxZC3r_UG^*?hvQ?&%R%^)4sYujYOFZIh&coj1}3>~E-hv-ijf_C7=Jy>BwK z+wM43Gh>Zm9nZ(ibKmmk$^ODWtyc`!p3r;s>nZ9L4xRkyo8J1(mw_SY$5wh~?&zp< zw~^a|LwAx6CwlJ*tykYDdi~ShODuY1{waE98~?_A%LmhO&9yrpwi-40XshbI=Oy+& zzbIk$1Xn@qeO*{Ns|K#3cq5ISi zpC>n*p=s25{jN=yyYE&?*SnZ_p|ijuCv10ijk$F{t{J-Sz^p>w^x)`Cn04vvtF~d(LsIj`i<6;Hc%BcfMaldltT0rRSwvNqfTM z&kyEA-u&Qeb-9Uo)b%zsY+5xuTjR{?iorfzd^%aT>i2igO_hf((DV7Y;E26X_XzFK zfT~@ZzuYjaORk&jkU_#Mjp)mWAb>7Qzh44e6ndm zGpq65ffJI%ZX0VgZd9)Ngj)07+8$Y9TgIuy7yV|L%5}Nw`o2}an+dAM; z(*uwD$Eq~{!#q7P*0xhnotHaj99UM(RCYeGiuGT!GHD;?}^bht8y;P@kr%Px0l&k zEZ08f=X3tAta}DcC$9=gewJ=9uTolbfAda%Yzy7;&QEW!`ttd2uIumWu(nzkHwROj znHw9A?X|btwwozFTQ3|lZu)shMxT@c%jOEtZ(VKDX32;@%io$|8y_8gvc&jGsk_F6 zblWDl;b*t_LDZv&`POElS&?~MB#Ueab-%(_Z#!m~`8yqo#y&4>6P&8A0JnJ$jh z8gJgFvQ0C?NZU%5E<4iWw!V6zs->gR*3?b)gy!qx8y}rrJaCMuL&XbqTr0)i4&S}& z$-}jS%1mkUx;7qX>$*h*B@%p(!4@%>v8_gm9rdDuf0Ec&-OxeyGyU9r|)r1?&u#^ zLFq+AwFNuOF1}7VZ1Aw-Nv)Mz^nzDTJybHK*S0gC2L`!ihcwMvo$_SJrGydd8#YzF z8hd2Hj@}ceSDE*vbNvk)|GMg$+|47dl#-!E+-Gs+Ws4u*`?xvfNyj8{VB2{XS16|8bZ6%29Z)yE$VCuOE?T%L)^uYb)ERq@1cP>U2HTd3&!FdrQ>WJf;lg%~o4+d)lj2e?M8##BIyw zwL@E{Sqkga&|G;jA^f`8I<>*=u2%6Ku=KQ+%ZaKX`u8i;PuQ)STEe%K`2*@>yGUS| zmTnZ&L8Ic;CX*L_7`^P2KrgdN*fp;^&$f9_pI@qh!RU11dXopr)}0TR*cz2+d*!Zs z$u#95E1LB>Hsx}E!G!ZYC%^N}zSh&CxyE|8>@#;x=^0JY6Pe6cKOE7uh2^2Uow_Xv 
zeRRC8<0iF?((^x_j2}KIB;w-w;by)QI&>;ON@=OB(AVR5@;$Xq(M@MHG#R)lOVg#= z=YA`n-hX{$UCRf9W%V^*kPAr&bL+6gt)A`cVRW&- zsfM^*a0B}}YF8~f)U`_pv0Z=N^7gh7gD*c1s@!>yT83U&t*v%PuMM|QA86vKnQh{v zM`aI(h~^$km^#bPw62~gGu!#tq`HIBY6-v8b%+?$T68e(*fc@oSH5+8`>kCXEVz5; zPq*<4)60tP^&8#nZC|5te;-`j#a3Iuy+0z<J?L=Bx5X zvHe``^m7%?4xBqYwR~5lI!CeuC*K~8Y`F69(kBU7ZI$B6bXDqCKCH##2FXqfdUkd= znRR#Dfyc{&1=CL&&b&KZV^ZUI$GZP zol%v0gxczU_{*3wZx74589i*TxY^nY?puaD+CODQPT8_umGp;uHY&Zk!>h9kd~a@U zd})_9P_q)8kPu72)XuB(!_zux3=o6Mpz8?>wFyzgG80})m38;nZcQdazV!O12D4wWx& z4vw3Zy0Q6Gi~Y}ACda!^4a*8NIi#$gqJLJ~Vg1`0JtGB&*EBY)|HUzV;vt`;Ap41z zdke}RKNjQid6f0KF5`nH-tYM$#%9jGgv`EaeJ+;W^1{yT^61xE<*Q_0EoV3&b&ApB zE=OO4wK~w~wp#cczt8DT$#(7Ls0lnH{jOJTcTsff(&daJH4Y6Hh+I1!Sa&*H*)Byh zv9{On)|=F}X4tei?|_tiQdwKE>+SM|cV1U;j4r6J2dFAA}G zQ&BMD_KNWS(Zl{)z1!X{aAwf^?2X$z%w`wlJyPzAr^~Cv>a!y*#b3?#+(a+M1di>T z+8%1{vY;l#g~*^cW_YFcm6QbKwFJ5NmZ1m~8c=9Jp#g;k6dF)yK%oJJ1{4}lXh5L> zg$5KFP-sA*0fhz>8c=9Jp#g;k6dF)yK%oJJ1{4}lXh5L>g$5KFP-sA*fnPxb%EV-K zLQld7!XQFRLRo@>es2xvk$7tfv4l7tu26b8p*BH5zn=!^#{>k22z3?`f+urhlnZb(qjude}J65%p|*%o1631XF^yrrCS@COqK zSl6;$N?G8&jmoHBbN?3xef>hv6+pJsJ- zQyf1#q@Z6@13|<#vnAGjeHp}{AFL9Da_Y^eIDT(RLBEy;V6!6xW`7aIOA=(E0aS?9 z!6-A3y1F-*L0@wbdTbyeFN8rI?CCGqIl71KuBG4Z5K&tdC&9wI|RH-P|N|O zgkOM$5RY8@Bi>3*z8g?^{(Hmra<-u;UNjnLOx^PBkA1v6gt5-OM?ibHS5lk>_qPR=$I#fwG*10~oFpYJFk2RrZ=z7ILeQD0HC=rn+Lcb5r#`^ocOcaVho zQ;SX{MbC0-V4(#2ZKXV{53&$GGs^cLtO*MGH8fC<*yQg6k;Wa7EHsA-@$HX$0R{aU z8o)T;M&R4fkMi=n%foX?d&u6Dub^K?1N_ee!xu0I)Xzr*BYGZ9;Q9{gndVbZ@wj+t zK%LmZ{V&&!i2D3$23Enc6r(z2F1vF5F zc*N%b_&yLqdH8P77{U_5K?3}OUk+*bgtYmB9{tvYy%6qAv4i)anRSEg+ zr>IDg67K&c*;ST2-23A@rG-h*8FU97GJR@O9P3qa@iVN0kh>DWFdv`{zE=M5s7T05pOxu3?yqG1?gAAp zbkd|2@m;vQ_>}ex?^)!zXO)$C%;UiXu8sI{%<1?H5XKajFsAks_~nsaKo~)oM3_dv z9p7Mr8KJNU`oMRQr0GmrK70l9#J@?})R{DXWX*>U!v@mEqrCD!2Yma5Qy%X0&k^ul zFRpDbP#QMHn#HIeMYv^H9G?*_EIwIZ+z_^hKDfF=8uJRq8Ds;Wm}{lY4Z8FUbKlP< z@WSQuN8^yEFy?~3)DE*(EX844=ne$iK|k#Ys|e8qR`vwNI}kAbv3C&>*fSQ!P+mjA z&mi!|ANR05^em0=Jp}jy-)Ar%bNyLb{etF0M+Kep^YRnOfxQ9jRZt%eq`J&r*r$V6 
zX=F+dmJ?uC)PcX?&JXK{2Z6Q24Ff1YuQus%fk>Nb@pXf>T^3^dFTNjO-uM~w6?~4* z>(5AIJfJV^Z)A}J;{o$Iv)5LN%R)9(=rDoR6H$By0sg@9xS>7e7dByD;_Fb}F^n<8 zZwH@4fqxVhp)J0j@%aW1;2rkBSOE~nxaRZ6uPZH$`3Am_*BXKLxi&jTX<5%1;8)12 z&$T%>4qriEg-MdmP6h1)^|DQEwwjCwa#B4if11E;habl|qL!TP zN-HkS{upaNLTDFuVfun@v6dE;r0x0TwLR+}c4fSALuow1XV4dSUVuNcJu>9oMc~@% z9Hk2~k75safY%ncUBvekux+aLKZN;?s{_QbRu%?f9$`BB5g)*Ql+PpPYUsTngnE+Z z6P&;DBJ4qMPT=yw?hvL>KHpA*@@g!PXOMTNg!Z`YW1mqiN5I=@}`nXH8MANv5-2K<5l zd3@MwF`=Mi3HwS(Z8AT=Z$g<6euS__F&VfaWGhU9zS)?8p5YVu5nFe;d?AiGxG)H7 z2)}LgDGzB=VYbE%qBwsJK)SO$4f4uj9@s`;Hs^=?=~;g37O%-r(8^Ixh^u_~N#UctQ4T*at#iri5Su-$s`xuO^|O1b^R4;J3@ttLa&O zy3DI@+=27;fb@^}m9+lAC$lN~<({MXbtLhGJht!VmzOt$Scgf37d~t zUsw|gO1LvHCioHf_CXqRwXAzgD*8WEjdD&CB(;rorl8;bku(mNK7Pg=0KuRuwy&4A zo=ZCSfvHNDLhvNbA0^f2=V2bOCUD;eaP75+p8bfA)uQ@g1a6xw9!>e3 z2n8WYdm*Ne!rlik{xFX--D5506YL=Ad@9Xf`1NH;OWOa+vNz4EZOAp85JOW+v`yZxXekf^;mZSN6i=zL$2h2_!XBY%vBo+T-)&Du$3!8-nAA!FguU7u}{ESAq(X<2mHwSRhIr`DT6fv zzQ}BbGXiu_P(od9duUg)ppE5GPn(_v5cqcIr*~2ri#*zq^a%dCl)!I`rExaI-IOep z*Bk)d;Cxfm#P@+8-4F85zu1rb41!;+A~3s13o#c=A;`1G!QAMVN88vd_9x^=`ON`1 zC*-$Q=hsJJJcl20$L){o+u0d}%_%=~p2Pe;mmqE5#`TF~RIVLCR()$z*{uX_8{9a2 z0QZFX&6jzd1)!s!c_%>4%S?P9DCoF_-k9wX!pDCmLH~mZSev=_krwx-aux(x2v@&P=94!Anu$1w*N2g01nod*hXXAN2S^8M((QWo7|_Hs`0M^_^F{KkFHI z;jRVp_yFJ6f94)gn*7)s_92`hFdNAdo}=>f2@V9z%V-Bajrn;zf!_w!k}(9#d-)O0 z*L<6E)5U2WfGju%amTVYGG!o~A-Qs)4d}2a2>b7m1boJiZ#!Ai=c(R7!eK&;gf=Ep zo~)!s1-W{W6fZ940VWgQ2cXaV2x}Q%HppI-1fDQXrG1`27GIE5W~iJ(@z2M6Jt2*? zqBs%eK<+${-+C_T%&T2Y1j4!2g>Z@>$)57&$!mYC6I@+NvOmro#pOIetmG2LFZUdq z*M1de2)sT}RGGQ+%sVrp|DS;*XUr1}o+wi|o9zKA5D&rS^ z%D1bueNJ&(*ZAtfoQrdwD`7L?GJ)AyQi%Jeg6`KOjS;3(7Gj-%zKa`S4dePR-uWPp z5%kDp;D)$Q{gnvg&yvuCfPMZ#f)8N@0Y37hYa0LaGF)A;IL?gx@kLohPOvfeT_5HI zuCH?M4wx*+D{lLmqEsZtOn!So=uVb1S5e3TTXOF-APc+0;;s?Q{>53AqR=1u)FyEC zBPot?gELrhB)$)TSIETm3$9GX@oZC+dX;rAi1Ab$*8%7bd|@r*+F8>4&z}Ek`vA!J zGf9@cAaqyIz2?v4owW@g;NSZ)drJz7-Jz zPPjPf5$h=TZiR1mmc|~7d)6vWGB92w-TyQGxgpjx#d&~f?d!ciS3lC?#q~`1k0js! 
zZk04Ske}B*GPk||m$GqO0}2f&G@#IcLIVm7C^VqZfI$E3W5d%->V@<;Q3ufw!rwi zbe2Hp`?Qhz_i1C5@6&ekze{J>iFoN0e%h0pkzto%mv=hL_`B!E->0*Tv%X_E%Q%~# z{!V}_ot*E|+5emVPQYyS99~-dU7DWenHDR3|NMW_g74cG3;F3e-?c9mP5zF5u}Hwu zqFlj6WWa1Cs7yfrXQ`wJjKvuOJFz%hK(3G>NTGNNjGQINjn${9NI-p1+E|=ok4M5R zs+vLHs}zf}(7X^{L}`ItSPs%kVxcEBLO;YI5(}xSk~qaqoT8p0P!fqnxoI+z$T&+? zAQER#Q%YhoipVZoT`2I(qNbF@WCW3Cwt9xZGn>*vv3Mc0Wu;Ksi#nN4N&*8Zq=SVbPf8O5l+I8Wi^Eeqi77FuM_`BaL-JdZSS(a0 zhO%IyEE1ViNv*3xeYqk-Yn~}8xoHwP1zNVtP=d_hp3>O@oh-rEK{;PcU66jW1@i=+ z^jwIvSU^whNC(ukC(#QU=|E zvUAhfC0Uw8pxj*F!DEUUBY@j|B~mL9eCaEFO_MUe5viFdCAV9|;AK!Q{s(<6`IQZ% zQ*vv5WdrGNC3D&On)XES-!z#8X%YSdkssuS>_7fAne&xF_Bhus{z*_wkjwBtS^tRk zHSZtMzNWu2{7?CRMEjceRpb9W|LQ{jq;vkOif?)URoy>j{=Z^1$Bw!37oPgf~1L3EUmX*OO#`e6X=3f)8MRxJ1Br+I}~( zCV?P-EJ51$voQ~#41D2tqp~Cr>_NG5ONwJ3 zlb=7J9msc-;6iXEY$8Mw<`AG`gv{O}DJ~DOzq5zmd0{?^@q>LOc*bu*AT1ApC)AY% z&@;-T4@tW2OZkwk7lBK)DY2Dsk%0A&$&YyedXjEy415SaW3=MZEe?%&n`-_i)S z0gsSHl8w0aFqW8X(tbk_KF=K=T-`9ggHIrT5CJk_>_c8j)QU2>a{%^)O(hlN<#FZD z?{{xisV>F{Y?qe~;LngBy2^{7Q>G_=->?Phqpi9GX4_5_mqrcffr!A$o}_qwR~KEcU<2EPi!BdEv+IapX-15=?iDv(*&ji=mYbYH0nwZn9m^WCZ!;F zU_3()d9}%J0(CKNxN9fkXb(2U^Zbad1N=EqQoVYViS?c7-~h! zl*K&Bd;q-5!Z&LQcH{HT{94i+!RoP)%>i6LV$URn4JnhWgUgieLy)#6O`vB?U+{x| z1U^9qCO^Ww=2(7RX=#jEw3Xi)1b;(0%&D?c9V&Q%z~n;cPcSC%3FCPVft7_0;1_&i zb^tE`-=_TXvZR>~^0F;kGck_jp=wk(guvuNhjCSF?GL!;4et6|7;_Hmt2#x-5`qaa z1g`wEC@qb^Ba@wl@ZZ8Bj79hX^a_1*_n_L8{uzWm>J!=$Yzg_DPrw_K9UPeeIU-C9}3cTVbtNzg+GHg=m2vpUk8P? 
z?)<18*xJP9558NNgmJ~!0n){RAS-_yV0|dec^YzJyzzBi*nOa%f)ICpVt!CqdqXZi zkQePjzdy1U;!7%-*Z-#Ou|LHA6|(<7+5g|vv7#@91{4}lXh5L> zg$9bX1`6R<5mG$AHX^e78eTH~8b*Hm{fkJiZ|TUdVR$$%O#PaE_$^Fj;hV4d87%yk zpV5l)v+U@XH*$2uDLUCDRp=K;MELa-`q>DPz(Zu5k>M#6l*};B$g*1~63o%b$T3z= z70px65bNlqrr4=uczWr)$uORu;Tf)zk*y=jvP-2h>M0&}Z@e;cR8r;`e+%xX(t|o!zq1=fIF7r zOt=S&CXC}T3BKV|`20z6CcNJg5%75lKFy$v2mNu+z$f12a%nduV4LFdPM^ygH;&H(!B<*w zGbw(EfcfcXyzk_7^+QQ}stx{k5}2=;Qyjd*F7TzFMQFPOfp2?x`onuf_Sw7z6kkdZ z5x{?2f;7Ur2-F1-mxoXXpTFV0$H84bd^NAOs?zg~gcIMwZWKa(7XoZ>g)p6fxgUE9 zX{1dL*!u&70N$m^Vk_7fymEQx#_?Vf??L#|Tqa;F^Tz>Y zeMi`Lpg-=ofDW)1gdFHQFT(qM#xFkqhVlT~WBFKj0PGPo2_V?zI6+9@6682Qh$O%U zh=WII1U;i4EeQos54tTS;Tf{P17t*fNpiBb5uz;GgI?fk@VQ+C><##J;dlN7@DEHR zNF&w<>PRBk9eR`WISOVw@Fa;ajv*WTgG>0_9`Ys-z~3kWpV0Oi0-j^+HX}$PZOTL+ z(1|pH?!Y5oMy&JFY{%L{yYL;p|6^>`B!~#`l_mr}!7j`W=P54fbM>$zcmUw{(um2x z^)p%chs@I2hD;*}&~YfCK7mj0-R%UF2_e82_yk*`5A0#{BFKRHuq_Dwf$@=-?cghD z8+MjN(4RK}{Dl+R5%>h#A0(hmFoEx{&?))=_+yE(z9Z<2J6EB9jA6cSu}UZp|H0fO zjUXG>*QZmO&j;E#L4XW<30$2)uWSxSh(6^Z_#I?mb0fbl?8_g+{4%i93Ig~8;44iD z;EjWBg-%f*%)@U{f|X$g&m`v>$Fg=nZ@`8KAd<%&DwR z)P-ID4gX<%^8Kiwb1G}EC_>C1%mxSxI;R!|@8}bD$9|8=0H1SloFOrhdbYt`YzPo!^jN%suZS1W(n$h z3ItgWIfD9j0ztNeSm;3^eIhHx!3*G4q)?*))SB>Hi`aYPvq+BL(gh804nbS8P+KY#O~5#S z?avY<-SeYOjFY?wcZl3OC7ge7&&VbCxi4V}VHTkV;UpoL;6&j2Kg#2N4DS}u9?rSo zg-_rOb#ZRQ`5izU=NR}C?)9LP288tl5blfI2qy^8fhmDcI18a~M&JkSA>r*nf9Dh7OR`A5-0b>X4^9ecuAB=FfxSilb z;QELorDF&)2wZ|MK!=ia4jnW5O6wnd(vIajs!yp%0_IeULJm~PoY00)qW}$5r8*-B*yBtm#zPQBK+#1O6dnegv7Mtr5^me#F)qW|#cROL`7F7Wq2y r^=_B$S3L=KUlD}SQjJiRoQNbp7d(P*Kd+xpG-kUf>eN8eFMiMM6EHZ5^HDfF+8~_#;b`uc+hT>)Q z*%I>)&-t<5V=OFq66xhre9U(OAV5*n_FLuY9!hC=MEWd+8(d+%`*baM~7)O>NO znfp4gpuIFdykfLmZsVf0x~oI2yj1_Yt$)`>b^G2tltG1vEt*nxB?tV7;wzyaeFSqP z?ww4+^+*PXIV7F+HCY@N=DEr>23$q{ww2caiR(*nZ_jpuEvPF%6~*+SU8F@3=zPf zOItX^nT`=flw7B^h4uR2)4Y>7*^Zr1=6}=_!!3d)Y}>m5j$e=g00CkQEOyBPiCEe~GTh>e zHm(RM9*~`W2UYJu6=cE&`R)6s1fF!zEH>$g2mlrOKMCjHKK~c}73A@c#3;nZ3Gp0lf$F_F{D->QaBIwvX9U|3 
zaHIYY9z1&7SR_#Y9wF^`+CMpW3K}3R52P`c3jd@0{vYjm!DMQ=VU|RF9PUj)Z^|ua z6%rVXnp>#8S{SQ{|BB+D{fW3BazjPYLpnQB_3HmX^-}9F#~5xQYcCu@Qtfj$Jpc3! zh)L;L4jC4m{rwKQYqNdx;6Jh~-mj6`<#EfmViIa0oz1I>9%(dAnE>*r~o`< zII7Tqn|@#Je}ZG#ARz|>Ub1!evNZqXM$_ro{JJ~Gdg*FKA$qg8C;sF{sH?K3-%QML*UmVHw@MK z>lPq~N;&IH&H=}cjo1I+H}FB)&;wqrVx&7{-!uPVO(*4gBkYvo_Ne(s(z1Z*tADCn zj*5KE^|`ST`kzdHBhI}0$TKQ#ssD$+O{LTNFmJ*8f86iu@~p9x;?>GO%Sri=is`5f%AfDl)_i15{TokNw_$?G0qjx#Zvxpj&#tdo zMeAe4A`JgYBN}bhdt~3AB;pA&ak?q{hv`niB$@lr8P$3uoG-iuu+Bkk)ys@{bTFOJ z{l&F!nO!NPKMZaOifR=btBj#!KT@3fu?q3kqj7-TFU5G**ane}wy>ml(p!DD&Ewt} z_~*0Fs2+AINUAECRwY()NKx<6q%Sw&3yaOwC#F1@n2LJ6`}RcELAMZ-Nz51B0-s$H znSh$w$3-EQ)OQ7*ScI)bj~6YLF=q9C&RNi)Mp0XZwj*X!OFdn< zUf~e4=tsIs#UyG8#MRO5Qr32IW^Z%tOI)iEecQ{Mk4qvRNn;S7&(Lw^Ojp~%-^TP4 zUghQiL%0$DxrR2-{$#xjX?8aLv=3qrKY-&9+(P8y|V>qEEJD7kZ z5`>%|XnK_kOnHvHr7UE-*ylc}@q15`mM$*)E29|+z_TkWZ;e>9qE?Z3db7*E&)&)b zlFE18z+f$lX=BJwlb8IdcvnRtDko`~d&FIAja)|{E9@le4mx*E; zC6xRPeoXe6FPC~V9s-?sN6hBhM!ykuDTRsYRzGtL!#0_WI^PWkc~aixYD^B6I$6PB zp^Hk+krRM(THEHx4{pd^H-Pkrj7)r$!U5H{NduGO+Aw;{t_fyMA~7smSSeEW4{TD_ zbTNv>#>|=!!E$`*MzUw$feeU@>aSW-!`!;LY;F@tL%T>QCShl{2j<^CQ}tC02ejo3 zeKHJiG}0`PNJ>OSQ~sV$C5-ISZC6e?ieU;)?{$r@tD)496-(cqj;B=lSoaX4Kzi7% z-{8sWvq=UrP!4gB6wHcNW%AwALj|%gg|{G|oAHpL0GU5yldbm1F$2>Ep?gHh;@H1u zPLIh~UJ-(wbhUY6C?)4J_`L^NlgRiMi zrw5Sk)zwGr@JWG!hJ>(f5<%zYL*82-8EjaO7oU{daHeO$vE)e#CT(5~%vubf9qg?3^OISsRPRyR9xhP+x#h>5zJIt-Bp?NuG!-lql`=DF7`~etTfNVVmdH3Z%e@l^7+0BvHYUPLx!8Ja!*+alPo+XAf06!SvlRR zL`K_+3e6w(Wc|Kr0D0@x1BgWKeJH<++e$+XSk3al7=K8xG-b<2N5F|AB|Rm*G5g1F zho}YKSgxK@VN6PC+g-zbMeWu`nHMOWJ}ey5+FF=C2Fn8$AblY3n)>&c zQ=Uc+h*TD%LsfW;&P%H-1Wn<3XrEwI>f7#l+=21y#kLz%gVvH72OE`VKr$W1k-@@Vjrq`}(8dn&S;nLT- zjZHpM++L3$)0t<7L(V}huTB;kws~K-f4gS3{%o-gf$pYD673RJlghv;H3RYWOBid6 z_}}4ogB#Rr8u0T%6WdMRC~9D(Axkd z@fjk)m+cny5DF!6xR7P$%&+6-0F`793Kq_LuL+hcsqnN)Wx4Rb8JFYb5Hb4xQGH_S zhB!v1jb3SwColjjV6^n#N~GwMe}H++jFNbpoV9`4KU+$Y`0&O#1~)=lVuwM8 zJT7$>jaQuM#tWv>^zqM&)P}w+;)p5&;wv6=idhdYBCa9_P&67O@Mlc6KdB}a33({e 
zdyRcbpOcDAYS56z7-m(XMSHBW;?{4iznT5-p@KiG#@z!=NfWFxoJB@fI6R@4_#Vq0%Uqvjr^` z_h`Xl)?q0pQ0gsTTB*fE6`sb_Mt{b9Z?Q%jlD8Jr{x+4Tgx3Tzb{y<;wJ^Im(q^7l z>W^-Rc80e_BYO z(xIX;#>`zQS%s1?DhDLQ@_br9*Xq8`e*05L!0h^WTkc6pQUY|g3(2BrJ%?qLHiNR> zK{(HalY6&ls6oztWQG`?+KEpr@xe_ur+b?SNMM!$A~gtA_@~AoVN@`}hcQ7whT-gm zUY=8Ag9$zo=i-^~-5tQ;&qW@{oc}@6y7lI)Y;zpi`B=6XKr++ulzz)IsQHty}qydJsqfcotiKBQH#kFS~K_QV+v7bL1g@iaB!Osg@Pb zjG~E9m8qI8o0YDbC{SQINfXb`b`XjS!AlmhvmGUjYcEm1Y@u*ngU@1h5OjGb$n$M; z3r=5Ezsi6Y@wZIJmPgWEu3QlIxyZyHoGDGHqO4%g#yU%Fn1MC7k z?}wN01=+aS=Uc=){+H<~os*08rUJ35=%N0(g7j%Ajwczp()DhQ4UB>i7W|ILlcaXA z1Fa1G*%O@%#vHIg4{K=2X9#qWUsRrqs+}?3MSLE%SiEExGZ{T@K1Z=s2@WU44~#e5 z-VH*fNpO_^jKF2Z?Mj|INCEGS(a`9;&kTgeuIZQ|nLb2!Q3Ga)VV#l&cyw}&-I%Gq znz1liAdD|^G)s9eDe}eo6^(_fa`p`!p!17fIJpuO+a?4cQ8>kju37C&?yJ%v@(eZIjVYa&Rsk3)xZp#0e(!KltgQfpbXi#@m*hkdyDW zJa@A-ekGV;X8Y{r^g&AKUPk400VS4Aj`9q&_h>FwPTH@$u1d=6^7pPeDT&LR>2)+Z zt7bpd%0FF>ZKS|08X8AE2|MD>LsCm3$5xW&Po9aeHuo+|03_in^Tu)8zN5c674&a? z`$dOGFLU;}>bm`bPGOql6#k_oT6c>fTLM1}=S)qA6`)>T7>L_L955Csj@+3i{>{5K z@AbE#1`lTL9=mQ2)Lw8;*u`7YRrG4p!@WsV_GB7NLIuu%S* zT~;=Eklkx6rQ%W%MZh#yUmOGrA%PL1yevH`W{XB}^Kq3RHu~A@N5}Ho^5We>f-i0K7 z$5&){mcz1C@_J0P5p%b;Kp&mX8$3!KFru0uRMijfp}DnvM11$IIej(GsA#>{l!NCr zd63^r+iWa52T%s545&O^5-M6xyemTu5G6LE4ea`%!u#4X#hG`X{($@PU{ulVP-_)& z5fxFRx)!jpkb2<4Wb7!6RuI3aQG4SVdn}fW)l6F@~-kg-M1q`gSmdU6j#Ttd(u6~l@mZMcTN2K&)=iFmr^ zC0CLZ_}Q+kW&q&}R+lhbHaP79N_y2xtYMf%JbTJi^Df{}VC{f<>E)^U&nL^fuBcsN zP%OEmrhMcn%Im76H|^{mj7jWXKdlM=I|+&W{9a>X72fDiNxW&7Z=(ZthZ}RQ0YT0O z)Jk#_nwf-3T4YHJ=_B7Lmx}y?fVS=TgQ}ssLk@RBqq=(A8ZFo!Py^sn?9D>8cFevgiIWF`ZaNptVi_{BGhb6Y5H<|X{TTTo0Jcg(;(MD+|ALPOfFrua&oG#jT?=4CAAWZ~fqb1X!n1U-{P1H^^WHz3n zDg1`)?2*aKr}2e-Nq~1WG=x8HbF2*Aj@4Kr1=ed{__Ic3p3K zZ&7|NjLHB_u34cv5d+zrZMk3yeeb+3;tsiqI}^Npd<@sXml%|FR>uUiIvmCa+Zvx1 zf4{(1hDz{H)by((m_~LgUXM&DMu}5zPgz7xukl#MizGr;4o|kCRe`(^pHtLPXLM}o zOex8zAQdLvHDJ4Rh~$xU@TBhP|A=1-uZ}wM6GKal^SuidLa*z_q;OPO!emXy)quS3 zeF6z-bte3~&2yqKIiUQUzCTr*T8AFnc@myCFSouqg-}j4XM8i3Ijam(_;^*kW@fC< 
z6$g8etxjKy`-|u}WvOV>&E--aXY90aVI*yAb9Hd_mj`1|2J}T+!I2-Q(yI`ixW~$# zAZI6qlJelR{t?~0fH+uj^f+ zXs7&9f!cof z3cP29u;(1l#Si_V%QAjg*vV*w}Ru%m!VtpyCTzG38qGzn!_j$krW=!#-v zc^|JW`Qfp^TYAnKS0mX%2|=p!%wvajappkbF%_XSg)`zq;Pj8Xz_!I zx9v$)pcD0xxdd=v zAd0N`$DeG1>m{U6*$>0g6|?c%d4UbnRXXCzILBBf1h-kba)kZYa1#aV2BFf_eiY&W zQN@9@mA|A|a`b5h?e@3I$N7=0>SGH_)aOrm=Y&#Qk*1Q64o$soJIK@S+30tbjeBm9 z&HT&0>DwC#Ev`Q#Z&uRAZ4tH$rUXT}Ei~Tj-U3e*8+Q-&P-Gyfa;tWqy+pz#^_gJ- zCMTSrJm^(nNl)%)YmrAW^G7zNok2A}ec_#`oc1oWH@X(txkR@6&#~7`bZjvTG+rdT zoemZ1Z8uGnNt7cxYKga(R}kgZ{=!fhUn>fSUrDl@zfJGD|v93FKl9~FSOW(YA=0&P3nIrWGz2(D1PCe4Ejw%?i z?SP<+=lv(y?Nx|S_T31#uqM zmwx}q!gQRIHG9*xRLMnlugMX&zx@?j6EFUkf(${yEA|bHi+{$n`-Oq;crz*~A_TQK zVt;QVmbG}QT8Q&wPOr^Iej~)0-mmh-IO?yNesNAXp+A47n3$8kD%@`UYtZ@-!#xJZlE8A}t>nu@b5s#VBp5hnMBaz zLbCrm3veX5uk}bk@WYb`Sz_4le#5a zNal~Dxhf7`W|*;xo#Q(?*2Su+uwn0s>?p~ssc5aB)X_tqlNrC*t1#;O3Ob@{tLh8( z_!C|1#>{NR*F(7->!7PCZ)DhEY@HXkd$zjG+!ucutrk7rc;>cY;aXyz)80+UlK#k| z8avMkBrV~9Wd0tSR+9Fao7860(}+jULqaM7;6)pZf%cb9*!QT zRLZ;{KU;0C03Sag=omx>xZLt{#xl_Pz~7F^U7pVezn+@BC^Uf84mo;xz&zK1@<($% z??>Ia-8-9cjS_OhddE8yBAL!r?Akuh5$53K=rVpnXck5|@PGELY<%ih`y@l-myiBc?{IX1Vi ztfo~Y4xB|G;@2?eBCZs$g4A(rt-c^%IKArcdgDsEUSa6T;$9#*7B-*uxXoO2=AM%- z`LicS%nA3h6MBu^vaiPv{2yW})^TS)Q+w``!oYs39wWi+Ola;33ZXm#JHu(@f}5eg zOR7=u*HrtWVbY|UH^DgfD+8(ucbYZA?Np^vK)+fqiUUrL(1j9>VbH+vGJ|!OW%T@=mFUQtY$*;qRXO~MQR!1S=z$4eix9I9cFRVt2HOK7`=-Qe(cr{MCj zS6nxCbLwHzvc`dvC?{lUy%Et#^J~BfF`Shn*hYlQQ1)wM1F6gFKHdG1<9!D zcPqo0JKsKO>;r1gH8xX;=raEc&2u+G>UJHnGZjYG@JIO zq*|%5nUAXmr?-5Q?64l6>N$``HPNe-w-nuSP`dI%Dhgse5(q9V3X`Al^JgJ4DfmkJcQjB^0DpI zhqhA41(xD?=|5YRoj`WgSj`#;m_>EyY!buF!HU*w5S{k>MlqJghQUFO+Yb_*UCbTZ zOAHGsHRTTmz&(%*;dN%viKNv{7C2BI#av9~iRKl*XaG5ND^}LL%+E8nO8t3nDN=BU zcV52Cu_c7kZNjC&(mLQpqa&G(QXM5g{-7N9OYMGA_UyxFCm7?178~88M}@{br@7d- zGV35Uo#L$WZ-Y)9xhG`Jn$Eu1Eqfj*=frIl3u`41UQzPQ)7*R%5|v8w$<(|evL<`h zO6-s1#-0eKpZ>sW12dp??iO}2n8jKKr;S^esy5hlge)NWhFa{tCujB=%e(?p13nG- zMZU7P_djFuezTgi`2{_0N7A8h-o3s+hP%(5@VRAr)vIgsZAftFl=aTUuZNDf*=X35 
z_1*R&v){pzaM}xg%5V3eC=dFR7l}2@@?OC?hxtN{J&#ubM z)Cwqpoja&qLR`8i<1h&Y^1~H^`pao;+bvt^48kTWEaz~0NBQOjQ;9VM0#Dp==?~KV zg@A+$#P(I;Ezt=PO_Vwz2h~b}i)Sq#XlORaE7LAq&3>A>V&aHR@Y;CZ8X}CvU|UR3Ysq7tQ9()))5^+2fO*lj9Y3pEj6?1$ryUbFB6)7m)LnD zRME3-!!qF)EzlR*Q(fFOxO>@J_FbMJ)^W(kwxu?{Gs|>~^7G*RqTioe0BxLcDih2L zkcG|u%AlBb{t|~Rp_i9xp}mKERy2GC!u`?=-bjwH{r>nSpBqG*%oha!^)Jq5G z6oWOrtIZDD+nLQuT6}Qb7E!r0qJQaQz%cZ1*#R{DW!xgShaSs7w^$w^c}^SIeUj5k za&kqS5ax8$38}39-s!u5Op%wf{q5ui0*|(;uP{q$2{dXVgP^lnLUwN`Mwc2saZCMJ z-35}u#@Zdyx0`%knz&WqJ1? zm}AQ8y-?j?PSuc%zuZQ^*N}q#Y{3;3k-n1$m*i(31j71S!>oZSF4#e>IBhmrJ$0*T z+>%=`Qdkw!6F|GK%!p-@VO%gR{Y9U>=6P)PvU3vwX>-QZ(L?4zU3lz9WGJ>gN5E{P z_)nOnk5E2&hX~v=7gO~(>;2nAIofnm{Nt0Qn4z|%;wDDeb&*|p2&8M zo%7#_P^U%d>@iHmKEtxPn!v7RR@+X1*D+Am%`Y=yS!gMDUcS2$f0gO&OcGldqS%rt zIL))mfmunY-1b5uEaVDTdhU%{cywUJ;Ks=Mu~(Ec_f(Zt*rVp^A}{xZbTbM+YpZ<# zbgarebbot2q{6SR`Rraj=PxmF1IRAFQ5aY{~vlts^ zajV+gAg^(U56(-S#*fOtdrfB$>~D4mvF$6V9~STRtD4ushLK98R&yeYT-WKo{C$go z&F}+`@82f3I{S<-Q{6Aad=-ynH_rVy;e4ve^Y&U-=AXs(F()dXsIm+U@Kn*2`tQdk z9r>pXw)oh!qv4Mz^lL+>vAiY)oY>OW{YZtj7#BzTJEI+2S)Vqql=JlXOMRr~;b(O9 zB4!$OCvf!U5xK9$laM+4>oLxpK@yj46!_V!^!jqJP2p{%#jHSdu6Vo%@dE~Iisa`q z2ReD_`V&CCAgR?Hn+JB#1lYAZLI1PD3sKY0UP$gX!wh$JQ^5}WDIc1m7W_b!;(_x* z=#^1+Efa!OeM$10P+k#)+$xSMS0~!O=<2NHYeM*kJ@NWZJ&BbeYP$v1(N>r5zgxdq zhVqoYUeX1QFGvp!mhYtaa-u}`mBky zkDaL75FZ*f?XpcQHq8t%SyxlxUC5CQYf+UCMYli`q|6VpXP^AUO9S}f%;$ime`axt zzW;V&oM=&g_zV*B(1qrQFcosOrn0FG+RvAD@#D2uZ!`YVP(O)hdSCX)4ne0B|DOjL zmKAmjO{t7n4uzg7E=8{uv}KnPSvuah7`(!t@LrT=+<0Qo_8u7f`PE{8O_hO7l#SZ2 zv0zYu`YY)blgJ46U24snr=aCYH}PL@YI==Xw(A#}ya1R&Q$_a^IG$4TS2jc*0ps3- zHQUx)-aoVVmFQe}#xpk9m$DnaMUL5Mh8*9_qY};@BN!mrA8O62v6rf+L56ZV=ge`i zlPPE6rJ~8vuTV^b1m=F~&sc`Mbk>U0@}mR#BPboo1(DB-vooSAwr^LojU7s~yDY;}fG&(2CXZoTX_)dgr*FLDb?e&i*%;*jH|fbD zDixO=;1CiiOW%Guq;rfdp95|;5gvg*#@*wEghP=Hv(9fQ`t7N;6uC}bknih}ym;3l z?j5@9@#VGgOs^4Q7V(Y2Z#r_i5^1aees8Jc`QqfRO8H{ztVOAUj#2M;UCkf8yT`3e zG7V@Ez3IWob{q!pN@Ua5fkfbCJE_s6sw8&%>#;*6s!huPP1H%(w+{(to(LPBeQK?C 
z%&9+MxweW6_(P+t`5S6(SS*ZvW88fxFPcj(a(A?q%$Cf+lxO!thg;JHF^v7g!rj$2 zh+1`hZPMD!Y%;<-VbzUHP$Mh;>k_75rOZ@ErF=K&DgMidq9+NryKGr-Dfxp^IyU2m zcTxxJYVCGWK4FWQ6*G9pckN1MbrXUjeR10|CUEa34;kvw$4i{_-;-OO_ydx%I!eIo zN|3qG_1MNsmwz-q$>dY~xQ*-W_8}0N*S7Br?;WSTrZaZ#8}lv$dlsT)1fBsA%rRPw zUwxIzwB~FI?~%V4KAM+Yjt#`w$MQ_In!f2l3QZoq6$Y_4*FdlffUsR8OAPG#2C@vIXbhv}ERz{E>mbV5rvrB1H(c%@hz#RGv0Ywtl>QPhRx(mMm( zCADDax~ix(kpflz%tftBti_Y;0?&5AgoEmm(5I?+8~U_ck})zdTwI;yls+`P!C7xR z#CsswMSRT({)Lq7W+OIQJ}237OLV*)g=5&)m>m$AdhO;QK8-xgAim*nkmnQCfBKq4 z$$I-?I?^33gEJ5L3ihWhbQzSDHj6ERk|Z?K@009K7~7eBc!!%xjJN$pVXtygkbh%= z{L-U2Rs2OXYxb-~<4;$P^q*F9Hyqs0o)0K26%}KJe73z{G0+uQe6Aex@S9%L9iDU5`FJ&pOIFUi^(WwBMDhWiY=pe~z&w9W(yH@PY+hjAUJN1f7Hmej3Mj9R&*fZe&<3{OZRB;Z#$NhrR6l z39hfi-T%F55Ar+e`)p@<;yiqvZkGr66DyZ-<)5lSUT6yK2~FlUxiRKVOw)8xPCb_d zHB}O19}u5C*mD$%fNV!mJm)p9J1NR~{wl^e0&Wl!b~?!(_;egtjuz5GCFS}ulQL(- zEOM5TL_y}Er)1c@<98*`fLHve*=X>tyK$=8(^o(1#53pRO?;dv;**6XD zYyn$=yE`M60&yqMwUUONaZmMB0 zrxvO}+=y4FJpTCiho`e<4RXO_1(ypzf}C=g^&d@B0r+I>L}cOMxyX;3P9j|V?2xPNz&ys1uLBUO%~9D*bhIW~uX|te>C>2? zR#0;8qHOcsAZ#=`a>S0J;}de!{$y;=;KIj&DOkH(6Cgtjr`>wJsdN1E#HTLNzZG)A zeVjZ`O>=gS3*Xm*E1jg@D7;{y@0%`LVpisRfoNx!(3NZ-VWqo>WlAZ-W5C&+Swe!h zO7NEhms-r8oa_2j%@V2|#T_sFsR4a+_qmt8tzk~G42scE0{T|&VFx^qO?6JpKGYi- zZ{4mpDc?NEdx3KVy zCnt6(>_RT22Vb5FqHtTCof6|>VJk7>sgDc4Xt-iuR`ppqn0wBc$#(0({Zhcy@-T$< z#)9HOIhOt%cY}PKrOtkjEWQq%{psDj;!HlfDbd7I`N5fX)7~QwZp?XzH6A5iGq|aU z`8_zbRLLiaCHzW!uy;6~0xTgSf>MV041|+?u!*RTq`{GXA-k0Ax5?PuiGNm~!urP* zMJo0qy6fwuOOMO}(CW?LzOlLmTMAH~_9Sc8&g$uBU^&srlIqFk`FzK4rXzNi$>;+$!IV+w5^aQL?^^@z29%Zr})TBvu-FEaTsOD`1_mCPJIS?=8 zRAz&~cV_kNYLgyUY_a+!eEyxTun*jnsKjN>u7Wl=RSMKBKH6G?ljKnZT}8tmJEyf! 
z?c=dn#6s@jdyU~PVGayc6$vw|+kg2Ph?sWzq+MOEzOm4SS#UVa$Y!`Nz`)fM``Fup zKQHt2B4A|7viJRvKMw?MeB-DXoPe0qbu z6!1tc@|6z-3Z1gCvk33E!h6cPR7AbHIDzdB9^KXHBS?o~-QzoWQCY_&x?cD3Y-uIz zLcag{*0 z8_3}Ockb?zkhoBj5Q2!y2Fn9+a@q)|&ZBkorkmR*-glT&)u$BppxIMevv87Fa3w>7M>*AtCi7EhD(wv86iC>6)Yg;; zLb(u??Mpr)zft{p1COz%6m01`PqAWqDj!F`3TIR%720jym*2qJ6LPVV0yWWel9S-F{SSJ1F#QOP&H`%$T*+XPlK>t*$)D}1i~ zzzj;~EL>}_RP0;!>`1!4Gv)`G3(1@c=KId?x9r#J>XI*dE74Kk3KDBstm_Qhnkb#w zYy2BgIr9maO&x~m`r~Q_-_a4UN#D!hutH>?ilI?#K1|u7gQGp;3PuT*Hwk;dFIBz= zEVtxw9F#b`wUNW5CaPN%ZZ?>0noxCueWNqu%Vy@yg)UIx6eM6NGKoA* zI}J!DY5Dzx$7UMC0HHU(poY3B%d4j#n$cE#hitkpbE+{N+Gg`x4dA^$ zXFJ~qoX&`p36+4L<%BaPOhHAdM$#z0QF0rzEfz%K=c4BMjB#IYe8A`XOQp1#T6T&F z^NLoZ_j0K*hQ>?967K$3L8MBXz%2o+gb9WuaXlL>wp;X4nZ|;HCDJI=;GOLSXCAIrM7BB&l_OV%#m4z z8qlRLW+mOj_cLp2!lw1}y6kgsWH+as%3weXi@olNIMYyS!za-{@~9d>SU=SzS(gos zbj19ABx&S64*e!iF|Ey9v~b&e!^G=W;8=e8T`hqs=YnHYr>>s=fc|Pt^j0PNmEP49 z!cCha(S`1e#ajryDrUc>{$okl(j$*X340VkKrA|_#2ysvQ2F7|( zu3_oBtXTBLs5%NuRblD=sKBh*jb~Til=Cj)lQtIAQ?Y6K-oSC1xzY<`OZ0-gOq|zO zyJEW!70QL*&Ux-J#{{)-baf2v9>4(BcQ4zcmT_*#E~#5)ke*sQRt8u~Q)u2BQM4=b zNE*=Vs;~Wnbba+09lMK;59aue(~GCacMEw->vw`XhW$x0&EB4CcploGJ^I;$hAVb1 zn6kPR%xFD~@1{b{-S5SO({DtNWX}$cohzntWe91?BcQ&Cw3Cy~gwSgP%eQJyuUBd^ z8WK0@(9N@;NCrV#qY3&c5v5wp)iKXQmX2}c=!7A}_08gfX-$=D(!_X|GAxA?Q?wMK zvv8m3?29-tv~xHMJCL?;Pp^NgOidQ7t;ihIr2$mF6Tx{ZNv7_^`fZV`lhoa1Oc-zc zW8w^Fy6+Q-+yzt8viR(Wg*MiYJk!)`TXvK)uIb)(b44|L;E?==${43d!!1|VF9wE3 zcIFx1K6CeP41DZ-K<-W0LE%2TtP^^Zqkrf2^5=?!@P~=0h;gltUj)!k#M>=Z+0>T& zo)bk=GW{fGd?~q!Ju2`8fw}mKc~|eVY#;zt%^3Ge>v{b4jhC!9_+ID-1t>x#W(L18ATBr61wJQr6S3*T>9f9cG$Xns+0z zsqahS=DC0}246vEC6!q7v-p(ofk^Qp33n+MkiAsBGp+$nm&wZ6 zZ!BR512G%s_GuZLde?^w$T4^$Yj*A1UFfVAatDUE!KvokoZZ{cX)hAcA;R29Rl5qh zzeZb`SJ+Y1#r5sj1XYfp&^aI47|QvlVr*f>MG@eNmVZ|cMXZsMVO7JojdbN#Wvnbx>2 zZ^xJ*&E+r9!TTEvyOHx5XcG%;d#xYP5qDN}KVzIB8|F3)Wg&s^@hP2ME~iL6X{49> zHIz_(Bg6@)a(i=DtWQ?@jzU>*Cd|Nyg=kW^ZYCGa5awWvcCqoL&)$>edp2s#D8=#Osw7U)*)8@eXaZYXK3=g zvX%KiZo=1v(aSAJWLN%B(B=Ki*qLkM(N-a7E+yuEZ0sHHQ8KI~o*$)3kl{>PC&O^6 
zgNJRAHbdusE%)Cik zOK}F&uAy#`H6R&Of;{PG7WSt71!|<1d{^Q?^ zu^n~<-5=+0Pf*6xvq2z9mWx&KU(v@w_rB8nkybJ)I(W-qA;Aa!$A$%-gsR5xvc*uL zZ*(wWa2x-|UN1KGv@Yem^G6#RhFp(zFp-7nKF`UM5*Mx|ygf(y1=f?-ev4`U#U6*b z1c}8$`Nz2n2)b~U^0Q%YdXXSt@|TmG!uzu|NhPZupLponWs(@+pSI}O!QGo)WOdn( z7LSCPlkfl0iBUQ7I5M(_V__8~JJOjW)(?3DgD z_OdwAl`QK0u!m)z{D(b(_w70h^TEUQa{IPO7c z^JDA_yi;ga(lT-K+*OS3KV=yggn{!D2#YN_@6lGJl|BJmo{7I!Ho6+V4nUm5Dj)cZ zGu0eWE|Ggv&E*mbrN@x`O-p$f>Yg`8lC0g93}cv0QwC`1Um;tq2E9!+V)X!5rSQOa zlY998L$;bDPa?y;Vh~oozB>I_VQl==so<|UDmGY>=WmQh2KEDbhZbH`L9aB{|0-D+ zvc~}5&4ie;NTmq*xT z1%}H$$_auQq1gY2OI8p#zg!dS$rWkB5jWxmo$WyWv%v2K%o9DCm+he#PTc<~FFM+~ zi>P9kRK~un#Rv(BmI^HRxcC=u3QCA-{yGsl^8NpsIQDp^_x3LB!g75}9Jfx$s%2%l zMJLCrmTMd*jx$P~%xJ{H+}T8?@=6xDm3tg1wTeXhiO*7|n>vc671>F4%uyD?``vFo z@8|vZdG`H2-{aY`mw%LylwjZ`NvHQUZ9_Rdx3EOK?{{_M*q}vTkwoMof48|Yx#7mJq&#) zw=5WX<3TyM>4)_$W#HW&zOw%g?uWpL`#R$5P>fFLl3>Lw!#gG&*~8R`rC|;I`#G+R znN$pmpj6zxoEr8LUgnKlw*x%2jH>$gKO|)Vcuz7AdA|7jhC7OMiD)z?N zVxndW<9D~KF7MjMcaV~&c&`irvRlE0WO?Xit0omn?}2MX>)+I9NA_J@)a)qTc2l9s z6f>^Jfl5c;nV6JS64Y%w&T|YNX60NBI8F`r%Z+B*9tQp?B6htlKj?Orw1gFTRLeHyGbi z9;+~CJ`>0CnrSd*h^ZkSxppkQ_H!0Ha`BPqXS6pAf4>?WT^espl!^@W8 zjq%(%j|eA!&<>$Yhxx5QKO3lvN64S-_wBj<&d1=cl*12AhL!P8$oddk3P?kg{%7Df zju&TWzM*9C#}$vFHIKd>A83s0br{revhax=M4R$ntDJTYs9|5e`cZ$^npY+b&3qB_ zSGvjznYLRLX+9c2-HI(JN&EN46({IYBx|dVIsqbaM6@Tnew@?VD~5gC4jL&quzNbm zzmoMs1U=x<3N&HY)E_jG_qzi-t(^S=(57b_)vEdr;0r9PCeFRw9LkQpicVxzj*l?u z6l)vU&XaCvH&h%uW^1LuMXjx@0Cfi#YYL2+(|yE`OAE&KI1FOG1+(jCQO65&Ra}($ zT&&w=^IahoO~o>#o)p9L|A+XMn^}zw;{H-M(?8i~O;+ti(P^ldE zG@y!sw1lWzl(Ge9;81nzv0?}MEVZNvR!DLS*~gJ!qkmd}n;&A1hCzb9D_Kg+iyT1x zaXGHbA;C^^FVdER>=26dLj7Ho3ZCd5ah9zPCGpzX#W==*Pa&;cQhO^-;(ZbWz)7Z3B- zeR+Ej7k7)21vQPi7JG>dT&Qdgh?V9NwCXeT@>Qo)=gU>b{pShdj>SG3VVQ7iz;7 zF{-wAU)o`4V+wn)_&~^@^66p*1%qBmlf;se3QI8kK^gPGUSV7*R?ULnu zQK|ESxamTj()3%gR=?~-N`rDhn-=E@R@>bS;+unE1wZZ{9}Cp2781l^9ZRTfMpbk9 z=&8-_SD5?EkqYF}y69B)Qc(7jnz-}ygYZ|Cv4;ohvqNjgGYB0ZgTkoVO+_hiCR&rL zU*unUmp`)CW)wCl*QO(l=&!2|ci1&ezl8?n@gLK*JP~Qo`WB5gn`oNkXGt3*Sag$2 
z<|Vg*(MUkBpiuj$`!24So4qB98A9+%HrPCi^N`d*;cLHrBg!o#d}W>4NtkxQ4aUy3 ziMr9`_ZwTvQR(An^+&v<3VOpSMBy)EQr&nS?B!KfJZD{)mx&GKC*`@M z{H;q-YDTElh(%}ewi60HFZ6Ga4fGPU^%kQA)_jA!dhHF=rHnrF{kB1UDdAuS7{YCj Lx9e4xkj(!Ax==r6 literal 0 HcmV?d00001 diff --git a/mkdocs/docs/img/sprite_download.png b/mkdocs/docs/img/sprite_download.png new file mode 100644 index 0000000000000000000000000000000000000000..f2babd575dc1cbd6e9342cc58ca795377d35afdb GIT binary patch literal 16799 zcmaL9byS-{(CCd7Ep7#ZYjJli4n>P=aVwVK4enmt9g0IKZo#d%7nk4^w@~b(uifvD z``(=MFknn*JH!$I|dc`^>cnF`118Y;wG!- z_Q~1W&C?8M1t(?cY;HxR=xAnRrDFqjVB?XVPEmW7Xl zg^(qUggBL`m+-3rM=LioDlf+`P9R|~F`ECdEBt!??=}Yw)qjY%If&8xr&D?=>QvIs zKr1Rfc1|`6PJT`*elB({9$ot`v%N~NfxMmE%)Ho~K(zmD zLBu>zoJ}(rGZvvZq7h0XXh{f z9Yu9pXE$e%t+NZ2^d~+l6*CJvr+?S~A+Mq$tmp)CGjp=AQj`&+d9}c9XJ;wQ&CM;t zBP+;Tt70}MZ%2E#K>`3(=RTj4U-+kfyU+w*uuI2yk3)lau%kk05?ukdhi;`oX(Qd(Zie|+td0lF!B(ZgdEn&k}~O&w^8 z>?^KhaE^p%K#G;csY3icy5ewJ$krr-^7@+4EHpGa#pDKa+M{G(JcMAk2y@ zAD4bbfGckvCZKO$D4eZfeFQD1|6@RV6@1dY-!HZip7n9y6F|ybPIQY;UY&domoq^$ znnL$MBL=odWST@B_g;kDOd=z~0LQJ9!zQ&qM$$&IgTXny;Z0Zk5gd0m95{LV4p;Lg z8+Ex$iXYRl_%@~x>ANvXi<@~XA@B=8i|)%}?buwZ+!X?a3Y8yVnUE0Qeo6SMC8Aws z%oTAu9Q2kmVDg4^0;oI}|4=6MK~4_-4;-B-+44!cYW9I=iC^WT=PRN#<7uR2G;gX^m~zA)LhEquX)c?AGh2jr8?EN4OcXVV z;~SPr3a2dln~!dJXklj=nG><%dSc7eo7xW;2yhgKuf<^15ZR7 zUEEA3kE=8gb=FL$&gf{@0wF=_TtZ_KqgzL6nv?JpI3FKMS`Li6q^-nGqp!0~jK z&Hlv0L(YyC>gE8|dPLM;-oe__-3N@b41Zvsb@qTCV*MRwZe!@b(0!)+0&c{o0{S%1 zW01+)!2R+C-F1r-pJk9*5|M`f2tOqLoQ4Z)CPSKaQ67mtJB zf~Z+z98vUy`wi2tN08e*72TJeg@}!3N6n#{y$O;{GJyaQd8jpTz`TBE2V)#ocq31~ z!DHeRdw(Lais)#Qn#!mvBe^;hCsL}okh7kvm@s!By?Ue6nbAR#le#~q-&gU@yQ!Pi zv}<+lsMJe!7w*Fk(j+S<-1mdt#8d3U%X}W3q|sxS?#FO{$Wv`+`VYS@0I!j(gykt8 zjVk0ac&Y+o3M9%E3piX?>%J3K(71|O$W&KS^usI8M>t51StG2gAwVis9RKVT#W@=p zzJ=9< z;LTNs0;5@f?4#MJA-0s3Z3|8M^gxY*RS{C2Ich`|AIFCJ%5YKaz#L^PFm_E zo@OVpm!ESz&S%FC3((q#q%aX0S)Gb?CWjz+8Y1Qk+VMd=v|K}y)zfqhVpgiFUYT|u ztHh3AgN83Je|(%tq*5S%yaM0 z{Oq1@nou^|=X^xJi6muVAJQ?)Seg`OiQXXs(8zc>zH(f=gfjHho)iq!#Ob5-xlH=T 
zXY5(nYBg?p9;7*c?LGENVQX$tnlCE0rs7&8(whLtMvpJ==b0~bqFxvaalqIOJqv^$ zE=|+JotCVREY1M|92FXGuzq5Xot#~}zPuQH{3-4ihzBwMc>a77x%vlk7hp$WEBt`Q zInf=VkVI#DR)MsphZBrTlvNzbJoxTizvNhs;#G&|7v3QW=z#S_?QfR?C)7?>zI$x5*H38H#y94`6XM#84uhuOkiOWQ zDVnfMs~SPqvCfv>jk3u*P%fi|%~$W)P7v(j^rZ{f=OBPz;os`U?KK6=k^MjvMoOHNL|+Nb%; zclDh8@cko=nq5^CZTCpwkDb`;g?vcADHCwl<8TkR{V?Qr=M5Ssq9}=5X=|sKRC0G4ckVGg}HQV?XrymN&Do2h;IK~_{KX&+$s-$N2_}FP>iT+i^4k5D zFQw1VyvSB_LTs)yu6GOHu?EZD$$h(buHxg|vKDxbKb1ygl>P4J7|Y?Y9$ev2#&){G zc3h2Ff2k!uMI;cDnQ5@amRLc7rJ!~97sQKv=f8})fexlU7>l|oZ5uAf1XW%ww0m|634J{>o#6qtVhg@F<0bw6E51KgTaTFqu@IE0_M^Ba zYEwd}WOD{Fz48tS&lJsbWEe362uJf58?onE&1f}B$=@!P^7kIP9S$QKtIMcXd*I=q zFiZ{w=J&`c&IF$CX1Dm3#nck)UgzQ)ZDIM&Y^~hF;`)eHCRyzlpgnGfK9PWmHK{h!zv9q1d@0}x4S*i^C%VWe*H6@e zEE|?ysUR17UXhCnXMfU^mGTmN1;!K<=e$#cjd1=h)j)r2?Pc0#8ya$EYf z;7p+hK4$@C)wX^s|BQ8ga`ZYHspd_i7R}MWz?_9DuScwbf;r4X|NiQT;Hk#p>J~rw z`n+RTH%jGei%y@iJ?QSq#hsVwBW6?ZVzsDmlF*^Pzq8+E-C0J4@34vRcM8v{Ip7#g z<0^@3Lyh_mmDfym-^-|d26f+U<3fDT#ZJer#ufLeAsgJ`9{gLG{XF4SSpt$q7Sp6d z8M9c{vpobO3|}s%OZ=}i>R}-mC;7j_Z^Nt>4j~-YK64mHzv*U2MTa*1rXs-I`b*7r zHlSt4W`)L@t+5-&1VJdf;3Ty|^G@o^n2ALR8YWF^ah<8{p}o{N=DlAT|E3PEf}TG6K(UssQ!AV z+IsY54dHEp#RYlRn97Qk=-@|7d3N~s@#LNp*`5|XKd%4}Hm86i&Sr%}_}#ZVfDaX< z2E5UeMnZk9zj}oTfp~t^Z;3&pCP1We6nh;Jcvdzyg7KUt+=|H-{njmTWvUr_{SARt z-5r2Ld9Ky9bthe0pl)Z0798I1Iq+9yLQp1!Ew*LZNLLfXmz{@{F&zrv%dQt=m-xtq z5gIgU%xBP)xktKf9#2MrTF9@ktDxJeHp97G<#7hP$7sPypSUaDg1ALK$?lJ+Pg(oE zFK0S+-wUrvb7HU~aJ^typ@W7Zjy`mwu+-?%_g{x4S*eD|p;j1Tq)6ZsvJ2j|4_COK zHoxnL^8K)cx?y%9OI*(L7FqE;o;FYJz%PKk%&P;8ze7Qt&nGX|?9v#g+j_YJr$7~n z;gV;?grS0{3I%YxRk<>rx_=Yb{+RE2Waxw@6h%wVHAMdsb52gNF=r6nTBCCwphO~N z@Mh+Zcf>kV+%t1*f;wH5sYpRaMWZ%fU!^9?L*%BPQ5cylYReTsW*$=?Z1}J71ST`J z(VhuMzf_5o7)OxKR95uo%pF?px2Dg&#dMmVW!-BlemiohUTb7cpk%*@%x&3XE3So3 zl9a0~hwsyvnJc%8}Sip)Hp5#)Z@9p@v}@_$Y;&d z3EA=_6+P8$%@!hi;$zq9@L74{gP+p-g<;S4_`rx2Z4yP&#m#5!j1MC#JrN{qp^5qq z-kF(LK0=~g^5!J?M4s=tVsIhS+gU>3r(da6vq|Ea^*ipd(#^`<_W8f`nUi#P0<@|l zi_}Xyh$z2FCI?(>Ox?ls5sjh3GY6=LMcgqT@7`O*&_^m7j-R5#&l;1j`wp-AhYPX1 
zMz4=pYg1=bQIIDhtw^5HJ|+8+`l1_pp2?!{mxpht&4_}4o4e(WQ6pT#uZVPh862vs$WG<6TVIe9t@IE(eAyZwx)`XtHzNB7NbYwl2LpGnr#d)Lx;bk-{>=U- zU^!(JY&%(Dbi^r}e)4#--M@eGSr@1(IPoYa@ zQZS%&Ft?SsqUMU1d!xXlMzaO?x2U($vF*_Tf7RQE&Wv{VDYr!4Ldd&&y@f8#Isr`l zBI7zEy?X+s8A_{#dbRuu##U6-IuJ|0-_nRGvr8XZkv0E>Axl_BxIV@GRhzU=3xmgs z7t2l$j_1Xg@2zmvU&sIE?o^5k>4UEDqfk19y_0(>Rkb#F)1Jmo!R~V~c%3_`fRKf( z+*Z!J-^LKc>qLWyK;4{(Tu9(M| zj(>DYad4l8iFxUy5`4{s&9@|ti6?Cf@Axp|D{AiaTuX4bw^{ugD+*7f+svF5Z^0+C|OQkI|aCZ*P0X=FFkmao_pq{_;VPBPE6e zck-Q?JoTm&@NadJ#cvMsWLl1BxE#ECyG@Ca{MwSE5L;#`EK?#83??D&H6xPdLyZ}w z)dyS%BGlp1Xd_f`rwKYu{1$57!lm_1hM{&?PeS*=Y9WcpqNJexcN>|#7>`_k5PJIpc`w||MFXxqmUsl>$$BbJVDG@rqV)ExE z%du4Kr;M29@Ym=ajtM|!XJ_~HhuWu~_a+4>`M}yv4=oor7?vOl7{bzzUp=yxSCXSd z15j+1Q7zXu;+Ckx8O+M6b|ZV-WXe!ZgBvfWP=}FyZMl>xwgTg!r!FHlm$1)Y%N`^5 z0&nZOi6ieTR8D7{pIJrPV3&$Cd0Q8o$3UwvPV{O8(K#;t#1v~RQ+-ME@`ehk*~LiL zA69D(Q;7DJ0uA=JqARQo1PatUjv}`RHYQu^FHSaR`PUdDniOGVKgJqtgx9*Yn8Xc_ z{}!%<<3F@pggPsviG6_GRzLHyLKJz>s$p2L07$be z&(~)r5{`K{^36{C`{EYM;7#mU?_1J43GnIU<8mea)Wk+-PvHH$NUV@!Yu#eaeZKlE zLt0k+%QQ1+AY<^415M5McZeO6D%fP8n>WI&8*M}BWKL_Og92AenwbUUJ5wH$U2#12 zi3|){``@`{bKcLuP^*cdg|r0byEJm3?+zmLilbT4QjjXti4y3bQHLsubE{3r^~(!` zI5dBTPhoDOYb>4E&tO`m9iO8wWa?KpI>&Gr4Z)RoqK*#1T`me(W379?05R`w@L_BG zm)%vcZtI!TD)J($`y%zl+E0t+Wnxl(V9fJqXk0p)g(Z#~+d9fd_+bAnZAfjUio6M3 z9zH(y<}On?01oy$sObo{-)*nF>0RnYz*-YtySuf}LNRfhn9YP!@ORI+obUEvb>Gnv zymotjN&!lr{EFl`9^R~vB`wqG^n|>o0D7bTEqIIw<1>q(VuD^UjDIlczW+6x?pgQI z{zrZ$R|VDi@*55&$E~;F&m=YXzjUs8IovMl09lGibV@s`OuNO5J11moe2c4Z9A9=j z_oTa+B!ntFIAEDv9BqR+g5C!$R^e#S==J=D*$VS_Pidd^_x%}Jl(Owb=w0FNCzOKA zu(V(HD?*x@$u|-dtpha3zBZ>j8lLj4oNgFwGuOUQKW6wgu-0swT!cGMpK1G9ui`efd3=bH2EG z5srbg|eJ)iXLY z;pmT{w`-`?hDl~7Bxag#M`amvO%5D~h5T+_`0oM&zmwGB+qVieS)uuB*Cxz;8XqqH z?p~&UF!eJ;ipju(^?V*Y{BSC;GUju&Tu-{UeKXr>4}UCiv>-O3GKHMS^kD6~@)hU! 
zaD5-y_`%aSlg+I4{p19`=pNEAnNd|&bKN$k`L8hk1n z6|fvsu3oB_dh3{0sr@~9`n^7%JhY`iGHQpv;Dk`&4K-g#POWc`TLH74wuQCnG^A>E zY#!_Q<8kwsE&`$^_eCG~j(iH0Hjg=B23Qnya>A9F1UO1;;_E4}`2lJC58;Ep6M!ya z*(7)aszaDPyw!Gyd0d4OsfAhTXWMxC%gnQiOs{5y`t8ZLx0Zz5j?<^bNK6~}2F$12 zjp{5E!y@cOW|!0r^iSY7D8!S)uZySZEo;wzURrcD`KGKawPPjKW%2F?j-~QCB={%2 z<#ahZUIGqp=%zr$j&L10Wqd*|+P;~|t-!SNee#W&`o9}BcO_g+qDQVJ1|+=Gu4u_S zkb~QYBuwM96*l7=1jgZ%&w5?AMg`H*?eyAE;)feeR593cCw2H(_yTRXqxPyp8(_`o zukwSVCavjLyd{4|k!4AC;)f_Z9*KtK{=3 zhRuH#@IwI<8EZ-3vsULfuupib_sC5>jPCaAuF6eGK$9ln%te;-y z`q|~jFps&h@#g~K^@!ZDpL1V@klE)B@aDN(_$Fa~Pp36z;rJfA2zMPa;4-Ywa3Mza z$7#&mMr|r$cQ2Lx!k;mnx4U&8&$uD3vXQ;8!CubzdN7-JO;dRy4UronM?9E83qaEd_unf{kx2>BlOqiHY(h^ z%m(a?`Wh3*g`9>#yxTyOvp=e+qFZ+k>;7L`li9Oni>I2!I;|sf0JlUTLD&tZCVhsY={r3@tA+hN4;zd*Pj<~bWba%b4G&(gP= z^}AbVj8cKzOQyAy+@?K!?Ms6UySts&9o+m`YZner(=rx%ny!-MI*o*dvQcdRMg}_{ zt1l9>e$qtgC%&=JqIddgN#b&3B|A5z6t>ayOHn?Pm@dW{>q+^8c9IWT=C8ml>~;(* zu92=2eA{h`sSmQqjcYLtvdKR`=X>~0cZ~oaMBBoUF@SbQ_>iGvTrfB5J)ZZr5sgMz zbl(T7!`G!Gsv3YG?H&o4_*C6cto$aqm)O{4(PZxr@lP`x!pfgwfAgJ& zv7*k#a&_L1ut-jMZ#_;b-%mNsqZ4IG(K0BHW~)@z>NIA=>}vAtg5My-RpMkP{rbbb zo@-44YNm+P2fVG32PTZ)@M&oTh*aOZR5?pCXd`$}TJrOtcs8MX0xAG&ySK*YcDn-Q zZt3_>1ii%CQT5_8{0?fqZ8veE=n;RO7OS@q68pBZ!n0SXQ)uG?S@xaOU3BJ-*wS|5 zSDu(Xd0bYkkW0l259mGw@spX^FuO9Db`HK2$ivXmS?AMQTn-}^Q=z7u3j%vQO= z8r}?ftai&Fv{%NYB(3iW$V`xQP~9$IP8%bocS%{^dA=Rn!i5BHl9dvf?htu2s%dKU zP+}6{MQgBus$1gt@r=%X#1DL)sec>tbKGfXc05 zJek~E6dfV^*fGZz3M&t}ephq9hqbIRSDSULwi&q=jn!GS!|OEkt})lt`b-F;Q+{Yu zs~!z*gd#_D9EBqM{r@`QN$U+rbx}E z@}vrk2G{&yW^GtGJ(S487ESTG>UaFIp3}uz`|iU#w1B(F5|!p$&dqR>CM?}jnb2ii z@1Q~1$oNO=yrqkkF1|`t|M!o62+x$Q<0qYJ`N}^uysb-|MqOs^8hzhJ4(GbB`HWxW>^VkX=;Ec^{sgBJX z0jZ!|gIKTmO##ek2ZH!M=b^QSGXCGl%xX795vUA0iDu|>PMN1-W5v?#KaUg&c4ivo zqWa#@;6KgA8SZ2xE0SZ9Q2Kg8h{y{iHqO@H5Y0w6^S3t&<5cGNW>D}^gzRl6SY!uzs^^4!@B;et-l zgyb9h@ZF4{+vZL(6a)A8*=EU=)cU<~Vy=dHAo~nBMr%=k=jn(Dlc0Mh)p~y&R0w*P zY)R9kCAB9iSDqHJ@MA*M;=qD{CT%^Q zF-UmCzQS*9S>rfC*RR;ffB)38HX}!^eO*>+dhQ<+YHXiqzxZ?8mB6VUPZ2nD!^n?c 
z@PV7DJ3DH6poSxS;e}DwbZ0~U;|=GZb_F{Dx4fx}gQ~1p>o(lc)0>RT6$>HG`)?cA zLEc&y_X;=qB6&Y9UEje4U+GfY`z_>5=z`;t(KvMjVu?B25?i*A@+c9_Cs1G;Mh|^; zm351x7F6=vn=wJqER^(tq`flikpfy|x4xHL6N`m)qZUPWL0)W2UEuoY#BuzE8ay}l<cM|q&BN@eZbaik9U6Tj z)htHc3>G1O`KA5s9xnG(;}fbho}{>ZZyXXNf+g&N$g9u^U>0=h^(E^$S0(TzDY5LB zaPzW$&?J&Y&1t#eaAv+zw+m&x7CBg=H)S_Rb!a&Ep5V!MHmEIx(wpo10Jo z5IyjtWG@^+UWsmeI|%Iyf`0oT_8?6QF?-+Y*2By#Kv+Ab@1Ew!NF$#6d+=TqBnSI5 z5`RY~7uuLP-zM;KdXV_J`Q2$F1;l6gj_bB!7{5obSlp#Fp!~?N6MHzJ>$}XDS5O5P z=IVX22{CXr33*I{cFGN}%saPo@qY1QcQj1`Wqp0?fp;&`0J#4pS2DFfo6|fly?_v7 zf_&R2n@8<02>o2F+N8EtY#H|9t3?2Z&TxzIW)`{hhl$X3eluZzdW}UEtyl#pz$@3K z7mYC&d^wT^&r~VcyvdUXp~azR>^bXX*G0&7liFrIH$cR?Oyrpmgr z>;FUE6*6L)<(b94j}t1G3@{?pA3S+%d?VEtGI4jZa;H~0Z}0OY&c7D4nj_xNIv?|f zVq<_Y*K7Md#YW0iAcsOX2KCS3rH0^xIn5`)qp}M%#t)?~NsVCZD>9{juzr>Kl|Ypf&rsQChczq0or_<<7k#>o z2J!rr@Cy$PDcv#G^xZN+Y{P0f+U49@{K|k6mH*4dqhKO1>H^u4h!*S)CT7)h5h{~C z*2{mtuno8oXGV$a=R(SgM)#8*SKs=Zb?$$A;MRfp_?Bi+90r56~vWlDd@7ZheJQp50OkmPe#%#T6kPO=Xh~TCZ{0PbcBYe2#&MJBB#FGxah> zF@DkV`r0+bikn;W3gaiKe+2Yl3cECM1@z|J0X|O>(j0wmUt^Da@Aw@wt{6goA!(^I9jZ7a49;=m$i8R(?-| z+NFlllLj*9O!Ya(#EqT{%nN}vi9w*OZTd+R@on1`$7rq`Ar_OlViKWbYuK18F9q&@ zih>h1wPaG>h5f9>$H%AtK!htbE|Ga9^^J#u5)jKR1eJ#9BB%gG*RkJcmf*@E#)aVl zxnbFTR6CrXNj8I!M1sRnI!@|Nn2cm9Kv1}|!nJnK7l7a%;uL$B!o>sA&YS#w8P($f z*Aj`gq1NNbSk9!$lM6Q7-2Np0)UbTOC!vCd;B)#X5(yA^ivsnms#z%WW4NkxU^1!5 z$U7rmF)?4#19oTA4zCM(+j&sFmwd@U6bcYW)T~=eBcwi2Fm#7vc&#;b41q0_B8-Q` z^w6N8Nyt?h8U-Q(tI?!_c*ciDSBjp$6@=u~k=HsqZM1uyZES$A#y1enS0>-~%OD{S zs|dXDxzjJr@mS77gb>G{pG2PpN1U-WuU@iIor;}b^^FxJUs;l|-J{y{!tVF;UZ!QE ziHsw@=o!?mVit`{KE_bFU=6`}V2&Z3f@5)U-$7@@|W~f%(1ljZgIK>=e{NSQ?=DynS5Vd=2X5o4k%Hae+;5mhAW zf##U)s+32fM6q>pxln4Zg+e$40HBzs84`Dv=22<{qaOZ))f-$csrp;NSX?pxNvQ#l z0JT}9)JHo%+uZaJA7c#C3>po|1rC3z3{hHRdFp0N;#wqhf2N7nV*I>jS!@n>i43Lk zT{qj)_e;*~CM9>$w5a`6K|G+Wfq(qi)GZ+l*eJ~`Ke6iUSR=8elJIqyOp&uSJ)wrX z{45kmSWKDnKz~TOjldmgOe(qRfTOgRu&s+1crEEt3+GRSEqEs+Uy!}=k6#^=$Wdsr zG<3w#_!B#=CiBRT;(klzCJy~j&Jn7xn;&Y@%As#UiB|#)(=E|aYEI3}uDlLxmIjO= 
zIx*{jEo1Tx{vnNK{gllO=M0ss?dO?@Z!|G*dkZx?oV9T(cvO~LoDQ4D zR)d}GBCNlDaAcUXVB_49G{cR3K%i68pTw1J>ia5~2b&E_x+TI3DMM)9>n(^*hCfuB zyL7eUPWXtFcwY_V<8DseJ+c(i1Mh_yi5Y}t5Cm(A+S3?(bvk??%tk|N^nR7YMxAbk##4`Iv9SX_OT zax9m4kRHuoD+){OU%X$T?<~iULWFo`6aj7*qUjHE&p(p6ba z)!EP(lCvb0!-`Gb--u#yFV0%-Wz4ZPHpsV8v|{X1d`&4DNj24OJCTElJAs4!4vcUU znw~SX_8P4Yy*?@RFI-cz=}-diZRO(T+FN>NIoe7>!L7$iZ4q?Dg~GrNN>S`|iLCvp zlW*vyfPc|yMubf)jRua!7<6bTG3{fktOgk_g3+)S*IMqm-gS)H2 z(FSbEm7#VeCQ8a-=Q02+fZ#WPuv?jafkfI|+-oyJSH!)}KlAi+{%t!6;Avpuk_BPl z5*K7eMW~LD_Y=F>x1wiKEO7jlHM59C3>8H**j8oAEMYs?K@;mxK>|bw34viQAj!0~ zSHgC20&5JdP+AnwjPTRkMD}+x=|eb|>D6Q4vy3U+OjvB&=eWi#4PUvq{%-*XkY_ zWP(hU0}j)W`k!jJg%qGvnjM82w#c>mv4JT|xR7{^jn4%n;}`KaT%2T8v^Q+kK5;s8 zGW9Jb?TmC--h>NiAt$!?= z*8YJ-%FR4;6ztlTX6G5 zw75#P6D(4X@aLBi)-~|8=O*2t>N_|Nzh##1Wz#YJJ~I4wEKz!R$JH*tHkdkTu#&qG zcE+nwy8A&vUP9pGhw#)b26t5orbDO@b4iMM&0EsM*Np5H$W#72@b?~04@m@CAF)Uc z&9^U)$@H2bs0BM2#*pe zrq_pjRsg>c+SAlk?3unzj+Ls=F2Td| zAepnMJ8#9XocqogEgC?EP~Y=$mm~Q>{O1 z!uL(uVW;IKt(O0S483t22L*Uw5y*je#bSD|^za-;<|Jqj`z3lLwLtZ{BVtO9I@RIC z@;S0hTI(jqjgS2o>?i0o!H>i`oHC8L3bgYVFG2LUI{L6o_xP8u=RGLnjN%t)4{M0n zqEm=fO+cAFqWW*V%YYmyL7poh-4OalMSme}sPnLxpe}d|WFGe0t9}SujE5&Up*KW_ zQ^8m{O1QN;;G=Hwq!D=J*R^(rk%4%1Lr3dxzv*Zb%L%bNqnoX+tDLjn#~cut`NMtt z##4=N=bxXWe~xSYZde|=Qdo2|U+1VGaI$|jQp{a3`=)mTV)9pOcW@hSozNsCb^t-KR23IP{T$_GE?f(41eZo^e zafKwLx54OcW|a$QJ!;^fX=HC^+M54~-Kw`gr&QwU@JL#-z~yh(V@z;VaPUxtcIBAi z`X$lT^Icch9QH5e+sSUvByzeRK`k_pCGd9i5wfbL@VWO{+i}e{$wkYv&J^w0}Gp>}x~c9oa1Xwv-~uq!=EvW|{zD-!U>Uu|HT4*~JOPoH=lqc^6wE}|Et z0GVw9)B9h#F)Mf_Ujl%Lr~{Nu7fE@K#hm#rpA+&+qDP1cWvXW2wc;KlBRKG=mI>ND zPJZ4QK)|h}T>XfV_oJrIz_$-xB2So5+N+CQ-A!|b7>b|3-!5H6QWmR6paB7tqF<>w38j+4jIcWg^(L26^&kM}?RBsKjPb3K_!-Voy-w*1FOwr2pKSJ(0 zC!6}vG8Z?d_}Avr5gpm6eP?W=sicxB0&k-}0uy0{NLu#5DiTt3`0G z0%p5qXrga|moi6hMb4Y6+&#dff6j}#@qF8?>?AlBsWFdwlE&C2pAaof9`#vRomH8V zm8B(72c{VO7OJ<&qRl26VYtmh1Ifm@5YQr%QO)=4dRTh{v2{L23xaL2Nc>o1sc9)} zA;xQHi01`Quk2lrGhbI9ia5UCv(zDO9<(Z-S1)I*_6ylz&Q339c(b17%+xo4?>Wn6 
z($TUrAo#lQQ^k=Hr{;H=l!B!4thaqSymZ!aHIW!M)Qo!@NT^>{muF@R)xC=4keDKj z3~2%FPxLBC)21I!8T@@u7+!GvZFE~~>NDNT%f9$sD+L+Sg-jZi3e88M&APzj+Ai@B zXJ&N1Th2JYlI|#TCQG;T8%r%%$ZZld7iB_4aBKy z7xdrR=^l}HqA zd+mI)Mi456z^)UFpHJ;d}l z_d&aZxxw4fHG*37-_WQ^_snjyoFT2h`Sq7k5I3_dPDhO%r%JRNO?HPBWE1igFbuy- z0;jy^qK_fHhEw$dsE~c_P_HZ)`NEg{P9a+xO{Clz1}jZr;ywdN?M{S2T&?B`TTV`n zqrJT$Av8eoI=T1G!So^1dp2v`^5m6^s;5Yub~tZ{yE{ZtpOZ6bmf>={l4Q9+Z_*M? zlZKY~N+EkDAJmH{Q|y~GwlU*FM(EtxFw_k11_!vC)dm6{%UA9 z5YEby?`;fLl?@v)0<=d_VVLS~_%UxqQ(t;Qu5xsI`ySwNm%s4~XbSOUAVQXY1g6Ab zDjhXbC>LC0eoVh;QyJN2@Qph*oSE8M4d~u(m%OO%D5jt6eCu{evxdZrBFlrLD5Ke^ zR$dgQ^kx`1)WUBqtOz1J3kEZ0=a@B+Sk zFZBTPzY{>HjN;qoBk#UDN8JcKS0RB^j602YS6jPG8On!#&Klowy-C zKb*SA6l$|z(mT{8yslnwzRk_=p^++r-_iC|_yXLtWXQX&2gVgw*H|aC^gZ02bxpJ< z2uER6my>xJKR{k*0CtBnC7#`&NBC_FN4aH&RPL*9^2mHST6;QIj>|2lV;3cNUTi*I zQ?ZN}^o??DCoQjV$==~GAKYt?rr42{Wtul6A9?zjOe-Tl5LcCc|c<9aZ6smsY&k{MaGQs^7oDT zRFRJ2-VNujT~8lBNHMm^pW;VPxvcvQk$Wn1TczA*Z++aZ$Aq0CAHVQ)^O_)^e3bO3 z5v6b5e%iA~F@+nw*wq-2x}aB$srZ#Jm==E5*!ESkR1Vx38_0jvwDGtnuxP&c@$AD+ zZZ~@8zRfs3xDUnPpz48#e>ZliD5@#1N`8H)I)lqs$n0}7$-!#(M2aNQmdNQO7t!Wl zUkOx!-i8wrD~=Sn!JouI7DI zO70F7iGL_iFPoJ5<@G1I_lY2QpNfOYf?d6Y?j)#)@UNsiWf}aRY9H)B(6z|0c2l~3 z@jDKj@z5jy22}hX2{o7>rK>FWbCMe3P%G7-L9Q6MI;4+a2 literal 0 HcmV?d00001 diff --git a/mkdocs/docs/img/sprite_download3.png b/mkdocs/docs/img/sprite_download3.png new file mode 100644 index 0000000000000000000000000000000000000000..9fd451ac971012fef6034c7b1dad5af83efdc86e GIT binary patch literal 20826 zcmXtA18`(bw4T^DH}=N1?POzaY}>YNTbqqF!G;^#nQ&vdg{)t>FV3l_ngz` zeBa!!CQ4aR3JD$`9smF!$w-Ts07w8b z;v(vvYv+DmnM6Ob9xrpNJKG*9WjsW&MFWKs72GrNC5gA^9|~OF_<+cnw`?+?zyZvx6kq@Yl0q`LV>nAnW7iwO*ih85#zZoY z_%pu;)?s#F6;dn zL-8?9h^8CM6NFmAD9bX36Mmas%7gts9Eb%%Frr*y1qlS zpI$PCz>jQ0ag36L-=P;xHG9)7uzJ431S*oedL-eQLUnNzwSwz|ixZSCNA1-MV23z# zq0SKiCOC(e*#c#*-k0zSq_}y(OGW^pxOmZ^;DL>~_oqGhrf5^Lh2ant!De_PSAsR~ zu34^EeHyn5aH0x;w_6x}2yfv78v^S$*oNFUPF}DrLKjNdh3)`QP@hb{XMI$5ly2yvV6B@rrV8Xt|+ 
zsgxr@fXN93@;q)0aQp!^Wl^!exbT7TNTm&IMO$VfN$HA?t>xi&;IPeLM}4JvFgt1H zJr_^14KOMH&JZJipbh{;MWbonuO`u~8*H(n6g8}iW;iN!o%9F_*k`8#<0;>qf12Z- zi;aNA(uvi$TQ0^QzfMWaG?tJPsxRYX@>g@5B|t6uPbJ97U$H`a2ti5;!4t{BY6s64 z7w63vOfdS2X$YoBm=?PJjF7~ii!m+@M8qupCN{!~Vclnx0X0CokY)Y=4!a5#i3~d% zhWC0NUTF7l8sBgyi@S2p+{X`2s+J-Zftv3>&)z%>jS790oWsQnhX_CmRX)OJz_jk2 zSpXz+H^ujev(<1~E`pWt{@NpD`*r*FDDncqV<%z(9M9hiDcpPe4Yv=HgPs-xDF%f2 zDZXGC=zM^eHjwndqVgi#_LH#j0aAPvP3FL`Ctx_dBxuFon>HG$ew2nYdWyUsa%>&` zM1hny`&v>sf*iD{Y$&-6bLU6f(<)9t;ADes_?=Cn2y6o;m~xhaUEW}4I`&c41@%+5 zyL1CafZLWxX6qxks}djPK|U8gsHGB72)gd{Nr!JOKf+*n?Ve8ds)atH2gTbiYd7Z^ z{EYu0|NQX&5_WKr0~hb*yTAJ)#7et0u8yV_`!FWL-Du*(P>!2kaQwKk3JkJ$M=cI+*Kiq{Fq?8$v{w}NCPTAK6X!IN zT{+N?*=d`cnzTLn<)|CCZih|c!(Y&^x2|RMZ0iwpfX{3%cXPQqeb(Zp_vsWiPzg{8 z`aR;Npdaa{xWzNuenpx%c>W`kCTdUwGYefMNg7>+3@{U}AFmZ$=*I_~yxR1bCH&e! zK~j}~f$uX*?(uX{=ZN3I_QX!nV;PGUc)E;Ni;U(z&&6h8b}naKW*n1eh26-RSs}$u zyndav{K))7PMr8?&-OKMFoB6@%HnVSaUoL-Ga27_SlqbTy@_x1w?0bv&(q9b^T)(` z&*!vR1d7mtwdvx}LX(Q~C(eFN>2E0>TUf$y;9vlzy)O1RXX)u14u0HoFe-p@unJ2o zym73hmNQMJXf$Hv+P{~Oc2U{8u6r0ZnfCgq;iww7#JE3r6(i0f+j4nw1T#Lrd-tsU zwAS07;mm{I#gX&s(+Bu7Z$w z3Pe@aWV9JsVOe|;R8%sj@`0p%4_yZ*$?=0c2r8E~aaqH{HM)9$!Rmfss=t70kx3td zN28w=3tIo9z3(j3M)Ib$zNu=&_u!J zXj0V?=F2uutLXIz7PGsvnYgbdxHR8)2XZGfuffEQ76aZ4NK$;N>=d`?J(kIN?m8;P zYd@PiBYZHfmJ2| zh?UQ9w%)>I=9f!)kgZ6fW&+AkeBzq5Z|;s>$L0Z$hUyCS9VwuAZ0S< z(XHydk-CXv^e?Dwdx!qAf5j0t5*HY&wkBL$)ddX|N}ZxyNo!n?a%fcW$Ru$&;o&gi zLFiinv=cNZc10it-YCY_R*8JR(WKY@Ffe}f~H zxS6RksoHx1t;*xX6{hn>>Ze;H;=^EZLf76lDjb*Lt1KlnHtCh6u<1@&!ZI$RF142C zf3A27|9v4@OLXQuvg+2Bc2-pTvz8eja>2qs;_~*(5guN3P_rqF;jYif2zhrInqGsLg^JTRTk46 z;B65423=5@K1V88kz&mA6|uBk6A?i$ne3{iX81hdt_((rj$>hnGfO^K?!MHKm-~4* z6jMca#;JwoE+uL-3>mAieV(Xf(!uAfpUkTarqF^&i;gofyQ!|LCGioO&M2ya^J%@* z7nw5?S|b#Xzoo)RM_jH5?EJjX9|YUl>VNPV@qeMoXmu6SW;HO&q-yDJqN%NWd?LPa z_HxgU=!wY&(+Jw5Vj9ZLOoQ?alp4RC`ojeBWvleF_EMO09`j)En$WN9eJ zEX|CHL?2U(w=eJalDSUkLH*T9U8#PKQPEh6W%NrGQHq!M4KvnKvLLt@+F+KVJQeJu?nJ{2o$wfQ>=4>+uYY0f=L_4{2Z)8 
zYRVIkq^ij>Q!gxV@2JnU#gIAz69{3|0*KSbZk*BU>tKgbBI;_!h~vG=td6&-dNwC9 z-P*5=m`1;u{xW`igKEovp`~V-m;OSuMwe*)M~vOvX`{D;O*Ivc;F4d0ZPBgXoUI$L z#krf(lmEL|9$A^S)xI#h5E=k);s1 ze{#-H-b$e~HmAju`I9{skd|71 zNtY#d4|AETve@2;0#PYu)D*$U#~mz9WybSA#L6PPtRZijq;{11nRX zJVFzO=tAp7?=Zm3inYkv0|K;;-H6A}r*P#Ko!!3GjEwc)L#K{vX zk7RFLSTa+EwgI9q$5(ZH0^At&)M?Skwm46kwFDmg>*gS1uaVV3b z##3%_Epqa^<)Q`-bgE_2!5kk#tq(S9E1b9N+5mrz>w<&EyK2}X74_}|vi;^ly3XnV3<^1zr0Vh{PE8QJ8LR_wlkITE96g zqCzI!f8rc`3UG3~R_dW^(krMQ_SlCGU?32i{JJpJq12o)+E&<y`JrP9=g#x9VzD7>tgnSrtJQ_HS~ni76c%p@biQ>G|$cPy=`OlqeGJqbiaM?gBDyJwFWZ zsSe*?=vNN@;-Gng<9tq*YFr~V9;8_(4d8-CF;BzznOn=@{rRjK zah}|->o&K`ZBO7&R{}PT&MEy`4A%`po00DpscdJ8em*#Y51VdUJrXAOu#WwHHVD@O zuZae^KFf67zktzkSYZhP9!QEC6PY@M&?Ia#l@ABTWDTh}sj4_LOBqZmOvUh=d$8>T4wI_g8&iAN zTG@J*oLQcuHI}I7O%K!YEUiV^+nO_H%A?J&TKKqXR#2cl2S2ROyXBOtj^2xrtIPqf z$NFKEd_uW;xCJj{z*6i|K~ZsFd~${#XV1u=+dub<#~nMBj)>-uubwAtw>J|CWii)f zHBbtXzosqz>nrAoJG6rW$?%2Y>Mp0|E({{T7|A0sv#6`nv0qj7hHRIaJS^8JQ~ z?g=`#0pNC9JR-ln4&Y__kGWj9qe>jx0UB&IGUu7D7hd)=^QgVzz_SHUV+gRe{N9_i zI!Gtx)pB;mUype@>3Z5(hPxbe!qNy92d~paV%H*NZ7r?ZLEnkk`uEvg^=e4#hj^%? 
zhclj?Cpg*!w#<;*bwoP`co`eAk?}b^+W*;?5k}^aN{l2(HOl)-;h~tnWrSD%(CL57 zp?)+@pe45LC9zYMubj?Op`zaRc=LzU?C}8bxyqf*?Cn*XRFs2MN{57l*L&ZsG3>d` zp#LeHilaiV*hC<2IIZ2){mBq#|2WsoarzSlk;%19eg*lEtua6p1FbuPQ#o>P#dMq- zzT{+L;f)JH^dlwy*}$ZH5)0?M&VatoZ|%9jX>ZN;ks%k$vMggxRI3&z6%@0Vj#J>- zE6+_=R4(qqW2j87hzWZsyRym(JP_n%i|N6%1=jTH%g=(jb7v;Dw|8m;ZTM1Rtmm=; z`0(s!5MSG-?_CLn>S``2dJ&V>dyR>k8Q#7aVq>;Y4XjuWf2x34Vjo#vB#wnUY3bd7 zvns&<_ppcJw=a;*2v{0nI<&0w{7m#ZBy(tBL3?h7B172)lY;RXxkbcs>{m zW%Zg`EUVg(o`>atv6?OYMBJK6Eo8+?(EL`%*=er`swdOYD*EcO-rvY={3FdQeBtLn zh`rrVtd;#t5s^0VyqsFErJ~D5c&K@a2H&=xuc21X9T?ZGeO>A-=izXdPs%AS)vL1+_Bz)EntPLTpZ+5ARf zXhNlG2-WNj*&HX%lz9@umi`=;{}+^E#2AJL$1%4~U-6R9Fj*HH0BWjFoAUuWq_4Gk zd8%Rlsnx2sL4Osq5d;(LhedEHST1vzzu zmR6!jZ|GB5LT?8`JHQN48REKDO?M#epbUMwSusr3;czQ|!Q+W{Xb;6u>MQ2a{~Pa; z>kQ3vfyuUu{9|W_!xv{Lm)L5?tC&-M(?*Vab}l9#o!c+KZLMRW}!6u!6eNCTCH!kKlXQsPeXlvb3m4zNLuBx+z~@ zC@=4c>@v$4t5S8t95>=5M!M{{MF3-HOtLLs`=mgGElgf7*@4Murmu1Xn_$$wVxTAt zzt)Z&Hl>$BWfE4lCjIg(8R9!ONrix>#dpOHX$7NgvvglkSjq17&<<=03?Wxru8e|@ zu~v!a;6(F{GfpSJp-MUT7pHEVeY%*+4-YL|_TT}ThkFp5RDOn;8(Nc-#zrF$Bs;1F9lls~{g_(YHKz2|OET$i zafEPK8vowl)fAjb;CdC7@wRfXz}VYdg0`6-lz};0846!BSc)q&hh~p#oSKaa93bbz zTsX07sZ4ikTs(7;Zg7f;%3Z>lc#~~RB4#mEFVR;FF%l*M_THf-0xLBDFN6`Ly`z`u@0GyQo(#IU(bSLB7*#J36p^&s6Qnx2u=dghp_Df zo-YkECXwqv0?P4SnQ>?WF9qugNzD@iTFHJy|3MWU){ST|e!{=$sbtCh%jXZj}^Oy9uX7u2M5?A9DGj zdO|1vK+po&`+-Ky;9$#_pivKdss_PfY6LBg+iZ=`tiyn3*$&zH1cJ-SRnJhurCDR=U!tQFvtddl z+K&?R^4yY;0HZ=O3VujqW>$4Ey*Ow5g+=|}mD3OjSZZ60i+vJ523@_tXFiTA91mvI z9B+X7nHxK#Pj}JcQF=T$fSKzoi|aBI>b=39PUQzE|2tbK9A6K8fl1lQc>T|I@MU7I zz{m5&s7C3%=+8DZ!;c}!I}gaGo5mEUyKZ+HB)!kmxTg)2h{7-w@}%YMR!TeP5`4+U zYXnjYx zlBfHhcP;3DY9I!)l#k6E0kE;G z=7^E&9TAts734G4aTNc&KAyW#z?~hcVMuDd@ESKT5^|Ts`ZXAYA*H>!Iq;d$ZMAub zUL4y!I!-!4c=~e>Sb!E2#N2D=>4aXF)vor}U_JOd@wE5ZFzN&0Gr~o34+pt>@?X&L zmv$8RRH3jMYN6$#mEkPf%HkPCRa zAuNd4>!#R%hiS3BXEH$ng3hei-)7?SfA^IMZ3Wi&x3WxEQJI*6n*I*@I!*JAXpdy_ zjr}o`Iu9iO=iZ;umry|5&*RY(d!P0qX5jf`s^{q`4_>*mPiJx=@h0=&mX5U8F^$;#&5ka5p>=K7vVSOQ=6FtfTCaZ4M`8P+&XJsFHe 
zBsZq;#tn#=vLErW_e9e#@PppPP~un#hM8d4yJ`Ri)=vwDJTLbIru`u`zlV9+M|(#~ z;Iv3i@T>Pm6Zm{$0KSZy8ae24V^J!xcYpSa`4qJ%($$>Y+quEeS_Kvt?a68Y*2Pmy z<+~l#Nok}oL4+vJzbnw5&~d)CK#)*Oy-x8m@DvhIpU)%VhNL_m5AH&EU!j1sBW^!` zV{6wiGid(5=@m*1$k5Z+-gpVEzO<|b)<*F=6Tp@j{3mb)> zBYHLwF8%#&ooT0no;-FK9i%KQ>g;CP| zEr{~h6Ma7#FYA~Tx7)W$!tm(^*BiW%g}emC7gK-nAYZM1s~zb&bUSnuyyUaF`Wx9# zPY!(uYD2aloXLrx9=bjY>AEGfdYxDbQb0GPTJ5g>L7+pwNAY?@qewK^IqY^WCe;*F zFMC-G9Uwu7Dy4BnO)r^6or{9;aVC*c!YlZumrR>Cf60}5nv`*hd|N|aeeG9reIgya zDXfVe!_o2DlmwN779wg_n!P&H9VSA`$Cz;W#gUZ!td}K$j?p{H61xwd#QlfyxuFQX zUD#qKGVDLydlM=IS-P%$j!(Fomj5)JT1OGn)8*2ra`Kzf_z^OfAQ^I-qrNIInW$Ra z*f##nXccG^oDwakUy{n4-8;ZwqD>DYw92ttfM=eGq%)E6DqN~tpJ7y(w6jnYFIuFL zXjax77{t1@Qw6m(7HdU#VfUbtImFoPExiClW$CC>1Q$j}I)VnHc92!Dk*-cS^A#V9 zTAfsnBgJkXae++sk=GXlhJAl0DknXou@lLP~*&JXdMTMFNSH;&4=Ie79L!;{M|HwHbLVL^z+8xYV1SXgSr; zJ?CKIOj$Wy|E5wP_*qiLgWZWO|MY4;NdCVTU_58f7n3foy!(H7fp4S#rMA{|O+z2q zxZbj%3<5;NSP|L0^v!hcp!FYUmR6BS_mGU7+3uw17!$n(k&&Usf5tq!VZA1bx)*B^ z6`w-Ar1o2!OGD4wBYa-V#8G8*aqZZgMp~9qy1KdxG{Db?@%#3j_mIrx22_$^12u>Z zrxRouOlp9=kQr*lW+U_$7>q^%z0+Or+tIE0CSAoUWruq~fPqKt`@}&1j-2~G-nPBA z2g6bU5;I)gu6{%|-Mj6|uz7mo>zqXc@ZH1f1?&pj6E4SDPY@tFvxiIx|8!Y2Qcq*{ zapnX1&ozfrzs)s^%I^Kb0oTsVnT~bc8HWN(ssvTCWv)q!>H!0|LFV6RlH+dJYnBP7}VMQpT}(L zt==xm@R2u)kd_Gx0^c^NhldaKyDS?dh)(pY^oXnmjrY&S(3&fDlu0W3Q*8FHdLEWd zW+b;SP|hNGQe@#!p8M|u=-fx(1?|DpOH6OeI zUSA~RrT+!-GQi7|oL3GVCBPAyR}MVm&6As;2F367$HvtM(bX~eOQX5?4ua5XDv9cW|26#Pvl;tEEVyD|If_T|(@_E?eeE++wJ8yFAy>cXLNGbs0$%>L0jq3Gz zGhik7@q9DS(3(qJW~DxxZh{>#&FhR)(5^w{?t?nt+B93H0lB0a$@sZW?mB|+6XuHt z;i>PmQBvB9(p8QcO@8B8dlm2$AqZWDAboKp z1uujC(>R5ghMlLF+}+=qDRsuQgVgHhh*a)qc~RCl1p;j$X}5E+qRSxlG;tGjZ`ZjV z|4rOy_-sy5F*%};nD6=s@eBYR$A>p)A2Cw)qQr`OI702 z2{=wlC+hH9Vjbdt;8OJdP{%R<8#8lUw{O`#_7O_OVc5)~j0&3;6dpR`M3(NrJSjXH zszwfpD#`FI=^V8dQ9V_h=LB&GyPYs~!ag==R%MAhJcGWYv-1`@h81B584Qz4!_ePcNBxedWQ$9nEJ2f!>H&5G^#J@$D z&2r9v0yapx>6YNnGd=twQkpq_F^Sp{L1cSA;Y{a}k({bKR)g?P@K{q6L+gg{{^44d z^>8dA1t1AMj9qSU)@I$W+l60m@1MTdey`zR*AyLh_rCvbb&p}(S~_!7O0?v6qPT4E 
ztbA@@^PTLI!UHv_mE(smo=5hJ1}44412*;Bfi1NblY;>#z2H<`plPs~PN%_cn$u$x zPcpHCGC@>yClv>LvmJ~%`*FGz`>u5+x}|A|GByXE>$&; zmm#W;VrkAVHgnDLJc+&EmWbd?Z~%!$leTjCnh=&10B(sgUj0AybVS76>>%pR9pyff zdIdtz85OJr&z?t2w|*TGZH}xaDA&vAn$%2JdtZ|IM(}s5Z)-Q$t*-zgkLoQ`c0Fj0OcuBMqqXp zs(*g+YV8+N^ORh$@Uc&@l7&kfM803~LL@6A=%7YomrrVF2?5CeFd5I*!H&leO>utZ zeKIgQAr(k*EN+7~?AlOc*p=NBoP4-clu^&3y1&&)hsRRTTKx(0R~V8OFjI-}Q!I9i zVuJj261oX8j-D!Ud5ua@M)vq!G7ZvDO}^G6hfvjekO~je1e3ovJ@GB>a04ORW4v^V z*pRVqTh&f(J<933`0GDyzMKqlJS3+b6H{p))q0Cc3!~#jceAl84JRxCuf6d%-8MQN zw5Gg>RK3x>nz&++nZ6fLdd9TsToMur<{RL3X)^HNo7{ilwMWE2h}_ zg^S7(svM|}YVdm(>Ks}HDt<66n1yAaah40xP?ZLOG}#>n6I7cVQEMwp$-FAx1ik$l zxSYnN;Mo;QXeZJY1J6pJ%l$x#qL)Mi6^Y9%b6Co4J=ZUU&$0TOhCi>81_L4zRdZSR z#i^c%5h~4@&oY5e@s07Z-3Rd&xrm+jXNJDLhfB^z@@_}Qgx%=|vOF1zwFWHx4O%b* zd8WT1^!-mjS1R>iBe808Ovsae(CbxjN$x$y3is2&-p6xUp#5soffm-thnkyEeSxlDotuyY6aFRXfAJ%=^Avj zE_TieF43L0Wnyh+^Bc?Vvt11dz#N1%)T#>vRx`rfAd+qLK}MzKt}n{8}%e zdF&>dQ{wFQ_%Y;?j5C=#H$zcDyYkwqE9j_}fH8iiRqKO&lntNCCt?4CT}OBk-PRU! ztTm*N^5-gD&QLkhX}GC0=+IZOU(jYQb+j*2r8eL}5by&RLqs~wt;iYP^^-6pi8LvT zhz!dz61L&PAGhYykqFvd2%p(*MwTLHy+R{E-$=sWyi6{+<=HcsL|1SguHt9mf+BQb z`d!vlu&)No4jKe~0GK=omMd}L&~G?ohs0<51)@j4CJw?t)o$PjG)5#tCSGFJACt!uMs4n5!So5^vE z>6{qA;P*&}eo-u|mHDD6vOLi|RkmJ^Yp*K=mQVsnFM!F&TBeJmET;?@q>l}2>uEr8 zgn7K}11sf+izQt1rAFz`L^o39!ejN~UQ61aJ{uxnG*>iO!V!g#mYocXB{4-36;Jff zWKXze?ewKBzwK0i?t8cZCc|VyN-P7fQWEE z@Jfsmx5>M4oj9g)heeUj=jP~=nPmqu@2C|+S4)F!<^ACPVwkilj)t*2SOxvtOpKLD z7yE+hoI3po>WZ0Qg*MD+AD!WnifT2|eF(L~>AG>Cf->|ZXvtHGpQUqHXpUAEZE7n* zBSwF20lXM6EYV-$JW>I$I?UKsjcx^)#s1}EVfJ&ArN6&h(}Fi^tHwETE*1n+RmE%| zn51n_OwDX0GEcL4c^HxQtb`?kzppkF1j>EZ)rs4ZDACORXa5TTN0q@Gj-p|@7 z3g%nW>hvlq>2#J?Eh{Y;PDi@T4|8~!&4Vx7f;%j4K3h92mZx>y{c>Rg!)}*J7w$%s zY=LJ1JDC7o1m?Mep^ue=aQJR7jL_BQtJS3~l%Xl|UV&oa)}a_W_~-^&W$XY}rhs64 z9n`{*yVr*{ioqpwO zK$bwAmmKW7EFrF-e*@o%j56j601EFos{;oW}4LGda!)|D^Uvc7#%gsnM%GrnJsI@F}`VKZo5<@fjZBLEQb zO+uIOoe0HXgGgXm15{wa6xM`R;azDdV_B4CMgDq2G7Bu>np7`Z-v?a=e7=$M|`k%j1$Ra67?9pGnjJ61FHG1zViJEaI%p>=TCl`5OxG_x$`^;Hycc 
z%a-MV0KUD?ck@a@+P`+Y@t8ItPs=l3Fn;Cu{l;v4yuVjK7d^8SX%+q&dKN&uShAys z&F4L?0;pBIq?G4qcIJ+7NL40?@G6Z$vL<|Jwd#jMRAnnEX>0RPmgPCU zAJ&vzonK$?Vhj9H*>;hzMK)E`1A~t#+~jjR0;XL$vLA{k{JOopy+H!&Zi`4d4vo+J zZU~$7uB9BuGPO4cI*>`BA_Cu@y~~%aQg%771(76nFiVMNLOC^aJN%YDRhisK`fx=; zM!xv>`f;{QU`T6bHCHMV0p=OHQv(tg;bhPwK+EZ^QhrsCN7|Vq1n`zC6$qLv)oNd2 zh$1Tgr|0yW4j$!*Vi{g@=IN{ZZpjhDGJJY|{+{D?`|sWQ{$!B}mNYe$6}(W5Z>Dr4 z`=#oZu$T;K2xd8oVb=Y)F-sUm#3(!>f@|5j#o8Jqw?N=LvIon?#kQvA>tjBV*=Q1d znaTwY0s?{Lhm$#vF`9;c_g=JSaoah6`oi+wC*0nzuk18FNkV6UmpMB-I|`77uNW=p z5&wc2WL7GXghxJJFMC0KeSK*9`uY*jBEJe+rjHc5`l#Zi72D5z7Pd;j#e<^vJS}Z) z&zzk7#qvnJYu!Qt!n9$kRjEkDJ=ETIHflP$Y55U6y2Df0pvO2=QAw5Na@DH0hqm7L zbCg!w)!#f&{1j{AID;$aCz`Th9k+oH$lR7jMn)o1QX@rWs>et}T+ziXUjfc?Lu8UR zND#^Nh!#5J@qBZD@^!Ttu5ef=9zk=nqa|W>mRwcVY$M43^}Or#qRVja^~1JI9Xg){ z+KG@H7979Gawc8{(+oMvPs@k6Q+*94S=GF7^-`J^twQ%jyJ9q-I+_epBvF6bf0stFje)Zsp$>PX zZ4p7W9a)aMMEY+8Bh0Tb*p^-J?+ApVBEipsfpcf8O*lUJTsc#=-XiujPF-={2mBL* z=c0U(j0$4YR$zsX;xH2*SFgou6aMur;i~YmRnXsuCvosOevP=fx-x9|J#Km^rU|P+ zr&Tnux!8Y&lAJFH6z>2A{Mud!Uq0rFpvW$zpWaC@abBTp-2XB_r}_*+Cn1(~Z{w3i zh=SB~QHx}mYLF`YfioWP(gJktCp3@z&~K&38BGRD`r9l!AB4=^b{vXo>EPh8>e$iW z1ToNp<^}cs{>}%Xbv!eb%+1F5+VM5B;`;UXVNKYbuMK++zuke^6e-b}Ey+U@K>|=_ z9UdIcoWUixyvhg9wT3cwU`G72!FrH=>}u8Anv+-cz}>~Kb2UdHte7(c*7o%ccyaS< zuVo>BGUQ%zod_);@F7Jw0pdAJ1I%}kAY-u@jm+orjgHvDC7qVwFFj?8P8}GC%y@_M zs(=J&9PN;0$|XR2eMMB%8II{;VPSFD>2~Um583acV*}U6HtcETuFR}pR))T4jWvwh z+|qIYnPmp4f2|@06Eyuq6TSWFwMLHe!|5a6g$*1u`%vHe$!;j zv9n(!utu?C@!!*Ad3OFyVJ$MqR@p|CtxfzVifno`A)=(E^aZs&{QG-0lodp zmo57p#2twyhq%J5zkJbk2|Tp#I&W>Rs5snQzb3)~;Vd^828;*1F9&S$aB{XH_t8Yr z;K{Z+B_*r@QV*vKKEp09F37`BgMAv<5%;o#L}629%W}g@0Ln_)Om_Uu;zWd}m5yq| zE&JLd;B+tu~Dieu<|%_DWBh#F)M zF~bq?@kZa(+1a|cjt}hlF9ErGcr<8rI3F~bLQM=5&Fx^y;%{2o*zk>dS1fe3a4DKZ zvd3FGqzc#UBJ%ZG8F9iYTx?x_N(lS7c7!|)%a}nL_IvtI*wwRVESD1X&LCQ z4G>7Epw9ER5eMSmgPZ>K>pWcGSD8VOW~Y@}#!blGha&~|A$?(9s3|MdPV_SBq0yJK z;2~nJb89u7WyW0O=Il24md*P2=skHx=TA?2^Eadn9@Xnriw$?mWm~EI%u87#FMuzp zxD_7|^asYP5S2 
z;RR^QY9FwT454l3v+BcVsT9a1-H6b?D42}BJr$RwPG6ZX$PzC(C%X_m3QwDGXu)%5 ztNU7#DSwF!6>&kbWOT}>2TXqhs)5Da;85!8Z(a%={tK_UwExJ!I=L717Ihc`Q z(RWmXm8r*hFV~kvZ)L{F9KiqW^6a<^s$7<%jdb;D?Fd*7bQw;Ule+?|&`I*SaV?}l zN=CM(!<41{A>y=CZ%hmNx-&$>Jb3o)O6vO&T9jZGs?w(9h-5t{WkCR=tZ5Qx45ViIxI}m3c#~X;_Q;@rM6wn4aV`_+p{>V|j4(;H`me2-n)+}H5eRONVAx4- z9Um!A1=^E;c4*|lluQwF_f5kfPQJ&~Aa4RDW@@8YIU&Y)I70-;-~g!pG%x=( z31?7t_y3Aw7fM~~1D(4?mCMve9l|4mvlruM&#+tC^pKI_hUWSUAF}B?jMPR_g{L^6 zVCJ?G-q#uX+37P_fv6H_)-5_>bQu*T^DcD7B2USqpG&e;Yr1eCv+;C04xr27)Q~3d z28%(rwH6HI;CHIKjtFvva(xg@Qpio8pl$A(Js3*6J>0%Mh+Okz>?F<0!ZszIXqyF$ zLzax=3JFb{Vp$1K;cQ#2amP0y5-)PH3n*L&P$-fPW7rbx`h0)davC9Wfb;!Q%Y>db z^b#ZBFov=~f2BPxf-4a8lTON%cFi#tnt4h-O>u{xCimEHV7tO0-Mf<&uAt>oQ_J$6 zVoHs7NJLO{HMu8l?0R@Tob-IBE7^=Ev)2|)bg?MYhOt<$`G4~-Fw`ue12GYZ^DMDI z+D+y=*CXGiFC2CT!uF^lTERigLpXbs8o7P+Wuhmh;wq_FtaA}%a;lgT+KUj6^|U3? zYxb@WI_r}X!L++nS}vjMX>7BOEj}HS5=ge#qYQZtLPkaK=;40VjQn+(R zEKT$;tNC(EQ`6G5LI0^NqPkt5Ew96!uL?MvWsx5>?RVj_@`bbr@<+@GAGdrQ^n9;} zxL?nju8&F*b}v6aE}QlgxpyGtvsj_NeLjs`S1I|y`<%CEk_H`Bmc$R_`&>lDFadGp zG8n@Sy||A{Q*xj3eQ(A9foh68P!+k(vJbs62!0DHQag4IJ_L*%`{;U({ZLna->3_G zook-Lbz5z#uxVqR2rMIvGr{bkl0NO2Y+(XTU7d~Q=RBrVEQjL`b_nP^TdVtrwyiZ% zq*Ac!5wQwL?y|gwgVq?WiN=g>7#oWK|)uayWkEP6lKnZaKT)W`}_M-~wzbW?0 zbLeKA-Ob8R6&w0`7SE?iv+8!~%X%$Zhrg=o z+9DZ!WM6Rni&&a`5h|y(&T4JVg^6wqQS~q3U3E?U033C0Achh@8aX+6`}4n?>rwKA zLOn(Z;b9j*;1>wCXb&}x{3@st951i0jn<2F;wq4J92&J+Nq!?hNhzrvFXD3ftYD^q z=W!%t%9d+=Q*vUwot@#JATT|@`$a{m7AW68)-7vaf=2mFnn>oO^Y*zyF+}Gf0H0r3u znCtZ~3uz43XEK7=8`h%0kMoOgf`J}cOokD2J+{1%1SQyfpRKD~h7Q~{b~S##q2L;_sw7WrxVTIlZAhb zY>EE<(*lo1*SkaQU)@Ei10g7akphn!O<)kDki@=MBDf}3QA)nxAt51JJ>G8V*YtD; zyTKRPdMJWVB+W}tD9dxyVX93w%)ajp6txxjYg)9C<C?Hh{l1kAsKoq+44lY1blR zgHMWCJDc-rRNy$?I)1e`Q=-^l!8MMH@35%a$8=I58!G709^7Sm5~dRkEe|>os*L<= z*aHouPL+a6dglx8WTshPF1o0#Lyqm^*blG>Z3?YtBzTL#qCLrehhPIJD>VL2+Svy;t&oh?}xsGPJ#JnAwX_ zY@quKWnW7?&C#&`$v8^Eo$N||OJ(S0-H{sS%)`r9U6wS`wLyG;wjQov)lHs|6VP{V z^mAbE4{xNm!U7&=dAdi>hMAPT;gSrW4b*FH^2pn0Zrm2yk+(dqs8#c*=Rr)7!dlmF 
znbuFa6-85wgo?&5;Q}oMs{6}vRJ=#n*NNxO<*16uN!Y2qwKCo%1B^WUZ9nD zNoaxl_=PBJPgQ|QYzZKZ>L3Oqv>8vR>Mkz7lqJg^cmIW*bUrKS`QOQq3uldUZhymwCBp&TF>{I(xZ|__SmqV z6Mi0jPh^;w#dnxxpOLD+&`i36XD~w9&{f*;3x^~=Jdv>oT}WV;;x?R(o{xirpTi>~ z37!AiJMpCX{V4ysUkp4iC4pW164rUJuqA@Mx_&&ej&QEbEav#}o~b48Q0mKv3&s>u zlMesaBOZY$pL8*U!jY;GnfgC*^F;Vrvw66m@};aXDK8gjY8u6n5EirowLs_`65dIT z_bqu{&c}P|ocKekOL~<>9(mZ1_dNFjVxvNWAek!mWDm*cV)v$&ME4jAyCwsb?eb~^ zFR`zDRBzh@a>>t6PDsfuLp^YEA^Et~2jDC;Qs#g=UJgjDE$}uzXFQ(3Lw4 zm)3D&5D$P^VPb)3I$*I`JL^fhYy9u*QXdf<GsqqUYr;FwhRU9#|Yj|T2> zBT38k52!a01CT#tndF@>;NCbPYa(E*z>)YUuO)(%VXrn^l?)Au9AI%^l*+kni^pBE zeHcR!bYLQa)2gW8wpPUQ!g}d}F0(X5#2d(?R*C7@*Ky8_!ap1dVcHz|KctWT@!< z9m)E*wfE}tk4;iY?_#P18h=!W8xp!wmg=>?yTfExVH+bR-Tq9z9y=rXt z!4SmPmk068XI#)0NVeQHXk`D&&m21|7DzGRk(x_6b`3$Gj0xoYbB&63V+@xo*FwbP+lQ%f7VKnFc|YXf1{CW3m8 zoF=Cl{MlWpgK;@Z3FxQ(IpqEhwJ{=Owne4FVaAL$Q>GrLCz#@ieb5wbhhS%83msIS zf-xSfE}+q9GFAUT%S!i`5tBntH{9ZCniK&$F%QIHDfBOX1+d%4fa9wMeMi6^%2Pb2 z;Ws1(juM~FYSZzy_V$RWMyi;oXn#+jzdr}CYk9oSS9O2*0Ksh&l(gw3E_Nn1u#`k0 zwz^7OV`xq~SJxny11(pB7WK!0V#6;EEf8d(O=u{w$K^|ly!Ay7c?!-szfYb6^q|q+$IZmK9sS@^#G*IvC-FZ z^F44Hbp*SC%uorQfpu(*TJY5R*4Z1rxDEs@-{ohuwY6PJWY>&bs4FT2`H$4{_eZUY zwl)grzFDx{S_65H9wk3@a7&=Zh=_@Gr|_ICR`sQS4X=zTQz|3%48F;+ggunJ@1SLz zlS1-udAP0`QGME;V$U>f!JlFgEIBTKSeNQ1* z$sK(%q=O$cTXFNMW8(K#BoT}xiSCW>V?a~*4i%>%w^Nay-o$M$roQ#*JI0iVz?s*i zWT>ew#(=Hx3$xY1Zoud9`&-{Opd|TYGeGQh9o3;d!q)<|6BNY#)Ldrk6pBhFq%%d^ z+?*SeXm9Tjk#mS)xBp7cLA@keW(QH*2uIrFMSK}CCyDc9&h4qE5Guk_-`;qsZ5@{0myS>%+% zndWXbip~Z5#9wisMTx>cfyv7?O=|vp)yM#dvj_l_4Wj#Zr6T`Au&5p*fp;O${hxD> zTJ`R&So-|I{?-Uk*G%m!pEu9~`elRP`(HgL!-lk|3)9|N1kmO~=i{~(0eSw6`7;+w zC&=WvB-Yp0e|BOfCvQNtios!-ZV8jdfwB4W%j-aqyA+AAouxedy6!HZ_8$R*r*Mli zyB-Kdh27wuYX#pq`Fr1*{R&Rw9gNNjJ~YhwjGFI_-l2j&J`NYjoaxt%?>`aTfn}L5 zu9o+{C5O&5EDim5grUn@Wqa0dXqt#zFPi`MF)B){9rR1<7uQ8UUemo{@&4YwSN8IN z3um}waSU;0l=}~QXI*UGNRz{C8}_F*pEMd>?S(BCZ$Vb{m-D@$F?HKBaV)Mf!NyKH z!Uy?wNBnWmPQ0sluGFO1x8y};HMkE3l)+=_z>cDW1|Po$zV_$O;-rfM7P5rpu!uu5 
zrbVM->7s4ct5>~WZ6Z(NDt1|{{zO)p-DZ~*KC=7w=WCNXdpro_c!}$bUj2Rkck}VF z%|BnJFA^-%$~SMqD*DE-7py=6;~YGmud?Q*h||S88%1CpEzUOb`3M-d%2Fn;Mq)dH zu^a^oFMM$sgJfW+GX?f@`&u9!OU8Q5qD9a}4~%byab~K>HWWMJ1cdLB!X|cY{Iaf? ztgwAYbn?Zbjnjq7>}$1%#;o^zd=7)%C#!2KGttAQIg_GQwlB&cNgCfyyyhqne&6=QCvvpjy;J<> zWZeLEcxkyZ$M#0;gt?k>?|{HeW^e$M%rRg3WiDCps(4R?V0fEz_wT5e9a|=9$66GJ zTq{qZ(2FGH76r5CxeZZwMeR9@lAofIV~Z#^jhe~TB?Cqe%Rf2U5-RLwQ{PU#-RiAS zvo*h7JMnQa)n(Q{N?(>6e?5tjqCeJ^Ur$L>;!iYSUuq%)%-TC3dV3egyxf3`gAT zZtRxrw}Jlfa2A-U5FOGk-`uU+Xs1|Ql-q1!OANK&n4Wgb^qM-;cUkc#9wg5%S65@w zUsBTTTg!jMTX9+4jJjoovE>8=#Yt+J^O$v&-Tl`Xe=p-~1Do%Y-u)t!u***-Qm#W@ zUG&4A!O~yhJvCfw26Vo%qU?PbrfIc3Yn9nn{l>DiRut`hIju5Xmq2Q36Mg2G4cPq` zPP+Zs$}8q4a5me&bJTcRN6)LEMBl7`6GfI{N@crTzFKMR4{l0b%W?oY-R=2oUz?*k zT#+Iv-eF=Ve|)m>E*%PO?LxU$0MZ@Lvb2`%h~AXcQWG~R?w>3 zw1rj$d->AbK;<=twa$D0G1I(@CU|S;QFaxR3w$5c3+c(XRM1V6uW?_tg>D>fed+Fx zD5eascF5b}<5&y%zoAtY!$M3cLjh8Xo-Cv#$fJ~Duw)EIJZE4jVBBIO0cV*vvq<(z?Dbvhz?%9f zz|9TsrxLLvTd&;>%uXP-Bcr3Kj3uI_y2cVTLT8%LTv>a_LA9wQi5eqI*uue_1m6n2 zu{PwYHW0Y)4?b`J@>7-=-5r1Lt3{K}mEKDEDoQJ_DkkQNH;9U&wCGo>CX_MQAz$nb zkrYR4fFKk5>g%C$h8elbW5c;Qwx(xUAs`Tiz%O%Lt6r%ldO-ZWGgN!zeXltB4OMna z)nVOsfS(%vAto=zFtmBVPZI`fih{$p6|-e6@4cRE8x)bIor##38abx=V!p(lrSkoQa4RvQ#r~aaV_6fQF=emTN`ui#JdFI b0ejo0_Bx-*v{>-gLx`ENl~Mg=kHr50?jWUj literal 0 HcmV?d00001 diff --git a/mkdocs/docs/img/sprite_download4.png b/mkdocs/docs/img/sprite_download4.png new file mode 100644 index 0000000000000000000000000000000000000000..db6e5186fec13f1b80e71d1f8247f61d91db306e GIT binary patch literal 23052 zcmV*MKx4m&P)gIE9nAOJ~3 zK~#9!?7exM9aWVF{=K)V-s?S`PSOd4EFH43#;`jeOMnqYa11ICU_f#C3B$M|GK>tU zz<`6wFhAEpVL(t+76*aB0S85apg;(F_5{e*owfV*dsTJs`TbFK>z%sq*6R)3=X}rioGUQWYtID&05G^o0sy=~Kt&SL6VOV6bn-q(LWty8NRc~7 z_P+HZqQ*BhfrubH0%?Mf8q(9yilAdc&`MAdAsVWGcWfVF#Y2SuJ1>Eb-bAh{5~s1C(CNcYgwK`x&3=vTt6%$%GpUR+dk~%k$OA+o zbKwb)7XQ1mO9CkZBmr6jQ0&JjHvQjI=pxGucuBWD^?uQt)G7Bl7A|E;mprR6V z+=lmD`)*9y|I-lY1t6efAg%#I0zoTZ<^a;kZy@=bzprCJ`b@`lvNQr!3=Q0epM3aA 
zsIUSrs6hrXq?aglX4O-rmcbkU0Qdwle;2tyMzKey)JCC8fM|jA+VS4&Pe$+FpM~@$ z0Q5(euSWnS^8N8ymm?e+1nCNZ5QCfIKSO06)p-07m03kHQEKu)%7<0~NVlM6Oeeah z9g9-?p3reBK(B6aWl$HW`OFB}eh^+c`As?zym>gJJJCA!otSm}kMP3JS3pZZNC{06 z5Q!xxodl2oplffGi6*yuBy@To8l*+0*eW6f?E+g@<=v2xlvyhuG6*0<8ExabFlNf} zD0j|y!{s~o#9OiSr!OY12mx*51f=(;Q)m^{-N?qEwW)IZPQTd%fdj0$7 zAl-_d89ng)snBs7z%)n3DVb@Qt!E-$Vw-2Hm-*B0tmDKXhYA8UU;P`Z@7G= z*M*+G8Gu*^AqWr>Qbg&8e;TFs%EplBWTh4g9ld%qfotQPCe|6yv4mev+ziq_bR2+G z8CtbMt5)njZx({KsSY@(ZZ){@xYaN}MyTd<`?$mfFQa$Pp&YwF~rPPoTU+$70zm?Gc@(xsYR6$gO z5E7)rRPo}}Wg~1|B^<7Y$Sbqg3ZM-_bYa@zQ_*+iLP&r7nAvOFK!seH@qq@Bcpa!BEbXwDP}~9?x8d#Q&&Q;Fk3rYu!%*&=pIgqjfNf#(zxncKgvW)P=aF?|tySueg-HM+=!upC-9qV@5heUc0l?y$RLDQiU6q0r#3-i-AW5|?d~V?rB+C<9Y=ifU`*O? z0XioghH}@NE#J);-tru@uFKc>(dIn-y2h_pQ6gWna+q=OYs*x=Su9|k$Bfy0UjL3j zT-}JlO;6xAU%L<6)@^~-!xkDt#R@&RN#^io9hacvGE~%t!#*|-`<`+gg0^W;Vcn-A zohXgpQXiBVAb%inwf-G*v-}45GHGr|h4sJ9dHH+hJ1R`h@3#~ShyTwNK*!1Vy;A)i zY?Od2L0nCaZhHi(4064KEmm^s4a6A-o`lSJH@vwWZM9I=4W0?Kt9-2V(E{ zUXN1yTk>_ucl)~U;kGlbf>t{r#4zGo&4roEobQ|rPC$AhQD~JQ8=w;QJoaq(Ez@!{ zn+GhmuV(vgMnL9xdj{Lq#t7D)?Ji-t9+>S*+TSyunQQB~{^NqexB_v7GRJjHJPrGQ za7JQr;X@$7?Nyis#_|Cf|9d_d&=K2p*#X2)9BT z-#Hpid}nA0kPod(AYDSKeOkk^!-dKJH#fy^#D0&TgTqOrf$BD3a1#(!^1?FLR$=}- zKr;K|YVw)6j(KqQl4(O*<*o!hXm*S_7sqz<90RS)KyEH{Pq-XdV9!Prt1prDQ6@yd=BGItqaA685 zGC|aK)NjNEDe>>tZ;TSSu8GBot92ohBN*q$+KON=buMeb$7FuT)%vn-mQM2kgalMn zLpbOGNa79^$B>?ahD?2F0kkKPPe*UE0ty5mDnp1C=tzx9zT6UrgL(PdOS$=*D_jPL z{>8~ROyye!L^U-Qo{f0;IkTNu5($t@EDQx89iVe!JJvt*Ym_@zLPrvWl$1mQS_w$W z5d;Atl3*$vOn$eFDb8rQqH}<PLR^CBNaLZynv^!^AFq$QuIUA z0~;Ze;LnA02$38^DnV0%79g$>RG8q+rFI2S1v@uYKvV{)5JLLUDuze{!+hN0r73P7 zL}hgC)`|7cJc5?4)rcbr(mp^ULjmluKqh0U?MU7$ch->{eqTJG<;VhQw2hYQpJ>HeDztA|Vi&CuNAW$0w+Q++CU&)|dQM~k% z<%vnN0sC|UMV?l{G0{YoVT`&sU>s_v0a?NNzd}EOlJ}JDcgE zQK+Rh=9i@wgX5sWV8A+$ZAve-h_Zf+ndK^-oY`{5A-L2+7~hgd{*?hh%KAezkaG6W zQ@mM7c227QHa{jBH{)3-6 z^UvTMGhiN(nj5rdAh!^2%Ev>bd6)!0HW!YTQ(#{&)1A~h5l?Nk6-q$bjb5T&=|yNmKA-IDU}B_Jigp5G*+ zERB%xi%ve7YJ#$5{mBGeY 
zi*R0kF54oi+Hku1$%YCOtpvQ1sp^r4D#LHdcNgUwSE~ulfJ91I6O9TT4GZ{XQOBp# zGAgZWK-`fH-UJG+S)U^n$wn~0=y)4uvV$VNyxHe8!3!5G9|M0_(r-qHX-p^EoCud9 zIAJYS10XU8F8Q8-^hNqz0htDXZ^?Jp<*TAv8W@a%Z3ih*8e#GUQXynHf00s0v7lW* zJ&$l*18)Z9*$==Lq_F}Vvn*nvxdi939Cqs1Wt}+cU}LH}uBGl9LCCzzQHReG^3VT9 zzC20x|0nq>Rm)X6IWvVOXsM6TRDo0*ZlW=ZzQ(fn3Hp81yD@R&_ zN3t?JC{n7HA_t=Ri2%r;Rw!Sh^tp+AmNz?|Y=#>9UOFai?von7&wp#xs3Wgez9w&w zd&UgI7A$W;wCnOEU2R72Z&ITs08p4MHO8-Mw%pl1ovj>vnR5jiJgPUdnER4;N|vX3 z?J}bxQoC!S6a_9W4ArEJjh}0Rrc#Gdj!~A*2m+|)l&?ufWRRPvLs}!rmn#$`V`z$c zWs8CAa@aPd$rwLPyik2IPzt}9zs=`1)z&57jKXUrX9WH?@=ZDEvfheegrLwA4OHR# zjn>tk#YO+S;F={LpLsL(RI8Xxx19vw%i7%wA}`SUD|{kW9^y+NiM6* z@l~5b$3&=Y;drGf$`( zbDc}5kj0SmQolJWey(0nx7gez%E^?ypEEw931g2&lCMtWn^v~yxPd-OjJ)o$&|yw- zT{mZ2L7Ti{D}1w>IFstMj}6-DN@^=4|BiXzE4h{0W!3GVqsZU4t&zD-jxiQRP_=5k zLI7xNwIEIV1%g(M^37$>((GO4gW6Rztg1w7mYuVdDpOweqnUzvrL^+IYm|ZX6MjYR ze5uk)k4+}aFV5TxWjzOK`7%$LfeAa6Pa7{oK>PIft5yG*oirEP6G zqs!HBDY|O{@rnyB2cD`XQiz6YKzrF2MWRV6tR?61>Mj8<56hI2FX7BW6RU^x zmSmiLeN@|!iTndP^1tEUz(dm**4qD1>M=Mi_3>L*W>&Ep2ac= z2p*+#qKplL6`*DfpSw^46#`);iN3itw^PT7LWcFAFf|U+&nq^?yje{x3%E#G6GrD< z4`<gu%h$LGaW#41UI3^# zZ4hPdUF6-aEc;6k7cS$_pfwRh!+mK|d5g~{W={SLH#YxEj}-~9_63^JQ6ciMoZaCm zlb@>Jdv*@5UV-nDZ{6VxC|KhEhl9xljjK>ZF5zgz zXgdJ8{9HTP(k#2>vcQ}^=|W%l%al2BW#%bi$tRAAP#X*zaOY+?KU1pZ|1b-CjxS^W z*Vp3dR}+8p;ZIBfAaK-oHUJ>Z-z~tCpPY^r{UK(4^_A4+*fY8P&o57CBz>$9AosmpmBvNOa!e~*}!KxoOau0(HLPG3B3EYADc2J~LG9AAHU04ML$i65N0TV_d*o!N$) zPoIPhI~8VLy&CVjVFNyPXjjAiA3v)V_nkW#o&fg#mo=FC)isbp;J$MwWPoSbn!wKm8 zK8o;an@hkE7=F&U3Jh+FFtn*U>PgDw3ylX}s?-%40`J2W>qJc-Z!VgPHmYpmO@aN#|h;CT{XJhmsppC5Q>Fdcs@pcURVr!^b@yXLe4LgMhg zq;u*6FIUp(Cvd>zgrGH0(FnD`h8-bpduj)c`qnyIzA*+j52uYK^)6Uw1KX(@{1%-p zuP#4}CpNrNU4cy%Z%cQc-|JH3WcC6?Rv2q~9@v~Yog^ps4hgJ-ukwTgO z|EbpuOU}p6FtO$>ftLh{MA(0~M3I(mtR?RWfu$SS>gD->K&EidyfT>iz0Yh&R^|iu zD5pIh2@fqFO2^*{>3O;Fzi01+i*fk0bo|SQ08$V-!jjbJ{^tCt82rkfc=A)z@b#0% zW4BH(FW;W&Au9ca217QF!T}J~0E(UP8P61Q7eo5>vK?2u0^7nDDJ=JE?UcDqWU}1} zet!G!X8aA;3Snmkn>BgyN)f^#m31f;9yRo>)@h8ASkwrh7&=Ka?+67Jzf!~0u_a9J 
z@$t^tZTQ)t5?8Jv{eQmhVMn1T^(?F#hMLCe|h7=t?5`{JS9c4V2kzX3BHf+n+ zEZY|zJr?8Ief;d19k_exHuSXn`1~>BQvb7pTr)+Ea5StoRyL2t3`8y2+JAVZmU|`-EK6L8&mC#TpD1)syN}Nw)tz%U%ro9t z0nWW+3#MJQ8eNyXjMHv@1pqLAa!aFpfjX|wdr9XMTE~r@=|Kb?RUy5&fh)n6>>@^; zE9kP1FY<9+U4oqLV0~Bje|G|#F$ux}A7sTZ9iC)K&MNv%*r!54i!umW@zp`u~*=CVur-PaKCS zT>?|a__*fX<1D!mzVzQ)pp?cnC+&tQ-2wLKF5{XL$3ZELFF&{y5HbmUN#lVRcLD%D zd~he$Z4I$_T?LQ75~kzttShuIyO&7ZckUFtckgzLZ}%`95w@pJ#iMJgjq+{sl7%>_ zNqtAd)nr*jzAB=KOI%dZ;8Ls{PXQLWMYu`$MDDb7L*;akx7^OEXpzsL@oQ++$UnB( zcdo`cnk`4d;X@}`Ee4C#Dw%xK#Kq!uA+}axlzfRhmJR~aM-8pG?AGby#S5q5=VwpE_ZIKSE57vc%a@1nt{Ywf5%A(g zGw{+y(;)!7`-WHX%a?WnI!@v=A+dN}tv>!|hm!8D5RKz!IlM1Fv>j(0I0nysb{Yn+ zn1%1WzZW+=z71#I`kG6=I-?f2O%McjpP#n=5fcxg>xRf@$XE9xW6{bF7*!AUR~&C%jPn9LXF5_82yV@v0xD^#4%67q?> zZSCFo$+?%o^LM~24MKWV_^lD*nnE2I$*GX3+Xq#9cH#No| z`Sfo+i6Q+wzd0k?GsYfSq#?oX1-4koF=2%p`g8^8BVqjwZFPm^-dKDOmwazO=MoTY z1*siSQI&Tg^+&{^Rak`#qTEf|#gffX2s;>bRsyrsE?1VE&~1N_eU<|Mxk``hfqND? zhtV8Wlzz_HXS_lNjl>{Rx#21}NDe55?1FX^2x@o|smoD?SIWy*hk%YO*U6BOL|w^p z$tBjJE2|8#I3KCP3hAT8PY6nyfNJcbRj*XqwF_sn@pUbtfmyKc}J7_Azgv1WX2 zUU1MLUqLCmYL`;qN$NJPMmeP3Ebn$-2fMVYh|T6ovkK(Xt0saLV8j}$S_6!Cn>8e7 z{n;erMUZ6;jH@9Xt3eL#<9uE(Pj7S;l_vKd*LN0haAfL}IMIlx7Nz?Jy5$Q_!dH3A zmVGUyerEmskhjjDbU)POeIi0NrnD&CTA_pm|H-6ZPUA``Qg)xiz?iYQ`fhM;2rpO= z7QHkhfr&=>g2=rKagjnxkDycEyEYhtRP5wB6qRJTthtQ~X8|SEbNWP#hhQj-9`j`x zFCi~|*4j!ijzRjcCAaRZ0wm}A3*?sktU^TR$6)hExbh`vPqog2p^=e}a7PGIYE<&& zt4nT62$7MC@>ZEGwK={qfpY$|t~Y@Mb5$b9E33hzNg&zV)MM5>X0BJdNNd#hP%+o!bkrgkle;Xu&mQXd zcin4{dAe(6v8f*5CoWbh-?TuM0A3(lY@zuQS>0X(QJk|%cAF|55Ecb_Ih>c*o~6gA z*lM>b>#RK+iJ-Uo8O#08#vnM z8V zOA183kkfCe`^EO^ZOHFUac?iLl-Y(j_EJlEA2oAfG_#6~EYGiNTGl4qUyZO5*Ar7k zV5wKfqQjvGI@UQ~*zD7nO;TAO2TXrxrmU1tf;dqcUW=GhK(hRGi=LV)r|0=lS&R}` zD~D)AO>%vM6OOUsMlzY%5^|r&Yd=4(S+#DC}J^pX~T zv9i*~wm`K8q|hK$YpTrkdnO1E*tx!jcsPP?w^lh5xC+p}%tnNn9*r2lg&E6XV~C_M9_IVYN9;5x3m zQGkX{>+cHocrU0a^azNPpeCv+gu{fOQg=7(lBc42Jf8Z_f~d#=$B3kGzz~~s%ytun zGf=3Sa zZ~!~1s=?B?_!?h7X(CS9uM=fo;J=qw@aelZVa+zhEFF8WQUDMdgt%#9g{zqmgm6e9 
zs_Cq`x3Tu_S&;hr3iW;LSxHrPYRQ_wq!y4XjpSXKY`B#Lj|RUNU}Sx(^FCKXDXstg z`f*w@)T$_sLLNic%HBLFarBdbJLSa4U3EEz6iSb~oK64$AOJ~3K~$|_7RB43yWLpO zl`%Adq1h^;p;?SoOJ}lt9BIf}x_ano0e=1eOv9A1$VTTnd^hdpM*Jl%x9nHbQ9Pb1z!112Se@o}6SQScH`|DJ+mpUaz}A zHhr}R)TpLrGQhZeULFM@Le@^0QxdcWymVE`;u&Zt6*V&$BbQ)75LB!iN~-uyNj%_+ zDv9sgw6&{apAH$+_w|s0wRM@w`FWpTFb-4323Wl%#M!rR!XMUEanO`j+;nQMrOXoN z9^8R54(P(Vtsy@1pPTTjs)a6K#mkVD`csZ#Hl2oIN#r)k%X*EhEsYcnBTz>aX4_BPfgJWm5qqoCLsz8=k z@U6$TdF}Yo8IzLuHBwl!HOB4F4C2cVZbhW> z6JjW`EH*=F4xiS7e>!d~4xQQx0`TmH8m@S73+{VqIE`13jivng7jX4^CSlS1PK;~w z;D2@rAj|27QYvl+he{`b#$L8{6sG}Vx)D&^$b{qDeT~#cfNz9j9;DgGM+Rwfp_cpr zfZv*f|4!b!0|0QrUH$m=${L13!o#a0eDdf04a5J)p~;3tr{D4le!64_1_mQkmB3@` zBAj;1YXE?S`*x-)ZC#-+xN8%By>bXcwHObt7{Vv-Tn_*^X|MLi_&}T5KPc1&tC^!) z*uua@N~1X0IHv~0HApX&tLNy4!X{l(jj}$eM)~ysPTr>rKR9*wOx8a1e_a5Jl zFCI4zmmSw*Ata4TY%KgyGg|Shk55L)mze*JwU~a@YOENDaPLR&xrBgcz7$m@~(+EY+7q( z=?|Kcpiu)uF+xq)FeGr>k_v8ndM8U<*^?!2W&h#+ExF5o#N>bwqI@?TpT zm05^}^!eq-k44~1{N0_Kv2ts2U!S{YGye9VF}UQ|9^Cwg?K#`}KP!f?WPQ|V{6vkePoXylTS^MRxb1q8*Ox&Y+q9&9ZSWh)IZiN1b$8YTO{5PaFZ8Z6n0U z^R?ugiO8&tan+1ZXK!<6ASIawXF5vK*>SCrZC4PH*L`hK%kOwa5IRb$q>3%{)2~(2 z!v%&?=SU>2wxf0ADA774f%}KlA zxLNI()afDc>&H$X>le2}<_AJusZsgO}VG2!P4N;d9n@6((oXfqn7$Dlu0L5|KP5fTfXDEX=_G4`71 z;T_Y|JOI+Uzktp5S*uaCKMjS@Yo*Y3dfyFa9>Z6fwY8i)OAdmqfSpAc&(}vn(0i@S( z*rFf4hA+KyJdT~!hN)vqn7>CW=I_yp1NUgbk>7rm!(k?RXGtuDo6pz-C(P}{l@AW! 
zYro!xfnh>PXwB-a7swvCYAg1=Z63Q*S~Kfj_hkedmwOBUkk1~ zd3-)GA_TtqXi}2@^AGKga}ODVX=4MlctFXMm^HqHj~v#G-=4omLrK#03wOhjdv>Bd z0FIj3h3igA&i$XyZBGVdO3X5%lb@t!qv3p`#%4Hl{Z-39d_p8d>c={;+Z=+R^GFf< z@Rc2e58u8K)2>*FF_*lA({Fhd05E@YnaNqyJ8GHkO*-M0|JAgldC-*)4q#w7Ip(lw zt;MX$hX0e5cJaDIBuDMp&PYh24)qCVJ+l+jdP)rC0B-!lPRyU&g1S8I6qyeYXu)@_Y&^{=+1 zu8rMwC=IH0q%itMjR{dbu12{nA9x4Y(D}K9r7~6(5whbpr z|LB@3bL#}w?^KvPMq=*7GG5#iX6IeJu8QMkx8tLSj=^^x-GY55wc>m4@6FFA<^m-P zthR7vfq(q3t$1i!E3SFZ1gzV>9#6em!=w%m$IR@&M-Lr?cYJfLW!QD9nv^$mS`kzf zr^UtEwQLnz=xn=NB9M;i7D?KKoW^=SyN!UU9C2;dm{prtoLkO*gGMb<8Ifhnu*Q-9 z?;70w+)fPE6b6Q3eD|^KIO|6n^B{lPFZq){@5iy#_-k(d&1h;lcO=p z3mfE{3plMMny*6AbJP(g@yS>IdjMw~(1qtdH$ANanUq$6tY?!@`TQiKquV0NNuT}Aj z4^PI>6*KYZ1yk|Yv)l0x_w_d>cp z=U5(jae=B6=W|;^P(i;pQRh7**ItMJ{?do=%J0|0EA50ARN)07AQXgDsBY8PvWn0; z)`J(cKzJPxVk|!J{m){;++z7kKOYdgs%m%uU6hFfaum|k#5QIevul!rCCND(c1Kr6 z^2*T+U{%ua)yGjSWK$Q6u_OHo9{kGRV$JVXLaQCnaRrruusA3*?Z8O0ZkePJSHqM* zBXc*H6S#I$NI4qfsz>Av8MVFj0psQ7mLp_5heXKe|&TQahE#`b}h3ul*C z@hyyV^_-mK&r;>N(|Hy*o0Y&BDyU@fkaFluMvs{u=!^-+x1+5L z(jInJEh??7%P~_PG!`poY&^yl;c4+~&u+eNxU8>s($UFOa>$l< zA|(a}h`I6w8PB0NbIQ8EK6a#7e;Y;$4pTt@-$E}kuMl^RgDI30qyfds}wZ)obnx#7ZML^2o8 zbRW%-F2-rjCKwYar{LNpcO2y~+iwV)#+yh2)$K{&6O!bLy?sY8Na+%xGt*cbtUyJS z+hWU3u<&J&2Y)7dC!M6t&0Z-+fW~5*_Pm+W2Xq$tUeHR2hT%wDDG2iNkKwBr=_JYIwdc9v#{EgD+{g3 zS}vsW^%^!4Re(rtUI;=Q>0*VBh6rdk4@Rftgcv}CxVjP2k0E3^$DA|1vE77*3yG8Z z%UzATSlQ>w1tme=!a^lOZm6`dIJ(B~W)#O$_Uph^Hq*&)+pH{XYg$gDAfPj*oz3GI+(c*_ry+%g5D`?k z;g2p~1CM6s^7H$p2rn@d!U{y~YNRY?@Eh|bLRhJ3$CeoxX`k|$S6=m#xi&iK1MM=5 z3H5ku(m9C=q0vSQ?FBm7a9wB!sUT&9rFT4riF-eVQpbGAH2-NgZFG_TZQt51ZMV1c zHO(Deos&2*pQL~efi0_mjtQyzl4Ti1wj9c=IFp{+7>eOA#<&)UYNRn3hKO~FyA#o? 
z+bLZG2(=i{inT#VmPg7r*I~5EWK|f#mSuMXB1{c|hL9RsxdK8;t5#wN5#pJjK86Xi zpGLXsZIFJl;k~o@xNPj6@9J@4L!)NgW=oA+#Sj|XOo;SZ^e+cGCIaPlk>~M{fT}Y2 zrgGAZ-(prR6>>y~a+N+pQZ9iez5E0i1Yl&BaewXz3L zOMEsj;Fd|GJYrz=Z-CwB3$#y2HiSpPt5HcRr72bwIK((aZ zn$pd1XkgC;0$W!J1nmM|AVAtfIOKy=0If?=FAU}g4gy5j$=+fwK)XVHuE`5;( zT)E$Y6o}%Oa(kK=?ak=oTNouaP9J62fW+?*GJ=pb2oXX0VbVOt)9`{sp*wmtRy zIXik2++WB5L?sAWh7jfCZ{dw@gPK2uoc*S9&OUDpfJNqdHKWqnV1JSkj37h^(jf@d z!<@nvtKnf{J637lhM*AY> zbQ5V168Q)(MqG`Z?8gRR)wa?qk{~5Pl>WqC>SoHc6;FK$k~x^{^(ncKgg@t(-3^H- zSq=q~Aeyv-7pR;v13*V_qWpCWjhv-ck$@K%i!EQb{QfV#62Ds~h>=QHM8IX8-9h2l zKEl>j0NRrPF^UP&*N~ooRtlsI#31Vl{EQbM8j~8YXIXoliruWK%O732Q~yv(4Xp`Y zkoeTe$~GEhu3PHlxD>|IfQp2*>b!9ZF8x1mBVroKkU}Oa2Pp|7B>dsXq4Le6CHu{K zZt*4!qqJ4OSU7(jO3i=# z#OKTY7mf+=dIgMhc`-j`x#0>=e*$HuQGE7#$HQgkvELpuAb*rdgUON0mkNx)F@z8Zf*^|*MOx+~)7_O!$&Zv0L9Isa)oQhR-!Fdg zi!0+et~eB%{sgWExqv*lQ?c|q6vImuxb4_uk3D((`0<|wFaf|HRlXvD=Bm|d^}c)W zz4!7cDOT5(+xO>$w$STUXj_@3Lz!DzT3R~Cj2Uxm5CnS~0cx!?WXtos6xpl)Z-j3I zZ$9@tFRiK}NGUVn6H#ZmTz+R?U*C_GEnBw2^5gZJ7|av}pxSapxDP6J;Xw%IvIPLj zt*xzHU0q$rmrA7>Z-RVpefi~=Uo2R0{uD3WMAl7Z>hFTz?6ak?wY$5!M+h<12-yg- zR4Qfv+o&FgbB(Z#kX^^552LfYySop-`gAN|YN3@QIDE5|njG$tWj2SZnz&g|^jnSV;aA?NBK>;)yn?Hh(MS4t5nE|x+_V&&S zf?#SRge)6q!2Ew9MD}BZN<_}%(%S0n6S(mOjb$MsZ_Jo6e?52Z+(*~1U%y%i zp`?@%0G;}Qlpag$=^@iN>2+C}` z8Lg>&yCzSbJT;Z?_%}no-QC^C&z(E>w;MKWSS6*bG5O|X;F6vB%ys>dQ_pYx&6Qc4?K9ngb54U+S(2)l}c}_ ze4{AZT&-50W8WcTB;G{%+A)GK^8Nkqe}CW3ojbPx7-leIgOwHzi@_~(DRnd2=Fgu$ zZ_b=K*ZICbO(~TLkBAV*F@hlA2t|W+jG`zDNX*Te^YEq8+>n$q6Po#sloD|q=Yk@0 zo+yeC1VMH^^IdZ+_eE{N$8ikb_p^K8R*wta9P4>r?wnFe3=R%{_nv$1xjGeimHGO* z06j3)xg}NR182^hd3_KBZ^_p!-|yXX&plTG*vXVyp-Ep3JI(5`V6Zf8+O)Gh&zr_) zHou_ltJ!`VkIHSB$@UA)Lv&dheoi9*J|_Oa%#S@TZq3Z+T5Bk!ppa zTU*;{bLPyMks5)b462>Z&fK{vQ>L8b`~F+4Jyfg`rvnKq3n+IZBGmg`m@$!dbFPHp2<2cS9Xb=S117Oy)7KWk6;tm5X8fCT% z?s6GHt+g7Be7PmA)oKm$Em9WS2mT9`ZmUeL zYON&zkBGc9J18tmWwa92Y86C;mX?;BB3l_j7d}e-A=ooMiX!-ap7v-%Rd#U1g=(x7 zABVj(+d-Il;8IHReIH6GisP6(&(lE=C;*j8rBX=+9;UL59ahA4=gRHv?VX!9Z+@g) 
zF0WQfNh0zAh$0lg+~@G{uvw0q;?WJ@ZmMiXX7+uj^35rpEl>Wx9SGXb)SghTmyxgW zpOsSJ@+G25rBWGAV~#5Gp;b{L#bZAv80?%iYu4N?>q z`Hm>xwF3hKH!fbh`1VSr(hp#WAxgx_HZJl}3xM*`M<0E4XJ_Z}LI`0;uN;PP%|Iap zN~KcH!f^rcNZMZ7Y-ouGI3`fBT^ZMsX95$Y6e^WU&fnvLwvsu8**GJEIF1p=an23! zJg>W>qvP$Jot?|qu3fv*0o@6v&@BKujz0S6YdShQ-jeU=j=5>hDC6U0&|1S- zT-J}nWsX#Q_L=RwvTL>?Pt12Eusi_15Tf*!e0NR0MATxzqNCzNO8_Da!>W|B!`w_5 zhKQmF<#IWP7aAPNmo$jTQ56Lxe=Z!F6O#lH)wI?fW+^byFTWw1JYz#gMX(KD zUX4)rO>xU*gZTVh=H_kR&!$qkI2|r{${sggUTyTlr4U3^Yhsys%z{66`EpCnEwwp^ zQT%4iKNridC=oLn`D(3eMO^J7mxCDm7IObdN{Mp0+;AaVn0%SBp)%XwE8;5HI4Z{_ z%`wIb*d%s40OF`Y5t+wcer+YN5eSY7@Ow!G&M{|E#QFVG$=8Kh^K>@%vyCiVkf$v- zTfVMw^HjIXujcNNOTMYWpdz=z#p}rUyHd)kJ#$}9FIb9RBnS6^_h&CeA zOuj|Ngd2+y~1U@IH9%CRdSR_kCRU4dlE$2m9DchR4EU>*Cxs;4A|8}5b zTX~ZN=dBwyDGR^%!*<3`)7jE#_Ri^~<9 z%fPnOHsg+;XQX>^WqJ9EK7xFQcco$_S2duNs`2RBHRJ4?E&|SNe9{K@xx(b7Fq!LhA8MS+_=>CLmSF=*S3v8}61p^N(a6_c;fmZ7SKXnFl5m)g8^0;y)D zN+!TGf-aRx+40=>WmY?6K_Ny`Yy$u~0KHs3dy(xL)jA_`Oa;`R8@q?8Kzzzs&&+S;1)CHb;%6w(A` zd?~bD5Em%#!iw$c2N!9-C=*r$xw&fJ?A65e0Bj2sh9Nx9LrY5wwALz)<0^8yg%-nh zM!p<_m`{pK2Wm3L8^ z+sJ+d`4-~PXCYh=|$4@LsY(Jh3&?D~h{8^!YkDi4rvX6Pn`7`I+t=ZEg2&9ttGN5mQ59UG-j= za@$U_1;1%a?Vz2y#ZZ%aPd;!09p4qB60_Gi7u!xI4Zbh)=@)T5vJx*Ru3PwytC>_e z(nc}hR-mmEV0qs8ns{W){~yS2(r~8UOF-!`V-x}bzA2JEbT4uyU@=6}afl7b-WBu) zs`KJxF)2?I2^MbaGA9WC#a54jB$Nwk0#?m}1hrB3SvGgf!t^cgfPASyifNpLDYXB! 
zs`t4EGRcGdS?yxK*t@4it0EB8Aa&51k+Lf&T` z0s-n@!fAwSDB}*fo0}U10n`vN1QoQNK^BpWmC5oR5UO8(!?{kU$ymIhg%$Imr1Osu-8PI7Nqj0hZ z6!2wg>$cD{EV6(2Z5TM6;lZb>DkdPe+kqe&pO_%#?*Iox5X}#wD$Hu%CY|Lj&kwzh z#?>+8$~dBtyIZ0bfy=LPoxFI2T0HCQ0WI(~ri8`ZGGla|5=?UR3%EtDs#S;IP5qyQ z$i3UW)|>g=iw9{Xh@}i^VQ(y|J}U4?G%PR%N9+ar=fyr1`vnF?N=nLr`-VLdti9&| zH|}RP(}=BW#=2#=E*{s`t)+(azq>IK7}4rpzf4wa*~ki{Eny%aHbBYE-U(Vf`$Z1H zEz^g^0AptZ2HF(S|MDv@u8n*th{wI<)RBgU#%61(q<-y#IrxPYW|7W4+Urr=zLFLq{OdF3L8B{f;99QtPouS0e4dJrllou@g|gT{-A_+Vj%wGmqGSEjdhj z@kAQ(GIhf6e*a)-{g>21hw9o|R(_sgPaWg03Rm%){LPK!FB*k3bsb0pB;z8qeRjOj zZ-K>@bF-W*7I9wxMalOD4M)N(Q6=t<)OPH|L~;fhJ~E9v7o7jgKSh_yy zb{cnkYTf4R;hoMkW^RvGJo1OGefh}1@6VVRNJ@I5o-o+1%c%Wb(mi^ZRm7Xz!YD^I zNbSa@|J5!>96AC9C3)*aacW;{7w7vxU6)x%YO(Qpu#OA2*QrsFVX;XCF{U?3T}Oxh zcrh$Y0guC&GzCf$j*pIxY!>RhOe&s}g8SNX65Z<){r<$4oy;6{4)@=4EsN{y?3~5* zCU8av9j-6O-a_^DEY8noCbXKBI8Ha<|84CTSzk~AZ}JF$AKAF= z_%W`mRE{NneH1l*Qaq(mXe(fS2k(RmB7=BNJh}B5q)Y1s)?*(Onh3|Ya^T+UMXU+E ze^FTT%zX}#dkzDs+wdO+IvxltfF@E%`dvTg_0Xp%1$IdJ^X@npo67C)M{O4BRs;+=Kr63$Foit&P5y<#NN*KpWUD_RH&L7 zcdNwDP=kFwjnX34f>{%KGDF%1Zno0l_wH_Q1Sz#Qm2KJGMh?%#kdvRL4;+DG z_`SK={zWaGgZbYr>?0Q#*?uhwqlTKHxhB*=KZuO})~@e0(qEzP!4a@8^FfKela>hl9*5tu59ov5;E)}O_^MZ~DM zZE5Kma8Kod`zu3%qE=08gix21lZyf6B6ucn~gbU+}JwQx&XYAo)P1#F|-6UsZA zHtvhoImV01K+%G?Y>Zej%Agb}>N3PRDmBRH`HN20x0D!R(1L`_d>p8!ntJWqrDgla z`{BYIyLG?SJIkX-nF?JbO^&6AcpA)udlrHZQ+Bt$*t0U`$64@awyvb#pJYtV&7|)%@MGlP6ZnY<{o?CEsS%5Qr9`}@gEW|>^w=zCZ*H_~0H4snnbT|UbUyHK5q0ekT=g#4=)lK38 zz3~5P7}1ZkqHkY>9M)8WNnGe>xrTP0xA@Rum5pcp25c1>Gcz#Q#EbEB5i zb3WXK@zu(J#9Rep%Bo-(Ei3mM+tqg-jB7Q5?RID1hK*FA&bEuW*bY!yT99E? zYw`^?#E6B&zo0uFwCHu8Dy;!vOVL=q??^&tS!m!4EDga+88!;zC@xOORwa< z<%ex7jm0W2K7-OAZmIN33lA(J>}J}rxBjd8;V<& z1DGli^3OL_t{z?ae<5eP=kBF{Y!j@2UcB! 
z=FDd+4jJB|#*P{+69wZ87_RUItXrDz&IL*sKhox}%(R87IQn-WFkq{%Tm8|5rex@O zAmqM_i%6m}J`lk0T912aF#dD{6C;q=7XrYlae4KXtbXi|@;ttKlmB~OO!wn?8K(Ch_T-uiq<1fleq=XVvA9Y6asH0c zFktDH&ys&BekLGQDEhThM&vywrSod6VUfrrOr)Am@MZ9cfK~P&%alQ>)Yj#a25bGq zdadVBvRFa?8r-H?jk)I%&B=j>hFyVgv{@lj+gI4v<@| z#!SuHns-EggKOxcGcnN5b@UY+ffV|jgbGS?Vw_2sfwAZ^oq<-3(s&$n{n`ry*$A*0l$Kr zR2P;XWDhA4H2!Yb<(Z_$rMNu~XhV@02wf}GI11c}Y%)y4Sx_wP2q zyKfVdr|!D1s}4FUkjRwJEF^Dk^<;Qvx%HX`!Q@6!C2Ef&Ea$pP`oqb2C?NIwJ)HaC zi3Mm>n{X@fxTH&564^43&yXYpu~phgmjq_sF%~Iqofvj89YzY_@*Pv5Rl)(*3u)Tx zpr+_#7*#ysa&Ox2S*WqxR_31oHgCv}T?nlwXblzb{4n0}n%b+AR(1*aHo2YV6DzL3 zrEs)!YU^2JYp1R{_iIj%CPrTTUbNSKHxyG?fEs{%smk0g)7MdyHm4mL{jX4efAsQv z6gL*F6m>XqkDgxfhylp)7=3N9riR*0^jzs|J5p1;nYm?`U|l*$p%#b)PT7@}c=4X) zXkOHf_d{FRyYO2)@I4jyQ7-TBxi^!nx~_qW3U#C|nu{#4Qc{KUN%=T6v^BERON~~` z^?Z($9CW^jAy)AH?uCs}BCiHE%Y7BthFzKV#L{ zOd#vvWKrq=;GxT;diM9{ZYUH~W5ZLg5?qJ2;$H(0XCh~)LgHDp&#e`?o~aV^{XT)D zr4)4@ubZ_NzC=|V=@`&&w8{b|PHwxr^LG3k4aaqU_!0>(V)u;T<|?1R&U9~}RObo0 zP8a~|%YW18T?OE^0{>1LTY457X>vFF)tM(w@5SYiam2LPeBVk<#^3+mR zase#`Cn~?M$XRHa;Qq=V>;~0$--GXr4(1U*1kYd4esZ+?BXipCm-Hwj!H@jlo?(V? 
z@}PLDf-$K#m}G)W(vG-VwzK=^Pe+CmD)!=do*p#M(ElArQ8H0|)(b9nd~O+xt)V4O zm-Bkw8B$__zP8u z0wo^-+q8h@1=oP0;0OI$?KEpCVIDsl{D5Y;sdRZdY6n!+>!=WO`4eD96#o9FKbt;` zmUWvmh=~-*GhkMmluJz8FHpCW0FFdrNG2*kk{BEHI^NvYsKX=lFjH`@oF1-A8TF#o zw12ZfcWb7?F8WUW>IwTBKF|v0*fAGU6QWBP|2L*)6Y}DuL*jzVqEUuj29;e=b?@42 zO3i1UIqo3WsoD2BN|X?}v+b&iX{+GR2Ew*w&BqjrdE1# zTJ+P8Qk1>i0bHf^D{*9bctQpM?FKZP9njj{fE!34fd~D!vR_jaav5;-vg8?jPBQ+q zB%K-e$2s)(09%6`yW`0>JU$eDHow?INDKf%oYR&8+z^|&>X+JrpkhlyPwym{=w-dr zumuCsokXLsxOObB3oa{;pFLO`B!Yk@cmRpYupiG4!?q)YExx{zLpqUa?Hc(2Vq%y4a-1{86h!-R9dzaVS?szEX zgSmTuxg9K(kgf4h@t*x&-lW+or`)0yJx4Sox#MVVMF=j7z5rv)*eAz9tc0$Vq$DIqx}Wo2P?m6ZpN z{7L#%eYkCBoj+Wyn|r4&gzvr8DSd4D{a?x#Q|C`Ey^XA8EHtySAu_J8(ErdE$C#YRop zcT=eHoD|^3D~TT|(nD}n^VrZIyC-3<1AYkWFfQ8b#ZVz-yOIT-4VCOC=6&|2x?Vu5 zksxO^{Fs*ft659cTcfRqp?%x7abxItunr zGyAZ5T?#e2PcXV;DNkL5^~k=_LOtC}*Tt@f84K*%o05&7_l}NIeYUSs4qX}Hi{gJR z&a0Hx5I#pKaL6Kr?_@d@X7ov5ELgiE)bP!R?&I+KOsAfyq_lwWsla*E^Q>zfvhWvn zH_FAkdnfI981J}UjXJcHM_m6gfN>g#dbxR58NW%LDtV_Bd89*}C#sodr>E95o(>nf zotz(5z%|M@>O&=IrNO7>;->pr`~>yutitO@^TPd}{k48WD(ewMDt)cJzH_{?^eb^l z+Y70W!o$wNwtDpM6eJ;S8$niu!Fq;k;G>SrYWu5Ms;EB3@JFnd@}@a=undd`3em5c zl~VdQf|QiLREuf8^A$)^(YDceEqE|EHis2T$w-Z8=K3!I8Sg3)m*EsE{R-l#rmGOD z1$S^2-d{r@f8``;?ni4;-;?ffFf_hhV|u@R`VQPl7?-;@#)tRbY`&Z~+j=WW%hQ;b zC~-iRMeg!8{PgrRc1kA5@Ib~}lJ+vUzF_Qnn_lPFiT5v@pUxpl6eiFNZ?z~)l$iIazU_p#k@$rbu(Y7C#RiaFP(RQHal&WC%SW4M{>(wgQny)Cmkft zs_eu+DCo3l`~}>$q~=%fdk*pU9FcmMhXhj+{FMR*dQ%ouK)d=&ME)7hUXpeRO?_cTS($bEO4&nl5{-nCKPA28ZSbBt7 z7Y*dLotC3OgFzQL0ujT**i0cHm?7{WjDqhxG&B@OgcLsP^UTLovC)v#ot&IRml$nr zF?1;Kb3Bx4(iE2E-7AmOc{^CH7599k1E~;(+TgM_WT+yxu5r#Ax6`Lq#cbqpkudJp z#mC3<5?zblR9P*&EA{g*-XL`y@@b^htVvW0<|xaudyxZYx6{fU?{lvCn4Otfum$7`@ zwEd7mjsRR}zsHoUBqz09NpU!L=aS`$=SY9b5Q zn`%#2Rq_=mgvpc!(`(){TFK{EW%X$h<%AKrtBgOGks}WS-))9GQPX}{`oJdQe~SyL AN&o-= literal 0 HcmV?d00001 diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 2b26d7ef..24657227 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -1,4 +1,4 @@ -# q - Text as Data +# 
q - Run SQL directly on CSV or TSV files [![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/) [![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) @@ -10,7 +10,7 @@ q is a command line tool that allows direct execution of SQL-like queries on CSV q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and column type detection, and provides full support for multiple encodings. -``` +``` bash q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" ``` @@ -18,7 +18,7 @@ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" ``` -Look at the examples page for some more examples, or just download the tool using the links above or in the installation page and play with it. +Look at some examples [here](#examples), or just download the tool using the links in the [installation](#installation) below and play with it. | | | |:--------------------------------------:|:-----------------------------------------------:| @@ -27,6 +27,8 @@ Look at the examples page for some more examples, or just download the tool usin **Non-english users:** q fully supports all types of encoding. Use `-e data-encoding` to set the input data encoding, `-Q query-encoding` to set the query encoding, and use `-E output-encoding` to set the output encoding. Sensible defaults are in place for all three parameters. Please contact me if you encounter any issues and I'd be glad to help. +**Files with BOM:** Files which contain a BOM ([Byte Order Mark](https://en.wikipedia.org/wiki/Byte_order_mark)) are not properly supported inside python's csv module. 
q contains a workaround that allows reading UTF8 files which contain a BOM - Use `-e utf-8-sig` for this. I plan to separate the BOM handling from the encoding itself, which would allow to support BOMs for all encodings. + ## Installation | Format | Instructions | Comments | @@ -41,15 +43,7 @@ Look at the examples page for some more examples, or just download the tool usin **Older versions can be downloaded [here](https://github.com/harelba/packages-for-q). Please let me know if you plan on using an older version, and why - I know of no reason to use any of them.** ## Requirements -As of version `2.0.9`, there's no need for any external dependency. Python itself (3.7), and any needed libraries are self-contained inside the installation, not affecting anything but q itself. - -## Limitations -Here's the list of known limitations. Please contact me if you have a use case that needs any of those missing capabilities. - -* `FROM ` is not supported -* Common Table Expressions (CTE) are not supported -* Spaces in file names are not supported. Use stdin for piping the data into q, or rename the file -* Some rare cases of subqueries are not supported yet. +As of version `2.0.9`, there's no need for any external dependency. Python itself (3.7), and any needed libraries are self-contained inside the installation, isolated from the rest of your system. ## Usage @@ -73,10 +67,10 @@ Please note that column names that include spaces need to be used in the query w Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed. -All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). +All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). Take a look at the [limitations](#limitations) section below for some rarely-used use cases which are not fully supported. 
### Query -q gets one parameter - An SQL-like query. +Each parameter that q gets is a full SQL query. All queries are executed one after another, outputing the results to standard output. Note that data loading is done only once, so when passing multiple queries on the same command-line, only the first one will take a long time. The rest will starting running almost instantanously, since all the data will already have been loaded. Remeber to double-quote each of the queries - Each parameter is a full SQL query. Any standard SQL expression, condition (both WHERE and HAVING), GROUP BY, ORDER BY etc. are allowed. @@ -96,7 +90,7 @@ Usage: Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. - Basic usage is q "" where table names are just regular file names (Use - to read from standard input) + Basic usage is q "" where table names are just regular file names (Use - to read from standard input) When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN. Column types are detected automatically. Use -A in order to see the column name/type analysis. @@ -346,16 +340,24 @@ You can see that the ppp filename appears twice, each time matched to one of the Column name detection is supported for JOIN scenarios as well. Just specify `-H` in the command line and make sure that the source files contain the header rows. ## Implementation -The current implementation is written in Python using an in-memory database, in order to prevent the need for external dependencies. The implementation itself supports SELECT statements, including JOINs (Subqueries are supported only in the WHERE clause for now). 
If you want to do further analysis on the data, you can use the `--save-to-db` option to write the resulting tables to an sqlite database file, and then use `seqlite3` in order to perform queries on the data separately from q itself. +The current implementation is written in Python using an in-memory database, in order to prevent the need for external dependencies. The implementation itself supports SELECT statements, including JOINs (Subqueries are supported only in the WHERE clause for now). If you want to do further analysis on the data, you can use the `--save-db-to-disk` option to write the resulting tables to an sqlite database file, and then use `seqlite3` in order to perform queries on the data separately from q itself. Please note that there is currently no checks and bounds on data size - It's up to the user to make sure things don't get too big. -Please make sure to read the limitations section as well. +Please make sure to read the [limitations](#limitations) section as well. ## Development ### Tests -The code includes a test suite runnable through test/test-all. If you're planning on sending a pull request, I'd appreciate if you could make sure that it doesn't fail. +The code includes a test suite runnable through `test/test-all`. If you're planning on sending a pull request, I'd appreciate if you could make sure that it doesn't fail. + +## Limitations +Here's the list of known limitations. Please contact me if you have a use case that needs any of those missing capabilities. + +* `FROM ` is not supported +* Common Table Expressions (CTE) are not supported +* Spaces in file names are not supported. Use stdin for piping the data into q, or rename the file +* Some rare cases of subqueries are not supported yet. ## Rationale Have you ever stared at a text file on the screen, hoping it would have been a database so you could ask anything you want about it? I had that feeling many times, and I've finally understood that it's not the database that I want. 
It's the language - SQL. diff --git a/mkdocs/docs/stylesheets/extra.css b/mkdocs/docs/stylesheets/extra.css index cbafc84b..3e6e1305 100644 --- a/mkdocs/docs/stylesheets/extra.css +++ b/mkdocs/docs/stylesheets/extra.css @@ -20,3 +20,12 @@ div.md-content pre { .md-typeset__scrollwrap { text-align: center; } + +.md-typeset .headerlink { + opacity: 50%; +} + +article.md-content__inner.md-typeset>p { + text-align: center; +} + diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index 3a56fb2b..eb83cf4e 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -20,7 +20,8 @@ theme: fonts: text: 'Roboto' code: 'Roboto Mono' - favicon: 'images/q-logo.png' + favicon: 'img/q-logo1.ico' + logo: 'img/q-logo1.ico' extra: social: - type: 'github' From 81113386b0a849ad9118ddcf66ee018e5b61914c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 1 Feb 2020 13:07:43 +0200 Subject: [PATCH 040/111] wip --- mkdocs/docs/index.md | 1 - mkdocs/docs/stylesheets/extra.css | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 24657227..8e839f9c 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -2,7 +2,6 @@ [![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/) [![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) -[![License](https://img.shields.io/github/license/harelba/q.svg?style=social&label=License&maxAge=600)](https://github.com/harelba/q/blob/master/LICENSE) ## Overview diff --git a/mkdocs/docs/stylesheets/extra.css b/mkdocs/docs/stylesheets/extra.css index 3e6e1305..74eb969f 100644 --- a/mkdocs/docs/stylesheets/extra.css +++ b/mkdocs/docs/stylesheets/extra.css @@ -29,3 +29,10 @@ article.md-content__inner.md-typeset>p { text-align: center; } +.md-nav__link[data-md-state=blur] { + color: 
rgba(0.3,0.5,0.4,.4) +} + +.md-nav__link[data-md-state=current] { + font-weight: 700; +} From 9cdf95534d980be16f75eb5ff338bde07467d252 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 1 Feb 2020 13:12:31 +0200 Subject: [PATCH 041/111] wip --- .gitignore | 1 + mkdocs/docs/about.md | 2 +- mkdocs/generate-web-site.sh | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100755 mkdocs/generate-web-site.sh diff --git a/.gitignore b/.gitignore index 8f686707..694b157b 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ win_build packages .idea/ dist/windows/ +generated-site/ diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md index 15c04df3..b0e09e45 100644 --- a/mkdocs/docs/about.md +++ b/mkdocs/docs/about.md @@ -4,5 +4,5 @@ ### Twitter [@harelba](https://twitter.com/harelba) -### Email [harelba@gmail.com](harelba@gmail.com) +### Email [harelba@gmail.com](mailto:harelba@gmail.com) diff --git a/mkdocs/generate-web-site.sh b/mkdocs/generate-web-site.sh new file mode 100755 index 00000000..0014729b --- /dev/null +++ b/mkdocs/generate-web-site.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +mkdocs build -c -s -d ./generated-site From 6d08df4fa616bb4da84abe1dc98a391268f3a2b1 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Feb 2020 12:51:27 +0200 Subject: [PATCH 042/111] added more stuff from the original site, include GA link conversion, but not sure it's working yet --- mkdocs/docs/fsg9b9b1.txt | 0 mkdocs/docs/google0efeb4ff0a886e81.html | 1 + mkdocs/docs/img/torii-favicon.ico | Bin 0 -> 370070 bytes mkdocs/docs/js/google-analytics.js | 50 ++++++++++++++++++++++++ mkdocs/mkdocs.yml | 2 + 5 files changed, 53 insertions(+) create mode 100644 mkdocs/docs/fsg9b9b1.txt create mode 100644 mkdocs/docs/google0efeb4ff0a886e81.html create mode 100644 mkdocs/docs/img/torii-favicon.ico create mode 100644 mkdocs/docs/js/google-analytics.js diff --git a/mkdocs/docs/fsg9b9b1.txt b/mkdocs/docs/fsg9b9b1.txt new file mode 100644 index 00000000..e69de29b diff 
--git a/mkdocs/docs/google0efeb4ff0a886e81.html b/mkdocs/docs/google0efeb4ff0a886e81.html new file mode 100644 index 00000000..24947e8a --- /dev/null +++ b/mkdocs/docs/google0efeb4ff0a886e81.html @@ -0,0 +1 @@ +google-site-verification: google0efeb4ff0a886e81.html \ No newline at end of file diff --git a/mkdocs/docs/img/torii-favicon.ico b/mkdocs/docs/img/torii-favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..86da98b72d5f398cc00652f667ee5eb1193e523a GIT binary patch literal 370070 zcmeI536xYA&W^ME6~tv&Ax;+5Oxv>BtU?$ho#xl&<%7$H{aavHLoo?^{VSu-@a9^?(dy* zyQ*H@ci(;A|Npz+TdON7x>UTr;(`k*%+#&or)O7GT&wT8wS2BJ^H-XAS6p%W^ZP3* zes@ttMUNgWpSxdKQE}O4Dk}Q+J^lHdE)`$ut?zZ}R?)rN*%iOKxl2WN^GR3H&6FN1_R{Nr=KDv!?jQtLM9d!5&|pGoVV zu>9vzz4Y4F`ubkyrD6FSs@^D1t-4O8ul&8-TK$fw=zE=~-(_w?w(WoDsn5##S0>Ba zofFOL6j{4t!s+*uMZe3n{7qvokr}Ic%8vcZWc$9w=5?uT+qck^miN7$shJ9CzQZ@izNjZ@fO%-{QMC9=>0R zzg_%?&A(cPKK(DP<8)iS^gFH7c7D=l+xYLTT`R8~cs8+p{Z8v7t*`BT)^Ddj@yCC~ z;P3wFeL9#r%s)UioS2} zvo6zieQiJIyZ(5&w*2isyg|0_U(&js&Km#p_(1pJ57loIUFVQlSIXd-J*2jNM>O6( zJoEE1c-9x9@3hX4*b-68q|3(jw9R}xlUb)w|4e=Ew{aJoy=t48eaq*4q5aqU z>t?L#6_w?ihO}7e`Xs=`)_(r zVLaVE`7c}hmX`hJQtW@P%C6>^@9gNB=8%m8k?P;A~ zd(_OU+bXZtZ;!5>#$VJo9FW6DcbWM+WbBe38I8I~&&u|XL1X>x{L@8t99R||`^Cq9 zaX(zVHk<3p&8M28`;hWEj@E6k>Hm}SzIH}8bsLU1)=G`JuSu^%uh}-*T#Fte&#b>& z=Dm1}%wBV&%v{|kDzjJL7|qk~w2s!*Wol~IMQyZ=$FTWdZ{;P9b+%WV{x4W}r#!cL zsJXBDzoYw#`;WOOuk#Jr)|)v`W8Htu#q!Y1&&k9Uzm!#5hRfoY?swk*%ypK^dz<)| zN}n_PLH{{_o9PGJ_xK0@*m@?jKK{W!nSQW+Uk?6>??H*TB~7o|_m=yC^qDA~c=a6z zy-%x$DSb_e@6)O<&%Cys`aW^FTtD=pAWOXMw&P2=)Q@#9m+i6L4z|CAd)v5`OMUc5 zfAv7Q_}O+l@IU^?|L|`+Zj?)X#y`eC#y`ft&KS@1xnlkOB`s5*`#*XL5= zsku4Z_sPray7Jk+_UgFY{z@s-gR)5%zNP$nft;`=J~IFGHdk>vd{c3Ydq=yWtjPVA2Yvi^!=PQ{bkp| zmGRf6?t1f_*lniV8nu!6KQY>JWAnMA{OkYn(BEsVTKGLPz8!DvZ)d-0YHo~v&$V*F zx1#??VYAJdbvv^C_e}YSG&P^->`iUAef?eId#8TUT!k>+_lVo57p*%$9}g?}`35js9O0{eLO_Mqg;2$?aNDzcTxL z-H5KzZ_I^qw`ue7xmU@=<-d?cFWnnGCtBaIKYDIfJ)plEoVi>+?+e}jJ12FQ(Tn~= 
z?wI&~^IUMperqb9ekRq?-?`Gh=rhXqO!=s{?N91Y+U6Ikf7-S)slRN0=bisbig_u` zM}PEJ50sK8ef95b{qrD6F^#!T`kwjzv?xZb(Z9U) zZO>EakN)a`_I$$k<>?dnFHc?b>H0po*_;*LYlf3w)`z5pJZ?oSx`pupG zEUT_V*Oj?$eXqaWtbNmQH7zBL`j2?#+u439Jeqgkv`=<=>~$SUllCnw+NU-DqrbcH z1^(@03IFynb2%o$zq|OuzkMv>-#%vWU)-1_#UK9db%B5Tm?inIkon|)ckzdR`&h!i zeas5sJZTyDcNc&7w~r-#%uAaGtab{JV=k{M*M8{_SIy z#Bw3?;on{S;om-%@NXZpLO4%a2L9c}AO7uQ3IFynOJcc@`S9;9{_t-fOZc~sSs|P! zEd&4V;t&7!v4ns7m?g1X$b9&B7k~J-k0t!u$E*;}la_&hckzdR`&h!ieaw-#%tZEEh5#{@ukN{_SH4|MoE}g!80j;NM;R z;om-%@NXZpB$f-A5C87sUyA&%+c{Zk>vtrUqYVd=ett*lj_plcE?1j2-|0G%Chc3U zwshMH@qA}3Q;PZ@s@s|r-qg?6ZR;#83uzbq{aOnB+s|F_-+qh=VLjV2rO5y0y-&;W z6NeMaiIc|?p=~@_pSWDy=82}`iOV;fsB3q5T}RTSeT&zV?Nzren-0aASBm<#7mJSW z|Bqkxvn2V@UI?l!KJS@ z;J>{2?^SuW^dEDv+%oPRa$EIf(LTBUu6|r5F8}43(4PKu>eMNDbk3LMrm>eq%l8}e zR`dS=-XeWRT_87%dXx02dV};Hd9F-a@vE%M>&$@@KOonSI47#tr|LYpq3Vrh+4G|| zw5@)lFO(s(z9_n`Y*Y2ppUBPQE|r_dUTXUBw$pvNDC&o<{}02vloV^FIQ~Dg?rzcJ zXgy9p+0^iw($st+Tj(QAw0^Yg$)nQPTI$XFdeLM1h7(6jd%2JQwN6W^lSZ?QF8il) zH0oE9{!Pt|=6ln3?SE_0zO<}=$%Y5fKga*a=sog0dDUF|Dc7m)c#RoL&Mz4c6qElk zi+?CP_AiqyH49|j&PlRr>j>GrcV6PyS`MDjabx4|r)25Ihh*iJ;j;dfDY9wLY#F=c zM@8{OvGmuxezf6WIY{bc8;&>D%76*)^X`FS>9780Ix_;#Z5|qg{PSF{gx=GOB` z_Z{{4!`E@NuBRCMFI+#ULpfe^sP0>9{y2Q}6{)FRC)@TdigM|yEtMv>J}3*;-XXJA z_mycYe;*z1RWH3(Mlb$>R4x2B(enqx=U*+u=6^$mKK*r5{xy29^F}QAc2q~}>N0w) zsBLIl^G%!Dwys0-wyv|LcD?DtuBb11ypnWWkgvO!Z5mS6`s*=y{qgb~J~o1c`~Yi^WrOa4n9oAVVJH05K_c>(=De7!2WdW}~$Hnk7h7ai9IO}#=Mn|qav zGv1lArhl}Lw{71-IdW{b@kVpI+pYTnH;;R}w{7yxzwYN|GSYmiDY|D*uZu5Se~;)f z!2{Dj9UUX}tbCofIAqh+8-nQGf{?ESr zxa>c?QJ&rWL^QtMGv%Yvc{!J{PE%WYF5q7C|0E|a|AjoiwJJLIv3BRgvem!m@UzUe zmDV}XQu2$&=eiNEFI)XfDc1C*IQ##^-QD%Oj>gh={_kjiS>CO8(BEx5)81MnZN_L{TMAN?8s!}S3ChpRo~f7sfiKl(HNhwTCM zM}PMJ!}S3ChpRpNe_?Bn{^-y6AGQb3AN|??57z_mAFlT7|AnnR`lCPNf7l*CfAnYn zKU@#Mf4JJS{};CQ=#Tz7{@?oORjv1d$N7NQ4+Rku)xLs>2oM1xKm>>Y5eO~<-F1t* zm~xiBbkk2272VEmdCmAc=;2;+`y}iA5UvN{KV0qM9_`^D4=@gd>jC%=S9`ced-%r#j053% z0RF?(9`4Z|{_z0gK)4=&|8TX3d$fmtJis^*t_R>hTn? 
z_{Rf`1L1lA{=?NC?$I9p@c`pMxE_H2aJ7efw1}iA5UvN{ zKV0qM9_`^D4=@gd>jC%=S9`ced-%r#j053%0RF?(9`4Z|{_z0gK)4=&|8TX3d$fmt zJis^*t_R>hTn?_{Rf`1L1lA{=?NC?$I9p@c`pMxE_H2 zaJ7efw1ZR9~jsJC9Y$uIbxo}ph+X4UZ@57t$@5g7hTEf2{UC96Be_J+v)Q5lg_u(`0 zzaO93Y6<^-bRqwf|83dyQ6K){--pl0|9*UCt0nyV(S`g^{}*ebk44`1j#6^1mOS*=h;@ zesm%KlmBhm^id!F;opbP$p3zPW~(Lq`_YB`PyV-M(?@;yhkqYFBmevHnXQ)a??)H% zKl$I5O&|5)AO3y#jQsD%XSQ0xzaL%5|KxvLHht8GfB5&|GxEP5pV?{&|9*5K|C9f1 z+4NB#{^8$;&&dCNd}ga9{QJ>`{7?S3Wz$D}_=kTVJ|qA8@tLib@b5<#@;~|CmQ5e^ z;UE5e_>BDT$7i-$!oMF~$p7SjTQ+^vhky9@;WP5TAD`K33IBd{A^(&AZQ1luAO7Lr zhtJ6Wetc%DCH(u*h5S$cw`J2uefWoeA3h`h`|+8rmhkUK7xF*(-efW(0 z@5g7hTEf2{UC96Be_J+v)Q5lg_u(`0zaO93Y6<^-bRqwf|83dyQ6K){--pl0|9*UC zt0nyV(S`g^{}*ebk44`1j#6^1mOS*=h;@esm%KlmBhm^id!F;opbP$p3zPW~(Lq`_YB` zPyV-M(?@;yhkqYFBmevHnXQ)a??)H%Kl$I5O&|5)AO3y#jQsD%XSQ0xzaL%5|KxvL zHht8GfB5&|GxEP5pV?{&|9*5K|C9f1+4NB#{^8$;&&dCNd}ga9{QJ>`{7?S3Wz$D} z_=kTVJ|qA8@tLib@b5<#@;~|CmQ5e^;UE5e_>BDT$7i-$!oMF~$p7SjTQ+^vhky9@ z;WP5TAD`K33IBd{A^(&AZQ1luAO7LrhtJ6Wetc%DCH(u*h5S$cw`J2uefWoeA3h`h z`|+8rmhkUK7xF*(-efW(0@5g7hTEf2{UC96Be_J+v)Q5lg_u(`0zaO93 zY6<^-bRqwf|83dyQ6K){--pl0|9*UCt0nyV(S`g^{bBTU8nce{MKx3HY?%nDKK!@O z|Hj=<*;<$SB@zMEE+n39w-x@|=YR9wc_rdzx?vju)y|fQkm|!f{M#~>ruy)o+u!h? 
zJ3g8V$p2{~{>;9@e{O%nfA09e|Cv0c?Gyax_BZ_Jjt~5&jrcSB3;((O4gb001OI38 zl(tXspWEN?pF2Lv|EAru(q?=5dpZKDooYzQ9s~H#9+ynNR4@Cf9IV?cwMTarTuz)k zX3gZW#zO_KuWetOnon4-zyA0>XY1E(v7Iz#ZMVbbvudW=WopU$GyM+#XU3zm{88-t z|M$GQ#uh&rJnIY2*x~+PKR&bdFZ}z_1^#*d$CgbW_2D1>lXeWq|9-sTgY%?yz`q}j z$^Yd4BnFE)AO7K={O`vD#pFC`-SF>6d-6Z|KZ(I&&WC^aC;$8LKruN_S~vXr(VqNI z{!e1CnDgNu{>lG-JWx!|lhzIYezYh5lmC+#EarUphkx?F9}g6h^Q3jdzaQ<%|K$H9 z28%f#{^6he@5ckhS0{Z6^9 z`dv{UZZ~}yFyXyXzwR*o8#wWD6SwX%^ZDMQ+hjAnyl0NwJMEKyDkc^hk9&;S9yQUnZ{N2h8jI4-XlM2x{@dq&)S~Yj?>5_{F6x#*1au6Y^6YQf z2gvdJC-9$}|DMCok~Q1MmVle-LLm{*V~FcV{yAen!WaPmx%t=Q%0qSA3b7u`lt2XZ z*hJ6Qbml&En%fSM|6Y}6NB7?8e+htGU>EclVC~L{^2qGJZ#h?M&e1+L_o_(SF^hjFlUDqX%vjaSl%Dd$+<%f@ zm0g1xD?;S|VROD-&*LRc&E{BN&zChfH(8&vN@|n&#fyNB0eb#1I&W*v*Xr*Is>V7$ zbJFv*c+PHgWrtG{Y0<96y>pXhxH1<|WJW-b^`@`Wy-!etTK_mW^{40lk0mrm>fl{TL8N{_kS0^Zc6Y zc<%5h`u6nFX3e(IQ5)LUx}B3n+t&Z7r~g+|*Qx8)KIrpB+9&Os z_EDdU&_1`l#+H_tYd>{c^!XuPx(@ZzWs+86DUKKnP&^ilh&eSX07UGKdaKL49$ zoBq?>kN>~sxyF97*lg>nEhEf$GDGw^9UVjV9eyb~M$_>_W7T}Bsi?z+j$OLnSnB@n zNdL+`|Mwkrey3jV*Xu=^=UVPBITSq)r`H%ZnjE>}<;NnP$1nR?qj zXXRPuIHjw3*6*C?w@UPwNspoCuN@Gbzu0+TMfAG~g*?ll=RV3+>vHD*=CPMX`-{19 zzwR68{T|A?<`CUifAFc#M8~mue0yDB&x7Q{TH5O9xeNU*q=9DtO!pb|9E6_Z)G?vf zoa57dx?Gc<2h`s&O51kl)8BID|B=~Wiq1pF+UmVOdfa%>>?6LoV|?U;M@=2oR?nB^ z6EEsyJ7<{Ad3tV0e^c$rd0&g3|J<~Dw)w52SIoJbBWIj@)bl$dp7~B$`lForuUzo0 z=sCY#2UnWhK19YY`H|c=?NhK`M)!5O9*_0;FnvCB@`~R?*G=_WjUEq{>oWo6%)cJ9 z>9wntbL@IuE1h|Yeiui#mTRD0qiYl8`Wr&!%s<=|ha2Lap18w*dbENov~ukM_{USO zJw~;dQ?!>JPr-kBw1O+Na_s^5$5XC7MSJP-6#S=0E4V@{ z*B*d>JmuO`w3i-F!GC(Rf-AIg?E(15Q?5Nld+G5M{HI4NxI!z}9)N#5<=RuUmmW{S ze|ofnE3|U$0rG2f&r$;NeLMzuEfPXyY+EcWb9#6r4dbENov~ukM_{USO zJw~;dQ?!>JPr-kBw1O+Na_s^5$5XC7MSJP-6#S=0E4V@{ z*B*d>JmuO`w3i-F!GC(Rf-AIg?E(15Q?5Nld+G5M{HI4NxI!z}9)N#5<=RuUmmW{S ze|ofnE3|U$0rG2f&r$;NeLMzuEfPXyY+EcWb9#6r4dbENov~ukM_{USO zJw~;dQ?!>JPr-kBw1O+Na_s^5$5XC7MSJP-6#S=0E4V@{ z*B*d>JmuO`w3i-F!GC(Rf-AIg?E(15Q?5Nld+G5M{HI4NxI!z}9)N#5<=RuUmmW{S ze|ofnE3|U$0rG2f&r$;NeLMzuEfPXyY+EcWb9#6r4dbENov~ukM_{USO 
zJw~;dQ?!>JPr-kBw1O+Na_s^5$5XC7MSJP-6#S=0E4V@{ z*B*d>JmuO`w3i-F!GC(Rf-AIg?E(15Q?5Nld+G5M{HI4NxI!z}9)N#5<=RuUmmW{S ze|ofnE3|U$0rG2f&r$;NeLMzuEfPXyY+EcWb9#6r4dbENov~ukM_{USO zJwQT1rre<0z`la5CI}U1c(3; qAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0V0t42>d^(XvLlY literal 0 HcmV?d00001 diff --git a/mkdocs/docs/js/google-analytics.js b/mkdocs/docs/js/google-analytics.js new file mode 100644 index 00000000..05a139d2 --- /dev/null +++ b/mkdocs/docs/js/google-analytics.js @@ -0,0 +1,50 @@ +(function(i, s, o, g, r, a, m) { + i['GoogleAnalyticsObject'] = r; + i[r] = i[r] || function() { + (i[r].q = i[r].q || []).push(arguments) + }, i[r].l = 1 * new Date(); + a = s.createElement(o), + m = s.getElementsByTagName(o)[0]; + a.async = 1; + a.src = g; + m.parentNode.insertBefore(a, m) +})(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga'); + +ga('create', 'UA-48316355-1', 'auto'); +ga('require', 'displayfeatures'); +//ga('require', 'linkid', 'linkid.js'); +ga('send', 'pageview'); + +// Monitor all download links in GA +window.onload = function() { + var a = document.getElementsByTagName('a'); + var cnt = 0; + for (i = 0; i < a.length; i++) { + var url = a[i].href; + var x = url.indexOf("?"); + if (x != -1) { + url = url.substr(0, x); + } + var url_test = url.match(/^https?:\/\/.+(\.rpm|\.deb|\/q|\.tar\.gz|\.zip|\.bat|\.exe)$/i); + if (url_test) { + console.log("Converting url to be GA aware: " + url); + if (url_test.length > 1) { + var event_action = url_test[1]; + } else { + var event_action = 'unknown_action'; + } + a[i].event_action = event_action; + cnt = cnt + 1; + a[i].onclick = function() { + console.log("Sending GA event for link" + url); + var that = this; + ga('send', 'event', 'Downloads', 'Click on ' + this.event_action, this.getAttribute('href')); + setTimeout(function() { + location.href = that.href; + }, 500); + return false; + }; + } + } + console.log("Converted " + cnt + " links to be GA aware"); +} diff --git a/mkdocs/mkdocs.yml 
b/mkdocs/mkdocs.yml index eb83cf4e..21d11af9 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -32,6 +32,8 @@ extra: link: 'https://www.linkedin.com/in/harelba' extra_css: - 'stylesheets/extra.css' +extra_javascript: + - 'js/google-analytics.js' markdown_extensions: - meta - toc: From c4c38f0079a2be60997e139dd0227bff7f2370ea Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Feb 2020 13:45:53 +0200 Subject: [PATCH 043/111] new GA working --- mkdocs/docs/js/google-analytics.js | 22 +++------------------- mkdocs/mkdocs.yml | 13 +++++++++++++ mkdocs/theme/main.html | 28 ++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 19 deletions(-) create mode 100644 mkdocs/theme/main.html diff --git a/mkdocs/docs/js/google-analytics.js b/mkdocs/docs/js/google-analytics.js index 05a139d2..c8322a73 100644 --- a/mkdocs/docs/js/google-analytics.js +++ b/mkdocs/docs/js/google-analytics.js @@ -1,20 +1,3 @@ -(function(i, s, o, g, r, a, m) { - i['GoogleAnalyticsObject'] = r; - i[r] = i[r] || function() { - (i[r].q = i[r].q || []).push(arguments) - }, i[r].l = 1 * new Date(); - a = s.createElement(o), - m = s.getElementsByTagName(o)[0]; - a.async = 1; - a.src = g; - m.parentNode.insertBefore(a, m) -})(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga'); - -ga('create', 'UA-48316355-1', 'auto'); -ga('require', 'displayfeatures'); -//ga('require', 'linkid', 'linkid.js'); -ga('send', 'pageview'); - // Monitor all download links in GA window.onload = function() { var a = document.getElementsByTagName('a'); @@ -25,7 +8,7 @@ window.onload = function() { if (x != -1) { url = url.substr(0, x); } - var url_test = url.match(/^https?:\/\/.+(\.rpm|\.deb|\/q|\.tar\.gz|\.zip|\.bat|\.exe)$/i); + var url_test = url.match(/^https?:\/\/.+(\/rpms\/.*\.rpm|\/deb\/.*\.deb|single-binary\/Darwin\/.*\/q|\/archive\/.*\.tar\.gz|\/archive\/.*\.zip|\/windows\/.*\.exe)$/i); if (url_test) { console.log("Converting url to be GA aware: " + url); if (url_test.length 
> 1) { @@ -38,7 +21,8 @@ window.onload = function() { a[i].onclick = function() { console.log("Sending GA event for link" + url); var that = this; - ga('send', 'event', 'Downloads', 'Click on ' + this.event_action, this.getAttribute('href')); + //ga('send', 'event', 'Downloads', 'Click on ' + this.event_action, this.getAttribute('href')); + gtag('event','perform download', { 'event_category': 'Downloads', 'event_label': 'Download ' + this.event_action , 'value': 1 }); setTimeout(function() { location.href = that.href; }, 500); diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index 21d11af9..6cb54028 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -22,6 +22,7 @@ theme: code: 'Roboto Mono' favicon: 'img/q-logo1.ico' logo: 'img/q-logo1.ico' + custom_dir: 'theme' extra: social: - type: 'github' @@ -42,3 +43,15 @@ markdown_extensions: - fenced_code - admonition # - codehilite + + +## +## +## + diff --git a/mkdocs/theme/main.html b/mkdocs/theme/main.html new file mode 100644 index 00000000..8fd1489e --- /dev/null +++ b/mkdocs/theme/main.html @@ -0,0 +1,28 @@ +{% extends "base.html" %} + +{% block analytics %} + +{% set analytics = config.google_analytics %} + + +{% endblock %} From 051911e49525bda3ecc0f30bb63d7e8d90407caf Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Feb 2020 14:23:02 +0200 Subject: [PATCH 044/111] fixed GA, including toc links --- mkdocs/docs/js/google-analytics.js | 49 ++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/mkdocs/docs/js/google-analytics.js b/mkdocs/docs/js/google-analytics.js index c8322a73..69817c62 100644 --- a/mkdocs/docs/js/google-analytics.js +++ b/mkdocs/docs/js/google-analytics.js @@ -1,27 +1,26 @@ // Monitor all download links in GA -window.onload = function() { - var a = document.getElementsByTagName('a'); - var cnt = 0; - for (i = 0; i < a.length; i++) { - var url = a[i].href; + +var dlCnt = 0; + +function GAizeDownloadLink(a) { + var url = a.href; var x = 
url.indexOf("?"); if (x != -1) { url = url.substr(0, x); } - var url_test = url.match(/^https?:\/\/.+(\/rpms\/.*\.rpm|\/deb\/.*\.deb|single-binary\/Darwin\/.*\/q|\/archive\/.*\.tar\.gz|\/archive\/.*\.zip|\/windows\/.*\.exe)$/i); + var url_test = url.match(/^https?:\/\/.+(\/rpms\/.*\.rpm|\/deb\/.*\.deb|\/single-binary\/Darwin\/.*\/q|\/archive\/.*\.tar\.gz|\/archive\/.*\.zip|\/windows\/.*\.exe)$/i); if (url_test) { - console.log("Converting url to be GA aware: " + url); + console.log("Converting download link to be GA aware: " + url); if (url_test.length > 1) { var event_action = url_test[1]; } else { var event_action = 'unknown_action'; } - a[i].event_action = event_action; - cnt = cnt + 1; - a[i].onclick = function() { + a.event_action = event_action; + dlCnt = dlCnt + 1; + a.onclick = function() { console.log("Sending GA event for link" + url); var that = this; - //ga('send', 'event', 'Downloads', 'Click on ' + this.event_action, this.getAttribute('href')); gtag('event','perform download', { 'event_category': 'Downloads', 'event_label': 'Download ' + this.event_action , 'value': 1 }); setTimeout(function() { location.href = that.href; @@ -29,6 +28,32 @@ window.onload = function() { return false; }; } +} + +function GAizeTOCLink(l) { + l.onclick = function() { + url_test = l.href.match(/^https?:\/\/.+(#.*)$/i); + toc_name = url_test[1]; + var that = this; + console.log("Sending GA event for toc link " + this.href); + + gtag('event','navigate', { 'event_category': 'Navigation', 'event_label': 'go to ' + toc_name, 'value': 1 }); + setTimeout(function() { + location.href = that.href; + }, 500); + return false; + }; + +} + +window.onload = function() { + var anchors = document.getElementsByTagName('a'); + for (i = 0; i < anchors.length; i++) { + GAizeDownloadLink(anchors[i]); + } + var toc_links = document.querySelectorAll('div.md-sidebar[data-md-component=toc] a.md-nav__link'); + for (i = 0; i < toc_links.length; i++) { + GAizeTOCLink(toc_links[i]); } - 
console.log("Converted " + cnt + " links to be GA aware"); + console.log("Converted " + dlCnt + " links to be GA aware"); } From 920f03c208debba02d3f9aa0b2a7067d164e5eee Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Feb 2020 14:38:39 +0200 Subject: [PATCH 045/111] osx install stuff --- mkdocs/docs/index.md | 2 +- mkdocs/docs/js/google-analytics.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 8e839f9c..777d4cc6 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -32,7 +32,7 @@ Look at some examples [here](#examples), or just download the tool using the lin | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.9/q)|Just run `brew install q` or download the executable from the link on the left, make it executable, and use it.|Make sure that you run `brew update` if needed|| +|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.9/q)|Download the executable from the link on the left, make it executable, and use it.|`brew install q` currently downloads the older version `1.7.1`. I'll update it to install the new version soon|| |[RPM Package](https://github.com/harelba/packages-for-q/raw/master/rpms/q-text-as-data-2.0.9-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter man q.| |[DEB Package](https://github.com/harelba/packages-for-q/raw/master/deb/q-text-as-data_2.0.9-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`.| |[Windows Installer](https://github.com/harelba/packages-for-q/raw/master/windows/setup-q-2.0.9.exe)|Run the installer executable and hit next next next... 
q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| diff --git a/mkdocs/docs/js/google-analytics.js b/mkdocs/docs/js/google-analytics.js index 69817c62..4c0829d7 100644 --- a/mkdocs/docs/js/google-analytics.js +++ b/mkdocs/docs/js/google-analytics.js @@ -40,7 +40,7 @@ function GAizeTOCLink(l) { gtag('event','navigate', { 'event_category': 'Navigation', 'event_label': 'go to ' + toc_name, 'value': 1 }); setTimeout(function() { location.href = that.href; - }, 500); + }, 250); return false; }; From 7c804a6e15eb18cb143aed326e5c178256bcf02f Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 8 Feb 2020 15:19:34 +0200 Subject: [PATCH 046/111] generalize version for release --- bin/__version__.py | 7 +++++++ bin/q.py | 3 +-- do-manual-release.sh | 2 +- mkdocs/mkdocs.yml | 9 --------- setup.py | 4 +++- 5 files changed, 12 insertions(+), 13 deletions(-) create mode 100755 bin/__version__.py diff --git a/bin/__version__.py b/bin/__version__.py new file mode 100755 index 00000000..47181594 --- /dev/null +++ b/bin/__version__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +q_version = '2.0.10' + + +if __name__ == '__main__': + print(q_version) diff --git a/bin/q.py b/bin/q.py index d6dc1fbe..b41451f3 100755 --- a/bin/q.py +++ b/bin/q.py @@ -30,8 +30,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - -q_version = "2.0.9" +from __version__ import q_version __all__ = [ 'QTextAsData' ] diff --git a/do-manual-release.sh b/do-manual-release.sh index 56b302ce..b39b65d8 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -2,7 +2,7 @@ set -e -VERSION=2.0.9 +VERSION=$(bin/__version__.py) echo "Packing binary for $TRAVIS_OS_NAME" diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index 6cb54028..a47cdf34 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ 
-45,13 +45,4 @@ markdown_extensions: # - codehilite -## -## -## diff --git a/setup.py b/setup.py index 97753d71..1488c10c 100644 --- a/setup.py +++ b/setup.py @@ -2,11 +2,13 @@ from setuptools import setup +from bin.__version__ import q_version + setup( name='q', url='https://github.com/harelba/q', license='LICENSE', - version='2.0.9', + version=q_version, author='Harel Ben-Attia', description="Run SQL directly on CSV or TSV files", author_email='harelba@gmail.com', From 703da8f071d8d1122de0cd32a7d7e18c7d0ac435 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Wed, 12 Feb 2020 16:15:14 +0200 Subject: [PATCH 047/111] Update README.markdown --- README.markdown | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.markdown b/README.markdown index 9477de3d..fbef6567 100644 --- a/README.markdown +++ b/README.markdown @@ -10,7 +10,7 @@ q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It c ## Installation. Extremely simple. -Instructions for all OSs are [here](http://harelba.github.io/q/install.html). +Instructions for all OSs are [here](http://harelba.github.io/q/#installation). ## Examples @@ -20,18 +20,21 @@ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" ``` -Go [here](http://harelba.github.io/q/examples.html) for more examples. +Go [here](http://harelba.github.io/q/#examples) for more examples. ## Python API A development branch for exposing q's capabilities as a Python module can be viewed here, along with examples of the alpha version of the API.
    Existing functionality as a command-line tool will not be affected by this. Your input will be most appreciated. -## Change log -Click [here](http://harelba.github.io/q/changelog.html) to see the change log. - ## Contact Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. Harel Ben-Attia, harelba@gmail.com, [@harelba](https://twitter.com/harelba) on Twitter +Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/) + +Twitter [@harelba](https://twitter.com/harelba) + +Email [harelba@gmail.com](mailto:harelba@gmail.com) + q on twitter: #qtextasdata From 192ec6db4ab1b0a96dea0d0ded89a2405da8654d Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Wed, 12 Feb 2020 16:15:41 +0200 Subject: [PATCH 048/111] Update README.markdown --- README.markdown | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.markdown b/README.markdown index fbef6567..c8802d60 100644 --- a/README.markdown +++ b/README.markdown @@ -28,8 +28,6 @@ A development branch for exposing q's capabilities as a Python module
    Date: Wed, 3 Jun 2020 16:52:12 +0300 Subject: [PATCH 049/111] Some info regarding the possibility of BOM on failure --- bin/q.py | 2 +- test/test-suite | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/q.py b/bin/q.py index b41451f3..072f608b 100755 --- a/bin/q.py +++ b/bin/q.py @@ -1424,7 +1424,7 @@ def _execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-' msg = str(e) error = QError(e,"query error: %s" % msg,1) if "no such column" in msg and effective_input_params.skip_header: - warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')) + warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names. Another issue might be that the file contains a BOM. Files that are encoded with UTF8 and contain a BOM can be read by specifying `-e utf-9-sig` in the command line. Support for non-UTF8 encoding will be provided in the future.')) except ColumnCountMismatchException as e: error = QError(e,e.msg,2) except (UnicodeDecodeError, UnicodeError) as e: diff --git a/test/test-suite b/test/test-suite index 4b8c0bef..510dd634 100755 --- a/test/test-suite +++ b/test/test-suite @@ -514,8 +514,8 @@ class BasicTests(AbstractQTestCase): self.assertEqual(len(o), 0) self.assertEqual(len(e), 2) self.assertTrue(six.b('no such column: c3') in e[0]) - self.assertEqual( - e[1], six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')) + self.assertTrue( + e[1].startswith(six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. 
Please make sure that you are using the column names from the header line and not the default (cXX) column names'))) self.cleanup(tmpfile) From ca71e490b78e0f21966e5a6fd1e95f80f50b4de0 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Wed, 9 Sep 2020 22:11:29 +0300 Subject: [PATCH 050/111] simplify versioning until a proper solution is found --- bin/q.py | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/q.py b/bin/q.py index 072f608b..51183880 100755 --- a/bin/q.py +++ b/bin/q.py @@ -30,7 +30,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from __version__ import q_version + +q_version = '2.0.14' __all__ = [ 'QTextAsData' ] diff --git a/setup.py b/setup.py index 1488c10c..fd7efb2a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -from bin.__version__ import q_version +q_version = '2.0.14' setup( name='q', From 48ed3e45edad24ea749ee1888e694c7db8f5c68a Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Wed, 9 Sep 2020 22:51:51 +0300 Subject: [PATCH 051/111] bumping to proper version --- bin/q.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/q.py b/bin/q.py index 51183880..9e8b6051 100755 --- a/bin/q.py +++ b/bin/q.py @@ -31,7 +31,7 @@ from __future__ import division from __future__ import print_function -q_version = '2.0.14' +q_version = '2.0.16' __all__ = [ 'QTextAsData' ] diff --git a/setup.py b/setup.py index fd7efb2a..f672d1c4 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -q_version = '2.0.14' +q_version = '2.0.16' setup( name='q', From 49fba5864f54ab166b4d5b3fae63cb9003c51f2c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 10 Sep 2020 13:25:55 +0300 Subject: [PATCH 052/111] fix release version to 2.0.16 so binaries will be built --- bin/__version__.py | 7 ------- do-manual-release.sh | 2 +- 2 files changed, 1 insertion(+), 8 
deletions(-) delete mode 100755 bin/__version__.py diff --git a/bin/__version__.py b/bin/__version__.py deleted file mode 100755 index 47181594..00000000 --- a/bin/__version__.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python - -q_version = '2.0.10' - - -if __name__ == '__main__': - print(q_version) diff --git a/do-manual-release.sh b/do-manual-release.sh index b39b65d8..a2e04297 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -2,7 +2,7 @@ set -e -VERSION=$(bin/__version__.py) +VERSION=2.0.16 echo "Packing binary for $TRAVIS_OS_NAME" From f6966a6ca95983cc7d5f21e3ee42a641c041e75f Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 10 Sep 2020 15:09:13 +0300 Subject: [PATCH 053/111] updated site to download version 2.0.16 --- mkdocs/docs/index.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 777d4cc6..e450b0a9 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -32,12 +32,12 @@ Look at some examples [here](#examples), or just download the tool using the lin | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.9/q)|Download the executable from the link on the left, make it executable, and use it.|`brew install q` currently downloads the older version `1.7.1`. I'll update it to install the new version soon|| -|[RPM Package](https://github.com/harelba/packages-for-q/raw/master/rpms/q-text-as-data-2.0.9-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter man q.| -|[DEB Package](https://github.com/harelba/packages-for-q/raw/master/deb/q-text-as-data_2.0.9-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. 
Just enter `man q`.| -|[Windows Installer](https://github.com/harelba/packages-for-q/raw/master/windows/setup-q-2.0.9.exe)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| -|[tar.gz](https://github.com/harelba/q/archive/2.0.9.tar.gz)|Full source file tree for latest stable version|| -|[zip](https://github.com/harelba/q/archive/2.0.9.zip)|Full source file tree for the latest stable version|| +|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.16/q)|run `brew install q`|man page is not available for this release yet. Use `q --help` for now|| +|[RPM Package](https://github.com/harelba/packages-for-q/raw/master/rpms/q-text-as-data-2.0.16-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter man q.| +|[DEB Package](https://github.com/harelba/packages-for-q/raw/master/deb/q-text-as-data_2.0.16-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`.| +|[Windows Installer](https://github.com/harelba/packages-for-q/raw/master/windows/setup-q-2.0.16.exe)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| +|[tar.gz](https://github.com/harelba/q/archive/2.0.16.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[zip](https://github.com/harelba/q/archive/2.0.16.zip)|Full source file tree for the latest stable version. 
Note that q.py cannot be used directly anymore, as it requires python dependencies|| **Older versions can be downloaded [here](https://github.com/harelba/packages-for-q). Please let me know if you plan on using an older version, and why - I know of no reason to use any of them.** From ebbf96ae8ae6eb981c71506e556c291bd49079e5 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 10 Sep 2020 16:18:53 +0300 Subject: [PATCH 054/111] website readme --- mkdocs/README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 mkdocs/README.md diff --git a/mkdocs/README.md b/mkdocs/README.md new file mode 100644 index 00000000..147e71aa --- /dev/null +++ b/mkdocs/README.md @@ -0,0 +1,24 @@ + +# Generate web site + +# mkdocs folder under project root +$ `cd mkdocs` + +* create a pyenv virtual environment + +$ `pip install -r requirements.txt` + +$ `./generate-web-site.sh` (static files will be generated into `./generated-site`) + +$ `git checkout gh-pages` + +$ `cd ../` # back to project root + +$ `scp -r mkdocs/generated-site/* ./` + +$ `git add` all modified files + +* commit to git + +$ `git push origin gh-pages` + From c2fdc3ef962101de07774d5f393badbc4df07640 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 10 Sep 2020 17:20:31 +0300 Subject: [PATCH 055/111] wip --- bin/q.py | 18 +++++++++++ test/test-suite | 82 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/bin/q.py b/bin/q.py index 9e8b6051..64650e37 100755 --- a/bin/q.py +++ b/bin/q.py @@ -85,6 +85,11 @@ def regexp(regular_expression, data): else: return False +def md5(data,encoding='utf-8'): + m = hashlib.md5() + m.update(six.text_type(data).encode(encoding)) + return m.hexdigest() + class Sqlite3DBResults(object): def __init__(self,query_column_names,results): self.query_column_names = query_column_names @@ -166,6 +171,8 @@ def store_db_to_disk(self,sqlite_db_filename,table_names_mapping,method='standar def 
add_user_functions(self): self.conn.create_function("regexp", 2, regexp) self.conn.create_function("sha1", 1, sha1) + self.conn.create_function("md5", 2, md5) + self.conn.create_function("md5", 1, md5) self.conn.create_aggregate("percentile",2,StrictPercentile) def is_numeric_type(self, column_type): @@ -1738,6 +1745,8 @@ def get_option_with_default(p, option_type, option, default): help="Skip header row. This has been changed from earlier version - Only one header row is supported, and the header row is used for column naming") input_data_option_group.add_option("-d", "--delimiter", dest="delimiter", default=default_delimiter, help="Field delimiter. If none specified, then space is used as the delimiter.") + input_data_option_group.add_option("-p", "--pipe-delimited", dest="pipe_delimited", default=False, action="store_true", + help="Same as -d '|'. Added for convenience and readability") input_data_option_group.add_option("-t", "--tab-delimited", dest="tab_delimited", default=False, action="store_true", help="Same as -d . Just a shorthand for handling standard tab delimited file You can use $'\\t' if you want (this is how Linux expects to provide tabs in the command line") input_data_option_group.add_option("-e", "--encoding", dest="encoding", default=default_encoding, @@ -1769,6 +1778,8 @@ def get_option_with_default(p, option_type, option, default): output_data_option_group = OptionGroup(parser,"Output Options") output_data_option_group.add_option("-D", "--output-delimiter", dest="output_delimiter", default=default_output_delimiter, help="Field delimiter for output. If none specified, then the -d delimiter is used if present, or space if no delimiter is specified") + output_data_option_group.add_option("-P", "--pipe-delimited-output", dest="pipe_delimited_output", default=False, action="store_true", + help="Same as -D '|'. 
Added for convenience and readability.") output_data_option_group.add_option("-T", "--tab-delimited-output", dest="tab_delimited_output", default=False, action="store_true", help="Same as -D . Just a shorthand for outputting tab delimited output. You can use -D $'\\t' if you want.") output_data_option_group.add_option("-O", "--output-header", dest="output_header", default=default_output_header, action="store_true",help="Output header line. Output column-names are determined from the query itself. Use column aliases in order to set your column names in the query. For example, 'select name FirstName,value1/value2 MyCalculation from ...'. This can be used even if there was no header in the input.") @@ -1854,6 +1865,13 @@ def get_option_with_default(p, option_type, option, default): if options.tab_delimited_output: options.output_delimiter = '\t' + # If the user flagged for a pipe-delimited file then set the delimiter to pipe + if options.pipe_delimited: + options.delimiter = '|' + + if options.pipe_delimited_output: + options.output_delimiter = '|' + if options.delimiter is None: options.delimiter = ' ' elif len(options.delimiter) != 1: diff --git a/test/test-suite b/test/test-suite index 510dd634..80d498b3 100755 --- a/test/test-suite +++ b/test/test-suite @@ -37,6 +37,9 @@ EXAMPLES = os.path.abspath(os.path.join(os.pardir, 'examples')) Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', '../bin/q.py') +if not os.path.exists(Q_EXECUTABLE): + raise Exception("q executable must reside in {}".format(Q_EXECUTABLE)) + DEBUG = False if len(sys.argv) > 2 and sys.argv[2] == '-v': DEBUG = True @@ -334,6 +337,21 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) + def test_pipe_delimition_parameter(self): + tmpfile = self.create_file_with_data( + sample_data_no_header.replace(six.b(","), six.b("|"))) + cmd = Q_EXECUTABLE + ' -p "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + 
self.assertEqual(len(e), 0) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + + self.cleanup(tmpfile) + def test_tab_delimition_parameter__with_manual_override_attempt(self): tmpfile = self.create_file_with_data( sample_data_no_header.replace(six.b(","), six.b("\t"))) @@ -349,6 +367,21 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) + def test_pipe_delimition_parameter__with_manual_override_attempt(self): + tmpfile = self.create_file_with_data( + sample_data_no_header.replace(six.b(","), six.b("|"))) + cmd = Q_EXECUTABLE + ' -p -d , "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + + self.cleanup(tmpfile) + def test_output_delimiter(self): tmpfile = self.create_file_with_data(sample_data_no_header) cmd = Q_EXECUTABLE + ' -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name @@ -379,6 +412,21 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) + def test_output_delimiter_pipe_parameter(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , -P "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + + self.cleanup(tmpfile) + 
def test_output_delimiter_tab_parameter__with_manual_override_attempt(self): tmpfile = self.create_file_with_data(sample_data_no_header) cmd = Q_EXECUTABLE + ' -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name @@ -394,6 +442,21 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) + def test_output_delimiter_pipe_parameter__with_manual_override_attempt(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , -P -D ":" "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + + self.cleanup(tmpfile) + def test_stdin_input(self): cmd = six.b('printf "%s" | ' + Q_EXECUTABLE + ' -d , "select c1,c2,c3 from -"') % sample_data_no_header retcode, o, e = run_command(cmd) @@ -1436,6 +1499,21 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) +class UserFunctionTests(AbstractQTestCase): + def test_md5_function(self): + cmd = 'seq 1 4 | %s -c 1 -d , "select c1,md5(c1,\'utf-8\') from -"' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),4) + self.assertEqual(len(e),0) + + self.assertEqual(tuple(o[0].split(six.b(','),1)),(six.b('1'),six.b('c4ca4238a0b923820dcc509a6f75849b'))) + self.assertEqual(tuple(o[1].split(six.b(','),1)),(six.b('2'),six.b('c81e728d9d4c2f636f067f89cc14862c'))) + self.assertEqual(tuple(o[2].split(six.b(','),1)),(six.b('3'),six.b('eccbc87e4b5ce2fe28308fd9f2a7baf3'))) + self.assertEqual(tuple(o[3].split(six.b(','),1)),(six.b('4'),six.b('a87ff679a2f3e71d9181a67b7542122c'))) + + class MultiHeaderTests(AbstractQTestCase): def test_output_header_when_multiple_input_headers_exist(self): 
TMPFILE_COUNT = 5 @@ -2415,7 +2493,9 @@ def suite(): formatting = tl.loadTestsFromTestCase(FormattingTests) basic_module_stuff = tl.loadTestsFromTestCase(BasicModuleTests) save_db_to_disk_tests = tl.loadTestsFromTestCase(SaveDbToDiskTests) - return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests]) + user_functions_tests = tl.loadTestsFromTestCase(UserFunctionTests) + multi_header_tests = tl.loadTestsFromTestCase(MultiHeaderTests) + return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests,multi_header_tests,user_functions_tests]) if __name__ == '__main__': if len(sys.argv) > 1: From abf4577cd6844b88e0b57139c74f38b447d7fdb0 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 11 Sep 2020 12:15:37 +0300 Subject: [PATCH 056/111] fixed multiline quoting for `-W minimal` --- bin/q.py | 2 +- test/test-suite | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/bin/q.py b/bin/q.py index 64650e37..70def0ae 100755 --- a/bin/q.py +++ b/bin/q.py @@ -1508,7 +1508,7 @@ def quote_minimal_func(output_delimiter,v): if v is None: return v t = type(v) - if (t == str or t == unicode) and ((output_delimiter in v) or (six.u('"') in v)): + if (t == str or t == unicode) and ((output_delimiter in v) or (six.u('\n' in v)) or (six.u('"') in v)): return six.u('"{}"').format(escape_double_quotes_if_needed(v)) return v diff --git a/test/test-suite b/test/test-suite index 80d498b3..beeec673 100755 --- a/test/test-suite +++ b/test/test-suite @@ -130,6 +130,8 @@ combined_quoted_data = six.b('''regular_double_quoted double_double_quoted escap sample_quoted_data2 = six.b('"quoted data" 23\nunquoted-data 54') +sample_quoted_data2_with_newline = six.b('"quoted data with\na new line inside it":23\nunquoted-data:54') + one_column_data = six.b('''data without commas 1 data without commas 2 ''') @@ -1116,7 +1118,7 @@ class BasicTests(AbstractQTestCase): 
self.cleanup(tmp_data_file) - def test_minimal_output_quoting_mode__with_need_to_quote_in_output(self): + def test_minimal_output_quoting_mode__with_need_to_quote_in_output_due_to_delimiter(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) # output delimiter is set to space, so the output will contain it @@ -1132,6 +1134,24 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmp_data_file) + def test_minimal_output_quoting_mode__with_need_to_quote_in_output_due_to_newline(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2_with_newline) + + # Delimiter is set to colon (:), so it will not be inside the data values (this will make sure that the newline is the one causing the quoting) + cmd = Q_EXECUTABLE + " -d ':' -w all -W minimal \"select c1,c2,replace(c1,'with' || x'0a' || 'a new line inside it','NEWLINE-REMOVED') from %s\"" % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),3) + + self.assertEqual(o[0],six.b('"quoted data with')) + # Notice that the third column here is not quoted, because we replaced the newline with something else + self.assertEqual(o[1],six.b('a new line inside it":23:quoted data NEWLINE-REMOVED')) + self.assertEqual(o[2],six.b('unquoted-data:54:unquoted-data')) + + self.cleanup(tmp_data_file) + def test_nonnumeric_output_quoting_mode(self): tmp_data_file = self.create_file_with_data(sample_quoted_data2) @@ -2002,6 +2022,7 @@ class FormattingTests(AbstractQTestCase): self.cleanup(tmp_data_file) + class SqlTests(AbstractQTestCase): def test_find_example(self): From 0473927e941b9a03088cf047fd40bd5100d87da8 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 11 Sep 2020 16:47:16 +0300 Subject: [PATCH 057/111] multiple usability additions --- bin/q.py | 63 +++++++++++++++++++++++++------------------- do-manual-release.sh | 6 +++++ test/test-suite | 13 ++++++--- 3 files changed, 51 insertions(+), 31 
deletions(-) diff --git a/bin/q.py b/bin/q.py index 70def0ae..e2e9fb8d 100755 --- a/bin/q.py +++ b/bin/q.py @@ -1508,7 +1508,7 @@ def quote_minimal_func(output_delimiter,v): if v is None: return v t = type(v) - if (t == str or t == unicode) and ((output_delimiter in v) or (six.u('\n' in v)) or (six.u('"') in v)): + if (t == str or t == unicode) and ((output_delimiter in v) or ('\n' in v) or ('"' in v)): return six.u('"{}"').format(escape_double_quotes_if_needed(v)) return v @@ -1860,48 +1860,35 @@ def get_option_with_default(p, option_type, option, default): # If the user flagged for a tab-delimited file then set the delimiter to tab if options.tab_delimited: + if options.delimiter is not None and options.delimiter != '\t': + print("Warning: -t parameter overrides -d parameter (%s)" % options.delimiter,file=sys.stderr) options.delimiter = '\t' - if options.tab_delimited_output: - options.output_delimiter = '\t' - # If the user flagged for a pipe-delimited file then set the delimiter to pipe if options.pipe_delimited: + if options.delimiter is not None and options.delimiter != '|': + print("Warning: -p parameter overrides -d parameter (%s)" % options.delimiter,file=sys.stderr) options.delimiter = '|' - if options.pipe_delimited_output: - options.output_delimiter = '|' - if options.delimiter is None: options.delimiter = ' ' elif len(options.delimiter) != 1: print("Delimiter must be one character only", file=sys.stderr) sys.exit(5) - if options.input_quoting_mode not in list(QTextAsData.input_quoting_modes.keys()): - print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(sorted(QTextAsData.input_quoting_modes.keys())),options.input_quoting_mode), file=sys.stderr) - sys.exit(55) - - if options.output_quoting_mode not in list(QOutputPrinter.output_quoting_modes.keys()): - print("Output quoting mode can only be one of %s. 
It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) - sys.exit(56) - - if options.column_count is not None: - expected_column_count = int(options.column_count) - else: - # infer automatically - expected_column_count = None + if options.tab_delimited_output: + if options.output_delimiter is not None and options.output_delimiter != '\t': + print("Warning: -T parameter overrides -D parameter (%s)" % options.output_delimiter,file=sys.stderr) + options.output_delimiter = '\t' - if options.encoding != 'none': - try: - codecs.lookup(options.encoding) - except LookupError: - print("Encoding %s could not be found" % options.encoding, file=sys.stderr) - sys.exit(10) + if options.pipe_delimited_output: + if options.output_delimiter is not None and options.output_delimiter != '|': + print("Warning: -P parameter overrides -D parameter (%s)" % options.output_delimiter,file=sys.stderr) + options.output_delimiter = '|' if options.output_delimiter: # If output delimiter is specified, then we use it - output_delimiter = options.output_delimiter + options.output_delimiter = options.output_delimiter else: # Otherwise, if options.delimiter: @@ -1921,6 +1908,28 @@ def get_option_with_default(p, option_type, option, default): print("Max column length limit must be a positive integer (%s)" % max_column_length_limit, file=sys.stderr) sys.exit(31) + + if options.input_quoting_mode not in list(QTextAsData.input_quoting_modes.keys()): + print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(sorted(QTextAsData.input_quoting_modes.keys())),options.input_quoting_mode), file=sys.stderr) + sys.exit(55) + + if options.output_quoting_mode not in list(QOutputPrinter.output_quoting_modes.keys()): + print("Output quoting mode can only be one of %s. 
It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) + sys.exit(56) + + if options.column_count is not None: + expected_column_count = int(options.column_count) + else: + # infer automatically + expected_column_count = None + + if options.encoding != 'none': + try: + codecs.lookup(options.encoding) + except LookupError: + print("Encoding %s could not be found" % options.encoding, file=sys.stderr) + sys.exit(10) + if options.save_db_to_disk_filename is not None: if options.analyze_only: print("Cannot save database to disk when running with -A (analyze-only) option.", file=sys.stderr) diff --git a/do-manual-release.sh b/do-manual-release.sh index a2e04297..51470617 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -4,6 +4,12 @@ set -e VERSION=2.0.16 +if [[ "$TRAVIS_BRANCH" != "master" ]] +then + echo "Not releasing - not on master branch (${TRAVIS_BRANCH})" + exit 0 +fi + echo "Packing binary for $TRAVIS_OS_NAME" if [[ "$TRAVIS_OS_NAME" == "osx" || "$TRAVIS_OS_NAME" == "linux" ]] diff --git a/test/test-suite b/test/test-suite index beeec673..4f1f877b 100755 --- a/test/test-suite +++ b/test/test-suite @@ -11,6 +11,7 @@ # import unittest +import pytest import random import json from json import JSONEncoder @@ -362,10 +363,11 @@ class BasicTests(AbstractQTestCase): self.assertEqual(retcode, 0) self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) + self.assertEqual(len(e), 1) self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(e[0],six.b('Warning: -t parameter overrides -d parameter (,)')) self.cleanup(tmpfile) @@ -377,10 +379,11 @@ class BasicTests(AbstractQTestCase): self.assertEqual(retcode, 0) self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) + self.assertEqual(len(e), 1) 
self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + self.assertEqual(e[0],six.b('Warning: -p parameter overrides -d parameter (,)')) self.cleanup(tmpfile) @@ -436,11 +439,12 @@ class BasicTests(AbstractQTestCase): self.assertEqual(retcode, 0) self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) + self.assertEqual(len(e), 1) self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(e[0], six.b('Warning: -T parameter overrides -D parameter (|)')) self.cleanup(tmpfile) @@ -451,11 +455,12 @@ class BasicTests(AbstractQTestCase): self.assertEqual(retcode, 0) self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) + self.assertEqual(len(e), 1) self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + self.assertEqual(e[0],six.b('Warning: -P parameter overrides -D parameter (:)')) self.cleanup(tmpfile) From e85c4c50a0c8b5d15ff4e2148f664c02a7543679 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 13 Sep 2020 17:29:53 +0300 Subject: [PATCH 058/111] option to list udfs, and added new functions --- bin/q.py | 180 +++++++++++++++++++++++++++++++++++++++---- mkdocs/docs/index.md | 8 +- test/test-suite | 146 ++++++++++++++++++++++++++++------- 3 files changed, 290 insertions(+), 44 deletions(-) diff --git a/bin/q.py b/bin/q.py index e2e9fb8d..5e165c85 100755 --- a/bin/q.py +++ b/bin/q.py @@ -31,6 +31,8 @@ from __future__ import division from __future__ import print_function +from collections import OrderedDict + q_version = '2.0.16' 
__all__ = [ 'QTextAsData' ] @@ -72,11 +74,26 @@ def get_stdout_encoding(encoding_override=None): SHOW_SQL = False -def sha1(data): - if not isinstance(data,str) and not isinstance(data,unicode): - return hashlib.sha1(str(data)).hexdigest() - return hashlib.sha1(data).hexdigest() +sha_algorithms = { + 1 : hashlib.sha1, + 224: hashlib.sha224, + 256: hashlib.sha256, + 386: hashlib.sha384, + 512: hashlib.sha512 +} + +def sha(data,algorithm,encoding): + try: + f = sha_algorithms[algorithm] + return f(six.text_type(data).encode(encoding)).hexdigest() + except Exception as e: + print(e) + +# For backward compatibility +def sha1(data,encoding): + return sha(data,1,encoding) +# TODO Add caching of compiled regexps - Will be added after benchmarking capability is baked in def regexp(regular_expression, data): if data is not None: if not isinstance(data, str) and not isinstance(data, unicode): @@ -85,15 +102,16 @@ def regexp(regular_expression, data): else: return False -def md5(data,encoding='utf-8'): +def md5(data,encoding): m = hashlib.md5() m.update(six.text_type(data).encode(encoding)) return m.hexdigest() -class Sqlite3DBResults(object): - def __init__(self,query_column_names,results): - self.query_column_names = query_column_names - self.results = results +def sqrt(data): + return math.sqrt(data) + +def power(data,p): + return data**p def percentile(l, p): # TODO Alpha implementation, need to provide multiple interpolation methods, and add tests @@ -106,6 +124,7 @@ def percentile(l, p): return l[int(k)] return (c-k) * l[int(f)] + (k-f) * l[int(c)] +# TODO Streaming Percentile to prevent memory consumption blowup for large datasets class StrictPercentile(object): def __init__(self): self.values = [] @@ -121,6 +140,130 @@ def finalize(self): else: return percentile(sorted(self.values),self.p) +class StdevPopulation(object): + def __init__(self): + self.M = 0.0 + self.S = 0.0 + self.k = 0 + + def step(self, value): + try: + # Ignore nulls + if value is None: + return + 
val = float(value) # if fails, skips this iteration, which also ignores nulls + tM = self.M + self.k += 1 + self.M += ((val - tM) / self.k) + self.S += ((val - tM) * (val - self.M)) + except ValueError: + # TODO propagate udf errors to console + raise Exception("Data is not numeric when calculating stddev (%s)" % value) + + def finalize(self): + if self.k <= 1: # avoid division by zero + return None + else: + return math.sqrt(self.S / (self.k)) + +class StdevSample(object): + def __init__(self): + self.M = 0.0 + self.S = 0.0 + self.k = 0 + + def step(self, value): + try: + # Ignore nulls + if value is None: + return + val = float(value) # if fails, skips this iteration, which also ignores nulls + tM = self.M + self.k += 1 + self.M += ((val - tM) / self.k) + self.S += ((val - tM) * (val - self.M)) + except ValueError: + # TODO propagate udf errors to console + raise Exception("Data is not numeric when calculating stddev (%s)" % value) + + def finalize(self): + if self.k <= 1: # avoid division by zero + return None + else: + return math.sqrt(self.S / (self.k-1)) + +class FunctionType(object): + REGULAR = 1 + AGG = 2 + +class UserFunctionDef(object): + def __init__(self,func_type,name,usage,description,func_or_obj,param_count): + self.func_type = func_type + self.name = name + self.usage = usage + self.description = description + self.func_or_obj = func_or_obj + self.param_count = param_count + +user_functions = [ + UserFunctionDef(FunctionType.REGULAR, + "regexp","regexp(,) = <1|0>", + "Find regexp in string expression. Returns 1 if found or 0 if not", + regexp, + 2), + UserFunctionDef(FunctionType.REGULAR, + "sha","sha(,,) = ", + "Calculate sha of some expression. Algorithm can be one of 1,224,256,384,512. For now encoding must be manually provided. Will use the input encoding automatically in the future.", + sha, + 3), + UserFunctionDef(FunctionType.REGULAR, + "sha1","sha1(,) = ", + "Calculate sha1 of some expression. For now encoding must be manually provided. 
Will be taken automatically from the input encoding in the future.", + sha1, + 2), + UserFunctionDef(FunctionType.REGULAR, + "md5","md5(,) = ", + "Calculate md5 of expression. Returns a hex-string of the result. Currently requires to manually provide the encoding of the data. Will be taken automatically from the input encoding in the future.", + md5, + 2), + UserFunctionDef(FunctionType.REGULAR, + "sqrt","sqrt() = ", + "Calculate the square root of the expression", + sqrt, + 1), + UserFunctionDef(FunctionType.REGULAR, + "power","power(,) = ", + "Raise expr1 to the power of expr2", + power, + 2), + UserFunctionDef(FunctionType.AGG, + "percentile","percentile(,) = ", + "Calculate the strict percentile of a set of a values.", + StrictPercentile, + 2), + UserFunctionDef(FunctionType.AGG, + "stddev_pop","stddev_pop() = ", + "Calculate the population standard deviation of a set of values", + StdevPopulation, + 1), + UserFunctionDef(FunctionType.AGG, + "stddev_sample","stddev_sample() = ", + "Calculate the sample standard deviation of a set of values", + StdevSample, + 1) +] + +def print_user_functions(): + for udf in user_functions: + print("Function: %s" % udf.name) + print(" Usage: %s" % udf.usage) + print(" Description: %s" % udf.description) + +class Sqlite3DBResults(object): + def __init__(self,query_column_names,results): + self.query_column_names = query_column_names + self.results = results + class Sqlite3DB(object): def __init__(self, show_sql=SHOW_SQL): @@ -169,11 +312,13 @@ def store_db_to_disk(self,sqlite_db_filename,table_names_mapping,method='standar raise ValueError('Unknown store-db-to-disk method %s' % method) def add_user_functions(self): - self.conn.create_function("regexp", 2, regexp) - self.conn.create_function("sha1", 1, sha1) - self.conn.create_function("md5", 2, md5) - self.conn.create_function("md5", 1, md5) - self.conn.create_aggregate("percentile",2,StrictPercentile) + for udf in user_functions: + if type(udf.func_or_obj) == type(object): + 
self.conn.create_aggregate(udf.name,udf.param_count,udf.func_or_obj) + elif type(udf.func_or_obj) == type(md5): + self.conn.create_function(udf.name,udf.param_count,udf.func_or_obj) + else: + raise Exception("Invalid user function definition %s" % str(udf)) def is_numeric_type(self, column_type): return column_type in self.numeric_column_types @@ -1791,6 +1936,8 @@ def get_option_with_default(p, option_type, option, default): help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding") output_data_option_group.add_option("-W","--output-quoting-mode",dest="output_quoting_mode",default="minimal", help="Output quoting mode. Possible values are all, minimal, nonnumeric and none. Note the slightly misleading parameter name, and see the matching -w parameter for input quoting.") + output_data_option_group.add_option("-L","--list-user-functions",dest="list_user_functions",default=False,action="store_true", + help="List all user functions") parser.add_option_group(output_data_option_group) #----------------------------------------------- query_option_group = OptionGroup(parser,"Query Related Options") @@ -1808,6 +1955,11 @@ def get_option_with_default(p, option_type, option, default): sys.exit(0) ### + + if options.list_user_functions: + print_user_functions() + sys.exit(0) + if len(args) == 0 and options.query_filename is None: print_credentials() print("Must provide at least one query in the command line, or through a file with the -q parameter", file=sys.stderr) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index e450b0a9..e2b37439 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -89,7 +89,7 @@ Usage: Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. 
- Basic usage is q "" where table names are just regular file names (Use - to read from standard input) + Basic usage is q "" where table names are just regular file names (Use - to read from standard input) When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN. Column types are detected automatically. Use -A in order to see the column name/type analysis. @@ -133,6 +133,8 @@ Options: -d DELIMITER, --delimiter=DELIMITER Field delimiter. If none specified, then space is used as the delimiter. + -p, --pipe-delimited + Same as -d '|'. Added for convenience and readability -t, --tab-delimited Same as -d . Just a shorthand for handling standard tab delimited file You can use $'\t' if you @@ -186,6 +188,8 @@ Options: Field delimiter for output. If none specified, then the -d delimiter is used if present, or space if no delimiter is specified + -P, --pipe-delimited-output + Same as -D '|'. Added for convenience and readability. -T, --tab-delimited-output Same as -D . Just a shorthand for outputting tab delimited output. You can use -D $'\t' if you want. @@ -210,6 +214,8 @@ Options: nonnumeric and none. Note the slightly misleading parameter name, and see the matching -w parameter for input quoting. 
+ -L, --list-user-functions + List all user functions Query Related Options: -q QUERY_FILENAME, --query-filename=QUERY_FILENAME diff --git a/test/test-suite b/test/test-suite index 4f1f877b..a5837701 100755 --- a/test/test-suite +++ b/test/test-suite @@ -11,7 +11,6 @@ # import unittest -import pytest import random import json from json import JSONEncoder @@ -283,34 +282,6 @@ class BasicTests(AbstractQTestCase): self.cleanup(tmpfile) - def test_regexp_int_data_handling(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0],six.b("1")) - - self.cleanup(tmpfile) - - def test_regexp_null_data_handling(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0],six.b("2")) - - self.cleanup(tmpfile) - def test_select_one_column(self): tmpfile = self.create_file_with_data(sample_data_no_header) @@ -1525,6 +1496,55 @@ class BasicTests(AbstractQTestCase): class UserFunctionTests(AbstractQTestCase): + def test_regexp_int_data_handling(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + + cmd = Q_EXECUTABLE + ' -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b("1")) + + self.cleanup(tmpfile) + + def test_percentile_func(self): + cmd = 'seq 1000 1999 | %s "select substr(c1,0,3),percentile(c1,0),percentile(c1,0.5),percentile(c1,1) from - group by substr(c1,0,3)" -c 1' % 
Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 10) + self.assertEqual(len(e), 0) + + output_table = [l.split(six.b(" ")) for l in o] + group_labels = [int(row[0]) for row in output_table] + minimum_values = [float(row[1]) for row in output_table] + median_values = [float(row[2]) for row in output_table] + max_values = [float(row[3]) for row in output_table] + + base_values = list(range(1000,2000,100)) + + self.assertEqual(group_labels,list(range(10,20))) + self.assertEqual(minimum_values,base_values) + self.assertEqual(median_values,list(map(lambda x: x + 49.5,base_values))) + self.assertEqual(max_values,list(map(lambda x: x + 99,base_values))) + + def test_regexp_null_data_handling(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + + cmd = Q_EXECUTABLE + ' -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b("2")) + + self.cleanup(tmpfile) + def test_md5_function(self): cmd = 'seq 1 4 | %s -c 1 -d , "select c1,md5(c1,\'utf-8\') from -"' % Q_EXECUTABLE retcode, o, e = run_command(cmd) @@ -1538,6 +1558,74 @@ class UserFunctionTests(AbstractQTestCase): self.assertEqual(tuple(o[2].split(six.b(','),1)),(six.b('3'),six.b('eccbc87e4b5ce2fe28308fd9f2a7baf3'))) self.assertEqual(tuple(o[3].split(six.b(','),1)),(six.b('4'),six.b('a87ff679a2f3e71d9181a67b7542122c'))) + def test_stddev_functions(self): + tmpfile = self.create_file_with_data(six.b("\n".join(map(str,[234,354,3234,123,4234,234,634,56,65])))) + + cmd = '%s -c 1 -d , "select round(stddev_pop(c1),10),round(stddev_sample(c1),10) from %s"' % (Q_EXECUTABLE,tmpfile.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + + self.assertEqual(o[0],'1479.7015464838,1569.4604964764') + + 
self.cleanup(tmpfile) + + def test_sqrt_function(self): + cmd = 'seq 1 5 | %s -c 1 -d , "select round(sqrt(c1),10) from -"' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),5) + self.assertEqual(len(e),0) + + self.assertEqual(o[0],six.b('1.0')) + self.assertEqual(o[1],six.b('1.4142135624')) + self.assertEqual(o[2],six.b('1.7320508076')) + self.assertEqual(o[3],six.b('2.0')) + self.assertEqual(o[4],six.b('2.2360679775')) + + def test_power_function(self): + cmd = 'seq 1 5 | %s -c 1 -d , "select round(power(c1,2.5),10) from -"' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),5) + self.assertEqual(len(e),0) + + self.assertEqual(o[0],six.b('1.0')) + self.assertEqual(o[1],six.b('5.6568542495')) + self.assertEqual(o[2],six.b('15.5884572681')) + self.assertEqual(o[3],six.b('32.0')) + self.assertEqual(o[4],six.b('55.9016994375')) + + def test_sha1_function(self): + cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha1(c1,\'utf-8\') from -"' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),4) + self.assertEqual(len(e),0) + + self.assertEqual(o[0],six.b('1,356a192b7913b04c54574d18c28d46e6395428ab')) + self.assertEqual(o[1],six.b('2,da4b9237bacccdf19c0760cab7aec4a8359010b0')) + self.assertEqual(o[2],six.b('3,77de68daecd823babbb58edb1c8e14d7106e83bb')) + self.assertEqual(o[3],six.b('4,1b6453892473a467d07372d45eb05abc2031647a')) + + def test_sha_function(self): + cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha(c1,1,\'utf-8\') as sha1,sha(c1,224,\'utf-8\') as sha224,sha(c1,256,\'utf-8\') as sha256 from -"' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),4) + self.assertEqual(len(e),0) + + 
self.assertEqual(o[0],six.b('1,356a192b7913b04c54574d18c28d46e6395428ab,e25388fde8290dc286a6164fa2d97e551b53498dcbf7bc378eb1f178,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b')) + self.assertEqual(o[1],six.b('2,da4b9237bacccdf19c0760cab7aec4a8359010b0,58b2aaa0bfae7acc021b3260e941117b529b2e69de878fd7d45c61a9,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35')) + self.assertEqual(o[2],six.b('3,77de68daecd823babbb58edb1c8e14d7106e83bb,4cfc3a1811fe40afa401b25ef7fa0379f1f7c1930a04f8755d678474,4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce')) + self.assertEqual(o[3],six.b('4,1b6453892473a467d07372d45eb05abc2031647a,271f93f45e9b4067327ed5c8cd30a034730aaace4382803c3e1d6c2f,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328cb08b5531fcacdabf8a')) + class MultiHeaderTests(AbstractQTestCase): def test_output_header_when_multiple_input_headers_exist(self): From e11c3a1659d4a74c93dc65ec89023a896fbd1383 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 13 Sep 2020 17:38:07 +0300 Subject: [PATCH 059/111] fix python2/3 issue in test --- test/test-suite | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-suite b/test/test-suite index a5837701..7efa3fea 100755 --- a/test/test-suite +++ b/test/test-suite @@ -1568,7 +1568,7 @@ class UserFunctionTests(AbstractQTestCase): self.assertEqual(len(o),1) self.assertEqual(len(e),0) - self.assertEqual(o[0],'1479.7015464838,1569.4604964764') + self.assertEqual(o[0],six.b('1479.7015464838,1569.4604964764')) self.cleanup(tmpfile) From 5b428b60d63b50c17faf68a5969961a67bdd3467 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Mon, 14 Sep 2020 15:43:52 +0300 Subject: [PATCH 060/111] reintroduce old sha1 for backward compat + version bumps --- bin/q.py | 20 +++++++++++--------- do-manual-release.sh | 2 +- setup.py | 2 +- test/test-suite | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/bin/q.py b/bin/q.py index 5e165c85..c259588f 100755 --- a/bin/q.py 
+++ b/bin/q.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (C) 2012-2019 Harel Ben-Attia +# Copyright (C) 2012-2020 Harel Ben-Attia # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ from collections import OrderedDict -q_version = '2.0.16' +q_version = '2.0.17' __all__ = [ 'QTextAsData' ] @@ -89,9 +89,11 @@ def sha(data,algorithm,encoding): except Exception as e: print(e) -# For backward compatibility -def sha1(data,encoding): - return sha(data,1,encoding) +# For backward compatibility only (doesn't handle encoding well enough) +def sha1(data): + if not isinstance(data,str) and not isinstance(data,unicode): + return hashlib.sha1(str(data)).hexdigest() + return hashlib.sha1(data).hexdigest() # TODO Add caching of compiled regexps - Will be added after benchmarking capability is baked in def regexp(regular_expression, data): @@ -217,10 +219,10 @@ def __init__(self,func_type,name,usage,description,func_or_obj,param_count): sha, 3), UserFunctionDef(FunctionType.REGULAR, - "sha1","sha1(,) = ", - "Calculate sha1 of some expression. For now encoding must be manually provided. Will be taken automatically from the input encoding in the future.", + "sha1","sha1() = ", + "Exists for backward compatibility only, since it doesn't handle encoding properly. Calculates sha1 of some expression", sha1, - 2), + 1), UserFunctionDef(FunctionType.REGULAR, "md5","md5(,) = ", "Calculate md5 of expression. Returns a hex-string of the result. Currently requires to manually provide the encoding of the data. 
Will be taken automatically from the input encoding in the future.", @@ -1296,7 +1298,7 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): def print_credentials(): print("q version %s" % q_version, file=sys.stderr) print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr) - print("Copyright (C) 2012-2019 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) + print("Copyright (C) 2012-2020 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) print("http://harelba.github.io/q/", file=sys.stderr) print(file=sys.stderr) diff --git a/do-manual-release.sh b/do-manual-release.sh index 51470617..11116778 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -2,7 +2,7 @@ set -e -VERSION=2.0.16 +VERSION=2.0.17 if [[ "$TRAVIS_BRANCH" != "master" ]] then diff --git a/setup.py b/setup.py index f672d1c4..49092722 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -q_version = '2.0.16' +q_version = '2.0.17' setup( name='q', diff --git a/test/test-suite b/test/test-suite index 7efa3fea..b44b357c 100755 --- a/test/test-suite +++ b/test/test-suite @@ -1601,7 +1601,7 @@ class UserFunctionTests(AbstractQTestCase): self.assertEqual(o[4],six.b('55.9016994375')) def test_sha1_function(self): - cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha1(c1,\'utf-8\') from -"' % Q_EXECUTABLE + cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha1(c1) from -"' % Q_EXECUTABLE retcode, o, e = run_command(cmd) self.assertEqual(retcode,0) From 865f591a10141f708710532d7b0f40ea4e2d1386 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Mon, 14 Sep 2020 16:05:22 +0300 Subject: [PATCH 061/111] make sha1 py2/py3 compatible --- bin/q.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bin/q.py b/bin/q.py index c259588f..5760653c 100755 --- a/bin/q.py +++ b/bin/q.py @@ -91,9 +91,7 @@ def sha(data,algorithm,encoding): # For backward 
compatibility only (doesn't handle encoding well enough) def sha1(data): - if not isinstance(data,str) and not isinstance(data,unicode): - return hashlib.sha1(str(data)).hexdigest() - return hashlib.sha1(data).hexdigest() + return hashlib.sha1(six.text_type(data).encode('utf-8')).hexdigest() # TODO Add caching of compiled regexps - Will be added after benchmarking capability is baked in def regexp(regular_expression, data): From 9b492b829a327ab1e2c8c0e5ed7ccad23ecbc9dc Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 19 Sep 2020 12:56:06 +0300 Subject: [PATCH 062/111] q benchmark (#241) --- .gitignore | 3 + VERSION_BUMP.md | 18 ++ bin/q.py | 2 +- do-manual-release.sh | 2 +- requirements.txt | 1 + setup.py | 2 +- test/BENCHMARK.md | 159 +++++++++++++++ test/benchmark-config.sh | 3 + .../octosql_v0.3.0.benchmark-results | 48 +++++ .../q-benchmark-2.7.18.benchmark-results | 48 +++++ .../q-benchmark-3.6.4.benchmark-results | 48 +++++ .../q-benchmark-3.7.9.benchmark-results | 48 +++++ .../q-benchmark-3.8.5.benchmark-results | 48 +++++ .../summary.benchmark-results | 48 +++++ .../textql_2.0.3.benchmark-results | 48 +++++ test/prepare-benchmark-env | 44 ++++ test/run-benchmark | 77 +++++++ test/test-suite | 192 +++++++++++++++++- 18 files changed, 835 insertions(+), 4 deletions(-) create mode 100644 VERSION_BUMP.md create mode 100644 test/BENCHMARK.md create mode 100644 test/benchmark-config.sh create mode 100644 test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/octosql_v0.3.0.benchmark-results create mode 100644 test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-2.7.18.benchmark-results create mode 100644 test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.6.4.benchmark-results create mode 100644 
test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.7.9.benchmark-results create mode 100644 test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.8.5.benchmark-results create mode 100644 test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/summary.benchmark-results create mode 100644 test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/textql_2.0.3.benchmark-results create mode 100755 test/prepare-benchmark-env create mode 100755 test/run-benchmark diff --git a/.gitignore b/.gitignore index 694b157b..2d4ca0f8 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,6 @@ packages .idea/ dist/windows/ generated-site/ +benchmark_data.tar.gz +_benchmark_data/ +q.egg-info/ diff --git a/VERSION_BUMP.md b/VERSION_BUMP.md new file mode 100644 index 00000000..7a4c5bf5 --- /dev/null +++ b/VERSION_BUMP.md @@ -0,0 +1,18 @@ + +# Version bump +Currently, there are some manual steps needed in order to release a new version: + +* Make sure that you're in a branch +* Change the version in the following three files: `bin/q.py`, `setup.py` and `do-manual-release.sh` and commit them to the branch +* perform merge into master of that branch +* add a tag of the release version +* `git push --tags origin master` +* create a release in github with the tag you've just created + +Pushing to master will trigger a build/release, and will push the artifacts to the new release as assets. + +The reason for this is related to limitations in the way that pyci uploads the binaries to github. + +# + +TBD - Continue with the flow of wrapping the artifacts with rpm/deb, copying the files to packages-for-q, and updating the web site. 
diff --git a/bin/q.py b/bin/q.py index 5760653c..4775ddb9 100755 --- a/bin/q.py +++ b/bin/q.py @@ -33,7 +33,7 @@ from collections import OrderedDict -q_version = '2.0.17' +q_version = '2.0.18' __all__ = [ 'QTextAsData' ] diff --git a/do-manual-release.sh b/do-manual-release.sh index 11116778..9e0d787a 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -2,7 +2,7 @@ set -e -VERSION=2.0.17 +VERSION=2.0.18 if [[ "$TRAVIS_BRANCH" != "master" ]] then diff --git a/requirements.txt b/requirements.txt index 6c4193ae..3ad7d2bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ six==1.11.0 flake8==3.6.0 +setuptools<45.0.0 diff --git a/setup.py b/setup.py index 49092722..df113123 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -q_version = '2.0.17' +q_version = '2.0.18' setup( name='q', diff --git a/test/BENCHMARK.md b/test/BENCHMARK.md new file mode 100644 index 00000000..3a4d5732 --- /dev/null +++ b/test/BENCHMARK.md @@ -0,0 +1,159 @@ + + +NOTE: *Please don't use or publish this benchmark data yet. See below for details* + +# Overview +This is just a preliminary benchmark, originally created for validating performance optimizations and suggestions from users, and analyzing q's move to python3. After writing it, I thought it might be interesting to test its speed against textql and octosql as well. + +The results I'm getting are somewhat surprising, to the point of me questioning them a bit, so it would be great to validate them further before finalizing the benchmark results. 
+ +The most surprising results are as follows: +* python3 vs python2 - A huge improvement (for large files, execution times with python 3 are around 40% of the times for python 2) +* python3 vs textql (written in golang) - Seems that textql becomes slower than the python3 q version as the data size grows (both rows and columns) + +I would love to validate these results by having other people run the benchmark as well and send me their results. + +If you're interested, follow the instructions and run the benchmark on your machine. After the benchmark is finished, send me the final results file, along with some details about your hardware, and I'll add it to the spreadsheet. + +I've tried to make running the benchmark as seamless as possible, but there obviously might be errors/issues. Please contact me if you encounter any issue, or just open a ticket. + +# Benchmark +This is an initial version of the benchmark, along with some results. The following is compared: +* q running on multiple python versions +* textql 2.0.3 +* octosql v0.3.0 + +The specific python versions which are being tested are specified in `benchmark-config.sh`. + +This is by no means a scientific benchmark, and it only focuses on the data loading time which is the only significant factor for comparison (e.g. the query itself is a very simple count query). Also, it does not try to provide any usability comparison between q and textql/octosql, an interesting topic on its own. + +## Methodology +The idea was to compare the time sensitivity of row and column count. + +* Row counts: 1,10,100,1000,10000,100000,1000000 +* Column counts: 1,5,10,20,50,100 +* Iterations for each combination: 10 + +File sizes: +* 1M rows by 100 columns - 976MB (~1GB) - Largest file +* 1M rows by 50 columns - 477MB + +The benchmark executes simple `select count(*) from ` queries for each combination, calculating the mean and stddev of each set of iterations. The stddev is used in order to measure the validity of the results. 
+ +The graphs below only compare the means of the results, the standard deviations are written into the google sheet itself, and can be viewed there if needed. + +Instructions on how to run the benchmark are at the bottom section of this document, after the results section. + +## Hardware +OSX Catalina on a 15" Macbook Pro from Mid 2015, with 16GB of RAM, and an internal Flash Drive of 256GB. + +## Results +(Results are automatically updated from the baseline tab in the google spreadsheet). + +Detailed results below. + +Summary: +* All python 3 versions (3.6/3.7/3.8) provide similar results across all scales. +* python 3.x provides significantly better results than python2. Improvement grows as the file size grows (20% improvement for small files, up to ~70% improvement for the largest file) +* textql seems to provide faster results than q (py3) for smaller files, up to around 30MB of data. As the size grows further, it becomes slower than q, up to 80% (74 seconds vs 41 seconds) for the largest file +* The larger the files, textql becomes slower than q-py3 (up to 80% more time than q for the largest file) +* octosql is significantly slower than both q and textql, even for small files with a low number of rows and columns + +### Data for 1M rows + +#### Run time durations for 1M rows and different column counts: +| rows | columns | File Size | python 2.7 | python 3.6 | python 3.7 | python 3.8 | textql | octosql | +|:-------: |:-------: |:---------: |:----------: |:----------: |:----------: |:----------: |:------: |:-------: | +| 1000000 | 1 | 17M | 5.15 | 4.24 | 4.08 | 3.98 | 2.90 | 49.95 | +| 1000000 | 5 | 37M | 10.68 | 5.37 | 5.26 | 5.14 | 5.88 | 54.69 | +| 1000000 | 10 | 89M | 17.56 | 7.25 | 7.15 | 7.01 | 9.69 | 65.32 | +| 1000000 | 20 | 192M | 30.28 | 10.96 | 10.78 | 10.64 | 17.34 | 83.94 | +| 1000000 | 50 | 477M | 71.56 | 21.98 | 21.59 | 21.70 | 38.57 | 158.26 | +| 1000000 | 100 | 986M | 131.86 | 41.71 | 40.82 | 41.02 | 74.62 | 289.58 | + +#### Comparison 
between python 3.x and python 2 run times (1M rows): +(>100% is slower than q-py2, <100% is faster than q-py2) + +| rows | columns | file size | q-py2 runtime | q-py3.6 vs q-py2 runtime | q-py3.7 vs q-py2 runtime | q-py3.8 vs q-py2 runtime | +|:-------: |:-------: |:---------: |:-------------: |:------------------------: |:------------------------: |:------------------------: | +| 1000000 | 1 | 17M | 100.00% | 82.34% | 79.34% | 77.36% | +| 1000000 | 5 | 37M | 100.00% | 50.25% | 49.22% | 48.08% | +| 1000000 | 10 | 89M | 100.00% | 41.30% | 40.69% | 39.93% | +| 1000000 | 20 | 192M | 100.00% | 36.18% | 35.59% | 35.14% | +| 1000000 | 50 | 477M | 100.00% | 30.71% | 30.17% | 30.32% | +| 1000000 | 100 | 986M | 100.00% | 31.63% | 30.96% | 31.11% | + +#### textql and octosql comparison against q-py3 run time (1M rows): +(>100% is slower than q-py3, <100% is faster than q-py3) + +| rows | columns | file size | avg q-py3 runtime | textql vs q-py3 runtime | octosql vs q-py3 runtime | +|:-------: |:-------: |:---------: |:-----------------: |:-----------------------: |:------------------------: | +| 1000000 | 1 | 17M | 100.00% | 70.67% | 1217.76% | +| 1000000 | 5 | 37M | 100.00% | 111.86% | 1040.70% | +| 1000000 | 10 | 89M | 100.00% | 135.80% | 915.28% | +| 1000000 | 20 | 192M | 100.00% | 160.67% | 777.92% | +| 1000000 | 50 | 477M | 100.00% | 177.26% | 727.40% | +| 1000000 | 100 | 986M | 100.00% | 181.19% | 703.15% | + +### Sensitivity to column count +Based on a the largest file size of 1,000,000 rows. 
+ +![Sensitivity to column count](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=1585602598&format=image) + +### Sensitivity to line count (per column count) + +#### 1 Column Table +![1 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=1119350798&format=image) + +#### 5 Column Table +![5 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=599223098&format=image) + +#### 10 Column Table +![10 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=82695414&format=image) + +#### 20 Column Table +![20 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=1573199483&format=image) + +#### 50 Column Table +![50 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=448568670&format=image) + +#### 100 Column Table +![100 column table](https://docs.google.com/spreadsheets/d/e/2PACX-1vQy9Zm4I322Tdf5uoiFFJx6Oi3Z4AMq7He3fUUtsEQVQIdTGfWgjxFD6k8PAy9wBjvFkqaG26oBgNTP/pubchart?oid=2101488258&format=image) + +## Running the benchmark +Please note that the initial run generates large files, so you'd need more than 3GB of free space available. All the generated files reside in the `_benchmark_data/` folder. + +Part of the preparation flow will download the benchmark data as needed. 
+ +### Preparations +* Prerequisites: + * pyenv installed + * pyenv-virtualenv installed + * [`textql`](https://github.com/dinedal/textql#install) + * [`octosql`](https://github.com/cube2222/octosql#installation) + +Run `./prepare-benchmark-env` + +### Execution +Run `./run-benchmark <benchmark-id>`. + +Benchmark output files will be written to `./benchmark-results/<q-executable>/<benchmark-id>/`. + +* `benchmark-id` is the id you want to give the benchmark. +* `q-executable` is the name of the q executable being used for the benchmark. If none has been provided through Q_EXECUTABLE, then the value will be the last commit hash. Note that there is no checking of whether the working tree is clean. + +The summary of the benchmark will be written to `./benchmark-results/<q-executable>/<benchmark-id>/summary.benchmark-results` + +By default, the benchmark will use the source python files inside the project. If you want to run it on one of the standalone binary executables, then set Q_EXECUTABLE to the full path of the q binary. + +For anyone helping with running the benchmark, don't use this parameter for now, just test against a clean checkout of the code using `./run-benchmark <benchmark-id>`. + +## Benchmark Development info +### Running against the standalone binary +* `./run-benchmark` can accept a second parameter with the q executable. If it gets this parameter, it will use this path for running q. This provides a way to test the standalone q binaries in the new packaging format. When this parameter does not exist, the benchmark is executed directly from the source code. + +### Updating the benchmark markdown document file +The results should reside in the following [google sheet](https://docs.google.com/spreadsheets/d/1Ljr8YIJwUQ5F4wr6ATga5Aajpu1CvQp1pe52KGrLkbY/edit?usp=sharing). + +Add a new tab to the google sheet, and paste the content of `summary.benchmark-results` to the new sheet.
+ diff --git a/test/benchmark-config.sh b/test/benchmark-config.sh new file mode 100644 index 00000000..52cf71e9 --- /dev/null +++ b/test/benchmark-config.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +BENCHMARK_PYTHON_VERSIONS=(2.7.18 3.6.4 3.7.9 3.8.5) diff --git a/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/octosql_v0.3.0.benchmark-results b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/octosql_v0.3.0.benchmark-results new file mode 100644 index 00000000..ced04856 --- /dev/null +++ b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/octosql_v0.3.0.benchmark-results @@ -0,0 +1,48 @@ +lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 1 0.582091641426 0.0235290239617 +10 1 0.596219730377 0.0320124029461 +100 1 0.575977492332 0.0199296245316 +1000 1 0.56785056591 0.00846389017466 +10000 1 1.1466334343 0.00760108698846 +100000 1 5.49565172195 0.131791932977 +1000000 1 49.9513648033 0.443430523063 +lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 5 0.582160949707 0.0274409391571 +10 5 0.57046456337 0.0199413000359 +100 5 0.585747480392 0.0372543971623 +1000 5 0.572268772125 0.00384300349763 +10000 5 1.15530762672 0.0117990775856 +100000 5 6.10629923344 0.146711842919 +1000000 5 54.6851765394 0.315486399525 +lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 10 0.586222410202 0.0232479065914 +10 10 0.59000480175 0.0186508192447 +100 10 0.581873703003 0.0331332482772 +1000 10 0.569027900696 0.0103675493106 +10000 10 1.40067322254 0.00583352224401 +100000 10 7.30705575943 0.0165839217599 +1000000 10 65.3242264032 0.512552576414 +lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 20 0.571048212051 0.0166919396871 +10 20 0.594776701927 0.0368900941023 +100 20 0.561370825768 0.00907051791451 +1000 20 0.577527880669 0.00983965108957 +10000 20 1.90710241795 0.00757011452155 +100000 20 9.8267291069 
0.127844155326 +1000000 20 83.9448960066 0.46121344046 +lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 50 0.572030115128 0.0253648479103 +10 50 0.56993534565 0.0230474303306 +100 50 0.563336873055 0.00964411866903 +1000 50 0.826378440857 0.00941629472813 +10000 50 3.27872717381 0.126592845956 +100000 50 17.890055728 0.116794666005 +1000000 50 158.262442636 0.826290454446 +lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 100 0.569358110428 0.0279801762531 +10 100 0.580981063843 0.0272341107532 +100 100 0.559471726418 0.00668155858429 +1000 100 1.08161640167 0.00698594638512 +10000 100 5.67823712826 0.0123398407167 +100000 100 32.2797194242 0.315508270241 +1000000 100 289.582628798 0.929455236817 diff --git a/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-2.7.18.benchmark-results b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-2.7.18.benchmark-results new file mode 100644 index 00000000..5b7aa05a --- /dev/null +++ b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-2.7.18.benchmark-results @@ -0,0 +1,48 @@ +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev +1 1 0.106449890137 0.002010027753 +10 1 0.106737875938 0.00224112203891 +100 1 0.107839012146 0.00102954061006 +1000 1 0.113026666641 0.00147361890226 +10000 1 0.160376381874 0.00569766179806 +100000 1 0.608236479759 0.00604026519608 +1000000 1 5.14807910919 0.0584474028762 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev +1 5 0.106719517708 0.00236752032369 +10 5 0.107823801041 0.00238873169438 +100 5 0.109785079956 0.0013047675259 +1000 5 0.120395207405 0.00207224422629 +10000 5 0.21783041954 0.00522254475716 +100000 5 1.17115747929 0.0221394865225 +1000000 5 10.6830974817 0.339822977934 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev +1 10 0.104981088638 
0.00166552032929 +10 10 0.108320140839 0.00204034349199 +100 10 0.112528729439 0.00168376477305 +1000 10 0.13019015789 0.00253773120965 +10000 10 0.284891676903 0.00384009140782 +100000 10 1.84725661278 0.00860738744089 +1000000 10 17.5610994339 0.228322442172 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev +1 20 0.106477689743 0.00254429925697 +10 20 0.108580899239 0.00173704653824 +100 20 0.118750286102 0.00247623639866 +1000 20 0.146431708336 0.00249685551944 +10000 20 0.419492387772 0.00248210434668 +100000 20 3.15847921371 0.0550301268026 +1000000 20 30.279082489 0.124978814506 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev +1 50 0.105411934853 0.00171651054128 +10 50 0.109102797508 0.00111620290512 +100 50 0.135682177544 0.00196166766665 +1000 50 0.198261427879 0.00396172489054 +10000 50 0.821499919891 0.0111642692132 +100000 50 7.05980975628 0.121182371277 +1000000 50 71.5645889759 5.02009516291 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev +1 100 0.10662381649 0.00193146624495 +10 100 0.110662698746 0.00171461379583 +100 100 0.163547992706 0.00166570196628 +1000 100 0.280023741722 0.00337543024145 +10000 100 1.46053376198 0.0221691284465 +100000 100 13.2369835854 0.309375896258 +1000000 100 131.864977288 1.22415449691 diff --git a/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.6.4.benchmark-results b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.6.4.benchmark-results new file mode 100644 index 00000000..e611b7a5 --- /dev/null +++ b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.6.4.benchmark-results @@ -0,0 +1,48 @@ +lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev +1 1 0.10342762470245362 0.0017673875851759295 +10 1 0.10239293575286865 0.0012505611685910795 +100 1 0.10317318439483643 
0.0010581783881541751 +1000 1 0.10687050819396973 0.0014050135772919004 +10000 1 0.1447664737701416 0.001841256227287192 +100000 1 0.5162809371948243 0.006962985088492867 +1000000 1 4.238853335380554 0.04834401143632507 +lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev +1 5 0.10211825370788574 0.0022568191323651568 +10 5 0.1025341272354126 0.0016446470901070106 +100 5 0.1053577184677124 0.0015298114223855884 +1000 5 0.10980842113494874 0.002536098780902228 +10000 5 0.1590113162994385 0.003123074098301634 +100000 5 0.6348223447799682 0.0082691507829872 +1000000 5 5.368562030792236 0.11628913334105236 +lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev +1 10 0.10251858234405517 0.0015963869535345293 +10 10 0.10278875827789306 0.0009920577082124496 +100 10 0.10715732574462891 0.002033320000941064 +1000 10 0.11389360427856446 0.0023603847702423973 +10000 10 0.17806434631347656 0.001114054252191835 +100000 10 0.8252989768981933 0.0037080843359275904 +1000000 10 7.252838873863221 0.029052130546213153 +lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev +1 20 0.10367965698242188 0.003661761341842434 +10 20 0.10489590167999267 0.001977141196109372 +100 20 0.11108210086822509 0.0014801173497056886 +1000 20 0.12110791206359864 0.001648524669420912 +10000 20 0.2178968906402588 0.0019298316207276716 +100000 20 1.1962245225906372 0.010541407803235559 +1000000 20 10.956057572364807 0.12677108174061705 +lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev +1 50 0.10458300113677979 0.0016367630302744722 +10 50 0.10616152286529541 0.002345135740908088 +100 50 0.12375867366790771 0.00238414904864133 +1000 50 0.14462883472442628 0.0022428030896492978 +10000 50 0.34488487243652344 0.004867441221052092 +100000 50 2.3394312858581543 0.02263239858944125 +1000000 50 21.979821610450745 0.09080404939303836 +lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev +1 100 0.10372309684753418 0.0010299126833031144 +10 100 0.10784556865692138 
0.0016557634029464607 +100 100 0.14526791572570802 0.0028194506905186724 +1000 100 0.18315494060516357 0.0023585311962114673 +10000 100 0.5586131334304809 0.004808492789681402 +100000 100 4.287398314476013 0.00957500108409644 +1000000 100 41.706851434707644 0.4161526076289425 diff --git a/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.7.9.benchmark-results b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.7.9.benchmark-results new file mode 100644 index 00000000..7a1f7715 --- /dev/null +++ b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.7.9.benchmark-results @@ -0,0 +1,48 @@ +lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev +1 1 0.08099310398101807 0.001417385651688644 +10 1 0.0822291374206543 0.0014809900020001858 +100 1 0.08169686794281006 0.002108157069167563 +1000 1 0.08690853118896484 0.0012595326919263487 +10000 1 0.12215542793273926 0.0020152625320395434 +100000 1 0.4825761795043945 0.0050418000028856335 +1000000 1 4.084399747848511 0.027731958079814215 +lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev +1 5 0.0817826271057129 0.002665533758836163 +10 5 0.08261749744415284 0.0019205430658525572 +100 5 0.08472237586975098 0.002571239449841039 +1000 5 0.08973510265350342 0.002323797583077552 +10000 5 0.13746986389160157 0.001964971666036654 +100000 5 0.60649254322052 0.007131635266871318 +1000000 5 5.2585612535476685 0.05661789407928516 +lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev +1 10 0.08112843036651611 0.002251300165899426 +10 10 0.08175232410430908 0.0014557171018568637 +100 10 0.08572309017181397 0.0019643550214810675 +1000 10 0.09268453121185302 0.001816414236580489 +10000 10 0.15538835525512695 0.0024978076091814994 +100000 10 0.7879442930221557 0.009412516078916211 +1000000 10 7.146207928657532 0.06659760176757985 +lines 
columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev +1 20 0.08142082691192627 0.001304584466639188 +10 20 0.08197519779205323 0.0014842098503865223 +100 20 0.08949971199035645 0.0009937446141285785 +1000 20 0.09955930709838867 0.0013978961740806384 +10000 20 0.1966566801071167 0.0028489273218240147 +100000 20 1.1518636226654053 0.006410720031542237 +1000000 20 10.776052689552307 0.04739925571001746 +lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev +1 50 0.08237688541412354 0.0016494314799953837 +10 50 0.08519520759582519 0.002610550182895596 +100 50 0.10423583984375 0.0018808335751867933 +1000 50 0.12195603847503662 0.0023611894043373983 +10000 50 0.3163540124893188 0.002761333651520998 +100000 50 2.237372374534607 0.009955353920396077 +1000000 50 21.59097549915314 0.081188190530421 +lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev +1 100 0.08336784839630126 0.0013840724401561887 +10 100 0.0864112138748169 0.0017946939354350697 +100 100 0.12199611663818359 0.0013003743156634682 +1000 100 0.15871686935424806 0.0035993681064501234 +10000 100 0.5243751525878906 0.004370273273595629 +100000 100 4.175828623771667 0.016127303710583043 +1000000 100 40.82292411327362 0.12328165162380703 diff --git a/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.8.5.benchmark-results b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.8.5.benchmark-results new file mode 100644 index 00000000..ca8c87ad --- /dev/null +++ b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/q-benchmark-3.8.5.benchmark-results @@ -0,0 +1,48 @@ +lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev +1 1 0.10138180255889892 0.0017947074090971444 +10 1 0.10056869983673096 0.003442371291904885 +100 1 0.10126984119415283 0.0016392348107127808 +1000 1 0.10484635829925537 0.0019743937339163262 +10000 1 
0.1400548219680786 0.0024523366133394117 +100000 1 0.4901275157928467 0.003970374711691596 +1000000 1 3.982502889633179 0.045292138461945054 +lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev +1 5 0.09946837425231933 0.0018876161478998787 +10 5 0.099178147315979 0.0014194733014858227 +100 5 0.10171806812286377 0.0017580984705406846 +1000 5 0.10602672100067138 0.002000261880840017 +10000 5 0.15207929611206056 0.0015802680033212048 +100000 5 0.609218978881836 0.006150144273259608 +1000000 5 5.13688440322876 0.03649575898109647 +lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev +1 10 0.09925477504730225 0.002168389758635997 +10 10 0.09943633079528809 0.0016154501074880502 +100 10 0.10376312732696533 0.0017275485891005433 +1000 10 0.11087138652801513 0.0016934328033239559 +10000 10 0.17246220111846924 0.0023824485659318527 +100000 10 0.7999232530593872 0.003442975393506892 +1000000 10 7.012071299552917 0.059217904448851263 +lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev +1 20 0.10027089118957519 0.0020291529595204906 +10 20 0.10038816928863525 0.001957086760826999 +100 20 0.10723590850830078 0.0013833918448622436 +1000 20 0.11735000610351562 0.0020318895390750882 +10000 20 0.21264209747314453 0.00482341642419078 +100000 20 1.1567201137542724 0.002987096441878969 +1000000 20 10.640758633613586 0.06116581724028616 +lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev +1 50 0.10066506862640381 0.002051307639276982 +10 50 0.10588631629943848 0.0035835389655972105 +100 50 0.11841504573822022 0.001608174845404568 +1000 50 0.14032282829284667 0.002640027148889162 +10000 50 0.33160474300384524 0.0027796660009712947 +100000 50 2.258401036262512 0.011041280982383895 +1000000 50 21.70080256462097 0.15897944629180621 +lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev +1 100 0.10147004127502442 0.0021285682695135768 +10 100 0.10471885204315186 0.001248479289219899 +100 100 0.13894760608673096 0.002307980025026551 +1000 100 
0.17586205005645753 0.0023822296091426 +10000 100 0.5414002418518067 0.0036291866664635458 +100000 100 4.222555088996887 0.08562968951916528 +1000000 100 41.021552324295044 0.16033566363076862 diff --git a/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/summary.benchmark-results b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/summary.benchmark-results new file mode 100644 index 00000000..dcb1d280 --- /dev/null +++ b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/summary.benchmark-results @@ -0,0 +1,48 @@ +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 1 0.106449890137 0.002010027753 1 1 0.10342762470245362 0.0017673875851759295 1 1 0.08099310398101807 0.001417385651688644 1 1 0.10138180255889892 0.0017947074090971444 1 1 0.0196103572845 0.00207355214257 1 1 0.582091641426 0.0235290239617 +10 1 0.106737875938 0.00224112203891 10 1 0.10239293575286865 0.0012505611685910795 10 1 0.0822291374206543 0.0014809900020001858 10 1 0.10056869983673096 0.003442371291904885 10 1 0.0186784029007 0.000970810220668 10 1 0.596219730377 0.0320124029461 +100 1 0.107839012146 0.00102954061006 100 1 0.10317318439483643 0.0010581783881541751 100 1 0.08169686794281006 0.002108157069167563 100 1 0.10126984119415283 0.0016392348107127808 100 1 0.019472026825 0.00181951524514 100 1 0.575977492332 0.0199296245316 +1000 1 0.113026666641 0.00147361890226 1000 1 0.10687050819396973 0.0014050135772919004 1000 1 0.08690853118896484 0.0012595326919263487 1000 1 0.10484635829925537 0.0019743937339163262 1000 1 0.022180891037 0.00116649968967 1000 
1 0.56785056591 0.00846389017466 +10000 1 0.160376381874 0.00569766179806 10000 1 0.1447664737701416 0.001841256227287192 10000 1 0.12215542793273926 0.0020152625320395434 10000 1 0.1400548219680786 0.0024523366133394117 10000 1 0.051066827774 0.0018168767618 10000 1 1.1466334343 0.00760108698846 +100000 1 0.608236479759 0.00604026519608 100000 1 0.5162809371948243 0.006962985088492867 100000 1 0.4825761795043945 0.0050418000028856335 100000 1 0.4901275157928467 0.003970374711691596 100000 1 0.307463979721 0.00246268029188 100000 1 5.49565172195 0.131791932977 +1000000 1 5.14807910919 0.0584474028762 1000000 1 4.238853335380554 0.04834401143632507 1000000 1 4.084399747848511 0.027731958079814215 1000000 1 3.982502889633179 0.045292138461945054 1000000 1 2.89862303734 0.022182722976 1000000 1 49.9513648033 0.443430523063 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 5 0.106719517708 0.00236752032369 1 5 0.10211825370788574 0.0022568191323651568 1 5 0.0817826271057129 0.002665533758836163 1 5 0.09946837425231933 0.0018876161478998787 1 5 0.0195286750793 0.0017840569109 1 5 0.582160949707 0.0274409391571 +10 5 0.107823801041 0.00238873169438 10 5 0.1025341272354126 0.0016446470901070106 10 5 0.08261749744415284 0.0019205430658525572 10 5 0.099178147315979 0.0014194733014858227 10 5 0.0183676958084 0.000925251595491 10 5 0.57046456337 0.0199413000359 +100 5 0.109785079956 0.0013047675259 100 5 0.1053577184677124 0.0015298114223855884 100 5 0.08472237586975098 0.002571239449841039 100 5 0.10171806812286377 0.0017580984705406846 100 5 0.0199447393417 0.000907007099218 100 5 0.585747480392 0.0372543971623 +1000 5 0.120395207405 0.00207224422629 1000 5 
0.10980842113494874 0.002536098780902228 1000 5 0.08973510265350342 0.002323797583077552 1000 5 0.10602672100067138 0.002000261880840017 1000 5 0.0263328790665 0.00165486505938 1000 5 0.572268772125 0.00384300349763 +10000 5 0.21783041954 0.00522254475716 10000 5 0.1590113162994385 0.003123074098301634 10000 5 0.13746986389160157 0.001964971666036654 10000 5 0.15207929611206056 0.0015802680033212048 10000 5 0.0826982736588 0.00152451583229 10000 5 1.15530762672 0.0117990775856 +100000 5 1.17115747929 0.0221394865225 100000 5 0.6348223447799682 0.0082691507829872 100000 5 0.60649254322052 0.007131635266871318 100000 5 0.609218978881836 0.006150144273259608 100000 5 0.60660867691 0.00395761320274 100000 5 6.10629923344 0.146711842919 +1000000 5 10.6830974817 0.339822977934 1000000 5 5.368562030792236 0.11628913334105236 1000000 5 5.2585612535476685 0.05661789407928516 1000000 5 5.13688440322876 0.03649575898109647 1000000 5 5.87811236382 0.0304332294491 1000000 5 54.6851765394 0.315486399525 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 10 0.104981088638 0.00166552032929 1 10 0.10251858234405517 0.0015963869535345293 1 10 0.08112843036651611 0.002251300165899426 1 10 0.09925477504730225 0.002168389758635997 1 10 0.0191783189774 0.00107718516178 1 10 0.586222410202 0.0232479065914 +10 10 0.108320140839 0.00204034349199 10 10 0.10278875827789306 0.0009920577082124496 10 10 0.08175232410430908 0.0014557171018568637 10 10 0.09943633079528809 0.0016154501074880502 10 10 0.0185215950012 0.000840353961363 10 10 0.59000480175 0.0186508192447 +100 10 0.112528729439 0.00168376477305 100 10 0.10715732574462891 0.002033320000941064 100 10 0.08572309017181397 
0.0019643550214810675 100 10 0.10376312732696533 0.0017275485891005433 100 10 0.0209223031998 0.00164494657684 100 10 0.581873703003 0.0331332482772 +1000 10 0.13019015789 0.00253773120965 1000 10 0.11389360427856446 0.0023603847702423973 1000 10 0.09268453121185302 0.001816414236580489 1000 10 0.11087138652801513 0.0016934328033239559 1000 10 0.0309282779694 0.00110848590345 1000 10 0.569027900696 0.0103675493106 +10000 10 0.284891676903 0.00384009140782 10000 10 0.17806434631347656 0.001114054252191835 10000 10 0.15538835525512695 0.0024978076091814994 10000 10 0.17246220111846924 0.0023824485659318527 10000 10 0.121016025543 0.00105071105139 10000 10 1.40067322254 0.00583352224401 +100000 10 1.84725661278 0.00860738744089 100000 10 0.8252989768981933 0.0037080843359275904 100000 10 0.7879442930221557 0.009412516078916211 100000 10 0.7999232530593872 0.003442975393506892 100000 10 0.987622976303 0.00699348302979 100000 10 7.30705575943 0.0165839217599 +1000000 10 17.5610994339 0.228322442172 1000000 10 7.252838873863221 0.029052130546213153 1000000 10 7.146207928657532 0.06659760176757985 1000000 10 7.012071299552917 0.059217904448851263 1000000 10 9.69240145683 0.0354453778052 1000000 10 65.3242264032 0.512552576414 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 20 0.106477689743 0.00254429925697 1 20 0.10367965698242188 0.003661761341842434 1 20 0.08142082691192627 0.001304584466639188 1 20 0.10027089118957519 0.0020291529595204906 1 20 0.0202306985855 0.00159619251952 1 20 0.571048212051 0.0166919396871 +10 20 0.108580899239 0.00173704653824 10 20 0.10489590167999267 0.001977141196109372 10 20 0.08197519779205323 0.0014842098503865223 10 20 
0.10038816928863525 0.001957086760826999 10 20 0.0187650680542 0.000845692486156 10 20 0.594776701927 0.0368900941023 +100 20 0.118750286102 0.00247623639866 100 20 0.11108210086822509 0.0014801173497056886 100 20 0.08949971199035645 0.0009937446141285785 100 20 0.10723590850830078 0.0013833918448622436 100 20 0.0211876153946 0.000993808448942 100 20 0.561370825768 0.00907051791451 +1000 20 0.146431708336 0.00249685551944 1000 20 0.12110791206359864 0.001648524669420912 1000 20 0.09955930709838867 0.0013978961740806384 1000 20 0.11735000610351562 0.0020318895390750882 1000 20 0.0404737234116 0.00122415059261 1000 20 0.577527880669 0.00983965108957 +10000 20 0.419492387772 0.00248210434668 10000 20 0.2178968906402588 0.0019298316207276716 10000 20 0.1966566801071167 0.0028489273218240147 10000 20 0.21264209747314453 0.00482341642419078 10000 20 0.197762489319 0.00198188642677 10000 20 1.90710241795 0.00757011452155 +100000 20 3.15847921371 0.0550301268026 100000 20 1.1962245225906372 0.010541407803235559 100000 20 1.1518636226654053 0.006410720031542237 100000 20 1.1567201137542724 0.002987096441878969 100000 20 1.75432097912 0.00692372147543 100000 20 9.8267291069 0.127844155326 +1000000 20 30.279082489 0.124978814506 1000000 20 10.956057572364807 0.12677108174061705 1000000 20 10.776052689552307 0.04739925571001746 1000000 20 10.640758633613586 0.06116581724028616 1000000 20 17.3383012295 0.0410164637448 1000000 20 83.9448960066 0.46121344046 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 50 0.105411934853 0.00171651054128 1 50 0.10458300113677979 0.0016367630302744722 1 50 0.08237688541412354 0.0016494314799953837 1 50 0.10066506862640381 
0.002051307639276982 1 50 0.0205577373505 0.00133922342068 1 50 0.572030115128 0.0253648479103 +10 50 0.109102797508 0.00111620290512 10 50 0.10616152286529541 0.002345135740908088 10 50 0.08519520759582519 0.002610550182895596 10 50 0.10588631629943848 0.0035835389655972105 10 50 0.0195438146591 0.000791630611893 10 50 0.56993534565 0.0230474303306 +100 50 0.135682177544 0.00196166766665 100 50 0.12375867366790771 0.00238414904864133 100 50 0.10423583984375 0.0018808335751867933 100 50 0.11841504573822022 0.001608174845404568 100 50 0.0246078014374 0.00108949795701 100 50 0.563336873055 0.00964411866903 +1000 50 0.198261427879 0.00396172489054 1000 50 0.14462883472442628 0.0022428030896492978 1000 50 0.12195603847503662 0.0023611894043373983 1000 50 0.14032282829284667 0.002640027148889162 1000 50 0.063302564621 0.00058195987294 1000 50 0.826378440857 0.00941629472813 +10000 50 0.821499919891 0.0111642692132 10000 50 0.34488487243652344 0.004867441221052092 10000 50 0.3163540124893188 0.002761333651520998 10000 50 0.33160474300384524 0.0027796660009712947 10000 50 0.410061001778 0.00294901155085 10000 50 3.27872717381 0.126592845956 +100000 50 7.05980975628 0.121182371277 100000 50 2.3394312858581543 0.02263239858944125 100000 50 2.237372374534607 0.009955353920396077 100000 50 2.258401036262512 0.011041280982383895 100000 50 3.87797718048 0.0123467913678 100000 50 17.890055728 0.116794666005 +1000000 50 71.5645889759 5.02009516291 1000000 50 21.979821610450745 0.09080404939303836 1000000 50 21.59097549915314 0.081188190530421 1000000 50 21.70080256462097 0.15897944629180621 1000000 50 38.5674883366 0.0602820291386 1000000 50 158.262442636 0.826290454446 +lines columns q-benchmark-2.7.18_mean q-benchmark-2.7.18_stddev lines columns q-benchmark-3.6.4_mean q-benchmark-3.6.4_stddev lines columns q-benchmark-3.7.9_mean q-benchmark-3.7.9_stddev lines columns q-benchmark-3.8.5_mean q-benchmark-3.8.5_stddev lines columns textql_2.0.3_mean textql_2.0.3_stddev lines 
columns octosql_v0.3.0_mean octosql_v0.3.0_stddev +1 100 0.10662381649 0.00193146624495 1 100 0.10372309684753418 0.0010299126833031144 1 100 0.08336784839630126 0.0013840724401561887 1 100 0.10147004127502442 0.0021285682695135768 1 100 0.0216581106186 0.00103280947157 1 100 0.569358110428 0.0279801762531 +10 100 0.110662698746 0.00171461379583 10 100 0.10784556865692138 0.0016557634029464607 10 100 0.0864112138748169 0.0017946939354350697 10 100 0.10471885204315186 0.001248479289219899 10 100 0.021723818779 0.000920429257416 10 100 0.580981063843 0.0272341107532 +100 100 0.163547992706 0.00166570196628 100 100 0.14526791572570802 0.0028194506905186724 100 100 0.12199611663818359 0.0013003743156634682 100 100 0.13894760608673096 0.002307980025026551 100 100 0.0299471855164 0.00130217326679 100 100 0.559471726418 0.00668155858429 +1000 100 0.280023741722 0.00337543024145 1000 100 0.18315494060516357 0.0023585311962114673 1000 100 0.15871686935424806 0.0035993681064501234 1000 100 0.17586205005645753 0.0023822296091426 1000 100 0.0996923923492 0.00155352212734 1000 100 1.08161640167 0.00698594638512 +10000 100 1.46053376198 0.0221691284465 10000 100 0.5586131334304809 0.004808492789681402 10000 100 0.5243751525878906 0.004370273273595629 10000 100 0.5414002418518067 0.0036291866664635458 10000 100 0.767001605034 0.00328944029633 10000 100 5.67823712826 0.0123398407167 +100000 100 13.2369835854 0.309375896258 100000 100 4.287398314476013 0.00957500108409644 100000 100 4.175828623771667 0.016127303710583043 100000 100 4.222555088996887 0.08562968951916528 100000 100 7.46734063625 0.0262039846119 100000 100 32.2797194242 0.315508270241 +1000000 100 131.864977288 1.22415449691 1000000 100 41.706851434707644 0.4161526076289425 1000000 100 40.82292411327362 0.12328165162380703 1000000 100 41.021552324295044 0.16033566363076862 1000000 100 74.6216712952 0.0994037504394 1000000 100 289.582628798 0.929455236817 diff --git 
a/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/textql_2.0.3.benchmark-results b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/textql_2.0.3.benchmark-results new file mode 100644 index 00000000..f98760cd --- /dev/null +++ b/test/benchmark-results/source-files-1443b7418b46594ad256abd9db4a7671cb251e6a/2020-09-17-v2.0.17/textql_2.0.3.benchmark-results @@ -0,0 +1,48 @@ +lines columns textql_2.0.3_mean textql_2.0.3_stddev +1 1 0.0196103572845 0.00207355214257 +10 1 0.0186784029007 0.000970810220668 +100 1 0.019472026825 0.00181951524514 +1000 1 0.022180891037 0.00116649968967 +10000 1 0.051066827774 0.0018168767618 +100000 1 0.307463979721 0.00246268029188 +1000000 1 2.89862303734 0.022182722976 +lines columns textql_2.0.3_mean textql_2.0.3_stddev +1 5 0.0195286750793 0.0017840569109 +10 5 0.0183676958084 0.000925251595491 +100 5 0.0199447393417 0.000907007099218 +1000 5 0.0263328790665 0.00165486505938 +10000 5 0.0826982736588 0.00152451583229 +100000 5 0.60660867691 0.00395761320274 +1000000 5 5.87811236382 0.0304332294491 +lines columns textql_2.0.3_mean textql_2.0.3_stddev +1 10 0.0191783189774 0.00107718516178 +10 10 0.0185215950012 0.000840353961363 +100 10 0.0209223031998 0.00164494657684 +1000 10 0.0309282779694 0.00110848590345 +10000 10 0.121016025543 0.00105071105139 +100000 10 0.987622976303 0.00699348302979 +1000000 10 9.69240145683 0.0354453778052 +lines columns textql_2.0.3_mean textql_2.0.3_stddev +1 20 0.0202306985855 0.00159619251952 +10 20 0.0187650680542 0.000845692486156 +100 20 0.0211876153946 0.000993808448942 +1000 20 0.0404737234116 0.00122415059261 +10000 20 0.197762489319 0.00198188642677 +100000 20 1.75432097912 0.00692372147543 +1000000 20 17.3383012295 0.0410164637448 +lines columns textql_2.0.3_mean textql_2.0.3_stddev +1 50 0.0205577373505 0.00133922342068 +10 50 0.0195438146591 0.000791630611893 +100 50 0.0246078014374 0.00108949795701 
+1000 50 0.063302564621 0.00058195987294 +10000 50 0.410061001778 0.00294901155085 +100000 50 3.87797718048 0.0123467913678 +1000000 50 38.5674883366 0.0602820291386 +lines columns textql_2.0.3_mean textql_2.0.3_stddev +1 100 0.0216581106186 0.00103280947157 +10 100 0.021723818779 0.000920429257416 +100 100 0.0299471855164 0.00130217326679 +1000 100 0.0996923923492 0.00155352212734 +10000 100 0.767001605034 0.00328944029633 +100000 100 7.46734063625 0.0262039846119 +1000000 100 74.6216712952 0.0994037504394 diff --git a/test/prepare-benchmark-env b/test/prepare-benchmark-env new file mode 100755 index 00000000..397a290d --- /dev/null +++ b/test/prepare-benchmark-env @@ -0,0 +1,44 @@ +#!/bin/bash + +set -e + +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" + +source benchmark-config.sh + +if [ ! -f ./benchmark_data.tar.gz ]; +then + echo benchmark data not found. downloading it + curl "https://s3.amazonaws.com/harelba-q-public/benchmark_data.tar.gz" -o ./benchmark_data.tar.gz +else + echo no need to download benchmark data +fi + +if [ ! 
-d ./_benchmark_data ]; +then + echo extracting benchmark data + tar xvfz benchmark_data.tar.gz + echo benchmark data is ready +else + echo no need to extract benchmark data +fi + +for ver in "${BENCHMARK_PYTHON_VERSIONS[@]}" +do + echo installing $ver + pyenv install -s $ver + + venv_name=q-benchmark-$ver + echo create venv $venv_name + pyenv virtualenv -f $ver $venv_name + echo activate venv $venv_name + pyenv activate $venv_name + pyenv version + echo installing requirements $venv_name + pip install -r ../requirements.txt + echo deactivating $venv_name + pyenv deactivate +done + + diff --git a/test/run-benchmark b/test/run-benchmark new file mode 100755 index 00000000..a1c6ff21 --- /dev/null +++ b/test/run-benchmark @@ -0,0 +1,77 @@ +#!/bin/bash + +# Usage: ./run-benchmark.sh +set -e + +get_abs_filename() { + # $1 : relative filename + echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" +} + +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" + +if [ "x$1" == "x" ]; +then + echo Benchmark id must be provided as a parameter + exit 1 +fi +Q_BENCHMARK_ID=$1 + +if [ "x$2" == "x" ]; +then + EFFECTIVE_Q_EXECUTABLE="source-files-$(git rev-parse HEAD)" +else + ABS_Q_EXECUTABLE="$(get_abs_filename $2)" + export Q_EXECUTABLE=$ABS_Q_EXECUTABLE + if [ ! 
-f $ABS_Q_EXECUTABLE ] + then + echo "q executable must exist ($ABS_Q_EXECUTABLE)" + exit 1 + fi + EFFECTIVE_Q_EXECUTABLE="${ABS_Q_EXECUTABLE//\//__}" +fi + +echo "Q executable to use is $EFFECTIVE_Q_EXECUTABLE" + +# Must be provided to the benchmark code so it knows where to write the results to +export Q_BENCHMARK_RESULTS_FOLDER="./benchmark-results/${EFFECTIVE_Q_EXECUTABLE}/${Q_BENCHMARK_ID}/" +echo Benchmark results folder is $Q_BENCHMARK_RESULTS_FOLDER +mkdir -p $Q_BENCHMARK_RESULTS_FOLDER + +source benchmark-config.sh + +ALL_FILES=() + +for ver in "${BENCHMARK_PYTHON_VERSIONS[@]}" +do +venv_name=q-benchmark-$ver +echo activating $venv_name +pyenv activate $venv_name +echo "==== testing inside $venv_name ===" +./test-all BenchmarkTests.test_q_matrix -v +RESULT_FILE="${Q_BENCHMARK_RESULTS_FOLDER}/$venv_name.benchmark-results" +echo "==== Done. Results are in $RESULT_FILE" +ALL_FILES[${#ALL_FILES[@]}]="$RESULT_FILE" +echo "Deactivating" +pyenv deactivate +done + +echo "==== testing textql ===" +./test-all BenchmarkTests.test_textql_matrix -v +RESULT_FILE="textql*.benchmark-results" +ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" +echo "Done. Results are in textql.benchmark-results" + +echo "==== testing octosql ===" +./test-all BenchmarkTests.test_octosql_matrix -v +RESULT_FILE="octosql*.benchmark-results" +ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" +echo "Done. Results are in octosql.benchmark-results" + +summary_file="$Q_BENCHMARK_RESULTS_FOLDER/summary.benchmark-results" + +rm -vf $summary_file + +paste ${ALL_FILES[*]} > $summary_file +echo "Done. final results file is $summary_file" diff --git a/test/test-suite b/test/test-suite index b44b357c..5628e6cf 100755 --- a/test/test-suite +++ b/test/test-suite @@ -10,6 +10,7 @@ # in order to test the resulting binary executables as well, instead of just executing the q python source code. 
# +from __future__ import print_function import unittest import random import json @@ -24,7 +25,7 @@ import pprint import six from six.moves import range import codecs - +import itertools sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) from q import QTextAsData,QOutput,QOutputPrinter,QInputParams @@ -2599,6 +2600,195 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(table_structure.materialized_files['my_data'].filename,'my_data') self.assertTrue(table_structure.materialized_files['my_data'].is_stdin) + +class BenchmarkAttemptResults(object): + def __init__(self, attempt, lines, columns, duration,return_code): + self.attempt = attempt + self.lines = lines + self.columns = columns + self.duration = duration + self.return_code = return_code + + def __str__(self): + return "{}".format(self.__dict__) + __repr__ = __str__ + +class BenchmarkResults(object): + def __init__(self, lines, columns, attempt_results, mean, stddev): + self.lines = lines + self.columns = columns + self.attempt_results = attempt_results + self.mean = mean + self.stddev = stddev + + def __str__(self): + return "{}".format(self.__dict__) + __repr__ = __str__ + +class BenchmarkTests(AbstractQTestCase): + + BENCHMARK_DIR = './_benchmark_data' + + def _ensure_benchmark_data_dir_exists(self): + try: + os.mkdir(BenchmarkTests.BENCHMARK_DIR) + except Exception as e: + pass + + def _create_benchmark_file_if_needed(self): + self._ensure_benchmark_data_dir_exists() + + if os.path.exists('{}/benchmark-file.csv'.format(BenchmarkTests.BENCHMARK_DIR)): + return + + g = GzipFile('unit-file.csv.gz') + d = g.read().decode('utf-8') + f = open('{}/benchmark-file.csv'.format(BenchmarkTests.BENCHMARK_DIR), 'w') + for i in range(100): + f.write(d) + f.close() + + def _prepare_test_file(self, lines, columns): + + filename = '{}/_benchmark_data__lines_{}_columns_{}.csv'.format(BenchmarkTests.BENCHMARK_DIR,lines, columns) + + if os.path.exists(filename): + return filename 
+ + c = ['c{}'.format(x + 1) for x in range(columns)] + + # write a header line + ff = open(filename,'w') + ff.write(",".join(c)) + ff.write('\n') + ff.close() + + r, o, e = run_command('head -{} {}/benchmark-file.csv | ' + Q_EXECUTABLE + ' -d , "select {} from -" >> {}'.format(lines, BenchmarkTests.BENCHMARK_DIR, ','.join(c), filename)) + self.assertEqual(r, 0) + return filename + + def _decide_result(self,attempt_results): + + failed = list(filter(lambda a: a.return_code != 0,attempt_results)) + + if len(failed) == 0: + mean = sum([x.duration for x in attempt_results]) / len(attempt_results) + sum_squared = sum([(x.duration - mean)**2 for x in attempt_results]) + ddof = 0 + pvar = sum_squared / (len(attempt_results) - ddof) + stddev = pvar ** 0.5 + else: + mean = None + stddev = None + + return BenchmarkResults( + attempt_results[0].lines, + attempt_results[0].columns, + attempt_results, + mean, + stddev + ) + + def _perform_test_performance_matrix(self,name,generate_cmd_function): + results = [] + + benchmark_results_folder = os.environ.get("Q_BENCHMARK_RESULTS_FOLDER",'') + if benchmark_results_folder == "": + raise Exception("Q_BENCHMARK_RESULTS_FOLDER must be provided as an environment variable") + + self._create_benchmark_file_if_needed() + for columns in [1, 5, 10, 20, 50, 100]: + for lines in [1, 10, 100, 1000, 10000, 100000, 1000000]: + attempt_results = [] + for attempt in range(10): + filename = self._prepare_test_file(lines, columns) + if DEBUG: + print("Testing {}".format(filename)) + t0 = time.time() + r, o, e = run_command(generate_cmd_function(filename,lines,columns)) + duration = time.time() - t0 + attempt_result = BenchmarkAttemptResults(attempt, lines, columns, duration, r) + attempt_results += [attempt_result] + if DEBUG: + print("Results: {}".format(attempt_result.__dict__)) + final_result = self._decide_result(attempt_results) + results += [final_result] + + series_fields = [six.u('lines'),six.u('columns')] + value_fields = 
[six.u('mean'),six.u('stddev')] + + all_fields = series_fields + value_fields + + output_filename = '{}/{}.benchmark-results'.format(benchmark_results_folder,name) + output_file = open(output_filename,'w') + for columns,g in itertools.groupby(sorted(results,key=lambda x:x.columns),key=lambda x:x.columns): + x = six.u("\t").join(series_fields + [six.u('{}_{}').format(name, f) for f in value_fields]) + print(x,file = output_file) + for result in g: + print(six.u("\t").join(map(str,[getattr(result,f) for f in all_fields])),file=output_file) + output_file.close() + + print("results have been written to : {}".format(output_filename)) + if DEBUG: + print("RESULTS FOR {}".format(name)) + print(open(output_filename,'r').read()) + + def test_q_matrix(self): + venv = os.path.basename(os.environ.get('VIRTUAL_ENV') or 'unknown-virtual-env') + + def generate_q_cmd(data_filename,line_count,column_count): + if column_count == 1: + additional_params = '-c 1' + else: + additional_params = '' + return '{} -d , {} "select count(*) from {}"'.format(Q_EXECUTABLE,additional_params, data_filename) + self._perform_test_performance_matrix(venv,generate_q_cmd) + + def _get_textql_version(self): + r,o,e = run_command("textql --version") + if r != 0: + raise Exception("Could not find textql") + if len(e) != 0: + raise Exception("Errors while getting textql version") + return o[0] + + def _get_octosql_version(self): + r,o,e = run_command("octosql --version") + if r != 0: + raise Exception("Could not find octosql") + if len(e) != 0: + raise Exception("Errors while getting octosql version") + import re + version = re.findall('v[0-9]+\.[0-9]+\.[0-9]+',o[0])[0] + return version + + def test_textql_matrix(self): + def generate_textql_cmd(data_filename,line_count,column_count): + return 'textql -dlm , -sql "select count(*)" {}'.format(data_filename) + + name = 'textql_%s' % self._get_textql_version() + self._perform_test_performance_matrix(name,generate_textql_cmd) + + def test_octosql_matrix(self): 
+ config_fn = self.random_tmp_filename('octosql', 'config') + def generate_octosql_cmd(data_filename,line_count,column_count): + j = """ +dataSources: + - name: bmdata + type: csv + config: + path: "{}" + headerRow: false + batchSize: 10000 +""".format(data_filename)[1:] + f = open(config_fn,'w') + f.write(j) + f.close() + return 'octosql -c {} -o batch-csv "select count(*) from bmdata a"'.format(config_fn) + + name = 'octosql_%s' % self._get_octosql_version() + self._perform_test_performance_matrix(name,generate_octosql_cmd) + def suite(): tl = unittest.TestLoader() basic_stuff = tl.loadTestsFromTestCase(BasicTests) From b4aedcf386dc65717f42182deb4410e915ed9094 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 19 Sep 2020 23:37:00 +0300 Subject: [PATCH 063/111] Release Cycle (#242) --- RELEASE.md | 27 +++++++++++++++++++++++++++ VERSION_BUMP.md | 18 ------------------ dist/create-rpm | 10 ++++++++-- dist/q-text-as-data.spec.template | 4 +++- package-release | 7 +++++++ upload-release | 24 ++++++++++++++++++++++++ 6 files changed, 69 insertions(+), 21 deletions(-) create mode 100644 RELEASE.md delete mode 100644 VERSION_BUMP.md create mode 100755 upload-release diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 00000000..f43f4fdb --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,27 @@ + +# Releasing a new version +Currently, there are some manual steps needed in order to release a new version: + +* Make sure that you're in a branch +* Change the version in the following three files: `bin/q.py`, `setup.py` and `do-manual-release.sh` and commit them to the branch +* Push the commit to the branch/PR +* perform merge into master of that branch from the github UI +* create a new release with a new tag that has the same name as the new version (x.y.z) + +The merge will trigger a build/release, and will push the artifacts to the new release as assets. + +Now, create the relevant rpm and deb packages: + +* Run `./package-release `. 
In most cases, both will be the same. +* This will download all the released artifacts for the release into `./packages/`, and will create an rpm and a deb +* Test that the two new artifacts (inside `./packages/`) +* Run `./upload-release ` + +The rpm and deb will be added to the assets of the release + + +Update the website to match the new version. + +# Requirements +Requires a logged in github-cli (`gh`) to work + diff --git a/VERSION_BUMP.md b/VERSION_BUMP.md deleted file mode 100644 index 7a4c5bf5..00000000 --- a/VERSION_BUMP.md +++ /dev/null @@ -1,18 +0,0 @@ - -# Version bump -Currently, there are some manual steps needed in order to release a new version: - -* Make sure that you're in a branch -* Change the version in the following three files: `bin/q.py`, `setup.py` and `do-manual-release.sh` and commit them to the branch -* perform merge into master of that branch -* add a tag of the release version -* `git push --tags origin master` -* create a release in github with the tag you've just created - -Pushing to master will trigger a build/release, and will push the artifacts to the new release as assets. - -The reason for this is related to limitations in the way that pyci uploads the binaries to github. - -# - -TBD - Continue with the flow of wrapping the artifacts with rpm/deb, copying the files to packages-for-q, and updating the web site. 
diff --git a/dist/create-rpm b/dist/create-rpm index 8c247f99..697f1122 100755 --- a/dist/create-rpm +++ b/dist/create-rpm @@ -5,6 +5,8 @@ # # +set -o pipefail + if [ $# -ne 2 ]; then echo 'create-rpm ' @@ -45,8 +47,12 @@ mkdir -p ${rpm_build_area}/SOURCES pushd ${rpm_build_area}/SOURCES >/dev/null tar xvzf ./q.tar.gz --strip-components=1 rm -vf ./q.tar.gz -curl -f -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q" -chmod +x ./bin/q + +mkdir ${rpm_build_area}/packages +cp /q/packages/q-x86_64-Linux ${rpm_build_area}/packages/q-x86_64-Linux + +# Expecting the binaries to exist in /packages/ + popd >/dev/null find ${rpm_build_area}/ -ls diff --git a/dist/q-text-as-data.spec.template b/dist/q-text-as-data.spec.template index 8512aa2d..2be7cc20 100644 --- a/dist/q-text-as-data.spec.template +++ b/dist/q-text-as-data.spec.template @@ -20,6 +20,8 @@ q allows to perform SQL-like statements on tabular text data. cd %{_topdir}/BUILD cp -vrf %{_topdir}/SOURCES/* %{_topdir}/BUILD/ chmod -Rf a+rX,u+w,g-w,o-w %{_topdir}/BUILD/ +mkdir -p %{_topdir}/BUILD/packages/ +cp -vfr /q/packages/* %{_topdir}/BUILD/packages/ %build cd %{_topdir}/BUILD @@ -29,7 +31,7 @@ ronn doc/USAGE.markdown rm -vrf ${RPM_BUILD_ROOT}/ install -d -m 0755 ${RPM_BUILD_ROOT}%{_bindir} install -d -m 0755 ${RPM_BUILD_ROOT}%{_datadir}/q-text-as-data -install -Dm 0644 bin/q ${RPM_BUILD_ROOT}%{_datadir}/q-text-as-data/ +install -Dm 0644 ./packages/q-x86_64-Linux ${RPM_BUILD_ROOT}%{_datadir}/q-text-as-data/q ln -s %{_datadir}/q-text-as-data/q ${RPM_BUILD_ROOT}%{_bindir}/q install -d -m 0755 ${RPM_BUILD_ROOT}%{_mandir}/man1/ install -m 0644 doc/USAGE ${RPM_BUILD_ROOT}%{_mandir}/man1/q.1 diff --git a/package-release b/package-release index f27186fa..1aa1515b 100755 --- a/package-release +++ b/package-release @@ -28,9 +28,16 @@ rm -rvf ${base_folder}/packages mkdir -p ${base_folder}/packages sleep 1 + +gh release download $BASED_ON_TAG -p '*' -D ./packages/ + 
+chmod +x ./packages/* + docker exec -it ${cid1} /bin/bash -i -c "/q/dist/create-rpm ${VERSION} ${BASED_ON_TAG}" docker cp ${cid1}:/q/dist/rpm_build_area/RPMS/x86_64/q-text-as-data-${VERSION}-1.el6.x86_64.rpm ${base_folder}/packages/q-text-as-data-${VERSION}-1.x86_64.rpm docker exec -it ${cid2} /bin/bash -i -c "cd /q/packages && alien ./q-text-as-data-${VERSION}-1.x86_64.rpm" +find ./packages/ -ls + diff --git a/upload-release b/upload-release new file mode 100755 index 00000000..a3909393 --- /dev/null +++ b/upload-release @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e + +base_folder=$(dirname $0) +pushd ${base_folder} >/dev/null + +if [ $# -ne 2 ]; +then + echo "Usage: $(dirname $0) " + echo + echo "Note that the git tag must be pushed to github before doing this." + exit 1 +fi +VERSION="$1" +BASED_ON_TAG="$2" + +echo uploading +gh release upload 2.0.18 ./packages/*.{rpm,deb} + +echo done + + + From 9b428e22e955832f029aa555e66c81fef2e4dc08 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 19 Sep 2020 23:47:35 +0300 Subject: [PATCH 064/111] prevent releasing from a push check inside a PR --- do-manual-release.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/do-manual-release.sh b/do-manual-release.sh index 9e0d787a..628ae68e 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -4,12 +4,20 @@ set -e VERSION=2.0.18 +echo "TRAVIS_BRANCH is $TRAVIS_BRANCH . 
TRAVIS_PULL_REQUEST_BRANCH is $TRAVIS_PULL_REQUEST_BRANCH" + if [[ "$TRAVIS_BRANCH" != "master" ]] then echo "Not releasing - not on master branch (${TRAVIS_BRANCH})" exit 0 fi +if [[ "$TRAVIS_PULL_REQUEST_BRANCH" != "" ]] +then + echo "Not releasing - push check in PR" + exit 0 +fi + echo "Packing binary for $TRAVIS_OS_NAME" if [[ "$TRAVIS_OS_NAME" == "osx" || "$TRAVIS_OS_NAME" == "linux" ]] From 5f0c650a540059f17846c543d68f384178b9d02f Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 22 Sep 2020 17:20:28 +0300 Subject: [PATCH 065/111] bump to 2.0.19 --- bin/q.py | 2 +- do-manual-release.sh | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/q.py b/bin/q.py index 4775ddb9..59ecc882 100755 --- a/bin/q.py +++ b/bin/q.py @@ -33,7 +33,7 @@ from collections import OrderedDict -q_version = '2.0.18' +q_version = '2.0.19' __all__ = [ 'QTextAsData' ] diff --git a/do-manual-release.sh b/do-manual-release.sh index 628ae68e..3e09b87a 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -2,7 +2,7 @@ set -e -VERSION=2.0.18 +VERSION=2.0.19 echo "TRAVIS_BRANCH is $TRAVIS_BRANCH . 
TRAVIS_PULL_REQUEST_BRANCH is $TRAVIS_PULL_REQUEST_BRANCH" diff --git a/setup.py b/setup.py index df113123..f949d1a2 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup -q_version = '2.0.18' +q_version = '2.0.19' setup( name='q', From 79b5d9ae1c9af8af1348b00439894bed4ec3489c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Wed, 23 Sep 2020 11:26:38 +0300 Subject: [PATCH 066/111] more robust release flow --- README.markdown | 6 ++++-- RELEASE.md | 12 ++++++------ do-manual-release.sh | 14 ++++++++++++++ mkdocs/docs/index.md | 12 ++++++------ upload-release | 2 +- 5 files changed, 31 insertions(+), 15 deletions(-) diff --git a/README.markdown b/README.markdown index c8802d60..3ffda105 100644 --- a/README.markdown +++ b/README.markdown @@ -22,8 +22,10 @@ ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LI Go [here](http://harelba.github.io/q/#examples) for more examples. -## Python API -A development branch for exposing q's capabilities as a Python module can be viewed here, along with examples of the alpha version of the API.
    Existing functionality as a command-line tool will not be affected by this. Your input will be most appreciated. +## Benchmark +I have created a preliminary benchmark comparing q's speed between python2, python3, and comparing both to textql and octosql. + +Your input about the validity of the benchmark and about the results would be greatly appreciated. More details are [here](test/BENCHMARK.md). ## Contact Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. diff --git a/RELEASE.md b/RELEASE.md index f43f4fdb..9ba0f02c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,13 +2,14 @@ # Releasing a new version Currently, there are some manual steps needed in order to release a new version: -* Make sure that you're in a branch +* Make sure that you're in `master` * Change the version in the following three files: `bin/q.py`, `setup.py` and `do-manual-release.sh` and commit them to the branch -* Push the commit to the branch/PR -* perform merge into master of that branch from the github UI -* create a new release with a new tag that has the same name as the new version (x.y.z) +* Commit the files locally +* Create a new tag `git tag x.y.z` +* Create a new release in the github UI based on the new tag +* `git push --tags origin master` -The merge will trigger a build/release, and will push the artifacts to the new release as assets. +The push will trigger a build/release, and will push the artifacts to the new release as assets. Now, create the relevant rpm and deb packages: @@ -19,7 +20,6 @@ Now, create the relevant rpm and deb packages: The rpm and deb will be added to the assets of the release - Update the website to match the new version. 
# Requirements diff --git a/do-manual-release.sh b/do-manual-release.sh index 3e09b87a..4dd49ef5 100755 --- a/do-manual-release.sh +++ b/do-manual-release.sh @@ -18,6 +18,20 @@ then exit 0 fi +# ensure release exists +curl -v -L -f https://api.github.com/repos/harelba/q/releases/tags/$VERSION || (echo "Release $VERSION not found in github. " && exit 1) + +# skip releasing if release already has some asset. Not using jq on purpose, to prevent the need for dependencies +ASSET_COUNT=$(curl -f -L https://api.github.com/repos/harelba/q/releases/tags/$VERSION | grep /releases/assets/ | grep url | wc -l | awk '{print $1}') + +if [[ "$ASSET_COUNT" != "0" ]] +then + echo "Assets already exists in the release. No need to release version $VERSION again." + exit 0 +fi + +echo "Gonna release version $VERSION" + echo "Packing binary for $TRAVIS_OS_NAME" if [[ "$TRAVIS_OS_NAME" == "osx" || "$TRAVIS_OS_NAME" == "linux" ]] diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index e2b37439..ccf76ffb 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -32,12 +32,12 @@ Look at some examples [here](#examples), or just download the tool using the lin | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.16/q)|run `brew install q`|man page is not available for this release yet. Use `q --help` for now|| -|[RPM Package](https://github.com/harelba/packages-for-q/raw/master/rpms/q-text-as-data-2.0.16-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter man q.| -|[DEB Package](https://github.com/harelba/packages-for-q/raw/master/deb/q-text-as-data_2.0.16-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`.| -|[Windows Installer](https://github.com/harelba/packages-for-q/raw/master/windows/setup-q-2.0.16.exe)|Run the installer executable and hit next next next... 
q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| -|[tar.gz](https://github.com/harelba/q/archive/2.0.16.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| -|[zip](https://github.com/harelba/q/archive/2.0.16.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[OSX](https://github.com/harelba/q/releases/download/2.0.19/q-x86_64-Darwin)|run `brew install q`|man page is not available for this release yet. Use `q --help` for now|| +|[RPM Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data-2.0.19-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter man q.| +|[DEB Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data_2.0.19-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`.| +|[Windows Installer](https://github.com/harelba/q/releases/download/2.0.19/q-AMD64-Windows-installer.exe)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| +|[tar.gz](https://github.com/harelba/q/archive/2.0.19.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[zip](https://github.com/harelba/q/archive/2.0.19.zip)|Full source file tree for the latest stable version. 
Note that q.py cannot be used directly anymore, as it requires python dependencies|| **Older versions can be downloaded [here](https://github.com/harelba/packages-for-q). Please let me know if you plan on using an older version, and why - I know of no reason to use any of them.** diff --git a/upload-release b/upload-release index a3909393..bfd19018 100755 --- a/upload-release +++ b/upload-release @@ -16,7 +16,7 @@ VERSION="$1" BASED_ON_TAG="$2" echo uploading -gh release upload 2.0.18 ./packages/*.{rpm,deb} +gh release upload $VERSION ./packages/*.{rpm,deb} echo done From 373841af1260d773297df98162db6184f085f8f0 Mon Sep 17 00:00:00 2001 From: jinzc Date: Thu, 2 Sep 2021 19:10:30 +0800 Subject: [PATCH 067/111] translate 'index.md' into 'index_cn.md' (#271) --- README.markdown | 2 +- mkdocs/docs/about.md | 2 + mkdocs/docs/index.md | 2 +- mkdocs/docs/index_cn.md | 367 ++++++++++++++++++++++++++++++ mkdocs/docs/stylesheets/extra.css | 2 +- mkdocs/mkdocs.yml | 1 + 6 files changed, 373 insertions(+), 3 deletions(-) create mode 100644 mkdocs/docs/index_cn.md diff --git a/README.markdown b/README.markdown index 3ffda105..5d9f232d 100644 --- a/README.markdown +++ b/README.markdown @@ -5,7 +5,7 @@ q is a command line tool that allows direct execution of SQL-like queries on CSV q treats ordinary files as database tables, and supports all SQL constructs, such as `WHERE`, `GROUP BY`, `JOIN`s, etc. It supports automatic column name and type detection, and q provides full support for multiple character encodings. -q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It contains everything you need to download and use q immediately. +q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. ## Installation. Extremely simple. 
diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md index b0e09e45..49ec15bd 100644 --- a/mkdocs/docs/about.md +++ b/mkdocs/docs/about.md @@ -6,3 +6,5 @@ ### Email [harelba@gmail.com](mailto:harelba@gmail.com) +### Chinese translation [jinzhencheng@outlook.com](jinzhencheng@outlook.com) + diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index ccf76ffb..98e31c69 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -33,7 +33,7 @@ Look at some examples [here](#examples), or just download the tool using the lin | Format | Instructions | Comments | :---|:---|:---| |[OSX](https://github.com/harelba/q/releases/download/2.0.19/q-x86_64-Darwin)|run `brew install q`|man page is not available for this release yet. Use `q --help` for now|| -|[RPM Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data-2.0.19-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter man q.| +|[RPM Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data-2.0.19-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| |[DEB Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data_2.0.19-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`.| |[Windows Installer](https://github.com/harelba/q/releases/download/2.0.19/q-AMD64-Windows-installer.exe)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| |[tar.gz](https://github.com/harelba/q/archive/2.0.19.tar.gz)|Full source file tree for latest stable version. 
Note that q.py cannot be used directly anymore, as it requires python dependencies|| diff --git a/mkdocs/docs/index_cn.md b/mkdocs/docs/index_cn.md new file mode 100644 index 00000000..d17f48e0 --- /dev/null +++ b/mkdocs/docs/index_cn.md @@ -0,0 +1,367 @@ +# q - 直接在CSV或TSV文件上运行SQL + +[![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/) +[![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) + + +## 概述 +q 是一个可以运行在 CSV / TSV 文件(或其他表格式的文本文件)上运行类SQL命令的命令行工具。 + +q 将普通文本(如上述)作为数据库表,且支持所有的SQL语法如:WHERE、GROUP BY、各种JOIN等。此外,还拥有自动识别列名和列类型及广泛支持多种编码的特性。 + +``` bash +q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" +``` + +``` bash +ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +``` + +查看[示例](#示例)或[安装](#安装)体验. + +| | | +|:--------------------------------------:|:-----------------------------------------------:| +| 完全支持所有的字符编码 | すべての文字エンコーディングを完全にサポート | +| 모든 문자 인코딩이 완벽하게 지원됩니다 | все кодировки символов полностью поддерживаются | + + +**非英语用户:** q 完全支持所有类型的字符编码。 使用 `-e data-encoding` 设置输入编码; 使用 `-Q query-encoding` 设置查询编码; 使用 `-E output-encoding` 设置输出编码; +如上三个参数均设有合理的默认值。
    + +> 如果遇到问题请与我联系,期待与你交流。 + +**含有BOM的文件:** python的csv模块并不能很好的支持含有[Byte Order Mark](https://en.wikipedia.org/wiki/Byte_order_mark) 的文件。针对该种情况,使用 `-e utf-8-sig` 命令参数可读取包含BOM的UTF8编码文件。 + +> 我们计划将BOM相关处理与编码'解耦', 这样就可以支持所有编码的BOM文件了。 + +## 安装 + +| 格式 | 说明 | 备注 | +|:---|:---|:---| +|[OSX](https://github.com/harelba/q/releases/download/2.0.19/q-x86_64-Darwin)|运行 `brew install q`| 该方式暂不支持MAN手册, 可以使用 `q --help` 查看帮助|| +|[RPM Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data-2.0.19-1.x86_64.rpm)| 运行 `rpm -ivh ` 如果安装过旧版则运行 `rpm -U ` | 该方式支持MAN手册,可运行`man q`查看| +|[DEB Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data_2.0.19-2_amd64.deb)| 运行 `sudo dpkg -i `|该方式支持MAN手册,可运行`man q`查看| +|[Windows Installer](https://github.com/harelba/q/releases/download/2.0.19/q-AMD64-Windows-installer.exe)|运行安装可执行文件,一直点击下一步、下一步... q.exe 将被添加至PATH,以便于随处运行|PATH更新后并不会即时生效,重新打开cmd命令窗口便可| +|[tar.gz](https://github.com/harelba/q/archive/2.0.19.tar.gz)|最新稳定版的所有源码文件。提示,q.py 文件不能直接使用,因为它需要python依赖|| +|[zip](https://github.com/harelba/q/archive/2.0.19.zip)|最新稳定版的所有源码文件。提示,q.py 文件不能直接使用,因为它需要python依赖|| + +**旧版本可以在这儿[下载](https://github.com/harelba/packages-for-q) 。按理说不会有人愿意用旧版本,要是你计划使用旧版,希望能与你交流。** + +## 须知 +从`2.0.9`版本开始,不需要任何外部依赖。Python(3.7)和其他所需的库包含在了安装文件中且与系统隔离。 + +## 使用 + +``` bash +q "" + + 最简单的执行语句:q "SELECT * FROM myfile" 该语句会输出文件内容 +``` + +q 支持在表格式的文本上执行类SQL命令。它的初衷是为Linux命令行附加SQL的表达力且实现对文本数据的轻松访问。 + +类SQL的查询将*文件名(或标准输入流)看作表名*。查询语句会作为命令输入的一个参数(使用引号包裹),如果将多个文件看作一张表,可以这样写 `文件名1+文件名2....`或者使用通配符(比如:`my_files*.csv`)。 + +使用 `-H` 表示输入内容中包含表头。该情况下列名会被自动识别,如果没有指定该参数,列名将会被以`cX`命名,`X`从1开始(比如: `q "SELECT c3,c8 from ..."`) 。 + +使用 `-d` 声明输入的分隔符。 + +列类型可由工具自动识别,无需强制转换。 提示,使用`--as-text` 可以强制将所有列类型转换为文本类型。 + +依据sqlite规范,如果列名中含有空格,需要使用反引号 (即:`) 引起来。 + +完全支持查询/输入/输出的编码设置(q 力争提供一种开箱即用的方法), 可以分别使用`-Q`,`-e` 和 `-E`来指定编码设置类型。 + +支持所有的sqlite3 SQL方法,包括文件之间的 JOIN(可以为文件设置别名)操作。在下面的[限制](#限制)小节可以看到一些少有使用的、欠支持的说明。 + +### 查询 + +q 
的每一个参数都是由双引号包裹的一条完整的SQL语句。所有的查询语句会依次执行,最终结果以标准输出流形式输出。 提示,在同一命令行中执行多条查询语句时,仅在执行第一条查询语句时需要耗时载入数据,其他查询语句即时执行。 + +支持所有标准SQL语法,条件(WHERE 和 HAVING)、GROUP BY、ORDER BY等。 + +在WHERE条件查询中,支持JOIN操作和子查询,但在FROM子句中并不支持。JOIN操作时,可以为文件起别名。 + +SQL语法同sqlite的语法,详情见 http://www.sqlite.org/lang.html 或上网找一些示例。 + +**注意**: + +* 支持所有类型的自动识别,无需强制转换或其他操作。 + +* 如果重命名输出列,则需要为列指定别名并使用 `-O` 声明。如: `q -O -H "select count(*) cnt,sum(*) as mysum from -"` 便会将`cnt`和`mysum`作为列名输出。 + +### 指令 + +``` bash +使用: + q 支持在表格式的文本数据上执行类SQL查询。 + + 它的初衷是为Linux命令行附加SQL的表达力且实现对文本数据的轻松访问。 + + 基本操作是 q "SQL查询语句" 表名便是文件名(使用 - 从标注输入中读取数据)。若输入内容包含表头时,可以使用 -H 指定列名。若无表头,则列将会自动命名为 c1...cN。 + + 列类型可被自动识别。可以使用 -A 命令查看每列的名称及其类型。 + + 可以使用 -d (或 -t) 指定分隔符,使用 -D 指定输出分割符。 + + 支持所有的sqlite3 SQL方法。 + + 示例: + + 例子1: ls -ltrd * | q "select c1,count(1) from - group by c1" + 上例将会输出当前目录下,所有文件的权限表达式分组及每组数量。 + + 例子2: seq 1 1000 | q "select avg(c1),sum(c1) from -" + 上例将会输出1到1000的平均数与和数。 + + 例子3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc" + 上例将会输出在/tmp目录下,相同'用户+组'的文件所占用的MB磁盘空间。 + + 更多详情见 https://github.com/harelba/q/ 或查看帮助 + +选项: + -h, --help 显示此帮助信息并退出 + -v, --version 显示版本号 + -V, --verbose 出现问题时显示调试信息 + -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME + 将数据库保存为一个 sqlite 数据库文件 + --save-db-to-disk-method=SAVE_DB_TO_DISK_METHOD + 保存数据库到磁盘的方法 + 'standard' 不需要任何设置 + 'fast'需要手动在python的安装目录下执行`pip install sqlitebck` + 打包的问题解决后,'fast'即被作为默认方式 + 数据相关的选项: + + -H, --skip-header 忽略表头,在早期的版本中已修改为:仅支持用于标明列名的一行表头 + -d DELIMITER, --delimiter=DELIMITER + 列分隔符,若无特别指定,默认为空格符 + -p, --pipe-delimited + 作用同 -d '|',为了方便和可读性提供该参数 + -t, --tab-delimited + 作用同 -d ,这仅是一种简写,也可以在Linux命令行中使用$'\t' + -e ENCODING, --encoding=ENCODING + 输入文件的编码,默认是UTF-8 + -z, --gzipped 压缩数据,对于从输入流读取文件非常高效 .gz 是自动压缩后文件扩展名 + -A, --analyze-only 简单分析:各列的数据类型 + -m MODE, --mode=MODE + 数据解析模式: 松散, 宽松和严格。在严格模式下必须指定 -c + --column-count 参数。 + -c COLUMN_COUNT, --column-count=COLUMN_COUNT + 当使用宽松或严格模式时,用于指定列的数量 + -k, 
--keep-leading-whitespace + 保留每列前的空格。为了使其开箱即用,默认去除了列前的空格 + 如果有需要,可以指定该参数 + --disable-double-double-quoting + 禁止一对双引号的转义。默认可以使用 "" 转义双引号 + 主要为了向后兼容 + --disable-escaped-double-quoting + 禁止转义双引号 + 默认可以在双引号字段中使用 \" 进行转义 + 主要为了向后兼容 + --as-text 不识别列类型(所有列被当作文本类型) + -w INPUT_QUOTING_MODE, --input-quoting-mode=INPUT_QUOTING_MODE + 输入内容的转义模式,可选值 all、minimal、none + 该参数稍有误导性,-W 指定输出内容的转义模式 + -M MAX_COLUMN_LENGTH_LIMIT, --max-column-length-limit=MAX_COLUMN_LENGTH_LIMIT + 设置列的最大长度 + -U, --with-universal-newlines + 设置通用换行符 + -U 参数当前仅适用于常规文件,输入流或.gz类文件暂不支持 + + 输出相关的选项: + -D OUTPUT_DELIMITER, --output-delimiter=OUTPUT_DELIMITER + 输出列间的分隔符 + 若未指定,则与 -d 指定的分隔符相同;若均为指定,则默认为空格符 + -P, --pipe-delimited-output + 同 -D '|' 为了方便和可读性提供该参数 + -T, --tab-delimited-output + 同 -D 这仅是一种简写,也可以在Linux命令行中使用$'\t' + -O, --output-header + 输出表头,输出的列名是由查询中指定的别名 + 如: 'select name FirstName, value1/value2 MyCalculation + from ...' 即使输入时未指定表头仍可使用该参数。 + -b, --beautify 美化输出结果,可能较慢... + -f FORMATTING, --formatting=FORMATTING + 格式化输出列 + 如格式X=fmt,Y=fmt等,上述中的X、Y是指第几列(如:1 表示 SELECT + 的第一列) + -E OUTPUT_ENCODING, --output-encoding=OUTPUT_ENCODING + 输出内容的编码,默认是 'none',跟随系统或终端的编码 + -W OUTPUT_QUOTING_MODE, --output-quoting-mode=OUTPUT_QUOTING_MODE + 输出内容的转义模式,可选值 all、minimal、none + 该参数稍有误导性,-w 指定输入内容的转义模式 + -L, --list-user-functions + 列出所有内置函数 + + 查询相关的参数: + -q QUERY_FILENAME, --query-filename=QUERY_FILENAME + 指定文件名,由文件中读取查询语句。 + 该操作常与查询编码(使用 -Q)一同使用 + -Q QUERY_ENCODING, --query-encoding=QUERY_ENCODING + 查询编码(包含查询语句的文件编码) + 实验性参数,对该参数的意见可反馈 +``` + +## 示例 +下述 `-H` 参数的例子,表示文件中含有表头时使用该参数。 + +`-t` 参数是指定文件以 tab 作为分隔符的缩写(可以使用 `-d` 参数指定任意分隔符)。 + +为了清楚起见,查询关键字均使用大写,实际上关键字(如 SELECT、WHERE等)对大小写并不敏感。 + +示例目录: + +* [例1 - 统计指定列唯一值的数量](#1) +* [例2 - 数值条件过滤、排序并限制输出数](#2) +* [例3 - GROUP BY简单示例](#3) +* [例4 - GROUP BY进阶示例 (以时间格式分组)](#4) +* [例5 - 标准输入流作为输入](#5) +* [例6 - 使用表头中列名](#6) +* [例7 - JOIN 两个文件](#7) + +### 例1 +对指定字段(点击数据中的uuid)执行 COUNT DISTINCT + +``` bash +q -H -t "SELECT COUNT(DISTINCT(uuid)) FROM ./clicks.csv" +``` +输出: +``` bash 
+229 +``` + +### 例2 +过滤数值数据、排序并限制输出数量 + +注意:q 将其看作数值类型并对其进行数值过滤(数值比较而不是字符串比较) + +``` bash +q -H -t "SELECT request_id,score FROM ./clicks.csv WHERE score > 0.7 ORDER BY score DESC LIMIT 5" +``` +输出: +``` bash +2cfab5ceca922a1a2179dc4687a3b26e 1.0 +f6de737b5aa2c46a3db3208413a54d64 0.986665809568 +766025d25479b95a224bd614141feee5 0.977105183282 +2c09058a1b82c6dbcf9dc463e73eddd2 0.703255121794 +``` + +### 例3 +GROUP BY 简单示例 + +``` bash +q -t -H "SELECT hashed_source_machine,count(*) FROM ./clicks.csv GROUP BY hashed_source_machine" +``` +输出: +``` bash +47d9087db433b9ba.domain.com 400000 +``` + +### 例4 +GROUP BY进阶示例 (以时间格式分组) + +``` bash +q -t -H "SELECT strftime('%H:%M',date_time) hour_and_minute,count(*) FROM ./clicks.csv GROUP BY hour_and_minute" +``` +输出: +``` bash +07:00 138148 +07:01 140026 +07:02 121826 +``` + +### 例5 +标准输入流作为输入 + +计算 /tmp 目录下各 user/group 的占用空间大小 + +``` bash +sudo find /tmp -ls | q "SELECT c5,c6,sum(c7)/1024.0/1024 AS total FROM - GROUP BY c5,c6 ORDER BY total desc" +``` +输出: +``` bash +mapred hadoop 304.00390625 +root root 8.0431451797485 +smith smith 4.34389972687 +``` + +### 例6 +使用表头中列名 + +计算拥有进程数最多的前3位用户名及其数量 + +注意: 该查询中自动识别了列名 + +``` bash +ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +``` +输出: +``` bash +root 152 +harel 119 +avahi 2 +``` + +### 例7 +JOIN 两个文件 + +如下命令中JOIN一个ls命令输出内容文件(exampledatafile) 和一个包含group_name、email两列字段的文件(group-emails-example),每一邮件组均包含filename、email列, 为了输出简便,使用WHERE条件过滤出名为 ppp 的文件 + +``` bash +q "SELECT myfiles.c8,emails.c2 FROM exampledatafile myfiles JOIN group-emails-example emails ON (myfiles.c4 = emails.c1) WHERE myfiles.c8 = 'ppp'" +``` +输出: +``` bash +ppp dip.1@otherdomain.com +ppp dip.2@otherdomain.com +``` +可以看出 ppp 文件出现了两次,每次都匹配到了它所属的dip邮件组(如例中 dip.1@... 
/ dip2@...),可以在 `exampledatafile` 和 `group-emails-example` 文件中查看数据。 + +JOIN 的应用场景中也支持列名识别,在查询包含表头的文件时,只需指定 `-H` 参数即可。 + +## 声明 +为了避免引用外部依赖,当前是使用由Python编写的内存数据库实现的。当前是支持 SELECT 语句及 各种JOIN ( 目前仅在 WHERE 语句中支持子查询)。 +若想对数据进一步分析,可以使用 `--save-db-to-disk` 参数,以将结果输出为 sqlite 数据库文件,然后使用 `sqlite3` 语句来执行查询操作。 + +需要提示的是,当前并没有对数据量的大小进行检测和限制 - 也就是说,需要用户自己掌控文件大小。 + +请务必阅读[限制](#限制)小节。 + +## 开发 + +### 测试 +源码中包含了测试用例,可以通过 `test/test-all` 来执行。若想要提交 PR的话,一定先确保其均执行成功。 + +## 限制 +如下罗列了一些已知的限制,若你的使用场景中需要用到以下标明的限制,请联系我。 + +* 不支持 `FROM ` +* 不支持公用表表达式(CTE) +* 不支持文件名中包含空格 (可以将文件以标准输入流的方式输入 q 或重命名文件) +* 不支持较少用到的子查询 + +## 原理 +你是否曾经盯着屏幕上的文本文件发呆,希望它要是数据库就好了,这样就可以找出自己想要的内容?我曾有过很多次,最终顿悟。我想要的不是数据库,而是 SQL。 + +SQL 是一种面向数据声明的语言,它允许自定义数据内容而无需关心其执行过程。这也正是SQL强大之处,因为它对于数据'所见即所得',而不是将数据看作字节码。 + +本工具的目的是:在文本文件和SQL之间搭建一座桥梁。 + +### 为什么其他Linux工具不能满足需求? +传统的Linux工具库也很酷,我也经常使用它们, 但Linux的整体理念是为任一部分搭配最好的工具。本工具为传统Linux工具集新添了 SQL 族类工具,其他工具并不会失去本来优势。 +事实上,我也经常将 q 和其他Linux工具搭配使用,就如同使用管道将 awk/sed 和 grep 搭配使用一样。 + +另外需要注意的是,许多Linux工具就将文本看作文本,而不是数据。从这个意义上来讲,可以将 q 看作提供了 SQL 功能(如:表达式、排序、分组、聚合等)的元工具。 + +### 理念 + +本工具的设计遵从了 Linux/Unix 的传统设计原则。若你对这些设计原则感兴趣,可以阅读 [这本书](http://catb.org/~esr/writings/taoup/) ,尤其是书中 [这部分](http://catb.org/~esr/writings/taoup/html/ch01s06.html) +若你认为本工具工作方式与之背道而驰,愿洗耳恭听你的建议。 + +## 展望 + +* 主要方向:将其作为python的模块公开。 在公开之前,需要对处理标准输入流做一些内部API的完善。 +* 支持分布式以提高算力。 + + + diff --git a/mkdocs/docs/stylesheets/extra.css b/mkdocs/docs/stylesheets/extra.css index 74eb969f..e90c3300 100644 --- a/mkdocs/docs/stylesheets/extra.css +++ b/mkdocs/docs/stylesheets/extra.css @@ -26,7 +26,7 @@ div.md-content pre { } article.md-content__inner.md-typeset>p { - text-align: center; + text-align: left; } .md-nav__link[data-md-state=blur] { diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index a47cdf34..c21b5d29 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -10,6 +10,7 @@ google_analytics: - "auto" nav: - Home: index.md + - 首页: index_cn.md - About: about.md theme: name: material From 
cad588265ccd81b8ae60d2bd4e4333bd50545462 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 22 Oct 2021 01:48:53 +0300 Subject: [PATCH 068/111] q version 3.1.0-beta - automatic immutable caching and direct queries on sqlite databases (#280) + New Packaging Workflow --- .github/workflows/build-and-package.yaml | 543 ++ .github/workflows/q.rb.brew-formula-template | 35 + .gitignore | 8 + .travis.yml | 137 - Makefile | 37 - QSQL-NOTES.md | 99 + README.markdown | 33 +- RELEASE.md | 27 - benchmark-config.sh | 3 + bin/q.py | 2797 ++++++-- bin/qtextasdata.py | 1 - build-deb-builder-container | 10 - build-rpm-builder-container | 10 - conftest.py | 5 + create-windows-setup-instructions | 76 - dist/AddToPath.nsh | 440 -- dist/create-deb | 11 - dist/create-rpm | 63 - dist/deb-builder-Dockerfile | 8 - dist/fpm-config | 7 + dist/q-TextAsData-with-path.nsi | 182 - dist/q-text-as-data.spec.template | 66 - dist/rpm-builder-Dockerfile | 12 - dist/test-rpm-inside-container.sh | 9 + dist/test-using-deb.sh | 8 + dist/test-using-rpm.sh | 8 + dist/update-mac-homebrew-instructions | 51 - do-manual-release.sh | 57 - package-release | 43 - ...are-benchmark-env => prepare-benchmark-env | 2 +- pyoxidizer.bzl | 114 + pytest.ini | 3 +- requirements-win-x86_64.txt | 4 + requirements.txt | 1 + run-benchmark | 110 + run-coverage.sh | 18 + run-tests.sh | 3 + setup-pyenv.sh | 134 - setup.py | 16 +- test-requirements.txt | 6 +- test/__init__.py | 0 test/benchmark-config.sh | 3 - test/run-benchmark | 77 - test/test-all | 14 - test/test-all.bat | 4 - test/test-suite | 2819 -------- test/test_suite.py | 5704 +++++++++++++++++ upload-release | 24 - 48 files changed, 8849 insertions(+), 4993 deletions(-) create mode 100644 .github/workflows/build-and-package.yaml create mode 100644 .github/workflows/q.rb.brew-formula-template delete mode 100644 .travis.yml delete mode 100644 Makefile create mode 100644 QSQL-NOTES.md delete mode 100644 RELEASE.md create mode 100644 benchmark-config.sh delete mode 
120000 bin/qtextasdata.py delete mode 100755 build-deb-builder-container delete mode 100755 build-rpm-builder-container create mode 100644 conftest.py delete mode 100644 create-windows-setup-instructions delete mode 100644 dist/AddToPath.nsh delete mode 100755 dist/create-deb delete mode 100755 dist/create-rpm delete mode 100644 dist/deb-builder-Dockerfile create mode 100644 dist/fpm-config delete mode 100644 dist/q-TextAsData-with-path.nsi delete mode 100644 dist/q-text-as-data.spec.template delete mode 100644 dist/rpm-builder-Dockerfile create mode 100755 dist/test-rpm-inside-container.sh create mode 100755 dist/test-using-deb.sh create mode 100755 dist/test-using-rpm.sh delete mode 100644 dist/update-mac-homebrew-instructions delete mode 100755 do-manual-release.sh delete mode 100755 package-release rename test/prepare-benchmark-env => prepare-benchmark-env (96%) create mode 100644 pyoxidizer.bzl create mode 100644 requirements-win-x86_64.txt create mode 100755 run-benchmark create mode 100755 run-coverage.sh create mode 100755 run-tests.sh delete mode 100644 setup-pyenv.sh create mode 100644 test/__init__.py delete mode 100644 test/benchmark-config.sh delete mode 100755 test/run-benchmark delete mode 100755 test/test-all delete mode 100644 test/test-all.bat delete mode 100755 test/test-suite create mode 100755 test/test_suite.py delete mode 100755 upload-release diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml new file mode 100644 index 00000000..f7237862 --- /dev/null +++ b/.github/workflows/build-and-package.yaml @@ -0,0 +1,543 @@ +name: BuildAndPackage + +on: + push: + branches: master + paths-ignore: + - "*.md" + - "*.markdown" + - "mkdocs/**/*" + # Remove comment in order to pre-release on a PR, to validate packaging flow + pull_request: + branches: master + paths-ignore: + - "*.md" + - "*.markdown" + - "mkdocs/**/*" + +jobs: + create-man: + runs-on: ubuntu-18.04 + steps: + - name: Checkout + uses: 
actions/checkout@v2 + - name: Install Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '2.6' + - name: Create man page + run: | + set -x -e + gem install ronn + + ronn doc/USAGE.markdown + # Must be gzipped, otherwise debian does not install it + gzip doc/USAGE + - name: Upload man page + uses: actions/upload-artifact@v1.0.0 + with: + name: q-man-page + path: doc/USAGE.gz + + build-linux: + runs-on: ubuntu-18.04 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Cache pyox + uses: actions/cache@v2 + with: + path: | + ~/.cache/pyoxidizer + key: ${{ runner.os }}-pyox + - name: Install pyoxidizer + run: | + set -e -x + + sudo apt-get update + sudo apt-get install -y zip sqlite3 rpm + + curl -o pyoxidizer.zip -L "https://github.com/indygreg/PyOxidizer/releases/download/pyoxidizer%2F0.17/pyoxidizer-0.17.0-linux_x86_64.zip" + unzip pyoxidizer.zip + chmod +x ./pyoxidizer + - name: Create Q Executable - Linux + run: | + set -e -x + + ./pyoxidizer build --release + + export Q_EXECUTABLE=./build/x86_64-unknown-linux-gnu/release/install/q + chmod 755 $Q_EXECUTABLE + + seq 1 100 | $Q_EXECUTABLE -c 1 "select sum(c1),count(*) from -" -S test.sqlite + + mkdir -p packages/linux/ + cp $Q_EXECUTABLE packages/linux/linux-q + - name: Upload Linux Executable + uses: actions/upload-artifact@v1.0.0 + with: + name: linux-q + path: packages/linux/linux-q + + test-linux: + needs: build-linux + runs-on: ubuntu-18.04 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install Python for Testing + uses: actions/setup-python@v2 + with: + python-version: '3.8.12' + architecture: 'x64' + - name: Prepare Testing + run: | + set -e -x + + pip3 install -r test-requirements.txt + - name: Download Linux Executable + uses: actions/download-artifact@v2 + with: + name: linux-q + - name: Run Tests on Linux Executable + run: | + set -x -e + + find ./ -ls + + chmod 755 ./linux-q + + Q_EXECUTABLE=`pwd`/linux-q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v + + 
package-linux-deb: + needs: [test-linux, create-man] + runs-on: ubuntu-18.04 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '2.6' + - name: Downoad man page + uses: actions/download-artifact@v2 + with: + name: q-man-page + - name: Download Linux Executable + uses: actions/download-artifact@v2 + with: + name: linux-q + - name: Build DEB Package + run: | + set -e -x + + mkdir -p packages/linux/ + + find ./ -ls + + chmod 755 ./linux-q + + gem install fpm + cp dist/fpm-config ~/.fpm + fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.0-beta-1.x86_64.deb --version 3.1.0-beta ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + - name: Upload DEB Package + uses: actions/upload-artifact@v1.0.0 + with: + name: q-text-as-data-3.1.0-beta-1.x86_64.deb + path: packages/linux/q-text-as-data-3.1.0-beta-1.x86_64.deb + + test-deb-packaging: + runs-on: ubuntu-18.04 + needs: package-linux-deb + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Download DEB + uses: actions/download-artifact@v2 + with: + name: q-text-as-data-3.1.0-beta-1.x86_64.deb + - name: Install Python for Testing + uses: actions/setup-python@v2 + with: + python-version: '3.8.12' + architecture: 'x64' + - name: Prepare Testing + run: | + set -e -x + + pip3 install -r test-requirements.txt + - name: Test DEB Package Installation + run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.0-beta-1.x86_64.deb + + package-linux-rpm: + needs: [test-linux, create-man] + runs-on: ubuntu-18.04 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '2.6' + - name: Download man page + uses: actions/download-artifact@v2 + with: + name: q-man-page + - name: Download Linux Executable + uses: actions/download-artifact@v2 + with: + name: linux-q + - name: Build RPM Package + run: | + set -e -x + + mkdir -p packages/linux + + find 
./ -ls + + chmod 755 ./linux-q + + gem install fpm + cp dist/fpm-config ~/.fpm + fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.0-beta.x86_64.rpm --version 3.1.0-beta ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + - name: Upload RPM Package + uses: actions/upload-artifact@v1.0.0 + with: + name: q-text-as-data-3.1.0-beta.x86_64.rpm + path: packages/linux/q-text-as-data-3.1.0-beta.x86_64.rpm + + test-rpm-packaging: + runs-on: ubuntu-18.04 + needs: package-linux-rpm + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Download RPM + uses: actions/download-artifact@v2 + with: + name: q-text-as-data-3.1.0-beta.x86_64.rpm + - name: Retest using RPM + run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.0-beta.x86_64.rpm + + build-mac: + runs-on: macos-11 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Cache pyox + uses: actions/cache@v2 + with: + path: | + ~/.cache/pyoxidizer + key: ${{ runner.os }}-pyox + - name: Install pyoxidizer + run: | + set -e -x + + curl -o pyoxidizer.zip -L "https://github.com/indygreg/PyOxidizer/releases/download/pyoxidizer%2F0.17/pyoxidizer-0.17.0-macos-universal.zip" + unzip pyoxidizer.zip + mv macos-universal/pyoxidizer ./pyoxidizer + + chmod +x ./pyoxidizer + - name: Create Q Executable - Mac + run: | + set -e -x + + ./pyoxidizer build --release + + export Q_EXECUTABLE=./build/x86_64-apple-darwin/release/install/q + chmod 755 $Q_EXECUTABLE + + seq 1 100 | $Q_EXECUTABLE -c 1 "select sum(c1),count(*) from -" -S test.sqlite + + mkdir -p packages/macos/ + cp $Q_EXECUTABLE packages/macos/macos-q + - name: Upload MacOS Executable + uses: actions/upload-artifact@v1.0.0 + with: + name: macos-q + path: packages/macos/macos-q + + test-mac: + needs: build-mac + runs-on: macos-11 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install Python for Testing + uses: actions/setup-python@v2 + with: + python-version: '3.8.12' + architecture: 'x64' + - name: Prepare 
Testing + run: | + set -e -x + + pip3 install wheel + + pip3 install -r test-requirements.txt + - name: Download MacOS Executable + uses: actions/download-artifact@v2 + with: + name: macos-q + - name: Run Tests on MacOS Executable + run: | + set -e -x + + find ./ -ls + + chmod 755 ./macos-q + + Q_EXECUTABLE=`pwd`/macos-q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v + + package-mac: + # create-man is not needed, as it's generated inside the brew formula independently + needs: [test-mac] + runs-on: macos-11 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Create homebrew formula and install it + run: | + set -x -e + set -o pipefail + + mkdir brew + + # TODO temp, since template rendering action doesn't work in mac + cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.0-beta/g' > ./brew/q.rb + + brew install --display-times --formula --build-bottle --verbose ./brew/q.rb + brew test ./brew/q.rb + + - name: Create q bottle + run: | + brew bottle --force-core-tap --no-rebuild ./brew/q.rb + - name: Upload Executable + uses: actions/upload-artifact@v1.0.0 + with: + name: q--3.1.0-beta_1.big_sur.bottle.tar.gz + path: ./q--3.1.0-beta_1.big_sur.bottle.tar.gz + +# TODO auto-create PR to main homebrew-core +# git clone https://github.com/harelba/homebrew-core.git +# cd homebrew-core +# +# git checkout -b new-q-version + + test-mac-packaging: + needs: package-mac + runs-on: macos-11 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Download q bottle + uses: actions/download-artifact@v2 + with: + name: q--3.1.0-beta_1.big_sur.bottle.tar.gz + - name: Test the created bottle + run: | + set -x -e + set -o pipefail + + WD=$(pwd) + + pushd /usr/local/Cellar + tar xvfz ${WD}/q--3.1.0-beta_1.big_sur.bottle.tar.gz + popd + + brew link q + + seq 1 100 | q -c 1 "select sum(c1),count(*) from -" -S test.sqlite + + echo "select sum(c1),count(*) from data_stream_stdin" | sqlite3 test.sqlite + + # TODO Windows build/test/package 
flow is running, but q executable is still not running well, due to pyox+sqlite3 issue + build-windows: + runs-on: windows-latest + if: ${{ false }} + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install MSVC build tools + uses: ilammy/msvc-dev-cmd@v1 + - name: Install Python + uses: actions/setup-python@v2 + with: + python-version: '3.8.10' + architecture: 'x64' + - name: Install pyoxidizer + shell: bash + run: | + set -x -e + + python3 -V + pip3 -V + + pip3 install pyoxidizer + - name: Create Q Executable - Windows + shell: bash + run: | + set -e -x + + # Hack to overcome the fact that apsw doesn't have a registered wheel for Windows + cp requirements-win-x86_64.txt requirements.txt + + pyoxidizer build --release + + export Q_EXECUTABLE=./build/x86_64-pc-windows-msvc/release/install/q + chmod 755 $Q_EXECUTABLE + + seq 1 100 | $Q_EXECUTABLE -c 1 "select sum(c1),count(*) from -" -S test.sqlite + + mkdir -p packages/windows/ + cp $Q_EXECUTABLE packages/windows/win-q.exe + + find ./ -ls + - name: Upload Linux Executable + uses: actions/upload-artifact@v1.0.0 + with: + name: win-q + path: packages/windows/win-q.exe + + test-windows: + needs: build-windows + runs-on: windows-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Fail deliberately - No tests on Windows + continue-on-error: true + run: | + echo "Tests are not actually running on the Windows Executable itself. 
Only the packaging is later tested" + exit 1 + + package-windows: + needs: [create-man, test-windows] + runs-on: windows-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Install MSVC build tools + uses: ilammy/msvc-dev-cmd@v1 + - name: Install Python + uses: actions/setup-python@v2 + with: + python-version: '3.8.10' + architecture: 'x64' + - name: Install pyoxidizer + shell: bash + run: | + set -x -e + + python3 -V + pip3 -V + + pip3 install pyoxidizer + - name: Create Q MSI - Windows + shell: bash + run: | + set -e -x + + # Hack to overcome the fact that apsw doesn't have a registered wheel for Windows + cp requirements-win-x86_64.txt requirements.txt + + pyoxidizer build --release msi_installer + + find ./ -ls + + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.0-beta.msi + chmod 755 $Q_MSI + + mkdir -p packages/windows/ + cp $Q_MSI packages/windows/q-text-as-data-3.1.0-beta.msi + + - name: Upload Windows MSI + uses: actions/upload-artifact@v1.0.0 + with: + name: q-text-as-data-3.1.0-beta.msi + path: packages/windows/q-text-as-data-3.1.0-beta.msi + + test-windows-packaging: + needs: package-windows + runs-on: windows-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Download Windows Package + uses: actions/download-artifact@v2 + with: + name: q-text-as-data-3.1.0-beta.msi + - name: Test Install of MSI + continue-on-error: true + shell: powershell + run: | + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.0-beta.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process.ExitCode + gc msi-install.log + + exit $process.ExitCode + - name: Test Uninstall of MSI + continue-on-error: true + shell: powershell + run: | + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.0-beta.msi /norestart /quiet" -PassThru -Wait + $process.ExitCode + exit $process.ExitCode + + perform-prerelease: + # We'd like artifacts to be uploaded 
regardless of tests succeeded or not, + # this is why the dependency here is not on test-X-packaging jobs + needs: [package-linux-deb, package-linux-rpm, package-mac] + runs-on: ubuntu-latest + # TODO Push to master will now pre-release as well, until things stabilize + # if: ${{ github.event_name == 'pull_request' }} + steps: + - name: Download All Artifacts + uses: actions/download-artifact@v2 + with: + path: artifacts/ + - name: Timestamp pre-release + run: | + set -e -x + + echo "Workflow finished at $(date)" >> artifacts/workflow-finish-time.txt + - name: Create pre-release + uses: "marvinpinto/action-automatic-releases@v1.2.1" + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + automatic_release_tag: "latest" + prerelease: true + title: "Next Release Development Build" + files: | + artifacts/**/* + + perform-release: + # TODO Windows is not here so users won't be confused by seeing an MSI (it's still not production-grade, you need to have sqlite3 dll in the path) + needs: [test-mac-packaging, test-deb-packaging, test-rpm-packaging] + runs-on: ubuntu-latest + # Disabled on purpose for now - Changing the beta release to a real one will be done manually until everything stabilizes + # and then this will be reinstated + # if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + if: ${{ false }} + steps: + - name: Download All Artifacts + uses: actions/download-artifact@v2 + with: + path: artifacts/ + - name: Delete Windows Artifacts so they're not part of the release for now + run: | + set -x -e + + echo "Deleting windows artifacts so they're not part of the release - windows is not fully ready" + + set +e + rm -vf artifacts/*.msi + rm -vf artifacts/win-q.exe + set -e + - uses: "marvinpinto/action-automatic-releases@v1.2.1" + with: + repo_token: "${{ secrets.GITHUB_TOKEN }}" + prerelease: false + files: | + artifacts/**/* diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template new file mode 
100644 index 00000000..c60fb430 --- /dev/null +++ b/.github/workflows/q.rb.brew-formula-template @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +# Formula for q +class Q < Formula + desc "Run SQL directly on CSV or TSV files" + homepage "https://harelba.github.io/q/" + # Building directly from master for now, eventually it will be tag-based so the version tag will be downloaded + url "https://github.com/harelba/q/archive/master.tar.gz" + version "{{ .Q_VERSION }}" + + # Removed for now, until everything is finalized + # sha256 "0844aed6658d0347a299b84bee978c88724d45093e8cbd7b05506ecc0b93c98c" + + license "GPL-3.0-or-later" + revision 1 + + depends_on "pyoxidizer" => :build + depends_on "python@3.8" => :build + depends_on "ronn" => :build + depends_on xcode: ["12.4", :build] + + def install + system "pyoxidizer", "build", "--release" + bin.install "./build/x86_64-apple-darwin/release/install/q" + + system "ronn", "--roff", "--section=1", "doc/USAGE.markdown" + man1.install "doc/USAGE.1" => "q.1" + end + + test do + seq = (1..100).map(&:to_s).join("\n") + output = pipe_output("#{bin}/q -c 1 'select sum(c1) from -'", seq) + assert_equal "5050\n", output + end +end diff --git a/.gitignore b/.gitignore index 2d4ca0f8..eb3a2d28 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,11 @@ generated-site/ benchmark_data.tar.gz _benchmark_data/ q.egg-info/ +.pytest_cache/ +*.qsql +htmlcov/ +*.sqlite +*.tar.gz +.coverage +.DS_Store +*.egg diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e8186f1e..00000000 --- a/.travis.yml +++ /dev/null @@ -1,137 +0,0 @@ -sudo: false - -stages: - - integration - - release - -env: - global: - - CACHE_NAME=${TRAVIS_JOB_NAME} - - -_commands_provider: - - _test: &_test make test - - _lint: &_lint make lint - - _release: &_release make local-release - - _install_requirements: &_install_requirements make dep - - # https://ttcshelbyville.wordpress.com/2012/12/19/disable-remote-differential-compression-form-the-command-line/ - 
_disable_windows_compression: &_disable_windows_compression "powershell Disable-WindowsOptionalFeature -Online -FeatureName MSRDC-Infrastructure" - - # https://travis-ci.community/t/yarn-network-troubles/333/7 - _disable_windows_defender: &_disable_windows_defender "powershell Set-MpPreference -DisableRealtimeMonitoring \\$true" - - -_steps_provider: - - _test: &_step_test - - install: - - *_install_requirements - before_script: *_lint - script: *_test - - _release: &_step_release - - install: *_install_requirements - script: *_release - - - -jobs: - include: - - - stage: integration - name: py27-macos - os: osx - language: generic - osx_image: xcode7.3 - env: - - PYENV_VERSION=2.7.14 - before_install: source setup-pyenv.sh - <<: *_step_test - cache: - directories: - - ${HOME}/.pyenv_cache - - - stage: integration - name: py36-macos - os: osx - language: generic - osx_image: xcode7.3 - env: - - PYENV_VERSION=3.6.4 - before_install: source setup-pyenv.sh - <<: *_step_test - cache: - directories: - - ${HOME}/.pyenv_cache - - - stage: integration - name: py37-macos - os: osx - language: generic - osx_image: xcode7.3 - env: - - PYENV_VERSION=3.7.3 - before_install: source setup-pyenv.sh - <<: *_step_test - cache: - directories: - - ${HOME}/.pyenv_cache - - - stage: integration - name: py27-linux - language: python - python: "2.7" - <<: *_step_test - - - stage: integration - name: py36-linux - language: python - python: "3.6" - <<: *_step_test - - - stage: integration - name: py37-linux - language: python - dist: xenial - python: "3.7" - <<: *_step_test - - - stage: release - name: macos - os: osx - language: generic - osx_image: xcode7.3 - env: - - PYENV_VERSION=3.7.3 - before_install: source setup-pyenv.sh - <<: *_step_release - cache: - directories: - - ${HOME}/.pyenv_cache - - - stage: release - name: linux - language: python - dist: xenial - python: "3.7" - <<: *_step_release - - - stage: release - name: windows - os: windows - language: shell - env: - - 
PATH=/c/Python37:/c/Python37/Scripts:$PATH - before_install: - - *_disable_windows_compression - - *_disable_windows_defender - - choco install make - - choco install python --version 3.7.3 - <<: *_step_release diff --git a/Makefile b/Makefile deleted file mode 100644 index e612a54a..00000000 --- a/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -SHELL := /bin/bash - -PROJECT_NAME=$(shell dirname "$0") -ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) - -.PHONY: test help -.DEFAULT_GOAL := ci - -ci: lint test ## Equivelant to 'make lint test' - -help: ## Show this help message. - - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' - -dep: ## Install the dependent libraries. - - pip install -r test-requirements.txt - pip install -e . - -lint: dep ## Run lint validations. - - flake8 q/ --count --select=E901,E999,F821,F822,F823 --show-source --statistics - -test: dep ## Run the unit tests. - - test/test-all - ## TODO Bring back pytest - ## py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir . - -release: ## Run release - pip install py-ci - pyci release --no-wheel-publish --wheel-universal - -local-release: - pip install py-ci - ./do-manual-release.sh - diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md new file mode 100644 index 00000000..1bb1e430 --- /dev/null +++ b/QSQL-NOTES.md @@ -0,0 +1,99 @@ + +# New beta version 3.1.0-beta is available, which contains the following major changes/additions: + +The following sections provide the details of each of the new functionality in this major version. + +## Automatic caching of data files +Speeding up subsequent reads from the same file by several orders of magnitude by automatically creating an immutable cache file for each tabular text file. + +For example, reading a 0.9GB file with 1M rows and 100 columns without caching takes ~50 seconds. When the cache exists, querying the same file will take less than 1 second. 
Obviously, the cache can be used in order to perform any query and not just the original query that was used for creating the cache. + +When caching is enabled, the cache is created on the first read of a file, and used automatically when reading it in other queries. A separate cache is being created for each file that is being used, allowing reuse in multiple use-cases. For example, if two csv files each have their own cache file from previous queries, then running a query that JOINs these two files would use the caches as well (without loading the data into memory), speeding it up considerably. + +The tradeoff for using cache files is disk space - A new file with the postfix `.qsql` is created and automatically detected and used in queries as needed. This file is essentially a standard sqlite file (with some additional metadata tables), and can be used directly by any standard sqlite tool later on. + +For backward compatibility, the caching option is not turned on by default. You'd need to use the new `-C <mode>` to determine the caching mode. Available options are as follows: +* `none` - The default, provides the original q's behaviour without caching +* `read` - Only reads cache files if they exist, but doesn't create any new ones +* `readwrite` - Uses cache files if they exist, or creates new ones if they don't. Writing new cache files doesn't interfere with the actual run of the query, so this option can be used in order to dynamically create the cache files if they don't exist + +Content signatures are being stored in the caches, allowing q to detect a state where the original file has been modified after the cache has been created. q will issue an error if this happens. For now, just delete the `.qsql` file in order to recreate the cache. In the future, another `-C` option would be added to automatically recreate the updated cache in such a case.
Notice that the content signature contains various q flags which affect parsing, so make sure to use the same parameters to q when performing the queries, otherwise q will issue an error. + +Notice that when running with `-A`, the cache is not written, even when `-C` is set to `readwrite`. This is due to the fact that `-A` does not really read the entire content of the files. For now, if you'd like to just prepare the cache without running the actual query, you can run it with a `select 1` query or something, although in terms of speed it will mostly not matter. If there's demand for adding an explicit `prepare caches only` option, I'll consider adding it. + +## Revamped `.qrc` mechanism +Adding `-C <mode>` for each query can be cumbersome at some point, so the `.qrc` file has been revamped for easy addition of default parameters. + +For example, if you want the caching behaviour to be `read` all the time, then just add a `~/.qrc` file, and set the following in it: +``` +[options] +caching_mode=read +``` + +All other flags and parameters to q can be controlled by the `.qrc` file. To see the proper names for each parameter, run `q --dump-defaults` and it will dump a default `.qrc` file that contains all parameters to `stdout`. + +### Direct querying of standard sqlite databases +q now supports direct querying of standard sqlite databases. The syntax for accessing a table inside an sqlite database is `<sqlite-filename>:::<table_name>`. A query can contain any mix of sqlite files, qsql files or regular delimited files. + +For example, this command joins two tables from two separate sqlite databases: +``` +$ q "select count(*) from mydatabase1.sqlite:::mytable1 a left join mydatabase2.sqlite:::mytable2 b on (a.c1 = b.c1)" +``` + +Running queries on sqlite databases does not usually entail loading the data into memory. Databases are attached to a virtual database and queried directly from disk. This means that querying speed is practically identical to standard sqlite access.
This is also true when multiple sqlite databases are used in a single query. The same mechanism is being used by q whenever it uses a qsql file (either directly or as a cache of a delimited file). + +sqlite itself does have a pre-compiled limit of the number of databases that can be attached simultaneously. If this limit is reached, then q will attach as many databases as possible, and then continue processing by loading additional tables into memory in order to execute the query. The standard limit in sqlite3 (unless compiled specifically with another limit) is 10 databases. This allows q to access as many as 8 user databases without having to load any data into memory (2 databases are always used for q's internal logic). Using more databases in a single query than this pre-compiled sqlite limit would slow things down, since some of the data would go into memory, but the query should still provide correct results. + +Whenever the sqlite database file contains only one table, the table name part can be omitted, and the user can specify only the sqlite-filename as the table name. For example, querying an sqlite database `mydatabase.sqlite` that only has one table `mytable` is possible with `q "SELECT ... FROM mydatabase.sqlite"`. There's no need to specify the table name in this case. + +Since `.qsql` files are also standard sqlite files, they can be queried directly as well. This allows the user to actually delete the original CSV file and use the caches as if they were the original files. For example: + +``` +$ q "select count(*) from myfile.csv.qsql" +``` + +Notice that there's no need to write the `:::<table_name>` as part of the table name, since `qsql` files that are created as caches contain only one table (e.g. the table matching the original file). + +Running a query that uses an sqlite/qsql database without specifying a table name will fail if there is more than one table in the database, showing the list of existing tables.
This can be used in order to detect which tables exist in the database without resorting to other tools. For example: +``` +$ q "select * from chinook.db:::blah" +Table blah could not be found in sqlite file chinook.db . Existing table names: albums,sqlite_sequence,artists,customers,employees,genres,invoices,invoice_items,media_types,playlists,playlist_track,tracks,sqlite_stat1 +``` + +## Storing source data into a disk database +The `-S` option (`--save-db-to-disk`) has been modified to match the new capabilities. It works with all types of input tables/files, and writes the output database as a standard sqlite database. I've considered making the output a multi-table `qsql` file (e.g. with the additional metadata that q uses), but some things still need to be ironed out in order to make these qsql files work seamlessly with all other aspects of q. This will probably happen in the next version. + +This database can be accessed directly by q later on, by providing `<sqlite-filename>:::<table_name>` as the table name in the query. The table names that are chosen match the original file names, but go through the following process: +* The names are normalised in order to be compatible with sqlite restrictions (e.g. `x.csv` is normalised to `x_dot_csv`) +* Duplicate table names are de-duped by adding `_<sequence-number>` to their names (e.g. two different csv files in separate folders which both have the name `companies` will be written to the file as `companies` and `companies_2`) + +This table-name normalisation happens also inside `.qsql` cache files, but in most cases there won't be any need to know these table names, since q automatically detects table names for databases which have a single table.
It also complicated the caching implementation significantly, and it seemed that it was not worth it. If there's demand for bringing this feature back, please write to me and I'll consider re-adding it. + +If you have a case of using file concatenation, you can use the following SQL instead: +``` +# Instead of writing +$ q "select * from myfile1+myfile2" +# Use the following: +$ q "select * from (select * from myfile1 UNION ALL select * from myfile2)" +``` + +This will provide the same results, but the error checking is a bit less robust, so be mindful of whether you're performing the right query on the right files. + +Conceptually, this is similar to wildcard matching (e.g. `select * from myfolder/myfile*`), but I have decided to leave wildcard-matching intact, since it seems to be a more common use-case. Cache creation and use is limited for now when using wildcards. Use the same method as described above for file concatenation if you want to make sure that caches are being used. + +After this version is fully stabilised, I'll make more efforts to consolidate wildcard (and perhaps concatenation) to fully utilise caching seamlessly. + +## Code runs only on python 3 +Removed the dual py2/py3 support. Since q is packaged as a self-contained executable, along with python 3.8 itself, this is no longer needed. + +Users who for some reason still use q's main source code file directly and use python 2 would need to stay with the latest 2.0.19 release. In a future version, q's code structure is going to change significantly anyway in order to become a standard python module, so using the main source code file directly would not be possible. + +If you are such a user, and this decision hurts you considerably, please ping me.
+ + + diff --git a/README.markdown b/README.markdown index 5d9f232d..c5cd183e 100644 --- a/README.markdown +++ b/README.markdown @@ -7,11 +7,42 @@ q treats ordinary files as database tables, and supports all SQL constructs, suc q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. +## New beta version `3.1.0-beta` is available, which contains the following major changes/additions: + +This is the list of new/changed functionality in this version: + +* **Automatic Immutable Caching** - Automatic caching of data files (into `.qsql` files), with huge speedups for medium/large files. Enabled through `-C readwrite` or `-C read` +* **Direct querying of standard sqlite databases** - Just use it as a table name in the query. Format is `select ... from :::`, or just `` if the database contains only one table. Multiple separate sqlite databases are fully supported in the same query. +* **Direct querying of the `qsql` cache files** - The user can query directly from the `qsql` files, removing the need for the original files. Just use `select ... from .qsql`. Please wait until the non-beta version is out before thinking about deleting any of your original files... +* **Revamped `.qrc` mechanism** - allows opting-in to caching without specifying it in every query. By default, caching is **disabled**, for backward compatibility and for finding usability issues. +* **Only python3 is supported from now on** - Shouldn't be an issue, since q is a self-contained binary executable which has its own python embedded in it +* **Save-to-db is now reusable for queries** - `--save-db-to-disk` option (`-S`) has been enhanced to match the new capabilities. You can query the resulting file directly through q, using the method mentioned above (it's just a standard sqlite database). 
+ +For details on the changes and the new usage, see [here](QSQL-NOTES.md) + +The version is still in early testing, for two reasons: + +* Completely new build and packaging flow - Using [pyoxidizer](https://github.com/indygreg/PyOxidizer) +* It's a very large change in functionality, which might surface issues, new and backward compatibility ones + +**Please don't use it for production, until the final non-beta version is out** + +If you're testing it out, I'd be more than happy to get any feedback. Please write all your feedback in [this issue](https://github.com/harelba/q/issues/281), instead of opening separate issues. That would really help me with managing this. + ## Installation. -Extremely simple. +**This will currently install the latest standard version `2.0.19`. See below if you want to download the `3.1.0-beta` version** + +The current production version `2.0.19` installation is extremely simple. Instructions for all OSs are [here](http://harelba.github.io/q/#installation). +### Installation of the new beta release +For now, only Linux RPM, DEB and Mac OSX are supported. Almost made the Windows version work, but there's some issue there, and the windows executable requires some external dependencies which I'm trying to eliminate. + +The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable and put it in your filesystem. DEB/RPM are working well, although for some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. + +Download the relevant files directly from [Links Coming Soon](TBD). 
+ ## Examples ``` diff --git a/RELEASE.md b/RELEASE.md deleted file mode 100644 index 9ba0f02c..00000000 --- a/RELEASE.md +++ /dev/null @@ -1,27 +0,0 @@ - -# Releasing a new version -Currently, there are some manual steps needed in order to release a new version: - -* Make sure that you're in `master` -* Change the version in the following three files: `bin/q.py`, `setup.py` and `do-manual-release.sh` and commit them to the branch -* Commit the files locally -* Create a new tag `git tag x.y.z` -* Create a new release in the github UI based on the new tag -* `git push --tags origin master` - -The push will trigger a build/release, and will push the artifacts to the new release as assets. - -Now, create the relevant rpm and deb packages: - -* Run `./package-release `. In most cases, both will be the same. -* This will download all the released artifacts for the release into `./packages/`, and will create an rpm and a deb -* Test that the two new artifacts (inside `./packages/`) -* Run `./upload-release ` - -The rpm and deb will be added to the assets of the release - -Update the website to match the new version. 
- -# Requirements -Requires a logged in github-cli (`gh`) to work - diff --git a/benchmark-config.sh b/benchmark-config.sh new file mode 100644 index 00000000..8606b926 --- /dev/null +++ b/benchmark-config.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +BENCHMARK_PYTHON_VERSIONS=(3.6.4 3.8.5) diff --git a/bin/q.py b/bin/q.py index 59ecc882..1ea957d4 100755 --- a/bin/q.py +++ b/bin/q.py @@ -1,6 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- -# Copyright (C) 2012-2020 Harel Ben-Attia +# Copyright (C) 2012-2021 Harel Ben-Attia # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,15 +16,14 @@ # # # Name : q (With respect to The Q Continuum) -# Author : Harel Ben Attia - harelba@gmail.com, harelba @ github, @harelba on twitter -# Requires : python with sqlite3 (standard in python>=2.6) +# Author : Harel Ben-Attia - harelba@gmail.com, harelba @ github, @harelba on twitter # # # q allows performing SQL-like statements on tabular text data. # # Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. 
# -# Full Documentation and details in http://harelba.github.io/q/ +# Full Documentation and details in http://harelba.github.io/q/ # # Run with --help for command line details # @@ -32,18 +32,18 @@ from __future__ import print_function from collections import OrderedDict +from sqlite3.dbapi2 import OperationalError +from uuid import uuid4 -q_version = '2.0.19' +q_version = '3.1.0-beta' -__all__ = [ 'QTextAsData' ] +#__all__ = [ 'QTextAsData' ] import os import sys import sqlite3 -import gzip import glob from optparse import OptionParser,OptionGroup -import traceback as tb import codecs import locale import time @@ -51,17 +51,36 @@ from six.moves import configparser, range, filter import traceback import csv -import hashlib import uuid import math import six import io +import json +import sqlitebck +import datetime +import hashlib + +if six.PY2: + assert False, 'Python 2 is not longer supported by q' -if six.PY3: - long = int - unicode = six.text_type +long = int +unicode = six.text_type -DEBUG = False +DEBUG = bool(os.environ.get('Q_DEBUG', None)) or '-V' in sys.argv + +if DEBUG: + def xprint(*args,**kwargs): + print(datetime.datetime.utcnow().isoformat()," DEBUG ",*args,file=sys.stderr,**kwargs) + + def iprint(*args,**kwargs): + print(datetime.datetime.utcnow().isoformat()," INFO ",*args,file=sys.stderr,**kwargs) + + def sqlprint(*args,**kwargs): + print(datetime.datetime.utcnow().isoformat(), " SQL ", *args, file=sys.stderr, **kwargs) +else: + def xprint(*args,**kwargs): pass + def iprint(*args,**kwargs): pass + def sqlprint(*args,**kwargs): pass def get_stdout_encoding(encoding_override=None): if encoding_override is not None and encoding_override != 'none': @@ -102,6 +121,16 @@ def regexp(regular_expression, data): else: return False +def regexp_extract(regular_expression, data,group_number): + if data is not None: + if not isinstance(data, str) and not isinstance(data, unicode): + data = str(data) + m = re.search(regular_expression, data) + if m is not None: 
+ return m.groups()[group_number] + else: + return False + def md5(data,encoding): m = hashlib.md5() m.update(six.text_type(data).encode(encoding)) @@ -129,6 +158,7 @@ class StrictPercentile(object): def __init__(self): self.values = [] self.p = None + def step(self,value,p): if self.p is None: self.p = p @@ -211,6 +241,11 @@ def __init__(self,func_type,name,usage,description,func_or_obj,param_count): "Find regexp in string expression. Returns 1 if found or 0 if not", regexp, 2), + UserFunctionDef(FunctionType.REGULAR, + "regexp_extract","regexp_extract(,,group_number) = ", + "Get regexp capture group content", + regexp_extract, + 3), UserFunctionDef(FunctionType.REGULAR, "sha","sha(,,) = ", "Calculate sha of some expression. Algorithm can be one of 1,224,256,384,512. For now encoding must be manually provided. Will use the input encoding automatically in the future.", @@ -264,52 +299,205 @@ def __init__(self,query_column_names,results): self.query_column_names = query_column_names self.results = results + def __str__(self): + return "Sqlite3DBResults" % (len(self.results),str(self.query_column_names)) + __repr__ = __str__ + +def get_sqlite_type_affinity(sqlite_type): + sqlite_type = sqlite_type.upper() + if 'INT' in sqlite_type: + return 'INTEGER' + elif 'CHAR' in sqlite_type or 'TEXT' in sqlite_type or 'CLOB' in sqlite_type: + return 'TEXT' + elif 'BLOB' in sqlite_type: + return 'BLOB' + elif 'REAL' in sqlite_type or 'FLOA' in sqlite_type or 'DOUB' in sqlite_type: + return 'REAL' + else: + return 'NUMERIC' + +def sqlite_type_to_python_type(sqlite_type): + SQLITE_AFFINITY_TO_PYTHON_TYPE_NAMES = { + 'INTEGER': long, + 'TEXT': unicode, + 'BLOB': bytes, + 'REAL': float, + 'NUMERIC': float + } + return SQLITE_AFFINITY_TO_PYTHON_TYPE_NAMES[get_sqlite_type_affinity(sqlite_type)] + + class Sqlite3DB(object): + # TODO Add metadata table with qsql file version - def __init__(self, show_sql=SHOW_SQL): + QCATALOG_TABLE_NAME = '_qcatalog' + NUMERIC_COLUMN_TYPES = {int, long, 
float} + PYTHON_TO_SQLITE_TYPE_NAMES = { str: 'TEXT', int: 'INT', long : 'INT' , float: 'FLOAT', None: 'TEXT' } + + + def __str__(self): + return "Sqlite3DB" % self.sqlite_db_url + __repr__ = __str__ + + def __init__(self, db_id, sqlite_db_url, sqlite_db_filename, create_qcatalog, show_sql=SHOW_SQL): self.show_sql = show_sql - self.conn = sqlite3.connect(':memory:') + self.create_qcatalog = create_qcatalog + + self.db_id = db_id + # TODO Is this needed anymore? + self.sqlite_db_filename = sqlite_db_filename + self.sqlite_db_url = sqlite_db_url + self.conn = sqlite3.connect(self.sqlite_db_url, uri=True) self.last_temp_table_id = 10000 self.cursor = self.conn.cursor() - self.type_names = { - str: 'TEXT', int: 'INT', long : 'INT' , float: 'FLOAT', None: 'TEXT'} - self.numeric_column_types = set([int, long, float]) self.add_user_functions() - def done(self): + if create_qcatalog: + self.create_qcatalog_table() + else: + xprint('Not creating qcatalog for db_id %s' % db_id) + + def retrieve_all_table_names(self): + return [x[0] for x in self.execute_and_fetch("select tbl_name from sqlite_master where type='table'").results] + + def get_sqlite_table_info(self,table_name): + return self.execute_and_fetch('PRAGMA table_info(%s)' % table_name).results + + def get_sqlite_database_list(self): + return self.execute_and_fetch('pragma database_list').results + + def find_new_table_name(self,planned_table_name): + existing_table_names = self.retrieve_all_table_names() + + possible_indices = range(1,1000) + + for index in possible_indices: + if index == 1: + suffix = '' + else: + suffix = '_%s' % index + + table_name_attempt = '%s%s' % (planned_table_name,suffix) + + if table_name_attempt not in existing_table_names: + xprint("Found free table name %s in db %s for planned table name %s" % (table_name_attempt,self.db_id,planned_table_name)) + return table_name_attempt + + # TODO Add test for this + raise Exception('Cannot find free table name in db %s for planned table name %s' % 
(self.db_id,planned_table_name)) + + def create_qcatalog_table(self): + if not self.qcatalog_table_exists(): + xprint("qcatalog table does not exist. Creating it") + r = self.conn.execute("""CREATE TABLE %s ( + qcatalog_entry_id text not null primary key, + content_signature_key text, + temp_table_name text, + content_signature text, + creation_time text, + source_type text, + source text)""" % self.QCATALOG_TABLE_NAME).fetchall() + else: + xprint("qcatalog table already exists. No need to create it") + + def qcatalog_table_exists(self): + return sqlite_table_exists(self.conn,self.QCATALOG_TABLE_NAME) + + def calculate_content_signature_key(self,content_signature): + assert type(content_signature) == OrderedDict + pp = json.dumps(content_signature,sort_keys=True) + xprint("Calculating content signature for:",pp,six.b(pp)) + return hashlib.sha1(six.b(pp)).hexdigest() + + def add_to_qcatalog_table(self, temp_table_name, content_signature, creation_time,source_type, source): + assert source is not None + assert source_type is not None + content_signature_key = self.calculate_content_signature_key(content_signature) + xprint("db_id: %s Adding to qcatalog table: %s. 
Calculated signature key %s" % (self.db_id, temp_table_name,content_signature_key)) + r = self.execute_and_fetch( + 'INSERT INTO %s (qcatalog_entry_id,content_signature_key, temp_table_name,content_signature,creation_time,source_type,source) VALUES (?,?,?,?,?,?,?)' % self.QCATALOG_TABLE_NAME, + (str(uuid4()),content_signature_key,temp_table_name,json.dumps(content_signature),creation_time,source_type,source)) + # Ensure transaction is completed self.conn.commit() - def store_db_to_disk_standard(self,sqlite_db_filename,table_names_mapping): - new_db = sqlite3.connect(sqlite_db_filename,isolation_level=None) - c = new_db.cursor() - for s in self.conn.iterdump(): - c.execute(s) - results = c.fetchall() - for source_filename_str,tn in six.iteritems(table_names_mapping): - c.execute('alter table `%s` rename to `%s`' % (tn, source_filename_str)) - new_db.close() - - def store_db_to_disk_fast(self,sqlite_db_filename,table_names_mapping): + def get_from_qcatalog(self, content_signature): + content_signature_key = self.calculate_content_signature_key(content_signature) + xprint("Finding table in db_id %s that matches content signature key %s" % (self.db_id,content_signature_key)) + + field_names = ["content_signature_key", "temp_table_name", "content_signature", "creation_time","source_type","source","qcatalog_entry_id"] + + q = "SELECT %s FROM %s where content_signature_key = ?" 
% (",".join(field_names),self.QCATALOG_TABLE_NAME) + r = self.execute_and_fetch(q,(content_signature_key,)) + + if r is None: + return None + + if len(r.results) == 0: + return None + + if len(r.results) > 1: + raise Exception("Bug - Exactly one result should have been provided: %s" % str(r.results)) + + d = dict(zip(field_names,r.results[0])) + return d + + def get_from_qcatalog_using_table_name(self, temp_table_name): + xprint("getting from qcatalog using table name") + + field_names = ["content_signature", "temp_table_name","creation_time","source_type","source","content_signature_key","qcatalog_entry_id"] + + q = "SELECT %s FROM %s where temp_table_name = ?" % (",".join(field_names),self.QCATALOG_TABLE_NAME) + xprint("Query from qcatalog %s params %s" % (q,str(temp_table_name,))) + r = self.execute_and_fetch(q,(temp_table_name,)) + xprint("results: ",r.results) + + if r is None: + return None + + if len(r.results) == 0: + return None + + if len(r.results) > 1: + raise Exception("Bug - Exactly one result should have been provided: %s" % str(r.results)) + + d = dict(zip(field_names,r.results[0])) + # content_signature should be the first in the list of field_names + cs = OrderedDict(json.loads(r.results[0][0])) + if self.calculate_content_signature_key(cs) != d['content_signature_key']: + raise Exception('Table contains an invalid entry - content signature key is not matching the actual content signature') + return d + + def get_all_from_qcatalog(self): + xprint("getting from qcatalog using table name") + + field_names = ["temp_table_name", "content_signature", "creation_time","source_type","source","qcatalog_entry_id"] + + q = "SELECT %s FROM %s" % (",".join(field_names),self.QCATALOG_TABLE_NAME) + xprint("Query from qcatalog %s" % q) + r = self.execute_and_fetch(q) + + if r is None: + return None + + def convert(res): + d = dict(zip(field_names, res)) + cs = OrderedDict(json.loads(res[1])) + d['content_signature_key'] = self.calculate_content_signature_key(cs) 
+ return d + + rr = [convert(r) for r in r.results] + + return rr + + def done(self): + xprint("Closing database %s" % self.db_id) try: - import sqlitebck - except ImportError as e: - msg = "sqlitebck python module cannot be found - fast store to disk cannot be performed. Note that for now, sqlitebck is not packaged as part of q. In order to use the fast method, you need to manually `pip install sqlitebck` into your python environment. We obviously consider this as a bug and it will be fixed once proper packaging will be done, making the fast method the standard one." - raise MissingSqliteBckModuleException(msg) - - new_db = sqlite3.connect(sqlite_db_filename) - sqlitebck.copy(self.conn,new_db) - c = new_db.cursor() - for source_filename_str,tn in table_names_mapping.iteritems(): - c.execute('alter table `%s` rename to `%s`' % (tn, source_filename_str)) - new_db.close() - - def store_db_to_disk(self,sqlite_db_filename,table_names_mapping,method='standard'): - if method == 'standard': - self.store_db_to_disk_standard(sqlite_db_filename,table_names_mapping) - elif method == 'fast': - self.store_db_to_disk_fast(sqlite_db_filename,table_names_mapping) - else: - raise ValueError('Unknown store-db-to-disk method %s' % method) + self.conn.commit() + self.conn.close() + xprint("Database %s closed" % self.db_id) + except Exception as e: + xprint("Could not close database %s" % self.db_id) + raise def add_user_functions(self): for udf in user_functions: @@ -321,75 +509,57 @@ def add_user_functions(self): raise Exception("Invalid user function definition %s" % str(udf)) def is_numeric_type(self, column_type): - return column_type in self.numeric_column_types + return column_type in Sqlite3DB.NUMERIC_COLUMN_TYPES def update_many(self, sql, params): try: - if self.show_sql: - print(sql, " params: " + str(params)) + sqlprint(sql, " params: " + str(params)) self.cursor.executemany(sql, params) + _ = self.cursor.fetchall() finally: pass # cursor.close() - def 
execute_and_fetch(self, q): + def execute_and_fetch(self, q,params = None): try: - if self.show_sql: - print(repr(q)) - self.cursor.execute(q) - if self.cursor.description is not None: - # we decode the column names, so they can be encoded to any output format later on - if six.PY2: - query_column_names = [unicode(c[0],'utf-8') for c in self.cursor.description] + try: + if self.show_sql: + print(repr(q)) + if params is None: + r = self.cursor.execute(q) else: + r = self.cursor.execute(q,params) + if self.cursor.description is not None: + # we decode the column names, so they can be encoded to any output format later on query_column_names = [c[0] for c in self.cursor.description] - else: - query_column_names = None - result = self.cursor.fetchall() - finally: - pass # cursor.close() + else: + query_column_names = None + result = self.cursor.fetchall() + finally: + pass # cursor.close() + except OperationalError as e: + raise SqliteOperationalErrorException("Failed executing sqlite query %s with params %s . error: %s" % (q,params,str(e)),e) return Sqlite3DBResults(query_column_names,result) def _get_as_list_str(self, l): return ",".join(['"%s"' % x.replace('"', '""') for x in l]) - def _get_col_values_as_list_str(self, col_vals, col_types): - result = [] - for col_val, col_type in zip(col_vals, col_types): - if col_val == '' and col_type is not str: - col_val = "null" - else: - if col_val is not None: - if "'" in col_val: - col_val = col_val.replace("'", "''") - col_val = "'" + col_val + "'" - else: - col_val = "null" - - result.append(col_val) - return ",".join(result) - def generate_insert_row(self, table_name, column_names): col_names_str = self._get_as_list_str(column_names) question_marks = ", ".join(["?" 
for i in range(0, len(column_names))]) return 'INSERT INTO %s (%s) VALUES (%s)' % (table_name, col_names_str, question_marks) - def generate_begin_transaction(self): - return "BEGIN TRANSACTION" - - def generate_end_transaction(self): - return "COMMIT" - # Get a list of column names so order will be preserved (Could have used OrderedDict, but # then we would need python 2.7) def generate_create_table(self, table_name, column_names, column_dict): # Convert dict from python types to db types column_name_to_db_type = dict( - (n, self.type_names[t]) for n, t in six.iteritems(column_dict)) + (n, Sqlite3DB.PYTHON_TO_SQLITE_TYPE_NAMES[t]) for n, t in six.iteritems(column_dict)) column_defs = ','.join(['"%s" %s' % ( n.replace('"', '""'), column_name_to_db_type[n]) for n in column_names]) return 'CREATE TABLE %s (%s)' % (table_name, column_defs) def generate_temp_table_name(self): + # WTF - From my own past mutable-self self.last_temp_table_id += 1 tn = "temp_table_%s" % self.last_temp_table_id return tn @@ -400,6 +570,35 @@ def generate_drop_table(self, table_name): def drop_table(self, table_name): return self.execute_and_fetch(self.generate_drop_table(table_name)) + def attach_and_copy_table(self, from_db, relevant_table,stop_after_analysis): + xprint("Attaching %s into db %s and copying table %s into it" % (from_db,self,relevant_table)) + temp_db_id = 'temp_db_id' + q = "attach '%s' as %s" % (from_db.sqlite_db_url,temp_db_id) + xprint("Attach query: %s" % q) + c = self.execute_and_fetch(q) + + new_temp_table_name = 'temp_table_%s' % (self.last_temp_table_id + 1) + fully_qualified_table_name = '%s.%s' % (temp_db_id,relevant_table) + + if stop_after_analysis: + limit = ' limit 100' + else: + limit = '' + + copy_query = 'create table %s as select * from %s %s' % (new_temp_table_name,fully_qualified_table_name,limit) + copy_results = self.execute_and_fetch(copy_query) + xprint("Copied %s.%s into %s in db_id %s. 
Results %s" % (temp_db_id,relevant_table,new_temp_table_name,self.db_id,copy_results)) + self.last_temp_table_id += 1 + + xprint("Copied table into %s. Detaching db that was attached temporarily" % self.db_id) + + q = "detach database %s" % temp_db_id + xprint("detach query: %s" % q) + c = self.execute_and_fetch(q) + xprint(c) + return new_temp_table_name + + class CouldNotConvertStringToNumericValueException(Exception): def __init__(self, msg): @@ -408,15 +607,62 @@ def __init__(self, msg): def __str(self): return repr(self.msg) -class ColumnMaxLengthLimitExceededException(Exception): +class SqliteOperationalErrorException(Exception): - def __init__(self, msg): + def __init__(self, msg,original_error): self.msg = msg + self.original_error = original_error def __str(self): - return repr(self.msg) + return repr(self.msg) + "//" + repr(self.original_error) + +class IncorrectDefaultValueException(Exception): + + def __init__(self, option_type,option,actual_value): + self.option_type = option_type + self.option = option + self.actual_value = actual_value -class MissingSqliteBckModuleException(Exception): + def __str__(self): + return repr(self) + +class NonExistentTableNameInQsql(Exception): + + def __init__(self, qsql_filename,table_name,existing_table_names): + self.qsql_filename = qsql_filename + self.table_name = table_name + self.existing_table_names = existing_table_names + +class NonExistentTableNameInSqlite(Exception): + + def __init__(self, qsql_filename,table_name,existing_table_names): + self.qsql_filename = qsql_filename + self.table_name = table_name + self.existing_table_names = existing_table_names + +class TooManyTablesInQsqlException(Exception): + + def __init__(self, qsql_filename,existing_table_names): + self.qsql_filename = qsql_filename + self.existing_table_names = existing_table_names + +class NoTableInQsqlExcption(Exception): + + def __init__(self, qsql_filename): + self.qsql_filename = qsql_filename + +class 
TooManyTablesInSqliteException(Exception): + + def __init__(self, qsql_filename,existing_table_names): + self.qsql_filename = qsql_filename + self.existing_table_names = existing_table_names + +class NoTablesInSqliteException(Exception): + + def __init__(self, sqlite_filename): + self.sqlite_filename = sqlite_filename + +class ColumnMaxLengthLimitExceededException(Exception): def __init__(self, msg): self.msg = msg @@ -424,7 +670,6 @@ def __init__(self, msg): def __str(self): return repr(self.msg) - class CouldNotParseInputException(Exception): def __init__(self, msg): @@ -450,7 +695,7 @@ def __str(self): return repr(self.msg) -class CannotUnzipStdInException(Exception): +class CannotUnzipDataStreamException(Exception): def __init__(self): pass @@ -460,11 +705,6 @@ class UniversalNewlinesExistException(Exception): def __init__(self): pass -class UnprovidedStdInException(Exception): - - def __init__(self): - pass - class EmptyDataException(Exception): def __init__(self): @@ -475,6 +715,15 @@ class MissingHeaderException(Exception): def __init__(self,msg): self.msg = msg +class InvalidQueryException(Exception): + + def __init__(self,msg): + self.msg = msg + +class TooManyAttachedDatabasesException(Exception): + + def __init__(self,msg): + self.msg = msg class FileNotFoundException(Exception): @@ -484,8 +733,7 @@ def __init__(self, msg): def __str(self): return repr(self.msg) - -class ColumnCountMismatchException(Exception): +class UnknownFileTypeException(Exception): def __init__(self, msg): self.msg = msg @@ -493,36 +741,81 @@ def __init__(self, msg): def __str(self): return repr(self.msg) + +class ColumnCountMismatchException(Exception): + + def __init__(self, msg): + self.msg = msg + +class ContentSignatureNotFoundException(Exception): + + def __init__(self, msg): + self.msg = msg + class StrictModeColumnCountMismatchException(Exception): - def __init__(self,expected_col_count,actual_col_count): + def __init__(self,atomic_fn, 
expected_col_count,actual_col_count,lines_read): + self.atomic_fn = atomic_fn self.expected_col_count = expected_col_count self.actual_col_count = actual_col_count + self.lines_read = lines_read class FluffyModeColumnCountMismatchException(Exception): - def __init__(self,expected_col_count,actual_col_count): + def __init__(self,atomic_fn, expected_col_count,actual_col_count,lines_read): + self.atomic_fn = atomic_fn self.expected_col_count = expected_col_count self.actual_col_count = actual_col_count + self.lines_read = lines_read + +class ContentSignatureDiffersException(Exception): + + def __init__(self,original_filename, other_filename, filenames_str,key,source_value,signature_value): + self.original_filename = original_filename + self.other_filename = other_filename + self.filenames_str = filenames_str + self.key = key + self.source_value = source_value + self.signature_value = signature_value + + +class ContentSignatureDataDiffersException(Exception): + + def __init__(self,msg): + self.msg = msg + + +class InvalidQSqliteFileException(Exception): + + def __init__(self,msg): + self.msg = msg + + +class MaximumSourceFilesExceededException(Exception): + + def __init__(self,msg): + self.msg = msg + # Simplistic Sql "parsing" class... We'll eventually require a real SQL parser which will provide us with a parse tree # # A "qtable" is a filename which behaves like an SQL table... - - class Sql(object): - def __init__(self, sql): + def __init__(self, sql, data_streams): # Currently supports only standard SELECT statements # Holds original SQL self.sql = sql # Holds sql parts self.sql_parts = sql.split() + self.data_streams = data_streams + + self.qtable_metadata_dict = OrderedDict() # Set of qtable names - self.qtable_names = set() + self.qtable_names = [] # Dict from qtable names to their positions in sql_parts. 
Value here is a *list* of positions, # since it is possible that the same qtable_name (file) is referenced in multiple positions # and we don't want the database table to be recreated for each @@ -544,7 +837,7 @@ def __init__(self, sql): # and there is nothing after it, if idx == len(self.sql_parts) - 1: # Just fail - raise Exception( + raise InvalidQueryException( 'FROM/JOIN is missing a table name after it') qtable_name = self.sql_parts[idx + 1] @@ -559,39 +852,59 @@ def __init__(self, sql): qtable_name = qtable_name[:qtable_name.index(')')] self.sql_parts[idx + 1] = qtable_name - self.qtable_names.add(qtable_name) + if qtable_name[0] != '(': + normalized_qtable_name = self.normalize_qtable_name(qtable_name) + xprint("Normalized qtable name for %s is %s" % (qtable_name,normalized_qtable_name)) + self.qtable_names += [normalized_qtable_name] - if qtable_name not in self.qtable_name_positions.keys(): - self.qtable_name_positions[qtable_name] = [] + if normalized_qtable_name not in self.qtable_name_positions.keys(): + self.qtable_name_positions[normalized_qtable_name] = [] - self.qtable_name_positions[qtable_name].append(idx + 1) - idx += 2 + self.qtable_name_positions[normalized_qtable_name].append(idx + 1) + self.sql_parts[idx + 1] = normalized_qtable_name + idx += 2 + else: + idx += 1 else: idx += 1 + xprint("Final sql parts: %s" % self.sql_parts) + + def normalize_qtable_name(self,qtable_name): + if self.data_streams.is_data_stream(qtable_name): + return qtable_name + + if ':::' in qtable_name: + qsql_filename, table_name = qtable_name.split(":::", 1) + return '%s:::%s' % (os.path.realpath(os.path.abspath(qsql_filename)),table_name) + else: + return os.path.realpath(os.path.abspath(qtable_name)) def set_effective_table_name(self, qtable_name, effective_table_name): - if qtable_name not in self.qtable_names: - raise Exception("Unknown qtable %s" % qtable_name) if qtable_name in self.qtable_name_effective_table_names.keys(): - raise Exception( - "Already set 
effective table name for qtable %s" % qtable_name) + if self.qtable_name_effective_table_names[qtable_name] != effective_table_name: + raise Exception( + "Already set effective table name for qtable %s. Trying to change the effective table name from %s to %s" % + (qtable_name,self.qtable_name_effective_table_names[qtable_name],effective_table_name)) + xprint("Setting effective table name for %s - effective table name is set to %s" % (qtable_name,effective_table_name)) self.qtable_name_effective_table_names[ qtable_name] = effective_table_name - def get_effective_sql(self,original_names=False): + def get_effective_sql(self,table_name_mapping=None): if len(list(filter(lambda x: x is None, self.qtable_name_effective_table_names))) != 0: - raise Exception('There are qtables without effective tables') + assert False, 'There are qtables without effective tables' effective_sql = [x for x in self.sql_parts] + xprint("Effective table names",self.qtable_name_effective_table_names) for qtable_name, positions in six.iteritems(self.qtable_name_positions): + xprint("Positions for qtable name %s are %s" % (qtable_name,positions)) for pos in positions: - if not original_names: - effective_sql[pos] = self.qtable_name_effective_table_names[ - qtable_name] + if table_name_mapping is not None: + x = self.qtable_name_effective_table_names[qtable_name] + effective_sql[pos] = table_name_mapping[x] else: - effective_sql[pos] = "`%s`" % qtable_name + effective_sql[pos] = self.qtable_name_effective_table_names[qtable_name] return " ".join(effective_sql) @@ -599,43 +912,54 @@ def get_qtable_name_effective_table_names(self): return self.qtable_name_effective_table_names def execute_and_fetch(self, db): - db_results_obj = db.execute_and_fetch(self.get_effective_sql()) + x = self.get_effective_sql() + xprint("Final query: %s" % x) + db_results_obj = db.execute_and_fetch(x) return db_results_obj + def materialize_using(self,loaded_table_structures_dict): + xprint("Materializing sql object: %s" % 
str(self.qtable_names)) + xprint("loaded table structures dict %s" % loaded_table_structures_dict) + for qtable_name in self.qtable_names: + table_structure = loaded_table_structures_dict[qtable_name] -class LineSplitter(object): + table_name_in_disk_db = table_structure.get_table_name_for_querying() - def __init__(self, delimiter, expected_column_count): - self.delimiter = delimiter - self.expected_column_count = expected_column_count - if delimiter is not None: - escaped_delimiter = re.escape(delimiter) - self.split_regexp = re.compile('(?:%s)+' % escaped_delimiter) - else: - self.split_regexp = re.compile(r'\s+') + effective_table_name = '%s.%s' % (table_structure.db_id, table_name_in_disk_db) - def split(self, line): - if line and line[-1] == '\n': - line = line[:-1] - return self.split_regexp.split(line, max_split=self.expected_column_count) + # for a single file - no need to create a union, just use the table name + self.set_effective_table_name(qtable_name, effective_table_name) + xprint("Materialized filename %s to effective table name %s" % (qtable_name,effective_table_name)) class TableColumnInferer(object): - def __init__(self, mode, expected_column_count, input_delimiter, skip_header=False,disable_column_type_detection=False): + def __init__(self, input_params): self.inferred = False - self.mode = mode + self.mode = input_params.parsing_mode self.rows = [] - self.skip_header = skip_header + self.skip_header = input_params.skip_header self.header_row = None self.header_row_filename = None - self.expected_column_count = expected_column_count - self.input_delimiter = input_delimiter - self.disable_column_type_detection = disable_column_type_detection + self.expected_column_count = input_params.expected_column_count + self.input_delimiter = input_params.delimiter + self.disable_column_type_detection = input_params.disable_column_type_detection + + def _generate_content_signature(self): + return OrderedDict({ + "inferred": self.inferred, + "mode": self.mode, 
+ "rows": "\n".join([",".join(x) for x in self.rows]), + "skip_header": self.skip_header, + "header_row": self.header_row, + "expected_column_count": self.expected_column_count, + "input_delimiter": self.input_delimiter, + "disable_column_type_detection": self.disable_column_type_detection + }) def analyze(self, filename, col_vals): if self.inferred: - raise Exception("Already inferred columns") + assert False, "Already inferred columns" if self.skip_header and self.header_row is None: self.header_row = col_vals @@ -701,7 +1025,7 @@ def determine_type_of_value_list(self, value_list): def do_analysis(self): if self.mode == 'strict': self._do_strict_analysis() - elif self.mode in ['relaxed', 'fluffy']: + elif self.mode in ['relaxed']: self._do_relaxed_analysis() else: raise Exception('Unknown parsing mode %s' % self.mode) @@ -710,8 +1034,8 @@ def do_analysis(self): print("Warning: column count is one - did you provide the correct delimiter?", file=sys.stderr) self.infer_column_types() - self.infer_column_names() + self.inferred = True def validate_column_names(self, value_list): column_name_errors = [] @@ -771,7 +1095,7 @@ def infer_column_names(self): if self.mode == 'strict': raise ColumnCountMismatchException("Strict mode. Header row contains less columns than expected column count(%s vs %s)" % ( len(self.header_row), self.column_count)) - elif self.mode in ['relaxed', 'fluffy']: + elif self.mode in ['relaxed']: # in relaxed mode, add columns to fill the missing ones self.header_row = self.header_row + \ ['c%s' % (x + len(self.header_row) + 1) @@ -780,7 +1104,7 @@ def infer_column_names(self): if self.mode == 'strict': raise ColumnCountMismatchException("Strict mode. 
Header row contains more columns than expected column count (%s vs %s)" % ( len(self.header_row), self.column_count)) - elif self.mode in ['relaxed', 'fluffy']: + elif self.mode in ['relaxed']: # In relaxed mode, just cut the extra column names self.header_row = self.header_row[:self.column_count] self.column_names = self.header_row @@ -793,7 +1117,10 @@ def _do_relaxed_analysis(self): column_count_list = [len(col_vals) for col_vals in self.rows] if len(self.rows) == 0: - self.column_count = 0 + if self.header_row is None: + self.column_count = 0 + else: + self.column_count = len(self.header_row) else: if self.expected_column_count is not None: self.column_count = self.expected_column_count @@ -823,6 +1150,7 @@ def _do_strict_analysis(self): self.infer_column_types() def infer_column_types(self): + assert self.column_count > -1 self.column_types = [] self.column_types2 = [] for column_number in range(self.column_count): @@ -845,7 +1173,7 @@ def infer_column_types(self): print('Warning - There seems to be header line in the file, but -H has not been specified. 
All fields will be detected as text fields, and the header line will appear as part of the data', file=sys.stderr) def get_column_dict(self): - return dict(zip(self.column_names, self.column_types)) + return OrderedDict(zip(self.column_names, self.column_types)) def get_column_count(self): return self.column_count @@ -857,38 +1185,25 @@ def get_column_types(self): return self.column_types -def py3_encoded_csv_reader(encoding, f, dialect, is_stdin,**kwargs): +def py3_encoded_csv_reader(encoding, f, dialect,row_data_only=False,**kwargs): try: + xprint("f is %s" % str(f)) + xprint("dialect is %s" % dialect) csv_reader = csv.reader(f, dialect, **kwargs) - for row in csv_reader: - yield row - except ValueError as e: - if e.message is not None and e.message.startswith('could not convert string to'): - raise CouldNotConvertStringToNumericValueException(e.message) - else: - raise CouldNotParseInputException(str(e)) - except Exception as e: - if str(e).startswith("field larger than field limit"): - raise ColumnMaxLengthLimitExceededException(str(e)) - elif 'universal-newline' in str(e): - raise UniversalNewlinesExistException() - else: - raise - - -def py2_encoded_csv_reader(encoding, f, dialect, is_stdin, **kwargs): - try: - csv_reader = csv.reader(f, dialect, **kwargs) - if encoding is not None and encoding != 'none': + if row_data_only: for row in csv_reader: - yield [unicode(x, encoding) for x in row] + yield row else: for row in csv_reader: - yield row + yield (f.filename(),f.isfirstline(),row) + + except UnicodeDecodeError as e1: + raise CouldNotParseInputException(e1) except ValueError as e: - if e.message is not None and e.message.startswith('could not convert string to'): - raise CouldNotConvertStringToNumericValueException(e.message) + # TODO Add test for this + if str(e) is not None and str(e).startswith('could not convert string to'): + raise CouldNotConvertStringToNumericValueException(str(e)) else: raise CouldNotParseInputException(str(e)) except Exception as 
e: @@ -899,239 +1214,986 @@ def py2_encoded_csv_reader(encoding, f, dialect, is_stdin, **kwargs): else: raise -if six.PY2: - encoded_csv_reader = py2_encoded_csv_reader -else: - encoded_csv_reader = py3_encoded_csv_reader +encoded_csv_reader = py3_encoded_csv_reader def normalized_filename(filename): - if filename == '-': - return 'stdin' - else: - return filename + return filename class TableCreatorState(object): - NEW = 'NEW' INITIALIZED = 'INITIALIZED' ANALYZED = 'ANALYZED' FULLY_READ = 'FULLY_READ' -class MaterializedFileState(object): - def __init__(self,filename,f,encoding,dialect,is_stdin): - self.filename = filename - self.lines_read = 0 - self.f = f - self.encoding = encoding - self.dialect = dialect - self.is_stdin = is_stdin - self.skipped_bom = False +class MaterializedStateType(object): + UNKNOWN = 'unknown' + DELIMITED_FILE = 'delimited-file' + QSQL_FILE = 'qsql-file' + SQLITE_FILE = 'sqlite-file' + DATA_STREAM = 'data-stream' + +class TableSourceType(object): + DELIMITED_FILE = 'file' + DELIMITED_FILE_WITH_UNUSED_QSQL = 'file-with-unused-qsql' + QSQL_FILE = 'qsql-file' + QSQL_FILE_WITH_ORIGINAL = 'qsql-file-with-original' + SQLITE_FILE = 'sqlite-file' + DATA_STREAM = 'data-stream' + +def skip_BOM(f): + try: + BOM = f.buffer.read(3) - def read_file_using_csv(self): - # This is a hack for utf-8 with BOM encoding in order to skip the BOM. python's csv module - # has a bug which prevents fixing it using the proper encoding, and it has been encountered by - # multiple people. - if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom: - try: - if six.PY2: - BOM = self.f.read(3) - else: - BOM = self.f.buffer.read(3) + if BOM != six.b('\xef\xbb\xbf'): + # TODO Add test for this (propagates to try:except) + raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM)) + except Exception as e: + # TODO Add a test for this + raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. 
Error message is ' + str(e)) + +def detect_qtable_name_source_info(qtable_name,data_streams,read_caching_enabled): + data_stream = data_streams.get_for_filename(qtable_name) + xprint("Found data stream %s" % data_stream) + + if data_stream is not None: + return MaterializedStateType.DATA_STREAM, TableSourceType.DATA_STREAM,(data_stream,) + + if ':::' in qtable_name: + qsql_filename, table_name = qtable_name.split(":::", 1) + if not os.path.exists(qsql_filename): + raise FileNotFoundException("Could not find file %s" % qsql_filename) + + if is_qsql_file(qsql_filename): + return MaterializedStateType.QSQL_FILE, TableSourceType.QSQL_FILE, (qsql_filename, table_name,) + if is_sqlite_file(qsql_filename): + return MaterializedStateType.SQLITE_FILE, TableSourceType.SQLITE_FILE, (qsql_filename, table_name,) + raise UnknownFileTypeException("Cannot detect the type of table %s" % qtable_name) + else: + if is_qsql_file(qtable_name): + return MaterializedStateType.QSQL_FILE, TableSourceType.QSQL_FILE, (qtable_name, None) + if is_sqlite_file(qtable_name): + return MaterializedStateType.SQLITE_FILE, TableSourceType.SQLITE_FILE, (qtable_name, None) + matching_qsql_file_candidate = qtable_name + '.qsql' + + table_source_type = TableSourceType.DELIMITED_FILE + if is_qsql_file(matching_qsql_file_candidate): + if read_caching_enabled: + xprint("Found matching qsql file for original file %s (matching file %s) and read caching is enabled. Using it" % (qtable_name,matching_qsql_file_candidate)) + return MaterializedStateType.QSQL_FILE, TableSourceType.QSQL_FILE_WITH_ORIGINAL, (matching_qsql_file_candidate, None) + else: + xprint("Found matching qsql file for original file %s (matching file %s), but read caching is disabled. 
Not using it" % (qtable_name,matching_qsql_file_candidate)) + table_source_type = TableSourceType.DELIMITED_FILE_WITH_UNUSED_QSQL - if BOM != six.b('\xef\xbb\xbf'): - raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM)) - except Exception as e: - raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e)) - csv_reader = encoded_csv_reader(self.encoding, self.f, is_stdin=self.is_stdin,dialect=self.dialect) - try: - for col_vals in csv_reader: - self.lines_read += 1 - yield col_vals - except ColumnMaxLengthLimitExceededException as e: - msg = "Column length is larger than the maximum. Offending file is '%s' - Line is %s, counting from 1 (encoding %s). The line number is the raw line number of the file, ignoring whether there's a header or not" % (self.filename,self.lines_read + 1,self.encoding) - raise ColumnMaxLengthLimitExceededException(msg) - except UniversalNewlinesExistException as e2: - # No need to translate the exception, but we want it to be explicitly defined here for clarity - raise UniversalNewlinesExistException() - def close(self): - if self.f != sys.stdin: - self.f.close() + return MaterializedStateType.DELIMITED_FILE,table_source_type ,(qtable_name, None) -class TableCreator(object): - def __init__(self, db, filenames_str, line_splitter, skip_header=False, gzipped=False, with_universal_newlines=False, encoding='UTF-8', mode='fluffy', expected_column_count=None, input_delimiter=None,disable_column_type_detection=False, - stdin_file=None,stdin_filename='-'): - self.db = db - self.filenames_str = filenames_str - self.skip_header = skip_header - self.gzipped = gzipped - self.table_created = False - self.line_splitter = line_splitter - self.encoding = encoding - self.mode = mode - self.expected_column_count = expected_column_count - self.input_delimiter = input_delimiter - self.stdin_file = stdin_file - self.stdin_filename = stdin_filename - self.with_universal_newlines = 
with_universal_newlines +def is_sqlite_file(filename): + if not os.path.exists(filename): + return False - self.column_inferer = TableColumnInferer( - mode, expected_column_count, input_delimiter, skip_header,disable_column_type_detection) + f = open(filename,'rb') + magic = f.read(16) + f.close() + return magic == six.b("SQLite format 3\x00") - # Filled only after table population since we're inferring the table - # creation data - self.table_name = None +def sqlite_table_exists(cursor,table_name): + results = cursor.execute("select count(*) from sqlite_master where type='table' and tbl_name == '%s'" % table_name).fetchall() + return results[0][0] == 1 - self.pre_creation_rows = [] - self.buffered_inserts = [] - self.effective_column_names = None +def is_qsql_file(filename): + if not is_sqlite_file(filename): + return False - # Column type indices for columns that contain numeric types. Lazily initialized - # so column inferer can do its work before this information is needed - self.numeric_column_indices = None + db = Sqlite3DB('check_qsql_db',filename,filename,create_qcatalog=False) + qcatalog_exists = db.qcatalog_table_exists() + db.done() + return qcatalog_exists + +def normalize_filename_to_table_name(filename): + xprint("Normalizing filename %s" % filename) + if filename[0].isdigit(): + xprint("Filename starts with a digit, adding prefix") + filename = 't_%s' % filename + if filename.lower().endswith(".qsql"): + filename = filename[:-5] + elif filename.lower().endswith('.sqlite'): + filename = filename[:-7] + elif filename.lower().endswith('.sqlite3'): + filename = filename[:-8] + return filename.replace("-","_dash_").replace(".","_dot_").replace('?','_qm_').replace("/","_slash_").replace("\\","_backslash_") + +def validate_content_signature(original_filename, source_signature,other_filename, content_signature,scope=None,dump=False): + if dump: + xprint("Comparing: source value: %s target value: %s" % (source_signature,content_signature)) + + s = "%s vs %s:" 
% (original_filename,other_filename) + if scope is None: + scope = [] + for k in source_signature: + if type(source_signature[k]) == OrderedDict: + return validate_content_signature(original_filename, source_signature[k],other_filename, content_signature[k],scope + [k]) + else: + if k not in content_signature: + raise ContentSignatureDataDiffersException("%s Content Signatures differ. %s is missing from content signature" % (s,k)) + if source_signature[k] != content_signature[k]: + if k == 'rows': + raise ContentSignatureDataDiffersException("%s Content Signatures differ at %s.%s (actual analysis data differs)" % (s,".".join(scope),k)) + else: + raise ContentSignatureDiffersException(original_filename, other_filename, original_filename,".".join(scope + [k]),source_signature[k],content_signature[k]) - self.materialized_file_list = self.materialize_file_list() - self.materialized_file_dict = {} +class DelimitedFileReader(object): + def __init__(self,atomic_fns, input_params, dialect, f = None,external_f_name = None): + if f is not None: + assert len(atomic_fns) == 0 - self.state = TableCreatorState.NEW + self.atomic_fns = atomic_fns + self.input_params = input_params + self.dialect = dialect - def materialize_file_list(self): - materialized_file_list = [] + self.f = f + self.lines_read = 0 + self.file_number = -1 - # Get the list of filenames - filenames = self.filenames_str.split("+") + self.skipped_bom = False - # for each filename (or pattern) - for fileglob in filenames: - # Allow either stdin or a glob match - if fileglob == self.stdin_filename: - materialized_file_list.append(self.stdin_filename) - else: - materialized_file_list += glob.glob(fileglob) + self.is_open = f is not None - # If there are no files to go over, - if len(materialized_file_list) == 0: - raise FileNotFoundException( - "No files matching '%s' have been found" % self.filenames_str) + self.external_f = f is not None + self.external_f_name = external_f_name - return materialized_file_list + 
def get_lines_read(self): + return self.lines_read - def get_table_name(self): - return self.table_name + def get_size_hash(self): + if self.atomic_fns is None or len(self.atomic_fns) == 0: + return "data-stream-size" + else: + return ",".join(map(str,[os.stat(atomic_fn).st_size for atomic_fn in self.atomic_fns])) + + def get_last_modification_time_hash(self): + if self.atomic_fns is None or len(self.atomic_fns) == 0: + return "data stream-lmt" + else: + x = ",".join(map(str,[os.stat(x).st_mtime_ns for x in self.atomic_fns])) + return hashlib.sha1(six.b(x)).hexdigest() + + def open_file(self): + if self.external_f: + xprint("External f has been provided. No need to open the file") + return - def open_file(self,filename): # TODO Support universal newlines for gzipped and stdin data as well - # Check if it's standard input or a file - if filename == self.stdin_filename: - if self.stdin_file is None: - raise UnprovidedStdInException() - f = self.stdin_file - if self.gzipped: - raise CannotUnzipStdInException() - else: - if self.gzipped or filename.endswith('.gz'): - f = codecs.iterdecode(gzip.GzipFile(fileobj=io.open(filename,'rb')),encoding=self.encoding) + xprint("XX Opening file %s" % ",".join(self.atomic_fns)) + import fileinput + + def q_openhook(filename, mode): + if self.input_params.gzipped_input or filename.endswith('.gz'): + import gzip + f = gzip.open(filename,mode='rt',encoding=self.input_params.input_encoding) else: if six.PY3: - if self.with_universal_newlines: - f = io.open(filename, 'rU',newline=None,encoding=self.encoding) + if self.input_params.with_universal_newlines: + f = io.open(filename, 'rU', newline=None, encoding=self.input_params.input_encoding) else: - f = io.open(filename, 'r', newline=None, encoding=self.encoding) + f = io.open(filename, 'r', newline=None, encoding=self.input_params.input_encoding) else: - if self.with_universal_newlines: + if self.input_params.with_universal_newlines: file_opening_mode = 'rbU' else: file_opening_mode = 
'rb' f = open(filename, file_opening_mode) - return f - def _pre_populate(self,dialect): - # For each match - for filename in self.materialized_file_list: - if filename in self.materialized_file_dict.keys(): - continue + if self.input_params.input_encoding == 'utf-8-sig' and not self.skipped_bom: + skip_BOM(f) - f = self.open_file(filename) + return f - is_stdin = filename == self.stdin_filename + f = fileinput.input(self.atomic_fns,mode='rb',openhook=q_openhook) - mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin) - self.materialized_file_dict[filename] = mfs + self.f = f + self.is_open = True + xprint("Actually opened file %s" % self.f) + return f - def _should_skip_extra_headers(self, filenumber, filename, mfs, col_vals): - if not self.skip_header: - return False + def close_file(self): + if not self.is_open: + # TODO Convert to assertion + raise Exception("Bug - file should already be open: %s" % ",".join(self.atomic_fns)) - if filenumber == 0: - return False + self.f.close() + xprint("XX Closed file %s" % ",".join(self.atomic_fns)) - header_already_exists = self.column_inferer.header_row is not None + def generate_rows(self): + csv_reader = encoded_csv_reader(self.input_params.input_encoding, self.f, dialect=self.dialect,row_data_only=self.external_f) + try: + # TODO Some order with regard to separating data-streams for actual files + if self.external_f: + for col_vals in csv_reader: + self.lines_read += 1 + yield self.external_f_name,0, self.lines_read == 0, col_vals + else: + for file_name,is_first_line,col_vals in csv_reader: + if is_first_line: + self.file_number = self.file_number + 1 + self.lines_read += 1 + yield file_name,self.file_number,is_first_line,col_vals + except ColumnMaxLengthLimitExceededException as e: + msg = "Column length is larger than the maximum. Offending file is '%s' - Line is %s, counting from 1 (encoding %s). 
The line number is the raw line number of the file, ignoring whether there's a header or not" % (",".join(self.atomic_fns),self.lines_read + 1,self.input_params.input_encoding) + raise ColumnMaxLengthLimitExceededException(msg) + except UniversalNewlinesExistException as e2: + # No need to translate the exception, but we want it to be explicitly defined here for clarity + raise UniversalNewlinesExistException() - is_extra_header = self.skip_header and mfs.lines_read == 1 and header_already_exists +class MaterializedState(object): + def __init__(self, table_source_type,qtable_name, engine_id): + xprint("Creating new MS: %s %s" % (id(self), qtable_name)) - if is_extra_header: - if tuple(self.column_inferer.header_row) != tuple(col_vals): - raise BadHeaderException("Extra header {} in file {} mismatches original header {} from file {}. Table name is {}".format(",".join(col_vals),mfs.filename,",".join(self.column_inferer.header_row),self.column_inferer.header_row_filename,self.filenames_str)) + self.table_source_type = table_source_type - return is_extra_header + self.qtable_name = qtable_name + self.engine_id = engine_id - def _populate(self,dialect,stop_after_analysis=False): - total_data_lines_read = 0 + self.db_to_use = None + self.db_id = None + + self.source_type = None + self.source = None + + self.mfs_structure = None + + self.start_time = None + self.end_time = None + self.duration = None + + self.effective_table_name = None + + + def get_materialized_state_type(self): + return MaterializedStateType.UNKNOWN + + def get_planned_table_name(self): + assert False, 'not implemented' + + def autodetect_table_name(self): + xprint("Autodetecting table name. 
db_to_use=%s" % self.db_to_use) + existing_table_names = self.db_to_use.retrieve_all_table_names() + xprint("Existing table names: %s" % existing_table_names) + + possible_indices = range(1,1000) + + for index in possible_indices: + if index == 1: + suffix = '' + else: + suffix = '_%s' % index + + table_name_attempt = '%s%s' % (self.get_planned_table_name(),suffix) + xprint("Table name attempt: index=%s name=%s" % (index,table_name_attempt)) + + if table_name_attempt not in existing_table_names: + xprint("Found free table name %s for source type %s source %s" % (table_name_attempt,self.source_type,self.source)) + return table_name_attempt + + raise Exception('Cannot find free table name for source type %s source %s' % (self.source_type,self.source)) + + def initialize(self): + self.start_time = time.time() + + def finalize(self): + self.end_time = time.time() + self.duration = self.end_time - self.start_time + + def choose_db_to_use(self,forced_db_to_use=None,stop_after_analysis=False): + assert False, 'not implemented' + + def make_data_available(self,stop_after_analysis): + assert False, 'not implemented' + +class MaterializedDelimitedFileState(MaterializedState): + def __init__(self, table_source_type,qtable_name, input_params, dialect_id,engine_id,target_table_name=None): + super().__init__(table_source_type,qtable_name,engine_id) + + self.input_params = input_params + self.dialect_id = dialect_id + self.target_table_name = target_table_name + + self.content_signature = None + + self.atomic_fns = None + + self.can_store_as_cached = None + + def get_materialized_state_type(self): + return MaterializedStateType.DELIMITED_FILE + + def initialize(self): + super(MaterializedDelimitedFileState, self).initialize() + + self.atomic_fns = self.materialize_file_list(self.qtable_name) + self.delimited_file_reader = DelimitedFileReader(self.atomic_fns,self.input_params,self.dialect_id) + + self.source_type = self.table_source_type + self.source = ",".join(self.atomic_fns) + 
+ return + + def materialize_file_list(self,qtable_name): + materialized_file_list = [] + + unfound_files = [] + # First check if the file exists without globbing. This will ensure that we don't support non-existent files + if os.path.exists(qtable_name): + # If it exists, then just use it + found_files = [qtable_name] + else: + # If not, then try with globs (and sort for predictability) + found_files = list(sorted(glob.glob(qtable_name))) + # If no files + if len(found_files) == 0: + unfound_files += [qtable_name] + materialized_file_list += found_files + + # If there are no files to go over, + if len(unfound_files) == 1: + raise FileNotFoundException( + "No files matching '%s' have been found" % unfound_files[0]) + elif len(unfound_files) > 1: + # TODO Add test for this + raise FileNotFoundException( + "The following files have not been found for table %s: %s" % (qtable_name,",".join(unfound_files))) + + # deduplicate with matching qsql files + filtered_file_list = list(filter(lambda x: not x.endswith('.qsql'),materialized_file_list)) + xprint("Filtered qsql files from glob search. Original file count: %s new file count: %s" % (len(materialized_file_list),len(filtered_file_list))) + + l = len(filtered_file_list) + # If this proves to be a problem for users in terms of usability, then we'll just materialize the files + # into the adhoc db, as with the db attach limit of sqlite + if l > 500: + msg = "Maximum source files for table must be 500. 
Table is name is %s Number of actual files is %s" % (qtable_name,l) + raise MaximumSourceFilesExceededException(msg) + + absolute_path_list = [os.path.abspath(x) for x in filtered_file_list] + return absolute_path_list + + def choose_db_to_use(self,forced_db_to_use=None,stop_after_analysis=False): + if forced_db_to_use is not None: + self.db_id = forced_db_to_use.db_id + self.db_to_use = forced_db_to_use + self.can_store_as_cached = False + assert self.target_table_name is None + self.target_table_name = self.autodetect_table_name() + return + + self.can_store_as_cached = True + + self.db_id = '%s' % self._generate_db_name(self.atomic_fns[0]) + xprint("Database id is %s" % self.db_id) + self.db_to_use = Sqlite3DB(self.db_id, 'file:%s?mode=memory&cache=shared' % self.db_id, 'memory<%s>' % self.db_id,create_qcatalog=True) + + if self.target_table_name is None: + self.target_table_name = self.autodetect_table_name() + + + def __analyze_delimited_file(self,database_info): + xprint("Analyzing delimited file") + if self.target_table_name is not None: + target_sqlite_table_name = self.target_table_name + else: + assert False + + xprint("Target sqlite table name is %s" % target_sqlite_table_name) + # Create the matching database table and populate it + table_creator = TableCreator(self.qtable_name, self.delimited_file_reader,self.input_params, sqlite_db=database_info.sqlite_db, + target_sqlite_table_name=target_sqlite_table_name) + table_creator.perform_analyze(self.dialect_id) + xprint("after perform_analyze") + self.content_signature = table_creator._generate_content_signature() + + now = datetime.datetime.utcnow().isoformat() + + database_info.sqlite_db.add_to_qcatalog_table(target_sqlite_table_name, + self.content_signature, + now, + self.source_type, + self.source) + return table_creator + + def _generate_disk_db_filename(self, filenames_str): + fn = '%s.qsql' % (os.path.abspath(filenames_str).replace("+","__")) + return fn + + + def _get_should_read_from_cache(self, 
disk_db_filename): + disk_db_file_exists = os.path.exists(disk_db_filename) + + should_read_from_cache = self.input_params.read_caching and disk_db_file_exists + + return should_read_from_cache + + def calculate_should_read_from_cache(self): + # TODO cache filename is chosen according to first filename only, which makes multi-file (glob) caching difficult + # cache writing is blocked for now in these cases. Will be added in the future (see save_cache_to_disk_if_needed) + disk_db_filename = self._generate_disk_db_filename(self.atomic_fns[0]) + should_read_from_cache = self._get_should_read_from_cache(disk_db_filename) + xprint("should read from cache %s" % should_read_from_cache) + return disk_db_filename,should_read_from_cache + + def get_planned_table_name(self): + return normalize_filename_to_table_name(os.path.basename(self.atomic_fns[0])) + + def make_data_available(self,stop_after_analysis): + xprint("In make_data_available. db_id %s db_to_use %s" % (self.db_id,self.db_to_use)) + assert self.db_id is not None + + disk_db_filename, should_read_from_cache = self.calculate_should_read_from_cache() + xprint("disk_db_filename=%s should_read_from_cache=%s" % (disk_db_filename,should_read_from_cache)) + + database_info = DatabaseInfo(self.db_id,self.db_to_use, needs_closing=True) + xprint("db %s (%s) has been added to the database list" % (self.db_id, self.db_to_use)) + + self.delimited_file_reader.open_file() + + table_creator = self.__analyze_delimited_file(database_info) + + self.mfs_structure = MaterializedStateTableStructure(self.qtable_name, self.atomic_fns, self.db_id, + table_creator.column_inferer.get_column_names(), + table_creator.column_inferer.get_column_types(), + None, + self.target_table_name, + self.source_type, + self.source, + self.get_planned_table_name()) + + content_signature = table_creator.content_signature + content_signature_key = self.db_to_use.calculate_content_signature_key(content_signature) + xprint("table creator signature key: %s" % 
content_signature_key) + + relevant_table = self.db_to_use.get_from_qcatalog(content_signature)['temp_table_name'] + + if not stop_after_analysis: + table_creator.perform_read_fully(self.dialect_id) + + self.save_cache_to_disk_if_needed(disk_db_filename, table_creator) + + + self.delimited_file_reader.close_file() + + return database_info, relevant_table + + def save_cache_to_disk_if_needed(self, disk_db_filename, table_creator): + if len(self.atomic_fns) > 1: + xprint("Cannot save cache for multi-files for now, deciding auto-naming for cache is challenging. Will be added in the future.") + return + + effective_write_caching = self.input_params.write_caching + if effective_write_caching: + if self.can_store_as_cached: + assert self.table_source_type != TableSourceType.DELIMITED_FILE_WITH_UNUSED_QSQL + xprint("Going to write file cache for %s. Disk filename is %s" % (",".join(self.atomic_fns), disk_db_filename)) + self._store_qsql(table_creator.sqlite_db, disk_db_filename) + else: + xprint("Database has been provided externally. 
Skipping storing a cached version of the data") + + def _store_qsql(self, source_sqlite_db, disk_db_filename): + xprint("Storing data as disk db") + disk_db_conn = sqlite3.connect(disk_db_filename) + sqlitebck.copy(source_sqlite_db.conn,disk_db_conn) + xprint("Written db to disk: disk db filename %s" % (disk_db_filename)) + disk_db_conn.close() + + def _generate_db_name(self, qtable_name): + return 'e_%s_fn_%s' % (self.engine_id,normalize_filename_to_table_name(qtable_name)) + + +class MaterialiedDataStreamState(MaterializedDelimitedFileState): + def __init__(self, table_source_type, qtable_name, input_params, dialect_id, engine_id, data_stream, stream_target_db): ## should pass adhoc_db + assert data_stream is not None + + super().__init__(table_source_type, qtable_name, input_params, dialect_id, engine_id,target_table_name=None) + + self.data_stream = data_stream + + self.stream_target_db = stream_target_db + + self.target_table_name = None + + def get_planned_table_name(self): + return 'data_stream_%s' % (normalize_filename_to_table_name(self.source)) + + def get_materialized_state_type(self): + return MaterializedStateType.DATA_STREAM + + def initialize(self): + self.start_time = time.time() + if self.input_params.gzipped_input: + raise CannotUnzipDataStreamException() + + self.source_type = self.table_source_type + self.source = self.data_stream.stream_id + + self.delimited_file_reader = DelimitedFileReader([], self.input_params, self.dialect_id, f=self.data_stream.stream,external_f_name=self.source) + + def choose_db_to_use(self,forced_db_to_use=None,stop_after_analysis=False): + assert forced_db_to_use is None + + self.db_id = self.stream_target_db.db_id + self.db_to_use = self.stream_target_db + + self.target_table_name = self.autodetect_table_name() + + return + + def calculate_should_read_from_cache(self): + # No disk_db_filename, and no reading from cache when reading a datastream + return None, False + + def finalize(self): + 
super(MaterialiedDataStreamState, self).finalize() + + def save_cache_to_disk_if_needed(self, disk_db_filename, table_creator): + xprint("Saving to cache is disabled for data streams") + return + + +class MaterializedSqliteState(MaterializedState): + def __init__(self,table_source_type,qtable_name,sqlite_filename,table_name, engine_id): + super(MaterializedSqliteState, self).__init__(table_source_type,qtable_name,engine_id) + self.sqlite_filename = sqlite_filename + self.table_name = table_name + + self.table_name_autodetected = None + + def initialize(self): + super(MaterializedSqliteState, self).initialize() + + self.table_name_autodetected = False + if self.table_name is None: + self.table_name = self.autodetect_table_name() + self.table_name_autodetected = True + return + + self.validate_table_name() + + def get_planned_table_name(self): + if self.table_name_autodetected: + return normalize_filename_to_table_name(os.path.basename(self.qtable_name)) + else: + return self.table_name + + + def autodetect_table_name(self): + db = Sqlite3DB('temp_db','file:%s?immutable=1' % self.sqlite_filename,self.sqlite_filename,create_qcatalog=False) + try: + table_names = list(sorted(db.retrieve_all_table_names())) + if len(table_names) == 1: + return table_names[0] + elif len(table_names) == 0: + raise NoTablesInSqliteException(self.sqlite_filename) + else: + raise TooManyTablesInSqliteException(self.sqlite_filename,table_names) + finally: + db.done() + + def validate_table_name(self): + db = Sqlite3DB('temp_db', 'file:%s?immutable=1' % self.sqlite_filename, self.sqlite_filename, + create_qcatalog=False) + try: + table_names = list(db.retrieve_all_table_names()) + if self.table_name.lower() not in map(lambda x:x.lower(),table_names): + raise NonExistentTableNameInSqlite(self.sqlite_filename, self.table_name, table_names) + finally: + db.done() + + def finalize(self): + super(MaterializedSqliteState, self).finalize() + + def get_materialized_state_type(self): + return 
MaterializedStateType.SQLITE_FILE + + def _generate_qsql_only_db_name__temp(self, filenames_str): + return 'e_%s_fn_%s' % (self.engine_id,hashlib.sha1(six.b(filenames_str)).hexdigest()) + + def choose_db_to_use(self,forced_db_to_use=None,stop_after_analysis=False): + self.source = self.sqlite_filename + self.source_type = self.table_source_type + + self.db_id = '%s' % self._generate_qsql_only_db_name__temp(self.qtable_name) + + x = 'file:%s?immutable=1' % self.sqlite_filename + self.db_to_use = Sqlite3DB(self.db_id, x, self.sqlite_filename,create_qcatalog=False) + + if forced_db_to_use: + xprint("Forced sqlite db_to_use %s" % forced_db_to_use) + new_table_name = forced_db_to_use.attach_and_copy_table(self.db_to_use,self.table_name,stop_after_analysis) + self.table_name = new_table_name + self.db_id = forced_db_to_use.db_id + self.db_to_use = forced_db_to_use + + return + + def make_data_available(self,stop_after_analysis): + xprint("db %s (%s) has been added to the database list" % (self.db_id, self.db_to_use)) + + database_info,relevant_table = DatabaseInfo(self.db_id,self.db_to_use, needs_closing=True), self.table_name + + column_names, column_types, sqlite_column_types = self._extract_information() + + self.mfs_structure = MaterializedStateTableStructure(self.qtable_name, [self.qtable_name], self.db_id, + column_names, column_types, sqlite_column_types, + self.table_name, + self.source_type,self.source, + self.get_planned_table_name()) + return database_info, relevant_table + + def _extract_information(self): + table_list = self.db_to_use.retrieve_all_table_names() + if len(table_list) == 1: + table_name = table_list[0][0] + xprint("Only one table in sqlite database, choosing it: %s" % table_name) + else: + # self.table_name has either beein autodetected, or validated as an existing table up the stack + table_name = self.table_name + xprint("Multiple tables in sqlite file. 
Using provided table name %s" % self.table_name) + + table_info = self.db_to_use.get_sqlite_table_info(table_name) + xprint('Table info is %s' % table_info) + column_names = list(map(lambda x: x[1], table_info)) + sqlite_column_types = list(map(lambda x: x[2].lower(),table_info)) + column_types = list(map(lambda x: sqlite_type_to_python_type(x[2]), table_info)) + xprint("Column names and types for table %s: %s" % (table_name, list(zip(column_names, zip(sqlite_column_types,column_types))))) + self.content_signature = OrderedDict() + + return column_names, column_types, sqlite_column_types + + +class MaterializedQsqlState(MaterializedState): + def __init__(self,table_source_type,qtable_name,qsql_filename,table_name, engine_id,input_params,dialect_id): + super(MaterializedQsqlState, self).__init__(table_source_type,qtable_name,engine_id) + self.qsql_filename = qsql_filename + self.table_name = table_name + + # These are for cases where the qsql file is just a cache and the original is still there, used for content + # validation + self.input_params = input_params + self.dialect_id = dialect_id + + self.table_name_autodetected = None + + def initialize(self): + super(MaterializedQsqlState, self).initialize() + + self.table_name_autodetected = False + if self.table_name is None: + self.table_name = self.autodetect_table_name() + self.table_name_autodetected = True + return + + self.validate_table_name() + + def get_planned_table_name(self): + if self.table_name_autodetected: + return normalize_filename_to_table_name(os.path.basename(self.qtable_name)) + else: + return self.table_name + + + def autodetect_table_name(self): + db = Sqlite3DB('temp_db','file:%s?immutable=1' % self.qsql_filename,self.qsql_filename,create_qcatalog=False) + assert db.qcatalog_table_exists() + try: + qcatalog_entries = db.get_all_from_qcatalog() + if len(qcatalog_entries) == 0: + raise NoTableInQsqlExcption(self.qsql_filename) + elif len(qcatalog_entries) == 1: + return 
qcatalog_entries[0]['temp_table_name'] + else: + # TODO Add a test for this + table_names = list(sorted([x['temp_table_name'] for x in qcatalog_entries])) + raise TooManyTablesInQsqlException(self.qsql_filename,table_names) + finally: + db.done() + + def validate_table_name(self): + db = Sqlite3DB('temp_db', 'file:%s?immutable=1' % self.qsql_filename, self.qsql_filename, + create_qcatalog=False) + assert db.qcatalog_table_exists() + try: + entry = db.get_from_qcatalog_using_table_name(self.table_name) + if entry is None: + qcatalog_entries = db.get_all_from_qcatalog() + table_names = list(sorted([x['temp_table_name'] for x in qcatalog_entries])) + raise NonExistentTableNameInQsql(self.qsql_filename,self.table_name,table_names) + finally: + db.done() + + def finalize(self): + super(MaterializedQsqlState, self).finalize() + + def get_materialized_state_type(self): + return MaterializedStateType.QSQL_FILE + + def _generate_qsql_only_db_name__temp(self, filenames_str): + return 'e_%s_fn_%s' % (self.engine_id,hashlib.sha1(six.b(filenames_str)).hexdigest()) + + def choose_db_to_use(self,forced_db_to_use=None,stop_after_analysis=False): + self.source = self.qsql_filename + self.source_type = self.table_source_type + + self.db_id = '%s' % self._generate_qsql_only_db_name__temp(self.qtable_name) + + x = 'file:%s?immutable=1' % self.qsql_filename + self.db_to_use = Sqlite3DB(self.db_id, x, self.qsql_filename,create_qcatalog=False) + + if forced_db_to_use: + xprint("Forced qsql to use forced_db: %s" % forced_db_to_use) + + # TODO RLRL Move query to Sqlite3DB + all_table_names = [(x[0],x[1]) for x in self.db_to_use.execute_and_fetch("select content_signature_key,temp_table_name from %s" % self.db_to_use.QCATALOG_TABLE_NAME).results] + csk,t = list(filter(lambda x: x[1] == self.table_name,all_table_names))[0] + xprint("Copying table %s from db_id %s" % (t,self.db_id)) + d = self.db_to_use.get_from_qcatalog_using_table_name(t) + + new_table_name = 
forced_db_to_use.attach_and_copy_table(self.db_to_use,self.table_name,stop_after_analysis) + + xprint("CS",d['content_signature']) + cs = OrderedDict(json.loads(d['content_signature'])) + forced_db_to_use.add_to_qcatalog_table(new_table_name, cs, d['creation_time'], + d['source_type'], d['source']) + + self.table_name = new_table_name + self.db_id = forced_db_to_use.db_id + self.db_to_use = forced_db_to_use + + return + + def make_data_available(self,stop_after_analysis): + xprint("db %s (%s) has been added to the database list" % (self.db_id, self.db_to_use)) + + database_info,relevant_table = self._read_table_from_cache(stop_after_analysis) + + column_names, column_types, sqlite_column_types = self._extract_information() + + self.mfs_structure = MaterializedStateTableStructure(self.qtable_name, [self.qtable_name], self.db_id, + column_names, column_types, sqlite_column_types, + self.table_name, + self.source_type,self.source, + self.get_planned_table_name()) + return database_info, relevant_table + + def _extract_information(self): + assert self.db_to_use.qcatalog_table_exists() + table_info = self.db_to_use.get_sqlite_table_info(self.table_name) + xprint('table_name=%s Table info is %s' % (self.table_name,table_info)) + + x = self.db_to_use.get_from_qcatalog_using_table_name(self.table_name) + + column_names = list(map(lambda x: x[1], table_info)) + sqlite_column_types = list(map(lambda x: x[2].lower(),table_info)) + column_types = list(map(lambda x: sqlite_type_to_python_type(x[2]), table_info)) + self.content_signature = OrderedDict( + **json.loads(x['content_signature'])) + xprint('Inferred column names and types from qsql: %s' % list(zip(column_names, zip(sqlite_column_types,column_types)))) + + return column_names, column_types, sqlite_column_types + + def _backing_original_file_exists(self): + return '%s.qsql' % self.qtable_name == self.qsql_filename + + def _read_table_from_cache(self, stop_after_analysis): + if self._backing_original_file_exists(): + 
xprint("Found a matching source file for qsql file with qtable name %s. Checking content signature by creating a temp MFDS + analysis" % self.qtable_name) + mdfs = MaterializedDelimitedFileState(TableSourceType.DELIMITED_FILE,self.qtable_name,self.input_params,self.dialect_id,self.engine_id,target_table_name=None) + mdfs.initialize() + mdfs.choose_db_to_use(forced_db_to_use=None,stop_after_analysis=stop_after_analysis) + _,_ = mdfs.make_data_available(stop_after_analysis=True) + + original_file_content_signature = mdfs.content_signature + original_file_content_signature_key = self.db_to_use.calculate_content_signature_key(original_file_content_signature) + + qcatalog_entry = self.db_to_use.get_from_qcatalog_using_table_name(self.table_name) + + if qcatalog_entry is None: + raise Exception('missing content signature!') + + xprint("Actual Signature Key: %s Expected Signature Key: %s" % (qcatalog_entry['content_signature_key'],original_file_content_signature_key)) + actual_content_signature = json.loads(qcatalog_entry['content_signature']) + + xprint("Validating content signatures: original %s vs qsql %s" % (original_file_content_signature,actual_content_signature)) + validate_content_signature(self.qtable_name, original_file_content_signature, self.qsql_filename, actual_content_signature,dump=True) + mdfs.finalize() + return DatabaseInfo(self.db_id,self.db_to_use, needs_closing=True), self.table_name + + +class MaterializedStateTableStructure(object): + def __init__(self,qtable_name, atomic_fns, db_id, column_names, python_column_types, sqlite_column_types, table_name_for_querying,source_type,source,planned_table_name): + self.qtable_name = qtable_name + self.atomic_fns = atomic_fns + self.db_id = db_id + self.column_names = column_names + self.python_column_types = python_column_types + self.table_name_for_querying = table_name_for_querying + self.source_type = source_type + self.source = source + self.planned_table_name = planned_table_name + + if 
sqlite_column_types is not None: + self.sqlite_column_types = sqlite_column_types + else: + self.sqlite_column_types = [Sqlite3DB.PYTHON_TO_SQLITE_TYPE_NAMES[t].lower() for t in python_column_types] + + def get_table_name_for_querying(self): + return self.table_name_for_querying + + def __str__(self): + return "MaterializedStateTableStructure<%s>" % self.__dict__ + __repr__ = __str__ + +class TableCreator(object): + def __str__(self): + return "TableCreator<%s>" % str(self) + __repr__ = __str__ + + def __init__(self, qtable_name, delimited_file_reader,input_params,sqlite_db=None,target_sqlite_table_name=None): + + self.qtable_name = qtable_name + self.delimited_file_reader = delimited_file_reader + + self.db_id = sqlite_db.db_id + + self.sqlite_db = sqlite_db + self.target_sqlite_table_name = target_sqlite_table_name + + self.skip_header = input_params.skip_header + self.gzipped = input_params.gzipped_input + self.table_created = False + + self.encoding = input_params.input_encoding + self.mode = input_params.parsing_mode + self.expected_column_count = input_params.expected_column_count + self.input_delimiter = input_params.delimiter + self.with_universal_newlines = input_params.with_universal_newlines + + self.column_inferer = TableColumnInferer(input_params) + + self.pre_creation_rows = [] + self.buffered_inserts = [] + self.effective_column_names = None + + # Column type indices for columns that contain numeric types. Lazily initialized + # so column inferer can do its work before this information is needed + self.numeric_column_indices = None + + self.state = TableCreatorState.INITIALIZED + + self.content_signature = None + + def _generate_content_signature(self): + if self.state != TableCreatorState.ANALYZED: + # TODO Change to assertion + raise Exception('Bug - Wrong state %s. 
Table needs to be analyzed before a content signature can be calculated' % self.state) + + size = self.delimited_file_reader.get_size_hash() + last_modification_time = self.delimited_file_reader.get_last_modification_time_hash() - # For each match - for filenumber,filename in enumerate(self.materialized_file_list): - mfs = self.materialized_file_dict[filename] + m = OrderedDict({ + "_signature_version": "v1", + "skip_header": self.skip_header, + "gzipped": self.gzipped, + "with_universal_newlines": self.with_universal_newlines, + "encoding": self.encoding, + "mode": self.mode, + "expected_column_count": self.expected_column_count, + "input_delimiter": self.input_delimiter, + "inferer": self.column_inferer._generate_content_signature(), + "original_file_size": size, + "last_modification_time": last_modification_time + }) + return m + + def validate_extra_header_if_needed(self, file_number, filename,col_vals): + xprint("HHX validate",file_number,filename,col_vals) + if not self.skip_header: + xprint("No need to validate header") + return False + + if file_number == 0: + xprint("First file, no need to validate extra header") + return False + + header_already_exists = self.column_inferer.header_row is not None + + if header_already_exists: + xprint("Validating extra header") + if tuple(self.column_inferer.header_row) != tuple(col_vals): + raise BadHeaderException("Extra header '{}' in file '{}' mismatches original header '{}' from file '{}'. 
Table name is '{}'".format( + ",".join(col_vals),filename, + ",".join(self.column_inferer.header_row), + self.column_inferer.header_row_filename, + self.qtable_name)) + xprint("header already exists: %s" % self.column_inferer.header_row) + else: + xprint("Header doesn't already exist") + + return header_already_exists + + def _populate(self,dialect,stop_after_analysis=False): + total_data_lines_read = 0 + try: try: - try: - for col_vals in mfs.read_file_using_csv(): - if self._should_skip_extra_headers(filenumber,filename,mfs,col_vals): + for file_name,file_number,is_first_line,col_vals in self.delimited_file_reader.generate_rows(): + if is_first_line: + if self.validate_extra_header_if_needed(file_number,file_name,col_vals): continue - self._insert_row(filename, col_vals) - if stop_after_analysis and self.column_inferer.inferred: + self._insert_row(file_name, col_vals) + if stop_after_analysis: + if self.column_inferer.inferred: + xprint("Stopping after analysis") return - if mfs.lines_read == 0 and self.skip_header: - raise MissingHeaderException("Header line is expected but missing in file %s" % filename) - - total_data_lines_read += mfs.lines_read - (1 if self.skip_header else 0) - except StrictModeColumnCountMismatchException as e: - raise ColumnCountMismatchException( - 'Strict mode - Expected %s columns instead of %s columns in file %s row %s. Either use relaxed/fluffy modes or check your delimiter' % ( - e.expected_col_count, e.actual_col_count, normalized_filename(mfs.filename), mfs.lines_read)) - except FluffyModeColumnCountMismatchException as e: - raise ColumnCountMismatchException( - 'Deprecated fluffy mode - Too many columns in file %s row %s (%s fields instead of %s fields). 
Consider moving to either relaxed or strict mode' % ( - normalized_filename(mfs.filename), mfs.lines_read, e.actual_col_count, e.expected_col_count)) - finally: - if not stop_after_analysis: - mfs.close() - self._flush_inserts() - - if not self.table_created: - self.column_inferer.force_analysis() - self._do_create_table(filename) - + if self.delimited_file_reader.get_lines_read() == 0 and self.skip_header: + raise MissingHeaderException("Header line is expected but missing in file %s" % ",".join(self.delimited_file_reader.atomic_fns)) + + total_data_lines_read += self.delimited_file_reader.lines_read - (1 if self.skip_header else 0) + xprint("Total Data lines read %s" % total_data_lines_read) + except StrictModeColumnCountMismatchException as e: + raise ColumnCountMismatchException( + 'Strict mode - Expected %s columns instead of %s columns in file %s row %s. Either use relaxed modes or check your delimiter' % ( + e.expected_col_count, e.actual_col_count, normalized_filename(e.atomic_fn), e.lines_read)) + except FluffyModeColumnCountMismatchException as e: + raise ColumnCountMismatchException( + 'Deprecated fluffy mode - Too many columns in file %s row %s (%s fields instead of %s fields). Consider moving to either relaxed or strict mode' % ( + normalized_filename(e.atomic_fn), e.lines_read, e.actual_col_count, e.expected_col_count)) + finally: + self._flush_inserts() - if total_data_lines_read == 0: - raise EmptyDataException() + if not self.table_created: + self.column_inferer.force_analysis() + self._do_create_table(self.qtable_name) - def populate(self,dialect,stop_after_analysis=False): - if self.state == TableCreatorState.NEW: - self._pre_populate(dialect) - self.state = TableCreatorState.INITIALIZED + self.sqlite_db.conn.commit() + def perform_analyze(self, dialect): + xprint("Analyzing... 
%s" % dialect) if self.state == TableCreatorState.INITIALIZED: self._populate(dialect,stop_after_analysis=True) self.state = TableCreatorState.ANALYZED - if stop_after_analysis: - return + self.content_signature = self._generate_content_signature() + content_signature_key = self.sqlite_db.calculate_content_signature_key(self.content_signature) + xprint("Setting content signature after analysis: %s" % content_signature_key) + else: + # TODO Convert to assertion + raise Exception('Bug - Wrong state %s' % self.state) + def perform_read_fully(self, dialect): if self.state == TableCreatorState.ANALYZED: self._populate(dialect,stop_after_analysis=False) self.state = TableCreatorState.FULLY_READ - return + else: + # TODO Convert to assertion + raise Exception('Bug - Wrong state %s' % self.state) def _flush_pre_creation_rows(self, filename): for i, col_vals in enumerate(self.pre_creation_rows): @@ -1163,7 +2225,7 @@ def initialize_numeric_column_indices_if_needed(self): if self.numeric_column_indices is None: column_types = self.column_inferer.get_column_types() self.numeric_column_indices = [idx for idx, column_type in enumerate( - column_types) if self.db.is_numeric_type(column_type)] + column_types) if self.sqlite_db.is_numeric_type(column_type)] def nullify_values_if_needed(self, col_vals): new_vals = col_vals[:] @@ -1186,7 +2248,7 @@ def normalize_col_vals(self, col_vals): actual_col_count = len(col_vals) if self.mode == 'strict': if actual_col_count != expected_col_count: - raise StrictModeColumnCountMismatchException(expected_col_count,actual_col_count) + raise StrictModeColumnCountMismatchException(",".join(self.delimited_file_reader.atomic_fns), expected_col_count,actual_col_count,self.delimited_file_reader.get_lines_read()) return col_vals # in all non strict mode, we add dummy data to missing columns @@ -1205,12 +2267,7 @@ def normalize_col_vals(self, col_vals): else: return col_vals - if self.mode == 'fluffy': - if actual_col_count > expected_col_count: - raise 
FluffyModeColumnCountMismatchException(expected_col_count,actual_col_count) - return col_vals - - raise Exception("Unidentified parsing mode %s" % self.mode) + assert False, "Unidentified parsing mode %s" % self.mode def _insert_row_i(self, col_vals): col_vals = self.normalize_col_vals(col_vals) @@ -1228,22 +2285,20 @@ def _insert_row_i(self, col_vals): self._flush_inserts() def _flush_inserts(self): - # print self.db.execute_and_fetch(self.db.generate_begin_transaction()) - # If the table is still not created, then we don't have enough data if not self.table_created: return if len(self.buffered_inserts) > 0: - insert_row_stmt = self.db.generate_insert_row( - self.table_name, self.effective_column_names) + insert_row_stmt = self.sqlite_db.generate_insert_row( + self.target_sqlite_table_name, self.effective_column_names) - self.db.update_many(insert_row_stmt, self.buffered_inserts) - # print self.db.execute_and_fetch(self.db.generate_end_transaction()) + self.sqlite_db.update_many(insert_row_stmt, self.buffered_inserts) self.buffered_inserts = [] def try_to_create_table(self, filename, col_vals): if self.table_created: + # TODO Convert to assertion raise Exception('Table is already created') # Add that line to the column inferer @@ -1255,8 +2310,6 @@ def try_to_create_table(self, filename, col_vals): pass # We don't have enough information for creating the table yet def _do_create_table(self,filename): - # Then generate a temp table name - self.table_name = self.db.generate_temp_table_name() # Get the column definition dict from the inferer column_dict = self.column_inferer.get_column_dict() @@ -1268,18 +2321,14 @@ def _do_create_table(self,filename): ordered_column_names = self.column_inferer.get_column_names() # Create the CREATE TABLE statement - create_table_stmt = self.db.generate_create_table( - self.table_name, ordered_column_names, column_dict) + create_table_stmt = self.sqlite_db.generate_create_table( + self.target_sqlite_table_name, ordered_column_names, 
column_dict) # And create the table itself - self.db.execute_and_fetch(create_table_stmt) + self.sqlite_db.execute_and_fetch(create_table_stmt) # Mark the table as created self.table_created = True self._flush_pre_creation_rows(filename) - def drop_table(self): - if self.table_created: - self.db.drop_table(self.table_name) - def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): if len(m) == 0: @@ -1287,7 +2336,7 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): max_lengths = [0 for x in range(0, len(m[0]))] for row_index in range(0, len(m)): for col_index in range(0, len(m[0])): - # TODO Optimize this and make sure that py2 hack of float precision is applied here as well + # TODO Optimize this new_len = len("{}".format(output_field_quoting_func(output_delimiter,m[row_index][col_index]))) if new_len > max_lengths[col_index]: max_lengths[col_index] = new_len @@ -1296,7 +2345,7 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter): def print_credentials(): print("q version %s" % q_version, file=sys.stderr) print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr) - print("Copyright (C) 2012-2020 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) + print("Copyright (C) 2012-2021 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) print("http://harelba.github.io/q/", file=sys.stderr) print(file=sys.stderr) @@ -1312,49 +2361,18 @@ def __init__(self,exception,msg,errorcode): self.errorcode = errorcode self.traceback = traceback.format_exc() -class QDataLoad(object): - def __init__(self,filename,start_time,end_time): - self.filename = filename - self.start_time = start_time - self.end_time = end_time - - def duration(self): - return self.end_time - self.start_time - - def __str__(self): - return "DataLoad<'%s' at %s (took %4.3f seconds)>" % (self.filename,self.start_time,self.duration()) - __repr__ = 
__str__ - -class QMaterializedFile(object): - def __init__(self,filename,is_stdin): - self.filename = filename - self.is_stdin = is_stdin - - def __str__(self): - return "QMaterializedFile" % (self.filename,self.is_stdin) - __repr__ = __str__ - -class QTableStructure(object): - def __init__(self,filenames_str,materialized_files,column_names,column_types): - self.filenames_str = filenames_str - self.materialized_files = materialized_files - self.column_names = column_names - self.column_types = column_types - def __str__(self): - return "QTableStructure" % ( - self.filenames_str,len(self.materialized_files.keys()),self.column_names,self.column_types) + return "QError" % (self.errorcode,self.msg,self.exception,str(self.traceback)) __repr__ = __str__ class QMetadata(object): - def __init__(self,table_structures=[],output_column_name_list=None,data_loads=[]): + def __init__(self,table_structures={},new_table_structures={},output_column_name_list=None): self.table_structures = table_structures + self.new_table_structures = new_table_structures self.output_column_name_list = output_column_name_list - self.data_loads = data_loads def __str__(self): - return "QMetadata" % (self.stream_id,self.filename,self.stream) + __repr__ = __str__ + + +class DataStreams(object): + def __init__(self, data_streams_dict): + assert type(data_streams_dict) == dict + self.validate(data_streams_dict) + self.data_streams_dict = data_streams_dict + + def validate(self,d): + for k in d: + v = d[k] + if type(k) != str or type(v) != DataStream: + raise Exception('Bug - Invalid dict: %s' % str(d)) + + def get_for_filename(self, filename): + xprint("Data streams dict is %s. 
Trying to find %s" % (self.data_streams_dict,filename)) + x = self.data_streams_dict.get(filename) + return x + + def is_data_stream(self,filename): + return filename in self.data_streams_dict + +class DatabaseInfo(object): + def __init__(self,db_id,sqlite_db,needs_closing): + self.db_id = db_id + self.sqlite_db = sqlite_db + self.needs_closing = needs_closing + + def __str__(self): + return "DatabaseInfo" % (self.sqlite_db,self.needs_closing) + __repr__ = __str__ + class QTextAsData(object): - def __init__(self,default_input_params=QInputParams()): + def __init__(self,default_input_params=QInputParams(),data_streams_dict=None): + self.engine_id = str(uuid.uuid4()).replace("-","_") + self.default_input_params = default_input_params + xprint("Default input params: %s" % self.default_input_params) + + self.loaded_table_structures_dict = OrderedDict() + self.databases = OrderedDict() - self.table_creators = {} + if data_streams_dict is not None: + self.data_streams = DataStreams(data_streams_dict) + else: + self.data_streams = DataStreams({}) # Create DB object - self.db = Sqlite3DB() + self.query_level_db_id = 'query_e_%s' % self.engine_id + self.query_level_db = Sqlite3DB(self.query_level_db_id, + 'file:%s?mode=memory&cache=shared' % self.query_level_db_id,'',create_qcatalog=True) + self.adhoc_db_id = 'adhoc_e_%s' % self.engine_id + self.adhoc_db_name = 'file:%s?mode=memory&cache=shared' % self.adhoc_db_id + self.adhoc_db = Sqlite3DB(self.adhoc_db_id,self.adhoc_db_name,'',create_qcatalog=True) + self.query_level_db.conn.execute("attach '%s' as %s" % (self.adhoc_db_name,self.adhoc_db_id)) + + self.add_db_to_database_list(DatabaseInfo(self.query_level_db_id,self.query_level_db,needs_closing=True)) + self.add_db_to_database_list(DatabaseInfo(self.adhoc_db_id,self.adhoc_db,needs_closing=True)) + + def done(self): + xprint("Inside done: Database list is %s" % self.databases) + for db_id in reversed(self.databases.keys()): + database_info = self.databases[db_id] + if 
database_info.needs_closing: + xprint("Gonna close database %s - %s" % (db_id,self.databases[db_id])) + self.databases[db_id].sqlite_db.done() + xprint("Database %s has been closed" % db_id) + else: + xprint("No need to close database %s" % db_id) + xprint("Closed all databases") input_quoting_modes = { 'minimal' : csv.QUOTE_MINIMAL, 'all' : csv.QUOTE_ALL, @@ -1458,66 +2554,249 @@ def determine_proper_dialect(self,input_params): def get_dialect_id(self,filename): return 'q_dialect_%s' % filename - def _load_data(self,filename,input_params=QInputParams(),stdin_file=None,stdin_filename='-',stop_after_analysis=False): - start_time = time.time() + def _open_files_and_get_mfss(self,qtable_name,input_params,dialect): + materialized_file_dict = OrderedDict() + + materialized_state_type,table_source_type,source_info = detect_qtable_name_source_info(qtable_name,self.data_streams,read_caching_enabled=input_params.read_caching) + xprint("Detected source type %s source info %s" % (materialized_state_type,source_info)) + + if materialized_state_type == MaterializedStateType.DATA_STREAM: + (data_stream,) = source_info + ms = MaterialiedDataStreamState(table_source_type,qtable_name,input_params,dialect,self.engine_id,data_stream,stream_target_db=self.adhoc_db) + effective_qtable_name = data_stream.stream_id + elif materialized_state_type == MaterializedStateType.QSQL_FILE: + (qsql_filename,table_name) = source_info + ms = MaterializedQsqlState(table_source_type,qtable_name, qsql_filename=qsql_filename, table_name=table_name, + engine_id=self.engine_id, input_params=input_params, dialect_id=dialect) + effective_qtable_name = '%s:::%s' % (qsql_filename, table_name) + elif materialized_state_type == MaterializedStateType.SQLITE_FILE: + (sqlite_filename,table_name) = source_info + ms = MaterializedSqliteState(table_source_type,qtable_name, sqlite_filename=sqlite_filename, table_name=table_name, + engine_id=self.engine_id) + effective_qtable_name = '%s:::%s' % (sqlite_filename, 
table_name) + elif materialized_state_type == MaterializedStateType.DELIMITED_FILE: + (source_qtable_name,_) = source_info + ms = MaterializedDelimitedFileState(table_source_type,source_qtable_name, input_params, dialect, self.engine_id) + effective_qtable_name = source_qtable_name + else: + assert False, "Unknown file type for qtable %s should have exited with an exception" % (qtable_name) + + assert effective_qtable_name not in materialized_file_dict + materialized_file_dict[effective_qtable_name] = ms + + xprint("MS dict: %s" % str(materialized_file_dict)) + + return list([item for item in materialized_file_dict.values()]) + + def _load_mfs(self,mfs,input_params,dialect_id,stop_after_analysis): + xprint("Loading MFS:", mfs) + + materialized_state_type = mfs.get_materialized_state_type() + xprint("Detected materialized state type for %s: %s" % (mfs.qtable_name,materialized_state_type)) + + mfs.initialize() + + if not materialized_state_type in [MaterializedStateType.DATA_STREAM]: + if stop_after_analysis or self.should_copy_instead_of_attach(input_params): + xprint("Should copy instead of attaching. Forcing db to use to adhoc db") + forced_db_to_use = self.adhoc_db + else: + forced_db_to_use = None + else: + forced_db_to_use = None + + mfs.choose_db_to_use(forced_db_to_use,stop_after_analysis) + xprint("Chosen db to use: source %s source_type %s db_id %s db_to_use %s" % (mfs.source,mfs.source_type,mfs.db_id,mfs.db_to_use)) + + database_info,relevant_table = mfs.make_data_available(stop_after_analysis) + + if not self.is_adhoc_db(mfs.db_to_use) and not self.should_copy_instead_of_attach(input_params): + if not self.already_attached_to_query_level_db(mfs.db_to_use): + self.attach_to_db(mfs.db_to_use, self.query_level_db) + self.add_db_to_database_list(database_info) + else: + xprint("DB %s is already attached to query level db. 
No need to attach it again.") + + mfs.finalize() + + xprint("MFS Loaded") + + return mfs.source,mfs.source_type + + def add_db_to_database_list(self,database_info): + db_id = database_info.db_id + assert db_id is not None + assert database_info.sqlite_db is not None + if db_id in self.databases: + # TODO Convert to assertion + if id(database_info.sqlite_db) != id(self.databases[db_id].sqlite_db): + raise Exception('Bug - database already in database list: db_id %s: old %s new %s' % (db_id,self.databases[db_id],database_info)) + else: + return + self.databases[db_id] = database_info + + def is_adhoc_db(self,db_to_use): + return db_to_use.db_id == self.adhoc_db_id + + def should_copy_instead_of_attach(self,input_params): + attached_database_count = len(self.query_level_db.get_sqlite_database_list()) + x = attached_database_count >= input_params.max_attached_sqlite_databases + xprint("should_copy_instead_of_attach: attached_database_count=%s should_copy=%s" % (attached_database_count,x)) + return x + + def _load_data(self,qtable_name,input_params=QInputParams(),stop_after_analysis=False): + xprint("Attempting to load data for materialized file names %s" % qtable_name) q_dialect = self.determine_proper_dialect(input_params) - dialect_id = self.get_dialect_id(filename) + xprint("Dialect is %s" % q_dialect) + dialect_id = self.get_dialect_id(qtable_name) csv.register_dialect(dialect_id, **q_dialect) - csv.field_size_limit(input_params.max_column_length_limit) + xprint("qtable metadata for loading is %s" % qtable_name) + mfss = self._open_files_and_get_mfss(qtable_name, + input_params, + dialect_id) + assert len(mfss) == 1, "one MS now encapsulated an entire table" + mfs = mfss[0] - # Create a line splitter - line_splitter = LineSplitter(input_params.delimiter, input_params.expected_column_count) + xprint("MFS to load: %s" % mfs) - # reuse already loaded data, except for stdin file data (stdin file data will always - # be reloaded and overwritten) - if filename in 
self.table_creators.keys() and filename != stdin_filename: + if qtable_name in self.loaded_table_structures_dict.keys(): + xprint("Atomic filename %s found. no need to load" % qtable_name) return None - # Create the matching database table and populate it - table_creator = TableCreator( - self.db, filename, line_splitter, input_params.skip_header, input_params.gzipped_input, input_params.with_universal_newlines,input_params.input_encoding, - mode=input_params.parsing_mode, expected_column_count=input_params.expected_column_count, - input_delimiter=input_params.delimiter,disable_column_type_detection=input_params.disable_column_type_detection, - stdin_file = stdin_file,stdin_filename = stdin_filename) + xprint("qtable %s not found - loading" % qtable_name) - table_creator.populate(dialect_id,stop_after_analysis) - self.table_creators[filename] = table_creator + self._load_mfs(mfs, input_params, dialect_id, stop_after_analysis) + xprint("Loaded: source-type %s source %s mfs_structure %s" % (mfs.source_type, mfs.source, mfs.mfs_structure)) - return QDataLoad(filename,start_time,time.time()) + assert qtable_name not in self.loaded_table_structures_dict, "loaded_table_structures_dict has been changed to have a non-list value" + self.loaded_table_structures_dict[qtable_name] = mfs.mfs_structure - def load_data(self,filename,input_params=QInputParams(),stop_after_analysis=False): - self._load_data(filename,input_params,stop_after_analysis=stop_after_analysis) + return mfs.mfs_structure + + def already_attached_to_query_level_db(self,db_to_attach): + attached_dbs = list(map(lambda x:x[1],self.query_level_db.get_sqlite_database_list())) + return db_to_attach.db_id in attached_dbs - def load_data_from_string(self,filename,str_data,input_params=QInputParams(),stop_after_analysis=False): - sf = six.StringIO(str_data) + def attach_to_db(self, target_db, source_db): + q = "attach '%s' as %s" % (target_db.sqlite_db_url,target_db.db_id) + xprint("Attach query: %s" % q) try: - 
self._load_data(filename,input_params,stdin_file=sf,stdin_filename=filename,stop_after_analysis=stop_after_analysis) - finally: - if sf is not None: - sf.close() + c = source_db.execute_and_fetch(q) + except SqliteOperationalErrorException as e: + if 'too many attached databases' in str(e): + raise TooManyAttachedDatabasesException('There are too many attached databases. Use a proper --max-attached-sqlite-databases parameter which is below the maximum. Original error: %s' % str(e)) + except Exception as e1: + raise - def _ensure_data_is_loaded(self,sql_object,input_params,stdin_file,stdin_filename='-',stop_after_analysis=False): - data_loads = [] + def detach_from_db(self, target_db, source_db): + q = "detach %s" % (target_db.db_id) + xprint("Detach query: %s" % q) + try: + c = source_db.execute_and_fetch(q) + except Exception as e1: + raise - # Get each "table name" which is actually the file name - for filename in sql_object.qtable_names: - data_load = self._load_data(filename,input_params,stdin_file=stdin_file,stdin_filename=stdin_filename,stop_after_analysis=stop_after_analysis) - if data_load is not None: - data_loads.append(data_load) + def load_data(self,filename,input_params=QInputParams(),stop_after_analysis=False): + return self._load_data(filename,input_params,stop_after_analysis=stop_after_analysis) + + def _ensure_data_is_loaded_for_sql(self,sql_object,input_params,data_streams=None,stop_after_analysis=False): + xprint("Ensuring Data load") + new_table_structures = OrderedDict() + + # For each "table name" + for qtable_name in sql_object.qtable_names: + tss = self._load_data(qtable_name,input_params,stop_after_analysis=stop_after_analysis) + if tss is not None: + xprint("New Table Structures:",new_table_structures) + assert qtable_name not in new_table_structures, "new_table_structures was changed not to contain a list as a value" + new_table_structures[qtable_name] = tss + + return new_table_structures + + def 
materialize_query_level_db(self,save_db_to_disk_filename,sql_object): + # TODO More robust creation - Create the file in a separate folder and move it to the target location only after success + + materialized_db = Sqlite3DB("materialized","file:%s" % save_db_to_disk_filename,save_db_to_disk_filename,create_qcatalog=False) + table_name_mapping = OrderedDict() + + # For each table in the query + effective_table_names = sql_object.get_qtable_name_effective_table_names() + + for i, qtable_name in enumerate(effective_table_names): + # table name, in the format db_id.table_name + effective_table_name_for_qtable_name = effective_table_names[qtable_name] + + source_db_id, actual_table_name_in_db = effective_table_name_for_qtable_name.split(".", 1) + # The DatabaseInfo instance for this db + source_database = self.databases[source_db_id] + if source_db_id != self.query_level_db_id: + self.attach_to_db(source_database.sqlite_db,materialized_db) + + ts = self.loaded_table_structures_dict[qtable_name] + proposed_new_table_name = ts.planned_table_name + xprint("Proposed table name is %s" % proposed_new_table_name) + + new_table_name = materialized_db.find_new_table_name(proposed_new_table_name) + + xprint("Materializing",source_db_id,actual_table_name_in_db,"as",new_table_name) + # Copy the table into the materialized database + xx = materialized_db.execute_and_fetch('CREATE TABLE %s AS SELECT * FROM %s' % (new_table_name,effective_table_name_for_qtable_name)) + + table_name_mapping[effective_table_name_for_qtable_name] = new_table_name + + # TODO RLRL Preparation for writing materialized database as a qsql file + # if source_database.sqlite_db.qcatalog_table_exists(): + # qcatalog_entry = source_database.sqlite_db.get_from_qcatalog_using_table_name(actual_table_name_in_db) + # # TODO RLRL Encapsulate dictionary transform inside qcatalog access methods + # materialized_db.add_to_qcatalog_table(new_table_name,OrderedDict(json.loads(qcatalog_entry['content_signature'])), + # 
qcatalog_entry['creation_time'], + # qcatalog_entry['source_type'], + # qcatalog_entry['source_type']) + # xprint("PQX Added to qcatalog",source_db_id,actual_table_name_in_db,'as',new_table_name) + # else: + # xprint("PQX Skipped adding to qcatalog",source_db_id,actual_table_name_in_db) + + if source_db_id != self.query_level_db: + self.detach_from_db(source_database.sqlite_db,materialized_db) + + return table_name_mapping + + def validate_query(self,sql_object,table_structures): + + for qtable_name in sql_object.qtable_names: + relevant_table_structures = [table_structures[qtable_name]] + + column_names = None + column_types = None + for ts in relevant_table_structures: + names = ts.column_names + types = ts.python_column_types + xprint("Comparing column names: %s with %s" % (column_names,names)) + if column_names is None: + column_names = names + else: + if column_names != names: + raise BadHeaderException("Column names differ for table %s: %s vs %s" % ( + qtable_name, ",".join(column_names), ",".join(names))) + + xprint("Comparing column types: %s with %s" % (column_types,types)) + if column_types is None: + column_types = types + else: + if column_types != types: + raise BadHeaderException("Column types differ for table %s: %s vs %s" % ( + qtable_name, ",".join(column_types), ",".join(types))) - return data_loads + xprint("All column names match for qtable name %s: column names: %s column types: %s" % (ts.qtable_name,column_names,column_types)) - def materialize_sql_object(self,sql_object): - for filename in sql_object.qtable_names: - sql_object.set_effective_table_name(filename,self.table_creators[filename].table_name) + xprint("Query validated") - def _execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-',stop_after_analysis=False,save_db_to_disk_filename=None,save_db_to_disk_method=None): + def _execute(self,query_str,input_params=None,data_streams=None,stop_after_analysis=False,save_db_to_disk_filename=None): warnings = [] error = None 
- data_loads = [] table_structures = [] db_results_obj = None @@ -1532,49 +2811,67 @@ def _execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-' error = QError(EncodedQueryException(''),"Query should be in unicode. Please make sure to provide a unicode literal string or decode it using proper the character encoding.",91) return QOutput(error = error) - # Create SQL statement - sql_object = Sql('%s' % query_str) try: + # Create SQL statement + sql_object = Sql('%s' % query_str, self.data_streams) + load_start_time = time.time() - data_loads += self._ensure_data_is_loaded(sql_object,effective_input_params,stdin_file=stdin_file,stdin_filename=stdin_filename,stop_after_analysis=stop_after_analysis) + iprint("Going to ensure data is loaded. Currently loaded tables: %s" % str(self.loaded_table_structures_dict)) + new_table_structures = self._ensure_data_is_loaded_for_sql(sql_object,effective_input_params,data_streams,stop_after_analysis=stop_after_analysis) + iprint("Ensured data is loaded. loaded tables: %s" % self.loaded_table_structures_dict) + + self.validate_query(sql_object,self.loaded_table_structures_dict) + + iprint("Query validated") - table_structures = self._create_table_structures_list() + sql_object.materialize_using(self.loaded_table_structures_dict) - self.materialize_sql_object(sql_object) + iprint("Materialized sql object") if save_db_to_disk_filename is not None: - self.db.done() + xprint("Saving query data to disk") dump_start_time = time.time() - print("Data has been loaded in %4.3f seconds" % (dump_start_time - load_start_time), file=sys.stderr) - print("Saving data to db file %s" % save_db_to_disk_filename, file=sys.stderr) - self.db.store_db_to_disk(save_db_to_disk_filename,sql_object.get_qtable_name_effective_table_names(),save_db_to_disk_method) + table_name_mapping = self.materialize_query_level_db(save_db_to_disk_filename,sql_object) print("Data has been saved into %s . 
Saving has taken %4.3f seconds" % (save_db_to_disk_filename,time.time()-dump_start_time), file=sys.stderr) - print("Query to run on the database: %s;" % sql_object.get_effective_sql(True), file=sys.stderr) - # TODO Propagate dump results using a different output class instead of an empty one + effective_sql = sql_object.get_effective_sql(table_name_mapping) + print("Query to run on the database: %s;" % effective_sql, file=sys.stderr) + command_line = 'echo "%s" | sqlite3 %s' % (effective_sql,save_db_to_disk_filename) + print("You can run the query directly from the command line using the following command: %s" % command_line, file=sys.stderr) + # TODO Propagate dump results using a different output class instead of an empty one return QOutput() + # Ensure that adhoc db is not in the middle of a transaction + self.adhoc_db.conn.commit() + + all_databases = self.query_level_db.get_sqlite_database_list() + xprint("Query level db: databases %s" % all_databases) + # Execute the query and fetch the data - db_results_obj = sql_object.execute_and_fetch(self.db) + db_results_obj = sql_object.execute_and_fetch(self.query_level_db) + iprint("Query executed") + + if len(db_results_obj.results) == 0: + warnings.append(QWarning(None, "Warning - data is empty")) return QOutput( data = db_results_obj.results, metadata = QMetadata( - table_structures=table_structures, - output_column_name_list=db_results_obj.query_column_names, - data_loads=data_loads), + table_structures=self.loaded_table_structures_dict, + new_table_structures=new_table_structures, + output_column_name_list=db_results_obj.query_column_names), warnings = warnings, error = error) - - except EmptyDataException as e: - warnings.append(QWarning(e,"Warning - data is empty")) + except InvalidQueryException as e: + error = QError(e,str(e),118) except MissingHeaderException as e: error = QError(e,e.msg,117) except FileNotFoundException as e: error = QError(e,e.msg,30) - except sqlite3.OperationalError as e: - msg = str(e) 
+ except SqliteOperationalErrorException as e: + xprint("Sqlite Operational error: %s" % e) + msg = str(e.original_error) error = QError(e,"query error: %s" % msg,1) if "no such column" in msg and effective_input_params.skip_header: warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names. Another issue might be that the file contains a BOM. Files that are encoded with UTF8 and contain a BOM can be read by specifying `-e utf-9-sig` in the command line. Support for non-UTF8 encoding will be provided in the future.')) @@ -1584,61 +2881,77 @@ def _execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-' error = QError(e,"Cannot decode data. Try to change the encoding by setting it using the -e parameter. Error:%s" % e,3) except BadHeaderException as e: error = QError(e,"Bad header row: %s" % e.msg,35) - except CannotUnzipStdInException as e: + except CannotUnzipDataStreamException as e: error = QError(e,"Cannot decompress standard input. Pipe the input through zcat in order to decompress.",36) except UniversalNewlinesExistException as e: error = QError(e,"Data contains universal newlines. Run q with -U to use universal newlines. Please note that q still doesn't support universal newlines for .gz files or for stdin. Route the data through a regular file to use -U.",103) - except UnprovidedStdInException as e: - error = QError(e,"Standard Input must be provided in order to use it as a table",61) + # deprecated, but shouldn't be used: error = QError(e,"Standard Input must be provided in order to use it as a table",61) except CouldNotConvertStringToNumericValueException as e: error = QError(e,"Could not convert string to a numeric value. Did you use `-w nonnumeric` with unquoted string values? 
Error: %s" % e.msg,58) except CouldNotParseInputException as e: error = QError(e,"Could not parse the input. Please make sure to set the proper -w input-wrapping parameter for your input, and that you use the proper input encoding (-e). Error: %s" % e.msg,59) except ColumnMaxLengthLimitExceededException as e: error = QError(e,e.msg,31) - except MissingSqliteBckModuleException as e: - error = QError(e,e.msg,79) + # deprecated, but shouldn't be used: error = QError(e,e.msg,79) + except ContentSignatureDiffersException as e: + error = QError(e,"%s vs %s: Content Signatures for table %s differ at %s (source value '%s' disk signature value '%s')" % + (e.original_filename,e.other_filename,e.filenames_str,e.key,e.source_value,e.signature_value),80) + except ContentSignatureDataDiffersException as e: + error = QError(e,e.msg,81) + except MaximumSourceFilesExceededException as e: + error = QError(e,e.msg,82) + except ContentSignatureNotFoundException as e: + error = QError(e,e.msg,83) + except NonExistentTableNameInQsql as e: + msg = "Table %s could not be found in qsql file %s . Existing table names: %s" % (e.table_name,e.qsql_filename,",".join(e.existing_table_names)) + error = QError(e,msg,84) + except NonExistentTableNameInSqlite as e: + msg = "Table %s could not be found in sqlite file %s . Existing table names: %s" % (e.table_name,e.qsql_filename,",".join(e.existing_table_names)) + error = QError(e,msg,85) + except TooManyTablesInQsqlException as e: + msg = "Could not autodetect table name in qsql file. Existing Tables %s" % ",".join(e.existing_table_names) + error = QError(e,msg,86) + except NoTableInQsqlExcption as e: + msg = "Could not autodetect table name in qsql file. File contains no record of a table" + error = QError(e,msg,97) + except TooManyTablesInSqliteException as e: + msg = "Could not autodetect table name in sqlite file %s . 
Existing tables: %s" % (e.qsql_filename,",".join(e.existing_table_names)) + error = QError(e,msg,87) + except NoTablesInSqliteException as e: + msg = "sqlite file %s has no tables" % e.sqlite_filename + error = QError(e,msg,88) + except TooManyAttachedDatabasesException as e: + msg = str(e) + error = QError(e,msg,89) + except UnknownFileTypeException as e: + msg = str(e) + error = QError(e,msg,95) except KeyboardInterrupt as e: warnings.append(QWarning(e,"Interrupted")) except Exception as e: + global DEBUG if DEBUG: - print(traceback.format_exc()) + xprint(traceback.format_exc()) error = QError(e,repr(e),199) - return QOutput(warnings = warnings,error = error , metadata=QMetadata(table_structures=table_structures,data_loads = data_loads)) + return QOutput(data=None,warnings = warnings,error = error , metadata=QMetadata(table_structures=self.loaded_table_structures_dict,new_table_structures=self.loaded_table_structures_dict,output_column_name_list=[])) - def execute(self,query_str,input_params=None,stdin_file=None,stdin_filename='-',save_db_to_disk_filename=None,save_db_to_disk_method=None): - return self._execute(query_str,input_params,stdin_file,stdin_filename,stop_after_analysis=False,save_db_to_disk_filename=save_db_to_disk_filename,save_db_to_disk_method=save_db_to_disk_method) + def execute(self,query_str,input_params=None,save_db_to_disk_filename=None): + r = self._execute(query_str,input_params,stop_after_analysis=False,save_db_to_disk_filename=save_db_to_disk_filename) + return r def unload(self): - - for filename,table_creator in six.iteritems(self.table_creators): + # TODO This would fail, since table structures are just value objects now. 
Will be fixed as part of making q a full python module + for qtable_name,table_creator in six.iteritems(self.loaded_table_structures_dict): try: table_creator.drop_table() except: # Support no-table select queries pass - self.table_creators = {} - - def _create_materialized_files(self,table_creator): - d = table_creator.materialized_file_dict - m = {} - for filename,mfs in six.iteritems(d): - m[filename] = QMaterializedFile(filename,mfs.is_stdin) - return m + self.loaded_table_structures_dict = OrderedDict() - def _create_table_structures_list(self): - table_structures = [] - for filename,table_creator in six.iteritems(self.table_creators): - column_names = table_creator.column_inferer.get_column_names() - column_types = [self.db.type_names[table_creator.column_inferer.get_column_dict()[k]].lower() for k in column_names] - materialized_files = self._create_materialized_files(table_creator) - table_structure = QTableStructure(table_creator.filenames_str,materialized_files,column_names,column_types) - table_structures.append(table_structure) - return table_structures - - def analyze(self,query_str,input_params=None,stdin_file=None,stdin_filename='-'): - q_output = self._execute(query_str,input_params,stdin_file,stdin_filename,stop_after_analysis=True) + def analyze(self,query_str,input_params=None,data_streams=None): + q_output = self._execute(query_str,input_params,data_streams=data_streams,stop_after_analysis=True) return q_output @@ -1722,10 +3035,15 @@ def print_analysis(self,f_out,f_err,results): if results.metadata.table_structures is None: return - for table_structure in results.metadata.table_structures: - print("Table for file: %s" % normalized_filename(table_structure.filenames_str), file=f_out) - for n,t in zip(table_structure.column_names,table_structure.column_types): - print(" `%s` - %s" % (n,t), file=f_out) + for qtable_name in results.metadata.table_structures: + table_structures = results.metadata.table_structures[qtable_name] + print("Table: %s" % 
qtable_name,file=f_out) + print(" Sources:",file=f_out) + dl = results.metadata.new_table_structures[qtable_name] + print(" source_type: %s source: %s" % (dl.source_type,dl.source),file=f_out) + print(" Fields:",file=f_out) + for n,t in zip(table_structures.column_names,table_structures.sqlite_column_types): + print(" `%s` - %s" % (n,t), file=f_out) def print_output(self,f_out,f_err,results): try: @@ -1781,9 +3099,6 @@ def _print_output(self,f_out,f_err,results): fmt_str = six.u("{}") if col is not None: - # Hack for python2 - The defaulting rendering of a float to string is losing precision. This hack works around it by using repr() - if six.PY2 and isinstance(col, float) and str(i+1) not in formatting_dict: - col = repr(col) xx = self.output_field_quoting_func(self.output_params.delimiter,col) row_str.append(fmt_str.format(xx)) else: @@ -1796,7 +3111,6 @@ def _print_output(self,f_out,f_err,results): print("Cannot encode data. Error:%s" % e, file=sys.stderr) sys.exit(3) except TypeError as e: - print(traceback.format_exc()) print("Error while formatting output: %s" % e, file=sys.stderr) sys.exit(4) except IOError as e: @@ -1815,162 +3129,301 @@ def _print_output(self,f_out,f_err,results): except IOError as e: pass -def run_standalone(): - p = configparser.ConfigParser() - p.read([os.path.expanduser('~/.qrc'), '.qrc']) - - def get_option_with_default(p, option_type, option, default): +def get_option_with_default(p, option_type, option, default): + try: if not p.has_option('options', option): return default + if p.get('options',option) == 'None': + return None if option_type == 'boolean': - return p.getboolean('options', option) + r = p.getboolean('options', option) + return r elif option_type == 'int': - return p.getint('options', option) + r = p.getint('options', option) + return r elif option_type == 'string': - return p.get('options', option) - elif option_type == 'escaped_string': - return p.get('options', option).decode('string-escape') + r = p.get('options', 
option) + return r else: - raise Exception("Unknown option type") + raise Exception("Unknown option type %s " % option_type) + except ValueError as e: + raise IncorrectDefaultValueException(option_type,option,p.get("options",option)) + +QRC_FILENAME_ENVVAR = 'QRC_FILENAME' + +def dump_default_values_as_qrc(parser,exclusions): + m = parser.get_default_values().__dict__ + print("[options]",file=sys.stdout) + for k in sorted(m.keys()): + if k not in exclusions: + print("%s=%s" % (k,m[k]),file=sys.stdout) - default_beautify = get_option_with_default(p, 'boolean', 'beautify', False) - default_gzipped = get_option_with_default(p, 'boolean', 'gzipped', False) - default_delimiter = get_option_with_default( - p, 'escaped_string', 'delimiter', None) - default_output_delimiter = get_option_with_default( - p, 'escaped_string', 'output_delimiter', None) - default_skip_header = get_option_with_default(p, 'int', 'skip_header', 0) - default_formatting = get_option_with_default(p, 'string', 'formatting', None) - default_encoding = get_option_with_default(p, 'string', 'encoding', 'UTF-8') - default_output_encoding = get_option_with_default(p, 'string', 'encoding', None) - default_query_encoding = get_option_with_default(p, 'string', 'query_encoding', locale.getpreferredencoding()) - default_output_header = get_option_with_default(p, 'string', 'output_header', False) +USAGE_TEXT = """ + q allows performing SQL-like statements on tabular text data. - parser = OptionParser(usage=""" - q allows performing SQL-like statements on tabular text data. + Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. - Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. + Basic usage is q "" where table names are just regular file names (Use - to read from standard input) + When the input contains a header row, use -H, and column names will be set according to the header row content. 
If there isn't a header row, then columns will automatically be named c1..cN. - Basic usage is q "" where table names are just regular file names (Use - to read from standard input) - When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN. + Column types are detected automatically. Use -A in order to see the column name/type analysis. - Column types are detected automatically. Use -A in order to see the column name/type analysis. + Delimiter can be set using the -d (or -t) option. Output delimiter can be set using -D - Delimiter can be set using the -d (or -t) option. Output delimiter can be set using -D + All sqlite3 SQL constructs are supported. - All sqlite3 SQL constructs are supported. + Examples: - Examples: + Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1" + This example would print a count of each unique permission string in the current folder. + + Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -" + This example would provide the average and the sum of the numbers in the range 1 to 1000 + + Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc" + This example will output the total size in MB per user+group in the /tmp subtree + + + See the help or http://harelba.github.io/q/ for more details. +""" + +def run_standalone(): + sqlite3.enable_callback_tracebacks(True) - Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1" - This example would print a count of each unique permission string in the current folder. 
+ p, qrc_filename = parse_qrc_file() - Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -" - This example would provide the average and the sum of the numbers in the range 1 to 1000 + args, options, parser = initialize_command_line_parser(p, qrc_filename) - Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc" - This example will output the total size in MB per user+group in the /tmp subtree + dump_defaults_and_stop__if_needed(options, parser) + dump_version_and_stop__if_needed(options) + + STDOUT, default_input_params, q_output_printer, query_strs = parse_options(args, options) + + data_streams_dict = initialize_default_data_streams() + + q_engine = QTextAsData(default_input_params=default_input_params,data_streams_dict=data_streams_dict) + + execute_queries(STDOUT, options, q_engine, q_output_printer, query_strs) + + q_engine.done() + + sys.exit(0) + + +def dump_version_and_stop__if_needed(options): + if options.version: + print_credentials() + sys.exit(0) - See the help or https://github.com/harelba/q/ for more details. 
- """) - #----------------------------------------------- +def dump_defaults_and_stop__if_needed(options, parser): + if options.dump_defaults: + dump_default_values_as_qrc(parser, ['dump-defaults', 'version']) + sys.exit(0) + + +def execute_queries(STDOUT, options, q_engine, q_output_printer, query_strs): + for query_str in query_strs: + if options.analyze_only: + q_output = q_engine.analyze(query_str) + q_output_printer.print_analysis(STDOUT, sys.stderr, q_output) + else: + q_output = q_engine.execute(query_str, save_db_to_disk_filename=options.save_db_to_disk_filename) + q_output_printer.print_output(STDOUT, sys.stderr, q_output) + + if q_output.status == 'error': + sys.exit(q_output.error.errorcode) + + +def initialize_command_line_parser(p, qrc_filename): + try: + default_verbose = get_option_with_default(p, 'boolean', 'verbose', False) + default_save_db_to_disk = get_option_with_default(p, 'string', 'save_db_to_disk_filename', None) + default_caching_mode = get_option_with_default(p, 'string', 'caching_mode', 'none') + + default_skip_header = get_option_with_default(p, 'boolean', 'skip_header', False) + default_delimiter = get_option_with_default(p, 'string', 'delimiter', None) + default_pipe_delimited = get_option_with_default(p, 'boolean', 'pipe_delimited', False) + default_tab_delimited = get_option_with_default(p, 'boolean', 'tab_delimited', False) + default_encoding = get_option_with_default(p, 'string', 'encoding', 'UTF-8') + default_gzipped = get_option_with_default(p, 'boolean', 'gzipped', False) + default_analyze_only = get_option_with_default(p, 'boolean', 'analyze_only', False) + default_mode = get_option_with_default(p, 'string', 'mode', "relaxed") + default_column_count = get_option_with_default(p, 'string', 'column_count', None) + default_keep_leading_whitespace_in_values = get_option_with_default(p, 'boolean', + 'keep_leading_whitespace_in_values', False) + default_disable_double_double_quoting = get_option_with_default(p, 'boolean', 
'disable_double_double_quoting', + True) + default_disable_escaped_double_quoting = get_option_with_default(p, 'boolean', 'disable_escaped_double_quoting', + True) + default_disable_column_type_detection = get_option_with_default(p, 'boolean', 'disable_column_type_detection', + False) + default_input_quoting_mode = get_option_with_default(p, 'string', 'input_quoting_mode', 'minimal') + default_max_column_length_limit = get_option_with_default(p, 'int', 'max_column_length_limit', 131072) + default_with_universal_newlines = get_option_with_default(p, 'boolean', 'with_universal_newlines', False) + + default_output_delimiter = get_option_with_default(p, 'string', 'output_delimiter', None) + default_pipe_delimited_output = get_option_with_default(p, 'boolean', 'pipe_delimited_output', False) + default_tab_delimited_output = get_option_with_default(p, 'boolean', 'tab_delimited_output', False) + default_output_header = get_option_with_default(p, 'string', 'output_header', False) + default_beautify = get_option_with_default(p, 'boolean', 'beautify', False) + default_formatting = get_option_with_default(p, 'string', 'formatting', None) + default_output_encoding = get_option_with_default(p, 'string', 'output_encoding', 'none') + default_output_quoting_mode = get_option_with_default(p, 'string', 'output_quoting_mode', 'minimal') + default_list_user_functions = get_option_with_default(p, 'boolean', 'list_user_functions', False) + default_overwrite_qsql = get_option_with_default(p, 'boolean', 'overwrite_qsql', False) + + default_query_filename = get_option_with_default(p, 'string', 'query_filename', None) + default_query_encoding = get_option_with_default(p, 'string', 'query_encoding', locale.getpreferredencoding()) + default_max_attached_sqlite_databases = get_option_with_default(p,'int','max_attached_sqlite_databases', 10) + except IncorrectDefaultValueException as e: + print("Incorrect value '%s' for option %s in .qrc file %s (option type is %s)" % ( + e.actual_value, 
e.option, qrc_filename, e.option_type)) + sys.exit(199) + parser = OptionParser(prog="q",usage=USAGE_TEXT) parser.add_option("-v", "--version", dest="version", default=False, action="store_true", help="Print version") - parser.add_option("-V", "--verbose", dest="verbose", default=False, action="store_true", + parser.add_option("-V", "--verbose", dest="verbose", default=default_verbose, action="store_true", help="Print debug info in case of problems") - parser.add_option("-S", "--save-db-to-disk", dest="save_db_to_disk_filename", default=None, + parser.add_option("-S", "--save-db-to-disk", dest="save_db_to_disk_filename", default=default_save_db_to_disk, help="Save database to an sqlite database file") - parser.add_option("", "--save-db-to-disk-method", dest="save_db_to_disk_method", default='standard', - help="Method to use to save db to disk. 'standard' does not require any deps, 'fast' currenty requires manually running `pip install sqlitebck` on your python installation. Once packing issues are solved, the fast method will be the default.") - #----------------------------------------------- - input_data_option_group = OptionGroup(parser,"Input Data Options") - input_data_option_group.add_option("-H", "--skip-header", dest="skip_header", default=default_skip_header, action="store_true", - help="Skip header row. This has been changed from earlier version - Only one header row is supported, and the header row is used for column naming") + parser.add_option("-C", "--caching-mode", dest="caching_mode", default=default_caching_mode, + help="Choose the autocaching mode (none/read/readwrite). Autocaches files to disk db so further queries will be faster. Caching is done to a side-file with the same name of the table, but with an added extension .qsql") + parser.add_option("", "--dump-defaults", dest="dump_defaults", default=False, action="store_true", + help="Dump all default values for parameters and exit. 
Can be used in order to make sure .qrc file content is being read properly.") + parser.add_option("", "--max-attached-sqlite-databases", dest="max_attached_sqlite_databases", default=default_max_attached_sqlite_databases,type="int", + help="Set the maximum number of concurrently-attached sqlite dbs. This is a compile time definition of sqlite. q's performance will slow down once this limit is reached for a query, since it will perform table copies in order to avoid that limit.") + # ----------------------------------------------- + input_data_option_group = OptionGroup(parser, "Input Data Options") + input_data_option_group.add_option("-H", "--skip-header", dest="skip_header", default=default_skip_header, + action="store_true", + help="Skip header row. This has been changed from earlier version - Only one header row is supported, and the header row is used for column naming") input_data_option_group.add_option("-d", "--delimiter", dest="delimiter", default=default_delimiter, - help="Field delimiter. If none specified, then space is used as the delimiter.") - input_data_option_group.add_option("-p", "--pipe-delimited", dest="pipe_delimited", default=False, action="store_true", - help="Same as -d '|'. Added for convenience and readability") - input_data_option_group.add_option("-t", "--tab-delimited", dest="tab_delimited", default=False, action="store_true", - help="Same as -d . Just a shorthand for handling standard tab delimited file You can use $'\\t' if you want (this is how Linux expects to provide tabs in the command line") + help="Field delimiter. If none specified, then space is used as the delimiter.") + input_data_option_group.add_option("-p", "--pipe-delimited", dest="pipe_delimited", default=default_pipe_delimited, + action="store_true", + help="Same as -d '|'. 
Added for convenience and readability") + input_data_option_group.add_option("-t", "--tab-delimited", dest="tab_delimited", default=default_tab_delimited, + action="store_true", + help="Same as -d . Just a shorthand for handling standard tab delimited file You can use $'\\t' if you want (this is how Linux expects to provide tabs in the command line") input_data_option_group.add_option("-e", "--encoding", dest="encoding", default=default_encoding, - help="Input file encoding. Defaults to UTF-8. set to none for not setting any encoding - faster, but at your own risk...") + help="Input file encoding. Defaults to UTF-8. set to none for not setting any encoding - faster, but at your own risk...") input_data_option_group.add_option("-z", "--gzipped", dest="gzipped", default=default_gzipped, action="store_true", - help="Data is gzipped. Useful for reading from stdin. For files, .gz means automatic gunzipping") - input_data_option_group.add_option("-A", "--analyze-only", dest="analyze_only", action='store_true', - help="Analyze sample input and provide information about data types") - input_data_option_group.add_option("-m", "--mode", dest="mode", default="relaxed", - help="Data parsing mode. fluffy, relaxed and strict. In strict mode, the -c column-count parameter must be supplied as well") - input_data_option_group.add_option("-c", "--column-count", dest="column_count", default=None, - help="Specific column count when using relaxed or strict mode") - input_data_option_group.add_option("-k", "--keep-leading-whitespace", dest="keep_leading_whitespace_in_values", default=False, action="store_true", - help="Keep leading whitespace in values. Default behavior strips leading whitespace off values, in order to provide out-of-the-box usability for simple use cases. 
If you need to preserve whitespace, use this flag.") - input_data_option_group.add_option("--disable-double-double-quoting", dest="disable_double_double_quoting", default=True, action="store_false", - help="Disable support for double double-quoting for escaping the double quote character. By default, you can use \"\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.") - input_data_option_group.add_option("--disable-escaped-double-quoting", dest="disable_escaped_double_quoting", default=True, action="store_false", - help="Disable support for escaped double-quoting for escaping the double quote character. By default, you can use \\\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.") - input_data_option_group.add_option("--as-text", dest="disable_column_type_detection", default=False, action="store_true", - help="Don't detect column types - All columns will be treated as text columns") - input_data_option_group.add_option("-w","--input-quoting-mode",dest="input_quoting_mode",default="minimal", - help="Input quoting mode. Possible values are all, minimal and none. Note the slightly misleading parameter name, and see the matching -W parameter for output quoting.") - input_data_option_group.add_option("-M","--max-column-length-limit",dest="max_column_length_limit",default=131072, - help="Sets the maximum column length.") - input_data_option_group.add_option("-U","--with-universal-newlines",dest="with_universal_newlines",default=False,action="store_true", - help="Expect universal newlines in the data. Limitation: -U works only with regular files for now, stdin or .gz files are not supported yet.") + help="Data is gzipped. Useful for reading from stdin. 
For files, .gz means automatic gunzipping") + input_data_option_group.add_option("-A", "--analyze-only", dest="analyze_only", default=default_analyze_only, + action='store_true', + help="Analyze sample input and provide information about data types") + input_data_option_group.add_option("-m", "--mode", dest="mode", default=default_mode, + help="Data parsing mode. fluffy, relaxed and strict. In strict mode, the -c column-count parameter must be supplied as well") + input_data_option_group.add_option("-c", "--column-count", dest="column_count", default=default_column_count, + help="Specific column count when using relaxed or strict mode") + input_data_option_group.add_option("-k", "--keep-leading-whitespace", dest="keep_leading_whitespace_in_values", + default=default_keep_leading_whitespace_in_values, action="store_true", + help="Keep leading whitespace in values. Default behavior strips leading whitespace off values, in order to provide out-of-the-box usability for simple use cases. If you need to preserve whitespace, use this flag.") + input_data_option_group.add_option("--disable-double-double-quoting", dest="disable_double_double_quoting", + default=default_disable_double_double_quoting, action="store_false", + help="Disable support for double double-quoting for escaping the double quote character. By default, you can use \"\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.") + input_data_option_group.add_option("--disable-escaped-double-quoting", dest="disable_escaped_double_quoting", + default=default_disable_escaped_double_quoting, action="store_false", + help="Disable support for escaped double-quoting for escaping the double quote character. By default, you can use \\\" inside double quoted fields to escape double quotes. 
Mainly for backward compatibility.") + input_data_option_group.add_option("--as-text", dest="disable_column_type_detection", + default=default_disable_column_type_detection, action="store_true", + help="Don't detect column types - All columns will be treated as text columns") + input_data_option_group.add_option("-w", "--input-quoting-mode", dest="input_quoting_mode", + default=default_input_quoting_mode, + help="Input quoting mode. Possible values are all, minimal and none. Note the slightly misleading parameter name, and see the matching -W parameter for output quoting.") + input_data_option_group.add_option("-M", "--max-column-length-limit", dest="max_column_length_limit", + default=default_max_column_length_limit, + help="Sets the maximum column length.") + input_data_option_group.add_option("-U", "--with-universal-newlines", dest="with_universal_newlines", + default=default_with_universal_newlines, action="store_true", + help="Expect universal newlines in the data. Limitation: -U works only with regular files for now, stdin or .gz files are not supported yet.") parser.add_option_group(input_data_option_group) - #----------------------------------------------- - output_data_option_group = OptionGroup(parser,"Output Options") - output_data_option_group.add_option("-D", "--output-delimiter", dest="output_delimiter", default=default_output_delimiter, - help="Field delimiter for output. If none specified, then the -d delimiter is used if present, or space if no delimiter is specified") - output_data_option_group.add_option("-P", "--pipe-delimited-output", dest="pipe_delimited_output", default=False, action="store_true", - help="Same as -D '|'. Added for convenience and readability.") - output_data_option_group.add_option("-T", "--tab-delimited-output", dest="tab_delimited_output", default=False, action="store_true", - help="Same as -D . Just a shorthand for outputting tab delimited output. 
You can use -D $'\\t' if you want.") - output_data_option_group.add_option("-O", "--output-header", dest="output_header", default=default_output_header, action="store_true",help="Output header line. Output column-names are determined from the query itself. Use column aliases in order to set your column names in the query. For example, 'select name FirstName,value1/value2 MyCalculation from ...'. This can be used even if there was no header in the input.") - output_data_option_group.add_option("-b", "--beautify", dest="beautify", default=default_beautify, action="store_true", - help="Beautify output according to actual values. Might be slow...") + # ----------------------------------------------- + output_data_option_group = OptionGroup(parser, "Output Options") + output_data_option_group.add_option("-D", "--output-delimiter", dest="output_delimiter", + default=default_output_delimiter, + help="Field delimiter for output. If none specified, then the -d delimiter is used if present, or space if no delimiter is specified") + output_data_option_group.add_option("-P", "--pipe-delimited-output", dest="pipe_delimited_output", + default=default_pipe_delimited_output, action="store_true", + help="Same as -D '|'. Added for convenience and readability.") + output_data_option_group.add_option("-T", "--tab-delimited-output", dest="tab_delimited_output", + default=default_tab_delimited_output, action="store_true", + help="Same as -D . Just a shorthand for outputting tab delimited output. You can use -D $'\\t' if you want.") + output_data_option_group.add_option("-O", "--output-header", dest="output_header", default=default_output_header, + action="store_true", + help="Output header line. Output column-names are determined from the query itself. Use column aliases in order to set your column names in the query. For example, 'select name FirstName,value1/value2 MyCalculation from ...'. 
This can be used even if there was no header in the input.") + output_data_option_group.add_option("-b", "--beautify", dest="beautify", default=default_beautify, + action="store_true", + help="Beautify output according to actual values. Might be slow...") output_data_option_group.add_option("-f", "--formatting", dest="formatting", default=default_formatting, - help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 1 for first SELECT column etc.") - output_data_option_group.add_option("-E", "--output-encoding", dest="output_encoding", default=default_output_encoding, - help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding") - output_data_option_group.add_option("-W","--output-quoting-mode",dest="output_quoting_mode",default="minimal", - help="Output quoting mode. Possible values are all, minimal, nonnumeric and none. Note the slightly misleading parameter name, and see the matching -w parameter for input quoting.") - output_data_option_group.add_option("-L","--list-user-functions",dest="list_user_functions",default=False,action="store_true", - help="List all user functions") + help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 1 for first SELECT column etc.") + output_data_option_group.add_option("-E", "--output-encoding", dest="output_encoding", + default=default_output_encoding, + help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding") + output_data_option_group.add_option("-W", "--output-quoting-mode", dest="output_quoting_mode", + default=default_output_quoting_mode, + help="Output quoting mode. Possible values are all, minimal, nonnumeric and none. 
Note the slightly misleading parameter name, and see the matching -w parameter for input quoting.") + output_data_option_group.add_option("-L", "--list-user-functions", dest="list_user_functions", + default=default_list_user_functions, action="store_true", + help="List all user functions") + parser.add_option("", "--overwrite-qsql", dest="overwrite_qsql", default=default_overwrite_qsql, + help="When used, qsql files (both caches and store-to-db) will be overwritten if they already exist. Use with care.") parser.add_option_group(output_data_option_group) - #----------------------------------------------- - query_option_group = OptionGroup(parser,"Query Related Options") - query_option_group.add_option("-q", "--query-filename", dest="query_filename", default=None, - help="Read query from the provided filename instead of the command line, possibly using the provided query encoding (using -Q).") + # ----------------------------------------------- + query_option_group = OptionGroup(parser, "Query Related Options") + query_option_group.add_option("-q", "--query-filename", dest="query_filename", default=default_query_filename, + help="Read query from the provided filename instead of the command line, possibly using the provided query encoding (using -Q).") query_option_group.add_option("-Q", "--query-encoding", dest="query_encoding", default=default_query_encoding, - help="query text encoding. Experimental. Please send your feedback on this") + help="query text encoding. Experimental. 
Please send your feedback on this") parser.add_option_group(query_option_group) - #----------------------------------------------- - + # ----------------------------------------------- (options, args) = parser.parse_args() + return args, options, parser + + +def parse_qrc_file(): + p = configparser.ConfigParser() + if QRC_FILENAME_ENVVAR in os.environ: + qrc_filename = os.environ[QRC_FILENAME_ENVVAR] + if qrc_filename != 'None': + xprint("qrc filename is %s" % qrc_filename) + if os.path.exists(qrc_filename): + p.read([os.environ[QRC_FILENAME_ENVVAR]]) + else: + print('QRC_FILENAME env var exists, but cannot find qrc file at %s' % qrc_filename, file=sys.stderr) + sys.exit(244) + else: + pass # special handling of 'None' env var value for QRC_FILENAME. Allows to eliminate the default ~/.qrc reading + else: + qrc_filename = os.path.expanduser('~/.qrc') + p.read([qrc_filename, '.qrc']) + return p, qrc_filename - if options.version: - print_credentials() - sys.exit(0) -### +def initialize_default_data_streams(): + data_streams_dict = { + '-': DataStream('stdin', '-', sys.stdin) + } + return data_streams_dict + +def parse_options(args, options): if options.list_user_functions: print_user_functions() sys.exit(0) - if len(args) == 0 and options.query_filename is None: print_credentials() - print("Must provide at least one query in the command line, or through a file with the -q parameter", file=sys.stderr) + print("Must provide at least one query in the command line, or through a file with the -q parameter", + file=sys.stderr) sys.exit(1) - if options.query_filename is not None: if len(args) != 0: print("Can't provide both a query file and a query on the command line", file=sys.stderr) sys.exit(1) try: - f = open(options.query_filename,'rb') + f = open(options.query_filename, 'rb') query_strs = [f.read()] f.close() except: @@ -1981,25 +3434,23 @@ def get_option_with_default(p, option_type, option, default): query_strs = [x.encode(sys.stdin.encoding) for x in args] else: 
query_strs = args - if options.query_encoding is not None and options.query_encoding != 'none': try: for idx in range(len(query_strs)): query_strs[idx] = query_strs[idx].decode(options.query_encoding).strip() if len(query_strs[idx]) == 0: - print("Query cannot be empty (query number %s)" % (idx+1), file=sys.stderr) + print("Query cannot be empty (query number %s)" % (idx + 1), file=sys.stderr) sys.exit(1) except Exception as e: - print("Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding), file=sys.stderr) + print("Could not decode query number %s using the provided query encoding (%s)" % ( + idx + 1, options.query_encoding), file=sys.stderr) sys.exit(3) -### - - if options.mode not in ['fluffy', 'relaxed', 'strict']: - print("Parsing mode can be one of fluffy, relaxed or strict", file=sys.stderr) + ### + if options.mode not in ['relaxed', 'strict']: + print("Parsing mode can either be relaxed or strict", file=sys.stderr) sys.exit(13) - output_encoding = get_stdout_encoding(options.output_encoding) try: if six.PY3: @@ -2009,35 +3460,29 @@ def get_option_with_default(p, option_type, option, default): except: print("Could not create output stream using output encoding %s" % (output_encoding), file=sys.stderr) sys.exit(200) - # If the user flagged for a tab-delimited file then set the delimiter to tab if options.tab_delimited: if options.delimiter is not None and options.delimiter != '\t': - print("Warning: -t parameter overrides -d parameter (%s)" % options.delimiter,file=sys.stderr) + print("Warning: -t parameter overrides -d parameter (%s)" % options.delimiter, file=sys.stderr) options.delimiter = '\t' - # If the user flagged for a pipe-delimited file then set the delimiter to pipe if options.pipe_delimited: if options.delimiter is not None and options.delimiter != '|': - print("Warning: -p parameter overrides -d parameter (%s)" % options.delimiter,file=sys.stderr) + print("Warning: -p parameter overrides -d 
parameter (%s)" % options.delimiter, file=sys.stderr) options.delimiter = '|' - if options.delimiter is None: options.delimiter = ' ' elif len(options.delimiter) != 1: print("Delimiter must be one character only", file=sys.stderr) sys.exit(5) - if options.tab_delimited_output: if options.output_delimiter is not None and options.output_delimiter != '\t': - print("Warning: -T parameter overrides -D parameter (%s)" % options.output_delimiter,file=sys.stderr) + print("Warning: -T parameter overrides -D parameter (%s)" % options.output_delimiter, file=sys.stderr) options.output_delimiter = '\t' - if options.pipe_delimited_output: if options.output_delimiter is not None and options.output_delimiter != '|': - print("Warning: -P parameter overrides -D parameter (%s)" % options.output_delimiter,file=sys.stderr) + print("Warning: -P parameter overrides -D parameter (%s)" % options.output_delimiter, file=sys.stderr) options.output_delimiter = '|' - if options.output_delimiter: # If output delimiter is specified, then we use it options.output_delimiter = options.output_delimiter @@ -2051,37 +3496,41 @@ def get_option_with_default(p, option_type, option, default): # if no input delimiter is specified, then we use space as the default # (since no input delimiter means any whitespace) options.output_delimiter = " " - try: max_column_length_limit = int(options.max_column_length_limit) - if max_column_length_limit < 1: - raise Exception() except: - print("Max column length limit must be a positive integer (%s)" % max_column_length_limit, file=sys.stderr) + print("Max column length limit must be an integer larger than 2 (%s)" % options.max_column_length_limit, + file=sys.stderr) + sys.exit(31) + if max_column_length_limit < 3: + print("Maximum column length must be larger than 2",file=sys.stderr) sys.exit(31) + csv.field_size_limit(max_column_length_limit) + xprint("Max column length limit is %s" % options.max_column_length_limit) if options.input_quoting_mode not in 
list(QTextAsData.input_quoting_modes.keys()): - print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(sorted(QTextAsData.input_quoting_modes.keys())),options.input_quoting_mode), file=sys.stderr) + print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % ( + ",".join(sorted(QTextAsData.input_quoting_modes.keys())), options.input_quoting_mode), file=sys.stderr) sys.exit(55) - if options.output_quoting_mode not in list(QOutputPrinter.output_quoting_modes.keys()): - print("Output quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr) + print("Output quoting mode can only be one of %s. It cannot be set to '%s'" % ( + ",".join(QOutputPrinter.output_quoting_modes.keys()), options.input_quoting_mode), file=sys.stderr) sys.exit(56) - if options.column_count is not None: expected_column_count = int(options.column_count) + if expected_column_count < 1 or expected_column_count > int(options.max_column_length_limit): + print("Column count must be between 1 and %s" % int(options.max_column_length_limit),file=sys.stderr) + sys.exit(90) else: # infer automatically expected_column_count = None - if options.encoding != 'none': try: codecs.lookup(options.encoding) except LookupError: print("Encoding %s could not be found" % options.encoding, file=sys.stderr) sys.exit(10) - if options.save_db_to_disk_filename is not None: if options.analyze_only: print("Cannot save database to disk when running with -A (analyze-only) option.", file=sys.stderr) @@ -2091,26 +3540,33 @@ def get_option_with_default(p, option_type, option, default): if os.path.exists(options.save_db_to_disk_filename): print("Disk database file %s already exists." 
% options.save_db_to_disk_filename, file=sys.stderr) sys.exit(77) + # sys.exit(78) Deprecated, but shouldn't be reused + if options.caching_mode not in ['none', 'read', 'readwrite']: + print("caching mode must be none,read or readwrite",file=sys.stderr) + sys.exit(85) + read_caching = options.caching_mode in ['read', 'readwrite'] + write_caching = options.caching_mode in ['readwrite'] - if options.save_db_to_disk_method is not None: - if options.save_db_to_disk_method not in ['standard','fast']: - print("save-db-to-disk method should be either standard or fast (%s)" % options.save_db_to_disk_method, file=sys.stderr) - sys.exit(78) + if options.max_attached_sqlite_databases <= 3: + print("Max attached sqlite databases must be larger than 3") + sys.exit(99) default_input_params = QInputParams(skip_header=options.skip_header, - delimiter=options.delimiter, - input_encoding=options.encoding, - gzipped_input=options.gzipped, - with_universal_newlines=options.with_universal_newlines, - parsing_mode=options.mode, - expected_column_count=expected_column_count, - keep_leading_whitespace_in_values=options.keep_leading_whitespace_in_values, - disable_double_double_quoting=options.disable_double_double_quoting, - disable_escaped_double_quoting=options.disable_escaped_double_quoting, - input_quoting_mode=options.input_quoting_mode, - disable_column_type_detection=options.disable_column_type_detection, - max_column_length_limit=max_column_length_limit) - q_engine = QTextAsData(default_input_params=default_input_params) + delimiter=options.delimiter, + input_encoding=options.encoding, + gzipped_input=options.gzipped, + with_universal_newlines=options.with_universal_newlines, + parsing_mode=options.mode, + expected_column_count=expected_column_count, + keep_leading_whitespace_in_values=options.keep_leading_whitespace_in_values, + disable_double_double_quoting=options.disable_double_double_quoting, + disable_escaped_double_quoting=options.disable_escaped_double_quoting, + 
input_quoting_mode=options.input_quoting_mode, + disable_column_type_detection=options.disable_column_type_detection, + max_column_length_limit=max_column_length_limit, + read_caching=read_caching, + write_caching=write_caching, + max_attached_sqlite_databases=options.max_attached_sqlite_databases) output_params = QOutputParams( delimiter=options.output_delimiter, @@ -2119,22 +3575,9 @@ def get_option_with_default(p, option_type, option, default): formatting=options.formatting, output_header=options.output_header, encoding=output_encoding) - q_output_printer = QOutputPrinter(output_params,show_tracebacks=options.verbose) - - for query_str in query_strs: - if options.analyze_only: - q_output = q_engine.analyze(query_str,stdin_file=sys.stdin) - q_output_printer.print_analysis(STDOUT,sys.stderr,q_output) - else: - q_output = q_engine.execute(query_str,stdin_file=sys.stdin,save_db_to_disk_filename=options.save_db_to_disk_filename,save_db_to_disk_method=options.save_db_to_disk_method) - q_output_printer.print_output(STDOUT,sys.stderr,q_output) - - if q_output.status == 'error': - sys.exit(q_output.error.errorcode) - - q_engine.unload() + q_output_printer = QOutputPrinter(output_params, show_tracebacks=DEBUG) - sys.exit(0) + return STDOUT, default_input_params, q_output_printer, query_strs if __name__ == '__main__': diff --git a/bin/qtextasdata.py b/bin/qtextasdata.py deleted file mode 120000 index ea0c8a85..00000000 --- a/bin/qtextasdata.py +++ /dev/null @@ -1 +0,0 @@ -q \ No newline at end of file diff --git a/build-deb-builder-container b/build-deb-builder-container deleted file mode 100755 index abd021a6..00000000 --- a/build-deb-builder-container +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -if [ $# -ne 1 ]; -then - echo "Usage: $(basename $0) " - exit 1 -fi -VERSION_TAG="$1" - -docker build -f dist/deb-builder-Dockerfile -t q-text-as-data-deb-builder:${VERSION_TAG} . 
diff --git a/build-rpm-builder-container b/build-rpm-builder-container deleted file mode 100755 index 4788f19e..00000000 --- a/build-rpm-builder-container +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -if [ $# -ne 1 ]; -then - echo "Usage: $(basename $0) " - exit 1 -fi -VERSION_TAG="$1" - -docker build -f dist/rpm-builder-Dockerfile -t q-text-as-data-rpm-builder:${VERSION_TAG} . diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..2a7272c8 --- /dev/null +++ b/conftest.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python + +# Required so pytest can find files properly + + diff --git a/create-windows-setup-instructions b/create-windows-setup-instructions deleted file mode 100644 index 9ff5fbef..00000000 --- a/create-windows-setup-instructions +++ /dev/null @@ -1,76 +0,0 @@ - -# Alpha version - process is working, instructions not fully tested for minor details yet. - -Instructions for creating a windows package for q: - -The installation is based on a wine docker container. - -mkdir -p dist/windows - -pushd dist/windows - -fetch all files from https://github.com/harelba/packages-for-q/tree/master/artifactory-for-packaging into that folder. 
- -tar xvzf PyInstaller-2.1.tar.gz - -Fix pyinstaller to work around a bug by running the following command (use gsed for osx or sed for linux): -gsed -i '1587s/^.*$/ if tpl[2] in ["BINARY", "DATA"]:/' ./PyInstaller-2.1/PyInstaller/build.py - -popd - - -d=`pwd` -cid1=`docker run -d -v ${d}:/q -e VNC_PASSWORD=newPW -p 5900:5900 suchja/x11server` -cid2=`docker run -d --rm -i --link ${cid1}:xserver --volumes-from ${cid1} suchja/wine:latest /bin/bash` - -sleep 1 - -function kill_container { - tmp=`docker kill ${cid1} ${cid2}` -} -trap kill_container EXIT - -docker exec -it ${cid2} /bin/bash - - -inside the docker container prompt: - - export DISPLAY=xserver:0 - - wine wineboot --init - - cd ~/.wine/dosdevices/ - - ln -s /q "q:" - - wine msiexec /i q:\\dist\\windows\\python-2.7.13.msi /q - - wine q:\\dist\\windows\\pywin32-219.win32-py2.7.exe - - mkdir ~/.wine/drive_c/q-build-environment - - cp -r /q/dist/windows/PyInstaller-2.1 ~/.wine/drive_c/q-build-environment/ - - cd /q/dist - - wine q:\\dist\\windows\\nsis-2.46-setup.exe - install to c:\q-build-environment\nsis - - - wine c:\\python27\\python.exe c:\\q-build-environment\\PyInstaller-2.1\\pyinstaller.py -F --distpath=win_output --workpath=win_build q:\\bin\\q - - ### Don't forget to change the version in the command below: - - wine c:\\q-build-environment\\nsis\\makensis.exe -DVERSION=1.6.2.0 q:\\dist\\q-TextAsData-with-path.nsi - -run the installation file and check that the install works properly: - - wine setup.exe - -move the installation to the packages folder: - - mv /q/dist/setup.exe /q/packages/setup-q-.exe - -exit from the docker container - - - diff --git a/dist/AddToPath.nsh b/dist/AddToPath.nsh deleted file mode 100644 index b961a1f4..00000000 --- a/dist/AddToPath.nsh +++ /dev/null @@ -1,440 +0,0 @@ -!ifndef _AddToPath_nsh -!define _AddToPath_nsh - -!verbose 3 -!include "WinMessages.NSH" -!verbose 4 - -!ifndef WriteEnvStr_RegKey - !ifdef ALL_USERS - !define WriteEnvStr_RegKey \ - 'HKLM 
"SYSTEM\CurrentControlSet\Control\Session Manager\Environment"' - !else - !define WriteEnvStr_RegKey 'HKCU "Environment"' - !endif -!endif - -; AddToPath - Adds the given dir to the search path. -; Input - head of the stack -; Note - Win9x systems requires reboot - -Function AddToPath - Exch $0 - Push $1 - Push $2 - Push $3 - - # don't add if the path doesn't exist - IfFileExists "$0\*.*" "" AddToPath_done - - ReadEnvStr $1 PATH - Push "$1;" - Push "$0;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - Push "$1;" - Push "$0\;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - GetFullPathName /SHORT $3 $0 - Push "$1;" - Push "$3;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - Push "$1;" - Push "$3\;" - Call StrStr - Pop $2 - StrCmp $2 "" "" AddToPath_done - - Call IsNT - Pop $1 - StrCmp $1 1 AddToPath_NT - ; Not on NT - StrCpy $1 $WINDIR 2 - FileOpen $1 "$1\autoexec.bat" a - FileSeek $1 -1 END - FileReadByte $1 $2 - IntCmp $2 26 0 +2 +2 # DOS EOF - FileSeek $1 -1 END # write over EOF - FileWrite $1 "$\r$\nSET PATH=%PATH%;$3$\r$\n" - FileClose $1 - SetRebootFlag true - Goto AddToPath_done - - AddToPath_NT: - ReadRegStr $1 ${WriteEnvStr_RegKey} "PATH" - StrCmp $1 "" AddToPath_NTdoIt - Push $1 - Call Trim - Pop $1 - StrCpy $0 "$1;$0" - AddToPath_NTdoIt: - WriteRegExpandStr ${WriteEnvStr_RegKey} "PATH" $0 - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - AddToPath_done: - Pop $3 - Pop $2 - Pop $1 - Pop $0 -FunctionEnd - -; RemoveFromPath - Remove a given dir from the path -; Input: head of the stack - -Function un.RemoveFromPath - Exch $0 - Push $1 - Push $2 - Push $3 - Push $4 - Push $5 - Push $6 - - IntFmt $6 "%c" 26 # DOS EOF - - Call un.IsNT - Pop $1 - StrCmp $1 1 unRemoveFromPath_NT - ; Not on NT - StrCpy $1 $WINDIR 2 - FileOpen $1 "$1\autoexec.bat" r - GetTempFileName $4 - FileOpen $2 $4 w - GetFullPathName /SHORT $0 $0 - StrCpy $0 "SET PATH=%PATH%;$0" - Goto unRemoveFromPath_dosLoop - - 
unRemoveFromPath_dosLoop: - FileRead $1 $3 - StrCpy $5 $3 1 -1 # read last char - StrCmp $5 $6 0 +2 # if DOS EOF - StrCpy $3 $3 -1 # remove DOS EOF so we can compare - StrCmp $3 "$0$\r$\n" unRemoveFromPath_dosLoopRemoveLine - StrCmp $3 "$0$\n" unRemoveFromPath_dosLoopRemoveLine - StrCmp $3 "$0" unRemoveFromPath_dosLoopRemoveLine - StrCmp $3 "" unRemoveFromPath_dosLoopEnd - FileWrite $2 $3 - Goto unRemoveFromPath_dosLoop - unRemoveFromPath_dosLoopRemoveLine: - SetRebootFlag true - Goto unRemoveFromPath_dosLoop - - unRemoveFromPath_dosLoopEnd: - FileClose $2 - FileClose $1 - StrCpy $1 $WINDIR 2 - Delete "$1\autoexec.bat" - CopyFiles /SILENT $4 "$1\autoexec.bat" - Delete $4 - Goto unRemoveFromPath_done - - unRemoveFromPath_NT: - ReadRegStr $1 ${WriteEnvStr_RegKey} "PATH" - StrCpy $5 $1 1 -1 # copy last char - StrCmp $5 ";" +2 # if last char != ; - StrCpy $1 "$1;" # append ; - Push $1 - Push "$0;" - Call un.StrStr ; Find `$0;` in $1 - Pop $2 ; pos of our dir - StrCmp $2 "" unRemoveFromPath_done - ; else, it is in path - # $0 - path to add - # $1 - path var - StrLen $3 "$0;" - StrLen $4 $2 - StrCpy $5 $1 -$4 # $5 is now the part before the path to remove - StrCpy $6 $2 "" $3 # $6 is now the part after the path to remove - StrCpy $3 $5$6 - - StrCpy $5 $3 1 -1 # copy last char - StrCmp $5 ";" 0 +2 # if last char == ; - StrCpy $3 $3 -1 # remove last char - - WriteRegExpandStr ${WriteEnvStr_RegKey} "PATH" $3 - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - unRemoveFromPath_done: - Pop $6 - Pop $5 - Pop $4 - Pop $3 - Pop $2 - Pop $1 - Pop $0 -FunctionEnd - - - -; AddToEnvVar - Adds the given value to the given environment var -; Input - head of the stack $0 environement variable $1=value to add -; Note - Win9x systems requires reboot - -Function AddToEnvVar - - Exch $1 ; $1 has environment variable value - Exch - Exch $0 ; $0 has environment variable name - - DetailPrint "Adding $1 to $0" - Push $2 - Push $3 - Push $4 - - - ReadEnvStr 
$2 $0 - Push "$2;" - Push "$1;" - Call StrStr - Pop $3 - StrCmp $3 "" "" AddToEnvVar_done - - Push "$2;" - Push "$1\;" - Call StrStr - Pop $3 - StrCmp $3 "" "" AddToEnvVar_done - - - Call IsNT - Pop $2 - StrCmp $2 1 AddToEnvVar_NT - ; Not on NT - StrCpy $2 $WINDIR 2 - FileOpen $2 "$2\autoexec.bat" a - FileSeek $2 -1 END - FileReadByte $2 $3 - IntCmp $3 26 0 +2 +2 # DOS EOF - FileSeek $2 -1 END # write over EOF - FileWrite $2 "$\r$\nSET $0=%$0%;$4$\r$\n" - FileClose $2 - SetRebootFlag true - Goto AddToEnvVar_done - - AddToEnvVar_NT: - ReadRegStr $2 ${WriteEnvStr_RegKey} $0 - StrCpy $3 $2 1 -1 # copy last char - StrCmp $3 ";" 0 +2 # if last char == ; - StrCpy $2 $2 -1 # remove last char - StrCmp $2 "" AddToEnvVar_NTdoIt - StrCpy $1 "$2;$1" - AddToEnvVar_NTdoIt: - WriteRegExpandStr ${WriteEnvStr_RegKey} $0 $1 - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - AddToEnvVar_done: - Pop $4 - Pop $3 - Pop $2 - Pop $0 - Pop $1 - -FunctionEnd - -; RemoveFromEnvVar - Remove a given value from a environment var -; Input: head of the stack - -Function un.RemoveFromEnvVar - - Exch $1 ; $1 has environment variable value - Exch - Exch $0 ; $0 has environment variable name - - DetailPrint "Removing $1 from $0" - Push $2 - Push $3 - Push $4 - Push $5 - Push $6 - Push $7 - - IntFmt $7 "%c" 26 # DOS EOF - - Call un.IsNT - Pop $2 - StrCmp $2 1 unRemoveFromEnvVar_NT - ; Not on NT - StrCpy $2 $WINDIR 2 - FileOpen $2 "$2\autoexec.bat" r - GetTempFileName $5 - FileOpen $3 $5 w - GetFullPathName /SHORT $1 $1 - StrCpy $1 "SET $0=%$0%;$1" - Goto unRemoveFromEnvVar_dosLoop - - unRemoveFromEnvVar_dosLoop: - FileRead $2 $4 - StrCpy $6 $4 1 -1 # read last char - StrCmp $6 $7 0 +2 # if DOS EOF - StrCpy $4 $4 -1 # remove DOS EOF so we can compare - StrCmp $4 "$1$\r$\n" unRemoveFromEnvVar_dosLoopRemoveLine - StrCmp $4 "$1$\n" unRemoveFromEnvVar_dosLoopRemoveLine - StrCmp $4 "$1" unRemoveFromEnvVar_dosLoopRemoveLine - StrCmp $4 "" unRemoveFromEnvVar_dosLoopEnd - 
FileWrite $3 $4 - Goto unRemoveFromEnvVar_dosLoop - unRemoveFromEnvVar_dosLoopRemoveLine: - SetRebootFlag true - Goto unRemoveFromEnvVar_dosLoop - - unRemoveFromEnvVar_dosLoopEnd: - FileClose $3 - FileClose $2 - StrCpy $2 $WINDIR 2 - Delete "$2\autoexec.bat" - CopyFiles /SILENT $5 "$2\autoexec.bat" - Delete $5 - Goto unRemoveFromEnvVar_done - - unRemoveFromEnvVar_NT: - ReadRegStr $2 ${WriteEnvStr_RegKey} $0 - StrCpy $6 $2 1 -1 # copy last char - StrCmp $6 ";" +2 # if last char != ; - StrCpy $2 "$2;" # append ; - Push $2 - Push "$1;" - Call un.StrStr ; Find `$1;` in $2 - Pop $3 ; pos of our dir - StrCmp $3 "" unRemoveFromEnvVar_done - ; else, it is in path - # $1 - path to add - # $2 - path var - StrLen $4 "$1;" - StrLen $5 $3 - StrCpy $6 $2 -$5 # $6 is now the part before the path to remove - StrCpy $7 $3 "" $4 # $7 is now the part after the path to remove - StrCpy $4 $6$7 - - StrCpy $6 $4 1 -1 # copy last char - StrCmp $6 ";" 0 +2 # if last char == ; - StrCpy $4 $4 -1 # remove last char - - WriteRegExpandStr ${WriteEnvStr_RegKey} $0 $4 - - ; delete reg value if null - StrCmp $4 "" 0 +2 # if null delete reg - DeleteRegValue ${WriteEnvStr_RegKey} $0 - - SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 - - unRemoveFromEnvVar_done: - Pop $7 - Pop $6 - Pop $5 - Pop $4 - Pop $3 - Pop $2 - Pop $1 - Pop $0 -FunctionEnd - - - - -!ifndef IsNT_KiCHiK -!define IsNT_KiCHiK - -########################################### -# Utility Functions # -########################################### - -; IsNT -; no input -; output, top of the stack = 1 if NT or 0 if not -; -; Usage: -; Call IsNT -; Pop $R0 -; ($R0 at this point is 1 or 0) - -!macro IsNT un -Function ${un}IsNT - Push $0 - ReadRegStr $0 HKLM "SOFTWARE\Microsoft\Windows NT\CurrentVersion" CurrentVersion - StrCmp $0 "" 0 IsNT_yes - ; we are not NT. - Pop $0 - Push 0 - Return - - IsNT_yes: - ; NT!!! - Pop $0 - Push 1 -FunctionEnd -!macroend -!insertmacro IsNT "" -!insertmacro IsNT "un." 
- -!endif ; IsNT_KiCHiK - -; StrStr -; input, top of stack = string to search for -; top of stack-1 = string to search in -; output, top of stack (replaces with the portion of the string remaining) -; modifies no other variables. -; -; Usage: -; Push "this is a long ass string" -; Push "ass" -; Call StrStr -; Pop $R0 -; ($R0 at this point is "ass string") - -!macro StrStr un -Function ${un}StrStr -Exch $R1 ; st=haystack,old$R1, $R1=needle - Exch ; st=old$R1,haystack - Exch $R2 ; st=old$R1,old$R2, $R2=haystack - Push $R3 - Push $R4 - Push $R5 - StrLen $R3 $R1 - StrCpy $R4 0 - ; $R1=needle - ; $R2=haystack - ; $R3=len(needle) - ; $R4=cnt - ; $R5=tmp - loop: - StrCpy $R5 $R2 $R3 $R4 - StrCmp $R5 $R1 done - StrCmp $R5 "" done - IntOp $R4 $R4 + 1 - Goto loop -done: - StrCpy $R1 $R2 "" $R4 - Pop $R5 - Pop $R4 - Pop $R3 - Pop $R2 - Exch $R1 -FunctionEnd -!macroend -!insertmacro StrStr "" -!insertmacro StrStr "un." - -Function Trim ; Added by Pelaca - Exch $R1 - Push $R2 -Loop: - StrCpy $R2 "$R1" 1 -1 - StrCmp "$R2" " " RTrim - StrCmp "$R2" "$\n" RTrim - StrCmp "$R2" "$\r" RTrim - StrCmp "$R2" ";" RTrim - GoTo Done -RTrim: - StrCpy $R1 "$R1" -1 - Goto Loop -Done: - Pop $R2 - Exch $R1 -FunctionEnd - -!endif ; _AddToPath_nsh diff --git a/dist/create-deb b/dist/create-deb deleted file mode 100755 index 0017ff35..00000000 --- a/dist/create-deb +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -if [ $# -ne 1 ]; -then - echo 'create-deb ' - exit 1 -fi - -command -v alien &>/dev/null || { echo >&2 "alien needs to be installed."; exit 1; } - -alien -d -k $1 diff --git a/dist/create-rpm b/dist/create-rpm deleted file mode 100755 index 697f1122..00000000 --- a/dist/create-rpm +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -x - -# -# Commit tag and Version number should be provided as input in the command line -# -# - -set -o pipefail - -if [ $# -ne 2 ]; -then - echo 'create-rpm ' - exit 1 -fi - -command -v ronn &>/dev/null || { echo >&2 "ronn needs to be installed."; exit 1; } -command -v 
rpmbuild &>/dev/null || { echo >&2 "rpmbuild needs to be installed."; exit 1; } - -set -e - -base_folder=`readlink -m $(dirname $0)` - -pushd ${base_folder} >/dev/null - -rpm_build_area=${base_folder}/rpm_build_area -rm -rf ${rpm_build_area:=/non-existent-dir} -mkdir -p ${rpm_build_area}/{SOURCES,SPECS,BUILD,RPMS,SRPMS,BUILDROOT} - -echo RPM build area is in ${rpm_build_area} - -VERSION=$1 -BASED_ON_TAG=$2 - -REAL_PACKAGE_NAME=q -RPM_PACKAGE_NAME=q-text-as-data - -FULL_NAME_FOLDER=${RPM_PACKAGE_NAME}-${VERSION} - -if [ ! -e ${RPM_PACKAGE_NAME}.spec.template ]; -then - echo "spec template does not exist. can't continue" - exit 1 -fi - -curl -f -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG" -mkdir -p ${rpm_build_area}/SOURCES -pushd ${rpm_build_area}/SOURCES >/dev/null -tar xvzf ./q.tar.gz --strip-components=1 -rm -vf ./q.tar.gz - -mkdir ${rpm_build_area}/packages -cp /q/packages/q-x86_64-Linux ${rpm_build_area}/packages/q-x86_64-Linux - -# Expecting the binaries to exist in /packages/ - -popd >/dev/null -find ${rpm_build_area}/ -ls - -cat ${RPM_PACKAGE_NAME}.spec.template | sed "s/VERSION_PLACEHOLDER/$VERSION/g" > ${rpm_build_area}/SPECS/${RPM_PACKAGE_NAME}.spec - -rpmbuild -v --define "_topdir ${rpm_build_area}" -ba ${rpm_build_area}/SPECS/${RPM_PACKAGE_NAME}.spec - -popd >/dev/null diff --git a/dist/deb-builder-Dockerfile b/dist/deb-builder-Dockerfile deleted file mode 100644 index 7ff7b08f..00000000 --- a/dist/deb-builder-Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ - -FROM ubuntu:12.04 - -RUN apt-get update && apt-get install -y alien - -ENTRYPOINT "/bin/bash" - - diff --git a/dist/fpm-config b/dist/fpm-config new file mode 100644 index 00000000..12adbf73 --- /dev/null +++ b/dist/fpm-config @@ -0,0 +1,7 @@ +-s dir +--name q-text-as-data +--license GPLv3 +--architecture x86_64 +--description "q allows to perform SQL-like statements on tabular text data." 
+--url https://github.com/harelba/q +--maintainer "Harel Ben-Attia " diff --git a/dist/q-TextAsData-with-path.nsi b/dist/q-TextAsData-with-path.nsi deleted file mode 100644 index f2bec1e4..00000000 --- a/dist/q-TextAsData-with-path.nsi +++ /dev/null @@ -1,182 +0,0 @@ -############################################################################################ -# NSIS Installation Script created by NSIS Quick Setup Script Generator v1.09.18 -# Entirely Edited with NullSoft Scriptable Installation System -# by Vlasis K. Barkas aka Red Wine red_wine@freemail.gr Sep 2006 -############################################################################################ - -!define APP_NAME "q-TextAsData" -!define COMP_NAME "harelba" -!define WEB_SITE "http://harelba.github.io/q/" -# REQUIRED TO BE DEFINED EXTERNALLY !define VERSION "1.5.0.0" -!define COPYRIGHT "Harel Ben-Attia @ 2012-2014" -!define DESCRIPTION "Application" -!define LICENSE_TXT "..\doc\LICENSE" -!define INSTALLER_NAME "setup.exe" -!define MAIN_APP_EXE "q.exe" -!define INSTALL_TYPE "SetShellVarContext all" -!define REG_ROOT "HKLM" -!define REG_APP_PATH "Software\Microsoft\Windows\CurrentVersion\App Paths\${MAIN_APP_EXE}" -!define UNINSTALL_PATH "Software\Microsoft\Windows\CurrentVersion\Uninstall\${APP_NAME}" - -!ifndef VERSION - Abort ; VERSION has to be defined externally -!endif -###################################################################### - -VIProductVersion "${VERSION}" -VIAddVersionKey "ProductName" "${APP_NAME}" -VIAddVersionKey "CompanyName" "${COMP_NAME}" -VIAddVersionKey "LegalCopyright" "${COPYRIGHT}" -VIAddVersionKey "FileDescription" "${DESCRIPTION}" -VIAddVersionKey "FileVersion" "${VERSION}" - -###################################################################### - -SetCompressor ZLIB -Name "${APP_NAME}" -Caption "${APP_NAME}" -OutFile "${INSTALLER_NAME}" -BrandingText "${APP_NAME}" -XPStyle on -InstallDirRegKey "${REG_ROOT}" "${REG_APP_PATH}" "" -InstallDir 
"$PROGRAMFILES\q-TextAsData" - -###################################################################### - -!include "AddToPath.nsh" - - -!include "MUI.nsh" - -!define MUI_ABORTWARNING -!define MUI_UNABORTWARNING - -!insertmacro MUI_PAGE_WELCOME - -!ifdef LICENSE_TXT -!insertmacro MUI_PAGE_LICENSE "${LICENSE_TXT}" -!endif - -!ifdef REG_START_MENU -!define MUI_STARTMENUPAGE_NODISABLE -!define MUI_STARTMENUPAGE_DEFAULTFOLDER "q-TextAsData" -!define MUI_STARTMENUPAGE_REGISTRY_ROOT "${REG_ROOT}" -!define MUI_STARTMENUPAGE_REGISTRY_KEY "${UNINSTALL_PATH}" -!define MUI_STARTMENUPAGE_REGISTRY_VALUENAME "${REG_START_MENU}" -!insertmacro MUI_PAGE_STARTMENU Application $SM_Folder -!endif - -!insertmacro MUI_PAGE_INSTFILES - -!insertmacro MUI_PAGE_FINISH - -!insertmacro MUI_UNPAGE_CONFIRM - -!insertmacro MUI_UNPAGE_INSTFILES - -!insertmacro MUI_UNPAGE_FINISH - -!insertmacro MUI_LANGUAGE "English" - -###################################################################### - -Section -MainProgram -${INSTALL_TYPE} -SetOverwrite ifnewer -SetOutPath "$INSTDIR" -File "q:\dist\win_output\q.exe" - -Push $INSTDIR -Call AddToPath -SectionEnd - -###################################################################### - -Section -Icons_Reg -SetOutPath "$INSTDIR" -WriteUninstaller "$INSTDIR\uninstall.exe" - -!ifdef REG_START_MENU -!insertmacro MUI_STARTMENU_WRITE_BEGIN Application -CreateDirectory "$SMPROGRAMS\$SM_Folder" -CreateShortCut "$SMPROGRAMS\$SM_Folder\${APP_NAME}.lnk" "$INSTDIR\${MAIN_APP_EXE}" -!ifdef WEB_SITE -WriteIniStr "$INSTDIR\${APP_NAME} website.url" "InternetShortcut" "URL" "${WEB_SITE}" -CreateShortCut "$SMPROGRAMS\$SM_Folder\${APP_NAME} Website.lnk" "$INSTDIR\${APP_NAME} website.url" -!endif -!insertmacro MUI_STARTMENU_WRITE_END -!endif - -!ifndef REG_START_MENU -CreateDirectory "$SMPROGRAMS\q-TextAsData" -CreateShortCut "$SMPROGRAMS\q-TextAsData\${APP_NAME}.lnk" "$INSTDIR\${MAIN_APP_EXE}" -!ifdef WEB_SITE -WriteIniStr "$INSTDIR\${APP_NAME} website.url" "InternetShortcut" 
"URL" "${WEB_SITE}" -CreateShortCut "$SMPROGRAMS\q-TextAsData\${APP_NAME} Website.lnk" "$INSTDIR\${APP_NAME} website.url" -!endif -!endif - -CreateShortCut "$SMPROGRAMS\q-TextAsData\Uninstall ${APP_NAME}.lnk" "$INSTDIR\uninstall.exe" - -WriteRegStr ${REG_ROOT} "${REG_APP_PATH}" "" "$INSTDIR\${MAIN_APP_EXE}" -WriteRegStr ${REG_ROOT} "${UNINSTALL_PATH}" "DisplayName" "${APP_NAME}" -WriteRegStr ${REG_ROOT} "${UNINSTALL_PATH}" "UninstallString" "$INSTDIR\uninstall.exe" -WriteRegStr ${REG_ROOT} "${UNINSTALL_PATH}" "DisplayIcon" "$INSTDIR\${MAIN_APP_EXE}" -WriteRegStr ${REG_ROOT} "${UNINSTALL_PATH}" "DisplayVersion" "${VERSION}" -WriteRegStr ${REG_ROOT} "${UNINSTALL_PATH}" "Publisher" "${COMP_NAME}" - -!ifdef WEB_SITE -WriteRegStr ${REG_ROOT} "${UNINSTALL_PATH}" "URLInfoAbout" "${WEB_SITE}" -!endif -SectionEnd - -###################################################################### - -Section Uninstall - -Push $INSTDIR -Call un.RemoveFromPath - -${INSTALL_TYPE} -Delete "$INSTDIR\select.pyd" -Delete "$INSTDIR\unicodedata.pyd" -Delete "$INSTDIR\library.zip" -Delete "$INSTDIR\bz2.pyd" -Delete "$INSTDIR\sqlite3.dll" -Delete "$INSTDIR\q.exe" -Delete "$INSTDIR\w9xpopen.exe" -Delete "$INSTDIR\python27.dll" -Delete "$INSTDIR\_sqlite3.pyd" -Delete "$INSTDIR\_hashlib.pyd" - - -Delete "$INSTDIR\uninstall.exe" -!ifdef WEB_SITE -Delete "$INSTDIR\${APP_NAME} website.url" -!endif - -RmDir "$INSTDIR" - -!ifdef REG_START_MENU -!insertmacro MUI_STARTMENU_GETFOLDER "Application" $SM_Folder -Delete "$SMPROGRAMS\$SM_Folder\${APP_NAME}.lnk" -!ifdef WEB_SITE -Delete "$SMPROGRAMS\$SM_Folder\${APP_NAME} Website.lnk" -!endif -RmDir "$SMPROGRAMS\$SM_Folder" -!endif - -!ifndef REG_START_MENU -Delete "$SMPROGRAMS\q-TextAsData\${APP_NAME}.lnk" -!ifdef WEB_SITE -Delete "$SMPROGRAMS\q-TextAsData\${APP_NAME} Website.lnk" -!endif -RmDir "$SMPROGRAMS\q-TextAsData" -!endif - -DeleteRegKey ${REG_ROOT} "${REG_APP_PATH}" -DeleteRegKey ${REG_ROOT} "${UNINSTALL_PATH}" -SectionEnd - 
-###################################################################### - diff --git a/dist/q-text-as-data.spec.template b/dist/q-text-as-data.spec.template deleted file mode 100644 index 2be7cc20..00000000 --- a/dist/q-text-as-data.spec.template +++ /dev/null @@ -1,66 +0,0 @@ -%global _enable_debug_package 0 -%global debug_package %{nil} -%global __os_install_post %{nil} - -Name: q-text-as-data -Version: VERSION_PLACEHOLDER -Release: 1%{?dist} -Summary: q - Text as Data - -Group: Applications/Text -License: GPLv3 -URL: https://github.com/harelba/q -BuildArch: x86_64 - -%description -q allows to perform SQL-like statements on tabular text data. - - -%prep -cd %{_topdir}/BUILD -cp -vrf %{_topdir}/SOURCES/* %{_topdir}/BUILD/ -chmod -Rf a+rX,u+w,g-w,o-w %{_topdir}/BUILD/ -mkdir -p %{_topdir}/BUILD/packages/ -cp -vfr /q/packages/* %{_topdir}/BUILD/packages/ - -%build -cd %{_topdir}/BUILD -ronn doc/USAGE.markdown - -%install -rm -vrf ${RPM_BUILD_ROOT}/ -install -d -m 0755 ${RPM_BUILD_ROOT}%{_bindir} -install -d -m 0755 ${RPM_BUILD_ROOT}%{_datadir}/q-text-as-data -install -Dm 0644 ./packages/q-x86_64-Linux ${RPM_BUILD_ROOT}%{_datadir}/q-text-as-data/q -ln -s %{_datadir}/q-text-as-data/q ${RPM_BUILD_ROOT}%{_bindir}/q -install -d -m 0755 ${RPM_BUILD_ROOT}%{_mandir}/man1/ -install -m 0644 doc/USAGE ${RPM_BUILD_ROOT}%{_mandir}/man1/q.1 -gzip ${RPM_BUILD_ROOT}%{_mandir}/man1/q.1 - -%files -%defattr(-,root,root,-) -%{_bindir}/q -%doc README.markdown doc/AUTHORS doc/IMPLEMENTATION.markdown doc/LICENSE doc/RATIONALE.markdown doc/THANKS doc/USAGE.markdown -%attr(755,root,root) %{_datadir}/q-text-as-data/q -%{_datadir}/q-text-as-data -%doc %_mandir/man1/q.1.gz - -%changelog -*Wed Apr 05 2017 Harel Ben-Attia 1.6.0-1 -- Moved RPM building to be dockerized -- Removed the need for providing commit hashes -*Fri Dec 12 2014 Harel Ben-Attia 1.5.0-1 -- Moved stuff from create-rpm script into the rpm spec itself -*Sat Jun 14 2014 Harel Ben-Attia 1.4.0-1 -- Changed RPM package name to 
q-text-as-data -- Fixed RPM creation logic after folder restructuring -- Man page is now taken directly from USAGE.markdown - -* Mon Mar 03 2014 Harel Ben-Attia 1.3.0-1 -- Version 1.3.0 packaging - -* Thu Feb 20 2014 Harel Ben-Attia 1.1.7-1 -- Added man page - -* Wed Feb 19 2014 Jens Neu 1.1.5-1 -- initial release diff --git a/dist/rpm-builder-Dockerfile b/dist/rpm-builder-Dockerfile deleted file mode 100644 index dafcd1e9..00000000 --- a/dist/rpm-builder-Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ - -FROM centos:centos6 - -RUN yum install -y which curl gcc make rpm rpm-build - -RUN curl -sSL https://get.rvm.io | bash - -RUN /bin/bash -l -c "rvm install 2.4.1" && /bin/bash -l -c "gem install ronn" - -ENTRYPOINT "/bin/bash" - - diff --git a/dist/test-rpm-inside-container.sh b/dist/test-rpm-inside-container.sh new file mode 100755 index 00000000..4fc43f4b --- /dev/null +++ b/dist/test-rpm-inside-container.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -x +set -e + +yum install -y python38 sqlite perl gcc python3-devel sqlite-devel +pip3 install -r test-requirements.txt + +rpm -i $1 +Q_EXECUTABLE=q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v diff --git a/dist/test-using-deb.sh b/dist/test-using-deb.sh new file mode 100755 index 00000000..d2c8d3b4 --- /dev/null +++ b/dist/test-using-deb.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -x +set -e + +sudo dpkg -i $1 +Q_EXECUTABLE=q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v + diff --git a/dist/test-using-rpm.sh b/dist/test-using-rpm.sh new file mode 100755 index 00000000..84d6a9d5 --- /dev/null +++ b/dist/test-using-rpm.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -x +set -e + +RPM_LOCATION=$1 + +docker run -i -v `pwd`:/q-sources -w /q-sources centos:8 /bin/bash -e -x ./dist/test-rpm-inside-container.sh ${RPM_LOCATION} diff --git a/dist/update-mac-homebrew-instructions b/dist/update-mac-homebrew-instructions deleted file mode 100644 index 0f3cf4df..00000000 --- a/dist/update-mac-homebrew-instructions +++ /dev/null @@ -1,51 +0,0 @@ 
- -Instructions to bump the version of q in homebrew - -1. Sync your fork with homebrew original fork - - git checkout master - - git pull upstream master - - git push origin master - -2. Create a branch for the version bump - - git checkout -b q - -3. Edit the file Library/Formula/q.rb. - - a. Change the url to the new tar.gz file - - b. Change the sha256 checksum - - $ curl -sL "https://github.com/harelba/q/archive/.tar.gz" | shasum -a 256 - - - - $ Change the checksum in q.rb's "sha256" line to the new checksum - - c. Verify by running the following: - - $ brew fetch -s q - - The output should show the new SHA256 without any warning - -4. Check the diff - - git diff | vi - - -5. Commit the change as "q " - -6. Push it as a separate branch to your repository: - - git push origin q - -7. Go the homebrew fork in github, and press "compare and pull-request" on the just-pushed branch - -8. Review the diff and make sure that the pull-request is from / to homebrew/master (should be the default) - -9. Press "create pull-request" button - -10. If everything is fine, you'll be confirmed after several hours - - diff --git a/do-manual-release.sh b/do-manual-release.sh deleted file mode 100755 index 4dd49ef5..00000000 --- a/do-manual-release.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -x - -set -e - -VERSION=2.0.19 - -echo "TRAVIS_BRANCH is $TRAVIS_BRANCH . TRAVIS_PULL_REQUEST_BRANCH is $TRAVIS_PULL_REQUEST_BRANCH" - -if [[ "$TRAVIS_BRANCH" != "master" ]] -then - echo "Not releasing - not on master branch (${TRAVIS_BRANCH})" - exit 0 -fi - -if [[ "$TRAVIS_PULL_REQUEST_BRANCH" != "" ]] -then - echo "Not releasing - push check in PR" - exit 0 -fi - -# ensure release exists -curl -v -L -f https://api.github.com/repos/harelba/q/releases/tags/$VERSION || (echo "Release $VERSION not found in github. " && exit 1) - -# skip releasing if release already has some asset. 
Not using jq on purpose, to prevent the need for dependencies -ASSET_COUNT=$(curl -f -L https://api.github.com/repos/harelba/q/releases/tags/$VERSION | grep /releases/assets/ | grep url | wc -l | awk '{print $1}') - -if [[ "$ASSET_COUNT" != "0" ]] -then - echo "Assets already exists in the release. No need to release version $VERSION again." - exit 0 -fi - -echo "Gonna release version $VERSION" - -echo "Packing binary for $TRAVIS_OS_NAME" - -if [[ "$TRAVIS_OS_NAME" == "osx" || "$TRAVIS_OS_NAME" == "linux" ]] -then - echo "Packing $TRAVIS_OS_NAME installer - packing binary" - pyci pack binary - echo "Packing $TRAVIS_OS_NAME installer - uploading" - pyci github upload-asset --asset q-$(uname -m)-$(uname -s) --release $VERSION -else - echo "Packing windows installer - packing binary" - pyci pack binary - echo "Packing windows installer - listing files" - find `pwd` -ls | grep -v \.git/ - echo "Packing windows installer - packing nsis" - BINARY_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows.exe" - pyci pack nsis --program-files-dir q-TextAsData --binary-path $BINARY_LOCATION --version ${VERSION}.0 - echo "Packing windows installer - uploading" - pyci github upload-asset --asset $BINARY_LOCATION --release $VERSION - SETUP_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows-installer.exe" - pyci github upload-asset --asset $SETUP_LOCATION --release $VERSION -fi - -echo "done" diff --git a/package-release b/package-release deleted file mode 100755 index 1aa1515b..00000000 --- a/package-release +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -set -e - -base_folder=$(dirname $0) -pushd ${base_folder} >/dev/null - -if [ $# -ne 2 ]; -then - echo "Usage: $(dirname $0) " - echo - echo "Note that the git tag must be pushed to github before doing this." 
- exit 1 -fi -VERSION="$1" -BASED_ON_TAG="$2" - -d=`pwd` -cid1=`docker run -i -d -v ${d}:/q q-text-as-data-rpm-builder:0.1` -cid2=`docker run -i -d -v ${d}:/q q-text-as-data-deb-builder:0.1` - -function kill_container { - tmp=`docker kill ${cid1} ${cid2}` -} -trap kill_container EXIT - -rm -rvf ${base_folder}/packages -mkdir -p ${base_folder}/packages - -sleep 1 - -gh release download $BASED_ON_TAG -p '*' -D ./packages/ - -chmod +x ./packages/* - -docker exec -it ${cid1} /bin/bash -i -c "/q/dist/create-rpm ${VERSION} ${BASED_ON_TAG}" - -docker cp ${cid1}:/q/dist/rpm_build_area/RPMS/x86_64/q-text-as-data-${VERSION}-1.el6.x86_64.rpm ${base_folder}/packages/q-text-as-data-${VERSION}-1.x86_64.rpm - -docker exec -it ${cid2} /bin/bash -i -c "cd /q/packages && alien ./q-text-as-data-${VERSION}-1.x86_64.rpm" - -find ./packages/ -ls - diff --git a/test/prepare-benchmark-env b/prepare-benchmark-env similarity index 96% rename from test/prepare-benchmark-env rename to prepare-benchmark-env index 397a290d..81bc84a1 100755 --- a/test/prepare-benchmark-env +++ b/prepare-benchmark-env @@ -36,7 +36,7 @@ do pyenv activate $venv_name pyenv version echo installing requirements $venv_name - pip install -r ../requirements.txt + pip install -r ./requirements.txt echo deactivating $venv_name pyenv deactivate done diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl new file mode 100644 index 00000000..88ce10a2 --- /dev/null +++ b/pyoxidizer.bzl @@ -0,0 +1,114 @@ +# This file defines how PyOxidizer application building and packaging is +# performed. See PyOxidizer's documentation at +# https://pyoxidizer.readthedocs.io/en/stable/ for details of this +# configuration file format. + +# Configuration files consist of functions which define build "targets." +# This function creates a Python executable and installs it in a destination +# directory. 
+def make_exe(): + dist = default_python_distribution(python_version="3.8") + + policy = dist.make_python_packaging_policy() + policy.set_resource_handling_mode("classify") + policy.resources_location = "in-memory" + policy.resources_location_fallback = "filesystem-relative:Lib" + policy.allow_in_memory_shared_library_loading = True + + python_config = dist.make_python_interpreter_config() + + python_config.run_module = "bin.q" + + exe = dist.to_python_executable( + name="q", + + packaging_policy=policy, + + config=python_config, + ) + + exe.pip_install(["wheel"]) + + exe.add_python_resources(exe.pip_install(["-r", "requirements.txt"])) + exe.add_python_resources(exe.pip_install(["-e", "."])) + + exe.add_python_resources(exe.read_package_root( + path="./", + packages=["bin"], + )) + + return exe + +def make_embedded_resources(exe): + return exe.to_embedded_resources() + +def make_install(exe): + # Create an object that represents our installed application file layout. + files = FileManifest() + + # Add the generated executable to our install layout in the root directory. + files.add_python_resource(".", exe) + + return files + +def make_msi(exe): + # See the full docs for more. But this will convert your Python executable + # into a `WiXMSIBuilder` Starlark type, which will be converted to a Windows + # .msi installer when it is built. + builder = exe.to_wix_msi_builder( + # Simple identifier of your app. + "q", + # The name of your application. + "q-text-as-data", + # The version of your application. + "2.1.0", + # The author/manufacturer of your application. + "Harel Ben-Attia" + ) + return builder + + +# Dynamically enable automatic code signing. +def register_code_signers(): + # You will need to run with `pyoxidizer build --var ENABLE_CODE_SIGNING 1` for + # this if block to be evaluated. + if not VARS.get("ENABLE_CODE_SIGNING"): + return + + # Use a code signing certificate in a .pfx/.p12 file, prompting the + # user for its path and password to open. 
+ # pfx_path = prompt_input("path to code signing certificate file") + # pfx_password = prompt_password( + # "password for code signing certificate file", + # confirm = True + # ) + # signer = code_signer_from_pfx_file(pfx_path, pfx_password) + + # Use a code signing certificate in the Windows certificate store, specified + # by its SHA-1 thumbprint. (This allows you to use YubiKeys and other + # hardware tokens if they speak to the Windows certificate APIs.) + # sha1_thumbprint = prompt_input( + # "SHA-1 thumbprint of code signing certificate in Windows store" + # ) + # signer = code_signer_from_windows_store_sha1_thumbprint(sha1_thumbprint) + + # Choose a code signing certificate automatically from the Windows + # certificate store. + # signer = code_signer_from_windows_store_auto() + + # Activate your signer so it gets called automatically. + # signer.activate() + + +# Call our function to set up automatic code signers. +register_code_signers() + +# Tell PyOxidizer about the build targets defined above. +register_target("exe", make_exe) +register_target("resources", make_embedded_resources, depends=["exe"], default_build_script=True) +register_target("install", make_install, depends=["exe"], default=True) +register_target("msi_installer", make_msi, depends=["exe"]) + +# Resolve whatever targets the invoker of this configuration file is requesting +# be resolved. 
+resolve_targets() diff --git a/pytest.ini b/pytest.ini index 9d60edec..57ea2be4 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,3 @@ [pytest] -log_print = True +markers = + benchmark: Benchmark tests diff --git a/requirements-win-x86_64.txt b/requirements-win-x86_64.txt new file mode 100644 index 00000000..69d4961a --- /dev/null +++ b/requirements-win-x86_64.txt @@ -0,0 +1,4 @@ +six==1.11.0 +flake8==3.6.0 +setuptools<45.0.0 +# Turns out it would not be possible to use apsw without a major change all around, so we'd need to be able to compile sqlitebck on Windows as well diff --git a/requirements.txt b/requirements.txt index 3ad7d2bf..9661caf6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ six==1.11.0 flake8==3.6.0 setuptools<45.0.0 +sqlitebck diff --git a/run-benchmark b/run-benchmark new file mode 100755 index 00000000..f6556508 --- /dev/null +++ b/run-benchmark @@ -0,0 +1,110 @@ +#!/bin/bash + +# Usage: ./run-benchmark.sh +set -e + +get_abs_filename() { + # $1 : relative filename + echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" +} + +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" + +if [ "x$1" == "x" ]; +then + echo Benchmark id must be provided as a parameter + exit 1 +fi +Q_BENCHMARK_ID=$1 +shift + +if [ "x$1" == "x" ]; +then + EFFECTIVE_Q_EXECUTABLE="source-files-$(git rev-parse HEAD)" +else + ABS_Q_EXECUTABLE="$(get_abs_filename $1)" + export Q_EXECUTABLE=$ABS_Q_EXECUTABLE + if [ ! 
-f $ABS_Q_EXECUTABLE ] + then + echo "q executable must exist ($ABS_Q_EXECUTABLE)" + exit 1 + fi + EFFECTIVE_Q_EXECUTABLE="${ABS_Q_EXECUTABLE//\//__}" + shift +fi + +echo "Q executable to use is $EFFECTIVE_Q_EXECUTABLE" + +PYTEST_OPTIONS="$@" +echo "pytest options are $PYTEST_OPTIONS" + +mkdir -p ./test/benchmark-results + +# Must be provided to the benchmark code so it knows where to write the results to +export Q_BENCHMARK_RESULTS_FOLDER="./test/benchmark-results/${EFFECTIVE_Q_EXECUTABLE}/${Q_BENCHMARK_ID}/" +echo Benchmark results folder is $Q_BENCHMARK_RESULTS_FOLDER +mkdir -p $Q_BENCHMARK_RESULTS_FOLDER + +source benchmark-config.sh +LATEST_PYTHON_VERSION=${BENCHMARK_PYTHON_VERSIONS[${#BENCHMARK_PYTHON_VERSIONS[@]}-1]} + +ALL_FILES=() + +for ver in "${BENCHMARK_PYTHON_VERSIONS[@]}" +do +venv_name=q-benchmark-$ver +echo activating $venv_name +pyenv activate $venv_name +echo "==== testing inside $venv_name ===" +if [[ -f $Q_BENCHMARK_RESULTS_FOLDER/${venv_name}.benchmark-results ]] +then + echo "Results files for version $ver already exists skipping benchmark for this version" + continue +fi + +export Q_BENCHMARK_NAME=${venv_name} +export Q_BENCHMARK_ADDITIONAL_PARAMS="-C read" + +Q_BENCHMARK_NAME=${venv_name}-with-caching Q_BENCHMARK_DATA_DIR=./_benchmark_data_with_qsql_caches pytest -m benchmark -k test_q_matrix -v -s $PYTEST_OPTIONS +Q_BENCHMARK_NAME=${venv_name} Q_BENCHMARK_DATA_DIR=./_benchmark_data pytest -m benchmark -k test_q_matrix -v -s $PYTEST_OPTIONS + +RESULT_FILE="${Q_BENCHMARK_RESULTS_FOLDER}/$venv_name.benchmark-results" +echo "==== Done. Results are in $RESULT_FILE" +ALL_FILES[${#ALL_FILES[@]}]="$RESULT_FILE" +echo "Deactivating" +pyenv deactivate +done + +exit 0 + +pyenv activate q-benchmark-${LATEST_PYTHON_VERSION} +echo "==== testing textql ===" +if [[ -f `ls $Q_BENCHMARK_RESULTS_FOLDER/textql*.benchmark-results` ]] +then + echo "Results files for textql already exist. 
Skipping benchmark for textql" +else + pytest -m benchmark -k test_textql_matrix -v -s $PYTEST_OPTIONS + RESULT_FILE="textql*.benchmark-results" + ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" + echo "Done. Results are in textql.benchmark-results" +fi + +echo "==== testing octosql ===" +if [[ -f $Q_BENCHMARK_RESULTS_FOLDER/octosql.benchmark-results ]] +then + echo "Results files for octosql aready exist. Skipping benchmark for octosql" +else + pytest -m benchmark -k test_octosql_matrix -v -s $PYTEST_OPTIONS + RESULT_FILE="octosql*.benchmark-results" + ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" + echo "Done. Results are in octosql.benchmark-results" +fi + +summary_file="$Q_BENCHMARK_RESULTS_FOLDER/summary.benchmark-results" + +rm -vf $summary_file + +paste ${ALL_FILES[*]} > $summary_file +echo "Done. final results file is $summary_file" +pyenv deactivate diff --git a/run-coverage.sh b/run-coverage.sh new file mode 100755 index 00000000..26ee0f90 --- /dev/null +++ b/run-coverage.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -e + +rm -vf ./htmlcov/* + +pytest -m "not benchmark" --cov --cov-report html "$@" + +function cleanup() { + kill %1 +} + +# TODO Fix + +# python -m http.server 8000 +# open http://localhost:8000/htmlcov/ + + diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 00000000..f28e57e6 --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +pytest -m 'not benchmark' "$@" diff --git a/setup-pyenv.sh b/setup-pyenv.sh deleted file mode 100644 index 6b29d86d..00000000 --- a/setup-pyenv.sh +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env bash -# NOTE: This script needs to be sourced so it can modify the environment. 
-# -# Environment variables that can be set: -# - PYENV_VERSION -# Python to install [required] -# - PYENV_VERSION_STRING -# String to `grep -F` against the output of `python --version` to validate -# that the correct Python was installed (recommended) [default: none] -# - PYENV_ROOT -# Directory in which to install pyenv [default: ~/.travis-pyenv] -# - PYENV_RELEASE -# Release tag of pyenv to download [default: clone from master] -# - PYENV_CACHE_PATH -# Directory where full Python builds are cached (i.e., for Travis) - -# PYENV_ROOT is exported because pyenv uses it -export PYENV_ROOT="${PYENV_ROOT:-$HOME/.travis-pyenv}" -export PYTHON_CONFIGURE_OPTS="--enable-shared" -PYENV_CACHE_PATH="${PYENV_CACHE_PATH:-$HOME/.pyenv_cache}" -version_cache_path="$PYENV_CACHE_PATH/$PYENV_VERSION" -version_pyenv_path="$PYENV_ROOT/versions/$PYENV_VERSION" - -# Functions -# -# verify_python -- attempts to call the Python command or binary -# supplied in the first argument with the --version flag. If -# PYENV_VERSION_STRING is set, then it validates the returned version string -# as well (using grep -F). Returns whatever status code the command returns. -verify_python() { - local python_bin="$1"; shift - - if [[ -n "$PYENV_VERSION_STRING" ]]; then - "$python_bin" --version 2>&1 | grep -F "$PYENV_VERSION_STRING" &>/dev/null - else - "$python_bin" --version &>/dev/null - fi -} - -# use_cached_python -- Tries symlinking to the cached PYENV_VERSION and -# verifying that it's a working build. Returns 0 if it's found and it -# verifies, otherwise returns 1. -use_cached_python() { - if [[ -d "$version_cache_path" ]]; then - printf "Cached python found, %s. Verifying..." "$PYENV_VERSION" - ln -s "$version_cache_path" "$version_pyenv_path" - if verify_python "$version_pyenv_path/bin/python"; then - printf "success!\n" - return 0 - else - printf "FAILED.\nClearing cached version..." 
- rm -f "$version_pyenv_path" - rm -rf "$version_cache_path" - printf "done.\n" - return 1 - fi - else - echo "No cached python found." - return 1 - fi -} - -# output_debugging_info -- Outputs useful debugging information -output_debugging_info() { - echo "**** Debugging information" - printf "PYENV_VERSION\n%s\n" "$PYENV_VERSION" - printf "PYENV_VERSION_STRING\n%s\n" "$PYENV_VERSION_STRING" - printf "PYENV_CACHE_PATH\n%s\n" "$PYENV_CACHE_PATH" - set -x - python --version - "$version_cache_path/bin/python" --version - which python - pyenv which python - set +x -} - -# Main script begins. - -if [[ -z "$PYENV_VERSION" ]]; then - echo "PYENV_VERSION is not set. Not installing a pyenv." - return 0 -fi - -# Get out of the virtualenv we're in (if we're in one). -[[ -z "$VIRTUAL_ENV" ]] || deactivate - -# Install pyenv -echo "**** Installing pyenv." -if [[ -n "$PYENV_RELEASE" ]]; then - # Fetch the release archive from Github (slightly faster than cloning) - mkdir "$PYENV_ROOT" - curl -fsSL "https://github.com/yyuu/pyenv/archive/$PYENV_RELEASE.tar.gz" \ - | tar -xz -C "$PYENV_ROOT" --strip-components 1 -else - # Don't have a release to fetch, so just clone directly - git clone --depth 1 https://github.com/yyuu/pyenv.git "$PYENV_ROOT" -fi - -export PATH="$PYENV_ROOT/bin:$PATH" -eval "$(pyenv init -)" - -# Make sure the cache directory exists -mkdir -p "$PYENV_CACHE_PATH" - -# Try using an already cached PYENV_VERSION. If it fails or is not found, -# then install from scratch. -echo "**** Trying to find and use cached python $PYENV_VERSION." -if ! use_cached_python; then - echo "**** Installing python $PYENV_VERSION with pyenv now." - if pyenv install "$PYENV_VERSION"; then - if mv "$version_pyenv_path" "$PYENV_CACHE_PATH"; then - echo "Python was successfully built and moved to cache." - echo "**** Trying to find and use cached python $PYENV_VERSION." - if ! 
use_cached_python; then - echo "Python version $PYENV_VERSION was apparently successfully built" - echo "with pyenv, but, once cached, it could not be verified." - output_debugging_info - return 1 - fi - else - echo "**** Warning: Python was succesfully built, but moving to cache" - echo "failed. Proceeding anyway without caching." - fi - else - echo "Python version $PYENV_VERSION build FAILED." - return 1 - fi -fi - -# Now we have to reinitialize pyenv, as we need the shims etc to be created so -# the pyenv activates correctly. -echo "**** Activating python $PYENV_VERSION and generating new virtualenv." -eval "$(pyenv init -)" -pyenv global "$PYENV_VERSION" diff --git a/setup.py b/setup.py index f949d1a2..ead9127a 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,12 @@ #!/usr/bin/env python from setuptools import setup +import setuptools -q_version = '2.0.19' +q_version = '3.1.0-beta' + +with open("README.markdown", "r", encoding="utf-8") as fh: + long_description = fh.read() setup( name='q', @@ -11,13 +15,15 @@ version=q_version, author='Harel Ben-Attia', description="Run SQL directly on CSV or TSV files", + long_description=long_description, + long_description_content_type="text/markdown", author_email='harelba@gmail.com', install_requires=[ - 'six==1.11.0' - ], - packages=[ - 'bin' + 'six==1.11.0', + 'sqlitebck' ], + package_dir={"": "bin"}, + packages=setuptools.find_packages(where="bin"), entry_points={ 'console_scripts': [ 'q = bin.q:run_standalone' diff --git a/test-requirements.txt b/test-requirements.txt index a89474ca..bb72fa51 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,2 +1,4 @@ -pytest==4.6.2 -flake8==3.6.0 \ No newline at end of file +pytest==6.2.2 +flake8==3.6.0 +six==1.11.0 +sqlitebck diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/benchmark-config.sh b/test/benchmark-config.sh deleted file mode 100644 index 52cf71e9..00000000 --- a/test/benchmark-config.sh +++ 
/dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -BENCHMARK_PYTHON_VERSIONS=(2.7.18 3.6.4 3.7.9 3.8.5) diff --git a/test/run-benchmark b/test/run-benchmark deleted file mode 100755 index a1c6ff21..00000000 --- a/test/run-benchmark +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash - -# Usage: ./run-benchmark.sh -set -e - -get_abs_filename() { - # $1 : relative filename - echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" -} - -eval "$(pyenv init -)" -eval "$(pyenv virtualenv-init -)" - -if [ "x$1" == "x" ]; -then - echo Benchmark id must be provided as a parameter - exit 1 -fi -Q_BENCHMARK_ID=$1 - -if [ "x$2" == "x" ]; -then - EFFECTIVE_Q_EXECUTABLE="source-files-$(git rev-parse HEAD)" -else - ABS_Q_EXECUTABLE="$(get_abs_filename $2)" - export Q_EXECUTABLE=$ABS_Q_EXECUTABLE - if [ ! -f $ABS_Q_EXECUTABLE ] - then - echo "q executable must exist ($ABS_Q_EXECUTABLE)" - exit 1 - fi - EFFECTIVE_Q_EXECUTABLE="${ABS_Q_EXECUTABLE//\//__}" -fi - -echo "Q executable to use is $EFFECTIVE_Q_EXECUTABLE" - -# Must be provided to the benchmark code so it knows where to write the results to -export Q_BENCHMARK_RESULTS_FOLDER="./benchmark-results/${EFFECTIVE_Q_EXECUTABLE}/${Q_BENCHMARK_ID}/" -echo Benchmark results folder is $Q_BENCHMARK_RESULTS_FOLDER -mkdir -p $Q_BENCHMARK_RESULTS_FOLDER - -source benchmark-config.sh - -ALL_FILES=() - -for ver in "${BENCHMARK_PYTHON_VERSIONS[@]}" -do -venv_name=q-benchmark-$ver -echo activating $venv_name -pyenv activate $venv_name -echo "==== testing inside $venv_name ===" -./test-all BenchmarkTests.test_q_matrix -v -RESULT_FILE="${Q_BENCHMARK_RESULTS_FOLDER}/$venv_name.benchmark-results" -echo "==== Done. Results are in $RESULT_FILE" -ALL_FILES[${#ALL_FILES[@]}]="$RESULT_FILE" -echo "Deactivating" -pyenv deactivate -done - -echo "==== testing textql ===" -./test-all BenchmarkTests.test_textql_matrix -v -RESULT_FILE="textql*.benchmark-results" -ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" -echo "Done. 
Results are in textql.benchmark-results" - -echo "==== testing octosql ===" -./test-all BenchmarkTests.test_octosql_matrix -v -RESULT_FILE="octosql*.benchmark-results" -ALL_FILES[${#ALL_FILES[@]}]="${Q_BENCHMARK_RESULTS_FOLDER}/$RESULT_FILE" -echo "Done. Results are in octosql.benchmark-results" - -summary_file="$Q_BENCHMARK_RESULTS_FOLDER/summary.benchmark-results" - -rm -vf $summary_file - -paste ${ALL_FILES[*]} > $summary_file -echo "Done. final results file is $summary_file" diff --git a/test/test-all b/test/test-all deleted file mode 100755 index d3dcb022..00000000 --- a/test/test-all +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -set -e - -function return_to_original_folder() { - popd -} -trap return_to_original_folder EXIT - -pushd $(dirname $0)/ - -./test-suite "$@" - -set +e diff --git a/test/test-all.bat b/test/test-all.bat deleted file mode 100644 index 3192c9e5..00000000 --- a/test/test-all.bat +++ /dev/null @@ -1,4 +0,0 @@ -@echo off - -echo TBD - diff --git a/test/test-suite b/test/test-suite deleted file mode 100755 index 5628e6cf..00000000 --- a/test/test-suite +++ /dev/null @@ -1,2819 +0,0 @@ -#!/usr/bin/env python - -# -# test suite for q. -# -# Prefer end-to-end tests, running the actual q command and testing stdout/stderr, and the return code. -# Some utilities are provided for making that easy, see other tests for examples. -# -# Don't forget to use the Q_EXECUTABLE instead of hardcoding the q command line. This will be used in the near future -# in order to test the resulting binary executables as well, instead of just executing the q python source code. 
-# - -from __future__ import print_function -import unittest -import random -import json -from json import JSONEncoder -from subprocess import PIPE, Popen, STDOUT -import sys -import os -import time -from tempfile import NamedTemporaryFile -import locale -import pprint -import six -from six.moves import range -import codecs -import itertools - -sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) -from q import QTextAsData,QOutput,QOutputPrinter,QInputParams - -# q uses this encoding as the default output encoding. Some of the tests use it in order to -# make sure that the output is correctly encoded -SYSTEM_ENCODING = locale.getpreferredencoding() - -EXAMPLES = os.path.abspath(os.path.join(os.pardir, 'examples')) - -Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', '../bin/q.py') - -if not os.path.exists(Q_EXECUTABLE): - raise Exception("q executable must reside in {}".format(Q_EXECUTABLE)) - -DEBUG = False -if len(sys.argv) > 2 and sys.argv[2] == '-v': - DEBUG = True - - -def run_command(cmd_to_run): - global DEBUG - if DEBUG: - print("CMD: {}".format(cmd_to_run)) - - p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True) - o, e = p.communicate() - # remove last newline - o = o.rstrip() - e = e.strip() - # split rows - if o != six.b(''): - o = o.split(six.b(os.linesep)) - else: - o = [] - if e != six.b(''): - e = e.split(six.b(os.linesep)) - else: - e = [] - - res = (p.returncode, o, e) - if DEBUG: - print("RESULT:{}".format(res)) - return res - - -uneven_ls_output = six.b("""drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux -drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt -drwxr-xr-x 2 root root 4096 Apr 24 2013 /srv -drwx------ 2 root root 16384 Jun 21 2013 /lost+found -lrwxrwxrwx 1 root root 33 Jun 21 2013 /initrd.img.old -> /boot/initrd.img-3.8.0-19-generic -drwxr-xr-x 2 root root 4096 Jun 21 2013 /cdrom -drwxr-xr-x 3 root root 4096 Jun 21 2013 /home -lrwxrwxrwx 1 root root 29 Jun 21 2013 /vmlinuz -> boot/vmlinuz-3.8.0-19-generic 
-lrwxrwxrwx 1 root root 32 Jun 21 2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic -""") - - -find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp -8299123 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576 -8263229 964 -rw-rw-r-- 1 mapred mapred 984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser -8263230 4 -rw-rw-r-- 1 harel harel 1223 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormconf.ser -8299113 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate -8263406 4 -rw-rw-r-- 1 harel harel 2002 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746 -8263476 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746.version -8263607 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514169735.version -8263533 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514172733.version -8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version -""") - - -header_row = six.b('name,value1,value2') -sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')] -sample_data_rows_with_empty_string = [six.b('a,aaa,0'), six.b('b,bbb,0'), six.b('c,,0')] -sample_data_no_header = six.b("\n").join(sample_data_rows) + six.b("\n") -sample_data_with_empty_string_no_header = six.b("\n").join( - sample_data_rows_with_empty_string) + six.b("\n") -sample_data_with_header = header_row + six.b("\n") + 
sample_data_no_header -sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header - -def generate_sample_data_with_header(header): - return header + six.b("\n") + sample_data_no_header - -sample_quoted_data = six.b('''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted -control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" -non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline - value""." "this is an escaped \\"multiline - value\\"." -control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" -''') - -double_double_quoted_data = six.b('''regular_double_quoted double_double_quoted -"this is a quoted value" "this is a quoted value with ""double double quotes""" -''') - -escaped_double_quoted_data = six.b('''regular_double_quoted escaped_double_quoted -"this is a quoted value" "this is a quoted value with \\"escaped double quotes\\"" -''') - -combined_quoted_data = six.b('''regular_double_quoted double_double_quoted escaped_double_quoted -"this is a quoted value" "this is a quoted value with ""double double quotes""" "this is a quoted value with \\"escaped double quotes\\"" -''') - -sample_quoted_data2 = six.b('"quoted data" 23\nunquoted-data 54') - -sample_quoted_data2_with_newline = six.b('"quoted data with\na new line inside it":23\nunquoted-data:54') - -one_column_data = six.b('''data without commas 1 -data without commas 2 -''') - -# Values with leading whitespace -sample_data_rows_with_spaces = [six.b('a,1,0'), six.b(' b, 2,0'), six.b('c,,0')] -sample_data_with_spaces_no_header = six.b("\n").join( - sample_data_rows_with_spaces) + six.b("\n") - -header_row_with_spaces = six.b('name,value 1,value2') -sample_data_with_spaces_with_header = 
header_row_with_spaces + \ - six.b("\n") + sample_data_with_spaces_no_header - -long_value1 = "23683289372328372328373" -int_value = "2328372328373" -sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value) - - -def one_column_warning(e): - return e[0].startswith(six.b('Warning: column count is one')) - - -class AbstractQTestCase(unittest.TestCase): - - def create_file_with_data(self, data, encoding=None): - if encoding is not None: - raise Exception('Deprecated: Encoding must be none') - tmpfile = NamedTemporaryFile(delete=False) - tmpfile.write(data) - tmpfile.close() - return tmpfile - - def cleanup(self, tmpfile): - global DEBUG - if not DEBUG: - os.remove(tmpfile.name) - - def random_tmp_filename(self,prefix,postfix): - # TODO Use more robust method for this - path = '/var/tmp' - return '%s/%s-%s.%s' % (path,prefix,random.randint(0,1000000000),postfix) - - -class SaveDbToDiskTests(AbstractQTestCase): - - def test_store_to_disk(self): - db_filename = self.random_tmp_filename('store-to-disk','db') - self.assertFalse(os.path.exists(db_filename)) - - retcode, o, e = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) - - self.assertTrue(retcode == 0) - self.assertTrue(len(o) == 0) - self.assertTrue(len(e) == 5) - self.assertTrue(e[0].startswith(six.b('Going to save data'))) - self.assertTrue(db_filename.encode(sys.stdout.encoding or 'utf-8') in e[0]) - self.assertTrue(e[1].startswith(six.b('Data has been loaded in'))) - self.assertTrue(e[2].startswith(six.b('Saving data to db file'))) - self.assertTrue(e[3].startswith(six.b('Data has been saved into'))) - self.assertTrue(e[4] == six.b('Query to run on the database: select count(*) from `-`;')) - - self.assertTrue(os.path.exists(db_filename)) - - sqlite_command = """echo 'select * from `-`;' | sqlite3 %s""" % db_filename - sqlite_retcode,sqlite_o,sqlite_e = run_command(sqlite_command) - self.assertTrue(sqlite_retcode == 0) - 
self.assertTrue(len(sqlite_o) == 1000) - self.assertTrue(len(sqlite_e) == 0) - - os.remove(db_filename) - - def test_preventing_db_overwrite(self): - db_filename = self.random_tmp_filename('store-to-disk', 'db') - self.assertFalse(os.path.exists(db_filename)) - - retcode, o, e = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) - - self.assertTrue(retcode == 0) - self.assertTrue(os.path.exists(db_filename)) - - retcode2, o2, e2 = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) - self.assertTrue(retcode2 != 0) - self.assertTrue(e2[0].startswith(six.b('Going to save data into a disk database'))) - self.assertTrue(e2[1] == six.b('Disk database file {} already exists.'.format(db_filename))) - - os.remove(db_filename) - - -class BasicTests(AbstractQTestCase): - - def test_basic_aggregation(self): - retcode, o, e = run_command( - 'seq 1 10 | ' + Q_EXECUTABLE + ' "select sum(c1),avg(c1) from -"') - self.assertTrue(retcode == 0) - self.assertTrue(len(o) == 1) - self.assertTrue(len(e) == 1) - - s = sum(range(1, 11)) - self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) - self.assertTrue(one_column_warning(e)) - - def test_gzipped_file(self): - tmpfile = self.create_file_with_data( - six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) - - cmd = Q_EXECUTABLE + ' -z "select sum(c1),avg(c1) from %s"' % tmpfile.name - - retcode, o, e = run_command(cmd) - self.assertTrue(retcode == 0) - self.assertTrue(len(o) == 1) - self.assertTrue(len(e) == 1) - - s = sum(range(1, 11)) - self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) - self.assertTrue(one_column_warning(e)) - - self.cleanup(tmpfile) - - def test_attempt_to_unzip_stdin(self): - tmpfile = self.create_file_with_data( - 
six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) - - cmd = 'cat %s | ' % tmpfile.name + Q_EXECUTABLE + ' -z "select sum(c1),avg(c1) from -"' - - retcode, o, e = run_command(cmd) - self.assertTrue(retcode != 0) - self.assertTrue(len(o) == 0) - self.assertTrue(len(e) == 1) - - self.assertEqual(e[0],six.b('Cannot decompress standard input. Pipe the input through zcat in order to decompress.')) - - self.cleanup(tmpfile) - - def test_delimition_mistake_with_header(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -H' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 3) - - self.assertTrue(e[0].startswith( - six.b("Warning: column count is one - did you provide the correct delimiter"))) - self.assertTrue(e[1].startswith(six.b("Bad header row"))) - self.assertTrue(six.b("Column name cannot contain commas") in e[2]) - - self.cleanup(tmpfile) - - def test_select_one_column(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - - self.assertEqual(six.b(" ").join(o), six.b('a b c')) - - self.cleanup(tmpfile) - - def test_tab_delimition_parameter(self): - tmpfile = self.create_file_with_data( - sample_data_no_header.replace(six.b(","), six.b("\t"))) - cmd = Q_EXECUTABLE + ' -t "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - 
self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) - - self.cleanup(tmpfile) - - def test_pipe_delimition_parameter(self): - tmpfile = self.create_file_with_data( - sample_data_no_header.replace(six.b(","), six.b("|"))) - cmd = Q_EXECUTABLE + ' -p "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) - self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) - - self.cleanup(tmpfile) - - def test_tab_delimition_parameter__with_manual_override_attempt(self): - tmpfile = self.create_file_with_data( - sample_data_no_header.replace(six.b(","), six.b("\t"))) - cmd = Q_EXECUTABLE + ' -t -d , "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 1) - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) - self.assertEqual(e[0],six.b('Warning: -t parameter overrides -d parameter (,)')) - - self.cleanup(tmpfile) - - def test_pipe_delimition_parameter__with_manual_override_attempt(self): - tmpfile = self.create_file_with_data( - sample_data_no_header.replace(six.b(","), six.b("|"))) - cmd = Q_EXECUTABLE + ' -p -d , "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 1) - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) - self.assertEqual(o[2], 
sample_data_rows[2].replace(six.b(","), six.b("|"))) - self.assertEqual(e[0],six.b('Warning: -p parameter overrides -d parameter (,)')) - - self.cleanup(tmpfile) - - def test_output_delimiter(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) - self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) - - self.cleanup(tmpfile) - - def test_output_delimiter_tab_parameter(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , -T "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) - - self.cleanup(tmpfile) - - def test_output_delimiter_pipe_parameter(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , -P "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) - self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) - - self.cleanup(tmpfile) - - def test_output_delimiter_tab_parameter__with_manual_override_attempt(self): - tmpfile = 
self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 1) - - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) - self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) - self.assertEqual(e[0], six.b('Warning: -T parameter overrides -D parameter (|)')) - - self.cleanup(tmpfile) - - def test_output_delimiter_pipe_parameter__with_manual_override_attempt(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , -P -D ":" "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 1) - - self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) - self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) - self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) - self.assertEqual(e[0],six.b('Warning: -P parameter overrides -D parameter (:)')) - - self.cleanup(tmpfile) - - def test_stdin_input(self): - cmd = six.b('printf "%s" | ' + Q_EXECUTABLE + ' -d , "select c1,c2,c3 from -"') % sample_data_no_header - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], sample_data_rows[0]) - self.assertEqual(o[1], sample_data_rows[1]) - self.assertEqual(o[2], sample_data_rows[2]) - - def test_column_separation(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , "select c1,c2,c3 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - 
self.assertEqual(len(e), 0) - - self.assertEqual(o[0], sample_data_rows[0]) - self.assertEqual(o[1], sample_data_rows[1]) - self.assertEqual(o[2], sample_data_rows[2]) - - self.cleanup(tmpfile) - - def test_column_analysis(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1].strip(), six.b('`c1` - text')) - self.assertEqual(o[2].strip(), six.b('`c2` - int')) - self.assertEqual(o[3].strip(), six.b('`c3` - int')) - - self.cleanup(tmpfile) - - def test_column_analysis_no_header(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1].strip(), six.b('`c1` - text')) - self.assertEqual(o[2].strip(), six.b('`c2` - int')) - self.assertEqual(o[3].strip(), six.b('`c3` - int')) - - def test_header_exception_on_numeric_header_data(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A -H' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 3) - self.assertTrue( - six.b('Bad header row: Header must contain only strings') in e[0]) - self.assertTrue(six.b("Column name must be a string") in e[1]) - self.assertTrue(six.b("Column name must be a string") in e[2]) - - self.cleanup(tmpfile) - - def test_column_analysis_with_header(self): - tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A -H' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - 
self.assertEqual(len(o),4) - self.assertEqual(len(e),2) - self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1].strip(), six.b('`name` - text')) - self.assertEqual(o[2].strip(), six.b('`value1` - int')) - self.assertEqual(o[3].strip(), six.b('`value2` - int')) - self.assertEqual(e[0].strip(),six.b('query error: no such column: c1')) - self.assertTrue(e[1].startswith(six.b('Warning - There seems to be a '))) - - self.cleanup(tmpfile) - - def test_data_with_header(self): - tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(six.b(" ").join(o), six.b("a b c")) - - self.cleanup(tmpfile) - - def test_output_header_when_input_header_exists(self): - tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H -O' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 4) - self.assertEqual(o[0],six.b('name')) - self.assertEqual(o[1],six.b('a')) - self.assertEqual(o[2],six.b('b')) - self.assertEqual(o[3],six.b('c')) - - self.cleanup(tmpfile) - - def test_generated_column_name_warning_when_header_line_exists(self): - tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = Q_EXECUTABLE + ' -d , "select c3 from %s" -H' % tmpfile.name - - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 2) - self.assertTrue(six.b('no such column: c3') in e[0]) - self.assertTrue( - e[1].startswith(six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. 
Please make sure that you are using the column names from the header line and not the default (cXX) column names'))) - - self.cleanup(tmpfile) - - def test_column_analysis_with_unexpected_header(self): - tmpfile = self.create_file_with_data(sample_data_with_header) - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 4) - self.assertEqual(len(e), 1) - - self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1].strip(), six.b('`c1` - text')) - self.assertEqual(o[2].strip(), six.b('`c2` - text')) - self.assertEqual(o[3].strip(), six.b('`c3` - text')) - - self.assertEqual( - e[0], six.b('Warning - There seems to be header line in the file, but -H has not been specified. All fields will be detected as text fields, and the header line will appear as part of the data')) - - self.cleanup(tmpfile) - - def test_empty_data(self): - tmpfile = self.create_file_with_data(six.b('')) - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertTrue(six.b('Warning - data is empty') in e[0]) - - self.cleanup(tmpfile) - - def test_empty_data_with_header_param(self): - tmpfile = self.create_file_with_data(six.b('')) - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -H' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - m = six.b("Header line is expected but missing in file %s" % tmpfile.name) - self.assertTrue(m in e[0]) - - self.cleanup(tmpfile) - - def test_one_row_of_data_without_header_param(self): - tmpfile = self.create_file_with_data(header_row) - cmd = Q_EXECUTABLE + ' -d , "select c2 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - 
self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('value1')) - - self.cleanup(tmpfile) - - def test_one_row_of_data_with_header_param(self): - tmpfile = self.create_file_with_data(header_row) - cmd = Q_EXECUTABLE + ' -d , "select c2 from %s" -H' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertTrue(six.b('Warning - data is empty') in e[0]) - - self.cleanup(tmpfile) - - def test_dont_leading_keep_whitespace_in_values(self): - tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0], six.b('a')) - self.assertEqual(o[1], six.b('b')) - self.assertEqual(o[2], six.b('c')) - - self.cleanup(tmpfile) - - def test_keep_leading_whitespace_in_values(self): - tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) - cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -k' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0], six.b('a')) - self.assertEqual(o[1], six.b(' b')) - self.assertEqual(o[2], six.b('c')) - - self.cleanup(tmpfile) - - def test_no_impact_of_keeping_leading_whitespace_on_integers(self): - tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) - cmd = Q_EXECUTABLE + ' -d , "select c2 from %s" -k -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 4) - - self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1].strip(), six.b('`c1` - text')) - self.assertEqual(o[2].strip(), six.b('`c2` - int')) - 
self.assertEqual(o[3].strip(), six.b('`c3` - int')) - - self.cleanup(tmpfile) - - def test_spaces_in_header_row(self): - tmpfile = self.create_file_with_data( - header_row_with_spaces + six.b("\n") + sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , "select name,\`value 1\` from %s" -H' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0], six.b('a,1')) - self.assertEqual(o[1], six.b('b,2')) - self.assertEqual(o[2], six.b('c,')) - - self.cleanup(tmpfile) - - def test_column_analysis_for_spaces_in_header_row(self): - tmpfile = self.create_file_with_data( - header_row_with_spaces + six.b("\n") + sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , "select name,\`value 1\` from %s" -H -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 4) - - self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1].strip(), six.b('`name` - text')) - self.assertEqual(o[2].strip(), six.b('`value 1` - int')) - self.assertEqual(o[3].strip(), six.b('`value2` - int')) - - self.cleanup(tmpfile) - - def test_no_query_in_command_line(self): - cmd = Q_EXECUTABLE + ' -d , ""' - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 1) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 0) - - self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) - - def test_empty_query_in_command_line(self): - cmd = Q_EXECUTABLE + ' -d , " "' - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 1) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 0) - - self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) - - def test_failure_in_query_stops_processing_queries(self): - cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300" "wrong-query" "select 8000"' - retcode, o, e = run_command(cmd) - - 
self.assertEqual(retcode, 1) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 2) - self.assertEqual(o[0],six.b('500')) - self.assertEqual(o[1],six.b('300')) - - def test_multiple_queries_in_command_line(self): - cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300+100" "select 300" "select 200"' - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 4) - - self.assertEqual(o[0],six.b('500')) - self.assertEqual(o[1],six.b('400')) - self.assertEqual(o[2],six.b('300')) - self.assertEqual(o[3],six.b('200')) - - def test_literal_calculation_query(self): - cmd = Q_EXECUTABLE + ' -d , "select 1+40/6"' - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 1) - - self.assertEqual(o[0],six.b('7')) - - def test_literal_calculation_query_float_result(self): - cmd = Q_EXECUTABLE + ' -d , "select 1+40/6.0"' - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 1) - - self.assertEqual(o[0],six.b('7.666666666666667')) - - def test_use_query_file(self): - tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) - - cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0], six.b('a')) - self.assertEqual(o[1], six.b('b')) - self.assertEqual(o[2], six.b('c')) - - self.cleanup(tmp_data_file) - self.cleanup(tmp_query_file) - - def test_use_query_file_with_incorrect_query_encoding(self): - tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) - - cmd = Q_EXECUTABLE + ' 
-d , -q %s -H -Q ascii' % tmp_query_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,3) - self.assertEqual(len(o),0) - self.assertEqual(len(e),1) - - self.assertTrue(e[0].startswith(six.b('Could not decode query number 1 using the provided query encoding (ascii)'))) - - self.cleanup(tmp_data_file) - self.cleanup(tmp_query_file) - - def test_output_header_with_non_ascii_names(self): - tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name),encoding=None) - - cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(o),4) - self.assertEqual(len(e),0) - - self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'name,Hr\xe1\u010d') - self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d') - self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d') - self.assertEqual(o[3].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d') - - self.cleanup(tmp_data_file) - self.cleanup(tmp_query_file) - - def test_use_query_file_with_query_encoding(self): - tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) - - cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8' % tmp_query_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d') - self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d') - self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d') - - self.cleanup(tmp_data_file) - self.cleanup(tmp_query_file) - - def test_use_query_file_and_command_line(self): - tmp_data_file = 
self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) - - cmd = Q_EXECUTABLE + ' -d , -q %s -H "select * from ppp"' % tmp_query_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 1) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 0) - - self.assertTrue(e[0].startswith(six.b("Can't provide both a query file and a query on the command line"))) - - self.cleanup(tmp_data_file) - self.cleanup(tmp_query_file) - - def test_select_output_encoding(self): - tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) - - for target_encoding in ['utf-8','ibm852']: - cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0].decode(target_encoding), u'Hr\xe1\u010d') - self.assertEqual(o[1].decode(target_encoding), u'Hr\xe1\u010d') - self.assertEqual(o[2].decode(target_encoding), u'Hr\xe1\u010d') - - self.cleanup(tmp_data_file) - self.cleanup(tmp_query_file) - - def test_select_failed_output_encoding(self): - tmp_data_file = self.create_file_with_data(sample_data_with_header) - tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) - - cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 3) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 0) - - self.assertTrue(e[0].startswith(six.b('Cannot encode data'))) - - self.cleanup(tmp_data_file) - self.cleanup(tmp_query_file) - - - def test_use_query_file_with_empty_query(self): - tmp_query_file = 
self.create_file_with_data(six.b(" ")) - - cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 1) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 0) - - self.assertTrue(e[0].startswith(six.b("Query cannot be empty"))) - - self.cleanup(tmp_query_file) - - def test_use_non_existent_query_file(self): - cmd = Q_EXECUTABLE + ' -d , -q non-existent-query-file -H' - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 1) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 0) - - self.assertTrue(e[0].startswith(six.b("Could not read query from file"))) - - def test_non_quoted_values_in_quoted_data(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data) - - cmd = Q_EXECUTABLE + ' -d " " "select c1 from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),4) - - self.assertTrue(o[0],'non_quoted') - self.assertTrue(o[1],'control-value-1') - self.assertTrue(o[2],'non-quoted-value') - self.assertTrue(o[3],'control-value-1') - - self.cleanup(tmp_data_file) - - def test_regular_quoted_values_in_quoted_data(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data) - - cmd = Q_EXECUTABLE + ' -d " " "select c2 from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),4) - - self.assertTrue(o[0],'regular_double_quoted') - self.assertTrue(o[1],'control-value-2') - self.assertTrue(o[2],'this is a quoted value') - self.assertTrue(o[3],'control-value-2') - - self.cleanup(tmp_data_file) - - def test_double_double_quoted_values_in_quoted_data(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data) - - cmd = Q_EXECUTABLE + ' -d " " "select c3 from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - 
self.assertEqual(len(e),0) - self.assertEqual(len(o),4) - - self.assertTrue(o[0],'double_double_quoted') - self.assertTrue(o[1],'control-value-3') - self.assertTrue(o[2],'this is a "double double" quoted value') - self.assertTrue(o[3],'control-value-3') - - self.cleanup(tmp_data_file) - - def test_escaped_double_quoted_values_in_quoted_data(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data) - - cmd = Q_EXECUTABLE + ' -d " " "select c4 from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),4) - - self.assertTrue(o[0],'escaped_double_quoted') - self.assertTrue(o[1],'control-value-4') - self.assertTrue(o[2],'this is an escaped "quoted value"') - self.assertTrue(o[3],'control-value-4') - - self.cleanup(tmp_data_file) - - def test_none_input_quoting_mode_in_relaxed_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -m relaxed -D , -w none -W none "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('"quoted,data",23')) - self.assertEqual(o[1],six.b('unquoted-data,54,')) - - self.cleanup(tmp_data_file) - - def test_none_input_quoting_mode_in_strict_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -m strict -D , -w none "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode,0) - self.assertEqual(len(e),1) - self.assertEqual(len(o),0) - - self.assertTrue(e[0].startswith(six.b('Strict mode. 
Column Count is expected to identical'))) - - self.cleanup(tmp_data_file) - - def test_minimal_input_quoting_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -D , -w minimal "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('quoted data,23')) - self.assertEqual(o[1],six.b('unquoted-data,54')) - - self.cleanup(tmp_data_file) - - def test_all_input_quoting_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -D , -w all "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('quoted data,23')) - self.assertEqual(o[1],six.b('unquoted-data,54')) - - self.cleanup(tmp_data_file) - - def test_incorrect_input_quoting_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -D , -w unknown_wrapping_mode "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode,0) - self.assertEqual(len(e),1) - self.assertEqual(len(o),0) - - self.assertTrue(e[0].startswith(six.b('Input quoting mode can only be one of all,minimal,none'))) - self.assertTrue(six.b('unknown_wrapping_mode') in e[0]) - - self.cleanup(tmp_data_file) - - def test_none_output_quoting_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W none "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('quoted data,23')) - self.assertEqual(o[1],six.b('unquoted-data,54')) - - self.cleanup(tmp_data_file) - 
- def test_minimal_output_quoting_mode__without_need_to_quote_in_output(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W minimal "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('quoted data,23')) - self.assertEqual(o[1],six.b('unquoted-data,54')) - - self.cleanup(tmp_data_file) - - def test_minimal_output_quoting_mode__with_need_to_quote_in_output_due_to_delimiter(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - # output delimiter is set to space, so the output will contain it - cmd = Q_EXECUTABLE + ' -d " " -D " " -w all -W minimal "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('"quoted data" 23')) - self.assertEqual(o[1],six.b('unquoted-data 54')) - - self.cleanup(tmp_data_file) - - def test_minimal_output_quoting_mode__with_need_to_quote_in_output_due_to_newline(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2_with_newline) - - # Delimiter is set to colon (:), so it will not be inside the data values (this will make sure that the newline is the one causing the quoting) - cmd = Q_EXECUTABLE + " -d ':' -w all -W minimal \"select c1,c2,replace(c1,'with' || x'0a' || 'a new line inside it','NEWLINE-REMOVED') from %s\"" % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),3) - - self.assertEqual(o[0],six.b('"quoted data with')) - # Notice that the third column here is not quoted, because we replaced the newline with something else - self.assertEqual(o[1],six.b('a new line inside it":23:quoted data NEWLINE-REMOVED')) - 
self.assertEqual(o[2],six.b('unquoted-data:54:unquoted-data')) - - self.cleanup(tmp_data_file) - - def test_nonnumeric_output_quoting_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W nonnumeric "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('"quoted data",23')) - self.assertEqual(o[1],six.b('"unquoted-data",54')) - - self.cleanup(tmp_data_file) - - def test_all_output_quoting_mode(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data2) - - cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W all "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('"quoted data","23"')) - self.assertEqual(o[1],six.b('"unquoted-data","54"')) - - self.cleanup(tmp_data_file) - - def _internal_test_consistency_of_chaining_output_to_input(self,input_data,input_wrapping_mode,output_wrapping_mode): - - tmp_data_file = self.create_file_with_data(input_data) - - basic_cmd = Q_EXECUTABLE + ' -w %s -W %s "select * from -"' % (input_wrapping_mode,output_wrapping_mode) - chained_cmd = 'cat %s | %s | %s | %s' % (tmp_data_file.name,basic_cmd,basic_cmd,basic_cmd) - - retcode, o, e = run_command(chained_cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(six.b("\n").join(o),input_data) - - self.cleanup(tmp_data_file) - - def test_consistency_of_chaining_minimal_wrapping_to_minimal_wrapping(self): - input_data = six.b('"quoted data" 23\nunquoted-data 54') - self._internal_test_consistency_of_chaining_output_to_input(input_data,'minimal','minimal') - - def test_consistency_of_chaining_all_wrapping_to_all_wrapping(self): - input_data = six.b('"quoted data" 
"23"\n"unquoted-data" "54"') - self._internal_test_consistency_of_chaining_output_to_input(input_data,'all','all') - - def test_utf8_with_bom_encoding(self): - utf_8_data_with_bom = six.b('\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n') - tmp_data_file = self.create_file_with_data(utf_8_data_with_bom,encoding=None) - - cmd = Q_EXECUTABLE + ' -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),3) - - self.assertEqual(o[0],six.b('typeid,limit,apcost,date,checkpointId')) - self.assertEqual(o[1],six.b('1,2,5,"1,2,3,4,5,6,7","3000,3001,3002"')) - self.assertEqual(o[2],six.b('2,2,5,"1,2,3,4,5,6,7","3003,3004,3005"')) - - self.cleanup(tmp_data_file) - - def test_input_field_quoting_and_data_types_with_encoding(self): - # Checks combination of minimal input field quoting, with special characters that need to be decoded - - # Both content and proper data types are verified - data = six.b('111,22.22,"testing text with special characters - citt\xc3\xa0 ",http://somekindofurl.com,12.13.14.15,12.1\n') - tmp_data_file = self.create_file_with_data(data) - - cmd = Q_EXECUTABLE + ' -d , "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),1) - - self.assertEqual(o[0].decode('utf-8'),u'111,22.22,testing text with special characters - citt\xe0 ,http://somekindofurl.com,12.13.14.15,12.1') - - cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),7) - - self.assertTrue(o[0].startswith(six.b('Table for file'))) - self.assertEqual(o[1].strip(),six.b('`c1` - int')) - 
self.assertEqual(o[2].strip(),six.b('`c2` - float')) - self.assertEqual(o[3].strip(),six.b('`c3` - text')) - self.assertEqual(o[4].strip(),six.b('`c4` - text')) - self.assertEqual(o[5].strip(),six.b('`c5` - text')) - self.assertEqual(o[6].strip(),six.b('`c6` - float')) - - self.cleanup(tmp_data_file) - - def test_multiline_double_double_quoted_values_in_quoted_data(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data) - - # FIXME Need to convert \0a to proper encoding suitable for the person running the tests. - cmd = Q_EXECUTABLE + ' -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),4) - - self.assertTrue(o[0],six.b('multiline_double_double_quoted')) - self.assertTrue(o[1],six.b('control-value-5')) - self.assertTrue(o[2],six.b('this is a double double quoted "multiline\n value".')) - self.assertTrue(o[3],six.b('control-value-5')) - - self.cleanup(tmp_data_file) - - def test_multiline_escaped_double_quoted_values_in_quoted_data(self): - tmp_data_file = self.create_file_with_data(sample_quoted_data) - - # FIXME Need to convert \0a to proper encoding suitable for the person running the tests. - cmd = Q_EXECUTABLE + ' -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),4) - - self.assertTrue(o[0],'multiline_escaped_double_quoted') - self.assertTrue(o[1],'control-value-6') - self.assertTrue(o[2],'this is an escaped "multiline:: value".') - self.assertTrue(o[3],'control-value-6') - - self.cleanup(tmp_data_file) - - def test_disable_double_double_quoted_data_flag__values(self): - # This test (and flag) is meant to verify backward comptibility only. 
It is possible that - # this flag will be removed completely in the future - - tmp_data_file = self.create_file_with_data(double_double_quoted_data) - - cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c2 from %s" -W none' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('double_double_quoted')) - self.assertEqual(o[1],six.b('this is a quoted value with "double')) - - cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('')) - self.assertEqual(o[1],six.b('double')) - - cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('')) - self.assertEqual(o[1],six.b('quotes"""')) - - self.cleanup(tmp_data_file) - - def test_disable_escaped_double_quoted_data_flag__values(self): - # This test (and flag) is meant to verify backward comptibility only. 
It is possible that - # this flag will be removed completely in the future - - tmp_data_file = self.create_file_with_data(escaped_double_quoted_data) - - cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c2 from %s" -W none' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('escaped_double_quoted')) - self.assertEqual(o[1],six.b('this is a quoted value with \\escaped')) - - cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('')) - self.assertEqual(o[1],six.b('double')) - - cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('')) - self.assertEqual(o[1],six.b('quotes\\""')) - - self.cleanup(tmp_data_file) - - def test_combined_quoted_data_flags__number_of_columns_detected(self): - # This test (and flags) is meant to verify backward comptibility only. It is possible that - # these flags will be removed completely in the future - tmp_data_file = self.create_file_with_data(combined_quoted_data) - - cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - o = o[1:] # remove the first "Table for file..." 
line in the output - - self.assertEqual(len(o),7) # found 7 fields - - cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - o = o[1:] # remove the first "Table for file..." line in the output - - self.assertEqual(len(o),5) # found 5 fields - - cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - o = o[1:] # remove the first "Table for file..." line in the output - - self.assertEqual(len(o),5) # found 5 fields - - cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -A' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(e),0) - o = o[1:] # remove the first "Table for file..." line in the output - - self.assertEqual(len(o),3) # found only 3 fields, which is the correct amount - - self.cleanup(tmp_data_file) - - def test_nonexistent_file(self): - cmd = Q_EXECUTABLE + ' "select * from non-existent-file"' - - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode,0) - self.assertEqual(len(o),0) - self.assertEqual(len(e),1) - - self.assertEqual(e[0],six.b("No files matching 'non-existent-file' have been found")) - - def test_default_column_max_length_parameter__short_enough(self): - huge_text = six.b("x" * 131000) - - file_data = six.b("a,b,c\n1,{},3\n".format(huge_text)) - - tmpfile = self.create_file_with_data(file_data) - - cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0],six.b('1')) - - self.cleanup(tmpfile) - - def test_default_column_max_length_parameter__too_long(self): - huge_text = six.b("x") * 132000 - - file_data = 
six.b("a,b,c\n1,{},3\n".format(huge_text)) - - tmpfile = self.create_file_with_data(file_data) - - cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 31) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) - self.assertTrue(six.b("Offending file is '{}'".format(tmpfile.name)) in e[0]) - self.assertTrue(six.b('Line is 2') in e[0]) - - self.cleanup(tmpfile) - - def test_column_max_length_parameter(self): - file_data = six.b("a,b,c\nvery-long-text,2,3\n") - tmpfile = self.create_file_with_data(file_data) - - cmd = Q_EXECUTABLE + ' -H -d , -M 3 "select a from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 31) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) - self.assertTrue((six.b("Offending file is '%s'" % tmpfile.name)) in e[0]) - self.assertTrue(six.b('Line is 2') in e[0]) - - cmd2 = Q_EXECUTABLE + ' -H -d , -M 300 -H "select a from %s"' % tmpfile.name - retcode2, o2, e2 = run_command(cmd2) - - self.assertEqual(retcode2, 0) - self.assertEqual(len(o2), 1) - self.assertEqual(len(e2), 0) - - self.assertEqual(o2[0],six.b('very-long-text')) - - self.cleanup(tmpfile) - - def test_invalid_column_max_length_parameter(self): - file_data = six.b("a,b,c\nvery-long-text,2,3\n") - tmpfile = self.create_file_with_data(file_data) - - cmd = Q_EXECUTABLE + ' -H -d , -M 0 "select a from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 31) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertTrue(e[0].startswith(six.b('Max column length limit must be a positive integer'))) - - - self.cleanup(tmpfile) - - def test_duplicate_column_name_detection(self): - file_data = six.b("a,b,a\n10,20,30\n30,40,50") - tmpfile = 
self.create_file_with_data(file_data) - - cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 35) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 2) - - self.assertTrue(e[0].startswith(six.b('Bad header row:'))) - self.assertEqual(e[1],six.b("'a': Column name is duplicated")) - - self.cleanup(tmpfile) - - -class UserFunctionTests(AbstractQTestCase): - def test_regexp_int_data_handling(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0],six.b("1")) - - self.cleanup(tmpfile) - - def test_percentile_func(self): - cmd = 'seq 1000 1999 | %s "select substr(c1,0,3),percentile(c1,0),percentile(c1,0.5),percentile(c1,1) from - group by substr(c1,0,3)" -c 1' % Q_EXECUTABLE - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 10) - self.assertEqual(len(e), 0) - - output_table = [l.split(six.b(" ")) for l in o] - group_labels = [int(row[0]) for row in output_table] - minimum_values = [float(row[1]) for row in output_table] - median_values = [float(row[2]) for row in output_table] - max_values = [float(row[3]) for row in output_table] - - base_values = list(range(1000,2000,100)) - - self.assertEqual(group_labels,list(range(10,20))) - self.assertEqual(minimum_values,base_values) - self.assertEqual(median_values,list(map(lambda x: x + 49.5,base_values))) - self.assertEqual(max_values,list(map(lambda x: x + 99,base_values))) - - def test_regexp_null_data_handling(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - - cmd = Q_EXECUTABLE + ' -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - 
self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0],six.b("2")) - - self.cleanup(tmpfile) - - def test_md5_function(self): - cmd = 'seq 1 4 | %s -c 1 -d , "select c1,md5(c1,\'utf-8\') from -"' % Q_EXECUTABLE - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(o),4) - self.assertEqual(len(e),0) - - self.assertEqual(tuple(o[0].split(six.b(','),1)),(six.b('1'),six.b('c4ca4238a0b923820dcc509a6f75849b'))) - self.assertEqual(tuple(o[1].split(six.b(','),1)),(six.b('2'),six.b('c81e728d9d4c2f636f067f89cc14862c'))) - self.assertEqual(tuple(o[2].split(six.b(','),1)),(six.b('3'),six.b('eccbc87e4b5ce2fe28308fd9f2a7baf3'))) - self.assertEqual(tuple(o[3].split(six.b(','),1)),(six.b('4'),six.b('a87ff679a2f3e71d9181a67b7542122c'))) - - def test_stddev_functions(self): - tmpfile = self.create_file_with_data(six.b("\n".join(map(str,[234,354,3234,123,4234,234,634,56,65])))) - - cmd = '%s -c 1 -d , "select round(stddev_pop(c1),10),round(stddev_sample(c1),10) from %s"' % (Q_EXECUTABLE,tmpfile.name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(o),1) - self.assertEqual(len(e),0) - - self.assertEqual(o[0],six.b('1479.7015464838,1569.4604964764')) - - self.cleanup(tmpfile) - - def test_sqrt_function(self): - cmd = 'seq 1 5 | %s -c 1 -d , "select round(sqrt(c1),10) from -"' % Q_EXECUTABLE - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(o),5) - self.assertEqual(len(e),0) - - self.assertEqual(o[0],six.b('1.0')) - self.assertEqual(o[1],six.b('1.4142135624')) - self.assertEqual(o[2],six.b('1.7320508076')) - self.assertEqual(o[3],six.b('2.0')) - self.assertEqual(o[4],six.b('2.2360679775')) - - def test_power_function(self): - cmd = 'seq 1 5 | %s -c 1 -d , "select round(power(c1,2.5),10) from -"' % Q_EXECUTABLE - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(o),5) - self.assertEqual(len(e),0) - - 
self.assertEqual(o[0],six.b('1.0')) - self.assertEqual(o[1],six.b('5.6568542495')) - self.assertEqual(o[2],six.b('15.5884572681')) - self.assertEqual(o[3],six.b('32.0')) - self.assertEqual(o[4],six.b('55.9016994375')) - - def test_sha1_function(self): - cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha1(c1) from -"' % Q_EXECUTABLE - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(o),4) - self.assertEqual(len(e),0) - - self.assertEqual(o[0],six.b('1,356a192b7913b04c54574d18c28d46e6395428ab')) - self.assertEqual(o[1],six.b('2,da4b9237bacccdf19c0760cab7aec4a8359010b0')) - self.assertEqual(o[2],six.b('3,77de68daecd823babbb58edb1c8e14d7106e83bb')) - self.assertEqual(o[3],six.b('4,1b6453892473a467d07372d45eb05abc2031647a')) - - def test_sha_function(self): - cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha(c1,1,\'utf-8\') as sha1,sha(c1,224,\'utf-8\') as sha224,sha(c1,256,\'utf-8\') as sha256 from -"' % Q_EXECUTABLE - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - self.assertEqual(len(o),4) - self.assertEqual(len(e),0) - - self.assertEqual(o[0],six.b('1,356a192b7913b04c54574d18c28d46e6395428ab,e25388fde8290dc286a6164fa2d97e551b53498dcbf7bc378eb1f178,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b')) - self.assertEqual(o[1],six.b('2,da4b9237bacccdf19c0760cab7aec4a8359010b0,58b2aaa0bfae7acc021b3260e941117b529b2e69de878fd7d45c61a9,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35')) - self.assertEqual(o[2],six.b('3,77de68daecd823babbb58edb1c8e14d7106e83bb,4cfc3a1811fe40afa401b25ef7fa0379f1f7c1930a04f8755d678474,4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce')) - self.assertEqual(o[3],six.b('4,1b6453892473a467d07372d45eb05abc2031647a,271f93f45e9b4067327ed5c8cd30a034730aaace4382803c3e1d6c2f,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328cb08b5531fcacdabf8a')) - - -class MultiHeaderTests(AbstractQTestCase): - def test_output_header_when_multiple_input_headers_exist(self): - 
TMPFILE_COUNT = 5 - tmpfiles = [self.create_file_with_data(sample_data_with_header) for x in range(TMPFILE_COUNT)] - - tmpfilenames = "+".join(map(lambda x:x.name, tmpfiles)) - - cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s order by name" -H -O' % tmpfilenames - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), TMPFILE_COUNT*3+1) - self.assertEqual(o[0], six.b("name,value1,value2")) - - for i in range (TMPFILE_COUNT): - self.assertEqual(o[1+i],sample_data_rows[0]) - for i in range (TMPFILE_COUNT): - self.assertEqual(o[TMPFILE_COUNT+1+i],sample_data_rows[1]) - for i in range (TMPFILE_COUNT): - self.assertEqual(o[TMPFILE_COUNT*2+1+i],sample_data_rows[2]) - - for oi in o[1:]: - self.assertTrue(six.b('name') not in oi) - - for i in range(TMPFILE_COUNT): - self.cleanup(tmpfiles[i]) - - def test_output_header_when_extra_header_column_names_are_different(self): - tmpfile1 = self.create_file_with_data(sample_data_with_header) - tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('othername,value1,value2'))) - - cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 35) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - self.assertTrue(e[0].startswith(six.b("Bad header row:"))) - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) - - def test_output_header_when_extra_header_has_different_number_of_columns(self): - tmpfile1 = self.create_file_with_data(sample_data_with_header) - tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('name,value1'))) - - cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from %s+%s order by name" -H -O' % (tmpfile1.name,tmpfile2.name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 35) - - -class ParsingModeTests(AbstractQTestCase): - - def 
test_strict_mode_column_count_mismatch_error(self): - tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = Q_EXECUTABLE + ' -m strict "select count(*) from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertTrue(six.b("Column Count is expected to identical") in e[0]) - - self.cleanup(tmpfile) - - def test_strict_mode_too_large_specific_column_count(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertEqual( - e[0], six.b("Strict mode. Column count is expected to be 4 but is 3")) - - self.cleanup(tmpfile) - - def test_strict_mode_too_small_specific_column_count(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , -m strict -c 2 "select count(*) from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertEqual( - e[0], six.b("Strict mode. 
Column count is expected to be 2 but is 3")) - - self.cleanup(tmpfile) - - def test_relaxed_mode_missing_columns_in_header(self): - tmpfile = self.create_file_with_data( - sample_data_with_missing_header_names) - cmd = Q_EXECUTABLE + ' -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 4) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1].strip(), six.b('`name` - text')) - self.assertEqual(o[2].strip(), six.b('`value1` - int')) - self.assertEqual(o[3].strip(), six.b('`c3` - int')) - - self.cleanup(tmpfile) - - def test_strict_mode_missing_columns_in_header(self): - tmpfile = self.create_file_with_data( - sample_data_with_missing_header_names) - cmd = Q_EXECUTABLE + ' -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(o), 0) - self.assertEqual(len(e), 1) - - self.assertEqual( - e[0], six.b('Strict mode. 
Header row contains less columns than expected column count(2 vs 3)')) - - self.cleanup(tmpfile) - - def test_output_delimiter_with_missing_fields(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , "select * from %s" -D ";"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('a;1;0')) - self.assertEqual(o[1], six.b('b;2;0')) - self.assertEqual(o[2], six.b('c;;0')) - - self.cleanup(tmpfile) - - def test_handling_of_null_integers(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , "select avg(c2) from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('1.5')) - - self.cleanup(tmpfile) - - def test_empty_integer_values_converted_to_null(self): - tmpfile = self.create_file_with_data(sample_data_no_header) - cmd = Q_EXECUTABLE + ' -d , "select * from %s where c2 is null"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('c,,0')) - - self.cleanup(tmpfile) - - def test_empty_string_values_not_converted_to_null(self): - tmpfile = self.create_file_with_data( - sample_data_with_empty_string_no_header) - cmd = Q_EXECUTABLE + ' -d , "select * from %s where c2 == %s"' % ( - tmpfile.name, "''") - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('c,,0')) - - self.cleanup(tmpfile) - - def test_relaxed_mode_detected_columns(self): - tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A' % tmpfile.name - retcode, o, e = run_command(cmd) - - 
self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - - table_name_row = o[0] - column_rows = o[1:] - - self.assertEqual(len(column_rows), 11) - - column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] - column_info = [(x[0], x[2]) for x in column_tuples] - column_names = [x[0] for x in column_tuples] - column_types = [x[2] for x in column_tuples] - - self.assertEqual(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 12)]) - self.assertEqual(column_types, list(map(lambda x:six.b(x),[ - 'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text']))) - - self.cleanup(tmpfile) - - def test_relaxed_mode_detected_columns_with_specific_column_count(self): - tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - - table_name_row = o[0] - column_rows = o[1:] - - self.assertEqual(len(column_rows), 9) - - column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] - column_info = [(x[0], x[2]) for x in column_tuples] - column_names = [x[0] for x in column_tuples] - column_types = [x[2] for x in column_tuples] - - self.assertEqual(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 10)]) - self.assertEqual( - column_types, list(map(lambda x:six.b(x),['text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text']))) - - self.cleanup(tmpfile) - - def test_relaxed_mode_last_column_data_with_specific_column_count(self): - tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = Q_EXECUTABLE + ' -m relaxed "select c9 from %s" -c 9' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 9) - self.assertEqual(len(e), 0) - - expected_output = list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", '"/initrd.img.old -> /boot/initrd.img-3.8.0-19-generic"', - 
"/cdrom", "/home", '"/vmlinuz -> boot/vmlinuz-3.8.0-19-generic"', '"/initrd.img -> boot/initrd.img-3.8.0-19-generic"'])) - - self.assertEqual(o, expected_output) - - self.cleanup(tmpfile) - - def test_1_column_warning_in_relaxed_mode(self): - tmpfile = self.create_file_with_data(one_column_data) - cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d ,' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 1) - self.assertEqual(len(o),2) - - self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) - self.assertEqual(o[0],six.b('data without commas 1')) - self.assertEqual(o[1],six.b('data without commas 2')) - - self.cleanup(tmpfile) - - def test_1_column_warning_in_strict_mode(self): - tmpfile = self.create_file_with_data(one_column_data) - cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m strict' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 1) - self.assertEqual(len(o),2) - - self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) - self.assertEqual(o[0],six.b('data without commas 1')) - self.assertEqual(o[1],six.b('data without commas 2')) - - self.cleanup(tmpfile) - - - def test_1_column_warning_suppression_in_relaxed_mode_when_column_count_is_specific(self): - tmpfile = self.create_file_with_data(one_column_data) - cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m relaxed -c 1' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('data without commas 1')) - self.assertEqual(o[1],six.b('data without commas 2')) - - self.cleanup(tmpfile) - - def test_1_column_warning_suppression_in_strict_mode_when_column_count_is_specific(self): - tmpfile = self.create_file_with_data(one_column_data) - cmd = Q_EXECUTABLE + ' 
-m relaxed "select c1 from %s" -d , -m strict -c 1' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o),2) - - self.assertEqual(o[0],six.b('data without commas 1')) - self.assertEqual(o[1],six.b('data without commas 2')) - - self.cleanup(tmpfile) - - def test_fluffy_mode(self): - tmpfile = self.create_file_with_data(uneven_ls_output) - cmd = Q_EXECUTABLE + ' -m fluffy "select c9 from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 9) - self.assertEqual(len(e), 0) - - expected_output = list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", - "/initrd.img.old", "/cdrom", "/home", "/vmlinuz", "/initrd.img"])) - - self.assertEqual(o, expected_output) - - self.cleanup(tmpfile) - - def test_fluffy_mode_column_count_mismatch(self): - data_row = six.b("column1 column2 column3 column4") - data_list = [data_row] * 1000 - data_list[950] = six.b("column1 column2 column3 column4 column5") - tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) - - cmd = Q_EXECUTABLE + ' -m fluffy "select * from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode,0) - self.assertEqual(len(o),0) - self.assertEqual(len(e),1) - self.assertTrue(e[0].startswith(six.b("Deprecated fluffy mode"))) - self.assertTrue(six.b(' row 951 ') in e[0]) - - self.cleanup(tmpfile) - - def test_strict_mode_column_count_mismatch__less_columns(self): - data_row = six.b("column1 column2 column3 column4") - data_list = [data_row] * 1000 - data_list[750] = six.b("column1 column3 column4") - tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) - - cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode,0) - self.assertEqual(len(o),0) - self.assertEqual(len(e),1) - self.assertTrue(e[0].startswith(six.b("Strict mode - 
Expected 4 columns instead of 3 columns"))) - self.assertTrue(six.b(' row 751.') in e[0]) - - self.cleanup(tmpfile) - - def test_strict_mode_column_count_mismatch__more_columns(self): - data_row = six.b("column1 column2 column3 column4") - data_list = [data_row] * 1000 - data_list[750] = six.b("column1 column2 column3 column4 column5") - tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) - - cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode,0) - self.assertEqual(len(o),0) - self.assertEqual(len(e),1) - self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 5 columns"))) - self.assertTrue(six.b(' row 751.') in e[0]) - - self.cleanup(tmpfile) - - -class FormattingTests(AbstractQTestCase): - - def test_column_formatting(self): - # TODO Decide if this breaking change is reasonable - #cmd = 'seq 1 10 | ' + Q_EXECUTABLE + ' -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -" -c 1' - cmd = 'seq 1 10 | ' + Q_EXECUTABLE + ' -f 1={:4.3f},2={:4.3f} "select sum(c1),avg(c1) from -" -c 1' - - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 1) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('55.000 5.500')) - - def test_column_formatting_with_output_header(self): - perl_regex = "'s/1\n/column_name\n1\n/;'" - # TODO Decide if this breaking change is reasonable - #cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ' + Q_EXECUTABLE + ' -f 1=%4.3f,2=%4.3f "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' - cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ' + Q_EXECUTABLE + ' -f 1={:4.3f},2={:4.3f} "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' - - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 2) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('mysum myavg')) - self.assertEqual(o[1], 
six.b('55.000 5.500')) - - def py2_test_failure_to_parse_universal_newlines_without_explicit_flag(self): - data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') - tmp_data_file = self.create_file_with_data(data) - - cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertNotEqual(retcode, 0) - self.assertEqual(len(e), 1) - self.assertEqual(len(o), 0) - - self.assertTrue(e[0].startswith(six.b('Data contains universal newlines'))) - - self.cleanup(tmp_data_file) - - def py3_test_successfuly_parse_universal_newlines_without_explicit_flag(self): - def list_as_byte_list(l): - return list(map(lambda x:six.b(x),l)) - - expected_output = list(map(lambda x:list_as_byte_list(x),[['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-May-07', '6850000', 'USD', 'b'], - ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Oct-06', '6000000', 'USD', 'a'], - ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Jan-08', '25000000', 'USD', 'c'], - ['mycityfaces', 'MyCityFaces', '7', 'web', 'Scottsdale', 'AZ', '1-Jan-08', '50000', 'USD', 'seed'], - ['flypaper', 'Flypaper', '', 'web', 'Phoenix', 'AZ', '1-Feb-08', '3000000', 'USD', 'a'], - ['infusionsoft', 'Infusionsoft', '105', 'software', 'Gilbert', 'AZ', '1-Oct-07', '9000000', 'USD', 'a']])) - - data = 
six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') - tmp_data_file = self.create_file_with_data(data) - - cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 6) - - actual_output = list(map(lambda row: row.split(six.b(",")),o)) - - self.assertEqual(actual_output,expected_output) - - self.cleanup(tmp_data_file) - - if six.PY2: - test_parsing_universal_newlines_without_explicit_flag = py2_test_failure_to_parse_universal_newlines_without_explicit_flag - else: - test_parsing_universal_newlines_without_explicit_flag = py3_test_successfuly_parse_universal_newlines_without_explicit_flag - - - def test_universal_newlines_parsing_flag(self): - def list_as_byte_list(l): - return list(map(lambda x:six.b(x),l)) - - expected_output = list(map(lambda x:list_as_byte_list(x),[['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-May-07', '6850000', 'USD', 'b'], - ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Oct-06', '6000000', 'USD', 'a'], - ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Jan-08', '25000000', 'USD', 'c'], - ['mycityfaces', 'MyCityFaces', '7', 'web', 'Scottsdale', 'AZ', '1-Jan-08', '50000', 'USD', 'seed'], - ['flypaper', 'Flypaper', '', 'web', 'Phoenix', 'AZ', '1-Feb-08', '3000000', 'USD', 'a'], - ['infusionsoft', 'Infusionsoft', '105', 'software', 'Gilbert', 'AZ', '1-Oct-07', '9000000', 'USD', 'a']])) - - data = 
six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') - tmp_data_file = self.create_file_with_data(data) - - cmd = Q_EXECUTABLE + ' -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode,0) - - if len(e) == 2 or len(e) == 1: - # In python 3.7, there's a deprecation warning for the 'U' file opening mode, which is ok for now - self.assertIn(len(e), [1,2]) - self.assertTrue(b"DeprecationWarning: 'U' mode is deprecated" in e[0]) - elif len(e) != 0: - # Nothing should be output to stderr in other versions - self.assertTrue(False,msg='Unidentified output in stderr') - - self.assertEqual(len(o), 6) - - actual_output = list(map(lambda row: row.split(six.b(",")),o)) - - self.assertEqual(actual_output,expected_output) - - self.cleanup(tmp_data_file) - - - -class SqlTests(AbstractQTestCase): - - def test_find_example(self): - tmpfile = self.create_file_with_data(find_output) - cmd = Q_EXECUTABLE + ' "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - self.assertEqual(len(e), 0) - - self.assertEqual(o[0], six.b('mapred mapred 0.9389581680297852')) - self.assertEqual(o[1], six.b('root root 0.02734375')) - self.assertEqual(o[2], six.b('harel harel 0.010888099670410156')) - - self.cleanup(tmpfile) - - def test_join_example(self): - cmd = Q_EXECUTABLE + ' "select myfiles.c8,emails.c2 
from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 2) - - self.assertEqual(o[0], six.b('ppp dip.1@otherdomain.com')) - self.assertEqual(o[1], six.b('ppp dip.2@otherdomain.com')) - - def test_join_example_with_output_header(self): - cmd = Q_EXECUTABLE + ' -O "select myfiles.c8 aaa,emails.c2 bbb from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0], six.b('aaa bbb')) - self.assertEqual(o[1], six.b('ppp dip.1@otherdomain.com')) - self.assertEqual(o[2], six.b('ppp dip.2@otherdomain.com')) - - def test_self_join1(self): - tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) - cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 10) - - self.cleanup(tmpfile) - - def test_self_join_reuses_table(self): - tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) - cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 3) - - self.assertEqual(o[0],six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1],six.b(' `c1` - int')) - self.assertEqual(o[2],six.b(' `c2` - int')) - - self.cleanup(tmpfile) - - def test_self_join2(self): - tmpfile1 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in 
range(0,10)])) - cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name,tmpfile1.name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 10*10) - - self.cleanup(tmpfile1) - - tmpfile2 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) - cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name) - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 10*10*10) - - self.cleanup(tmpfile2) - - def test_disable_column_type_detection(self): - tmpfile = self.create_file_with_data(six.b('''regular_text,text_with_digits1,text_with_digits2,float_number -"regular text 1",67,"67",12.3 -"regular text 2",067,"067",22.3 -"regular text 3",123,"123",33.4 -"regular text 4",-123,"-123",0122.2 -''')) - - # Check original column type detection - cmd = Q_EXECUTABLE + ' -A -d , -H "select * from %s"' % (tmpfile.name) - - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 5) - - - self.assertEqual(o[0],six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1],six.b(' `regular_text` - text')) - self.assertEqual(o[2],six.b(' `text_with_digits1` - int')) - self.assertEqual(o[3],six.b(' `text_with_digits2` - int')) - self.assertEqual(o[4],six.b(' `float_number` - float')) - - # Check column types detected when actual detection is disabled - cmd = Q_EXECUTABLE + ' -A -d , -H --as-text "select * from %s"' % (tmpfile.name) - - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 5) - - self.assertEqual(o[0],six.b('Table for file: %s' % tmpfile.name)) - self.assertEqual(o[1],six.b(' `regular_text` - text')) - 
self.assertEqual(o[2],six.b(' `text_with_digits1` - text')) - self.assertEqual(o[3],six.b(' `text_with_digits2` - text')) - self.assertEqual(o[4],six.b(' `float_number` - text')) - - # Get actual data with regular detection - cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % (tmpfile.name) - - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 4) - - self.assertEqual(o[0],six.b("regular text 1,67,67,12.3")) - self.assertEqual(o[1],six.b("regular text 2,67,67,22.3")) - self.assertEqual(o[2],six.b("regular text 3,123,123,33.4")) - self.assertEqual(o[3],six.b("regular text 4,-123,-123,122.2")) - - # Get actual data without detection - cmd = Q_EXECUTABLE + ' -d , -H --as-text "select * from %s"' % (tmpfile.name) - - retcode, o, e = run_command(cmd) - - self.assertEqual(retcode, 0) - self.assertEqual(len(e), 0) - self.assertEqual(len(o), 4) - - self.assertEqual(o[0],six.b("regular text 1,67,67,12.3")) - self.assertEqual(o[1],six.b("regular text 2,067,067,22.3")) - self.assertEqual(o[2],six.b("regular text 3,123,123,33.4")) - self.assertEqual(o[3],six.b("regular text 4,-123,-123,0122.2")) - - self.cleanup(tmpfile) - - -class BasicModuleTests(AbstractQTestCase): - - def test_simple_query(self): - tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - - q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r = q.execute('select * from %s' % tmpfile.name) - - self.assertTrue(r.status == 'ok') - self.assertEqual(len(r.warnings),0) - self.assertEqual(len(r.data),2) - self.assertEqual(r.metadata.output_column_name_list,['a','b','c']) - self.assertEqual(r.data,[(1,2,3),(4,5,6)]) - self.assertEqual(len(r.metadata.data_loads),1) - self.assertEqual(r.metadata.data_loads[0].filename,tmpfile.name) - - self.cleanup(tmpfile) - - def test_loaded_data_reuse(self): - tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - - q = 
QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r1 = q.execute('select * from %s' % tmpfile.name) - - r2 = q.execute('select * from %s' % tmpfile.name) - - self.assertTrue(r1.status == 'ok') - self.assertEqual(len(r1.warnings),0) - self.assertEqual(len(r1.data),2) - self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) - self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) - self.assertEqual(r1.metadata.data_loads[0].filename,tmpfile.name) - - self.assertTrue(r2.status == 'ok') - self.assertEqual(len(r1.metadata.data_loads),1) - self.assertEqual(r1.metadata.data_loads[0].filename,tmpfile.name) - self.assertEqual(len(r2.metadata.data_loads),0) - self.assertEqual(r2.data,r1.data) - self.assertEqual(r2.metadata.output_column_name_list,r2.metadata.output_column_name_list) - self.assertEqual(len(r2.warnings),0) - - self.cleanup(tmpfile) - - def test_stdin_injection(self): - tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - - q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r = q.execute('select * from -',stdin_file=codecs.open(tmpfile.name,'rb',encoding='utf-8')) - - self.assertTrue(r.status == 'ok') - self.assertEqual(len(r.warnings),0) - self.assertEqual(len(r.data),2) - self.assertEqual(r.metadata.output_column_name_list,['a','b','c']) - self.assertEqual(r.data,[(1,2,3),(4,5,6)]) - self.assertEqual(len(r.metadata.data_loads),1) - self.assertEqual(r.metadata.data_loads[0].filename,'-') - - self.cleanup(tmpfile) - - def test_named_stdin_injection(self): - tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - - q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r = q.execute('select a from my_stdin_data',stdin_file=codecs.open(tmpfile.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data') - - self.assertTrue(r.status == 'ok') - self.assertEqual(len(r.warnings),0) - self.assertEqual(len(r.data),2) - self.assertEqual(r.metadata.output_column_name_list,['a']) - 
self.assertEqual(r.data,[(1,),(4,)]) - self.assertEqual(len(r.metadata.data_loads),1) - self.assertEqual(r.metadata.data_loads[0].filename,'my_stdin_data') - - self.cleanup(tmpfile) - - def test_stdin_injection_isolation(self): - tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - tmpfile2 = self.create_file_with_data(six.b("d e f\n7 8 9\n10 11 12")) - - q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r1 = q.execute('select * from -',stdin_file=codecs.open(tmpfile1.name,'rb',encoding='utf-8')) - - self.assertTrue(r1.status == 'ok') - self.assertEqual(len(r1.warnings),0) - self.assertEqual(len(r1.data),2) - self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) - self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) - self.assertEqual(len(r1.metadata.data_loads),1) - self.assertEqual(r1.metadata.data_loads[0].filename,'-') - - r2 = q.execute('select * from -',stdin_file=codecs.open(tmpfile2.name,'rb',encoding='utf-8')) - - self.assertTrue(r2.status == 'ok') - self.assertEqual(len(r2.warnings),0) - self.assertEqual(len(r2.data),2) - self.assertEqual(r2.metadata.output_column_name_list,['d','e','f']) - self.assertEqual(r2.data,[(7,8,9),(10,11,12)]) - # There should be another data load, even though it's the same 'filename' as before - self.assertEqual(len(r2.metadata.data_loads),1) - self.assertEqual(r2.metadata.data_loads[0].filename,'-') - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) - - def test_multiple_stdin_injection(self): - tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - tmpfile2 = self.create_file_with_data(six.b("d e f\n7 8 9\n10 11 12")) - - q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - r1 = q.execute('select * from my_stdin_data1',stdin_file=codecs.open(tmpfile1.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data1') - - self.assertTrue(r1.status == 'ok') - self.assertEqual(len(r1.warnings),0) - self.assertEqual(len(r1.data),2) - 
self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) - self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) - self.assertEqual(len(r1.metadata.data_loads),1) - self.assertEqual(r1.metadata.data_loads[0].filename,'my_stdin_data1') - - r2 = q.execute('select * from my_stdin_data2',stdin_file=codecs.open(tmpfile2.name,'rb',encoding='utf-8'),stdin_filename='my_stdin_data2') - - self.assertTrue(r2.status == 'ok') - self.assertEqual(len(r2.warnings),0) - self.assertEqual(len(r2.data),2) - self.assertEqual(r2.metadata.output_column_name_list,['d','e','f']) - self.assertEqual(r2.data,[(7,8,9),(10,11,12)]) - # There should be another data load, even though it's the same 'filename' as before - self.assertEqual(len(r2.metadata.data_loads),1) - self.assertEqual(r2.metadata.data_loads[0].filename,'my_stdin_data2') - - r3 = q.execute('select aa.*,bb.* from my_stdin_data1 aa join my_stdin_data2 bb') - - self.assertTrue(r3.status == 'ok') - self.assertEqual(len(r3.warnings),0) - self.assertEqual(len(r3.data),4) - self.assertEqual(r3.metadata.output_column_name_list,['a','b','c','d','e','f']) - self.assertEqual(r3.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) - self.assertEqual(len(r3.metadata.data_loads),0) - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) - - def test_different_input_params_for_different_files(self): - tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - tmpfile2 = self.create_file_with_data(six.b("7\t8\t9\n10\t11\t12")) - - q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) - - q.load_data(tmpfile1.name,QInputParams(skip_header=True,delimiter=' ')) - q.load_data(tmpfile2.name,QInputParams(skip_header=False,delimiter='\t')) - - r = q.execute('select aa.*,bb.* from %s aa join %s bb' % (tmpfile1.name,tmpfile2.name)) - - self.assertTrue(r.status == 'ok') - self.assertEqual(len(r.warnings),0) - self.assertEqual(len(r.data),4) - self.assertEqual(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) 
- self.assertEqual(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) - self.assertEqual(len(r.metadata.data_loads),0) - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) - - def test_different_input_params_for_different_files(self): - tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - tmpfile2 = self.create_file_with_data(six.b("7\t8\t9\n10\t11\t12")) - - q = QTextAsData() - - q.load_data(tmpfile1.name,QInputParams(skip_header=True,delimiter=' ')) - q.load_data(tmpfile2.name,QInputParams(skip_header=False,delimiter='\t')) - - r = q.execute('select aa.*,bb.* from %s aa join %s bb' % (tmpfile1.name,tmpfile2.name)) - - self.assertTrue(r.status == 'ok') - self.assertEqual(len(r.warnings),0) - self.assertEqual(len(r.data),4) - self.assertEqual(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) - self.assertEqual(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) - self.assertEqual(len(r.metadata.data_loads),0) - - self.cleanup(tmpfile1) - self.cleanup(tmpfile2) - - def test_input_params_override(self): - tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - - default_input_params = QInputParams() - - for k in default_input_params.__dict__.keys(): - setattr(default_input_params,k,'GARBAGE') - - q = QTextAsData(default_input_params) - - r = q.execute('select * from %s' % tmpfile.name) - - self.assertTrue(r.status == 'error') - - overwriting_input_params = QInputParams(skip_header=True,delimiter=' ') - - r2 = q.execute('select * from %s' % tmpfile.name,input_params=overwriting_input_params) - - self.assertTrue(r2.status == 'ok') - self.assertEqual(len(r2.warnings),0) - self.assertEqual(len(r2.data),2) - self.assertEqual(r2.metadata.output_column_name_list,['a','b','c']) - self.assertEqual(r2.data,[(1,2,3),(4,5,6)]) - self.assertEqual(len(r2.metadata.data_loads),1) - self.assertEqual(r2.metadata.data_loads[0].filename,tmpfile.name) - - self.cleanup(tmpfile) - - def 
test_input_params_merge(self): - input_params = QInputParams() - - for k in input_params.__dict__.keys(): - setattr(input_params,k,'GARBAGE') - - merged_input_params = input_params.merged_with(QInputParams()) - - for k in merged_input_params.__dict__.keys(): - self.assertTrue(getattr(merged_input_params,k) != 'GARBAGE') - - for k in input_params.__dict__.keys(): - self.assertTrue(getattr(merged_input_params,k) != 'GARBAGE') - - def test_table_analysis_with_syntax_error(self): - - q = QTextAsData() - - q_output = q.analyze("bad syntax") - - self.assertTrue(q_output.status == 'error') - self.assertTrue(q_output.error.msg.startswith('query error')) - - def test_execute_response(self): - tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - - q = QTextAsData() - - q_output = q.execute("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True)) - - self.assertTrue(q_output.status == 'ok') - self.assertTrue(q_output.error is None) - self.assertEqual(len(q_output.warnings),0) - self.assertEqual(len(q_output.data),2) - self.assertEqual(q_output.data,[ (1,3),(4,6) ]) - self.assertTrue(q_output.metadata is not None) - - metadata = q_output.metadata - - self.assertEqual(metadata.output_column_name_list, [ 'a','c']) - self.assertEqual(len(metadata.data_loads),1) - self.assertEqual(len(metadata.table_structures),1) - - table_structure = metadata.table_structures[0] - - self.assertEqual(table_structure.column_names,[ 'a','b','c']) - self.assertEqual(table_structure.column_types,[ 'int','int','int']) - self.assertEqual(table_structure.filenames_str,tmpfile.name) - self.assertTrue(len(table_structure.materialized_files.keys()),1) - self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name) - self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin) - - self.cleanup(tmpfile) - - def test_analyze_response(self): - tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) - - q = QTextAsData() - - q_output 
= q.analyze("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True)) - - self.assertTrue(q_output.status == 'ok') - self.assertTrue(q_output.error is None) - self.assertEqual(len(q_output.warnings),0) - self.assertEqual(len(q_output.data),2) - self.assertEqual(q_output.data,[ (1,3),(4,6) ]) - self.assertTrue(q_output.metadata is not None) - - metadata = q_output.metadata - - self.assertEqual(metadata.output_column_name_list, [ 'a','c']) - self.assertEqual(len(metadata.data_loads),1) - self.assertEqual(len(metadata.table_structures),1) - - table_structure = metadata.table_structures[0] - - self.assertEqual(table_structure.column_names,[ 'a','b','c']) - self.assertEqual(table_structure.column_types,[ 'int','int','int']) - self.assertEqual(table_structure.filenames_str,tmpfile.name) - self.assertTrue(len(table_structure.materialized_files.keys()),1) - self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name) - self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin) - - self.cleanup(tmpfile) - - def test_load_data_from_string(self): - input_str = six.u('column1,column2,column3\n') + six.u('\n').join([six.u('value1,2.5,value3')] * 1000) - - q = QTextAsData() - - q.load_data_from_string('my_data',input_str,QInputParams(skip_header=True,delimiter=',')) - - q_output = q.execute('select column2,column3 from my_data') - - self.assertTrue(q_output.status == 'ok') - self.assertTrue(q_output.error is None) - self.assertEqual(len(q_output.warnings),0) - self.assertTrue(len(q_output.data),1000) - self.assertEqual(len(set(q_output.data)),1) - self.assertEqual(list(set(q_output.data))[0],(2.5,'value3')) - - metadata = q_output.metadata - - self.assertTrue(metadata.output_column_name_list,['column2','column3']) - self.assertEqual(len(metadata.data_loads),0) - self.assertTrue(len(metadata.table_structures),1) - - table_structure = metadata.table_structures[0] - - 
self.assertEqual(table_structure.column_names,['column1','column2','column3']) - self.assertEqual(table_structure.column_types,['text','float','text']) - self.assertEqual(table_structure.filenames_str,'my_data') - self.assertTrue(len(table_structure.materialized_files.keys()),1) - self.assertTrue(table_structure.materialized_files['my_data'].filename,'my_data') - self.assertTrue(table_structure.materialized_files['my_data'].is_stdin) - - -class BenchmarkAttemptResults(object): - def __init__(self, attempt, lines, columns, duration,return_code): - self.attempt = attempt - self.lines = lines - self.columns = columns - self.duration = duration - self.return_code = return_code - - def __str__(self): - return "{}".format(self.__dict__) - __repr__ = __str__ - -class BenchmarkResults(object): - def __init__(self, lines, columns, attempt_results, mean, stddev): - self.lines = lines - self.columns = columns - self.attempt_results = attempt_results - self.mean = mean - self.stddev = stddev - - def __str__(self): - return "{}".format(self.__dict__) - __repr__ = __str__ - -class BenchmarkTests(AbstractQTestCase): - - BENCHMARK_DIR = './_benchmark_data' - - def _ensure_benchmark_data_dir_exists(self): - try: - os.mkdir(BenchmarkTests.BENCHMARK_DIR) - except Exception as e: - pass - - def _create_benchmark_file_if_needed(self): - self._ensure_benchmark_data_dir_exists() - - if os.path.exists('{}/benchmark-file.csv'.format(BenchmarkTests.BENCHMARK_DIR)): - return - - g = GzipFile('unit-file.csv.gz') - d = g.read().decode('utf-8') - f = open('{}/benchmark-file.csv'.format(BenchmarkTests.BENCHMARK_DIR), 'w') - for i in range(100): - f.write(d) - f.close() - - def _prepare_test_file(self, lines, columns): - - filename = '{}/_benchmark_data__lines_{}_columns_{}.csv'.format(BenchmarkTests.BENCHMARK_DIR,lines, columns) - - if os.path.exists(filename): - return filename - - c = ['c{}'.format(x + 1) for x in range(columns)] - - # write a header line - ff = open(filename,'w') - 
ff.write(",".join(c)) - ff.write('\n') - ff.close() - - r, o, e = run_command('head -{} {}/benchmark-file.csv | ' + Q_EXECUTABLE + ' -d , "select {} from -" >> {}'.format(lines, BenchmarkTests.BENCHMARK_DIR, ','.join(c), filename)) - self.assertEqual(r, 0) - return filename - - def _decide_result(self,attempt_results): - - failed = list(filter(lambda a: a.return_code != 0,attempt_results)) - - if len(failed) == 0: - mean = sum([x.duration for x in attempt_results]) / len(attempt_results) - sum_squared = sum([(x.duration - mean)**2 for x in attempt_results]) - ddof = 0 - pvar = sum_squared / (len(attempt_results) - ddof) - stddev = pvar ** 0.5 - else: - mean = None - stddev = None - - return BenchmarkResults( - attempt_results[0].lines, - attempt_results[0].columns, - attempt_results, - mean, - stddev - ) - - def _perform_test_performance_matrix(self,name,generate_cmd_function): - results = [] - - benchmark_results_folder = os.environ.get("Q_BENCHMARK_RESULTS_FOLDER",'') - if benchmark_results_folder == "": - raise Exception("Q_BENCHMARK_RESULTS_FOLDER must be provided as an environment variable") - - self._create_benchmark_file_if_needed() - for columns in [1, 5, 10, 20, 50, 100]: - for lines in [1, 10, 100, 1000, 10000, 100000, 1000000]: - attempt_results = [] - for attempt in range(10): - filename = self._prepare_test_file(lines, columns) - if DEBUG: - print("Testing {}".format(filename)) - t0 = time.time() - r, o, e = run_command(generate_cmd_function(filename,lines,columns)) - duration = time.time() - t0 - attempt_result = BenchmarkAttemptResults(attempt, lines, columns, duration, r) - attempt_results += [attempt_result] - if DEBUG: - print("Results: {}".format(attempt_result.__dict__)) - final_result = self._decide_result(attempt_results) - results += [final_result] - - series_fields = [six.u('lines'),six.u('columns')] - value_fields = [six.u('mean'),six.u('stddev')] - - all_fields = series_fields + value_fields - - output_filename = 
'{}/{}.benchmark-results'.format(benchmark_results_folder,name) - output_file = open(output_filename,'w') - for columns,g in itertools.groupby(sorted(results,key=lambda x:x.columns),key=lambda x:x.columns): - x = six.u("\t").join(series_fields + [six.u('{}_{}').format(name, f) for f in value_fields]) - print(x,file = output_file) - for result in g: - print(six.u("\t").join(map(str,[getattr(result,f) for f in all_fields])),file=output_file) - output_file.close() - - print("results have been written to : {}".format(output_filename)) - if DEBUG: - print("RESULTS FOR {}".format(name)) - print(open(output_filename,'r').read()) - - def test_q_matrix(self): - venv = os.path.basename(os.environ.get('VIRTUAL_ENV') or 'unknown-virtual-env') - - def generate_q_cmd(data_filename,line_count,column_count): - if column_count == 1: - additional_params = '-c 1' - else: - additional_params = '' - return '{} -d , {} "select count(*) from {}"'.format(Q_EXECUTABLE,additional_params, data_filename) - self._perform_test_performance_matrix(venv,generate_q_cmd) - - def _get_textql_version(self): - r,o,e = run_command("textql --version") - if r != 0: - raise Exception("Could not find textql") - if len(e) != 0: - raise Exception("Errors while getting textql version") - return o[0] - - def _get_octosql_version(self): - r,o,e = run_command("octosql --version") - if r != 0: - raise Exception("Could not find octosql") - if len(e) != 0: - raise Exception("Errors while getting octosql version") - import re - version = re.findall('v[0-9]+\.[0-9]+\.[0-9]+',o[0])[0] - return version - - def test_textql_matrix(self): - def generate_textql_cmd(data_filename,line_count,column_count): - return 'textql -dlm , -sql "select count(*)" {}'.format(data_filename) - - name = 'textql_%s' % self._get_textql_version() - self._perform_test_performance_matrix(name,generate_textql_cmd) - - def test_octosql_matrix(self): - config_fn = self.random_tmp_filename('octosql', 'config') - def 
generate_octosql_cmd(data_filename,line_count,column_count): - j = """ -dataSources: - - name: bmdata - type: csv - config: - path: "{}" - headerRow: false - batchSize: 10000 -""".format(data_filename)[1:] - f = open(config_fn,'w') - f.write(j) - f.close() - return 'octosql -c {} -o batch-csv "select count(*) from bmdata a"'.format(config_fn) - - name = 'octosql_%s' % self._get_octosql_version() - self._perform_test_performance_matrix(name,generate_octosql_cmd) - -def suite(): - tl = unittest.TestLoader() - basic_stuff = tl.loadTestsFromTestCase(BasicTests) - parsing_mode = tl.loadTestsFromTestCase(ParsingModeTests) - sql = tl.loadTestsFromTestCase(SqlTests) - formatting = tl.loadTestsFromTestCase(FormattingTests) - basic_module_stuff = tl.loadTestsFromTestCase(BasicModuleTests) - save_db_to_disk_tests = tl.loadTestsFromTestCase(SaveDbToDiskTests) - user_functions_tests = tl.loadTestsFromTestCase(UserFunctionTests) - multi_header_tests = tl.loadTestsFromTestCase(MultiHeaderTests) - return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests,multi_header_tests,user_functions_tests]) - -if __name__ == '__main__': - if len(sys.argv) > 1: - suite = unittest.TestSuite() - if '.' in sys.argv[1]: - c,m = sys.argv[1].split(".") - suite.addTest(globals()[c](m)) - else: - tl = unittest.TestLoader() - tc = tl.loadTestsFromTestCase(globals()[sys.argv[1]]) - suite = unittest.TestSuite([tc]) - else: - suite = suite() - - test_runner = unittest.TextTestRunner(verbosity=2) - result = test_runner.run(suite) - sys.exit(not result.wasSuccessful()) diff --git a/test/test_suite.py b/test/test_suite.py new file mode 100755 index 00000000..4f6aea39 --- /dev/null +++ b/test/test_suite.py @@ -0,0 +1,5704 @@ +#!/usr/bin/env python3 + +# +# test suite for q. +# +# Prefer end-to-end tests, running the actual q command and testing stdout/stderr, and the return code. 
+# Some utilities are provided for making that easy, see other tests for examples. +# +# Don't forget to use the Q_EXECUTABLE instead of hardcoding the q command line. This will be used in the near future +# in order to test the resulting binary executables as well, instead of just executing the q python source code. +# + +from __future__ import print_function + +import collections +import functools +import tempfile +import unittest +import random +import json +import uuid +from collections import OrderedDict +from json import JSONEncoder +from subprocess import PIPE, Popen, STDOUT +import sys +import os +import time +from tempfile import NamedTemporaryFile +import locale +import pprint +import six +from six.moves import range +import codecs +import itertools +from gzip import GzipFile +import pytest +import uuid +import sqlite3 +import re +import collections + +sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin')) +from bin.q import QTextAsData, QOutput, QOutputPrinter, QInputParams, DataStream, Sqlite3DB + +# q uses this encoding as the default output encoding. 
Some of the tests use it in order to +# make sure that the output is correctly encoded +SYSTEM_ENCODING = locale.getpreferredencoding() + +EXAMPLES = os.path.abspath(os.path.join(os.getcwd(), 'examples')) + +Q_EXECUTABLE = os.getenv('Q_EXECUTABLE', os.path.abspath('./bin/q.py')) +Q_SKIP_EXECUTABLE_VALIDATION = os.getenv('Q_SKIP_EXECUTABLE_VALIDATION','false') + +if not Q_SKIP_EXECUTABLE_VALIDATION == 'true': + Q_EXECUTABLE = os.path.abspath(Q_EXECUTABLE) + if not os.path.exists(Q_EXECUTABLE): + raise Exception("q executable must reside in {}".format(Q_EXECUTABLE)) +else: + Q_EXECUTABLE = os.getenv('Q_EXECUTABLE') + # Skip checking of executable (useful for testing that q is in the path) + pass + +DEBUG = '-v' in sys.argv +if os.environ.get('Q_DEBUG'): + DEBUG = True + +def batch(iterable, n=1): + r = [] + l = len(iterable) + for ndx in range(0, l, n): + r += [iterable[ndx:min(ndx + n, l)]] + return r + +def partition(pred, iterable): + t1, t2 = itertools.tee(iterable) + return list(itertools.filterfalse(pred, t1)), list(filter(pred, t2)) + +def run_command(cmd_to_run,env_to_inject=None): + global DEBUG + if DEBUG: + print("CMD: {}".format(cmd_to_run)) + + if env_to_inject is None: + env_to_inject = os.environ + + env = env_to_inject + + p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True,env=env) + o, e = p.communicate() + # remove last newline + o = o.rstrip() + e = e.strip() + # split rows + if o != six.b(''): + o = o.split(six.b(os.linesep)) + else: + o = [] + if e != six.b(''): + e = e.split(six.b(os.linesep)) + else: + e = [] + + res = (p.returncode, o, e) + if DEBUG: + print("RESULT:{}".format(res)) + return res + + +uneven_ls_output = six.b("""drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux +drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt +drwxr-xr-x 2 root root 4096 Apr 24 2013 /srv +drwx------ 2 root root 16384 Jun 21 2013 /lost+found +lrwxrwxrwx 1 root root 33 Jun 21 2013 /initrd.img.old -> /boot/initrd.img-3.8.0-19-generic +drwxr-xr-x 2 root root 
4096 Jun 21 2013 /cdrom +drwxr-xr-x 3 root root 4096 Jun 21 2013 /home +lrwxrwxrwx 1 root root 29 Jun 21 2013 /vmlinuz -> boot/vmlinuz-3.8.0-19-generic +lrwxrwxrwx 1 root root 32 Jun 21 2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic +""") + + +find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp +8299123 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576 +8263229 964 -rw-rw-r-- 1 mapred mapred 984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser +8263230 4 -rw-rw-r-- 1 harel harel 1223 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormconf.ser +8299113 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate +8263406 4 -rw-rw-r-- 1 harel harel 2002 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746 +8263476 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746.version +8263607 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514169735.version +8263533 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514172733.version +8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version +""") + + +header_row = six.b('name,value1,value2') +sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')] +sample_data_rows_with_empty_string = [six.b('a,aaa,0'), six.b('b,bbb,0'), six.b('c,,0')] +sample_data_no_header = six.b("\n").join(sample_data_rows) + six.b("\n") 
+sample_data_with_empty_string_no_header = six.b("\n").join( + sample_data_rows_with_empty_string) + six.b("\n") +sample_data_with_header = header_row + six.b("\n") + sample_data_no_header +sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header + +def generate_sample_data_with_header(header): + return header + six.b("\n") + sample_data_no_header + +sample_quoted_data = six.b('''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted +control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" +non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline + value""." "this is an escaped \\"multiline + value\\"." +control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6" +''') + +double_double_quoted_data = six.b('''regular_double_quoted double_double_quoted +"this is a quoted value" "this is a quoted value with ""double double quotes""" +''') + +escaped_double_quoted_data = six.b('''regular_double_quoted escaped_double_quoted +"this is a quoted value" "this is a quoted value with \\"escaped double quotes\\"" +''') + +combined_quoted_data = six.b('''regular_double_quoted double_double_quoted escaped_double_quoted +"this is a quoted value" "this is a quoted value with ""double double quotes""" "this is a quoted value with \\"escaped double quotes\\"" +''') + +sample_quoted_data2 = six.b('"quoted data" 23\nunquoted-data 54') + +sample_quoted_data2_with_newline = six.b('"quoted data with\na new line inside it":23\nunquoted-data:54') + +one_column_data = six.b('''data without commas 1 +data without commas 2 +''') + +# Values with leading whitespace +sample_data_rows_with_spaces = [six.b('a,1,0'), six.b(' b, 2,0'), six.b('c,,0')] +sample_data_with_spaces_no_header = 
six.b("\n").join( + sample_data_rows_with_spaces) + six.b("\n") + +header_row_with_spaces = six.b('name,value 1,value2') +sample_data_with_spaces_with_header = header_row_with_spaces + \ + six.b("\n") + sample_data_with_spaces_no_header + +long_value1 = "23683289372328372328373" +int_value = "2328372328373" +sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value) + + +def one_column_warning(e): + return e[0].startswith(six.b('Warning: column count is one')) + +def sqlite_dict_factory(cursor, row): + d = {} + for idx, col in enumerate(cursor.description): + d[col[0]] = row[idx] + return d + +class AbstractQTestCase(unittest.TestCase): + + def create_file_with_data(self, data, encoding=None,prefix=None,suffix=None,use_real_path=True): + if encoding is not None: + raise Exception('Deprecated: Encoding must be none') + tmpfile = NamedTemporaryFile(delete=False,prefix=prefix,suffix=suffix) + tmpfile.write(data) + tmpfile.close() + if use_real_path: + tmpfile.name = os.path.realpath(tmpfile.name) + return tmpfile + + def generate_tmpfile_name(self,prefix=None,suffix=None): + tmpfile = NamedTemporaryFile(delete=False,prefix=prefix,suffix=suffix) + os.remove(tmpfile.name) + return os.path.realpath(tmpfile.name) + + def arrays_to_csv_file_content(self,delimiter,header_row_list,cell_list): + all_rows = [delimiter.join(row) for row in [header_row_list] + cell_list] + return six.b("\n").join(all_rows) + + def create_qsql_file_with_content_and_return_filename(self, header_row,cell_list): + csv_content = self.arrays_to_csv_file_content(six.b(','),header_row,cell_list) + tmpfile = self.create_file_with_data(csv_content) + + cmd = '%s -d , -H "select count(*) from %s" -C readwrite' % (Q_EXECUTABLE,tmpfile.name) + r, o, e = run_command(cmd) + self.assertEqual(r, 0) + + created_qsql_filename = '%s.qsql' % tmpfile.name + self.assertTrue(os.path.exists(created_qsql_filename)) + + return created_qsql_filename + + def arrays_to_qsql_file_content(self, 
header_row,cell_list): + csv_content = self.arrays_to_csv_file_content(six.b(','),header_row,cell_list) + tmpfile = self.create_file_with_data(csv_content) + + cmd = '%s -d , -H "select count(*) from %s" -C readwrite' % (Q_EXECUTABLE,tmpfile.name) + r, o, e = run_command(cmd) + self.assertEqual(r, 0) + + matching_qsql_filename = '%s.qsql' % tmpfile.name + f = open(matching_qsql_filename,'rb') + qsql_file_bytes = f.read() + f.close() + + self.assertEqual(matching_qsql_filename,'%s.qsql' % tmpfile.name) + + return qsql_file_bytes + + def write_file(self,filename,data): + f = open(filename,'wb') + f.write(data) + f.close() + + def create_folder_with_files(self,filename_to_content_dict,prefix, suffix): + name = self.random_tmp_filename(prefix,suffix) + os.makedirs(name) + for filename,content in six.iteritems(filename_to_content_dict): + if os.path.sep in filename: + os.makedirs('%s/%s' % (name,os.path.split(filename)[0])) + f = open(os.path.join(name,filename),'wb') + f.write(content) + f.close() + return name + + def cleanup_folder(self,tmpfolder): + if not tmpfolder.startswith(os.path.realpath('/var/tmp')): + raise Exception('Guard against accidental folder deletions: %s' % tmpfolder) + global DEBUG + if not DEBUG: + print("should have removed tmpfolder %s. Not doing it for the sake of safety. 
# TODO re-add" % tmpfolder) + pass # os.remove(tmpfolder) + + def cleanup(self, tmpfile): + global DEBUG + if not DEBUG: + os.remove(tmpfile.name) + + def random_tmp_filename(self,prefix,postfix): + # TODO Use more robust method for this + path = '/var/tmp' + return os.path.realpath('%s/%s-%s.%s' % (path,prefix,random.randint(0,1000000000),postfix)) + + + +def get_sqlite_table_list(c,exclude_qcatalog=True): + if exclude_qcatalog: + r = c.execute("select tbl_name from sqlite_master where type='table' and tbl_name != '_qcatalog'").fetchall() + else: + r = c.execute("select tbl_name from sqlite_master where type='table'").fetchall() + + return r + +class SaveToSqliteTests(AbstractQTestCase): + + # Returns a folder with files and a header in each, one column named 'a' + def generate_files_in_folder(self,batch_size, file_count): + numbers = list(range(1, 1 + batch_size * file_count)) + numbers_as_text = batch([str(x) for x in numbers], n=batch_size) + + content_list = list(map(six.b, ['a\n' + "\n".join(x) + '\n' for x in numbers_as_text])) + + filename_list = list(map(lambda x: 'file-%s' % x, range(file_count))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d, 'split-files', 'sqlite-stuff') + return (tmpfolder,filename_list) + + # 11074 3.8.2021 10:53 bin/q.py "select count(*) from xxxx/file-95 left join xxxx/file-96 left join xxxx/file-97 left join xxxx/file-97 left join xxxx/file-98 left join xxxx/*" -c 1 -C readwrite -A + # # fails because it takes qsql files as well + + def test_save_glob_files_to_sqlite(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + output_sqlite_file = self.random_tmp_filename("x","sqlite") + + cmd = '%s -H "select count(*) from %s/*" -c 1 -S %s' % (Q_EXECUTABLE,tmpfolder,output_sqlite_file) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + 
self.assertEqual(len(e), 4) + + c = sqlite3.connect(output_sqlite_file) + results = c.execute('select a from file_dash_0').fetchall() + self.assertEqual(len(results),BATCH_SIZE*FILE_COUNT) + self.assertEqual(sum(map(lambda x:x[0],results)),sum(range(1,BATCH_SIZE*FILE_COUNT+1))) + tables = get_sqlite_table_list(c) + self.assertEqual(len(tables),1) + + c.close() + + self.cleanup_folder(tmpfolder) + + def test_save_multiple_files_to_sqlite(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + output_sqlite_file = self.random_tmp_filename("x","sqlite") + + tables_as_str = " left join ".join(["%s/%s" % (tmpfolder,x) for x in filename_list]) + cmd = '%s -H "select count(*) from %s" -c 1 -S %s' % (Q_EXECUTABLE,tables_as_str,output_sqlite_file) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + c = sqlite3.connect(output_sqlite_file) + + tables = get_sqlite_table_list(c) + self.assertEqual(len(tables), FILE_COUNT) + + for i in range(FILE_COUNT): + results = c.execute('select a from file_dash_%s' % i).fetchall() + self.assertEqual(len(results),BATCH_SIZE) + self.assertEqual(sum(map(lambda x:x[0],results)),sum(range(1+i*BATCH_SIZE,1+(i+1)*BATCH_SIZE))) + + c.close() + + self.cleanup_folder(tmpfolder) + + def test_save_multiple_files_to_sqlite_without_duplicates(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + output_sqlite_file = self.random_tmp_filename("x","sqlite") + + tables_as_str = " left join ".join(["%s/%s" % (tmpfolder,x) for x in filename_list]) + + # duplicate the left-joins for all the files, so the query will contain each filename twice + tables_as_str = "%s left join %s" % (tables_as_str,tables_as_str) + + cmd = '%s -H "select count(*) from %s" -c 1 -S %s' % (Q_EXECUTABLE,tables_as_str,output_sqlite_file) + retcode, o, e = 
run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + c = sqlite3.connect(output_sqlite_file) + + tables = get_sqlite_table_list(c) + # total table count should still be FILE_COUNT, even with the duplications + self.assertEqual(len(tables), FILE_COUNT) + + for i in range(FILE_COUNT): + results = c.execute('select a from file_dash_%s' % i).fetchall() + self.assertEqual(len(results),BATCH_SIZE) + self.assertEqual(sum(map(lambda x:x[0],results)),sum(range(1+i*BATCH_SIZE,1+(i+1)*BATCH_SIZE))) + + c.close() + + self.cleanup_folder(tmpfolder) + + def test_sqlite_file_is_not_created_if_some_table_does_not_exist(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + output_sqlite_file = self.random_tmp_filename("x","sqlite") + + tables_as_str = " left join ".join(["%s/%s" % (tmpfolder,x) for x in filename_list]) + + tables_as_str = tables_as_str + ' left join %s/non_existent_table' % (tmpfolder) + + cmd = '%s -H "select count(*) from %s" -c 1 -S %s' % (Q_EXECUTABLE,tables_as_str,output_sqlite_file) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 30) + self.assertEqual(len(e), 2) + self.assertEqual(e[0],six.b("Going to save data into a disk database: %s" % output_sqlite_file)) + self.assertEqual(e[1],six.b("No files matching '%s/non_existent_table' have been found" % tmpfolder)) + + self.assertTrue(not os.path.exists(output_sqlite_file)) + + self.cleanup_folder(tmpfolder) + + def test_recurring_glob_and_separate_files_in_same_query_when_writing_to_sqlite(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + output_sqlite_file = self.random_tmp_filename("x","sqlite") + + tables_as_str = " left join ".join(["%s/%s" % (tmpfolder,x) for x in filename_list]) + # The same files are left-joined in the query as an additional "left join /*". 
This should create an additional table + # in the sqlite file, with all the data in it + cmd = '%s -H "select count(*) from %s left join %s/*" -c 1 -S %s' % (Q_EXECUTABLE,tables_as_str,tmpfolder,output_sqlite_file) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + c = sqlite3.connect(output_sqlite_file) + + tables = get_sqlite_table_list(c) + # plus the additional table from the glob + self.assertEqual(len(tables), FILE_COUNT+1) + + # check all the per-file tables + for i in range(FILE_COUNT): + results = c.execute('select a from file_dash_%s' % i).fetchall() + self.assertEqual(len(results),BATCH_SIZE) + self.assertEqual(sum(map(lambda x:x[0],results)),sum(range(1+i*BATCH_SIZE,1+(i+1)*BATCH_SIZE))) + + # ensure the glob-based table exists, with an _2 added to the name, as the original "file_dash_0" already exists in the sqlite db + results = c.execute('select a from file_dash_0_2').fetchall() + self.assertEqual(len(results),FILE_COUNT*BATCH_SIZE) + self.assertEqual(sum(map(lambda x:x[0],results)),sum(range(1,1+FILE_COUNT*BATCH_SIZE))) + c.close() + + self.cleanup_folder(tmpfolder) + + def test_empty_sqlite_handling(self): + fn = self.generate_tmpfile_name("empty",".sqlite") + + c = sqlite3.connect(fn) + c.execute('create table x (a int)').fetchall() + c.execute('drop table x').fetchall() + c.close() + + cmd = '%s "select * from %s"' % (Q_EXECUTABLE,fn) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,88) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + self.assertEqual(e[0],six.b('sqlite file %s has no tables' % fn)) + + def test_storing_to_disk_too_many_qsql_files(self): + BATCH_SIZE = 10 + MAX_ATTACHED_DBS = 5 + FILE_COUNT = MAX_ATTACHED_DBS + 4 + + numbers_as_text = batch([str(x) for x in range(1, 1 + BATCH_SIZE * FILE_COUNT)], n=BATCH_SIZE) + + content_list = map(six.b, ["\n".join(x) for x in numbers_as_text]) + + filename_list = list(map(lambda x: 
'file-%s' % x, range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d, 'split-files', 'attach-limit') + + for fn in filename_list: + cmd = '%s -c 1 "select count(*) from %s/%s" -C readwrite' % (Q_EXECUTABLE,tmpfolder, fn) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + + output_sqlite_file = self.generate_tmpfile_name("many-sqlites",".sqlite") + + table_refs = list(['select * from %s/%s.qsql' % (tmpfolder,x) for x in filename_list]) + table_refs_str = " UNION ALL ".join(table_refs) + # Limit max attached dbs according to the parameter (must be below the hardcoded sqlite limit, which is 10 when having a standard version compiled) + cmd = '%s "select * from (%s)" -S %s --max-attached-sqlite-databases=%s' % (Q_EXECUTABLE,table_refs_str,output_sqlite_file,MAX_ATTACHED_DBS) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode,0) + self.assertEqual(len(o),0) + self.assertEqual(len(e),4) + + c = sqlite3.connect(output_sqlite_file) + tables_results = c.execute("select tbl_name from sqlite_master where type='table'").fetchall() + table_names = list(sorted([x[0] for x in tables_results])) + self.assertEqual(len(table_names),FILE_COUNT) + + for i,tn in enumerate(table_names): + self.assertEqual(tn,'file_dash_%s' % i) + + table_content = c.execute('select * from %s' % tn).fetchall() + self.assertEqual(len(table_content),BATCH_SIZE) + + cmd = '%s "select * from %s:::%s"' % (Q_EXECUTABLE,output_sqlite_file,tn) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),BATCH_SIZE) + self.assertEqual(o,list([six.b(str(x)) for x in range(1 + i*BATCH_SIZE,1+(i+1)*BATCH_SIZE)])) + + self.cleanup_folder(tmpfolder) + + def test_storing_to_disk_too_many_sqlite_files(self): + # a variation of test_storing_to_disk_too_many_qsql_files, which deletes the qcatalog file from the caches, + # so they'll be just regular sqlite 
files + + BATCH_SIZE = 10 + MAX_ATTACHED_DBS = 5 + FILE_COUNT = MAX_ATTACHED_DBS + 4 + + numbers_as_text = batch([str(x) for x in range(1, 1 + BATCH_SIZE * FILE_COUNT)], n=BATCH_SIZE) + + content_list = map(six.b, ["\n".join(x) for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x, range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d, 'split-files', 'attach-limit') + + for fn in filename_list: + cmd = '%s -c 1 "select count(*) from %s/%s" -C readwrite' % (Q_EXECUTABLE,tmpfolder, fn) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + + c = sqlite3.connect('%s/%s.qsql' % (tmpfolder,fn)) + c.execute('drop table _qcatalog').fetchall() + c.close() + os.rename('%s/%s.qsql' % (tmpfolder,fn),'%s/%s.sqlite' % (tmpfolder,fn)) + + output_sqlite_file = self.generate_tmpfile_name("many-sqlites",".sqlite") + + table_refs = list(['select * from %s/%s.sqlite' % (tmpfolder,x) for x in filename_list]) + table_refs_str = " UNION ALL ".join(table_refs) + # Limit max attached dbs according to the parameter (must be below the hardcoded sqlite limit, which is 10 when having a standard version compiled) + cmd = '%s "select * from (%s)" -S %s --max-attached-sqlite-databases=%s' % (Q_EXECUTABLE,table_refs_str,output_sqlite_file,MAX_ATTACHED_DBS) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode,0) + self.assertEqual(len(o),0) + self.assertEqual(len(e),4) + + c = sqlite3.connect(output_sqlite_file) + tables_results = c.execute("select tbl_name from sqlite_master where type='table'").fetchall() + table_names = list(sorted([x[0] for x in tables_results])) + self.assertEqual(len(table_names),FILE_COUNT) + + for i,tn in enumerate(table_names): + self.assertEqual(tn,'file_dash_%s' % i) + + table_content = c.execute('select * from %s' % tn).fetchall() + self.assertEqual(len(table_content),BATCH_SIZE) + + cmd = '%s "select * from %s:::%s"' % 
(Q_EXECUTABLE,output_sqlite_file,tn) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),BATCH_SIZE) + self.assertEqual(o,list([six.b(str(x)) for x in range(1 + i*BATCH_SIZE,1+(i+1)*BATCH_SIZE)])) + + self.cleanup_folder(tmpfolder) + + def test_storing_to_disk_too_many_sqlite_files__over_the_sqlite_limit(self): + # a variation of test_storing_to_disk_too_many_sqlite_files, but with a limit above the sqlite hardcoded limit + MAX_ATTACHED_DBS = 20 # standard sqlite limit is 10, so q should throw an error + + BATCH_SIZE = 10 + FILE_COUNT = MAX_ATTACHED_DBS + 4 + + numbers_as_text = batch([str(x) for x in range(1, 1 + BATCH_SIZE * FILE_COUNT)], n=BATCH_SIZE) + + content_list = map(six.b, ["\n".join(x) for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x, range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d, 'split-files', 'attach-limit') + + for fn in filename_list: + cmd = '%s -c 1 "select count(*) from %s/%s" -C readwrite' % (Q_EXECUTABLE,tmpfolder, fn) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + + c = sqlite3.connect('%s/%s.qsql' % (tmpfolder,fn)) + c.execute('drop table _qcatalog').fetchall() + c.close() + os.rename('%s/%s.qsql' % (tmpfolder,fn),'%s/%s.sqlite' % (tmpfolder,fn)) + + output_sqlite_file = self.generate_tmpfile_name("many-sqlites",".sqlite") + + table_refs = list(['select * from %s/%s.sqlite' % (tmpfolder,x) for x in filename_list]) + table_refs_str = " UNION ALL ".join(table_refs) + # Limit max attached dbs according to the parameter (must be below the hardcoded sqlite limit, which is 10 when having a standard version compiled) + cmd = '%s "select * from (%s)" -S %s --max-attached-sqlite-databases=%s' % (Q_EXECUTABLE,table_refs_str,output_sqlite_file,MAX_ATTACHED_DBS) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode,89) + 
self.assertEqual(len(o),0) + self.assertEqual(len(e),2) + self.assertTrue(e[0].startswith(six.b('Going to save data into'))) + self.assertTrue(e[1].startswith(six.b('There are too many attached databases. Use a proper --max-attached-sqlite-databases parameter which is below the maximum'))) + + self.cleanup_folder(tmpfolder) + + def test_qtable_name_normalization__starting_with_a_digit(self): + numbers = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 101)] + + header = [six.b('aa'), six.b('bb'), six.b('cc')] + + base_filename_with_digits = '010' + + new_tmp_folder = self.create_folder_with_files({ + base_filename_with_digits : self.arrays_to_csv_file_content(six.b(','),header,numbers) + },prefix='xx',suffix='digits') + + effective_filename = '%s/010' % new_tmp_folder + + output_sqlite_filename = self.generate_tmpfile_name("starting-with-digit",".sqlite") + cmd = '%s -d , -H "select count(aa),count(bb),count(cc) from %s" -S %s' % (Q_EXECUTABLE,effective_filename,output_sqlite_filename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),0) + self.assertEqual(len(e),4) + + c = sqlite3.connect(output_sqlite_filename) + results = c.execute('select aa,bb,cc from t_%s' % base_filename_with_digits).fetchall() + self.assertEqual(results,list([(x,x,x) for x in range(1,101)])) + c.close() + + self.cleanup_folder(new_tmp_folder) + + def test_qtable_name_normalization(self): + x = [six.b(a) for a in map(str, range(1, 101))] + large_file_data = six.b("val\n") + six.b("\n").join(x) + tmpfile = self.create_file_with_data(large_file_data) + + tmpfile_folder = os.path.dirname(tmpfile.name) + tmpfile_basename = os.path.basename(tmpfile.name) + + cmd = 'cd %s && %s -c 1 -H -D , -O "select a.val,b.val from %s a cross join ./%s b on (a.val = b.val * 2)"' % (tmpfile_folder,Q_EXECUTABLE,tmpfile_basename,tmpfile_basename) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + 
self.assertEqual(len(o), 51) + + evens = list(filter(lambda x: x%2 == 0,range(1,101))) + expected_result_rows = [six.b('val,val')] + [six.b('%d,%d' % (x,x / 2)) for x in evens] + self.assertEqual(o,expected_result_rows) + + def test_qtable_name_normalization2(self): + cmd = '%s "select * from"' % Q_EXECUTABLE + + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 118) + self.assertEqual(len(e), 1) + self.assertEqual(e[0],six.b('FROM/JOIN is missing a table name after it')) + + def test_qtable_name_normalization3(self): + # with a space after the from + cmd = '%s "select * from "' % Q_EXECUTABLE + + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 118) + self.assertEqual(len(e), 1) + self.assertEqual(e[0],six.b('FROM/JOIN is missing a table name after it')) + + def test_save_multiple_files_to_sqlite_while_caching_them(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + output_sqlite_file = self.random_tmp_filename("x","sqlite") + + tables_as_str = " left join ".join(["%s/%s" % (tmpfolder,x) for x in filename_list]) + cmd = '%s -H "select count(*) from %s" -c 1 -S %s -C readwrite' % (Q_EXECUTABLE,tables_as_str,output_sqlite_file) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + c = sqlite3.connect(output_sqlite_file) + + tables = get_sqlite_table_list(c) + self.assertEqual(len(tables), FILE_COUNT) + + for i,filename in enumerate(filename_list): + matching_table_name = 'file_dash_%s' % i + + results = c.execute('select a from %s' % matching_table_name).fetchall() + self.assertEqual(len(results),BATCH_SIZE) + self.assertEqual(sum(map(lambda x:x[0],results)),sum(range(1+i*BATCH_SIZE,1+(i+1)*BATCH_SIZE))) + + # check actual resulting qsql file for the file + cmd = '%s -c 1 -H "select a from %s/%s"' % (Q_EXECUTABLE,tmpfolder,filename) + retcode, o, e = run_command(cmd) + + 
self.assertEqual(retcode, 0) + self.assertEqual(len(o), BATCH_SIZE) + self.assertEqual(sum(map(int,o)),sum(range(1+i*BATCH_SIZE,1+(i+1)*BATCH_SIZE))) + self.assertEqual(len(e), 0) + + # check analysis returns proper file-with-unused-qsql for each file, since by default `-C none` which means don't read the cache + # even if it exists + cmd = '%s -c 1 -H "select a from %s/%s" -A' % (Q_EXECUTABLE,tmpfolder,filename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 5) + self.assertEqual(o,[ + six.b('Table: %s/file-%s' % (tmpfolder,i)), + six.b(' Sources:'), + six.b(' source_type: file-with-unused-qsql source: %s/file-%s' % (tmpfolder,i)), + six.b(' Fields:'), + six.b(' `a` - int') + ]) + + cmd = '%s -c 1 -H "select a from %s/%s" -A -C read' % (Q_EXECUTABLE,tmpfolder,filename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 5) + self.assertEqual(o,[ + six.b('Table: %s/file-%s' % (tmpfolder,i)), + six.b(' Sources:'), + six.b(' source_type: qsql-file-with-original source: %s/file-%s.qsql' % (tmpfolder,i)), + six.b(' Fields:'), + six.b(' `a` - int') + ]) + + # check qsql file is readable directly through q + cmd = '%s -c 1 -H "select a from %s/%s.qsql"' % (Q_EXECUTABLE,tmpfolder,filename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), BATCH_SIZE) + self.assertEqual(sum(map(int,o)),sum(range(1+i*BATCH_SIZE,1+(i+1)*BATCH_SIZE))) + self.assertEqual(len(e), 0) + + # check analysis returns proper qsql-with-original for each file when running directly against the qsql file + cmd = '%s -c 1 -H "select a from %s/%s.qsql" -A' % (Q_EXECUTABLE,tmpfolder,filename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 5) + self.assertEqual(o,[ + six.b('Table: %s/file-%s.qsql' % (tmpfolder,i)), + six.b(' Sources:'), + six.b(' source_type: qsql-file source: %s/file-%s.qsql' % (tmpfolder,i)), + six.b(' 
Fields:'), + six.b(' `a` - int') + ]) + c.close() + + import glob + filename_list_with_qsql = list(map(lambda x: x+'.qsql',filename_list)) + + files_in_folder = glob.glob('%s/*' % tmpfolder) + regular_files,qsql_files = partition(lambda x: x.endswith('.qsql'),files_in_folder) + + self.assertEqual(len(files_in_folder),2*FILE_COUNT) + self.assertEqual(sorted(list(map(os.path.basename,regular_files))),sorted(list(map(os.path.basename,filename_list)))) + self.assertEqual(sorted(list(map(os.path.basename,qsql_files))),sorted(list(map(os.path.basename,filename_list_with_qsql)))) + + self.cleanup_folder(tmpfolder) + + def test_globs_ignore_matching_qsql_files(self): + BATCH_SIZE = 10 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + tables_as_str = " left join ".join(["%s/%s" % (tmpfolder,x) for x in filename_list]) + cmd = '%s -H "select count(*) from %s" -c 1 -C readwrite' % (Q_EXECUTABLE,tables_as_str) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b(str(pow(BATCH_SIZE,FILE_COUNT)))) + + cmd = '%s -H "select a from %s/*" -c 1 -C read' % (Q_EXECUTABLE,tmpfolder) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), BATCH_SIZE*FILE_COUNT) + self.assertEqual(len(e), 0) + self.assertEqual(sum(map(int,o)),sum(range(1,1+BATCH_SIZE*FILE_COUNT))) + + self.cleanup_folder(tmpfolder) + + def test_error_on_reading_from_multi_table_sqlite_without_explicit_table_name(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + tmpfolder,filename_list = self.generate_files_in_folder(BATCH_SIZE,FILE_COUNT) + + output_sqlite_file = self.random_tmp_filename("x","sqlite") + + tables_as_str = " left join ".join(["%s/%s" % (tmpfolder,x) for x in filename_list]) + cmd = '%s -H "select count(*) from %s" -c 1 -S %s' % (Q_EXECUTABLE,tables_as_str,output_sqlite_file) + retcode, o, e = run_command(cmd) + + 
self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + + cmd = '%s -H "select count(*) from %s"' % (Q_EXECUTABLE,output_sqlite_file) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 87) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(e[0],six.b("Could not autodetect table name in sqlite file %s . Existing tables: file_dash_0,file_dash_1,file_dash_2,file_dash_3,file_dash_4" % output_sqlite_file)) + + self.cleanup_folder(tmpfolder) + + def test_error_on_trying_to_specify_an_explicit_non_existent_qsql_file(self): + cmd = '%s -H "select count(*) from /non-existent-folder/non-existent.qsql:::mytable"' % (Q_EXECUTABLE) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 30) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(e[0],six.b("Could not find file /non-existent-folder/non-existent.qsql")) + + def test_error_on_providing_a_non_qsql_file_when_specifying_an_explicit_table(self): + data = six.b("\x1f\x8b\x08\x00\tZ\x0ea\x00\x03\xed\x93\xdd\n\xc20\x0cF\xf3(}\x01ij\x93\xf6y:\xd9P\x10)\xb3\xbe\xbf\x9d\x1d\xbbQ\xc6\x06F\x10rn\xbe\x9b\xd0\xfc\x1c\x9a-\x88\x83\x88\x91\xd9\xbc2\xb4\xc4#\xb5\x9c1\x8e\x1czb\x8a\xd1\x19t\xdeS\x00\xc3\xf2\xa3\x01<\xee%\x8du\x94s\x1a\xfbk\xd7\xdf\x0e\xa9\x94Kz\xaf\xabe\xc3\xb0\xf2\xce\xbc\xc7\x92\x7fB\xb6\x1fv\xfd2\xf5\x1e\x81h\xa3\xff\x10'\xff\x8c\x04\x06\xc5'\x03\xf5oO\xe2=v\xf9o\xff\x9f\xd1\xa9\xff_\x90m'\xdec\x9f\x7f\x9c\xfc\xd7T\xff\x8a\xa2(\x92<\x01WY\x0c\x06\x00\x0c\x00\x00") + tmpfilename = self.random_tmp_filename('xx','yy') + f = open(tmpfilename,'wb') + f.write(data) + f.close() + + cmd = '%s -H "select count(*) from %s:::mytable1"' % (Q_EXECUTABLE,tmpfilename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 95) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(e[0],six.b("Cannot detect the type of table %s:::mytable1" % tmpfilename)) + + def 
test_error_on_providing_a_non_qsql_file_when_not_specifying_an_explicit_table(self): + data = six.b("\x1f\x8b\x08\x00\tZ\x0ea\x00\x03\xed\x93\xdd\n\xc20\x0cF\xf3(}\x01ij\x93\xf6y:\xd9P\x10)\xb3\xbe\xbf\x9d\x1d\xbbQ\xc6\x06F\x10rn\xbe\x9b\xd0\xfc\x1c\x9a-\x88\x83\x88\x91\xd9\xbc2\xb4\xc4#\xb5\x9c1\x8e\x1czb\x8a\xd1\x19t\xdeS\x00\xc3\xf2\xa3\x01<\xee%\x8du\x94s\x1a\xfbk\xd7\xdf\x0e\xa9\x94Kz\xaf\xabe\xc3\xb0\xf2\xce\xbc\xc7\x92\x7fB\xb6\x1fv\xfd2\xf5\x1e\x81h\xa3\xff\x10'\xff\x8c\x04\x06\xc5'\x03\xf5oO\xe2=v\xf9o\xff\x9f\xd1\xa9\xff_\x90m'\xdec\x9f\x7f\x9c\xfc\xd7T\xff\x8a\xa2(\x92<\x01WY\x0c\x06\x00\x0c\x00\x00") + tmpfilename = self.random_tmp_filename('xx','yy') + f = open(tmpfilename,'wb') + f.write(data) + f.close() + + cmd = '%s -H "select count(*) from %s"' % (Q_EXECUTABLE,tmpfilename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 59) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertTrue(e[0].startswith(six.b("Could not parse the input. Please make sure to set the proper -w input-wrapping parameter for your input, and that you use the proper input encoding (-e). 
Error:"))) + +class OldSaveDbToDiskTests(AbstractQTestCase): + + def test_join_with_stdin_and_save(self): + x = [six.b(a) for a in map(str,range(1,101))] + large_file_data = six.b("val\n") + six.b("\n").join(x) + tmpfile = self.create_file_with_data(large_file_data) + tmpfile_expected_table_name = os.path.basename(tmpfile.name) + + disk_db_filename = self.random_tmp_filename('save-to-db','sqlite') + + cmd = '(echo id ; seq 1 2 10) | ' + Q_EXECUTABLE + ' -c 1 -H -O "select stdin.*,f.* from - stdin left join %s f on (stdin.id * 10 = f.val)" -S %s' % \ + (tmpfile.name,disk_db_filename) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + self.assertEqual(e[0],six.b('Going to save data into a disk database: %s' % disk_db_filename)) + self.assertTrue(e[1].startswith(six.b('Data has been saved into %s . Saving has taken ' % disk_db_filename))) + self.assertEqual(e[2],six.b('Query to run on the database: select stdin.*,f.* from data_stream_stdin stdin left join %s f on (stdin.id * 10 = f.val);' % \ + tmpfile_expected_table_name)) + self.assertEqual(e[3],six.b('You can run the query directly from the command line using the following command: echo "select stdin.*,f.* from data_stream_stdin stdin left join %s f on (stdin.id * 10 = f.val)" | sqlite3 %s' % + (tmpfile_expected_table_name,disk_db_filename))) + + P = re.compile(six.b("^Query to run on the database: (?P<query_to_run_on_db>.*)$")) + m = P.search(e[2]) + query_to_run_on_db = m.groupdict()['query_to_run_on_db'] + + self.assertTrue(os.path.exists(disk_db_filename)) + + # validate disk db content natively + c = sqlite3.connect(disk_db_filename) + c.row_factory = sqlite_dict_factory + t0_results = c.execute('select * from data_stream_stdin').fetchall() + self.assertEqual(len(t0_results),5) + self.assertEqual(sorted(list(t0_results[0].keys())), ['id']) + self.assertEqual(list(map(lambda x:x['id'],t0_results)),[1,3,5,7,9]) + t1_results = c.execute('select * from
%s' % tmpfile_expected_table_name).fetchall() + self.assertEqual(len(t1_results),100) + self.assertEqual(sorted(list(t1_results[0].keys())), ['val']) + self.assertEqual("\n".join(list(map(lambda x:str(x['val']),t1_results))),"\n".join(map(str,range(1,101)))) + + query_results = c.execute(query_to_run_on_db.decode('utf-8')).fetchall() + + self.assertEqual(query_results[0],{ 'id': 1 , 'val': 10}) + self.assertEqual(query_results[1],{ 'id': 3 , 'val': 30}) + self.assertEqual(query_results[2],{ 'id': 5 , 'val': 50}) + self.assertEqual(query_results[3],{ 'id': 7 , 'val': 70}) + self.assertEqual(query_results[4],{ 'id': 9 , 'val': 90}) + + self.cleanup(tmpfile) + + def test_join_with_qsql_file(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + + header = [six.b('aa'), six.b('bb'), six.b('cc')] + + new_tmp_folder = self.create_folder_with_files({ + 'some_csv_file': self.arrays_to_csv_file_content(six.b(','),header,numbers1), + 'some_qsql_database.qsql' : self.arrays_to_qsql_file_content(header,numbers2) + },prefix='xx',suffix='yy') + + effective_filename1 = '%s/some_csv_file' % new_tmp_folder + effective_filename2 = '%s/some_qsql_database.qsql' % new_tmp_folder + + cmd = Q_EXECUTABLE + ' -d , -H "select sum(large_file.aa),sum(small_file.aa) from %s large_file left join %s small_file on (small_file.aa == large_file.bb)"' % \ + (effective_filename1,effective_filename2) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('50005000,55')) + + # TODO RLRL Check if needed anymore + + # def test_creation_of_qsql_database(self): + # numbers = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + # header = [six.b('aa'), six.b('bb'), six.b('cc')] + # + # qsql_filename = 
self.create_qsql_file_with_content_and_return_filename(header,numbers) + # + # conn = sqlite3.connect(qsql_filename) + # qcatalog = conn.execute('select temp_table_name,source_type,source from _qcatalog').fetchall() + # print(qcatalog) + # + # cmd = '%s "select count(*) from %s" -A' % (Q_EXECUTABLE,qsql_filename) + # retcode, o, e = run_command(cmd) + # print(o) + + def test_join_with_qsql_file_and_save(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + + header = [six.b('aa'), six.b('bb'), six.b('cc')] + + saved_qsql_with_multiple_tables = self.generate_tmpfile_name(suffix='.qsql') + + new_tmp_folder = self.create_folder_with_files({ + 'some_csv_file': self.arrays_to_csv_file_content(six.b(','),header,numbers1), + 'some_qsql_database' : self.arrays_to_csv_file_content(six.b(','),header,numbers2) + },prefix='xx',suffix='yy') + cmd = '%s -d , -H "select count(*) from %s/some_qsql_database" -C readwrite' % (Q_EXECUTABLE,new_tmp_folder) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode,0) + os.remove('%s/some_qsql_database' % new_tmp_folder) + + effective_filename1 = '%s/some_csv_file' % new_tmp_folder + effective_filename2 = '%s/some_qsql_database.qsql' % new_tmp_folder + + cmd = Q_EXECUTABLE + ' -d , -H "select sum(large_file.aa),sum(small_file.aa) from %s large_file left join %s small_file on (small_file.aa == large_file.bb)" -S %s' % \ + (effective_filename1,effective_filename2,saved_qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + + conn = sqlite3.connect(saved_qsql_with_multiple_tables) + c1 = conn.execute('select count(*) from some_csv_file').fetchall() + c2 = conn.execute('select count(*) from some_qsql_database').fetchall() + + self.assertEqual(c1[0][0],10000) + self.assertEqual(c2[0][0],10) + + + def test_saving_to_db_with_same_basename_files(self): + numbers1 = 
[[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + + header = [six.b('aa'), six.b('bb'), six.b('cc')] + + qsql_with_multiple_tables = self.generate_tmpfile_name(suffix='.qsql') + + new_tmp_folder = self.create_folder_with_files({ + 'filename1': self.arrays_to_csv_file_content(six.b(','),header,numbers1), + 'otherfolder/filename1' : self.arrays_to_csv_file_content(six.b(','),header,numbers2) + },prefix='xx',suffix='yy') + + effective_filename1 = '%s/filename1' % new_tmp_folder + effective_filename2 = '%s/otherfolder/filename1' % new_tmp_folder + + expected_stored_table_name1 = 'filename1' + expected_stored_table_name2 = 'filename1_2' + + cmd = Q_EXECUTABLE + ' -d , -H "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s small_file left join %s large_file on (large_file.aa == small_file.bb)" -S %s' % \ + (effective_filename1,effective_filename2,qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + self.assertEqual(e[0], six.b('Going to save data into a disk database: %s' % qsql_with_multiple_tables)) + self.assertTrue(e[1].startswith(six.b('Data has been saved into %s . 
Saving has taken' % qsql_with_multiple_tables))) + self.assertEqual(e[2],six.b('Query to run on the database: select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s small_file left join %s large_file on (large_file.aa == small_file.bb);' % \ + (expected_stored_table_name1,expected_stored_table_name2))) + self.assertEqual(e[3],six.b('You can run the query directly from the command line using the following command: echo "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s small_file left join %s large_file on (large_file.aa == small_file.bb)" | sqlite3 %s' % \ + (expected_stored_table_name1,expected_stored_table_name2,qsql_with_multiple_tables))) + + #self.assertTrue(False) # pxpx - need to actually test reading from the saved db file + conn = sqlite3.connect(qsql_with_multiple_tables) + c1 = conn.execute('select count(*) from filename1').fetchall() + c2 = conn.execute('select count(*) from filename1_2').fetchall() + + self.assertEqual(c1[0][0],10000) + self.assertEqual(c2[0][0],10) + + + def test_error_when_not_specifying_table_name_in_multi_table_qsql(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + + header = [six.b('aa'), six.b('bb'), six.b('cc')] + + qsql_with_multiple_tables = self.generate_tmpfile_name(suffix='.qsql') + + new_tmp_folder = self.create_folder_with_files({ + 'filename1': self.arrays_to_csv_file_content(six.b(','),header,numbers1), + 'otherfolder/filename1' : self.arrays_to_csv_file_content(six.b(','),header,numbers2) + },prefix='xx',suffix='yy') + + effective_filename1 = '%s/filename1' % new_tmp_folder + effective_filename2 = '%s/otherfolder/filename1' % new_tmp_folder + + expected_stored_table_name1 = 'filename1' + expected_stored_table_name2 = 'filename1_2' + + cmd = Q_EXECUTABLE + ' -d , -H "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s small_file left 
join %s large_file on (large_file.aa == small_file.bb)" -S %s' % \ + (effective_filename1,effective_filename2,qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + # Actual tests + + cmd = '%s "select count(*) from %s"' % (Q_EXECUTABLE,qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 87) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + self.assertEqual(e[0],six.b('Could not autodetect table name in sqlite file %s . Existing tables: %s,%s' % (qsql_with_multiple_tables,expected_stored_table_name1,expected_stored_table_name2))) + + def test_error_when_not_specifying_table_name_in_multi_table_sqlite(self): + sqlite_with_multiple_tables = self.generate_tmpfile_name(suffix='.sqlite') + + c = sqlite3.connect(sqlite_with_multiple_tables) + c.execute('create table my_table_1 (x int, y int)').fetchall() + c.execute('create table my_table_2 (x int, y int)').fetchall() + c.close() + + cmd = '%s "select count(*) from %s"' % (Q_EXECUTABLE,sqlite_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 87) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + print(e[0]) + self.assertEqual(e[0],six.b('Could not autodetect table name in sqlite file %s . 
Existing tables: my_table_1,my_table_2' % sqlite_with_multiple_tables)) + + def test_querying_from_multi_table_sqlite_using_explicit_table_name(self): + sqlite_with_multiple_tables = self.generate_tmpfile_name(suffix='.sqlite') + + c = sqlite3.connect(sqlite_with_multiple_tables) + c.execute('create table my_table_1 (x int, y int)').fetchall() + c.execute('insert into my_table_1 (x,y) values (100,200),(300,400)').fetchall() + c.execute('commit').fetchall() + c.execute('create table my_table_2 (x int, y int)').fetchall() + c.close() + + cmd = '%s -d , "select * from %s:::my_table_1"' % (Q_EXECUTABLE,sqlite_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 2) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('100,200')) + self.assertEqual(o[1],six.b('300,400')) + + # Check again, this time with a different output delimiter and with explicit column names + cmd = '%s -t "select x,y from %s:::my_table_1"' % (Q_EXECUTABLE,sqlite_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 2) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('100\t200')) + self.assertEqual(o[1],six.b('300\t400')) + + + def test_error_when_specifying_nonexistent_table_name_in_multi_table_qsql(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + + header = [six.b('aa'), six.b('bb'), six.b('cc')] + + qsql_with_multiple_tables = self.generate_tmpfile_name(suffix='.qsql') + + new_tmp_folder = self.create_folder_with_files({ + 'filename1': self.arrays_to_csv_file_content(six.b(','),header,numbers1), + 'otherfolder/filename1' : self.arrays_to_csv_file_content(six.b(','),header,numbers2) + },prefix='xx',suffix='yy') + + effective_filename1 = '%s/filename1' % new_tmp_folder + effective_filename2 = '%s/otherfolder/filename1' % 
new_tmp_folder + + expected_stored_table_name1 = 'filename1' + expected_stored_table_name2 = 'filename1_2' + + cmd = Q_EXECUTABLE + ' -d , -H "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s small_file left join %s large_file on (large_file.aa == small_file.bb)" -S %s' % \ + (effective_filename1,effective_filename2,qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + # Actual tests + + cmd = '%s "select count(*) from %s:::non_existent_table"' % (Q_EXECUTABLE,qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 85) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + self.assertEqual(e[0],six.b('Table non_existent_table could not be found in sqlite file %s . Existing table names: %s,%s' % \ + (qsql_with_multiple_tables,expected_stored_table_name1,expected_stored_table_name2))) + + def test_querying_multi_table_qsql_file(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + + header = [six.b('aa'), six.b('bb'), six.b('cc')] + + qsql_with_multiple_tables = self.generate_tmpfile_name(suffix='.qsql') + + new_tmp_folder = self.create_folder_with_files({ + 'filename1': self.arrays_to_csv_file_content(six.b(','),header,numbers1), + 'otherfolder/filename1' : self.arrays_to_csv_file_content(six.b(','),header,numbers2) + },prefix='xx',suffix='yy') + + effective_filename1 = '%s/filename1' % new_tmp_folder + effective_filename2 = '%s/otherfolder/filename1' % new_tmp_folder + + expected_stored_table_name1 = 'filename1' + expected_stored_table_name2 = 'filename1_2' + + cmd = Q_EXECUTABLE + ' -d , -H "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s small_file left join %s large_file on (large_file.aa == small_file.bb)" -S %s' % \ + 
(effective_filename1,effective_filename2,qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + + # Actual tests + + cmd = '%s "select count(*) from %s:::%s"' % (Q_EXECUTABLE,qsql_with_multiple_tables,expected_stored_table_name1) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('10000')) + + cmd = '%s "select count(*) from %s:::%s"' % (Q_EXECUTABLE,qsql_with_multiple_tables,expected_stored_table_name2) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('10')) + + def test_preventing_db_overwrite(self): + db_filename = self.random_tmp_filename('store-to-disk', 'db') + self.assertFalse(os.path.exists(db_filename)) + + retcode, o, e = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) + + self.assertTrue(retcode == 0) + self.assertTrue(os.path.exists(db_filename)) + + retcode2, o2, e2 = run_command('seq 1 1000 | ' + Q_EXECUTABLE + ' "select count(*) from -" -c 1 -S %s' % db_filename) + self.assertTrue(retcode2 != 0) + self.assertTrue(e2[0].startswith(six.b('Going to save data into a disk database'))) + self.assertTrue(e2[1] == six.b('Disk database file {} already exists.'.format(db_filename))) + + os.remove(db_filename) + + +class BasicTests(AbstractQTestCase): + + def test_basic_aggregation(self): + retcode, o, e = run_command( + 'seq 1 10 | ' + Q_EXECUTABLE + ' "select sum(c1),avg(c1) from -"') + self.assertTrue(retcode == 0) + self.assertTrue(len(o) == 1) + self.assertTrue(len(e) == 1) + + s = sum(range(1, 11)) + self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) + self.assertTrue(one_column_warning(e)) + + def test_select_one_column(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + + 
cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(six.b(" ").join(o), six.b('a b c')) + + self.cleanup(tmpfile) + + def test_column_separation(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], sample_data_rows[0]) + self.assertEqual(o[1], sample_data_rows[1]) + self.assertEqual(o[2], sample_data_rows[2]) + + self.cleanup(tmpfile) + + def test_header_exception_on_numeric_header_data(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A -H' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 3) + self.assertTrue( + six.b('Bad header row: Header must contain only strings') in e[0]) + self.assertTrue(six.b("Column name must be a string") in e[1]) + self.assertTrue(six.b("Column name must be a string") in e[2]) + + self.cleanup(tmpfile) + + def test_different_header_in_second_file(self): + folder_name = self.create_folder_with_files({ + 'file1': self.arrays_to_csv_file_content(six.b(','),[six.b('a'),six.b('b')],[[six.b(str(x)),six.b(str(x))] for x in range(1,6)]), + 'file2': self.arrays_to_csv_file_content(six.b(','),[six.b('c'),six.b('d')],[[six.b(str(x)),six.b(str(x))] for x in range(1,6)]) + },prefix="xx",suffix="aa") + + cmd = Q_EXECUTABLE + ' -d , "select * from %s/*" -H' % (folder_name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 35) + self.assertEqual(len(e),1) + self.assertEqual(e[0],six.b("Bad header row: Extra header 'c,d' in file '%s/file2' mismatches original header 'a,b' from file 
'%s/file1'. Table name is '%s/*'" % (folder_name,folder_name,folder_name))) + + def test_data_with_header(self): + tmpfile = self.create_file_with_data(sample_data_with_header) + cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(six.b(" ").join(o), six.b("a b c")) + + self.cleanup(tmpfile) + + def test_output_header_when_input_header_exists(self): + tmpfile = self.create_file_with_data(sample_data_with_header) + cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H -O' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 4) + self.assertEqual(o[0],six.b('name')) + self.assertEqual(o[1],six.b('a')) + self.assertEqual(o[2],six.b('b')) + self.assertEqual(o[3],six.b('c')) + + self.cleanup(tmpfile) + + def test_generated_column_name_warning_when_header_line_exists(self): + tmpfile = self.create_file_with_data(sample_data_with_header) + cmd = Q_EXECUTABLE + ' -d , "select c3 from %s" -H' % tmpfile.name + + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 2) + self.assertTrue(six.b('no such column: c3') in e[0]) + self.assertTrue( + e[1].startswith(six.b('Warning - There seems to be a "no such column" error, and -H (header line) exists. 
Please make sure that you are using the column names from the header line and not the default (cXX) column names'))) + + self.cleanup(tmpfile) + + def test_empty_data(self): + tmpfile = self.create_file_with_data(six.b('')) + cmd = Q_EXECUTABLE + ' -d , "select * from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + + self.assertTrue(six.b('Warning - data is empty') in e[0]) + + self.cleanup(tmpfile) + + def test_empty_data_with_header_param(self): + tmpfile = self.create_file_with_data(six.b('')) + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -H' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + + m = six.b("Header line is expected but missing in file %s" % tmpfile.name) + self.assertTrue(m in e[0]) + + self.cleanup(tmpfile) + + def test_one_row_of_data_without_header_param(self): + tmpfile = self.create_file_with_data(header_row) + cmd = Q_EXECUTABLE + ' -d , "select c2 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], six.b('value1')) + + self.cleanup(tmpfile) + + def test_one_row_of_data_with_header_param(self): + tmpfile = self.create_file_with_data(header_row) + cmd = Q_EXECUTABLE + ' -d , "select name from %s" -H' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + + self.assertTrue(six.b('Warning - data is empty') in e[0]) + + self.cleanup(tmpfile) + + def test_dont_leading_keep_whitespace_in_values(self): + tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) 
+ self.assertEqual(len(o), 3) + + self.assertEqual(o[0], six.b('a')) + self.assertEqual(o[1], six.b('b')) + self.assertEqual(o[2], six.b('c')) + + self.cleanup(tmpfile) + + def test_keep_leading_whitespace_in_values(self): + tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -k' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) + + self.assertEqual(o[0], six.b('a')) + self.assertEqual(o[1], six.b(' b')) + self.assertEqual(o[2], six.b('c')) + + self.cleanup(tmpfile) + + def test_no_impact_of_keeping_leading_whitespace_on_integers(self): + tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header) + cmd = Q_EXECUTABLE + ' -d , "select c2 from %s" -k -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + f = open("/var/tmp/XXX","wb") + f.write(six.b("\n").join(o)) + f.write(six.b("STDERR:")) + f.write(six.b("\n").join(e)) + f.close() + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 7) + + + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1], six.b(' Sources:')) + self.assertEqual(o[2], six.b(' source_type: file source: %s') % six.b(tmpfile.name)) + self.assertEqual(o[3], six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `c1` - text')) + self.assertEqual(o[5], six.b(' `c2` - int')) + self.assertEqual(o[6], six.b(' `c3` - int')) + + + self.cleanup(tmpfile) + + def test_spaces_in_header_row(self): + tmpfile = self.create_file_with_data( + header_row_with_spaces + six.b("\n") + sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , "select name,\\`value 1\\` from %s" -H' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) + + self.assertEqual(o[0], six.b('a,1')) + self.assertEqual(o[1], six.b('b,2')) + self.assertEqual(o[2], 
six.b('c,')) + + self.cleanup(tmpfile) + + def test_no_query_in_command_line(self): + cmd = Q_EXECUTABLE + ' -d , ""' + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) + + self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) + + def test_empty_query_in_command_line(self): + cmd = Q_EXECUTABLE + ' -d , " "' + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) + + self.assertEqual(e[0],six.b('Query cannot be empty (query number 1)')) + + def test_failure_in_query_stops_processing_queries(self): + cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300" "wrong-query" "select 8000"' + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 2) + self.assertEqual(o[0],six.b('500')) + self.assertEqual(o[1],six.b('300')) + + def test_multiple_queries_in_command_line(self): + cmd = Q_EXECUTABLE + ' -d , "select 500" "select 300+100" "select 300" "select 200"' + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) + + self.assertEqual(o[0],six.b('500')) + self.assertEqual(o[1],six.b('400')) + self.assertEqual(o[2],six.b('300')) + self.assertEqual(o[3],six.b('200')) + + def test_literal_calculation_query(self): + cmd = Q_EXECUTABLE + ' -d , "select 1+40/6"' + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 1) + + self.assertEqual(o[0],six.b('7')) + + def test_literal_calculation_query_float_result(self): + cmd = Q_EXECUTABLE + ' -d , "select 1+40/6.0"' + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 1) + + self.assertEqual(o[0],six.b('7.666666666666667')) + + def test_use_query_file(self): + tmp_data_file = 
self.create_file_with_data(sample_data_with_header) + tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) + + cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) + + self.assertEqual(o[0], six.b('a')) + self.assertEqual(o[1], six.b('b')) + self.assertEqual(o[2], six.b('c')) + + self.cleanup(tmp_data_file) + self.cleanup(tmp_query_file) + + def test_use_query_file_with_incorrect_query_encoding(self): + tmp_data_file = self.create_file_with_data(sample_data_with_header) + tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) + + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q ascii' % tmp_query_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,3) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + + self.assertTrue(e[0].startswith(six.b('Could not decode query number 1 using the provided query encoding (ascii)'))) + + self.cleanup(tmp_data_file) + self.cleanup(tmp_query_file) + + def test_output_header_with_non_ascii_names(self): + OUTPUT_ENCODING = 'utf-8' + + tmp_data_file = self.create_file_with_data(sample_data_with_header) + tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name),encoding=None) + + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -O -E %s' % (tmp_query_file.name,OUTPUT_ENCODING) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),4) + self.assertEqual(len(e),0) + + self.assertEqual(o[0].decode(OUTPUT_ENCODING), u'name,Hr\xe1\u010d') + self.assertEqual(o[1].decode(OUTPUT_ENCODING), u'a,Hr\xe1\u010d') + self.assertEqual(o[2].decode(OUTPUT_ENCODING), u'b,Hr\xe1\u010d') + self.assertEqual(o[3].decode(OUTPUT_ENCODING), u'c,Hr\xe1\u010d') + + 
self.cleanup(tmp_data_file) + self.cleanup(tmp_query_file) + + def test_use_query_file_with_query_encoding(self): + OUTPUT_ENCODING = 'utf-8' + + tmp_data_file = self.create_file_with_data(sample_data_with_header) + tmp_query_file = self.create_file_with_data(six.b("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) + + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,OUTPUT_ENCODING) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) + + self.assertEqual(o[0].decode(OUTPUT_ENCODING), u'a,Hr\xe1\u010d') + self.assertEqual(o[1].decode(OUTPUT_ENCODING), u'b,Hr\xe1\u010d') + self.assertEqual(o[2].decode(OUTPUT_ENCODING), u'c,Hr\xe1\u010d') + + self.cleanup(tmp_data_file) + self.cleanup(tmp_query_file) + + def test_use_query_file_and_command_line(self): + tmp_data_file = self.create_file_with_data(sample_data_with_header) + tmp_query_file = self.create_file_with_data(six.b("select name from %s" % tmp_data_file.name)) + + cmd = Q_EXECUTABLE + ' -d , -q %s -H "select * from ppp"' % tmp_query_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) + + self.assertTrue(e[0].startswith(six.b("Can't provide both a query file and a query on the command line"))) + + self.cleanup(tmp_data_file) + self.cleanup(tmp_query_file) + + def test_select_output_encoding(self): + tmp_data_file = self.create_file_with_data(sample_data_with_header) + tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) + + for target_encoding in ['utf-8','ibm852']: + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 3) + + 
self.assertEqual(o[0].decode(target_encoding), u'Hr\xe1\u010d') + self.assertEqual(o[1].decode(target_encoding), u'Hr\xe1\u010d') + self.assertEqual(o[2].decode(target_encoding), u'Hr\xe1\u010d') + + self.cleanup(tmp_data_file) + self.cleanup(tmp_query_file) + + def test_select_failed_output_encoding(self): + tmp_data_file = self.create_file_with_data(sample_data_with_header) + tmp_query_file = self.create_file_with_data(six.b("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name),encoding=None) + + cmd = Q_EXECUTABLE + ' -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 3) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) + + self.assertTrue(e[0].startswith(six.b('Cannot encode data'))) + + self.cleanup(tmp_data_file) + self.cleanup(tmp_query_file) + + + def test_use_query_file_with_empty_query(self): + tmp_query_file = self.create_file_with_data(six.b(" ")) + + cmd = Q_EXECUTABLE + ' -d , -q %s -H' % tmp_query_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) + + self.assertTrue(e[0].startswith(six.b("Query cannot be empty"))) + + self.cleanup(tmp_query_file) + + def test_use_non_existent_query_file(self): + cmd = Q_EXECUTABLE + ' -d , -q non-existent-query-file -H' + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 1) + self.assertEqual(len(e), 1) + self.assertEqual(len(o), 0) + + self.assertTrue(e[0].startswith(six.b("Could not read query from file"))) + + def test_nonexistent_file(self): + cmd = Q_EXECUTABLE + ' "select * from non-existent-file"' + + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode,0) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + + self.assertEqual(e[0],six.b("No files matching '%s/non-existent-file' have been found" % os.getcwd())) + + def test_default_column_max_length_parameter__short_enough(self): + huge_text = six.b("x" * 
131000) + + file_data = six.b("a,b,c\n1,{},3\n".format(huge_text)) + + tmpfile = self.create_file_with_data(file_data) + + cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b('1')) + + self.cleanup(tmpfile) + + def test_default_column_max_length_parameter__too_long(self): + huge_text = six.b("x") * 132000 + + file_data = six.b("a,b,c\n1,{},3\n".format(huge_text)) + + tmpfile = self.create_file_with_data(file_data) + + cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 31) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + + self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) + self.assertTrue(six.b("Offending file is '{}'".format(tmpfile.name)) in e[0]) + self.assertTrue(six.b('Line is 2') in e[0]) + + self.cleanup(tmpfile) + + def test_column_max_length_parameter(self): + file_data = six.b("a,b,c\nvery-long-text,2,3\n") + tmpfile = self.create_file_with_data(file_data) + + cmd = Q_EXECUTABLE + ' -H -d , -M 3 "select a from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 31) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + + self.assertTrue(e[0].startswith(six.b("Column length is larger than the maximum"))) + self.assertTrue((six.b("Offending file is '%s'" % tmpfile.name)) in e[0]) + self.assertTrue(six.b('Line is 2') in e[0]) + + cmd2 = Q_EXECUTABLE + ' -H -d , -M 300 -H "select a from %s"' % tmpfile.name + retcode2, o2, e2 = run_command(cmd2) + + self.assertEqual(retcode2, 0) + self.assertEqual(len(o2), 1) + self.assertEqual(len(e2), 0) + + self.assertEqual(o2[0],six.b('very-long-text')) + + self.cleanup(tmpfile) + + def test_invalid_column_max_length_parameter(self): + file_data = six.b("a,b,c\nvery-long-text,2,3\n") + 
tmpfile = self.create_file_with_data(file_data) + + cmd = Q_EXECUTABLE + ' -H -d , -M xx "select a from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 31) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + + self.assertEqual(e[0],six.b('Max column length limit must be an integer larger than 2 (xx)')) + + self.cleanup(tmpfile) + + def test_duplicate_column_name_detection(self): + file_data = six.b("a,b,a\n10,20,30\n30,40,50") + tmpfile = self.create_file_with_data(file_data) + + cmd = Q_EXECUTABLE + ' -H -d , "select a from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 35) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 2) + + self.assertTrue(e[0].startswith(six.b('Bad header row:'))) + self.assertEqual(e[1],six.b("'a': Column name is duplicated")) + + self.cleanup(tmpfile) + + def test_join_with_stdin(self): + x = [six.b(a) for a in map(str,range(1,101))] + large_file_data = six.b("val\n") + six.b("\n").join(x) + tmpfile = self.create_file_with_data(large_file_data) + + cmd = '(echo id ; seq 1 2 10) | %s -c 1 -H -O "select stdin.*,f.* from - stdin left join %s f on (stdin.id * 10 = f.val)"' % (Q_EXECUTABLE,tmpfile.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 6) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b('id val')) + self.assertEqual(o[1],six.b('1 10')) + self.assertEqual(o[2],six.b('3 30')) + self.assertEqual(o[3],six.b('5 50')) + self.assertEqual(o[4],six.b('7 70')) + self.assertEqual(o[5],six.b('9 90')) + + self.cleanup(tmpfile) + + def test_concatenated_files(self): + file_data1 = six.b("a,b,c\n10,11,12\n20,21,22") + tmpfile1 = self.create_file_with_data(file_data1) + tmpfile1_folder = os.path.dirname(tmpfile1.name) + tmpfile1_filename = os.path.basename(tmpfile1.name) + expected_cache_filename1 = os.path.join(tmpfile1_folder,tmpfile1_filename + '.qsql') + + file_data2 = 
six.b("a,b,c\n30,31,32\n40,41,42") + tmpfile2 = self.create_file_with_data(file_data2) + tmpfile2_folder = os.path.dirname(tmpfile2.name) + tmpfile2_filename = os.path.basename(tmpfile2.name) + expected_cache_filename2 = os.path.join(tmpfile2_folder,tmpfile2_filename + '.qsql') + + cmd = Q_EXECUTABLE + ' -O -H -d , "select * from %s UNION ALL select * from %s" -C none' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 5) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('a,b,c')) + self.assertEqual(o[1],six.b('10,11,12')) + self.assertEqual(o[2],six.b('20,21,22')) + self.assertEqual(o[3],six.b('30,31,32')) + self.assertEqual(o[4],six.b('40,41,42')) + + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + def test_out_of_range_expected_column_count(self): + cmd = '%s "select count(*) from some_table" -c -1' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 90) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(e[0], six.b('Column count must be between 1 and 131072')) + + def test_out_of_range_expected_column_count__with_explicit_limit(self): + cmd = '%s "select count(*) from some_table" -c -1 -M 100' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 90) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(e[0], six.b('Column count must be between 1 and 100')) + + def test_other_out_of_range_expected_column_count__with_explicit_limit(self): + cmd = '%s "select count(*) from some_table" -c 101 -M 100' % Q_EXECUTABLE + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 90) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(e[0], six.b('Column count must be between 1 and 100')) + + def test_explicit_limit_of_columns__data_is_ok(self): + file_data1 = six.b("191\n192\n") + tmpfile1 = self.create_file_with_data(file_data1) + + cmd = '%s 
"select count(*) from %s" -c 1 -M 3' % (Q_EXECUTABLE,tmpfile1.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0], six.b('2')) + + self.cleanup(tmpfile1) + +class ManyOpenFilesTests(AbstractQTestCase): + + + def test_multi_file_header_skipping(self): + BATCH_SIZE = 50 + FILE_COUNT = 5 + + numbers = list(range(1,1+BATCH_SIZE*FILE_COUNT)) + numbers_as_text = batch([str(x) for x in numbers],n=BATCH_SIZE) + + content_list = list(map(six.b,['a\n' + "\n".join(x)+'\n' for x in numbers_as_text])) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d,'split-files','multi-header') + + cmd = '%s -d , -H -c 1 "select count(a),sum(a) from %s/*" -C none' % (Q_EXECUTABLE,tmpfolder) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b("%s,%s" % (BATCH_SIZE*FILE_COUNT,sum(numbers)))) + + self.cleanup_folder(tmpfolder) + + def test_that_globs_dont_max_out_sqlite_attached_database_limits(self): + BATCH_SIZE = 50 + FILE_COUNT = 40 + + numbers_as_text = batch([str(x) for x in range(1,1+BATCH_SIZE*FILE_COUNT)],n=BATCH_SIZE) + + content_list = map(six.b,["\n".join(x)+'\n' for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d,'split-files','attach-limit') + #expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + cmd = 'cd %s && %s -c 1 "select count(*) from *" -C none --max-attached-sqlite-databases=10' % (tmpfolder,Q_EXECUTABLE) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + 
self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + self.cleanup_folder(tmpfolder) + + def test_maxing_out_max_attached_database_limits__regular_files(self): + BATCH_SIZE = 50 + FILE_COUNT = 40 + + numbers_as_text = batch([str(x) for x in range(1,1+BATCH_SIZE*FILE_COUNT)],n=BATCH_SIZE) + + content_list = map(six.b,["\n".join(x)+'\n' for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d,'split-files','attach-limit') + #expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + unioned_subquery = " UNION ALL ".join(["select * from %s/%s" % (tmpfolder,filename) for filename in filename_list]) + cmd = 'cd %s && %s -c 1 "select count(*) from (%s)" -C none --max-attached-sqlite-databases=10' % (tmpfolder,Q_EXECUTABLE,unioned_subquery) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + self.cleanup_folder(tmpfolder) + + def test_maxing_out_max_attached_database_limits__with_qsql_files_below_attached_limit(self): + MAX_ATTACHED_SQLITE_DATABASES = 10 + + BATCH_SIZE = 50 + FILE_COUNT = MAX_ATTACHED_SQLITE_DATABASES - 1 + + numbers_as_text = batch([str(x) for x in range(1,1+BATCH_SIZE*FILE_COUNT)],n=BATCH_SIZE) + + content_list = map(six.b,["\n".join(x)+'\n' for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d,'split-files','attach-limit') + #expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + # Execute the query with -C readwrite, so all qsql files will be created + unioned_subquery = " UNION ALL ".join(["select * from %s/%s" % (tmpfolder,filename) for 
filename in filename_list]) + cmd = 'cd %s && %s -c 1 "select count(*) from (%s)" -C readwrite --max-attached-sqlite-databases=%s' % (tmpfolder,Q_EXECUTABLE,unioned_subquery,MAX_ATTACHED_SQLITE_DATABASES) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + # Now execute the same query with -C readwrite, so all files will be read directly from the qsql files + cmd = 'cd %s && %s -c 1 "select count(*) from (%s)" -C readwrite' % (tmpfolder,Q_EXECUTABLE,unioned_subquery) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + self.cleanup_folder(tmpfolder) + + def test_maxing_out_max_attached_database_limits__with_qsql_files_above_attached_limit(self): + MAX_ATTACHED_SQLITE_DATABASES = 10 + + BATCH_SIZE = 50 + # Here's the difference from test_maxing_out_max_attached_database_limits__with_qsql_files_below_attached_limit + # We're trying to cache 2 times the number of files than the number of databases that can be attached. 
+ # Expectation is that only a part of the files will be cached + FILE_COUNT = MAX_ATTACHED_SQLITE_DATABASES * 2 + + numbers_as_text = batch([str(x) for x in range(1,1+BATCH_SIZE*FILE_COUNT)],n=BATCH_SIZE) + + content_list = map(six.b,["\n".join(x)+'\n' for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d,'split-files','attach-limit') + #expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + # Execute the query with -C readwrite, so all qsql files will be created + unioned_subquery = " UNION ALL ".join(["select * from %s/%s" % (tmpfolder,filename) for filename in filename_list]) + cmd = 'cd %s && %s -c 1 "select count(*) from (%s)" -C readwrite --max-attached-sqlite-databases=%s' % (tmpfolder,Q_EXECUTABLE,unioned_subquery,MAX_ATTACHED_SQLITE_DATABASES) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + # Now execute the same query with -C readwrite, so all files will be read directly from the qsql files + cmd = 'cd %s && %s -c 1 "select count(*) from (%s)" -C readwrite' % (tmpfolder,Q_EXECUTABLE,unioned_subquery) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + from glob import glob + files_in_folder = [os.path.basename(x) for x in glob('%s/*' % (tmpfolder))] + + expected_files_in_folder = filename_list + list(map(lambda x: 'file-%s.qsql' % x,range(MAX_ATTACHED_SQLITE_DATABASES-2))) + + self.assertEqual(sorted(files_in_folder),sorted(expected_files_in_folder)) + + self.cleanup_folder(tmpfolder) + + def test_maxing_out_max_attached_database_limits__with_directly_using_qsql_files(self): + 
MAX_ATTACHED_SQLITE_DATABASES = 10 + + BATCH_SIZE = 50 + FILE_COUNT = MAX_ATTACHED_SQLITE_DATABASES * 2 + + numbers_as_text = batch([str(x) for x in range(1,1+BATCH_SIZE*FILE_COUNT)],n=BATCH_SIZE) + + content_list = map(six.b,["\n".join(x)+'\n' for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d,'split-files','attach-limit') + #expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + # Prepare qsql for each of the files (separately, just for simplicity) + for fn in filename_list: + cmd = 'cd %s && %s -c 1 "select count(*) from %s" -C readwrite' % (tmpfolder,Q_EXECUTABLE,fn) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + # Now execute a big query which uses the created qsql files + unioned_subquery = " UNION ALL ".join(["select * from %s/%s.qsql" % (tmpfolder,filename) for filename in filename_list]) + + cmd = 'cd %s && %s -c 1 "select count(*) from (%s)" -C readwrite' % (tmpfolder,Q_EXECUTABLE,unioned_subquery) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + self.cleanup_folder(tmpfolder) + + def test_too_many_open_files_for_one_table(self): + # Previously file opening was parallel, causing too-many-open-files + + MAX_ALLOWED_FILES = 500 + + BATCH_SIZE = 2 + FILE_COUNT = MAX_ALLOWED_FILES + 1 + + numbers_as_text = batch([str(x) for x in range(1,1+BATCH_SIZE*FILE_COUNT)],n=BATCH_SIZE) + + content_list = map(six.b,["\n".join(x) for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = 
self.create_folder_with_files(d,'split-files','attach-limit') + + cmd = 'cd %s && %s -c 1 "select count(*) from * where 1 = 1 or c1 != 2" -C none' % (tmpfolder,Q_EXECUTABLE) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 82) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + x = six.b('Maximum source files for table must be %s. Table is name is %s/* Number of actual files is %s' % (MAX_ALLOWED_FILES,os.path.realpath(tmpfolder),FILE_COUNT)) + print(x) + self.assertEqual(e[0],x) + + self.cleanup_folder(tmpfolder) + + def test_many_open_files_for_one_table(self): + # Previously file opening was parallel, causing too-many-open-files + + BATCH_SIZE = 2 + FILE_COUNT = 500 + + numbers_as_text = batch([str(x) for x in range(1,1+BATCH_SIZE*FILE_COUNT)],n=BATCH_SIZE) + + content_list = map(six.b,["\n".join(x) for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x,range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder = self.create_folder_with_files(d,'split-files','attach-limit') + #expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + cmd = 'cd %s && %s -c 1 "select count(*) from * where 1 = 1 or c1 != 2" -C none' % (tmpfolder,Q_EXECUTABLE) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b(str(BATCH_SIZE*FILE_COUNT))) + + self.cleanup_folder(tmpfolder) + + def test_many_open_files_for_two_tables(self): + BATCH_SIZE = 2 + FILE_COUNT = 500 + + numbers_as_text = batch([str(x) for x in range(1, 1 + BATCH_SIZE * FILE_COUNT)], n=BATCH_SIZE) + + content_list = map(six.b, ["\n".join(x) for x in numbers_as_text]) + + filename_list = list(map(lambda x: 'file-%s' % x, range(FILE_COUNT))) + d = collections.OrderedDict(zip(filename_list, content_list)) + + tmpfolder1 = self.create_folder_with_files(d, 'split-files1', 'blah') + tmpfolder2 = 
self.create_folder_with_files(d, 'split-files1', 'blah') + + cmd = '%s -c 1 "select count(*) from %s/* a left join %s/* b on (a.c1 = b.c1)" -C none' % ( + Q_EXECUTABLE, + tmpfolder1, + tmpfolder2) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], six.b(str(BATCH_SIZE * FILE_COUNT))) + + self.cleanup_folder(tmpfolder1) + self.cleanup_folder(tmpfolder2) + + +class GzippingTests(AbstractQTestCase): + + def test_gzipped_file(self): + tmpfile = self.create_file_with_data( + six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) + + cmd = Q_EXECUTABLE + ' -z "select sum(c1),avg(c1) from %s"' % tmpfile.name + + retcode, o, e = run_command(cmd) + self.assertTrue(retcode == 0) + self.assertTrue(len(o) == 1) + self.assertTrue(len(e) == 1) + + s = sum(range(1, 11)) + self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) + self.assertTrue(one_column_warning(e)) + + self.cleanup(tmpfile) + + +class DelimiterTests(AbstractQTestCase): + + def test_delimition_mistake_with_header(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + + cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -H' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 3) + + self.assertTrue(e[0].startswith( + six.b("Warning: column count is one - did you provide the correct delimiter"))) + self.assertTrue(e[1].startswith(six.b("Bad header row"))) + self.assertTrue(six.b("Column name cannot contain commas") in e[2]) + + self.cleanup(tmpfile) + + def test_tab_delimition_parameter(self): + tmpfile = self.create_file_with_data( + sample_data_no_header.replace(six.b(","), six.b("\t"))) + cmd = Q_EXECUTABLE + ' -t "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + 
self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + + self.cleanup(tmpfile) + + def test_pipe_delimition_parameter(self): + tmpfile = self.create_file_with_data( + sample_data_no_header.replace(six.b(","), six.b("|"))) + cmd = Q_EXECUTABLE + ' -p "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + + self.cleanup(tmpfile) + + def test_tab_delimition_parameter__with_manual_override_attempt(self): + tmpfile = self.create_file_with_data( + sample_data_no_header.replace(six.b(","), six.b("\t"))) + cmd = Q_EXECUTABLE + ' -t -d , "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 1) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(e[0],six.b('Warning: -t parameter overrides -d parameter (,)')) + + self.cleanup(tmpfile) + + def test_pipe_delimition_parameter__with_manual_override_attempt(self): + tmpfile = self.create_file_with_data( + sample_data_no_header.replace(six.b(","), six.b("|"))) + cmd = Q_EXECUTABLE + ' -p -d , "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + 
self.assertEqual(len(o), 3) + self.assertEqual(len(e), 1) + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + self.assertEqual(e[0],six.b('Warning: -p parameter overrides -d parameter (,)')) + + self.cleanup(tmpfile) + + def test_output_delimiter(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + + self.cleanup(tmpfile) + + def test_output_delimiter_tab_parameter(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , -T "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + + self.cleanup(tmpfile) + + def test_output_delimiter_pipe_parameter(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , -P "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], 
sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + + self.cleanup(tmpfile) + + def test_output_delimiter_tab_parameter__with_manual_override_attempt(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 1) + + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("\t"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("\t"))) + self.assertEqual(e[0], six.b('Warning: -T parameter overrides -D parameter (|)')) + + self.cleanup(tmpfile) + + def test_output_delimiter_pipe_parameter__with_manual_override_attempt(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , -P -D ":" "select c1,c2,c3 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 1) + + self.assertEqual(o[0], sample_data_rows[0].replace(six.b(","), six.b("|"))) + self.assertEqual(o[1], sample_data_rows[1].replace(six.b(","), six.b("|"))) + self.assertEqual(o[2], sample_data_rows[2].replace(six.b(","), six.b("|"))) + self.assertEqual(e[0],six.b('Warning: -P parameter overrides -D parameter (:)')) + + self.cleanup(tmpfile) + + +class AnalysisTests(AbstractQTestCase): + + def test_analyze_result(self): + d = "\n".join(['%s\t%s\t%s' % (x+1,x+1,x+1) for x in range(100)]) + tmpfile = self.create_file_with_data(six.b(d)) + + cmd = Q_EXECUTABLE + ' -c 1 "select count(*) from %s" -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 5) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], 
six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1], six.b(' Sources:')) + self.assertEqual(o[2], six.b(' source_type: file source: %s' %(tmpfile.name))) + self.assertEqual(o[3], six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `c1` - text')) + + self.cleanup(tmpfile) + + def test_analyze_result_with_data_stream(self): + d = "\n".join(['%s\t%s\t%s' % (x+1,x+1,x+1) for x in range(100)]) + tmpfile = self.create_file_with_data(six.b(d)) + + cmd = 'cat %s | %s -c 1 "select count(*) from -" -A' % (tmpfile.name,Q_EXECUTABLE) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 5) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], six.b('Table: -')) + self.assertEqual(o[1], six.b(' Sources:')) + self.assertEqual(o[2], six.b(' source_type: data-stream source: stdin')) + self.assertEqual(o[3], six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `c1` - text')) + + self.cleanup(tmpfile) + + def test_column_analysis(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `c1` - text')) + self.assertEqual(o[5], six.b(' `c2` - int')) + self.assertEqual(o[6], six.b(' `c3` - int')) + + self.cleanup(tmpfile) + + def test_column_analysis_no_header(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % 
tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `c1` - text')) + self.assertEqual(o[5], six.b(' `c2` - int')) + self.assertEqual(o[6], six.b(' `c3` - int')) + + def test_column_analysis_with_unexpected_header(self): + tmpfile = self.create_file_with_data(sample_data_with_header) + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 7) + self.assertEqual(len(e), 1) + + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4],six.b(' `c1` - text')) + self.assertEqual(o[5],six.b(' `c2` - text')) + self.assertEqual(o[6],six.b(' `c3` - text')) + + self.assertEqual( + e[0], six.b('Warning - There seems to be header line in the file, but -H has not been specified. 
All fields will be detected as text fields, and the header line will appear as part of the data')) + + self.cleanup(tmpfile) + + def test_column_analysis_for_spaces_in_header_row(self): + tmpfile = self.create_file_with_data( + header_row_with_spaces + six.b("\n") + sample_data_no_header) + cmd = Q_EXECUTABLE + ' -d , "select name,\\`value 1\\` from %s" -H -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 7) + + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `name` - text')) + self.assertEqual(o[5], six.b(' `value 1` - int')) + self.assertEqual(o[6], six.b(' `value2` - int')) + + self.cleanup(tmpfile) + + def test_column_analysis_with_header(self): + tmpfile = self.create_file_with_data(sample_data_with_header) + cmd = Q_EXECUTABLE + ' -d , "select c1 from %s" -A -H' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode, 0) + self.assertEqual(len(o),7) + self.assertEqual(len(e),2) + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `name` - text')) + self.assertEqual(o[5], six.b(' `value1` - int')) + self.assertEqual(o[6], six.b(' `value2` - int')) + + self.assertEqual(e[0],six.b('query error: no such column: c1')) + self.assertTrue(e[1].startswith(six.b('Warning - There seems to be a '))) + + self.cleanup(tmpfile) + + + +class StdInTests(AbstractQTestCase): + + def test_stdin_input(self): + cmd = six.b('printf "%s" | ' + Q_EXECUTABLE + ' -d , "select c1,c2,c3 from -"') % sample_data_no_header + retcode, o, e = run_command(cmd) + + 
self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], sample_data_rows[0]) + self.assertEqual(o[1], sample_data_rows[1]) + self.assertEqual(o[2], sample_data_rows[2]) + + def test_attempt_to_unzip_stdin(self): + tmpfile = self.create_file_with_data( + six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00')) + + cmd = 'cat %s | ' % tmpfile.name + Q_EXECUTABLE + ' -z "select sum(c1),avg(c1) from -"' + + retcode, o, e = run_command(cmd) + self.assertTrue(retcode != 0) + self.assertTrue(len(o) == 0) + self.assertTrue(len(e) == 1) + + self.assertEqual(e[0],six.b('Cannot decompress standard input. Pipe the input through zcat in order to decompress.')) + + self.cleanup(tmpfile) + +class QuotingTests(AbstractQTestCase): + def test_non_quoted_values_in_quoted_data(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data) + + cmd = Q_EXECUTABLE + ' -d " " "select c1 from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) + + self.assertTrue(o[0],'non_quoted') + self.assertTrue(o[1],'control-value-1') + self.assertTrue(o[2],'non-quoted-value') + self.assertTrue(o[3],'control-value-1') + + self.cleanup(tmp_data_file) + + def test_regular_quoted_values_in_quoted_data(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data) + + cmd = Q_EXECUTABLE + ' -d " " "select c2 from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) + + self.assertTrue(o[0],'regular_double_quoted') + self.assertTrue(o[1],'control-value-2') + self.assertTrue(o[2],'this is a quoted value') + self.assertTrue(o[3],'control-value-2') + + self.cleanup(tmp_data_file) + + def test_double_double_quoted_values_in_quoted_data(self): 
+ tmp_data_file = self.create_file_with_data(sample_quoted_data) + + cmd = Q_EXECUTABLE + ' -d " " "select c3 from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) + + self.assertTrue(o[0],'double_double_quoted') + self.assertTrue(o[1],'control-value-3') + self.assertTrue(o[2],'this is a "double double" quoted value') + self.assertTrue(o[3],'control-value-3') + + self.cleanup(tmp_data_file) + + def test_escaped_double_quoted_values_in_quoted_data(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data) + + cmd = Q_EXECUTABLE + ' -d " " "select c4 from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) + + self.assertTrue(o[0],'escaped_double_quoted') + self.assertTrue(o[1],'control-value-4') + self.assertTrue(o[2],'this is an escaped "quoted value"') + self.assertTrue(o[3],'control-value-4') + + self.cleanup(tmp_data_file) + + def test_none_input_quoting_mode_in_relaxed_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -m relaxed -D , -w none -W none "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('"quoted,data",23')) + self.assertEqual(o[1],six.b('unquoted-data,54,')) + + self.cleanup(tmp_data_file) + + def test_none_input_quoting_mode_in_strict_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -m strict -D , -w none "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode,0) + self.assertEqual(len(e),1) + self.assertEqual(len(o),0) + + self.assertTrue(e[0].startswith(six.b('Strict mode. 
Column Count is expected to identical'))) + + self.cleanup(tmp_data_file) + + def test_minimal_input_quoting_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -D , -w minimal "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) + + self.cleanup(tmp_data_file) + + def test_all_input_quoting_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -D , -w all "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) + + self.cleanup(tmp_data_file) + + def test_incorrect_input_quoting_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -D , -w unknown_wrapping_mode "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode,0) + self.assertEqual(len(e),1) + self.assertEqual(len(o),0) + + self.assertTrue(e[0].startswith(six.b('Input quoting mode can only be one of all,minimal,none'))) + self.assertTrue(six.b('unknown_wrapping_mode') in e[0]) + + self.cleanup(tmp_data_file) + + def test_none_output_quoting_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W none "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) + + self.cleanup(tmp_data_file) + 
+ def test_minimal_output_quoting_mode__without_need_to_quote_in_output(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W minimal "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('quoted data,23')) + self.assertEqual(o[1],six.b('unquoted-data,54')) + + self.cleanup(tmp_data_file) + + def test_minimal_output_quoting_mode__with_need_to_quote_in_output_due_to_delimiter(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + # output delimiter is set to space, so the output will contain it + cmd = Q_EXECUTABLE + ' -d " " -D " " -w all -W minimal "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('"quoted data" 23')) + self.assertEqual(o[1],six.b('unquoted-data 54')) + + self.cleanup(tmp_data_file) + + def test_minimal_output_quoting_mode__with_need_to_quote_in_output_due_to_newline(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2_with_newline) + + # Delimiter is set to colon (:), so it will not be inside the data values (this will make sure that the newline is the one causing the quoting) + cmd = Q_EXECUTABLE + " -d ':' -w all -W minimal \"select c1,c2,replace(c1,'with' || x'0a' || 'a new line inside it','NEWLINE-REMOVED') from %s\"" % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),3) + + self.assertEqual(o[0],six.b('"quoted data with')) + # Notice that the third column here is not quoted, because we replaced the newline with something else + self.assertEqual(o[1],six.b('a new line inside it":23:quoted data NEWLINE-REMOVED')) + 
self.assertEqual(o[2],six.b('unquoted-data:54:unquoted-data')) + + self.cleanup(tmp_data_file) + + def test_nonnumeric_output_quoting_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W nonnumeric "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('"quoted data",23')) + self.assertEqual(o[1],six.b('"unquoted-data",54')) + + self.cleanup(tmp_data_file) + + def test_all_output_quoting_mode(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data2) + + cmd = Q_EXECUTABLE + ' -d " " -D , -w all -W all "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('"quoted data","23"')) + self.assertEqual(o[1],six.b('"unquoted-data","54"')) + + self.cleanup(tmp_data_file) + + def _internal_test_consistency_of_chaining_output_to_input(self,input_data,input_wrapping_mode,output_wrapping_mode): + + tmp_data_file = self.create_file_with_data(input_data) + + basic_cmd = Q_EXECUTABLE + ' -w %s -W %s "select * from -"' % (input_wrapping_mode,output_wrapping_mode) + chained_cmd = 'cat %s | %s | %s | %s' % (tmp_data_file.name,basic_cmd,basic_cmd,basic_cmd) + + retcode, o, e = run_command(chained_cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(six.b("\n").join(o),input_data) + + self.cleanup(tmp_data_file) + + def test_consistency_of_chaining_minimal_wrapping_to_minimal_wrapping(self): + input_data = six.b('"quoted data" 23\nunquoted-data 54') + self._internal_test_consistency_of_chaining_output_to_input(input_data,'minimal','minimal') + + def test_consistency_of_chaining_all_wrapping_to_all_wrapping(self): + input_data = six.b('"quoted data" 
"23"\n"unquoted-data" "54"') + self._internal_test_consistency_of_chaining_output_to_input(input_data,'all','all') + + def test_input_field_quoting_and_data_types_with_encoding(self): + OUTPUT_ENCODING = 'utf-8' + + # Checks combination of minimal input field quoting, with special characters that need to be decoded - + # Both content and proper data types are verified + data = six.b('111,22.22,"testing text with special characters - citt\xc3\xa0 ",http://somekindofurl.com,12.13.14.15,12.1\n') + tmp_data_file = self.create_file_with_data(data) + + cmd = Q_EXECUTABLE + ' -d , "select * from %s" -E %s' % (tmp_data_file.name,OUTPUT_ENCODING) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),1) + + self.assertEqual(o[0].decode('utf-8'),u'111,22.22,testing text with special characters - citt\xe0 ,http://somekindofurl.com,12.13.14.15,12.1') + + cmd = Q_EXECUTABLE + ' -d , "select * from %s" -A' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),10) + + self.assertEqual(o[0],six.b('Table: %s' % tmp_data_file.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % tmp_data_file.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4],six.b(' `c1` - int')) + self.assertEqual(o[5],six.b(' `c2` - float')) + self.assertEqual(o[6],six.b(' `c3` - text')) + self.assertEqual(o[7],six.b(' `c4` - text')) + self.assertEqual(o[8],six.b(' `c5` - text')) + self.assertEqual(o[9],six.b(' `c6` - float')) + + self.cleanup(tmp_data_file) + + def test_multiline_double_double_quoted_values_in_quoted_data(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data) + + # FIXME Need to convert \0a to proper encoding suitable for the person running the tests. 
+ cmd = Q_EXECUTABLE + ' -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) + + self.assertTrue(o[0],six.b('multiline_double_double_quoted')) + self.assertTrue(o[1],six.b('control-value-5')) + self.assertTrue(o[2],six.b('this is a double double quoted "multiline\n value".')) + self.assertTrue(o[3],six.b('control-value-5')) + + self.cleanup(tmp_data_file) + + def test_multiline_escaped_double_quoted_values_in_quoted_data(self): + tmp_data_file = self.create_file_with_data(sample_quoted_data) + + # FIXME Need to convert \0a to proper encoding suitable for the person running the tests. + cmd = Q_EXECUTABLE + ' -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),4) + + self.assertTrue(o[0],'multiline_escaped_double_quoted') + self.assertTrue(o[1],'control-value-6') + self.assertTrue(o[2],'this is an escaped "multiline:: value".') + self.assertTrue(o[3],'control-value-6') + + self.cleanup(tmp_data_file) + + def test_disable_double_double_quoted_data_flag__values(self): + # This test (and flag) is meant to verify backward comptibility only. 
It is possible that + # this flag will be removed completely in the future + + tmp_data_file = self.create_file_with_data(double_double_quoted_data) + + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c2 from %s" -W none' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('double_double_quoted')) + self.assertEqual(o[1],six.b('this is a quoted value with "double')) + + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c3 from %s" -W none' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('')) + self.assertEqual(o[1],six.b('double')) + + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select c4 from %s" -W none' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('')) + self.assertEqual(o[1],six.b('quotes"""')) + + self.cleanup(tmp_data_file) + + def test_disable_escaped_double_quoted_data_flag__values(self): + # This test (and flag) is meant to verify backward comptibility only. 
It is possible that + # this flag will be removed completely in the future + + tmp_data_file = self.create_file_with_data(escaped_double_quoted_data) + + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c2 from %s" -W none' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('escaped_double_quoted')) + self.assertEqual(o[1],six.b('this is a quoted value with \\escaped')) + + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c3 from %s" -W none' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('')) + self.assertEqual(o[1],six.b('double')) + + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select c4 from %s" -W none' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('')) + self.assertEqual(o[1],six.b('quotes\\""')) + + self.cleanup(tmp_data_file) + + def test_combined_quoted_data_flags__number_of_columns_detected(self): + # This test (and flags) is meant to verify backward comptibility only. 
It is possible that + # these flags will be removed completely in the future + tmp_data_file = self.create_file_with_data(combined_quoted_data) + + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + o = o[o.index(six.b(' Fields:'))+1:] + + self.assertEqual(len(o),7) # found 7 fields + + cmd = Q_EXECUTABLE + ' -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + o = o[o.index(six.b(' Fields:'))+1:] + + self.assertEqual(len(o),5) # found 5 fields + + cmd = Q_EXECUTABLE + ' -d " " --disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + o = o[o.index(six.b(' Fields:'))+1:] + + self.assertEqual(len(o),5) # found 5 fields + + cmd = Q_EXECUTABLE + ' -d " " "select * from %s" -A' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + o = o[o.index(six.b(' Fields:'))+1:] + + self.assertEqual(len(o),3) # found only 3 fields, which is the correct amount + + self.cleanup(tmp_data_file) + + +class EncodingTests(AbstractQTestCase): + + def test_utf8_with_bom_encoding(self): + utf_8_data_with_bom = six.b('\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n') + tmp_data_file = self.create_file_with_data(utf_8_data_with_bom,encoding=None) + + cmd = Q_EXECUTABLE + ' -d , -H -O -e utf-8-sig "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(e),0) + self.assertEqual(len(o),3) + + 
self.assertEqual(o[0],six.b('typeid,limit,apcost,date,checkpointId')) + self.assertEqual(o[1],six.b('1,2,5,"1,2,3,4,5,6,7","3000,3001,3002"')) + self.assertEqual(o[2],six.b('2,2,5,"1,2,3,4,5,6,7","3003,3004,3005"')) + + self.cleanup(tmp_data_file) + + +class QrcTests(AbstractQTestCase): + + def test_explicit_qrc_filename_not_found(self): + non_existent_filename = str(uuid.uuid4()) + env_to_inject = { 'QRC_FILENAME': non_existent_filename} + cmd = Q_EXECUTABLE + ' "select 1"' + retcode, o, e = run_command(cmd, env_to_inject=env_to_inject) + + self.assertEqual(retcode, 244) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertTrue(e[0] == six.b('QRC_FILENAME env var exists, but cannot find qrc file at %s' % non_existent_filename)) + + def test_explicit_qrc_filename_that_exists(self): + tmp_qrc_file = self.create_file_with_data(six.b('''[options] +output_delimiter=| +''')) + env_to_inject = { 'QRC_FILENAME': tmp_qrc_file.name} + cmd = Q_EXECUTABLE + ' "select 1,2"' + retcode, o, e = run_command(cmd, env_to_inject=env_to_inject) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertTrue(o[0] == six.b('1|2')) + + self.cleanup(tmp_qrc_file) + + def test_all_default_options(self): + # Create a qrc file that contains all default values inside the qrc file, but with some different values than the regular defaults + tmp_qrc_file = self.create_file_with_data(six.b('''[options] +analyze_only=True +beautify=True +caching_mode=readwrite +column_count=32 +delimiter=, +disable_column_type_detection=True +disable_double_double_quoting=False +disable_escaped_double_quoting=False +encoding=ascii +formatting=xxx +gzipped=True +input_quoting_mode=all +keep_leading_whitespace_in_values=True +list_user_functions=True +max_attached_sqlite_databases=888 +max_column_length_limit=8888 +mode=strict +output_delimiter=| +output_encoding=utf-8 +output_header=True +output_quoting_mode=all +overwrite_qsql=False 
+pipe_delimited=True +pipe_delimited_output=True +query_encoding=ascii +query_filename=query-filename +save_db_to_disk_filename=save-db-to-disk-filename +skip_header=True +tab_delimited=True +tab_delimited_output=true +verbose=True +with_universal_newlines=True +''')) + env_to_inject = { 'QRC_FILENAME': tmp_qrc_file.name} + cmd = Q_EXECUTABLE + ' --dump-defaults' + retcode, o, e = run_command(cmd, env_to_inject=env_to_inject) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 34) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0],six.b('[options]')) + o = o[1:] + + m = {} + for r in o: + key,val = r.split(six.b("="),1) + m[key] = val + + self.assertEqual(m[six.b('analyze_only')],six.b('True')) + self.assertEqual(m[six.b('beautify')],six.b('True')) + self.assertEqual(m[six.b('caching_mode')],six.b('readwrite')) + self.assertEqual(m[six.b('column_count')],six.b('32')) + self.assertEqual(m[six.b('delimiter')],six.b(',')) + self.assertEqual(m[six.b('disable_column_type_detection')],six.b('True')) + self.assertEqual(m[six.b('disable_double_double_quoting')],six.b('False')) + self.assertEqual(m[six.b('disable_escaped_double_quoting')],six.b('False')) + self.assertEqual(m[six.b('encoding')],six.b('ascii')) + self.assertEqual(m[six.b('formatting')],six.b('xxx')) + self.assertEqual(m[six.b('gzipped')],six.b('True')) + self.assertEqual(m[six.b('input_quoting_mode')],six.b('all')) + self.assertEqual(m[six.b('keep_leading_whitespace_in_values')],six.b('True')) + self.assertEqual(m[six.b('list_user_functions')],six.b('True')) + self.assertEqual(m[six.b('max_attached_sqlite_databases')],six.b('888')) + self.assertEqual(m[six.b('max_column_length_limit')],six.b('8888')) + self.assertEqual(m[six.b('mode')],six.b('strict')) + self.assertEqual(m[six.b('output_delimiter')],six.b('|')) + self.assertEqual(m[six.b('output_encoding')],six.b('utf-8')) + self.assertEqual(m[six.b('output_header')],six.b('True')) + self.assertEqual(m[six.b('output_quoting_mode')],six.b('all')) 
+ self.assertEqual(m[six.b('overwrite_qsql')],six.b('False')) + self.assertEqual(m[six.b('pipe_delimited')],six.b('True')) + self.assertEqual(m[six.b('pipe_delimited_output')],six.b('True')) + self.assertEqual(m[six.b('query_encoding')],six.b('ascii')) + self.assertEqual(m[six.b('query_filename')],six.b('query-filename')) + self.assertEqual(m[six.b('save_db_to_disk_filename')],six.b('save-db-to-disk-filename')) + self.assertEqual(m[six.b('skip_header')],six.b('True')) + self.assertEqual(m[six.b('tab_delimited')],six.b('True')) + self.assertEqual(m[six.b('tab_delimited_output')],six.b('True')) + self.assertEqual(m[six.b('verbose')],six.b('True')) + self.assertEqual(m[six.b('with_universal_newlines')],six.b('True')) + + self.cleanup(tmp_qrc_file) + + def test_caching_readwrite_using_qrc_file(self): + tmpfile = self.create_file_with_data(sample_data_no_header) + tmpfile_folder = os.path.dirname(tmpfile.name) + tmpfile_filename = os.path.basename(tmpfile.name) + expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + cmd = Q_EXECUTABLE + ' -d , "select * from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),3) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('a,1,0')) + self.assertEqual(o[1],six.b('b,2,0')) + self.assertEqual(o[2],six.b('c,,0')) + + # Ensure default does not create a cache file + self.assertTrue(not os.path.exists(expected_cache_filename)) + + tmp_qrc_file = self.create_file_with_data(six.b('''[options] +caching_mode=readwrite +''')) + env_to_inject = { 'QRC_FILENAME': tmp_qrc_file.name} + cmd = Q_EXECUTABLE + ' -d , "select * from %s"' % tmpfile.name + retcode, o, e = run_command(cmd, env_to_inject=env_to_inject) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o),3) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('a,1,0')) + self.assertEqual(o[1],six.b('b,2,0')) + self.assertEqual(o[2],six.b('c,,0')) + + # Ensure that qrc file 
caching is being used and caching is activated (cache file should exist) + self.assertTrue(os.path.exists(expected_cache_filename)) + + self.cleanup(tmp_qrc_file) + self.cleanup(tmpfile) + + +class QsqlUsageTests(AbstractQTestCase): + + def test_concatenate_same_qsql_file_with_single_table(self): + numbers = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + + qsql_file_data = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers) + + tmpfile = self.create_file_with_data(qsql_file_data,suffix='.qsql') + + cmd = Q_EXECUTABLE + ' -t "select count(*) from (select * from %s union all select * from %s)"' % (tmpfile.name,tmpfile.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('20000')) + + def test_query_qsql_with_single_table(self): + numbers = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + + qsql_file_data = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers) + + tmpfile = self.create_file_with_data(qsql_file_data) + + cmd = Q_EXECUTABLE + ' -t "select sum(aa),sum(bb),sum(cc) from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('50005000\t50005000\t50005000')) + + def test_query_qsql_with_single_table_with_explicit_non_existent_tablename(self): + numbers = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + + qsql_file_data = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers) + + tmpfile = self.create_file_with_data(qsql_file_data) + + c = sqlite3.connect(tmpfile.name) + actual_table_name = c.execute('select temp_table_name from _qcatalog').fetchall()[0][0] + c.close() + + + cmd = '%s -t "select sum(aa),sum(bb),sum(cc) from %s:::non-existent"' % 
(Q_EXECUTABLE,tmpfile.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 84) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + self.assertEqual(e[0],six.b('Table non-existent could not be found in qsql file %s . Existing table names: %s' % (tmpfile.name,actual_table_name))) + + def test_query_qsql_with_single_table_with_explicit_table_name(self): + numbers = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + + qsql_file_data = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers) + + tmpfile = self.create_file_with_data(qsql_file_data) + + c = sqlite3.connect(tmpfile.name) + actual_table_name = c.execute('select temp_table_name from _qcatalog').fetchall()[0][0] + c.close() + + + cmd = '%s -t "select sum(aa),sum(bb),sum(cc) from %s:::%s"' % (Q_EXECUTABLE,tmpfile.name,actual_table_name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('50005000\t50005000\t50005000')) + + def test_query_multi_qsql_with_single_table(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + qsql_file_data1 = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers1) + tmpfile1 = self.create_file_with_data(qsql_file_data1,suffix='.qsql') + + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + qsql_file_data2 = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers2) + tmpfile2 = self.create_file_with_data(qsql_file_data2,suffix='.qsql') + + cmd = Q_EXECUTABLE + ' -t "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s small_file left join %s large_file on (large_file.aa == small_file.bb)"' % (tmpfile2.name,tmpfile1.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + 
self.assertEqual(o[0],six.b('55\t55\t55')) + + def test_query_concatenated_qsqls_each_with_single_table(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + qsql_file_data1 = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers1) + tmpfile1 = self.create_file_with_data(qsql_file_data1,suffix='.qsql') + + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + qsql_file_data2 = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers2) + tmpfile2 = self.create_file_with_data(qsql_file_data2,suffix='.qsql') + + cmd = Q_EXECUTABLE + ' -t "select sum(aa),sum(bb),sum(cc) from (select * from %s union all select * from %s)"' % (tmpfile2.name,tmpfile1.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('50005055\t50005055\t50005055')) + + def test_concatenated_qsql_and_data_stream__column_names_mismatch(self): + N1 = 10000 + N2 = 100 + + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, N1 + 1)] + csv_file_data1 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('aa'), six.b('bb'), six.b('cc')], numbers1) + tmpfile1 = self.create_file_with_data(csv_file_data1) + expected_cache_filename1 = '%s.qsql' % tmpfile1.name + + cmd = Q_EXECUTABLE + ' -H -t "select count(*) from %s" -C readwrite' % tmpfile1.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename1)) + + cmd = 'seq 1 %s | %s -c 1 "select count(*) from (select * from %s UNION ALL select * from -)"' % (N2, Q_EXECUTABLE,expected_cache_filename1) + + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 1) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + self.assertEqual(e[0],six.b('query error: SELECTs to the left and right of UNION ALL do not have the same number of result 
columns')) + + def test_concatenated_qsql_and_data_stream(self): + N1 = 10000 + N2 = 100 + + numbers1 = [[six.b(str(i))] for i in range(1, N1 + 1)] + csv_file_data1 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('c1')], numbers1) + tmpfile1 = self.create_file_with_data(csv_file_data1) + expected_cache_filename1 = '%s.qsql' % tmpfile1.name + + cmd = Q_EXECUTABLE + ' -H -t "select count(*) from %s" -C readwrite' % tmpfile1.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename1)) + + cmd = 'seq 1 %s | %s -t -c 1 "select count(*),sum(c1) from (select * from %s UNION ALL select * from -)"' % (N2, Q_EXECUTABLE,expected_cache_filename1) + + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('%s\t%s' % (N1+N2,sum(range(1,N1+1)) + sum(range(1,N2+1))))) + + def test_concatenated_qsql_and_data_stream__explicit_table_name(self): + N1 = 10000 + N2 = 100 + + numbers1 = [[six.b(str(i))] for i in range(1, N1 + 1)] + csv_file_data1 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('c1')], numbers1) + tmpfile1 = self.create_file_with_data(csv_file_data1) + tmpfile1_expected_table_name = os.path.basename(tmpfile1.name) + + expected_cache_filename1 = '%s.qsql' % tmpfile1.name + + cmd = Q_EXECUTABLE + ' -H -t "select count(*) from %s" -C readwrite' % tmpfile1.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename1)) + + cmd = 'seq 1 %s | %s -t -c 1 "select count(*),sum(c1) from (select * from %s:::%s UNION ALL select * from -)"' % (N2, Q_EXECUTABLE,expected_cache_filename1,tmpfile1_expected_table_name) + + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('%s\t%s' % (N1+N2,sum(range(1,N1+1)) + sum(range(1,N2+1))))) + + def 
test_write_to_qsql__check_chosen_table_name(self): + numbers1 = [[six.b(str(i))] for i in range(1, 10001)] + csv_file_data1 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('c1')], numbers1) + tmpfile1 = self.create_file_with_data(csv_file_data1) + expected_cache_filename1 = '%s.qsql' % tmpfile1.name + + cmd = Q_EXECUTABLE + ' -c 1 -H -t "select count(*) from %s" -C readwrite' % tmpfile1.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename1)) + + c = sqlite3.connect(expected_cache_filename1) + qcatalog_entries = c.execute('select temp_table_name from _qcatalog').fetchall() + self.assertEqual(len(qcatalog_entries),1) + self.assertEqual(qcatalog_entries[0][0],os.path.basename(tmpfile1.name)) + + def test_concatenated_mixes_qsql_with_single_table_and_csv(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + csv_file_data1 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('aa'), six.b('bb'), six.b('cc')], numbers1) + tmpfile1 = self.create_file_with_data(csv_file_data1) + expected_cache_filename1 = '%s.qsql' % tmpfile1.name + + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + csv_file_data2 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('aa'), six.b('bb'), six.b('cc')], numbers2) + tmpfile2 = self.create_file_with_data(csv_file_data2) + expected_cache_filename2 = '%s.qsql' % tmpfile2.name + + + cmd = Q_EXECUTABLE + ' -H -t "select count(*) from %s" -C readwrite' % tmpfile1.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename1)) + + cmd = Q_EXECUTABLE + ' -H -t "select count(*) from %s" -C readwrite' % tmpfile2.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename2)) + + # csv and qsql files prepared. 
now test all four combinations + + cmd = Q_EXECUTABLE + ' -O -H -t "select count(*) cnt,sum(aa) sum_aa,sum(bb) sum_bb,sum(cc) sum_cc from (select * from %s union all select * from %s)"' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(o),2) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('cnt\tsum_aa\tsum_bb\tsum_cc')) + self.assertEqual(o[1],six.b('10010\t50005055\t50005055\t50005055')) + + cmd = Q_EXECUTABLE + ' -O -H -t "select count(*) cnt,sum(aa) sum_aa,sum(bb) sum_bb,sum(cc) sum_cc from (select * from %s union all select * from %s.qsql)"' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(o),2) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('cnt\tsum_aa\tsum_bb\tsum_cc')) + self.assertEqual(o[1],six.b('10010\t50005055\t50005055\t50005055')) + + cmd = Q_EXECUTABLE + ' -O -H -t "select count(*) cnt,sum(aa) sum_aa,sum(bb) sum_bb,sum(cc) sum_cc from (select * from %s.qsql union all select * from %s)"' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(o),2) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('cnt\tsum_aa\tsum_bb\tsum_cc')) + self.assertEqual(o[1],six.b('10010\t50005055\t50005055\t50005055')) + + cmd = Q_EXECUTABLE + ' -O -H -t "select count(*) cnt,sum(aa) sum_aa,sum(bb) sum_bb,sum(cc) sum_cc from (select * from %s.qsql union all select * from %s.qsql)"' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(o),2) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('cnt\tsum_aa\tsum_bb\tsum_cc')) + self.assertEqual(o[1],six.b('10010\t50005055\t50005055\t50005055')) + + def test_analysis_of_concatenated_mixes_qsql_with_single_table_and_csv(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + 
csv_file_data1 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('aa'), six.b('bb'), six.b('cc')], numbers1) + tmpfile1 = self.create_file_with_data(csv_file_data1) + expected_cache_filename1 = '%s.qsql' % tmpfile1.name + + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + csv_file_data2 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('aa'), six.b('bb'), six.b('cc')], numbers2) + tmpfile2 = self.create_file_with_data(csv_file_data2) + expected_cache_filename2 = '%s.qsql' % tmpfile2.name + + cmd = Q_EXECUTABLE + ' -H -t "select count(*) from %s" -C readwrite' % tmpfile1.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename1)) + + cmd = Q_EXECUTABLE + ' -H -t "select count(*) from %s" -C readwrite' % tmpfile2.name + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertTrue(os.path.exists(expected_cache_filename2)) + + # csv and qsql files prepared + + # Test function, will be used multiple times, each time with a different combination + + def do_check(caching_mode, + file1_source_type,file1_table_postfix,file1_postfix, + file2_source_type,file2_table_postfix,file2_postfix): + cmd = '%s -C %s -O -H -t "select count(*) cnt,sum(aa) sum_aa,sum(bb) sum_bb,sum(cc) sum_cc from (select * from %s%s UNION ALL select * from %s%s)" -A' % ( + Q_EXECUTABLE, + caching_mode, + tmpfile1.name, + file1_table_postfix, + tmpfile2.name, + file2_table_postfix) + + retcode, o, e = run_command(cmd) + self.assertEqual(retcode, 0) + self.assertEqual(len(o),14) + self.assertEqual(len(e),0) + self.assertEqual(o, [ + six.b('Table: %s%s' % (tmpfile1.name,file1_table_postfix)), + six.b(' Sources:'), + six.b(' source_type: %s source: %s%s' % (file1_source_type,tmpfile1.name,file1_postfix)), + six.b(' Fields:'), + six.b(' `aa` - int'), + six.b(' `bb` - int'), + six.b(' `cc` - int'), + six.b('Table: %s%s' % (tmpfile2.name,file2_table_postfix)), + six.b(' 
Sources:'), + six.b(' source_type: %s source: %s%s' % (file2_source_type,tmpfile2.name,file2_postfix)), + six.b(' Fields:'), + six.b(' `aa` - int'), + six.b(' `bb` - int'), + six.b(' `cc` - int')]) + + # now test *the analysis results* of all four combinations, adding `-C read`, so the + # qsql will be used. Running with `-C none`, would have caused the qsql not to be used even if the qsql file exists + + do_check(caching_mode='read', + file1_source_type='qsql-file-with-original',file1_table_postfix='',file1_postfix='.qsql', + file2_source_type='qsql-file-with-original',file2_table_postfix='',file2_postfix='.qsql') + do_check('read', + file1_source_type='qsql-file-with-original',file1_table_postfix='',file1_postfix='.qsql', + file2_source_type='qsql-file',file2_table_postfix='.qsql',file2_postfix='.qsql') + do_check('read', + file1_source_type='qsql-file',file1_table_postfix='.qsql',file1_postfix='.qsql', + file2_source_type='qsql-file-with-original',file2_table_postfix='',file2_postfix='.qsql') + do_check('read', + file1_source_type='qsql-file',file1_table_postfix='.qsql',file1_postfix='.qsql', + file2_source_type='qsql-file',file2_table_postfix='.qsql',file2_postfix='.qsql') + + # Now test the all combinations again, this time with `-C none`, to make sure that by + # default, the qsql file is not used, and -A shows that fact + + do_check(caching_mode='none', + file1_source_type='file-with-unused-qsql',file1_table_postfix='',file1_postfix='', + file2_source_type='file-with-unused-qsql',file2_table_postfix='',file2_postfix='') + do_check('none', + file1_source_type='file-with-unused-qsql',file1_table_postfix='',file1_postfix='', + file2_source_type='qsql-file',file2_table_postfix='.qsql',file2_postfix='.qsql') + do_check('none', + file1_source_type='qsql-file',file1_table_postfix='.qsql',file1_postfix='.qsql', + file2_source_type='file-with-unused-qsql',file2_table_postfix='',file2_postfix='') + do_check('none', + 
file1_source_type='qsql-file',file1_table_postfix='.qsql',file1_postfix='.qsql', + file2_source_type='qsql-file',file2_table_postfix='.qsql',file2_postfix='.qsql') + + def test_mixed_qsql_with_single_table_and_csv__missing_header_parameter_for_csv(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + qsql_file_data1 = self.arrays_to_qsql_file_content([six.b('aa'), six.b('bb'), six.b('cc')], numbers1) + tmpfile1 = self.create_file_with_data(qsql_file_data1,suffix='.qsql') + + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + csv_file_data2 = self.arrays_to_csv_file_content(six.b('\t'),[six.b('aa'), six.b('bb'), six.b('cc')], numbers2) + tmpfile2 = self.create_file_with_data(csv_file_data2) + + cmd = Q_EXECUTABLE + ' -t "select sum(aa),sum(bb),sum(cc) from (select * from %s union all select * from %s)"' % (tmpfile1.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 1) + self.assertEqual(e[0],six.b('Warning - There seems to be header line in the file, but -H has not been specified. 
All fields will be detected as text fields, and the header line will appear as part of the data')) + self.assertEqual(o[0],six.b('50005055.0\t50005055.0\t50005055.0')) + + def test_qsql_with_multiple_tables_direct_use(self): + numbers1 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 10001)] + qsql_filename1 = self.create_qsql_file_with_content_and_return_filename([six.b('aa'), six.b('bb'), six.b('cc')],numbers1) + expected_stored_table_name1 = os.path.basename(qsql_filename1)[:-5] + + numbers2 = [[six.b(str(i)), six.b(str(i)), six.b(str(i))] for i in range(1, 11)] + qsql_filename2 = self.create_qsql_file_with_content_and_return_filename([six.b('aa'), six.b('bb'), six.b('cc')],numbers2) + expected_stored_table_name2 = os.path.basename(qsql_filename2)[:-5] + + qsql_with_multiple_tables = self.generate_tmpfile_name(suffix='.qsql') + + cmd = '%s -t "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s large_file left join %s small_file on (large_file.aa == small_file.bb)" -S %s' % \ + (Q_EXECUTABLE,qsql_filename1,qsql_filename2,qsql_with_multiple_tables) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 4) + self.assertEqual(e[0], six.b('Going to save data into a disk database: %s' % qsql_with_multiple_tables)) + self.assertTrue(e[1].startswith(six.b('Data has been saved into %s . 
Saving has taken' % qsql_with_multiple_tables))) + self.assertEqual(e[2],six.b('Query to run on the database: select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s large_file left join %s small_file on (large_file.aa == small_file.bb);' % \ + (expected_stored_table_name1,expected_stored_table_name2))) + self.assertEqual(e[3],six.b('You can run the query directly from the command line using the following command: echo "select sum(large_file.aa),sum(large_file.bb),sum(large_file.cc) from %s large_file left join %s small_file on (large_file.aa == small_file.bb)" | sqlite3 %s' % \ + (expected_stored_table_name1,expected_stored_table_name2,qsql_with_multiple_tables))) + + cmd = '%s -d , "select count(*) cnt,sum(aa),sum(bb),sum(cc) from %s:::%s"' % (Q_EXECUTABLE,qsql_with_multiple_tables,expected_stored_table_name1) + r, o, e = run_command(cmd) + + self.assertEqual(r,0) + self.assertEqual(len(o),1) + self.assertEqual(len(e),0) + self.assertEqual(o[0],six.b('10000,50005000,50005000,50005000')) + + def test_direct_use_of_sqlite_db_with_one_table(self): + tmpfile = self.create_file_with_data(six.b(''),suffix='.sqlite') + os.remove(tmpfile.name) + c = sqlite3.connect(tmpfile.name) + c.execute(' create table mytable (x int, y int)').fetchall() + c.execute(' insert into mytable (x,y) values (100,200),(300,400)').fetchall() + c.commit() + c.close() + + cmd = Q_EXECUTABLE + ' -t "select sum(x),sum(y) from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('400\t600')) + + cmd = Q_EXECUTABLE + ' -t "select sum(x),sum(y) from %s:::mytable"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('400\t600')) + + def test_direct_use_of_sqlite_db_with_one_table__nonexistent_table(self): + tmpfile = 
self.create_file_with_data(six.b(''),suffix='.sqlite') + os.remove(tmpfile.name) + c = sqlite3.connect(tmpfile.name) + c.execute(' create table some_numbers (x int, y int)').fetchall() + c.execute(' insert into some_numbers (x,y) values (100,200),(300,400)').fetchall() + c.commit() + c.close() + + cmd = Q_EXECUTABLE + ' -t "select sum(x),sum(y) from %s:::non_existent"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 85) + self.assertEqual(len(o), 0) + self.assertEqual(len(e), 1) + self.assertEqual(e[0],six.b('Table non_existent could not be found in sqlite file %s . Existing table names: some_numbers' % (tmpfile.name))) + + + def test_qsql_creation_and_direct_use(self): + numbers = [[six.b(str(i)),six.b(str(i)),six.b(str(i))] for i in range(1,10001)] + + file_data = self.arrays_to_csv_file_content(six.b('\t'),[six.b('aa'),six.b('bb'),six.b('cc')],numbers) + + tmpfile = self.create_file_with_data(file_data) + tmpfile_folder = os.path.dirname(tmpfile.name) + tmpfile_filename = os.path.basename(tmpfile.name) + expected_cache_filename = os.path.join(tmpfile_folder,tmpfile_filename + '.qsql') + + cmd = Q_EXECUTABLE + ' -H -t "select sum(aa),sum(bb),sum(cc) from %s" -H -C readwrite' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('50005000\t50005000\t50005000')) + + self.assertTrue(os.path.exists(expected_cache_filename)) + + self.cleanup(tmpfile) + + # Get the data using a comma delimiter, to make sure that column parsing was done correctlyAdding to qcatalog table: + cmd = Q_EXECUTABLE + ' -D , "select count(*),sum(aa),sum(bb),sum(cc) from %s"' % expected_cache_filename + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + self.assertEqual(o[0],six.b('10000,50005000,50005000,50005000')) + + def test_analysis_of_qsql_direct_usage(self): + 
def test_analysis_of_qsql_direct_usage2(self):
    # Build a tab-separated file with three identical integer columns
    # (aa, bb, cc), each holding the values 1..10000.
    rows = []
    for value in range(1, 10001):
        cell = six.b(str(value))
        rows.append([cell, cell, cell])
    header = [six.b('aa'), six.b('bb'), six.b('cc')]
    file_data = self.arrays_to_csv_file_content(six.b('\t'), header, rows)

    tmpfile = self.create_file_with_data(file_data)
    source_path = tmpfile.name
    expected_cache_filename = os.path.join(
        os.path.dirname(source_path),
        os.path.basename(source_path) + '.qsql')

    # First run queries the delimited file with readwrite caching enabled.
    cmd = Q_EXECUTABLE + ' -H -t "select sum(aa),sum(bb),sum(cc) from %s" -H -C readwrite' % source_path
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 1)
    self.assertEqual(len(e), 0)
    self.assertEqual(o[0], six.b('50005000\t50005000\t50005000'))

    # The readwrite run is expected to leave a .qsql file next to the source.
    self.assertTrue(os.path.exists(expected_cache_filename))

    self.cleanup(tmpfile)

    # Second run analyzes (-A) the cache file itself, addressed by its own name.
    cmd = Q_EXECUTABLE + ' "select * from %s" -A' % expected_cache_filename
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 7)
    self.assertEqual(len(e), 0)

    # NOTE(review): leading whitespace in these expected lines must match the
    # -A output exactly - confirm against the analysis printer.
    expected_analysis = [
        six.b('Table: %s' % expected_cache_filename),
        six.b(" Sources:"),
        six.b(' source_type: qsql-file source: %s' % expected_cache_filename),
        six.b(" Fields:"),
        six.b(' `aa` - int'),
        six.b(' `bb` - int'),
        six.b(' `cc` - int'),
    ]
    for line_number, expected_line in enumerate(expected_analysis):
        self.assertEqual(o[line_number], expected_line)
def test_cache_empty_file(self):
    # A header-only file is queried without caching and then with readwrite
    # caching; both runs must succeed with only the "data is empty" warning,
    # and the readwrite run must leave a readable sqlite cache file behind.
    file_data = six.b("a,b,c")
    tmpfile = self.create_file_with_data(file_data)
    tmpfile_folder = os.path.dirname(tmpfile.name)
    tmpfile_filename = os.path.basename(tmpfile.name)
    tmpfile_expected_table_name = tmpfile_filename
    expected_cache_filename = os.path.join(tmpfile_folder, tmpfile_filename + '.qsql')

    for cache_mode in ('none', 'readwrite'):
        cmd = Q_EXECUTABLE + ' -H -d , "select a from %s" -C %s' % (tmpfile.name, cache_mode)
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)
        self.assertEqual(e[0], six.b("Warning - data is empty"))

    # After readwrite caching has been activated, the cache file is expected to exist
    self.assertTrue(os.path.exists(expected_cache_filename))

    # Read the cache file directly, to make sure it's a valid sqlite file
    import sqlite3
    db = sqlite3.connect(expected_cache_filename)
    try:
        # The looked-up value is bound as a parameter instead of being
        # interpolated into the SQL text.
        table_list = db.execute(
            "select content_signature_key,temp_table_name,content_signature,creation_time,source_type,source "
            "from _qcatalog where temp_table_name == ?",
            (tmpfile_expected_table_name,)).fetchall()
        # assertEqual reports the actual count on failure, unlike assertTrue(len(..) == n)
        self.assertEqual(len(table_list), 1)
        table_metadata = table_list[0]
        # Table names cannot be bound as parameters, so this one is formatted in.
        results = db.execute("select * from %s" % table_metadata[1]).fetchall()
        self.assertEqual(len(results), 0)
    finally:
        # Release the handle even when an assertion above fails.
        db.close()

    self.cleanup(tmpfile)
def test_rename_cache_and_read_from_it(self):
    # A .qsql cache written for one file must stay directly queryable after
    # it has been renamed to an unrelated .qsql filename.

    # create a file, along with its qsql
    file_data1 = six.b("a,b,c\n10,20,30\n30,40,50")

    tmpfile1 = self.create_file_with_data(file_data1)
    tmpfile1_folder = os.path.dirname(tmpfile1.name)
    tmpfile1_filename = os.path.basename(tmpfile1.name)
    expected_cache_filename1 = os.path.join(tmpfile1_folder, tmpfile1_filename + '.qsql')

    cmd = Q_EXECUTABLE + ' -H -d , "select a from %s" -C readwrite' % tmpfile1.name
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 2)
    self.assertEqual(len(e), 0)
    # assertEqual, not assertTrue: assertTrue(x, y) only checks that x is
    # truthy and uses y as the failure message.
    self.assertEqual(o[0], six.b('10'))
    self.assertEqual(o[1], six.b('30'))
    # Ensure cache has been created
    self.assertTrue(os.path.exists(expected_cache_filename1))

    tmp_fn = self.generate_tmpfile_name("aa", "qsql")
    os.rename(expected_cache_filename1, tmp_fn)

    # Query the renamed cache file by name
    cmd = '%s "select a from %s"' % (Q_EXECUTABLE, tmp_fn)
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 2)
    self.assertEqual(len(e), 0)
    self.assertEqual(o[0], six.b('10'))
    self.assertEqual(o[1], six.b('30'))
def test_reading_the_wrong_cache__qsql_file_not_having_any_content_signature(self):
    # Reading through a cache whose _qcatalog table has been emptied must
    # fail with exit code 97 and a single explanatory error line.

    # create a file, along with its qsql
    file_data1 = six.b("a,b,c\n10,20,30\n30,40,50")

    tmpfile1 = self.create_file_with_data(file_data1)
    tmpfile1_folder = os.path.dirname(tmpfile1.name)
    tmpfile1_filename = os.path.basename(tmpfile1.name)
    expected_cache_filename1 = os.path.join(tmpfile1_folder, tmpfile1_filename + '.qsql')

    cmd = Q_EXECUTABLE + ' -H -d , "select a from %s" -C readwrite' % tmpfile1.name
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 2)
    self.assertEqual(len(e), 0)
    # assertEqual, not assertTrue: assertTrue(x, y) only checks that x is
    # truthy and uses y as the failure message.
    self.assertEqual(o[0], six.b('10'))
    self.assertEqual(o[1], six.b('30'))
    # Ensure cache has been created
    self.assertTrue(os.path.exists(expected_cache_filename1))

    # delete qcatalog content, so no entries will be available
    c = sqlite3.connect(expected_cache_filename1)
    try:
        c.execute('delete from _qcatalog').fetchall()
        c.commit()
    finally:
        # Close the handle even if the delete fails, so the file is not left open.
        c.close()

    cmd = Q_EXECUTABLE + ' -H -d , "select a from %s" -C read' % tmpfile1.name
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 97)
    self.assertEqual(len(o), 0)
    self.assertEqual(len(e), 1)
    self.assertEqual(e[0], six.b("Could not autodetect table name in qsql file. File contains no record of a table"))
def test_cache_full_flow_with_concatenated_files(self):
    # Two files with the same header are combined with UNION ALL under
    # readwrite caching; each of them is expected to get its own .qsql file.
    first = self.create_file_with_data(six.b("a,b,c\n10,11,12\n20,21,22"))
    first_cache = os.path.join(
        os.path.dirname(first.name),
        os.path.basename(first.name) + '.qsql')

    second = self.create_file_with_data(six.b("a,b,c\n30,31,32\n40,41,42"))
    second_cache = os.path.join(
        os.path.dirname(second.name),
        os.path.basename(second.name) + '.qsql')

    cmd = Q_EXECUTABLE + ' -O -H -d , "select * from (select * from %s UNION ALL select * from %s)" -C readwrite' % (first.name, second.name)
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 5)
    self.assertEqual(len(e), 0)

    # Header line followed by the rows of the first file, then of the second.
    expected_lines = [
        six.b('a,b,c'),
        six.b('10,11,12'),
        six.b('20,21,22'),
        six.b('30,31,32'),
        six.b('40,41,42'),
    ]
    for position, expected_line in enumerate(expected_lines):
        self.assertEqual(o[position], expected_line)

    for cache_path in (first_cache, second_cache):
        self.assertTrue(os.path.exists(cache_path))

    for created in (first, second):
        self.cleanup(created)
def test_partial_caching_exists(self):
    # Joins a file that already has a cache with one that does not:
    # "-C read" must not create the missing cache, "-C readwrite" must.
    file1_data = six.b("a,b,c\n10,20,30\n30,40,50\n60,70,80")
    tmpfile1 = self.create_file_with_data(file1_data)
    tmpfile1_folder = os.path.dirname(tmpfile1.name)
    tmpfile1_filename = os.path.basename(tmpfile1.name)
    expected_cache_filename1 = os.path.join(tmpfile1_folder, tmpfile1_filename + '.qsql')

    file2_data = six.b("b,x\n10,linewith10\n20,linewith20\n30,linewith30\n40,linewith40")
    tmpfile2 = self.create_file_with_data(file2_data)
    tmpfile2_folder = os.path.dirname(tmpfile2.name)
    tmpfile2_filename = os.path.basename(tmpfile2.name)
    expected_cache_filename2 = os.path.join(tmpfile2_folder, tmpfile2_filename + '.qsql')

    # Use only first file, and cache
    cmd = Q_EXECUTABLE + ' -H -d , "select a from %s" -C readwrite' % tmpfile1.name
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 3)
    self.assertEqual(len(e), 0)
    # assertEqual, not assertTrue: assertTrue(x, y) only checks that x is
    # truthy and uses y as the failure message.
    self.assertEqual(o[0], six.b('10'))
    self.assertEqual(o[1], six.b('30'))
    # Third data row of file 1 (its "a" column is 60)
    self.assertEqual(o[2], six.b('60'))

    # Ensure cache has been created for file 1
    self.assertTrue(os.path.exists(expected_cache_filename1))

    join_query = 'select file1.a,file1.b,file1.c,file2.x from %s file1 left join %s file2 on (file1.b = file2.b)' % (tmpfile1.name, tmpfile2.name)
    expected_rows = [
        six.b('10,20,30,linewith20'),
        six.b('30,40,50,linewith40'),
        six.b('60,70,80,'),
    ]

    # Use both files with read caching, one should be read from cache, the other from the file
    cmd = Q_EXECUTABLE + ' -H -d , "%s" -C read' % join_query
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 3)
    self.assertEqual(len(e), 0)
    self.assertEqual(o[0], expected_rows[0])
    self.assertEqual(o[1], expected_rows[1])
    self.assertEqual(o[2], expected_rows[2])

    # Ensure cache has NOT been created for file 2
    self.assertFalse(os.path.exists(expected_cache_filename2))

    # Now rerun the query, this time with readwrite caching, so the second file cache will be written
    cmd = Q_EXECUTABLE + ' -H -d , "%s" -C readwrite' % join_query
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 3)
    self.assertEqual(len(e), 0)
    self.assertEqual(o[0], expected_rows[0])
    self.assertEqual(o[1], expected_rows[1])
    self.assertEqual(o[2], expected_rows[2])

    # Ensure cache has now been created for file 2
    self.assertTrue(os.path.exists(expected_cache_filename2))

    self.cleanup(tmpfile1)
    self.cleanup(tmpfile2)
def test_sqrt_function(self):
    # sqrt() applied to 1..5 read from stdin, rounded to 10 decimal places.
    cmd = 'seq 1 5 | %s -c 1 -d , "select round(sqrt(c1),10) from -"' % Q_EXECUTABLE
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 5)
    self.assertEqual(len(e), 0)

    expected_roots = [
        six.b('1.0'),
        six.b('1.4142135624'),
        six.b('1.7320508076'),
        six.b('2.0'),
        six.b('2.2360679775'),
    ]
    for position, expected_root in enumerate(expected_roots):
        self.assertEqual(o[position], expected_root)
def test_sha_function(self):
    # sha(value, variant, encoding) is evaluated for variants 1, 224 and 256
    # over the inputs 1..4 and compared with fixed hex digests.
    cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha(c1,1,\'utf-8\') as sha1,sha(c1,224,\'utf-8\') as sha224,sha(c1,256,\'utf-8\') as sha256 from -"' % Q_EXECUTABLE
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 4)
    self.assertEqual(len(e), 0)

    # One output line per input value: value,sha1,sha224,sha256
    expected_lines = [
        six.b('1,356a192b7913b04c54574d18c28d46e6395428ab,e25388fde8290dc286a6164fa2d97e551b53498dcbf7bc378eb1f178,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b'),
        six.b('2,da4b9237bacccdf19c0760cab7aec4a8359010b0,58b2aaa0bfae7acc021b3260e941117b529b2e69de878fd7d45c61a9,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35'),
        six.b('3,77de68daecd823babbb58edb1c8e14d7106e83bb,4cfc3a1811fe40afa401b25ef7fa0379f1f7c1930a04f8755d678474,4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce'),
        six.b('4,1b6453892473a467d07372d45eb05abc2031647a,271f93f45e9b4067327ed5c8cd30a034730aaace4382803c3e1d6c2f,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328cb08b5531fcacdabf8a'),
    ]
    for position, expected_line in enumerate(expected_lines):
        self.assertEqual(o[position], expected_line)
def test_output_header_when_extra_header_column_names_are_different__concatenation_replacement(self):
    # Two files whose first header column differs ("name" vs "othername") are
    # combined with UNION ALL; the output is checked against a header line
    # followed by the data rows ordered by name.
    tmpfile1 = self.create_file_with_data(sample_data_with_header)
    tmpfile2 = self.create_file_with_data(generate_sample_data_with_header(six.b('othername,value1,value2')))

    cmd = Q_EXECUTABLE + ' -d , "select name,value1,value2 from (select * from %s union all select * from %s) order by name" -H -O' % (tmpfile1.name, tmpfile2.name)
    retcode, o, e = run_command(cmd)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 7)
    self.assertEqual(len(e), 0)
    # assertEqual, not assertTrue: assertTrue(o, [...]) only checked that the
    # output was non-empty and used the expected list as the failure message.
    # NOTE(review): this expectation was never enforced before - confirm the
    # rows against a real run.
    self.assertEqual(o, [
        six.b('name,value1,value2'),
        six.b('a,1,0'),
        six.b('a,1,0'),
        six.b('b,2,0'),
        six.b('b,2,0'),
        six.b('c,,0'),
        six.b('c,,0'),
    ])

    self.cleanup(tmpfile1)
    self.cleanup(tmpfile2)
def test_strict_mode_too_large_specific_column_count(self):
    # Strict mode with "-c 4" on the headerless sample data must fail and
    # report the column-count mismatch (expected 4, found 3).
    tmpfile = self.create_file_with_data(sample_data_no_header)
    query_args = ' -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name
    retcode, o, e = run_command(Q_EXECUTABLE + query_args)

    self.assertNotEqual(retcode, 0)
    self.assertEqual(len(o), 0)
    self.assertEqual(len(e), 1)

    expected_error = six.b("Strict mode. Column count is expected to be 4 but is 3")
    self.assertEqual(e[0], expected_error)

    self.cleanup(tmpfile)
def test_output_delimiter_with_missing_fields(self):
    # With -D ";" every output row is joined by semicolons; the row with an
    # empty second field is expected to keep that field's position.
    tmpfile = self.create_file_with_data(sample_data_no_header)
    query_args = ' -d , "select * from %s" -D ";"' % tmpfile.name
    retcode, o, e = run_command(Q_EXECUTABLE + query_args)

    self.assertEqual(retcode, 0)
    self.assertEqual(len(o), 3)
    self.assertEqual(len(e), 0)

    expected_rows = [six.b('a;1;0'), six.b('b;2;0'), six.b('c;;0')]
    for position, expected_row in enumerate(expected_rows):
        self.assertEqual(o[position], expected_row)

    self.cleanup(tmpfile)
self.cleanup(tmpfile) + + def test_relaxed_mode_detected_columns(self): + tmpfile = self.create_file_with_data(uneven_ls_output) + cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + + column_rows = o[o.index(six.b(' Fields:'))+1:] + + self.assertEqual(len(column_rows), 11) + + column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] + column_info = [(x[0], x[2]) for x in column_tuples] + column_names = [x[0] for x in column_tuples] + column_types = [x[2] for x in column_tuples] + + self.assertEqual(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 12)]) + self.assertEqual(column_types, list(map(lambda x:six.b(x),[ + 'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text']))) + + self.cleanup(tmpfile) + + def test_relaxed_mode_detected_columns_with_specific_column_count(self): + tmpfile = self.create_file_with_data(uneven_ls_output) + cmd = Q_EXECUTABLE + ' -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + + column_rows = o[o.index(six.b(' Fields:'))+1:] + + self.assertEqual(len(column_rows), 9) + + column_tuples = [x.strip().split(six.b(" ")) for x in column_rows] + column_info = [(x[0], x[2]) for x in column_tuples] + column_names = [x[0] for x in column_tuples] + column_types = [x[2] for x in column_tuples] + + self.assertEqual(column_names, [six.b('`c{}`'.format(x)) for x in range(1, 10)]) + self.assertEqual( + column_types, list(map(lambda x:six.b(x),['text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text']))) + + self.cleanup(tmpfile) + + def test_relaxed_mode_last_column_data_with_specific_column_count(self): + tmpfile = self.create_file_with_data(uneven_ls_output) + cmd = Q_EXECUTABLE + ' -m relaxed "select c9 from %s" -c 9' % tmpfile.name + retcode, o, e = 
run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 9) + self.assertEqual(len(e), 0) + + expected_output = list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", '"/initrd.img.old -> /boot/initrd.img-3.8.0-19-generic"', + "/cdrom", "/home", '"/vmlinuz -> boot/vmlinuz-3.8.0-19-generic"', '"/initrd.img -> boot/initrd.img-3.8.0-19-generic"'])) + + self.assertEqual(o, expected_output) + + self.cleanup(tmpfile) + + def test_1_column_warning_in_relaxed_mode(self): + tmpfile = self.create_file_with_data(one_column_data) + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d ,' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 1) + self.assertEqual(len(o),2) + + self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) + self.assertEqual(o[0],six.b('data without commas 1')) + self.assertEqual(o[1],six.b('data without commas 2')) + + self.cleanup(tmpfile) + + def test_1_column_warning_in_strict_mode(self): + tmpfile = self.create_file_with_data(one_column_data) + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m strict' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 1) + self.assertEqual(len(o),2) + + self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) + self.assertEqual(o[0],six.b('data without commas 1')) + self.assertEqual(o[1],six.b('data without commas 2')) + + self.cleanup(tmpfile) + + + def test_1_column_warning_suppression_in_relaxed_mode_when_column_count_is_specific(self): + tmpfile = self.create_file_with_data(one_column_data) + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m relaxed -c 1' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('data without commas 
1')) + self.assertEqual(o[1],six.b('data without commas 2')) + + self.cleanup(tmpfile) + + def test_1_column_warning_suppression_in_strict_mode_when_column_count_is_specific(self): + tmpfile = self.create_file_with_data(one_column_data) + cmd = Q_EXECUTABLE + ' -m relaxed "select c1 from %s" -d , -m strict -c 1' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o),2) + + self.assertEqual(o[0],six.b('data without commas 1')) + self.assertEqual(o[1],six.b('data without commas 2')) + + self.cleanup(tmpfile) + + def test_fluffy_mode__as_relaxed_mode(self): + tmpfile = self.create_file_with_data(uneven_ls_output) + cmd = Q_EXECUTABLE + ' -m relaxed "select c9 from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 9) + self.assertEqual(len(e), 0) + + expected_output = list(map(lambda x:six.b(x),["/selinux", "/mnt", "/srv", "/lost+found", + "/initrd.img.old", "/cdrom", "/home", "/vmlinuz", "/initrd.img"])) + + self.assertEqual(o, expected_output) + + self.cleanup(tmpfile) + + def test_relaxed_mode_column_count_mismatch__was_previously_fluffy_mode_test(self): + data_row = six.b("column1 column2 column3 column4") + data_list = [data_row] * 1000 + data_list[950] = six.b("column1 column2 column3 column4 column5") + tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) + + cmd = Q_EXECUTABLE + ' -m relaxed "select * from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),1000) + self.assertEqual(len(e),0) + self.assertEqual(o[950],six.b('column1 column2 column3 "column4 column5"')) + + self.cleanup(tmpfile) + + def test_strict_mode_column_count_mismatch__less_columns(self): + data_row = six.b("column1 column2 column3 column4") + data_list = [data_row] * 1000 + data_list[750] = six.b("column1 column3 column4") + tmpfile = 
self.create_file_with_data(six.b("\n").join(data_list)) + + cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode,0) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 3 columns"))) + self.assertTrue(six.b(' row 751.') in e[0]) + + self.cleanup(tmpfile) + + def test_strict_mode_column_count_mismatch__more_columns(self): + data_row = six.b("column1 column2 column3 column4") + data_list = [data_row] * 1000 + data_list[750] = six.b("column1 column2 column3 column4 column5") + tmpfile = self.create_file_with_data(six.b("\n").join(data_list)) + + cmd = Q_EXECUTABLE + ' -m strict "select * from %s"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertNotEqual(retcode,0) + self.assertEqual(len(o),0) + self.assertEqual(len(e),1) + self.assertTrue(e[0].startswith(six.b("Strict mode - Expected 4 columns instead of 5 columns"))) + self.assertTrue(six.b(' row 751.') in e[0]) + + self.cleanup(tmpfile) + + +class FormattingTests(AbstractQTestCase): + + def test_column_formatting(self): + # TODO Decide if this breaking change is reasonable + #cmd = 'seq 1 10 | ' + Q_EXECUTABLE + ' -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -" -c 1' + cmd = 'seq 1 10 | ' + Q_EXECUTABLE + ' -f 1={:4.3f},2={:4.3f} "select sum(c1),avg(c1) from -" -c 1' + + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 1) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], six.b('55.000 5.500')) + + def test_column_formatting_with_output_header(self): + perl_regex = "'s/1\n/column_name\n1\n/;'" + # TODO Decide if this breaking change is reasonable + #cmd = 'seq 1 10 | perl -pe ' + perl_regex + ' | ' + Q_EXECUTABLE + ' -f 1=%4.3f,2=%4.3f "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' + cmd = 'seq 1 10 | LANG=C perl -pe ' + perl_regex + ' | ' + 
Q_EXECUTABLE + ' -f 1={:4.3f},2={:4.3f} "select sum(column_name) mysum,avg(column_name) myavg from -" -c 1 -H -O' + + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 2) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], six.b('mysum myavg')) + self.assertEqual(o[1], six.b('55.000 5.500')) + + def py3_test_successfuly_parse_universal_newlines_without_explicit_flag(self): + def list_as_byte_list(l): + return list(map(lambda x:six.b(x),l)) + + expected_output = list(map(lambda x:list_as_byte_list(x),[['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-May-07', '6850000', 'USD', 'b'], + ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Oct-06', '6000000', 'USD', 'a'], + ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Jan-08', '25000000', 'USD', 'c'], + ['mycityfaces', 'MyCityFaces', '7', 'web', 'Scottsdale', 'AZ', '1-Jan-08', '50000', 'USD', 'seed'], + ['flypaper', 'Flypaper', '', 'web', 'Phoenix', 'AZ', '1-Feb-08', '3000000', 'USD', 'a'], + ['infusionsoft', 'Infusionsoft', '105', 'software', 'Gilbert', 'AZ', '1-Oct-07', '9000000', 'USD', 'a']])) + + data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') + tmp_data_file = self.create_file_with_data(data) + + cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 6) + + actual_output = list(map(lambda row: row.split(six.b(",")),o)) + + self.assertEqual(actual_output,expected_output) + + 
self.cleanup(tmp_data_file) + + test_parsing_universal_newlines_without_explicit_flag = py3_test_successfuly_parse_universal_newlines_without_explicit_flag + + def test_universal_newlines_parsing_flag(self): + def list_as_byte_list(l): + return list(map(lambda x:six.b(x),l)) + + expected_output = list(map(lambda x:list_as_byte_list(x),[['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-May-07', '6850000', 'USD', 'b'], + ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Oct-06', '6000000', 'USD', 'a'], + ['lifelock', 'LifeLock', '', 'web', 'Tempe', 'AZ', '1-Jan-08', '25000000', 'USD', 'c'], + ['mycityfaces', 'MyCityFaces', '7', 'web', 'Scottsdale', 'AZ', '1-Jan-08', '50000', 'USD', 'seed'], + ['flypaper', 'Flypaper', '', 'web', 'Phoenix', 'AZ', '1-Feb-08', '3000000', 'USD', 'a'], + ['infusionsoft', 'Infusionsoft', '105', 'software', 'Gilbert', 'AZ', '1-Oct-07', '9000000', 'USD', 'a']])) + + data = six.b('permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round\rlifelock,LifeLock,,web,Tempe,AZ,1-May-07,6850000,USD,b\rlifelock,LifeLock,,web,Tempe,AZ,1-Oct-06,6000000,USD,a\rlifelock,LifeLock,,web,Tempe,AZ,1-Jan-08,25000000,USD,c\rmycityfaces,MyCityFaces,7,web,Scottsdale,AZ,1-Jan-08,50000,USD,seed\rflypaper,Flypaper,,web,Phoenix,AZ,1-Feb-08,3000000,USD,a\rinfusionsoft,Infusionsoft,105,software,Gilbert,AZ,1-Oct-07,9000000,USD,a') + tmp_data_file = self.create_file_with_data(data) + + cmd = Q_EXECUTABLE + ' -d , -H -U "select permalink,company,numEmps,category,city,state,fundedDate,raisedAmt,raisedCurrency,round from %s"' % tmp_data_file.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + + if len(e) == 2 or len(e) == 1: + # In python 3.7, there's a deprecation warning for the 'U' file opening mode, which is ok for now + self.assertIn(len(e), [1,2]) + self.assertTrue(b"DeprecationWarning: 'U' mode is deprecated" in e[0]) + elif len(e) != 0: + # Nothing should be output to stderr in other versions + 
self.assertTrue(False,msg='Unidentified output in stderr') + + self.assertEqual(len(o), 6) + + actual_output = list(map(lambda row: row.split(six.b(",")),o)) + + self.assertEqual(actual_output,expected_output) + + self.cleanup(tmp_data_file) + + + +class SqlTests(AbstractQTestCase): + + def test_find_example(self): + tmpfile = self.create_file_with_data(find_output) + cmd = Q_EXECUTABLE + ' "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + self.assertEqual(len(e), 0) + + self.assertEqual(o[0], six.b('mapred mapred 0.9389581680297852')) + self.assertEqual(o[1], six.b('root root 0.02734375')) + self.assertEqual(o[2], six.b('harel harel 0.010888099670410156')) + + self.cleanup(tmpfile) + + def test_join_example(self): + cmd = Q_EXECUTABLE + ' "select myfiles.c8,emails.c2 from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 2) + + self.assertEqual(o[0], six.b('ppp dip.1@otherdomain.com')) + self.assertEqual(o[1], six.b('ppp dip.2@otherdomain.com')) + + def test_join_example_with_output_header(self): + cmd = Q_EXECUTABLE + ' -O "select myfiles.c8 aaa,emails.c2 bbb from {0}/exampledatafile myfiles join {0}/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'.format(EXAMPLES) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o), 3) + + self.assertEqual(o[0], six.b('aaa bbb')) + self.assertEqual(o[1], six.b('ppp dip.1@otherdomain.com')) + self.assertEqual(o[2], six.b('ppp dip.2@otherdomain.com')) + + def test_self_join1(self): + tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) + cmd = Q_EXECUTABLE + ' "select * 
from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (tmpfile.name,tmpfile.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 10) + + self.cleanup(tmpfile) + + def test_self_join_reuses_table(self): + tmpfile = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) + cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (tmpfile.name,tmpfile.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 6) + + self.assertEqual(o[0],six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s') % six.b(tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4],six.b(' `c1` - int')) + self.assertEqual(o[5],six.b(' `c2` - int')) + + self.cleanup(tmpfile) + + def test_self_join2(self): + tmpfile1 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) + cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (tmpfile1.name,tmpfile1.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 10*10) + + self.cleanup(tmpfile1) + + tmpfile2 = self.create_file_with_data(six.b("\n").join([six.b("{} 9000".format(i)) for i in range(0,10)])) + cmd = Q_EXECUTABLE + ' "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (tmpfile2.name,tmpfile2.name,tmpfile2.name) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 10*10*10) + + self.cleanup(tmpfile2) + + def test_disable_column_type_detection(self): + tmpfile = self.create_file_with_data(six.b('''regular_text,text_with_digits1,text_with_digits2,float_number +"regular text 1",67,"67",12.3 
+"regular text 2",067,"067",22.3 +"regular text 3",123,"123",33.4 +"regular text 4",-123,"-123",0122.2 +''')) + + # Check original column type detection + cmd = Q_EXECUTABLE + ' -A -d , -H "select * from %s"' % (tmpfile.name) + + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 8) + + self.assertEqual(o[0],six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1], six.b(' Sources:')) + self.assertEqual(o[2], six.b(' source_type: file source: %s') % six.b(tmpfile.name)) + self.assertEqual(o[3], six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `regular_text` - text')) + self.assertEqual(o[5], six.b(' `text_with_digits1` - int')) + self.assertEqual(o[6], six.b(' `text_with_digits2` - int')) + self.assertEqual(o[7], six.b(' `float_number` - float')) + + # Check column types detected when actual detection is disabled + cmd = Q_EXECUTABLE + ' -A -d , -H --as-text "select * from %s"' % (tmpfile.name) + + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 8) + + self.assertEqual(o[0],six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s') % six.b(tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4],six.b(' `regular_text` - text')) + self.assertEqual(o[5],six.b(' `text_with_digits1` - text')) + self.assertEqual(o[6],six.b(' `text_with_digits2` - text')) + self.assertEqual(o[7],six.b(' `float_number` - text')) + + # Get actual data with regular detection + cmd = Q_EXECUTABLE + ' -d , -H "select * from %s"' % (tmpfile.name) + + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) + + self.assertEqual(o[0],six.b("regular text 1,67,67,12.3")) + self.assertEqual(o[1],six.b("regular text 2,67,67,22.3")) + self.assertEqual(o[2],six.b("regular text 
3,123,123,33.4")) + self.assertEqual(o[3],six.b("regular text 4,-123,-123,122.2")) + + # Get actual data without detection + cmd = Q_EXECUTABLE + ' -d , -H --as-text "select * from %s"' % (tmpfile.name) + + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(e), 0) + self.assertEqual(len(o), 4) + + self.assertEqual(o[0],six.b("regular text 1,67,67,12.3")) + self.assertEqual(o[1],six.b("regular text 2,067,067,22.3")) + self.assertEqual(o[2],six.b("regular text 3,123,123,33.4")) + self.assertEqual(o[3],six.b("regular text 4,-123,-123,0122.2")) + + self.cleanup(tmpfile) + + +class BasicModuleTests(AbstractQTestCase): + + def test_engine_isolation(self): + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("d e f\n10 20 30\n40 50 60")) + + # Run file 1 on engine 1 + q1 = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) + r = q1.execute('select * from %s' % tmpfile1.name) + print("QueryQuery",file=sys.stdout) + + self.assertTrue(r.status == 'ok') + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),2) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r.data,[(1,2,3),(4,5,6)]) + self.assertTrue(tmpfile1.name in r.metadata.table_structures) + self.assertTrue(tmpfile1.name in r.metadata.new_table_structures) + self.assertEqual(r.metadata.table_structures[tmpfile1.name].atomic_fns,[tmpfile1.name]) + self.assertEqual(r.metadata.table_structures[tmpfile1.name].source_type,'file') + self.assertEqual(r.metadata.table_structures[tmpfile1.name].source,tmpfile1.name) + + # run file 1 on engine 2 + q2 = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) + r2 = q2.execute('select * from %s' % tmpfile1.name) + print("QueryQuery",file=sys.stdout) + + self.assertTrue(r2.status == 'ok') + self.assertEqual(len(r2.warnings),0) + self.assertEqual(len(r2.data),2) + 
self.assertEqual(r2.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r2.data,[(1,2,3),(4,5,6)]) + self.assertTrue(tmpfile1.name in r2.metadata.table_structures) + self.assertTrue(tmpfile1.name in r2.metadata.new_table_structures) + self.assertEqual(r2.metadata.table_structures[tmpfile1.name].atomic_fns,[tmpfile1.name]) + self.assertEqual(r2.metadata.table_structures[tmpfile1.name].source_type,'file') + self.assertEqual(r2.metadata.table_structures[tmpfile1.name].source,tmpfile1.name) + + # run file 2 on engine 1 + r3 = q1.execute('select * from %s' % tmpfile2.name) + print("QueryQuery",file=sys.stdout) + + print(r3) + self.assertTrue(r3.status == 'ok') + self.assertEqual(len(r3.warnings),0) + self.assertEqual(len(r3.data),2) + self.assertEqual(r3.metadata.output_column_name_list,['d','e','f']) + self.assertEqual(r3.data,[(10,20,30),(40,50,60)]) + self.assertTrue(tmpfile2.name in r3.metadata.table_structures) + self.assertTrue(tmpfile2.name in r3.metadata.new_table_structures) + self.assertEqual(r3.metadata.table_structures[tmpfile2.name].atomic_fns,[tmpfile2.name]) + self.assertEqual(r3.metadata.table_structures[tmpfile2.name].source,tmpfile2.name) + self.assertEqual(r3.metadata.table_structures[tmpfile2.name].source_type,'file') + + q1.done() + q2.done() + + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + def test_simple_query(self): + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + + q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) + r = q.execute('select * from %s' % tmpfile.name) + + self.assertTrue(r.status == 'ok') + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),2) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r.data,[(1,2,3),(4,5,6)]) + self.assertTrue(tmpfile.name in r.metadata.table_structures) + self.assertTrue(tmpfile.name in r.metadata.new_table_structures) + 
self.assertEqual(r.metadata.table_structures[tmpfile.name].atomic_fns,[tmpfile.name]) + self.assertEqual(r.metadata.table_structures[tmpfile.name].source_type,'file') + self.assertEqual(r.metadata.table_structures[tmpfile.name].source,tmpfile.name) + + q.done() + self.cleanup(tmpfile) + + def test_loaded_data_reuse(self): + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + + q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) + r1 = q.execute('select * from %s' % tmpfile.name) + + r2 = q.execute('select * from %s' % tmpfile.name) + + self.assertTrue(r1.status == 'ok') + self.assertEqual(len(r1.warnings),0) + self.assertEqual(len(r1.data),2) + self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) + self.assertTrue(tmpfile.name in r1.metadata.table_structures) + self.assertTrue(tmpfile.name in r1.metadata.new_table_structures) + self.assertEqual(r1.metadata.table_structures[tmpfile.name].atomic_fns,[tmpfile.name]) + self.assertEqual(r1.metadata.table_structures[tmpfile.name].source_type,'file') + self.assertEqual(r1.metadata.table_structures[tmpfile.name].source,tmpfile.name) + + self.assertTrue(r2.status == 'ok') + self.assertTrue(tmpfile.name in r2.metadata.table_structures) + self.assertTrue(tmpfile.name not in r2.metadata.new_table_structures) + self.assertEqual(r2.data,r1.data) + self.assertEqual(r2.metadata.output_column_name_list,r2.metadata.output_column_name_list) + self.assertEqual(len(r2.warnings),0) + + q.done() + + self.cleanup(tmpfile) + + def test_stdin_injection(self): + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + + data_streams_dict = { + '-': DataStream('stdin','-',codecs.open(tmpfile.name,'rb',encoding='utf-8')) + } + q = QTextAsData(QInputParams(skip_header=True,delimiter=' '),data_streams_dict=data_streams_dict) + r = q.execute('select * from -') + + self.assertTrue(r.status == 'ok') + self.assertEqual(len(r.warnings),0) + 
self.assertEqual(len(r.data),2) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r.data,[(1,2,3),(4,5,6)]) + self.assertEqual(r.metadata.new_table_structures['-'],r.metadata.table_structures['-']) + self.assertEqual(r.metadata.table_structures['-'].column_names,['a','b','c']) + self.assertEqual(r.metadata.table_structures['-'].python_column_types,[int,int,int]) + self.assertEqual(r.metadata.table_structures['-'].sqlite_column_types,['int','int','int']) + self.assertEqual(r.metadata.table_structures['-'].source_type,'data-stream') + self.assertEqual(r.metadata.table_structures['-'].source,'stdin') + + q.done() + self.cleanup(tmpfile) + + def test_named_stdin_injection(self): + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + + data_streams_dict = { + 'my_stdin_data': DataStream('my_stdin_data','my_stdin_data',codecs.open(tmpfile.name,'rb',encoding='utf-8')) + } + + q = QTextAsData(QInputParams(skip_header=True,delimiter=' '),data_streams_dict=data_streams_dict) + r = q.execute('select a from my_stdin_data') + + self.assertTrue(r.status == 'ok') + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),2) + self.assertEqual(r.metadata.output_column_name_list,['a']) + self.assertEqual(r.data,[(1,),(4,)]) + self.assertTrue('my_stdin_data' in r.metadata.table_structures) + self.assertTrue('my_stdin_data' in r.metadata.new_table_structures) + self.assertEqual(r.metadata.table_structures['my_stdin_data'].qtable_name,'my_stdin_data') + + q.done() + self.cleanup(tmpfile) + + def test_data_stream_isolation(self): + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("d e f\n7 8 9\n10 11 12")) + + data_streams_dict = { + 'a-': DataStream('a-','a-',codecs.open(tmpfile1.name, 'rb', encoding='utf-8')), + 'b-': DataStream('b-','b-',codecs.open(tmpfile2.name, 'rb', encoding='utf-8')) + } + + q = QTextAsData(QInputParams(skip_header=True,delimiter=' 
'),data_streams_dict=data_streams_dict) + r1 = q.execute('select * from a-') + + self.assertTrue(r1.status == 'ok') + self.assertEqual(len(r1.warnings),0) + self.assertEqual(len(r1.data),2) + self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) + self.assertTrue('a-' in r1.metadata.table_structures) + self.assertEqual(len(r1.metadata.table_structures),1) + self.assertEqual(r1.metadata.table_structures['a-'].source_type, 'data-stream') + self.assertEqual(r1.metadata.table_structures['a-'].source, 'a-') + self.assertEqual(r1.metadata.table_structures['a-'].column_names, ['a','b','c']) + self.assertEqual(r1.metadata.table_structures['a-'].python_column_types, [int,int,int]) + self.assertEqual(r1.metadata.table_structures['a-'].sqlite_column_types, ['int','int','int']) + + r2 = q.execute('select * from b-') + + self.assertTrue(r2.status == 'ok') + self.assertEqual(len(r2.warnings),0) + self.assertEqual(len(r2.data),2) + self.assertEqual(r2.metadata.output_column_name_list,['d','e','f']) + self.assertEqual(r2.data,[(7,8,9),(10,11,12)]) + + self.assertEqual(len(r1.metadata.table_structures),2) + self.assertTrue('b-' in r1.metadata.table_structures) + self.assertEqual(r1.metadata.table_structures['b-'].source_type, 'data-stream') + self.assertEqual(r1.metadata.table_structures['b-'].source, 'b-') + self.assertEqual(r1.metadata.table_structures['b-'].column_names, ['d','e','f']) + self.assertEqual(r1.metadata.table_structures['b-'].python_column_types, [int,int,int]) + self.assertEqual(r1.metadata.table_structures['b-'].sqlite_column_types, ['int','int','int']) + + q.done() + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + def test_multiple_stdin_injection(self): + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("d e f\n7 8 9\n10 11 12")) + + data_streams_dict = { + 'my_stdin_data1': 
DataStream('my_stdin_data1','my_stdin_data1',codecs.open(tmpfile1.name,'rb',encoding='utf-8')), + 'my_stdin_data2': DataStream('my_stdin_data2','my_stdin_data2',codecs.open(tmpfile2.name,'rb',encoding='utf-8')) + } + q = QTextAsData(QInputParams(skip_header=True,delimiter=' '),data_streams_dict=data_streams_dict) + r1 = q.execute('select * from my_stdin_data1') + + self.assertTrue(r1.status == 'ok') + self.assertEqual(len(r1.warnings),0) + self.assertEqual(len(r1.data),2) + self.assertEqual(r1.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r1.data,[(1,2,3),(4,5,6)]) + self.assertTrue('my_stdin_data1' in r1.metadata.table_structures) + self.assertTrue('my_stdin_data1' in r1.metadata.new_table_structures) + self.assertEqual(r1.metadata.table_structures['my_stdin_data1'].qtable_name,'my_stdin_data1') + + r2 = q.execute('select * from my_stdin_data2') + + self.assertTrue(r2.status == 'ok') + self.assertEqual(len(r2.warnings),0) + self.assertEqual(len(r2.data),2) + self.assertEqual(r2.metadata.output_column_name_list,['d','e','f']) + self.assertEqual(r2.data,[(7,8,9),(10,11,12)]) + # There should be another data load, even though it's the same 'filename' as before + self.assertTrue('my_stdin_data2' in r2.metadata.table_structures) + self.assertTrue('my_stdin_data2' in r2.metadata.new_table_structures) + self.assertEqual(r2.metadata.table_structures['my_stdin_data2'].qtable_name,'my_stdin_data2') + + r3 = q.execute('select aa.*,bb.* from my_stdin_data1 aa join my_stdin_data2 bb') + + self.assertTrue(r3.status == 'ok') + self.assertEqual(len(r3.warnings),0) + self.assertEqual(len(r3.data),4) + self.assertEqual(r3.metadata.output_column_name_list,['a','b','c','d','e','f']) + self.assertEqual(r3.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) + self.assertTrue('my_stdin_data1' in r3.metadata.table_structures) + self.assertTrue('my_stdin_data1' not in r3.metadata.new_table_structures) + + q.done() + self.cleanup(tmpfile1) + 
self.cleanup(tmpfile2) + + def test_different_input_params_for_different_files(self): + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("7\t8\t9\n10\t11\t12")) + + q = QTextAsData(QInputParams(skip_header=True,delimiter=' ')) + + q.load_data(tmpfile1.name,QInputParams(skip_header=True,delimiter=' ')) + q.load_data(tmpfile2.name,QInputParams(skip_header=False,delimiter='\t')) + + r = q.execute('select aa.*,bb.* from %s aa join %s bb' % (tmpfile1.name,tmpfile2.name)) + + self.assertTrue(r.status == 'ok') + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),4) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) + self.assertEqual(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) + self.assertTrue(tmpfile1.name not in r.metadata.new_table_structures) + self.assertTrue(tmpfile2.name not in r.metadata.new_table_structures) + + q.done() + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + def test_different_input_params_for_different_files_2(self): + tmpfile1 = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + tmpfile2 = self.create_file_with_data(six.b("7\t8\t9\n10\t11\t12")) + + q = QTextAsData() + + q.load_data(tmpfile1.name,QInputParams(skip_header=True,delimiter=' ')) + q.load_data(tmpfile2.name,QInputParams(skip_header=False,delimiter='\t')) + + r = q.execute('select aa.*,bb.* from %s aa join %s bb' % (tmpfile1.name,tmpfile2.name)) + + self.assertTrue(r.status == 'ok') + self.assertEqual(len(r.warnings),0) + self.assertEqual(len(r.data),4) + self.assertEqual(r.metadata.output_column_name_list,['a','b','c','c1','c2','c3']) + self.assertEqual(r.data,[(1,2,3,7,8,9),(1,2,3,10,11,12),(4,5,6,7,8,9),(4,5,6,10,11,12)]) + self.assertTrue(tmpfile1.name not in r.metadata.new_table_structures) + self.assertTrue(tmpfile2.name not in r.metadata.new_table_structures) + + q.done() + self.cleanup(tmpfile1) + self.cleanup(tmpfile2) + + def 
test_input_params_override(self): + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + + default_input_params = QInputParams() + + for k in default_input_params.__dict__.keys(): + setattr(default_input_params,k,'GARBAGE') + + q = QTextAsData(default_input_params) + + r = q.execute('select * from %s' % tmpfile.name) + + self.assertTrue(r.status == 'error') + + overwriting_input_params = QInputParams(skip_header=True,delimiter=' ') + + r2 = q.execute('select * from %s' % tmpfile.name,input_params=overwriting_input_params) + + self.assertTrue(r2.status == 'ok') + self.assertEqual(len(r2.warnings),0) + self.assertEqual(len(r2.data),2) + self.assertEqual(r2.metadata.output_column_name_list,['a','b','c']) + self.assertEqual(r2.data,[(1,2,3),(4,5,6)]) + self.assertTrue(tmpfile.name in r2.metadata.table_structures) + self.assertTrue(tmpfile.name in r2.metadata.new_table_structures) + self.assertEqual(r2.metadata.table_structures[tmpfile.name].atomic_fns,[tmpfile.name]) + self.assertEqual(r2.metadata.table_structures[tmpfile.name].source,tmpfile.name) + self.assertEqual(r2.metadata.table_structures[tmpfile.name].source_type,'file') + + q.done() + self.cleanup(tmpfile) + + def test_input_params_merge(self): + input_params = QInputParams() + + for k in input_params.__dict__.keys(): + setattr(input_params,k,'GARBAGE') + + merged_input_params = input_params.merged_with(QInputParams()) + + for k in merged_input_params.__dict__.keys(): + self.assertTrue(getattr(merged_input_params,k) != 'GARBAGE') + + for k in input_params.__dict__.keys(): + self.assertTrue(getattr(merged_input_params,k) != 'GARBAGE') + + def test_table_analysis_with_syntax_error(self): + + q = QTextAsData() + + q_output = q.analyze("bad syntax") + + q.done() + self.assertTrue(q_output.status == 'error') + self.assertTrue(q_output.error.msg.startswith('query error')) + + def test_execute_response(self): + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + + q = QTextAsData() + 
+ q_output = q.execute("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True)) + + self.assertTrue(q_output.status == 'ok') + self.assertTrue(q_output.error is None) + self.assertEqual(len(q_output.warnings),0) + self.assertEqual(len(q_output.data),2) + self.assertEqual(q_output.data,[ (1,3),(4,6) ]) + self.assertTrue(q_output.metadata is not None) + + metadata = q_output.metadata + + self.assertEqual(metadata.output_column_name_list, [ 'a','c']) + self.assertTrue(tmpfile.name in metadata.new_table_structures) + self.assertEqual(len(metadata.table_structures),1) + + table_structure = metadata.new_table_structures[tmpfile.name] + + self.assertEqual(table_structure.column_names,[ 'a','b','c']) + self.assertEqual(table_structure.python_column_types,[ int,int,int]) + self.assertEqual(table_structure.sqlite_column_types,[ 'int','int','int']) + self.assertEqual(table_structure.qtable_name, tmpfile.name) + self.assertEqual(table_structure.atomic_fns,[tmpfile.name]) + self.assertEqual(table_structure.source_type,'file') + self.assertEqual(table_structure.source,tmpfile.name) + + q.done() + self.cleanup(tmpfile) + + def test_analyze_response(self): + tmpfile = self.create_file_with_data(six.b("a b c\n1 2 3\n4 5 6")) + + q = QTextAsData() + + q_output = q.analyze("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True)) + + self.assertTrue(q_output.status == 'ok') + self.assertTrue(q_output.error is None) + self.assertEqual(len(q_output.warnings),0) + self.assertEqual(len(q_output.data),2) + self.assertEqual(q_output.data,[ (1,3),(4,6) ]) + self.assertTrue(q_output.metadata is not None) + + metadata = q_output.metadata + + self.assertEqual(metadata.output_column_name_list, [ 'a','c']) + self.assertEqual(len(metadata.table_structures),1) + self.assertTrue(tmpfile.name in metadata.new_table_structures) + + table_structure = metadata.table_structures[tmpfile.name] + + self.assertEqual(table_structure.column_names,[ 'a','b','c']) + 
self.assertEqual(table_structure.python_column_types,[ int,int,int]) + self.assertEqual(table_structure.sqlite_column_types,[ 'int','int','int']) + self.assertEqual(table_structure.qtable_name, tmpfile.name) + self.assertEqual(table_structure.atomic_fns,[tmpfile.name]) + self.assertEqual(table_structure.source_type,'file') + self.assertEqual(table_structure.source,tmpfile.name) + + q.done() + self.cleanup(tmpfile) + + def test_load_data_from_string_without_previous_data_load(self): + input_str = six.u('column1,column2,column3\n') + six.u('\n').join([six.u('value1,2.5,value3')] * 1000) + + + data_streams_dict = { + 'my_data': DataStream('my_data_stream_id','my_data',six.StringIO(input_str)) + } + q = QTextAsData(default_input_params=QInputParams(skip_header=True,delimiter=','),data_streams_dict=data_streams_dict) + + q_output = q.execute('select column2,column3 from my_data') + + self.assertTrue(q_output.status == 'ok') + self.assertTrue(q_output.error is None) + self.assertEqual(len(q_output.warnings),0) + self.assertTrue(len(q_output.data),1000) + self.assertEqual(len(set(q_output.data)),1) + self.assertEqual(list(set(q_output.data))[0],(2.5,'value3')) + + metadata = q_output.metadata + + self.assertTrue(metadata.output_column_name_list,['column2','column3']) + self.assertTrue('my_data' in metadata.new_table_structures) + self.assertEqual(len(metadata.table_structures),1) + + table_structure = metadata.table_structures['my_data'] + + self.assertEqual(table_structure.column_names,['column1','column2','column3']) + self.assertEqual(table_structure.sqlite_column_types,['text','float','text']) + self.assertEqual(table_structure.python_column_types,[str,float,str]) + self.assertEqual(table_structure.qtable_name, 'my_data') + self.assertEqual(table_structure.source_type, 'data-stream') + self.assertEqual(table_structure.source, 'my_data_stream_id') + + q.done() + + def test_load_data_from_string_with_previous_data_load(self): + input_str = 
six.u('column1,column2,column3\n') + six.u('\n').join([six.u('value1,2.5,value3')] * 1000) + + data_streams_dict = { + 'my_data': DataStream('a','my_data',six.StringIO(input_str)) + } + q = QTextAsData(default_input_params=QInputParams(skip_header=True,delimiter=','),data_streams_dict=data_streams_dict) + + dl = q.load_data('my_data',QInputParams(skip_header=True,delimiter=',')) + + q_output = q.execute('select column2,column3 from my_data') + + self.assertTrue(q_output.status == 'ok') + self.assertTrue(q_output.error is None) + self.assertEqual(len(q_output.warnings),0) + self.assertTrue(len(q_output.data),1000) + self.assertEqual(len(set(q_output.data)),1) + self.assertEqual(list(set(q_output.data))[0],(2.5,'value3')) + + metadata = q_output.metadata + + self.assertTrue(metadata.output_column_name_list,['column2','column3']) + self.assertTrue('my_data' not in metadata.new_table_structures) + self.assertEqual(len(metadata.table_structures),1) + + table_structure = metadata.table_structures['my_data'] + + self.assertEqual(table_structure.column_names,['column1','column2','column3']) + self.assertEqual(table_structure.sqlite_column_types,['text','float','text']) + self.assertEqual(table_structure.python_column_types,[str,float,str]) + self.assertEqual(table_structure.qtable_name, 'my_data') + + q.done() + + + +class BenchmarkAttemptResults(object): + def __init__(self, attempt, lines, columns, duration,return_code): + self.attempt = attempt + self.lines = lines + self.columns = columns + self.duration = duration + self.return_code = return_code + + def __str__(self): + return "{}".format(self.__dict__) + __repr__ = __str__ + +class BenchmarkResults(object): + def __init__(self, lines, columns, attempt_results, mean, stddev): + self.lines = lines + self.columns = columns + self.attempt_results = attempt_results + self.mean = mean + self.stddev = stddev + + def __str__(self): + return "{}".format(self.__dict__) + __repr__ = __str__ + +@pytest.mark.benchmark +class 
BenchmarkTests(AbstractQTestCase): + + BENCHMARK_DIR = os.environ.get('Q_BENCHMARK_DATA_DIR') + + def _ensure_benchmark_data_dir_exists(self): + try: + os.mkdir(BenchmarkTests.BENCHMARK_DIR) + except Exception as e: + pass + + def _create_benchmark_file_if_needed(self): + self._ensure_benchmark_data_dir_exists() + + if os.path.exists('{}/'.format(BenchmarkTests.BENCHMARK_DIR)): + return + + g = GzipFile('unit-file.csv.gz') + d = g.read().decode('utf-8') + f = open('{}/benchmark-file.csv'.format(BenchmarkTests.BENCHMARK_DIR), 'w') + for i in range(100): + f.write(d) + f.close() + + def _prepare_test_file(self, lines, columns): + + filename = '{}/_benchmark_data__lines_{}_columns_{}.csv'.format(BenchmarkTests.BENCHMARK_DIR,lines, columns) + + if os.path.exists(filename): + return filename + + c = ['c{}'.format(x + 1) for x in range(columns)] + + # write a header line + ff = open(filename,'w') + ff.write(",".join(c)) + ff.write('\n') + ff.close() + + r, o, e = run_command('head -{} {}/benchmark-file.csv | ' + Q_EXECUTABLE + ' -d , "select {} from -" >> {}'.format(lines, BenchmarkTests.BENCHMARK_DIR, ','.join(c), filename)) + self.assertEqual(r, 0) + # Create file cache as part of preparation + r, o, e = run_command(Q_EXECUTABLE + ' -C readwrite -d , "select count(*) from %s"' % filename) + self.asserEqual(r, 0) + return filename + + def _decide_result(self,attempt_results): + + failed = list(filter(lambda a: a.return_code != 0,attempt_results)) + + if len(failed) == 0: + mean = sum([x.duration for x in attempt_results]) / len(attempt_results) + sum_squared = sum([(x.duration - mean)**2 for x in attempt_results]) + ddof = 0 + pvar = sum_squared / (len(attempt_results) - ddof) + stddev = pvar ** 0.5 + else: + mean = None + stddev = None + + return BenchmarkResults( + attempt_results[0].lines, + attempt_results[0].columns, + attempt_results, + mean, + stddev + ) + + def _perform_test_performance_matrix(self,name,generate_cmd_function): + results = [] + + 
benchmark_results_folder = os.environ.get("Q_BENCHMARK_RESULTS_FOLDER",'') + if benchmark_results_folder == "": + raise Exception("Q_BENCHMARK_RESULTS_FOLDER must be provided as an environment variable") + + self._create_benchmark_file_if_needed() + for columns in [1, 5, 10, 20, 50, 100]: + for lines in [1, 10, 100, 1000, 10000, 100000, 1000000]: + attempt_results = [] + for attempt in range(10): + filename = self._prepare_test_file(lines, columns) + if DEBUG: + print("Testing {}".format(filename)) + t0 = time.time() + r, o, e = run_command(generate_cmd_function(filename,lines,columns)) + duration = time.time() - t0 + attempt_result = BenchmarkAttemptResults(attempt, lines, columns, duration, r) + attempt_results += [attempt_result] + if DEBUG: + print("Results: {}".format(attempt_result.__dict__)) + final_result = self._decide_result(attempt_results) + results += [final_result] + + series_fields = [six.u('lines'),six.u('columns')] + value_fields = [six.u('mean'),six.u('stddev')] + + all_fields = series_fields + value_fields + + output_filename = '{}/{}.benchmark-results'.format(benchmark_results_folder,name) + output_file = open(output_filename,'w') + for columns,g in itertools.groupby(sorted(results,key=lambda x:x.columns),key=lambda x:x.columns): + x = six.u("\t").join(series_fields + [six.u('{}_{}').format(name, f) for f in value_fields]) + print(x,file = output_file) + for result in g: + print(six.u("\t").join(map(str,[getattr(result,f) for f in all_fields])),file=output_file) + output_file.close() + + print("results have been written to : {}".format(output_filename)) + if DEBUG: + print("RESULTS FOR {}".format(name)) + print(open(output_filename,'r').read()) + + def test_q_matrix(self): + Q_BENCHMARK_NAME = os.environ.get('Q_BENCHMARK_NAME') + if Q_BENCHMARK_NAME is None: + raise Exception('Q_BENCHMARK_NAME must be provided as an env var') + + def generate_q_cmd(data_filename, line_count, column_count): + Q_BENCHMARK_ADDITIONAL_PARAMS = 
os.environ.get('Q_BENCHMARK_ADDITIONAL_PARAMS') or '' + additional_params = '' + additional_params = additional_params + ' ' + Q_BENCHMARK_ADDITIONAL_PARAMS + return '{} -d , {} "select count(*) from {}"'.format(Q_EXECUTABLE,additional_params, data_filename) + self._perform_test_performance_matrix(Q_BENCHMARK_NAME,generate_q_cmd) + + def _get_textql_version(self): + r,o,e = run_command("textql --version") + if r != 0: + raise Exception("Could not find textql") + if len(e) != 0: + raise Exception("Errors while getting textql version") + return o[0] + + def _get_octosql_version(self): + r,o,e = run_command("octosql --version") + if r != 0: + raise Exception("Could not find octosql") + if len(e) != 0: + raise Exception("Errors while getting octosql version") + version = re.findall('v[0-9]+\\.[0-9]+\\.[0-9]+',str(o[0],encoding='utf-8'))[0] + return version + + def test_textql_matrix(self): + def generate_textql_cmd(data_filename,line_count,column_count): + return 'textql -dlm , -sql "select count(*)" {}'.format(data_filename) + + name = 'textql_%s' % self._get_textql_version() + self._perform_test_performance_matrix(name,generate_textql_cmd) + + def test_octosql_matrix(self): + config_fn = self.random_tmp_filename('octosql', 'config') + def generate_octosql_cmd(data_filename,line_count,column_count): + j = """ +dataSources: + - name: bmdata + type: csv + config: + path: "{}" + headerRow: false + batchSize: 10000 +""".format(data_filename)[1:] + f = open(config_fn,'w') + f.write(j) + f.close() + return 'octosql -c {} -o batch-csv "select count(*) from bmdata a"'.format(config_fn) + + name = 'octosql_%s' % self._get_octosql_version() + self._perform_test_performance_matrix(name,generate_octosql_cmd) + +def suite(): + tl = unittest.TestLoader() + basic_stuff = tl.loadTestsFromTestCase(BasicTests) + parsing_mode = tl.loadTestsFromTestCase(ParsingModeTests) + sql = tl.loadTestsFromTestCase(SqlTests) + formatting = tl.loadTestsFromTestCase(FormattingTests) + 
basic_module_stuff = tl.loadTestsFromTestCase(BasicModuleTests) + save_db_to_disk_tests = tl.loadTestsFromTestCase(SaveDbToDiskTests) + user_functions_tests = tl.loadTestsFromTestCase(UserFunctionTests) + multi_header_tests = tl.loadTestsFromTestCase(MultiHeaderTests) + return unittest.TestSuite([basic_module_stuff, basic_stuff, parsing_mode, sql, formatting,save_db_to_disk_tests,multi_header_tests,user_functions_tests]) + +if __name__ == '__main__': + if len(sys.argv) > 1: + suite = unittest.TestSuite() + if '.' in sys.argv[1]: + c,m = sys.argv[1].split(".") + suite.addTest(globals()[c](m)) + else: + tl = unittest.TestLoader() + tc = tl.loadTestsFromTestCase(globals()[sys.argv[1]]) + suite = unittest.TestSuite([tc]) + else: + suite = suite() + + test_runner = unittest.TextTestRunner(verbosity=2) + result = test_runner.run(suite) + sys.exit(not result.wasSuccessful()) diff --git a/upload-release b/upload-release deleted file mode 100755 index bfd19018..00000000 --- a/upload-release +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -set -e - -base_folder=$(dirname $0) -pushd ${base_folder} >/dev/null - -if [ $# -ne 2 ]; -then - echo "Usage: $(dirname $0) " - echo - echo "Note that the git tag must be pushed to github before doing this." - exit 1 -fi -VERSION="$1" -BASED_ON_TAG="$2" - -echo uploading -gh release upload $VERSION ./packages/*.{rpm,deb} - -echo done - - - From c052fceaca83be7c09385e39febff4486edccf2d Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 22 Oct 2021 03:18:01 +0300 Subject: [PATCH 069/111] docs --- README.markdown | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.markdown b/README.markdown index c5cd183e..d08605a0 100644 --- a/README.markdown +++ b/README.markdown @@ -15,8 +15,9 @@ This is the list of new/changed functionality in this version: * **Direct querying of standard sqlite databases** - Just use it as a table name in the query. Format is `select ... 
from :::`, or just `` if the database contains only one table. Multiple separate sqlite databases are fully supported in the same query. * **Direct querying of the `qsql` cache files** - The user can query directly from the `qsql` files, removing the need for the original files. Just use `select ... from .qsql`. Please wait until the non-beta version is out before thinking about deleting any of your original files... * **Revamped `.qrc` mechanism** - allows opting-in to caching without specifying it in every query. By default, caching is **disabled**, for backward compatibility and for finding usability issues. -* **Only python3 is supported from now on** - Shouldn't be an issue, since q is a self-contained binary executable which has its own python embedded in it * **Save-to-db is now reusable for queries** - `--save-db-to-disk` option (`-S`) has been enhanced to match the new capabilities. You can query the resulting file directly through q, using the method mentioned above (it's just a standard sqlite database). +* **Only python3 is supported from now on** - Shouldn't be an issue, since q is a self-contained binary executable which has its own python embedded in it. Internally, q is now packaged with Python 3.8. After everything cools down, I'll probably bump this to 3.9/3.10. +* **Minimal Linux Version Bumped** - Works with CentOS 8, Ubuntu 18.04+, Debian 10+. Currently only for x86_64. Depends on glibc version 2.25+. Haven't tested it on other architectures. Issuing other architectures will be possible later on For details on the changes and the new usage, see [here](QSQL-NOTES.md) @@ -39,9 +40,11 @@ Instructions for all OSs are [here](http://harelba.github.io/q/#installation). ### Installation of the new beta release For now, only Linux RPM, DEB and Mac OSX are supported. Almost made the Windows version work, but there's some issue there, and the windows executable requires some external dependencies which I'm trying to eliminate. 
-The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable and put it in your filesystem. DEB/RPM are working well, although for some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. +The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable, put it in your filesystem and `chmod +x` it. -Download the relevant files directly from [Links Coming Soon](TBD). +DEB/RPM are working well, although for some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. + +Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.0-beta). ## Examples From a853071ec435bcc0702ce0b6d2732458ba890dce Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 22 Oct 2021 03:44:39 +0300 Subject: [PATCH 070/111] docs --- QSQL-NOTES.md | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index 1bb1e430..7a58a95e 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -1,5 +1,44 @@ -# New beta version 3.1.0-beta is available, which contains the following major changes/additions: +# New beta version 3.1.0-beta is available + +Contains a lot of major changes, see sections below for details. + +## Basic Example of using the caching +``` +# Prepare some data +$ seq 1 1000000 > myfile.csv + +# read from the resulting file (-c 1 just prevents the warning of having one column only) +$ time q -c 1 "select sum(c1),count(*) from myfile.csv" +500000500000 1000000 +q -c 1 "select sum(c1),count(*) from myfile.csv" 4.02s user 0.06s system 99% cpu 4.108 total + +# Running with `-C readwrite` auto-creates a cache file if there is none. The cache filename would be myfile.csv.qsql. 
The query runs as usual +$ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite +time q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite +500000500000 1000000 +q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite 3.96s user 0.08s system 99% cpu 4.057 total + +# Now run with `-C read`. The query will run from the cache file and not the original. Change the query and run it several times, to notice the difference in speed. +$ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C read +500000500000 1000000 +q -c 1 "select sum(c1),count(*) from myfile.csv" -C read 0.17s user 0.05s system 94% cpu 0.229 total + +# You can query the qsql file directly +$ time q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" +500000500000 1000000 +q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 0.17s user 0.05s system 95% cpu 0.226 total + +# Now let's delete the original csv file +$ rm -vf myfile.csv + +# Running another query on the qsql file just works +$ q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" +500000500000 1000000 +q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 0.17s user 0.04s system 94% cpu 0.226 total + +# See the `.qrc` section below if you want to set the default `-C` (`--caching-mode`) to something other than `none` (the default) +``` The following sections provide the details of each of the new functionality in this major version. @@ -32,7 +71,7 @@ caching_mode=read All other flags and parameters to q can be controlled by the `.qrc` file. To see the proper names for each parameter, run `q --dump-defaults` and it will dump a default `.qrc` file that contains all parameters to `stdout`. -### Direct querying of standard sqlite databases +## Direct querying of standard sqlite databases q now supports direct querying of standard sqlite databases. The syntax for accessing a table inside an sqlite database is `:::`. A query can contain any mix of sqlite files, qsql files or regular delimited files. 
For example, this command joins two tables from two separate sqlite databases: @@ -69,7 +108,7 @@ This database can be accessed directly by q later on, by providing ` Date: Fri, 22 Oct 2021 03:47:15 +0300 Subject: [PATCH 071/111] docs --- README.markdown | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.markdown b/README.markdown index d08605a0..1453554c 100644 --- a/README.markdown +++ b/README.markdown @@ -7,9 +7,10 @@ q treats ordinary files as database tables, and supports all SQL constructs, suc q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. -## New beta version `3.1.0-beta` is available, which contains the following major changes/additions: +## New beta version `3.1.0-beta` is available +(Full Details [here](QSQL-NOTES.md)) -This is the list of new/changed functionality in this version: +This is the list of new/changed functionality in this version, major stuff, please make sure to read it and the details link as well. * **Automatic Immutable Caching** - Automatic caching of data files (into `.qsql` files), with huge speedups for medium/large files. Enabled through `-C readwrite` or `-C read` * **Direct querying of standard sqlite databases** - Just use it as a table name in the query. Format is `select ... from :::`, or just `` if the database contains only one table. Multiple separate sqlite databases are fully supported in the same query. @@ -19,7 +20,7 @@ This is the list of new/changed functionality in this version: * **Only python3 is supported from now on** - Shouldn't be an issue, since q is a self-contained binary executable which has its own python embedded in it. Internally, q is now packaged with Python 3.8. After everything cools down, I'll probably bump this to 3.9/3.10. * **Minimal Linux Version Bumped** - Works with CentOS 8, Ubuntu 18.04+, Debian 10+. 
Currently only for x86_64. Depends on glibc version 2.25+. Haven't tested it on other architectures. Issuing other architectures will be possible later on -For details on the changes and the new usage, see [here](QSQL-NOTES.md) +Full details on the changes and the new usage is in [here](QSQL-NOTES.md) The version is still in early testing, for two reasons: From d30a2cfc0836478f5f06213521f11b6ec25d779b Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 22 Oct 2021 03:49:09 +0300 Subject: [PATCH 072/111] docs --- README.markdown | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.markdown b/README.markdown index 1453554c..40f3b671 100644 --- a/README.markdown +++ b/README.markdown @@ -8,9 +8,9 @@ q treats ordinary files as database tables, and supports all SQL constructs, suc q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. ## New beta version `3.1.0-beta` is available -(Full Details [here](QSQL-NOTES.md)) +Full Details [here](QSQL-NOTES.md), and an example of the caching is in [here](QSQL-NOTES.md#basic-example-of-using-the-caching). -This is the list of new/changed functionality in this version, major stuff, please make sure to read it and the details link as well. +This is the list of new/changed functionality in this version, large changes, please make sure to read it and the details link as well. * **Automatic Immutable Caching** - Automatic caching of data files (into `.qsql` files), with huge speedups for medium/large files. Enabled through `-C readwrite` or `-C read` * **Direct querying of standard sqlite databases** - Just use it as a table name in the query. Format is `select ... from :::`, or just `` if the database contains only one table. Multiple separate sqlite databases are fully supported in the same query. 
From a1a3d6941657dc5ff75d5c557144d06098bd2a5c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 22 Oct 2021 13:42:43 +0300 Subject: [PATCH 073/111] docs --- QSQL-NOTES.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index 7a58a95e..2d9dd35a 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -1,5 +1,6 @@ # New beta version 3.1.0-beta is available +Installation instructions [at the end of this document](TBD) Contains a lot of major changes, see sections below for details. @@ -135,3 +136,12 @@ Users which for some reason still use q's main source code file directly and use If you are such a user, and this decision hurts you considerably, please ping me. +# Installation of the new beta release +For now, only Linux RPM, DEB and Mac OSX are supported. Almost made the Windows version work, but there's some issue there, and the windows executable requires some external dependencies which I'm trying to eliminate. + +The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable, put it in your filesystem and `chmod +x` it. + +DEB/RPM are working well, although for some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. + +Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.0-beta). 
+ From 7c0f84dbe2e5cd92760ae3bb01b7be14aad95f63 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 22 Oct 2021 13:43:25 +0300 Subject: [PATCH 074/111] docs --- QSQL-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index 2d9dd35a..48fa3285 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -1,6 +1,6 @@ # New beta version 3.1.0-beta is available -Installation instructions [at the end of this document](TBD) +Installation instructions [at the end of this document](QSQL-NOTES.md#installation-of-the-new-beta-release) Contains a lot of major changes, see sections below for details. From 5be7815463740aba17e0af4fd80bfb312d924d39 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 22 Oct 2021 16:42:36 +0300 Subject: [PATCH 075/111] man page and usage changes --- bin/q.py | 35 +++++++++++--------- doc/USAGE.markdown | 82 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/bin/q.py b/bin/q.py index 1ea957d4..a71fec45 100755 --- a/bin/q.py +++ b/bin/q.py @@ -3159,32 +3159,37 @@ def dump_default_values_as_qrc(parser,exclusions): print("%s=%s" % (k,m[k]),file=sys.stdout) USAGE_TEXT = """ - q allows performing SQL-like statements on tabular text data. + q - Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. + Example Execution for a delimited file: - Basic usage is q "" where table names are just regular file names (Use - to read from standard input) - When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN. + q "select * from myfile.csv" - Column types are detected automatically. Use -A in order to see the column name/type analysis. + Example Execution for an sqlite3 database: - Delimiter can be set using the -d (or -t) option. 
Output delimiter can be set using -D + q "select * from mydatabase.sqlite:::my_table_name" - All sqlite3 SQL constructs are supported. + or - Examples: + q "select * from mydatabase.sqlite" - Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1" - This example would print a count of each unique permission string in the current folder. + if the database file contains only one table - Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -" - This example would provide the average and the sum of the numbers in the range 1 to 1000 + Auto-caching of delimited files can be activated through `-C readwrite` (writes new caches if needed) or `-C read` (only reads existing cache files) - Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc" - This example will output the total size in MB per user+group in the /tmp subtree + Setting the default caching mode (`-C`) can be done by writing a `~/.qrc` file. See docs for more info. + +q's purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. +q allows the following: + +* Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file +* Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory + +Changing the default values for parameters can be done by creating a `~/.qrc` file. Run q with `--dump-defaults` in order to dump a default `.qrc` file into stdout. + +See https://github.com/harelba/q for more details. - See the help or http://harelba.github.io/q/ for more details. """ def run_standalone(): diff --git a/doc/USAGE.markdown b/doc/USAGE.markdown index bc43021a..a1a51d4d 100644 --- a/doc/USAGE.markdown +++ b/doc/USAGE.markdown @@ -3,13 +3,39 @@ ## SYNOPSIS `q ` - Simplest execution is `q "SELECT * FROM myfile"` which prints the entire file. 
+ Example Execution for a delimited file: + q "select * from myfile.csv" + + Example Execution for an sqlite3 database: + + q "select * from mydatabase.sqlite:::my_table_name" + + or + + q "select * from mydatabase.sqlite" + + if the database file contains only one table + + Auto-caching of delimited files can be activated through `-C readwrite` (writes new caches if needed) or `-C read` (only reads existing cache files) + + Setting the default caching mode (`-C`) can be done by writing a `~/.qrc` file. See docs for more info. + ## DESCRIPTION -q allows performing SQL-like statements on tabular text data. Its purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. +q's purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. + +q allows the following: + +* Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file +* Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory Query should be an SQL-like query which contains filenames instead of table names (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). +The following filename types are supported: + +* Delimited-file filenames, including relative/absolute paths +* sqlite3 database filenames, with an additional `:::` for accessing a specific table. If a database contains only one table, then denoting the table name is not needed. Examples: `mydatabase.sqlite3:::users_table` or `my_single_table_database.sqlite`. + Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. q "SELECT c3,c8 from ..."). 
Use `-d` to specify the input delimiter. @@ -20,7 +46,7 @@ Please note that column names that include spaces need to be used in the query w Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed. -All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). +All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table), with the exception of CTE (for now). See https://github.com/harelba/q for more details. @@ -44,6 +70,27 @@ q can also get some runtime flags. The following parameters can be used, all opt Options: -h, --help show this help message and exit -v, --version Print version + -V, --verbose Print debug info in case of problems + -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME + Save database to an sqlite database file + -C CACHING_MODE, --caching-mode=CACHING_MODE + Choose the autocaching mode (none/read/readwrite). + Autocaches files to disk db so further queries will be + faster. Caching is done to a side-file with the same + name of the table, but with an added extension .qsql + --dump-defaults Dump all default values for parameters and exit. Can + be used in order to make sure .qrc file content is + being read properly. + --max-attached-sqlite-databases=MAX_ATTACHED_SQLITE_DATABASES + Set the maximum number of concurrently-attached sqlite + dbs. This is a compile time definition of sqlite. q's + performance will slow down once this limit is reached + for a query, since it will perform table copies in + order to avoid that limit. + --overwrite-qsql=OVERWRITE_QSQL + When used, qsql files (both caches and store-to-db) + will be overwritten if they already exist. Use with + care. Input Data Options: -H, --skip-header Skip header row. 
This has been changed from earlier @@ -52,6 +99,8 @@ Options: -d DELIMITER, --delimiter=DELIMITER Field delimiter. If none specified, then space is used as the delimiter. + -p, --pipe-delimited + Same as -d '|'. Added for convenience and readability -t, --tab-delimited Same as -d . Just a shorthand for handling standard tab delimited file You can use $'\t' if you @@ -87,16 +136,26 @@ Options: escaping the double quote character. By default, you can use \" inside double quoted fields to escape double quotes. Mainly for backward compatibility. + --as-text Don't detect column types - All columns will be + treated as text columns -w INPUT_QUOTING_MODE, --input-quoting-mode=INPUT_QUOTING_MODE Input quoting mode. Possible values are all, minimal and none. Note the slightly misleading parameter name, and see the matching -W parameter for output quoting. + -M MAX_COLUMN_LENGTH_LIMIT, --max-column-length-limit=MAX_COLUMN_LENGTH_LIMIT + Sets the maximum column length. + -U, --with-universal-newlines + Expect universal newlines in the data. Limitation: -U + works only with regular files for now, stdin or .gz + files are not supported yet. Output Options: -D OUTPUT_DELIMITER, --output-delimiter=OUTPUT_DELIMITER Field delimiter for output. If none specified, then the -d delimiter is used if present, or space if no delimiter is specified + -P, --pipe-delimited-output + Same as -D '|'. Added for convenience and readability. -T, --tab-delimited-output Same as -D . Just a shorthand for outputting tab delimited output. You can use -D $'\t' if you want. @@ -121,6 +180,8 @@ Options: nonnumeric and none. Note the slightly misleading parameter name, and see the matching -w parameter for input quoting. + -L, --list-user-functions + List all user functions Query Related Options: -q QUERY_FILENAME, --query-filename=QUERY_FILENAME @@ -130,31 +191,26 @@ Options: -Q QUERY_ENCODING, --query-encoding=QUERY_ENCODING query text encoding. Experimental. 
Please send your feedback on this -```` +``` ### Table names The table names are the actual file names that you want to read from. Path names are allowed. Use "-" if you want to read from stdin (e.g. `q "SELECT * FROM -"`) -Multiple files can be concatenated by using one of both of the following ways: - -* Separating the filenames with a + sign: `SELECT * FROM datafile1+datafile2+datefile3`. -* Using glob matching: `SELECT * FROM mydata*.dat` +Wildcard matches are supported - For example: `SELECT ... FROM ... mydata*.dat` Files with .gz extension are considered to be gzipped and decompressed on the fly. ### Parsing Modes -q supports multiple parsing modes: +q supports two parsing modes: * `relaxed` - This is the default mode. It tries to lean towards simplicity of use. When a row doesn't contains enough columns, they'll be filled with nulls, and when there are too many, the extra values will be merged to the last column. Defining the number of expected columns in this mode is done using the `-c` parameter. If it is not provided, then the number of columns is detected automatically (In most use cases, there is no need to specify `-c`) * `strict` - Strict mode is for hardcore csv/tsv parsing. Whenever a row doesn't contain the proper number of columns, processing will stop. `-c` must be provided when using this mode -* `fluffy` - This mode should not be used, and is just some kind of "backward compatible" parsing mode which was used by q previously. It's left as a separate parsing mode on purpose, in order to accommodate existing users. If you are such a user, please open a bug for your use case, and I'll see how I can incorporate it into the other modes. It is reasonable to say that this mode will be removed in the future. 
### Output formatting option The format of F is as a list of X=f separated by commas, where X is a column number and f is a python format: * X - column number - This is the SELECTed column (or expression) number, not the one from the original table. E.g, 1 is the first SELECTed column, 3 is the third SELECTed column. -* f - A python formatting string - See http://docs.python.org/release/2.4.4/lib/typesseq-strings.html for details if needed. -** Example: `-f 3=%-10s,5=%4.3f,1=%x` +* f - A python formatting string such as {} - See https://www.w3schools.com/python/ref_string_format.asp for details if needed. ## EXAMPLES Example 1: `ls -ltrd * | q "select c1,count(1) from - group by c1"` @@ -181,7 +237,7 @@ Harel Ben-Attia (harelba@gmail.com) Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. ## COPYRIGHT -Copyright (C) 2012--2014 Harel Ben Attia +Copyright (C) 2012--2021 Harel Ben Attia This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. From 2c5c6e5034086acb87865f46a4dbfeaf3f1aee86 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 23 Oct 2021 22:43:29 +0300 Subject: [PATCH 076/111] Remove sqlitebck dependency + windows is working, but not in runner + other fixes (#284) for some reason q.exe works well after an MSI installation on Windows, but not working well on the runner doing a sanity, in terms of dependencies on sqlite3 extension. 
--- .github/workflows/build-and-package.yaml | 71 ++++++++++---------- .github/workflows/q.rb.brew-formula-template | 4 +- QSQL-NOTES.md | 7 +- README.markdown | 4 +- bin/q.py | 22 ++++-- pyoxidizer.bzl | 4 +- requirements-win-x86_64.txt | 4 -- requirements.txt | 1 - setup.py | 3 +- test-requirements.txt | 1 - test/test_suite.py | 5 +- 11 files changed, 65 insertions(+), 61 deletions(-) delete mode 100644 requirements-win-x86_64.txt diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index f7237862..2c550512 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -304,8 +304,13 @@ jobs: mkdir brew + export BRANCH_NAME=${{ github.event.pull_request.head.ref }} + # TODO temp, since template rendering action doesn't work in mac - cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.0-beta/g' > ./brew/q.rb + cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.0-beta/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb + + echo "Resulting formula:" + cat ./brew/q.rb brew install --display-times --formula --build-bottle --verbose ./brew/q.rb brew test ./brew/q.rb @@ -352,10 +357,8 @@ jobs: echo "select sum(c1),count(*) from data_stream_stdin" | sqlite3 test.sqlite - # TODO Windows build/test/package flow is running, but q executable is still not running well, due to pyox+sqlite3 issue build-windows: runs-on: windows-latest - if: ${{ false }} steps: - name: Checkout uses: actions/checkout@v2 @@ -380,9 +383,6 @@ jobs: run: | set -e -x - # Hack to overcome the fact that apsw doesn't have a registered wheel for Windows - cp requirements-win-x86_64.txt requirements.txt - pyoxidizer build --release export Q_EXECUTABLE=./build/x86_64-pc-windows-msvc/release/install/q @@ -397,23 +397,36 @@ jobs: - name: Upload Linux Executable uses: actions/upload-artifact@v1.0.0 with: - name: win-q + name: win-q.exe path: 
packages/windows/win-q.exe - test-windows: + not-really-test-windows: needs: build-windows runs-on: windows-latest steps: - name: Checkout uses: actions/checkout@v2 - - name: Fail deliberately - No tests on Windows + - name: Install Python for Testing + uses: actions/setup-python@v2 + with: + python-version: '3.8' + architecture: 'x64' + - name: Download Windows Executable + uses: actions/download-artifact@v2 + with: + name: win-q.exe + - name: Not-Really-Test Windows + shell: bash continue-on-error: true run: | - echo "Tests are not actually running on the Windows Executable itself. Only the packaging is later tested" - exit 1 + echo "Tests are not compatible with Windows (path separators, tmp folder names etc.). Only a sanity wil be tested" + + chmod +x ./win-q.exe + + seq 1 10000 | ./win-q.exe -c 1 "select sum(c1),count(*) from -" -S some-db.sqlite package-windows: - needs: [create-man, test-windows] + needs: [create-man, not-really-test-windows] runs-on: windows-latest steps: - name: Checkout @@ -439,24 +452,23 @@ jobs: run: | set -e -x - # Hack to overcome the fact that apsw doesn't have a registered wheel for Windows - cp requirements-win-x86_64.txt requirements.txt - pyoxidizer build --release msi_installer find ./ -ls - export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.0-beta.msi + # TODO Windows versions do not support the -beta postfix + + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.0.msi chmod 755 $Q_MSI mkdir -p packages/windows/ - cp $Q_MSI packages/windows/q-text-as-data-3.1.0-beta.msi + cp $Q_MSI packages/windows/q-text-as-data-3.1.0.msi - name: Upload Windows MSI uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.0-beta.msi - path: packages/windows/q-text-as-data-3.1.0-beta.msi + name: q-text-as-data-3.1.0.msi + path: packages/windows/q-text-as-data-3.1.0.msi test-windows-packaging: needs: package-windows @@ -467,12 +479,12 @@ jobs: - name: Download 
Windows Package uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.0-beta.msi + name: q-text-as-data-3.1.0.msi - name: Test Install of MSI continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.0-beta.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.0.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait $process.ExitCode gc msi-install.log @@ -481,14 +493,14 @@ jobs: continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.0-beta.msi /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.0.msi /norestart /quiet" -PassThru -Wait $process.ExitCode exit $process.ExitCode perform-prerelease: # We'd like artifacts to be uploaded regardless of tests succeeded or not, # this is why the dependency here is not on test-X-packaging jobs - needs: [package-linux-deb, package-linux-rpm, package-mac] + needs: [package-linux-deb, package-linux-rpm, package-mac, package-windows] runs-on: ubuntu-latest # TODO Push to master will now pre-release as well, until things stabilize # if: ${{ github.event_name == 'pull_request' }} @@ -513,8 +525,7 @@ jobs: artifacts/**/* perform-release: - # TODO Windows is not here so users won't be confused by seeing an MSI (it's still not production-grade, you need to have sqlite3 dll in the path) - needs: [test-mac-packaging, test-deb-packaging, test-rpm-packaging] + needs: [test-mac-packaging, test-deb-packaging, test-rpm-packaging, test-windows-packaging] runs-on: ubuntu-latest # Disabled on purpose for now - Changing the beta release to a real one will be done manually until everything stabilizes # and then this will be reinstated @@ -525,16 +536,6 @@ jobs: uses: actions/download-artifact@v2 with: path: artifacts/ - - name: Delete Windows Artifacts so 
they're not part of the release for now - run: | - set -x -e - - echo "Deleting windows artifacts so they're not part of the release - windows is not fully ready" - - set +e - rm -vf artifacts/*.msi - rm -vf artifacts/win-q.exe - set -e - uses: "marvinpinto/action-automatic-releases@v1.2.1" with: repo_token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template index c60fb430..15b77d0e 100644 --- a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -4,8 +4,8 @@ class Q < Formula desc "Run SQL directly on CSV or TSV files" homepage "https://harelba.github.io/q/" - # Building directly from master for now, eventually it will be tag-based so the version tag will be downloaded - url "https://github.com/harelba/q/archive/master.tar.gz" + # Using branch name for pre-releases, for tagged releases this would be the version tag, and not "version" part will be needed + url "https://github.com/harelba/q/archive/{{ .Q_BRANCH_NAME }}.tar.gz" version "{{ .Q_VERSION }}" # Removed for now, until everything is finalized diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index 48fa3285..6ef7e2b7 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -20,7 +20,7 @@ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite 500000500000 1000000 q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite 3.96s user 0.08s system 99% cpu 4.057 total -# Now run with `-C read`. The query will run from the cache file and not the original. Change the query and run it several times, to notice the difference in speed. +# Now run with `-C read`. The query will run from the cache file and not the original. Change the query and run it several times, to notice the difference in speed. 
As the file gets bigger, the difference will be much more noticable $ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C read 500000500000 1000000 q -c 1 "select sum(c1),count(*) from myfile.csv" -C read 0.17s user 0.05s system 94% cpu 0.229 total @@ -137,11 +137,12 @@ If you are such a user, and this decision hurts you considerably, please ping me # Installation of the new beta release -For now, only Linux RPM, DEB and Mac OSX are supported. Almost made the Windows version work, but there's some issue there, and the windows executable requires some external dependencies which I'm trying to eliminate. +For now, only Linux RPM, DEB, Mac OSX and Windows are supported. Packages for additional Linux Distros will be added later (it should be rather easy now, due to the use of `fpm`). The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable, put it in your filesystem and `chmod +x` it. -DEB/RPM are working well, although for some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. +Note: For some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. If you have any thoughts about this, please drop me a line. Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.0-beta). + diff --git a/README.markdown b/README.markdown index 40f3b671..e0bc585c 100644 --- a/README.markdown +++ b/README.markdown @@ -39,11 +39,11 @@ The current production version `2.0.19` installation is extremely simple. Instructions for all OSs are [here](http://harelba.github.io/q/#installation). ### Installation of the new beta release -For now, only Linux RPM, DEB and Mac OSX are supported. 
Almost made the Windows version work, but there's some issue there, and the windows executable requires some external dependencies which I'm trying to eliminate. +For now, only Linux RPM, DEB, Mac OSX and Windows are supported. Packages for additional Linux Distros will be added later (it should be rather easy now, due to the use of `fpm`). The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable, put it in your filesystem and `chmod +x` it. -DEB/RPM are working well, although for some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. +Note: For some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. If you have any thoughts about this, please drop me a line. Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.0-beta). 
diff --git a/bin/q.py b/bin/q.py index a71fec45..3d6ece70 100755 --- a/bin/q.py +++ b/bin/q.py @@ -56,7 +56,6 @@ import six import io import json -import sqlitebck import datetime import hashlib @@ -67,6 +66,7 @@ unicode = six.text_type DEBUG = bool(os.environ.get('Q_DEBUG', None)) or '-V' in sys.argv +SQL_DEBUG = False if DEBUG: def xprint(*args,**kwargs): @@ -76,12 +76,17 @@ def iprint(*args,**kwargs): print(datetime.datetime.utcnow().isoformat()," INFO ",*args,file=sys.stderr,**kwargs) def sqlprint(*args,**kwargs): - print(datetime.datetime.utcnow().isoformat(), " SQL ", *args, file=sys.stderr, **kwargs) + pass else: def xprint(*args,**kwargs): pass def iprint(*args,**kwargs): pass def sqlprint(*args,**kwargs): pass +if SQL_DEBUG: + def sqlprint(*args,**kwargs): + print(datetime.datetime.utcnow().isoformat(), " SQL ", *args, file=sys.stderr, **kwargs) + + def get_stdout_encoding(encoding_override=None): if encoding_override is not None and encoding_override != 'none': return encoding_override @@ -1320,7 +1325,7 @@ def normalize_filename_to_table_name(filename): filename = filename[:-7] elif filename.lower().endswith('.sqlite3'): filename = filename[:-8] - return filename.replace("-","_dash_").replace(".","_dot_").replace('?','_qm_').replace("/","_slash_").replace("\\","_backslash_") + return filename.replace("-","_dash_").replace(".","_dot_").replace('?','_qm_').replace("/","_slash_").replace("\\","_backslash_").replace(":","_colon_").replace(" ","_space_").replace("+","_plus_") def validate_content_signature(original_filename, source_signature,other_filename, content_signature,scope=None,dump=False): if dump: @@ -1331,7 +1336,7 @@ def validate_content_signature(original_filename, source_signature,other_filenam scope = [] for k in source_signature: if type(source_signature[k]) == OrderedDict: - return validate_content_signature(original_filename, source_signature[k],other_filename, content_signature[k],scope + [k]) + validate_content_signature(original_filename, 
source_signature[k],other_filename, content_signature[k],scope + [k]) else: if k not in content_signature: raise ContentSignatureDataDiffersException("%s Content Signatures differ. %s is missing from content signature" % (s,k)) @@ -1374,8 +1379,10 @@ def get_last_modification_time_hash(self): if self.atomic_fns is None or len(self.atomic_fns) == 0: return "data stream-lmt" else: - x = ",".join(map(str,[os.stat(x).st_mtime_ns for x in self.atomic_fns])) - return hashlib.sha1(six.b(x)).hexdigest() + x = ",".join(map(lambda x: ':%s:' % x,[os.stat(x).st_mtime_ns for x in self.atomic_fns])) + res = hashlib.sha1(six.b(x)).hexdigest() + '///' + x + xprint("Hash of last modification time is %s" % res) + return res def open_file(self): if self.external_f: @@ -1699,7 +1706,8 @@ def save_cache_to_disk_if_needed(self, disk_db_filename, table_creator): def _store_qsql(self, source_sqlite_db, disk_db_filename): xprint("Storing data as disk db") disk_db_conn = sqlite3.connect(disk_db_filename) - sqlitebck.copy(source_sqlite_db.conn,disk_db_conn) + with disk_db_conn: + source_sqlite_db.conn.backup(disk_db_conn) xprint("Written db to disk: disk db filename %s" % (disk_db_filename)) disk_db_conn.close() diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index 88ce10a2..77270336 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -13,7 +13,7 @@ def make_exe(): policy.set_resource_handling_mode("classify") policy.resources_location = "in-memory" policy.resources_location_fallback = "filesystem-relative:Lib" - policy.allow_in_memory_shared_library_loading = True + policy.allow_in_memory_shared_library_loading = False python_config = dist.make_python_interpreter_config() @@ -61,7 +61,7 @@ def make_msi(exe): # The name of your application. "q-text-as-data", # The version of your application. - "2.1.0", + "3.1.0", # The author/manufacturer of your application. 
"Harel Ben-Attia" ) diff --git a/requirements-win-x86_64.txt b/requirements-win-x86_64.txt deleted file mode 100644 index 69d4961a..00000000 --- a/requirements-win-x86_64.txt +++ /dev/null @@ -1,4 +0,0 @@ -six==1.11.0 -flake8==3.6.0 -setuptools<45.0.0 -# Turns out it would not be possible to use apsw without a major change all around, so we'd need to be able to compile sqlitebck on Windows as well diff --git a/requirements.txt b/requirements.txt index 9661caf6..3ad7d2bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ six==1.11.0 flake8==3.6.0 setuptools<45.0.0 -sqlitebck diff --git a/setup.py b/setup.py index ead9127a..c260e58a 100644 --- a/setup.py +++ b/setup.py @@ -19,8 +19,7 @@ long_description_content_type="text/markdown", author_email='harelba@gmail.com', install_requires=[ - 'six==1.11.0', - 'sqlitebck' + 'six==1.11.0' ], package_dir={"": "bin"}, packages=setuptools.find_packages(where="bin"), diff --git a/test-requirements.txt b/test-requirements.txt index bb72fa51..7e995bae 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,4 +1,3 @@ pytest==6.2.2 flake8==3.6.0 six==1.11.0 -sqlitebck diff --git a/test/test_suite.py b/test/test_suite.py index 4f6aea39..f722e8f9 100755 --- a/test/test_suite.py +++ b/test/test_suite.py @@ -6,9 +6,10 @@ # Prefer end-to-end tests, running the actual q command and testing stdout/stderr, and the return code. # Some utilities are provided for making that easy, see other tests for examples. # -# Don't forget to use the Q_EXECUTABLE instead of hardcoding the q command line. This will be used in the near future -# in order to test the resulting binary executables as well, instead of just executing the q python source code. +# Q_EXECUTABLE env var can be used to inject the path of q. This allows full e2e testing of the resulting executable +# instead of just testing the python code. # +# Tests are compatible with Linux and OSX (path separators, tmp folder, etc.). 
from __future__ import print_function From 8282679ea436a254bc05068a269d97e3953d2ab8 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 23 Oct 2021 23:07:53 +0300 Subject: [PATCH 077/111] move back mac packaging to work with the master --- .github/workflows/build-and-package.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 2c550512..0d5463c9 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -304,7 +304,8 @@ jobs: mkdir brew - export BRANCH_NAME=${{ github.event.pull_request.head.ref }} + #export BRANCH_NAME=${{ github.event.pull_request.head.ref }} + export BRANCH_NAME=master # TODO temp, since template rendering action doesn't work in mac cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.0-beta/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb From 37c2ed63868ee27740f996317b8e9477dc4158c9 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 24 Oct 2021 09:32:29 +0300 Subject: [PATCH 078/111] Fix bug with output header default value parsing in .qrc + Bump Beta Version (#285) --- .github/workflows/build-and-package.yaml | 44 ++++++++++++------------ bin/q.py | 4 +-- pyoxidizer.bzl | 2 +- setup.py | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 0d5463c9..b8cc4b09 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -139,12 +139,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.0-beta-1.x86_64.deb --version 3.1.0-beta ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.1-beta-1.x86_64.deb --version 3.1.1-beta 
./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload DEB Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.0-beta-1.x86_64.deb - path: packages/linux/q-text-as-data-3.1.0-beta-1.x86_64.deb + name: q-text-as-data-3.1.1-beta-1.x86_64.deb + path: packages/linux/q-text-as-data-3.1.1-beta-1.x86_64.deb test-deb-packaging: runs-on: ubuntu-18.04 @@ -155,7 +155,7 @@ jobs: - name: Download DEB uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.0-beta-1.x86_64.deb + name: q-text-as-data-3.1.1-beta-1.x86_64.deb - name: Install Python for Testing uses: actions/setup-python@v2 with: @@ -167,7 +167,7 @@ jobs: pip3 install -r test-requirements.txt - name: Test DEB Package Installation - run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.0-beta-1.x86_64.deb + run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.1-beta-1.x86_64.deb package-linux-rpm: needs: [test-linux, create-man] @@ -199,12 +199,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.0-beta.x86_64.rpm --version 3.1.0-beta ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.1-beta.x86_64.rpm --version 3.1.1-beta ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload RPM Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.0-beta.x86_64.rpm - path: packages/linux/q-text-as-data-3.1.0-beta.x86_64.rpm + name: q-text-as-data-3.1.1-beta.x86_64.rpm + path: packages/linux/q-text-as-data-3.1.1-beta.x86_64.rpm test-rpm-packaging: runs-on: ubuntu-18.04 @@ -215,9 +215,9 @@ jobs: - name: Download RPM uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.0-beta.x86_64.rpm + name: q-text-as-data-3.1.1-beta.x86_64.rpm - name: Retest using RPM - run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.0-beta.x86_64.rpm + run: ./dist/test-using-rpm.sh 
./q-text-as-data-3.1.1-beta.x86_64.rpm build-mac: runs-on: macos-11 @@ -308,7 +308,7 @@ jobs: export BRANCH_NAME=master # TODO temp, since template rendering action doesn't work in mac - cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.0-beta/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb + cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.1-beta/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb echo "Resulting formula:" cat ./brew/q.rb @@ -322,8 +322,8 @@ jobs: - name: Upload Executable uses: actions/upload-artifact@v1.0.0 with: - name: q--3.1.0-beta_1.big_sur.bottle.tar.gz - path: ./q--3.1.0-beta_1.big_sur.bottle.tar.gz + name: q--3.1.1-beta_1.big_sur.bottle.tar.gz + path: ./q--3.1.1-beta_1.big_sur.bottle.tar.gz # TODO auto-create PR to main homebrew-core # git clone https://github.com/harelba/homebrew-core.git @@ -340,7 +340,7 @@ jobs: - name: Download q bottle uses: actions/download-artifact@v2 with: - name: q--3.1.0-beta_1.big_sur.bottle.tar.gz + name: q--3.1.1-beta_1.big_sur.bottle.tar.gz - name: Test the created bottle run: | set -x -e @@ -349,7 +349,7 @@ jobs: WD=$(pwd) pushd /usr/local/Cellar - tar xvfz ${WD}/q--3.1.0-beta_1.big_sur.bottle.tar.gz + tar xvfz ${WD}/q--3.1.1-beta_1.big_sur.bottle.tar.gz popd brew link q @@ -459,17 +459,17 @@ jobs: # TODO Windows versions do not support the -beta postfix - export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.0.msi + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.1.msi chmod 755 $Q_MSI mkdir -p packages/windows/ - cp $Q_MSI packages/windows/q-text-as-data-3.1.0.msi + cp $Q_MSI packages/windows/q-text-as-data-3.1.1.msi - name: Upload Windows MSI uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.0.msi - path: packages/windows/q-text-as-data-3.1.0.msi + name: q-text-as-data-3.1.1.msi + path: packages/windows/q-text-as-data-3.1.1.msi 
test-windows-packaging: needs: package-windows @@ -480,12 +480,12 @@ jobs: - name: Download Windows Package uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.0.msi + name: q-text-as-data-3.1.1.msi - name: Test Install of MSI continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.0.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.1.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait $process.ExitCode gc msi-install.log @@ -494,7 +494,7 @@ jobs: continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.0.msi /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.1.msi /norestart /quiet" -PassThru -Wait $process.ExitCode exit $process.ExitCode diff --git a/bin/q.py b/bin/q.py index 3d6ece70..716e7831 100755 --- a/bin/q.py +++ b/bin/q.py @@ -35,7 +35,7 @@ from sqlite3.dbapi2 import OperationalError from uuid import uuid4 -q_version = '3.1.0-beta' +q_version = '3.1.1-beta' #__all__ = [ 'QTextAsData' ] @@ -3279,7 +3279,7 @@ def initialize_command_line_parser(p, qrc_filename): default_output_delimiter = get_option_with_default(p, 'string', 'output_delimiter', None) default_pipe_delimited_output = get_option_with_default(p, 'boolean', 'pipe_delimited_output', False) default_tab_delimited_output = get_option_with_default(p, 'boolean', 'tab_delimited_output', False) - default_output_header = get_option_with_default(p, 'string', 'output_header', False) + default_output_header = get_option_with_default(p, 'boolean', 'output_header', False) default_beautify = get_option_with_default(p, 'boolean', 'beautify', False) default_formatting = get_option_with_default(p, 'string', 'formatting', None) default_output_encoding = get_option_with_default(p, 'string', 'output_encoding', 'none') 
diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index 77270336..6ccf0dbe 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -61,7 +61,7 @@ def make_msi(exe): # The name of your application. "q-text-as-data", # The version of your application. - "3.1.0", + "3.1.1", # The author/manufacturer of your application. "Harel Ben-Attia" ) diff --git a/setup.py b/setup.py index c260e58a..d0c5d483 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup import setuptools -q_version = '3.1.0-beta' +q_version = '3.1.1-beta' with open("README.markdown", "r", encoding="utf-8") as fh: long_description = fh.read() From 43df29247d93a7f8cf6e6e3807c8415b49d83c8a Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 24 Oct 2021 09:53:58 +0300 Subject: [PATCH 079/111] bump beta version after making windows work + some small fixes --- QSQL-NOTES.md | 4 ++-- README.markdown | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index 6ef7e2b7..33cc9445 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -1,5 +1,5 @@ -# New beta version 3.1.0-beta is available +# New beta version 3.1.1-beta is available Installation instructions [at the end of this document](QSQL-NOTES.md#installation-of-the-new-beta-release) Contains a lot of major changes, see sections below for details. @@ -143,6 +143,6 @@ The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` exe Note: For some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. If you have any thoughts about this, please drop me a line. -Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.0-beta). +Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.1-beta). 
diff --git a/README.markdown b/README.markdown index e0bc585c..b0f20d25 100644 --- a/README.markdown +++ b/README.markdown @@ -7,7 +7,7 @@ q treats ordinary files as database tables, and supports all SQL constructs, suc q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. -## New beta version `3.1.0-beta` is available +## New beta version `3.1.1-beta` is available Full Details [here](QSQL-NOTES.md), and an example of the caching is in [here](QSQL-NOTES.md#basic-example-of-using-the-caching). This is the list of new/changed functionality in this version, large changes, please make sure to read it and the details link as well. @@ -32,7 +32,7 @@ The version is still in early testing, for two reasons: If you're testing it out, I'd be more than happy to get any feedback. Please write all your feedback in [this issue](https://github.com/harelba/q/issues/281), instead of opening separate issues. That would really help me with managing this. ## Installation. -**This will currently install the latest standard version `2.0.19`. See below if you want to download the `3.1.0-beta` version** +**This will currently install the latest standard version `2.0.19`. See below if you want to download the `3.1.1-beta` version** The current production version `2.0.19` installation is extremely simple. @@ -45,7 +45,7 @@ The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` exe Note: For some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. If you have any thoughts about this, please drop me a line. -Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.0-beta). 
+Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.1-beta). ## Examples From 6da9a88721ea8c541127c1d650c333b671de4393 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 29 Oct 2021 23:08:55 +0300 Subject: [PATCH 080/111] docs --- QSQL-NOTES.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index 33cc9445..ef6e3e0d 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -20,12 +20,17 @@ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite 500000500000 1000000 q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite 3.96s user 0.08s system 99% cpu 4.057 total -# Now run with `-C read`. The query will run from the cache file and not the original. Change the query and run it several times, to notice the difference in speed. As the file gets bigger, the difference will be much more noticable +# Now run with `-C read`. The query will run from the cache file and not the original. As the file gets bigger, the difference will be much more noticable $ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C read 500000500000 1000000 q -c 1 "select sum(c1),count(*) from myfile.csv" -C read 0.17s user 0.05s system 94% cpu 0.229 total -# You can query the qsql file directly +# Now let's try another query on that file. Notice the short query duration. The cache is being used for any query that uses this file, and queries on multiple files that contain caches will reuse the cache as well. 
+$ time q -c 1 "select avg(c1) from myfile.csv" -C read +500000.5 +q -c 1 "select avg(c1) from myfile.csv" -C read 0.16s user 0.05s system 99% cpu 0.217 total + +# You can also query the qsql file directly $ time q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 500000500000 1000000 q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 0.17s user 0.05s system 95% cpu 0.226 total @@ -33,7 +38,7 @@ q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 0.17s user 0.05s system 9 # Now let's delete the original csv file $ rm -vf myfile.csv -# Running another query on the qsql file just works +# Running another query directly on the qsql file just works $ q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 500000500000 1000000 q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 0.17s user 0.04s system 94% cpu 0.226 total From f6c0299605c1f598c0fc08dcb029fcebd61b7052 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 29 Oct 2021 23:11:05 +0300 Subject: [PATCH 081/111] docs --- QSQL-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index ef6e3e0d..dc67d8ab 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -5,7 +5,7 @@ Installation instructions [at the end of this document](QSQL-NOTES.md#installati Contains a lot of major changes, see sections below for details. ## Basic Example of using the caching -``` +```bash # Prepare some data $ seq 1 1000000 > myfile.csv From c14ac80e07cf8c433cf74e026b6ed5e030afb155 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 15:13:09 +0200 Subject: [PATCH 082/111] Release 3.1.3 (#286) Some minor fixes from beta. Also removed the one-column warning. Still with manual version bump - Hopefully all works well in terms of the package versioning. I'll automate the version bumps later on. 
--- .github/workflows/build-and-package.yaml | 44 +-- .github/workflows/q.rb.brew-formula-template | 3 +- QSQL-NOTES.md | 73 +--- README.markdown | 76 ++-- benchmark-config.sh | 2 +- bin/q.py | 11 +- mkdocs/docs/about.md | 2 +- mkdocs/docs/index.md | 367 +++++++++++++++---- pyoxidizer.bzl | 2 +- setup.py | 2 +- test/test_suite.py | 67 +++- 11 files changed, 424 insertions(+), 225 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index b8cc4b09..08970eec 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -139,12 +139,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.1-beta-1.x86_64.deb --version 3.1.1-beta ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.3-1.x86_64.deb --version 3.1.3 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload DEB Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.1-beta-1.x86_64.deb - path: packages/linux/q-text-as-data-3.1.1-beta-1.x86_64.deb + name: q-text-as-data-3.1.3-1.x86_64.deb + path: packages/linux/q-text-as-data-3.1.3-1.x86_64.deb test-deb-packaging: runs-on: ubuntu-18.04 @@ -155,7 +155,7 @@ jobs: - name: Download DEB uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.1-beta-1.x86_64.deb + name: q-text-as-data-3.1.3-1.x86_64.deb - name: Install Python for Testing uses: actions/setup-python@v2 with: @@ -167,7 +167,7 @@ jobs: pip3 install -r test-requirements.txt - name: Test DEB Package Installation - run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.1-beta-1.x86_64.deb + run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.3-1.x86_64.deb package-linux-rpm: needs: [test-linux, create-man] @@ -199,12 +199,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t rpm 
--rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.1-beta.x86_64.rpm --version 3.1.1-beta ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.3.x86_64.rpm --version 3.1.3 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload RPM Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.1-beta.x86_64.rpm - path: packages/linux/q-text-as-data-3.1.1-beta.x86_64.rpm + name: q-text-as-data-3.1.3.x86_64.rpm + path: packages/linux/q-text-as-data-3.1.3.x86_64.rpm test-rpm-packaging: runs-on: ubuntu-18.04 @@ -215,9 +215,9 @@ jobs: - name: Download RPM uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.1-beta.x86_64.rpm + name: q-text-as-data-3.1.3.x86_64.rpm - name: Retest using RPM - run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.1-beta.x86_64.rpm + run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.3.x86_64.rpm build-mac: runs-on: macos-11 @@ -308,7 +308,7 @@ jobs: export BRANCH_NAME=master # TODO temp, since template rendering action doesn't work in mac - cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.1-beta/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb + cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.3/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb echo "Resulting formula:" cat ./brew/q.rb @@ -322,8 +322,8 @@ jobs: - name: Upload Executable uses: actions/upload-artifact@v1.0.0 with: - name: q--3.1.1-beta_1.big_sur.bottle.tar.gz - path: ./q--3.1.1-beta_1.big_sur.bottle.tar.gz + name: q--3.1.3_1.big_sur.bottle.tar.gz + path: ./q--3.1.3_1.big_sur.bottle.tar.gz # TODO auto-create PR to main homebrew-core # git clone https://github.com/harelba/homebrew-core.git @@ -340,7 +340,7 @@ jobs: - name: Download q bottle uses: actions/download-artifact@v2 with: - name: q--3.1.1-beta_1.big_sur.bottle.tar.gz + name: 
q--3.1.3_1.big_sur.bottle.tar.gz - name: Test the created bottle run: | set -x -e @@ -349,7 +349,7 @@ jobs: WD=$(pwd) pushd /usr/local/Cellar - tar xvfz ${WD}/q--3.1.1-beta_1.big_sur.bottle.tar.gz + tar xvfz ${WD}/q--3.1.3_1.big_sur.bottle.tar.gz popd brew link q @@ -459,17 +459,17 @@ jobs: # TODO Windows versions do not support the -beta postfix - export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.1.msi + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.3.msi chmod 755 $Q_MSI mkdir -p packages/windows/ - cp $Q_MSI packages/windows/q-text-as-data-3.1.1.msi + cp $Q_MSI packages/windows/q-text-as-data-3.1.3.msi - name: Upload Windows MSI uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.1.msi - path: packages/windows/q-text-as-data-3.1.1.msi + name: q-text-as-data-3.1.3.msi + path: packages/windows/q-text-as-data-3.1.3.msi test-windows-packaging: needs: package-windows @@ -480,12 +480,12 @@ jobs: - name: Download Windows Package uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.1.msi + name: q-text-as-data-3.1.3.msi - name: Test Install of MSI continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.1.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.3.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait $process.ExitCode gc msi-install.log @@ -494,7 +494,7 @@ jobs: continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.1.msi /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.3.msi /norestart /quiet" -PassThru -Wait $process.ExitCode exit $process.ExitCode diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template index 
15b77d0e..c860a0ce 100644 --- a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -5,8 +5,7 @@ class Q < Formula desc "Run SQL directly on CSV or TSV files" homepage "https://harelba.github.io/q/" # Using branch name for pre-releases, for tagged releases this would be the version tag, and not "version" part will be needed - url "https://github.com/harelba/q/archive/{{ .Q_BRANCH_NAME }}.tar.gz" - version "{{ .Q_VERSION }}" + url "https://github.com/harelba/q/archive/3.1.3.tar.gz" # Removed for now, until everything is finalized # sha256 "0844aed6658d0347a299b84bee978c88724d45093e8cbd7b05506ecc0b93c98c" diff --git a/QSQL-NOTES.md b/QSQL-NOTES.md index dc67d8ab..5f61f83a 100644 --- a/QSQL-NOTES.md +++ b/QSQL-NOTES.md @@ -1,57 +1,22 @@ -# New beta version 3.1.1-beta is available -Installation instructions [at the end of this document](QSQL-NOTES.md#installation-of-the-new-beta-release) - -Contains a lot of major changes, see sections below for details. - -## Basic Example of using the caching -```bash -# Prepare some data -$ seq 1 1000000 > myfile.csv - -# read from the resulting file (-c 1 just prevents the warning of having one column only) -$ time q -c 1 "select sum(c1),count(*) from myfile.csv" -500000500000 1000000 -q -c 1 "select sum(c1),count(*) from myfile.csv" 4.02s user 0.06s system 99% cpu 4.108 total - -# Running with `-C readwrite` auto-creates a cache file if there is none. The cache filename would be myfile.csv.qsql. The query runs as usual -$ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite -time q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite -500000500000 1000000 -q -c 1 "select sum(c1),count(*) from myfile.csv" -C readwrite 3.96s user 0.08s system 99% cpu 4.057 total - -# Now run with `-C read`. The query will run from the cache file and not the original. 
As the file gets bigger, the difference will be much more noticable -$ time q -c 1 "select sum(c1),count(*) from myfile.csv" -C read -500000500000 1000000 -q -c 1 "select sum(c1),count(*) from myfile.csv" -C read 0.17s user 0.05s system 94% cpu 0.229 total - -# Now let's try another query on that file. Notice the short query duration. The cache is being used for any query that uses this file, and queries on multiple files that contain caches will reuse the cache as well. -$ time q -c 1 "select avg(c1) from myfile.csv" -C read -500000.5 -q -c 1 "select avg(c1) from myfile.csv" -C read 0.16s user 0.05s system 99% cpu 0.217 total - -# You can also query the qsql file directly -$ time q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" -500000500000 1000000 -q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 0.17s user 0.05s system 95% cpu 0.226 total - -# Now let's delete the original csv file -$ rm -vf myfile.csv - -# Running another query directly on the qsql file just works -$ q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" -500000500000 1000000 -q -c 1 "select sum(c1),count(*) from myfile.csv.qsql" 0.17s user 0.04s system 94% cpu 0.226 total - -# See the `.qrc` section below if you want to set the default `-C` (`--caching-mode`) to something other than `none` (the default) -``` +## Major changes and additions in the new `3.x` version +This is the list of new/changed functionality in this version. Large changes, please make sure to read the details if you're already using q. + +* **Automatic Immutable Caching** - Automatic caching of data files (into `.qsql` files), with huge speedups for medium/large files. Enabled through `-C readwrite` or `-C read` +* **Direct querying of standard sqlite databases** - Just use it as a table name in the query. Format is `select ... from :::`, or just `` if the database contains only one table. Multiple separate sqlite databases are fully supported in the same query. 
+* **Direct querying of the `qsql` cache files** - The user can query directly from the `qsql` files, removing the need for the original files. Just use `select ... from .qsql`. Please wait until the non-beta version is out before thinking about deleting any of your original files... +* **Revamped `.qrc` mechanism** - allows opting-in to caching without specifying it in every query. By default, caching is **disabled**, for backward compatibility and for finding usability issues. +* **Save-to-db is now reusable for queries** - `--save-db-to-disk` option (`-S`) has been enhanced to match the new capabilities. You can query the resulting file directly through q, using the method mentioned above (it's just a standard sqlite database). +* **Only python3 is supported from now on** - Shouldn't be an issue, since q is a self-contained binary executable which has its own python embedded in it. Internally, q is now packaged with Python 3.8. After everything cools down, I'll probably bump this to 3.9/3.10. +* **Minimal Linux Version Bumped** - Works with CentOS 8, Ubuntu 18.04+, Debian 10+. Currently only for x86_64. Depends on glibc version 2.25+. Haven't tested it on other architectures. Issuing other architectures will be possible later on +* **Completely revamped binary packaging** - Using [pyoxidizer](https://github.com/indygreg/PyOxidizer) -The following sections provide the details of each of the new functionality in this major version. +The following sections provide the details of each of the new functionalities in this major version. ## Automatic caching of data files Speeding up subsequent reads from the same file by several orders of magnitude by automatically creating an immutable cache file for each tabular text file. -For example, reading a 0.9GB file with 1M rows and 100 columns without caching takes ~50 seconds. When the cache exists, querying the same file will take less than 1 second. 
Obviously, the cache can be used in order to perform any query and not just the original query that was used for creating the cache. +For example, reading a 0.9GB file with 1M rows and 100 columns without caching takes ~50 seconds. When the cache exists, querying the same file will take around ~1-2 seconds. Obviously, the cache can be used in order to perform any query and not just the original query that was used for creating the cache. When caching is enabled, the cache is created on the first read of a file, and used automatically when reading it in other queries. A separate cache is being created for each file that is being used, allowing reuse in multiple use-cases. For example, if two csv files each have their own cache file from previous queries, then running a query that JOINs these two files would use the caches as well (without loading the data into memory), speeding it up considerably. @@ -139,15 +104,3 @@ Removed the dual py2/py3 support. Since q is packaged as a self-contained execut Users which for some reason still use q's main source code file directly and use python 2 would need to stay with the latest 2.0.19 release. In some next version, q's code structure is going to change significantly anyway in order to become a standard python module, so using the main source code file directly would not be possible. If you are such a user, and this decision hurts you considerably, please ping me. - - -# Installation of the new beta release -For now, only Linux RPM, DEB, Mac OSX and Windows are supported. Packages for additional Linux Distros will be added later (it should be rather easy now, due to the use of `fpm`). - -The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable, put it in your filesystem and `chmod +x` it. - -Note: For some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. 
If you have any thoughts about this, please drop me a line. - -Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.1-beta). - - diff --git a/README.markdown b/README.markdown index b0f20d25..aacdfb62 100644 --- a/README.markdown +++ b/README.markdown @@ -1,66 +1,50 @@ -[![Build Status](https://travis-ci.org/harelba/q.svg?branch=master)](https://travis-ci.org/harelba/q) +[![Build and Package](https://github.com/harelba/q/workflows/BuildAndPackage/badge.svg?branch=master)](https://github.com/harelba/q/actions?query=branch%3Amaster) -# q - Text as Data -q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). - -q treats ordinary files as database tables, and supports all SQL constructs, such as `WHERE`, `GROUP BY`, `JOIN`s, etc. It supports automatic column name and type detection, and q provides full support for multiple character encodings. - -q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. - -## New beta version `3.1.1-beta` is available -Full Details [here](QSQL-NOTES.md), and an example of the caching is in [here](QSQL-NOTES.md#basic-example-of-using-the-caching). - -This is the list of new/changed functionality in this version, large changes, please make sure to read it and the details link as well. - -* **Automatic Immutable Caching** - Automatic caching of data files (into `.qsql` files), with huge speedups for medium/large files. Enabled through `-C readwrite` or `-C read` -* **Direct querying of standard sqlite databases** - Just use it as a table name in the query. Format is `select ... from :::`, or just `` if the database contains only one table. Multiple separate sqlite databases are fully supported in the same query. 
-* **Direct querying of the `qsql` cache files** - The user can query directly from the `qsql` files, removing the need for the original files. Just use `select ... from .qsql`. Please wait until the non-beta version is out before thinking about deleting any of your original files... -* **Revamped `.qrc` mechanism** - allows opting-in to caching without specifying it in every query. By default, caching is **disabled**, for backward compatibility and for finding usability issues. -* **Save-to-db is now reusable for queries** - `--save-db-to-disk` option (`-S`) has been enhanced to match the new capabilities. You can query the resulting file directly through q, using the method mentioned above (it's just a standard sqlite database). -* **Only python3 is supported from now on** - Shouldn't be an issue, since q is a self-contained binary executable which has its own python embedded in it. Internally, q is now packaged with Python 3.8. After everything cools down, I'll probably bump this to 3.9/3.10. -* **Minimal Linux Version Bumped** - Works with CentOS 8, Ubuntu 18.04+, Debian 10+. Currently only for x86_64. Depends on glibc version 2.25+. Haven't tested it on other architectures. Issuing other architectures will be possible later on -Full details on the changes and the new usage is in [here](QSQL-NOTES.md) - -The version is still in early testing, for two reasons: - -* Completely new build and packaging flow - Using [pyoxidizer](https://github.com/indygreg/PyOxidizer) -* It's a very large change in functionality, which might surface issues, new and backward compatibility ones +# q - Text as Data +q's purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. -**Please don't use it for production, until the final non-beta version is out** +q allows the following: -If you're testing it out, I'd be more than happy to get any feedback. 
Please write all your feedback in [this issue](https://github.com/harelba/q/issues/281), instead of opening separate issues. That would really help me with managing this. +* Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file. +* Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory -## Installation. -**This will currently install the latest standard version `2.0.19`. See below if you want to download the `3.1.1-beta` version** +The following table shows the impact of using caching: -The current production version `2.0.19` installation is extremely simple. +| Rows | Columns | File Size | Query time without caching | Query time with caching | Speed Improvement | +|:---------:|:-------:|:---------:|:--------------------------:|:-----------------------:|:-----------------:| +| 5,000,000 | 100 | 4.8GB | 4 minutes, 47 seconds | 1.92 seconds | x149 | +| 1,000,000 | 100 | 983MB | 50.9 seconds | 0.461 seconds | x110 | +| 1,000,000 | 50 | 477MB | 27.1 seconds | 0.272 seconds | x99 | +| 100,000 | 100 | 99MB | 5.2 seconds | 0.141 seconds | x36 | +| 100,000 | 50 | 48MB | 2.7 seconds | 0.105 seconds | x25 | -Instructions for all OSs are [here](http://harelba.github.io/q/#installation). +Notice that for the current version, caching is **not enabled** by default, since the caches take disk space. Use `-C readwrite` or `-C read` to enable it for a query, or add `caching_mode` to `.qrc` to set a new default. + +q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. -### Installation of the new beta release -For now, only Linux RPM, DEB, Mac OSX and Windows are supported. 
Packages for additional Linux Distros will be added later (it should be rather easy now, due to the use of `fpm`). -The beta OSX version is not in `brew` yet, you'll need to take the `macos-q` executable, put it in your filesystem and `chmod +x` it. +## Usage Examples +q treats ordinary files as database tables, and supports all SQL constructs, such as `WHERE`, `GROUP BY`, `JOIN`s, etc. It supports automatic column name and type detection, and provides full support for multiple character encodings. -Note: For some reason showing the q manual (`man q`) does not work for Debian, even though it's packaged in the DEB file. I'll get around to fixing it later. If you have any thoughts about this, please drop me a line. +Here are some example commands to get the idea: -Download the relevant files directly from [The Beta Release Assets](https://github.com/harelba/q/releases/tag/v3.1.1-beta). +```bash +$ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" -## Examples +$ ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +$ q "select count(*) from some_db.sqlite3:::albums a left join another_db.sqlite3:::tracks t on (a.album_id = t.album_id)" ``` -q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" -ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" -``` +Detailed examples are in [here](http://harelba.github.io/q/#examples) -Go [here](http://harelba.github.io/q/#examples) for more examples. +## Installation. +**New Major Version `3.1.3` is out with a lot of significant additions.** -## Benchmark -I have created a preliminary benchmark comparing q's speed between python2, python3, and comparing both to textql and octosql. +Instructions for all OSs are [here](http://harelba.github.io/q/#installation). -Your input about the validity of the benchmark and about the results would be greatly appreciated. More details are [here](test/BENCHMARK.md). 
+The previous version `2.0.19` can still be downloaded from [here](https://github.com/harelba/q/releases/tag/2.0.19) ## Contact Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course. @@ -71,5 +55,5 @@ Twitter [@harelba](https://twitter.com/harelba) Email [harelba@gmail.com](mailto:harelba@gmail.com) -q on twitter: #qtextasdata +q on twitter: [#qtextasdata](https://twitter.com/hashtag/qtextasdata?src=hashtag_click) diff --git a/benchmark-config.sh b/benchmark-config.sh index 8606b926..b293d9a2 100644 --- a/benchmark-config.sh +++ b/benchmark-config.sh @@ -1,3 +1,3 @@ #!/bin/bash -BENCHMARK_PYTHON_VERSIONS=(3.6.4 3.8.5) +BENCHMARK_PYTHON_VERSIONS=(3.8.5) diff --git a/bin/q.py b/bin/q.py index 716e7831..f36cb8f0 100755 --- a/bin/q.py +++ b/bin/q.py @@ -35,7 +35,7 @@ from sqlite3.dbapi2 import OperationalError from uuid import uuid4 -q_version = '3.1.1-beta' +q_version = '3.1.3' #__all__ = [ 'QTextAsData' ] @@ -337,7 +337,7 @@ class Sqlite3DB(object): QCATALOG_TABLE_NAME = '_qcatalog' NUMERIC_COLUMN_TYPES = {int, long, float} - PYTHON_TO_SQLITE_TYPE_NAMES = { str: 'TEXT', int: 'INT', long : 'INT' , float: 'FLOAT', None: 'TEXT' } + PYTHON_TO_SQLITE_TYPE_NAMES = { str: 'TEXT', int: 'INT', long : 'INT' , float: 'REAL', None: 'TEXT' } def __str__(self): @@ -1025,6 +1025,9 @@ def determine_type_of_value_list(self, value_list): # return it return type_list_without_nulls[0] else: + # If there are only two types, one float and one int, then choose a float type + if len(set(type_list_without_nulls)) == 2 and float in type_list_without_nulls and int in type_list_without_nulls: + return float return str def do_analysis(self): @@ -1035,8 +1038,8 @@ def do_analysis(self): else: raise Exception('Unknown parsing mode %s' % self.mode) - if self.column_count == 1 and self.expected_column_count != 1: - print("Warning: column count is one - did you provide the correct delimiter?", file=sys.stderr) + if
self.column_count == 1 and self.expected_column_count != 1 and self.expected_column_count is not None: + print(f"Warning: column count is one (expected column count is {self.expected_column_count} - did you provide the correct delimiter?", file=sys.stderr) self.infer_column_types() self.infer_column_names() diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md index 49ec15bd..9da12940 100644 --- a/mkdocs/docs/about.md +++ b/mkdocs/docs/about.md @@ -6,5 +6,5 @@ ### Email [harelba@gmail.com](mailto:harelba@gmail.com) -### Chinese translation [jinzhencheng@outlook.com](jinzhencheng@outlook.com) +### Chinese translation [jinzhencheng@outlook.com](mailto:jinzhencheng@outlook.com) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 98e31c69..29ab2e7d 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -5,19 +5,56 @@ ## Overview -q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). +q's purpose is to bring SQL expressive power to the Linux command line by providing easy access to text as actual data. -q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and column type detection, and provides full support for multiple encodings. 
+q allows the following: -``` bash -q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3" -``` +* Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file +* Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory -``` bash -ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +The following table shows the impact of using caching: + +| Rows | Columns | File Size | Query time without caching | Query time with caching | Speed Improvement | +|:---------:|:-------:|:---------:|:--------------------------:|:-----------------------:|:-----------------:| +| 5,000,000 | 100 | 4.8GB | 4 minutes, 47 seconds | 1.92 seconds | x149 | +| 1,000,000 | 100 | 983MB | 50.9 seconds | 0.461 seconds | x110 | +| 1,000,000 | 50 | 477MB | 27.1 seconds | 0.272 seconds | x99 | +| 100,000 | 100 | 99MB | 5.2 seconds | 0.141 seconds | x36 | +| 100,000 | 50 | 48MB | 2.7 seconds | 0.105 seconds | x25 | + +Notice that for the current version, caching is **not enabled** by default, since the caches take disk space. Use `-C readwrite` or `-C read` to enable it for a query, or add `caching_mode` to `.qrc` to set a new default. + +q treats ordinary files as database tables, and supports all SQL constructs, such as `WHERE`, `GROUP BY`, `JOIN`s, etc. It supports automatic column name and type detection, and provides full support for multiple character encodings. + +The new features - autocaching, direct querying of sqlite database and the use of `~/.qrc` file are described in detail in [here](https://github.com/harelba/q/blob/master/QSQL-NOTES.md). 
+ +## Basic Usage + +```bash + q + +Example Execution for a delimited file: + + q "select * from myfile.csv" + +Example Execution for an sqlite3 database: + + q "select * from mydatabase.sqlite:::my_table_name" + + or + + q "select * from mydatabase.sqlite" + + if the database file contains only one table + +Auto-caching of delimited files can be activated through `-C readwrite` +(writes new caches if needed) or `-C read` (only reads existing cache files) + +Setting the default caching mode (`-C`) can be done by +writing a `~/.qrc` file. See docs for more info. ``` -Look at some examples [here](#examples), or just download the tool using the links in the [installation](#installation) below and play with it. +Download the tool using the links in the [installation](#installation) below and play with it. | | | |:--------------------------------------:|:-----------------------------------------------:| @@ -32,99 +69,262 @@ Look at some examples [here](#examples), or just download the tool using the lin | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/q/releases/download/2.0.19/q-x86_64-Darwin)|run `brew install q`|man page is not available for this release yet. Use `q --help` for now|| -|[RPM Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data-2.0.19-1.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| -|[DEB Package](https://github.com/harelba/q/releases/download/2.0.19/q-text-as-data_2.0.19-2_amd64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`.| -|[Windows Installer](https://github.com/harelba/q/releases/download/2.0.19/q-AMD64-Windows-installer.exe)|Run the installer executable and hit next next next... 
q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.| -|[tar.gz](https://github.com/harelba/q/archive/2.0.19.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| -|[zip](https://github.com/harelba/q/archive/2.0.19.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[OSX](https://github.com/harelba/q/releases/download/v3.1.3/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. After the first time, it will run from the command line without any issues. |A man page is available, just run `man q`|| +|[RPM Package](https://github.com/harelba/q/releases/download/v3.1.3/q-text-as-data-3.1.3.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| +|[DEB Package](https://github.com/harelba/q/releases/download/v3.1.3/q-text-as-data-3.1.3-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| +|[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.3/q-text-as-data-3.1.3.msi)|Run the installer executable and hit next next next... 
q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| +|[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.3.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.3.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| -**Older versions can be downloaded [here](https://github.com/harelba/packages-for-q). Please let me know if you plan on using an older version, and why - I know of no reason to use any of them.** +I will add packages for additional Linux Distributions if there's demand for it. If you're interested in another Linux distribution, please ping me. It's relatively easy to add new ones with the new packaging flow. + +The previous version `2.0.19` can be downloaded directly from [here](https://github.com/harelba/q/releases/tag/2.0.19). Please let me know if for some reason the new version is not suitable for your needs, and you're planning on using the previous one. ## Requirements -As of version `2.0.9`, there's no need for any external dependency. Python itself (3.7), and any needed libraries are self-contained inside the installation, isolated from the rest of your system. +q is packaged as a compiled standalone-executable that has no dependencies, not even python itself. This was done by using the awesome [pyoxidizer](https://github.com/indygreg/PyOxidizer) project. -## Usage -``` bash -q "" +## Examples - Simplest execution is `q "SELECT * FROM myfile"` which prints the entire file. +This section shows example flows that highlight the main features. For more basic examples, see [here](#getting-started-examples). 
+ +### Basic Examples: + +```bash +# Prepare some data +$ seq 1 1000000 > myfile.csv + +# Query it +$ q "select sum(c1),count(*) from myfile.csv where c1 % 3 = 0" +166666833333 333333 + +# Use q to query from stdin +$ ps -ef | q -b -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3" +501 288 +0 115 +270 17 ``` -q allows performing SQL-like statements on tabular text data. Its purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. +### Auto-caching Examples -Query should be an SQL-like query which contains *filenames instead of table names* (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). Multiple files can be used as one table by either writing them as `filename1+filename2+...` or by using shell wildcards (e.g. `my_files*.csv`). +```bash +# (time command output has been shortened for brevity) -Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. `q "SELECT c3,c8 from ..."`). +# Prepare some data +$ seq 1 1000000 > myfile.csv -Use `-d` to specify the input delimiter. +# Read from the resulting file +$ time q "select sum(c1),count(*) from myfile.csv" +500000500000 1000000 +total_time=4.108 seconds -Column types are auto detected by the tool, no casting is needed. Note that there's a flag `--as-text` which forces all columns to be treated as text columns. +# Running with `-C readwrite` auto-creates a cache file if there is none. The cache filename would be myfile.csv.qsql. The query runs as usual +$ time q "select sum(c1),count(*) from myfile.csv" -C readwrite +500000500000 1000000 +total_time=4.057 seconds -Please note that column names that include spaces need to be used in the query with back-ticks, as per the sqlite standard. 
+# Now run with `-C read`. The query will run from the cache file and not the original. As the file gets bigger, the difference will be much more noticeable +$ time q "select sum(c1),count(*) from myfile.csv" -C read +500000500000 1000000 +total_time=0.229 seconds -Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed. +# Now let's try another query on that file. Notice the short query duration. The cache is being used for any query that uses this file, and queries on multiple files that contain caches will reuse the cache as well. +$ time q "select avg(c1) from myfile.csv" -C read +500000.5 +total_time=0.217 seconds -All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). Take a look at the [limitations](#limitations) section below for some rarely-used use cases which are not fully supported. +# You can also query the qsql file directly, as it's just a standard sqlite3 DB file (see next section for q's support of reading directly from sqlite DBs) +$ time q "select sum(c1),count(*) from myfile.csv.qsql" +500000500000 1000000 +total_time=0.226 seconds -### Query -Each parameter that q gets is a full SQL query. All queries are executed one after another, outputing the results to standard output. Note that data loading is done only once, so when passing multiple queries on the same command-line, only the first one will take a long time. The rest will starting running almost instantanously, since all the data will already have been loaded. Remeber to double-quote each of the queries - Each parameter is a full SQL query. +# Now let's delete the original csv file (be careful when deleting original data) +$ rm -vf myfile.csv -Any standard SQL expression, condition (both WHERE and HAVING), GROUP BY, ORDER BY etc. are allowed. 
+# Running another query directly on the qsql file just works +$ time q "select sum(c1),count(*) from myfile.csv.qsql" +500000500000 1000000 +total_time=0.226 seconds -JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs. +# See the `.qrc` section below if you want to set the default `-C` (`--caching-mode`) to something other than `none` (the default) +``` -The SQL syntax itself is sqlite's syntax. For details look at http://www.sqlite.org/lang.html or search the net for examples. +### Direct sqlite Querying Examples + +```bash +# Download example sqlite3 database from https://www.sqlitetutorial.net/sqlite-sample-database/ and unzip it. The resulting file will be chinook.db +$ curl -L https://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip | tar -xvf - + +# Now we can query the database directly, specifying the name of the table in the query (:::) +$ q "select count(*) from chinook.db:::albums" +347 + +# Let's take the top 5 longest tracks of album id 34. The -b option just beautifies the output, and -O tells q to output the column names as headers +$ q "select * from chinook.db:::tracks where albumid = '34' order by milliseconds desc limit 5" -b -O +TrackId Name AlbumId MediaTypeId GenreId Composer Milliseconds Bytes UnitPrice +407 "Só Tinha De Ser Com Você" 34 1 7 Vários 389642 13085596 0.99 +398 "Only A Dream In Rio" 34 1 7 Vários 371356 12192989 0.99 +393 "Tarde Em Itapoã" 34 1 7 Vários 313704 10344491 0.99 +401 "Momentos Que Marcam" 34 1 7 Vários 280137 9313740 0.99 +391 "Garota De Ipanema" 34 1 7 Vários 279536 9141343 0.99 + +# Let's now copy the chinook database to another file, as if it's just another different database +$ cp chinook.db another_db.db + +# Now we can run a join query between the two databases. 
They could have been any two different databases, using the copy of chinook is just for simplicity +# Let's get the top-5 longest albums, using albums from the first database and tracks from the second database. The track times are converted to seconds, and rounded to two digits after the decimal point. +$ q -b -O "select a.title,round(sum(t.milliseconds)/1000.0/60,2) total_album_time_seconds from chinook.db:::albums a left join another_db.db:::tracks t on (a.albumid = t.albumid) group by a.albumid order by total_album_time_seconds desc limit 5" +Title total_album_time_seconds +"Lost, Season 3" 1177.76 +"Battlestar Galactica (Classic), Season 1" 1170.23 +"Lost, Season 1" 1080.92 +"Lost, Season 2" 1054.83 +"Heroes, Season 1" 996.34 +``` -NOTE: Full type detection is implemented, so there is no need for any casting or anything. +### Analysis Examples + +```bash +# Let's create a simple CSV file without a header. Make sure to copy only the three lines, press enter, and +# then press Ctrl-D to exit so the file will be written. +$ cat > some-data-without-header.csv +harel,1,2 +ben,3,4 +attia,5,6 + + +# Let's run q on it with -A, to see the detected structure of the file. `-d ,` sets the delimiter to a comma +$ q -d , "select * from some-data-without-header.csv" -A +Table: /Users/harelben-attia/dev/harelba/q/some-data-without-header.csv + Sources: + source_type: file source: /Users/harelben-attia/dev/harelba/q/some-data-without-header.csv + Fields: + `c1` - text + `c2` - int + `c3` - int + +# Now let's create another simple CSV file, this time with a header (-H tells q to expect a header in the file) +$ cat > some-data.csv +planet_id,name,diameter_km,length_of_day_hours +1000,Earth,12756,24 +2000,Mars,6792,24.7 +3000,Jupiter,142984,9.9 + + +# Let's run q with -A to see the analysis results. 
+$ q -b -O -H -d , "select * from some-data.csv" -A +Table: /Users/harelben-attia/dev/harelba/q/some-data.csv + Sources: + source_type: file source: /Users/harelben-attia/dev/harelba/q/some-data.csv + Fields: + `planet_id` - int + `name` - text + `diameter_km` - int + `length_of_day_hours` - real + +# Let's run it with `-C readwrite` so a cache will be created +$ q -b -O -H -d , "select * from some-data.csv" -C readwrite +planet_id,name ,diameter_km,length_of_day_hours +1000 ,Earth ,12756 ,24.0 +2000 ,Mars ,6792 ,24.7 +3000 ,Jupiter,142984 ,9.9 + +# Running another query that uses some-data.csv with -A will now show that a qsql exists for that file. The source-type +# will be "file-with-unused-qsql". The qsql cache is not being used, since by default, q does not activate caching +# so backward compatibility is maintained +$ q -b -O -H -d , "select * from some-data.csv" -A +Table: /Users/harelben-attia/dev/harelba/q/some-data.csv + Sources: + source_type: file-with-unused-qsql source: /Users/harelben-attia/dev/harelba/q/some-data.csv + Fields: + `planet_id` - int + `name` - text + `diameter_km` - int + `length_of_day_hours` - real + +# Now let's run another query, this time with `-C read`, telling q to use the qsql caches. This time source-type will +# be "qsql-file-with-original", and the cache will be used when querying: +$ q -b -O -H -d , "select * from some-data.csv" -A -C read +Table: /Users/harelben-attia/dev/harelba/q/some-data.csv + Sources: + source_type: qsql-file-with-original source: /Users/harelben-attia/dev/harelba/q/some-data.csv.qsql + Fields: + `planet_id` - int + `name` - text + `diameter_km` - int + `length_of_day_hours` - real + +# Let's now read directly from the qsql file. Notice the change in the table name inside the query. `-C read` is not needed +# here. 
The source-type will be "qsql-file" +$ q -b -O -H -d , "select * from some-data.csv.qsql" -A +Table: /Users/harelben-attia/dev/harelba/q/some-data.csv.qsql + Sources: + source_type: qsql-file source: /Users/harelben-attia/dev/harelba/q/some-data.csv.qsql + Fields: + `planet_id` - int + `name` - text + `diameter_km` - int + `length_of_day_hours` - real +``` -NOTE2: When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names. +### Usage +Query should be an SQL-like query which contains filenames instead of table names (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). -### Flags +All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). Take a look at the [limitations](#limitations) section below for some rarely-used use cases which are not fully supported. -``` bash -Usage: - q allows performing SQL-like statements on tabular text data. +q gets a full SQL query as a parameter. Remember to double-quote the query. - Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. +Historically, q supports multiple queries on the same command-line, loading each data file only once, even if it is used by multiple queries on the same q invocation. This is still supported. However, due to the new automatic-caching capabilities, this is not really required. Activate caching, and a cache file will be automatically created for each file. q Will use the cache behind the scenes in order to speed up queries. The speed up is extremely significant, so consider using caching for large files. 
- Basic usage is q "" where table names are just regular file names (Use - to read from standard input) - When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN. +The following filename types are supported: - Column types are detected automatically. Use -A in order to see the column name/type analysis. +* **Delimited-file filenames** - including relative/absolute paths. E.g. `./my_folder/my_file.csv` or `/var/tmp/my_file.csv` +* **sqlite3 database filenames** + * **With Multiple Tables** - Add an additional `:::` for accessing a specific table. For example `mydatabase.sqlite3:::users_table`. + * **With One Table Only** - Just specify the database filename, no need for a table name postfix. For example `my_single_table_database.sqlite`. +* **`.qsql` cache files** - q can auto-generate cache files for delimited files, and they can be queried directly as a table, since they contain only one table, as they are essentially standard sqlite databases - Delimiter can be set using the -d (or -t) option. Output delimiter can be set using -D +Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. `q "SELECT c3,c8 from ..."`). - All sqlite3 SQL constructs are supported. +Use `-d` to specify the input delimiter. - Examples: +Column types are auto detected by the tool, no casting is needed. Note that there's a flag `--as-text` which forces all columns to be treated as text columns. - Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1" - This example would print a count of each unique permission string in the current folder. +Please note that column names that include spaces need to be used in the query with back-ticks, as per the sqlite standard. 
Make sure to use single-quotes around the query, so bash/zsh won't interpret the backticks. - Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -" - This example would provide the average and the sum of the numbers in the range 1 to 1000 +Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed. - Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc" - This example will output the total size in MB per user+group in the /tmp subtree +JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs. +The SQL syntax itself is sqlite's syntax. For details look at http://www.sqlite.org/lang.html or search the net for examples. - See the help or https://github.com/harelba/q/ for more details. - +NOTE: When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names. +``` bash Options: -h, --help show this help message and exit -v, --version Print version -V, --verbose Print debug info in case of problems -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME Save database to an sqlite database file - --save-db-to-disk-method=SAVE_DB_TO_DISK_METHOD - Method to use to save db to disk. 'standard' does not - require any deps, 'fast' currenty requires manually - running `pip install sqlitebck` on your python - installation. Once packing issues are solved, the fast - method will be the default. + -C CACHING_MODE, --caching-mode=CACHING_MODE + Choose the autocaching mode (none/read/readwrite). + Autocaches files to disk db so further queries will be + faster. 
Caching is done to a side-file with the same + name of the table, but with an added extension .qsql + --dump-defaults Dump all default values for parameters and exit. Can + be used in order to make sure .qrc file content is + being read properly. + --max-attached-sqlite-databases=MAX_ATTACHED_SQLITE_DATABASES + Set the maximum number of concurrently-attached sqlite + dbs. This is a compile time definition of sqlite. q's + performance will slow down once this limit is reached + for a query, since it will perform table copies in + order to avoid that limit. + --overwrite-qsql=OVERWRITE_QSQL + When used, qsql files (both caches and store-to-db) + will be overwritten if they already exist. Use with + care. Input Data Options: -H, --skip-header Skip header row. This has been changed from earlier @@ -227,14 +427,32 @@ Options: feedback on this ``` -## Examples -The `-H` flag in the examples below signifies that the file has a header row which is used for naming columns. +### Setting the default values for parameters +It's possible to set default values for parameters which are used often by configuring them in the file `~/.qrc`. + +The file format is as follows: +```bash +[options] += +``` -The `-t` flag is just a shortcut for saying that the file is a tab-separated file (any delimiter is supported - Use the `-d` flag). +It's possible to generate a default `.qrc` file by running `q --dump-defaults` and write the output into the `.qrc` file. -Queries are given using upper case for clarity, but actual query keywords such as SELECT and WHERE are not really case sensitive. +One valuable use-case for this could be setting the caching-mode to `read`. This will make q automatically use generated `.qsql` cache files if they exist. Whenever you want a cache file to be generated, just use `-C readwrite` and a `.qsql` file will be generated if it doesn't exist. -Example List: + +## Getting Started Examples +This section shows some more basic examples of simple SQL constructs. 
+ +For some more complex use-cases, see the [examples](#examples) at the beginning of the documentation. + +NOTES: + +* The `-H` flag in the examples below signifies that the file has a header row which is used for naming columns. +* The `-t` flag is just a shortcut for saying that the file is a tab-separated file (any delimiter is supported - Use the `-d` flag). +* Queries are given using upper case for clarity, but actual query keywords such as SELECT and WHERE are not really case sensitive. + +Basic Example List: * [Example 1 - COUNT DISTINCT values of specific field (uuid of clicks data)](#example-1) * [Example 2 - Filter numeric data, controlling ORDERing and LIMITing output](#example-2) @@ -345,24 +563,32 @@ You can see that the ppp filename appears twice, each time matched to one of the Column name detection is supported for JOIN scenarios as well. Just specify `-H` in the command line and make sure that the source files contain the header rows. ## Implementation -The current implementation is written in Python using an in-memory database, in order to prevent the need for external dependencies. The implementation itself supports SELECT statements, including JOINs (Subqueries are supported only in the WHERE clause for now). If you want to do further analysis on the data, you can use the `--save-db-to-disk` option to write the resulting tables to an sqlite database file, and then use `seqlite3` in order to perform queries on the data separately from q itself. +Behind the scenes q creates a "virtual" sqlite3 database that does not contain data of its own, but attaches to multiple other databases as follows: + +* When reading delimited files or data from `stdin`, it will analyze the data and construct an in-memory "adhoc database" that contains it. 
This adhoc database will be attached to the virtual database +* When a delimited file has a `.qsql` cache, it will attach to that file directly, without having to read it into memory +* When querying a standard sqlite3 file, it will be attached to the virtual database as well, without reading it into memory. sqlite3 files are auto-detected, no need for any special filename extension + +The user query will be executed directly on the virtual database, using the attached databases. -Please note that there is currently no checks and bounds on data size - It's up to the user to make sure things don't get too big. +sqlite3 itself has a limit on the number of attached databases (usually 10). If a query needs more databases than that, q will attach databases directly until the limit is reached, and will load any additional tables into the in-memory adhoc database. Please make sure to read the [limitations](#limitations) section as well. ## Development ### Tests -The code includes a test suite runnable through `test/test-all`. If you're planning on sending a pull request, I'd appreciate if you could make sure that it doesn't fail. +The code includes a test suite runnable through `run-tests.sh`. By default, it uses the python source code for running the tests. However, it is possible to provide a path to an actual executable to the tests using the `Q_EXECUTABLE` env var. This is actually being used during the build and packaging process, in order to test the resulting binary. ## Limitations Here's the list of known limitations. Please contact me if you have a use case that needs any of those missing capabilities. +* Common Table Expressions (CTE) are not supported for now. Will be implemented soon - See [here](https://github.com/harelba/q/issues/67) and [here](https://github.com/harelba/q/issues/124) for details. * `FROM ` is not supported -* Common Table Expressions (CTE) are not supported * Spaces in file names are not supported. 
Use stdin for piping the data into q, or rename the file * Some rare cases of subqueries are not supported yet. +* Queries with more than 10 different sqlite3 databases will load some data into memory +* up to 500 tables are supported in a single query ## Rationale Have you ever stared at a text file on the screen, hoping it would have been a database so you could ask anything you want about it? I had that feeling many times, and I've finally understood that it's not the database that I want. It's the language - SQL. @@ -381,7 +607,6 @@ This tool has been designed with general Linux/Unix design principles in mind. I ## Future -* Expose python as a python module - Mostly implemented. Requires some internal API changes with regard to handling stdin before exposing it. -* Allow to use a distributed backend for scaling the computations +* Expose python as a python module - Planned as a goal after the new version `3.x` is out diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index 6ccf0dbe..22ef5bfb 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -61,7 +61,7 @@ def make_msi(exe): # The name of your application. "q-text-as-data", # The version of your application. - "3.1.1", + "3.1.3", # The author/manufacturer of your application. 
"Harel Ben-Attia" ) diff --git a/setup.py b/setup.py index d0c5d483..10b1e9e7 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup import setuptools -q_version = '3.1.1-beta' +q_version = '3.1.3' with open("README.markdown", "r", encoding="utf-8") as fh: long_description = fh.read() diff --git a/test/test_suite.py b/test/test_suite.py index f722e8f9..8ec21abf 100755 --- a/test/test_suite.py +++ b/test/test_suite.py @@ -1243,11 +1243,10 @@ def test_basic_aggregation(self): 'seq 1 10 | ' + Q_EXECUTABLE + ' "select sum(c1),avg(c1) from -"') self.assertTrue(retcode == 0) self.assertTrue(len(o) == 1) - self.assertTrue(len(e) == 1) + self.assertTrue(len(e) == 0) s = sum(range(1, 11)) self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) - self.assertTrue(one_column_warning(e)) def test_select_one_column(self): tmpfile = self.create_file_with_data(sample_data_no_header) @@ -2180,11 +2179,10 @@ def test_gzipped_file(self): retcode, o, e = run_command(cmd) self.assertTrue(retcode == 0) self.assertTrue(len(o) == 1) - self.assertTrue(len(e) == 1) + self.assertTrue(len(e) == 0) s = sum(range(1, 11)) self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0))) - self.assertTrue(one_column_warning(e)) self.cleanup(tmpfile) @@ -2199,12 +2197,10 @@ def test_delimition_mistake_with_header(self): self.assertNotEqual(retcode, 0) self.assertEqual(len(o), 0) - self.assertEqual(len(e), 3) + self.assertEqual(len(e), 2) - self.assertTrue(e[0].startswith( - six.b("Warning: column count is one - did you provide the correct delimiter"))) - self.assertTrue(e[1].startswith(six.b("Bad header row"))) - self.assertTrue(six.b("Column name cannot contain commas") in e[2]) + self.assertTrue(e[0].startswith(six.b("Bad header row"))) + self.assertTrue(six.b("Column name cannot contain commas") in e[1]) self.cleanup(tmpfile) @@ -2405,6 +2401,46 @@ def test_column_analysis(self): self.cleanup(tmpfile) + def test_column_analysis_with_mixed_ints_and_floats(self): + tmpfile = 
self.create_file_with_data(six.b("""planet_id,name,diameter_km,length_of_day_hours\n1000,Earth,12756,24\n2000,Mars,6792,24.7\n3000,Jupiter,142984,9.9""")) + + cmd = Q_EXECUTABLE + ' -d , -H "select * from %s" -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o),8) + self.assertEqual(len(e),0) + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `planet_id` - int')) + self.assertEqual(o[5], six.b(' `name` - text')) + self.assertEqual(o[6], six.b(' `diameter_km` - int')) + self.assertEqual(o[7], six.b(' `length_of_day_hours` - real')) + + self.cleanup(tmpfile) + + def test_column_analysis_with_mixed_ints_and_floats_and_nulls(self): + tmpfile = self.create_file_with_data(six.b("""planet_id,name,diameter_km,length_of_day_hours\n1000,Earth,12756,24\n2000,Mars,6792,24.7\n2500,Venus,,\n3000,Jupiter,142984,9.9""")) + + cmd = Q_EXECUTABLE + ' -d , -H "select * from %s" -A' % tmpfile.name + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode, 0) + self.assertEqual(len(o),8) + self.assertEqual(len(e),0) + self.assertEqual(o[0], six.b('Table: %s' % tmpfile.name)) + self.assertEqual(o[1],six.b(' Sources:')) + self.assertEqual(o[2],six.b(' source_type: file source: %s' % tmpfile.name)) + self.assertEqual(o[3],six.b(' Fields:')) + self.assertEqual(o[4], six.b(' `planet_id` - int')) + self.assertEqual(o[5], six.b(' `name` - text')) + self.assertEqual(o[6], six.b(' `diameter_km` - int')) + self.assertEqual(o[7], six.b(' `length_of_day_hours` - real')) + + self.cleanup(tmpfile) + def test_column_analysis_no_header(self): tmpfile = self.create_file_with_data(sample_data_no_header) @@ -2806,11 +2842,11 @@ def test_input_field_quoting_and_data_types_with_encoding(self): self.assertEqual(o[2],six.b(' source_type: 
file source: %s' % tmp_data_file.name)) self.assertEqual(o[3],six.b(' Fields:')) self.assertEqual(o[4],six.b(' `c1` - int')) - self.assertEqual(o[5],six.b(' `c2` - float')) + self.assertEqual(o[5],six.b(' `c2` - real')) self.assertEqual(o[6],six.b(' `c3` - text')) self.assertEqual(o[7],six.b(' `c4` - text')) self.assertEqual(o[8],six.b(' `c5` - text')) - self.assertEqual(o[9],six.b(' `c6` - float')) + self.assertEqual(o[9],six.b(' `c6` - real')) self.cleanup(tmp_data_file) @@ -4618,10 +4654,9 @@ def test_1_column_warning_in_relaxed_mode(self): retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) - self.assertEqual(len(e), 1) + self.assertEqual(len(e), 0) self.assertEqual(len(o),2) - self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) self.assertEqual(o[0],six.b('data without commas 1')) self.assertEqual(o[1],six.b('data without commas 2')) @@ -4946,7 +4981,7 @@ def test_disable_column_type_detection(self): self.assertEqual(o[4], six.b(' `regular_text` - text')) self.assertEqual(o[5], six.b(' `text_with_digits1` - int')) self.assertEqual(o[6], six.b(' `text_with_digits2` - int')) - self.assertEqual(o[7], six.b(' `float_number` - float')) + self.assertEqual(o[7], six.b(' `float_number` - real')) # Check column types detected when actual detection is disabled cmd = Q_EXECUTABLE + ' -A -d , -H --as-text "select * from %s"' % (tmpfile.name) @@ -5439,7 +5474,7 @@ def test_load_data_from_string_without_previous_data_load(self): table_structure = metadata.table_structures['my_data'] self.assertEqual(table_structure.column_names,['column1','column2','column3']) - self.assertEqual(table_structure.sqlite_column_types,['text','float','text']) + self.assertEqual(table_structure.sqlite_column_types,['text','real','text']) self.assertEqual(table_structure.python_column_types,[str,float,str]) self.assertEqual(table_structure.qtable_name, 'my_data') self.assertEqual(table_structure.source_type, 'data-stream') @@ -5475,7 
+5510,7 @@ def test_load_data_from_string_with_previous_data_load(self): table_structure = metadata.table_structures['my_data'] self.assertEqual(table_structure.column_names,['column1','column2','column3']) - self.assertEqual(table_structure.sqlite_column_types,['text','float','text']) + self.assertEqual(table_structure.sqlite_column_types,['text','real','text']) self.assertEqual(table_structure.python_column_types,[str,float,str]) self.assertEqual(table_structure.qtable_name, 'my_data') From 908bac920a0635c65f4228246f64878c189e5367 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 15:23:06 +0200 Subject: [PATCH 083/111] Forgot to fix one of the tests. I knew I should test it on the PR before merging... sorry. --- test/test_suite.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_suite.py b/test/test_suite.py index 8ec21abf..fa8f2ea5 100755 --- a/test/test_suite.py +++ b/test/test_suite.py @@ -4668,10 +4668,9 @@ def test_1_column_warning_in_strict_mode(self): retcode, o, e = run_command(cmd) self.assertEqual(retcode, 0) - self.assertEqual(len(e), 1) + self.assertEqual(len(e), 0) self.assertEqual(len(o),2) - self.assertEqual(e[0],six.b("Warning: column count is one - did you provide the correct delimiter?")) self.assertEqual(o[0],six.b('data without commas 1')) self.assertEqual(o[1],six.b('data without commas 2')) From 62c8b10ca88f08f6831721f3be91e7826bce64ac Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 15:34:03 +0200 Subject: [PATCH 084/111] reactivate package release + tag new version 3.1.3 --- .github/workflows/build-and-package.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 08970eec..1f162856 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -528,10 +528,7 @@ jobs: perform-release: needs: [test-mac-packaging, 
test-deb-packaging, test-rpm-packaging, test-windows-packaging] runs-on: ubuntu-latest - # Disabled on purpose for now - Changing the beta release to a real one will be done manually until everything stabilizes - # and then this will be reinstated - # if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - if: ${{ false }} + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} steps: - name: Download All Artifacts uses: actions/download-artifact@v2 From 12286ad001ee65642e6e3eb5adef774f27142a61 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 16:24:42 +0200 Subject: [PATCH 085/111] fix q brew formula, so it's fully tested (can't test on local machine) --- .github/workflows/q.rb.brew-formula-template | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template index c860a0ce..09116e8f 100644 --- a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -4,11 +4,9 @@ class Q < Formula desc "Run SQL directly on CSV or TSV files" homepage "https://harelba.github.io/q/" - # Using branch name for pre-releases, for tagged releases this would be the version tag, and not "version" part will be needed - url "https://github.com/harelba/q/archive/3.1.3.tar.gz" + url "https://github.com/harelba/q/archive/v3.1.3.tar.gz" - # Removed for now, until everything is finalized - # sha256 "0844aed6658d0347a299b84bee978c88724d45093e8cbd7b05506ecc0b93c98c" + sha256 "a77f8fe0e35d0c24f06493d2cc9beceb1ef9ded5b9bb44b85ef2aa11eadfe9ba" license "GPL-3.0-or-later" revision 1 From a125eacc9a0ba6bd224fceafc57343cc455dc9b1 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 16:46:51 +0200 Subject: [PATCH 086/111] bump to v3.1.4, no mac sha yet in formula --- .github/workflows/build-and-package.yaml | 51 +++++++++----------- .github/workflows/q.rb.brew-formula-template | 4 
+- README.markdown | 6 ++- bin/q.py | 2 +- mkdocs/docs/about.md | 3 ++ mkdocs/docs/index.md | 12 ++--- pyoxidizer.bzl | 2 +- setup.py | 2 +- 8 files changed, 42 insertions(+), 40 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 1f162856..bad9b885 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -2,11 +2,8 @@ name: BuildAndPackage on: push: - branches: master - paths-ignore: - - "*.md" - - "*.markdown" - - "mkdocs/**/*" + tags: + - "v*" # Remove comment in order to pre-release on a PR, to validate packaging flow pull_request: branches: master @@ -139,12 +136,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.3-1.x86_64.deb --version 3.1.3 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.4-1.x86_64.deb --version 3.1.4 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload DEB Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.3-1.x86_64.deb - path: packages/linux/q-text-as-data-3.1.3-1.x86_64.deb + name: q-text-as-data-3.1.4-1.x86_64.deb + path: packages/linux/q-text-as-data-3.1.4-1.x86_64.deb test-deb-packaging: runs-on: ubuntu-18.04 @@ -155,7 +152,7 @@ jobs: - name: Download DEB uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.3-1.x86_64.deb + name: q-text-as-data-3.1.4-1.x86_64.deb - name: Install Python for Testing uses: actions/setup-python@v2 with: @@ -167,7 +164,7 @@ jobs: pip3 install -r test-requirements.txt - name: Test DEB Package Installation - run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.3-1.x86_64.deb + run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.4-1.x86_64.deb package-linux-rpm: needs: [test-linux, create-man] @@ -199,12 +196,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - 
fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.3.x86_64.rpm --version 3.1.3 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.4.x86_64.rpm --version 3.1.4 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload RPM Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.3.x86_64.rpm - path: packages/linux/q-text-as-data-3.1.3.x86_64.rpm + name: q-text-as-data-3.1.4.x86_64.rpm + path: packages/linux/q-text-as-data-3.1.4.x86_64.rpm test-rpm-packaging: runs-on: ubuntu-18.04 @@ -215,9 +212,9 @@ jobs: - name: Download RPM uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.3.x86_64.rpm + name: q-text-as-data-3.1.4.x86_64.rpm - name: Retest using RPM - run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.3.x86_64.rpm + run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.4.x86_64.rpm build-mac: runs-on: macos-11 @@ -308,7 +305,7 @@ jobs: export BRANCH_NAME=master # TODO temp, since template rendering action doesn't work in mac - cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.3/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb + cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.4/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb echo "Resulting formula:" cat ./brew/q.rb @@ -322,8 +319,8 @@ jobs: - name: Upload Executable uses: actions/upload-artifact@v1.0.0 with: - name: q--3.1.3_1.big_sur.bottle.tar.gz - path: ./q--3.1.3_1.big_sur.bottle.tar.gz + name: q--3.1.4_1.big_sur.bottle.tar.gz + path: ./q--3.1.4_1.big_sur.bottle.tar.gz # TODO auto-create PR to main homebrew-core # git clone https://github.com/harelba/homebrew-core.git @@ -340,7 +337,7 @@ jobs: - name: Download q bottle uses: actions/download-artifact@v2 with: - name: q--3.1.3_1.big_sur.bottle.tar.gz + name: q--3.1.4_1.big_sur.bottle.tar.gz - name: 
Test the created bottle run: | set -x -e @@ -349,7 +346,7 @@ jobs: WD=$(pwd) pushd /usr/local/Cellar - tar xvfz ${WD}/q--3.1.3_1.big_sur.bottle.tar.gz + tar xvfz ${WD}/q--3.1.4_1.big_sur.bottle.tar.gz popd brew link q @@ -459,17 +456,17 @@ jobs: # TODO Windows versions do not support the -beta postfix - export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.3.msi + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.4.msi chmod 755 $Q_MSI mkdir -p packages/windows/ - cp $Q_MSI packages/windows/q-text-as-data-3.1.3.msi + cp $Q_MSI packages/windows/q-text-as-data-3.1.4.msi - name: Upload Windows MSI uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.3.msi - path: packages/windows/q-text-as-data-3.1.3.msi + name: q-text-as-data-3.1.4.msi + path: packages/windows/q-text-as-data-3.1.4.msi test-windows-packaging: needs: package-windows @@ -480,12 +477,12 @@ jobs: - name: Download Windows Package uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.3.msi + name: q-text-as-data-3.1.4.msi - name: Test Install of MSI continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.3.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.4.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait $process.ExitCode gc msi-install.log @@ -494,7 +491,7 @@ jobs: continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.3.msi /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.4.msi /norestart /quiet" -PassThru -Wait $process.ExitCode exit $process.ExitCode diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template index 09116e8f..52be2deb 100644 --- 
a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -4,9 +4,9 @@ class Q < Formula desc "Run SQL directly on CSV or TSV files" homepage "https://harelba.github.io/q/" - url "https://github.com/harelba/q/archive/v3.1.3.tar.gz" + url "https://github.com/harelba/q/archive/v3.1.4.tar.gz" - sha256 "a77f8fe0e35d0c24f06493d2cc9beceb1ef9ded5b9bb44b85ef2aa11eadfe9ba" + sha256 "RLRL-UNKNOWN-YET" license "GPL-3.0-or-later" revision 1 diff --git a/README.markdown b/README.markdown index aacdfb62..3d7828e6 100644 --- a/README.markdown +++ b/README.markdown @@ -1,6 +1,5 @@ [![Build and Package](https://github.com/harelba/q/workflows/BuildAndPackage/badge.svg?branch=master)](https://github.com/harelba/q/actions?query=branch%3Amaster) - # q - Text as Data q's purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data. @@ -40,7 +39,7 @@ $ q "select count(*) from some_db.sqlite3:::albums a left join another_db.sqlite Detailed examples are in [here](http://harelba.github.io/q/#examples) ## Installation. -**New Major Version `3.1.3` is out with a lot of significant additions.** +**New Major Version `3.1.4` is out with a lot of significant additions.** Instructions for all OSs are [here](http://harelba.github.io/q/#installation). @@ -57,3 +56,6 @@ Email [harelba@gmail.com](mailto:harelba@gmail.com) q on twitter: [#qtextasdata](https://twitter.com/hashtag/qtextasdata?src=hashtag_click) +Patreon: [harelba](https://www.patreon.com/harelba) - All the money received is donated to the [Center for the Prevention and Treatment of Domestic Violence](https://www.gov.il/he/departments/bureaus/molsa-almab-ramla) in my hometown - Ramla, Israel. 
+ + diff --git a/bin/q.py b/bin/q.py index f36cb8f0..a3d740b9 100755 --- a/bin/q.py +++ b/bin/q.py @@ -35,7 +35,7 @@ from sqlite3.dbapi2 import OperationalError from uuid import uuid4 -q_version = '3.1.3' +q_version = '3.1.4' #__all__ = [ 'QTextAsData' ] diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md index 9da12940..37e3f005 100644 --- a/mkdocs/docs/about.md +++ b/mkdocs/docs/about.md @@ -6,5 +6,8 @@ ### Email [harelba@gmail.com](mailto:harelba@gmail.com) +### Patreon [harelba](https://www.patreon.com/harelba) +All the money received is donated to the [Center for the Prevention and Treatment of Domestic Violence](https://www.gov.il/he/departments/bureaus/molsa-almab-ramla) in my hometown - Ramla, Israel. + ### Chinese translation [jinzhencheng@outlook.com](mailto:jinzhencheng@outlook.com) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 29ab2e7d..f543d037 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -69,12 +69,12 @@ Download the tool using the links in the [installation](#installation) below and | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/q/releases/download/v3.1.3/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. After the first time, it will run from the command line without any issues. |A man page is available, just run `man q`|| -|[RPM Package](https://github.com/harelba/q/releases/download/v3.1.3/q-text-as-data-3.1.3.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. 
Just enter `man q`.| -|[DEB Package](https://github.com/harelba/q/releases/download/v3.1.3/q-text-as-data-3.1.3-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| -|[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.3/q-text-as-data-3.1.3.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| -|[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.3.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| -|[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.3.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[OSX](https://github.com/harelba/q/releases/download/v3.1.4/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. After the first time, it will run from the command line without any issues. |A man page is available, just run `man q`|| +|[RPM Package](https://github.com/harelba/q/releases/download/v3.1.4/q-text-as-data-3.1.4.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. 
Just enter `man q`.| +|[DEB Package](https://github.com/harelba/q/releases/download/v3.1.4/q-text-as-data-3.1.4-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| +|[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.4/q-text-as-data-3.1.4.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| +|[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.4.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.4.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| I will add packages for additional Linux Distributions if there's demand for it. If you're interested in another Linux distribution, please ping me. It's relatively easy to add new ones with the new packaging flow. diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index 22ef5bfb..1d847812 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -61,7 +61,7 @@ def make_msi(exe): # The name of your application. "q-text-as-data", # The version of your application. - "3.1.3", + "3.1.4", # The author/manufacturer of your application. 
"Harel Ben-Attia" ) diff --git a/setup.py b/setup.py index 10b1e9e7..8056a551 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup import setuptools -q_version = '3.1.3' +q_version = '3.1.4' with open("README.markdown", "r", encoding="utf-8") as fh: long_description = fh.read() From e0b8d4189771f2abb2232fc7e256d15b48b0aaa6 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 16:50:22 +0200 Subject: [PATCH 087/111] tag v3.1.4, but sha on mac homebrew will never work... --- .github/workflows/q.rb.brew-formula-template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template index 52be2deb..15af8cac 100644 --- a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -6,7 +6,7 @@ class Q < Formula homepage "https://harelba.github.io/q/" url "https://github.com/harelba/q/archive/v3.1.4.tar.gz" - sha256 "RLRL-UNKNOWN-YET" + sha256 "0f4656b19087332d5113dd38907b50d70c55a57b3e97f810b8090132412dc9fb" license "GPL-3.0-or-later" revision 1 From 5298a76b60bafda461f124b12418f448c618588c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 17:01:15 +0200 Subject: [PATCH 088/111] remove mac packaging tests, as expected, and move v3.1.4 i have to make this automatic --- .github/workflows/build-and-package.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index bad9b885..ea985754 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -290,6 +290,7 @@ jobs: package-mac: # create-man is not needed, as it's generated inside the brew formula independently needs: [test-mac] + if: ${{false}} runs-on: macos-11 steps: - name: Checkout @@ -330,6 +331,7 @@ jobs: test-mac-packaging: needs: package-mac + if: ${{false}} runs-on: macos-11 steps: - name: 
Checkout From 19bfff3704842aa3cf29b2136c69ca8b0c1856ee Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 17:04:28 +0200 Subject: [PATCH 089/111] bump to 3.1.5, after removing osx packaging tests + new tag v3.1.5 --- .github/workflows/build-and-package.yaml | 44 ++++++++++---------- .github/workflows/q.rb.brew-formula-template | 2 +- README.markdown | 2 +- bin/q.py | 2 +- mkdocs/docs/index.md | 12 +++--- pyoxidizer.bzl | 2 +- setup.py | 2 +- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index ea985754..2ba23ad5 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -136,12 +136,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.4-1.x86_64.deb --version 3.1.4 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.5-1.x86_64.deb --version 3.1.5 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload DEB Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.4-1.x86_64.deb - path: packages/linux/q-text-as-data-3.1.4-1.x86_64.deb + name: q-text-as-data-3.1.5-1.x86_64.deb + path: packages/linux/q-text-as-data-3.1.5-1.x86_64.deb test-deb-packaging: runs-on: ubuntu-18.04 @@ -152,7 +152,7 @@ jobs: - name: Download DEB uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.4-1.x86_64.deb + name: q-text-as-data-3.1.5-1.x86_64.deb - name: Install Python for Testing uses: actions/setup-python@v2 with: @@ -164,7 +164,7 @@ jobs: pip3 install -r test-requirements.txt - name: Test DEB Package Installation - run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.4-1.x86_64.deb + run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.5-1.x86_64.deb package-linux-rpm: needs: [test-linux, create-man] @@ 
-196,12 +196,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.4.x86_64.rpm --version 3.1.4 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.5.x86_64.rpm --version 3.1.5 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload RPM Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.4.x86_64.rpm - path: packages/linux/q-text-as-data-3.1.4.x86_64.rpm + name: q-text-as-data-3.1.5.x86_64.rpm + path: packages/linux/q-text-as-data-3.1.5.x86_64.rpm test-rpm-packaging: runs-on: ubuntu-18.04 @@ -212,9 +212,9 @@ jobs: - name: Download RPM uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.4.x86_64.rpm + name: q-text-as-data-3.1.5.x86_64.rpm - name: Retest using RPM - run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.4.x86_64.rpm + run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.5.x86_64.rpm build-mac: runs-on: macos-11 @@ -306,7 +306,7 @@ jobs: export BRANCH_NAME=master # TODO temp, since template rendering action doesn't work in mac - cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.4/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb + cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.5/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb echo "Resulting formula:" cat ./brew/q.rb @@ -320,8 +320,8 @@ jobs: - name: Upload Executable uses: actions/upload-artifact@v1.0.0 with: - name: q--3.1.4_1.big_sur.bottle.tar.gz - path: ./q--3.1.4_1.big_sur.bottle.tar.gz + name: q--3.1.5_1.big_sur.bottle.tar.gz + path: ./q--3.1.5_1.big_sur.bottle.tar.gz # TODO auto-create PR to main homebrew-core # git clone https://github.com/harelba/homebrew-core.git @@ -339,7 +339,7 @@ jobs: - name: Download q bottle uses: actions/download-artifact@v2 with: - name: 
q--3.1.4_1.big_sur.bottle.tar.gz + name: q--3.1.5_1.big_sur.bottle.tar.gz - name: Test the created bottle run: | set -x -e @@ -348,7 +348,7 @@ jobs: WD=$(pwd) pushd /usr/local/Cellar - tar xvfz ${WD}/q--3.1.4_1.big_sur.bottle.tar.gz + tar xvfz ${WD}/q--3.1.5_1.big_sur.bottle.tar.gz popd brew link q @@ -458,17 +458,17 @@ jobs: # TODO Windows versions do not support the -beta postfix - export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.4.msi + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.5.msi chmod 755 $Q_MSI mkdir -p packages/windows/ - cp $Q_MSI packages/windows/q-text-as-data-3.1.4.msi + cp $Q_MSI packages/windows/q-text-as-data-3.1.5.msi - name: Upload Windows MSI uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.4.msi - path: packages/windows/q-text-as-data-3.1.4.msi + name: q-text-as-data-3.1.5.msi + path: packages/windows/q-text-as-data-3.1.5.msi test-windows-packaging: needs: package-windows @@ -479,12 +479,12 @@ jobs: - name: Download Windows Package uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.4.msi + name: q-text-as-data-3.1.5.msi - name: Test Install of MSI continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.4.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.5.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait $process.ExitCode gc msi-install.log @@ -493,7 +493,7 @@ jobs: continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.4.msi /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.5.msi /norestart /quiet" -PassThru -Wait $process.ExitCode exit $process.ExitCode diff --git a/.github/workflows/q.rb.brew-formula-template 
b/.github/workflows/q.rb.brew-formula-template index 15af8cac..db9c9637 100644 --- a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -4,7 +4,7 @@ class Q < Formula desc "Run SQL directly on CSV or TSV files" homepage "https://harelba.github.io/q/" - url "https://github.com/harelba/q/archive/v3.1.4.tar.gz" + url "https://github.com/harelba/q/archive/v3.1.5.tar.gz" sha256 "0f4656b19087332d5113dd38907b50d70c55a57b3e97f810b8090132412dc9fb" diff --git a/README.markdown b/README.markdown index 3d7828e6..c207f733 100644 --- a/README.markdown +++ b/README.markdown @@ -39,7 +39,7 @@ $ q "select count(*) from some_db.sqlite3:::albums a left join another_db.sqlite Detailed examples are in [here](http://harelba.github.io/q/#examples) ## Installation. -**New Major Version `3.1.4` is out with a lot of significant additions.** +**New Major Version `3.1.5` is out with a lot of significant additions.** Instructions for all OSs are [here](http://harelba.github.io/q/#installation). diff --git a/bin/q.py b/bin/q.py index a3d740b9..ddb6188f 100755 --- a/bin/q.py +++ b/bin/q.py @@ -35,7 +35,7 @@ from sqlite3.dbapi2 import OperationalError from uuid import uuid4 -q_version = '3.1.4' +q_version = '3.1.5' #__all__ = [ 'QTextAsData' ] diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index f543d037..426189c9 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -69,12 +69,12 @@ Download the tool using the links in the [installation](#installation) below and | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/q/releases/download/v3.1.4/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. 
After the first time, it will run from the command line without any issues. |A man page is available, just run `man q`|| -|[RPM Package](https://github.com/harelba/q/releases/download/v3.1.4/q-text-as-data-3.1.4.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| -|[DEB Package](https://github.com/harelba/q/releases/download/v3.1.4/q-text-as-data-3.1.4-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| -|[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.4/q-text-as-data-3.1.4.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| -|[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.4.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| -|[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.4.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[OSX](https://github.com/harelba/q/releases/download/v3.1.5/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. After the first time, it will run from the command line without any issues. 
|A man page is available, just run `man q`|| +|[RPM Package](https://github.com/harelba/q/releases/download/v3.1.5/q-text-as-data-3.1.5.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| +|[DEB Package](https://github.com/harelba/q/releases/download/v3.1.5/q-text-as-data-3.1.5-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| +|[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.5/q-text-as-data-3.1.5.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| +|[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.5.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.5.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| I will add packages for additional Linux Distributions if there's demand for it. If you're interested in another Linux distribution, please ping me. It's relatively easy to add new ones with the new packaging flow. diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index 1d847812..840f174c 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -61,7 +61,7 @@ def make_msi(exe): # The name of your application. "q-text-as-data", # The version of your application. - "3.1.4", + "3.1.5", # The author/manufacturer of your application. 
"Harel Ben-Attia" ) diff --git a/setup.py b/setup.py index 8056a551..86405b17 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup import setuptools -q_version = '3.1.4' +q_version = '3.1.5' with open("README.markdown", "r", encoding="utf-8") as fh: long_description = fh.read() From a6617e45bea57ce437ae5f1a64ae1d2e9fb585ae Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Fri, 26 Nov 2021 17:19:40 +0200 Subject: [PATCH 090/111] replace to continue-on-error, so release will be created... + bump to 3.1.6 --- .github/workflows/build-and-package.yaml | 48 ++++++++++---------- .github/workflows/q.rb.brew-formula-template | 2 +- README.markdown | 2 +- bin/q.py | 2 +- mkdocs/docs/index.md | 12 ++--- pyoxidizer.bzl | 2 +- setup.py | 2 +- 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 2ba23ad5..6a64a520 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -136,12 +136,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.5-1.x86_64.deb --version 3.1.5 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.6-1.x86_64.deb --version 3.1.6 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload DEB Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.5-1.x86_64.deb - path: packages/linux/q-text-as-data-3.1.5-1.x86_64.deb + name: q-text-as-data-3.1.6-1.x86_64.deb + path: packages/linux/q-text-as-data-3.1.6-1.x86_64.deb test-deb-packaging: runs-on: ubuntu-18.04 @@ -152,7 +152,7 @@ jobs: - name: Download DEB uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.5-1.x86_64.deb + name: q-text-as-data-3.1.6-1.x86_64.deb - name: Install Python for Testing uses: 
actions/setup-python@v2 with: @@ -164,7 +164,7 @@ jobs: pip3 install -r test-requirements.txt - name: Test DEB Package Installation - run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.5-1.x86_64.deb + run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.6-1.x86_64.deb package-linux-rpm: needs: [test-linux, create-man] @@ -196,12 +196,12 @@ jobs: gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.5.x86_64.rpm --version 3.1.5 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.6.x86_64.rpm --version 3.1.6 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload RPM Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.5.x86_64.rpm - path: packages/linux/q-text-as-data-3.1.5.x86_64.rpm + name: q-text-as-data-3.1.6.x86_64.rpm + path: packages/linux/q-text-as-data-3.1.6.x86_64.rpm test-rpm-packaging: runs-on: ubuntu-18.04 @@ -212,9 +212,9 @@ jobs: - name: Download RPM uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.5.x86_64.rpm + name: q-text-as-data-3.1.6.x86_64.rpm - name: Retest using RPM - run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.5.x86_64.rpm + run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.6.x86_64.rpm build-mac: runs-on: macos-11 @@ -290,7 +290,7 @@ jobs: package-mac: # create-man is not needed, as it's generated inside the brew formula independently needs: [test-mac] - if: ${{false}} + continue-on-error: true runs-on: macos-11 steps: - name: Checkout @@ -306,7 +306,7 @@ jobs: export BRANCH_NAME=master # TODO temp, since template rendering action doesn't work in mac - cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.5/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb + cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.6/g' | sed "s/{{ .Q_BRANCH_NAME 
}}/${BRANCH_NAME}/g" > ./brew/q.rb echo "Resulting formula:" cat ./brew/q.rb @@ -320,8 +320,8 @@ jobs: - name: Upload Executable uses: actions/upload-artifact@v1.0.0 with: - name: q--3.1.5_1.big_sur.bottle.tar.gz - path: ./q--3.1.5_1.big_sur.bottle.tar.gz + name: q--3.1.6_1.big_sur.bottle.tar.gz + path: ./q--3.1.6_1.big_sur.bottle.tar.gz # TODO auto-create PR to main homebrew-core # git clone https://github.com/harelba/homebrew-core.git @@ -331,7 +331,7 @@ jobs: test-mac-packaging: needs: package-mac - if: ${{false}} + continue-on-error: true runs-on: macos-11 steps: - name: Checkout @@ -339,7 +339,7 @@ jobs: - name: Download q bottle uses: actions/download-artifact@v2 with: - name: q--3.1.5_1.big_sur.bottle.tar.gz + name: q--3.1.6_1.big_sur.bottle.tar.gz - name: Test the created bottle run: | set -x -e @@ -348,7 +348,7 @@ jobs: WD=$(pwd) pushd /usr/local/Cellar - tar xvfz ${WD}/q--3.1.5_1.big_sur.bottle.tar.gz + tar xvfz ${WD}/q--3.1.6_1.big_sur.bottle.tar.gz popd brew link q @@ -458,17 +458,17 @@ jobs: # TODO Windows versions do not support the -beta postfix - export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.5.msi + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.6.msi chmod 755 $Q_MSI mkdir -p packages/windows/ - cp $Q_MSI packages/windows/q-text-as-data-3.1.5.msi + cp $Q_MSI packages/windows/q-text-as-data-3.1.6.msi - name: Upload Windows MSI uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.5.msi - path: packages/windows/q-text-as-data-3.1.5.msi + name: q-text-as-data-3.1.6.msi + path: packages/windows/q-text-as-data-3.1.6.msi test-windows-packaging: needs: package-windows @@ -479,12 +479,12 @@ jobs: - name: Download Windows Package uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.5.msi + name: q-text-as-data-3.1.6.msi - name: Test Install of MSI continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/i 
q-text-as-data-3.1.5.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.6.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait $process.ExitCode gc msi-install.log @@ -493,7 +493,7 @@ jobs: continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.5.msi /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-3.1.6.msi /norestart /quiet" -PassThru -Wait $process.ExitCode exit $process.ExitCode diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template index db9c9637..ca53278c 100644 --- a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -4,7 +4,7 @@ class Q < Formula desc "Run SQL directly on CSV or TSV files" homepage "https://harelba.github.io/q/" - url "https://github.com/harelba/q/archive/v3.1.5.tar.gz" + url "https://github.com/harelba/q/archive/v3.1.6.tar.gz" sha256 "0f4656b19087332d5113dd38907b50d70c55a57b3e97f810b8090132412dc9fb" diff --git a/README.markdown b/README.markdown index c207f733..55cbe73e 100644 --- a/README.markdown +++ b/README.markdown @@ -39,7 +39,7 @@ $ q "select count(*) from some_db.sqlite3:::albums a left join another_db.sqlite Detailed examples are in [here](http://harelba.github.io/q/#examples) ## Installation. -**New Major Version `3.1.5` is out with a lot of significant additions.** +**New Major Version `3.1.6` is out with a lot of significant additions.** Instructions for all OSs are [here](http://harelba.github.io/q/#installation). 
diff --git a/bin/q.py b/bin/q.py index ddb6188f..c6d282b8 100755 --- a/bin/q.py +++ b/bin/q.py @@ -35,7 +35,7 @@ from sqlite3.dbapi2 import OperationalError from uuid import uuid4 -q_version = '3.1.5' +q_version = '3.1.6' #__all__ = [ 'QTextAsData' ] diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 426189c9..ddec35e5 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -69,12 +69,12 @@ Download the tool using the links in the [installation](#installation) below and | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/q/releases/download/v3.1.5/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. After the first time, it will run from the command line without any issues. |A man page is available, just run `man q`|| -|[RPM Package](https://github.com/harelba/q/releases/download/v3.1.5/q-text-as-data-3.1.5.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| -|[DEB Package](https://github.com/harelba/q/releases/download/v3.1.5/q-text-as-data-3.1.5-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| -|[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.5/q-text-as-data-3.1.5.msi)|Run the installer executable and hit next next next... 
q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| -|[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.5.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| -|[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.5.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[OSX](https://github.com/harelba/q/releases/download/v3.1.6/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. After the first time, it will run from the command line without any issues. |A man page is available, just run `man q`|| +|[RPM Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. Just enter `man q`.| +|[DEB Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| +|[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.msi)|Run the installer executable and hit next next next... 
q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| +|[Source tar.gz](https://github.com/harelba/q/archive/refs/tags/v3.1.6.tar.gz)|Full source file tree for latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| +|[Source zip](https://github.com/harelba/q/archive/refs/tags/v3.1.6.zip)|Full source file tree for the latest stable version. Note that q.py cannot be used directly anymore, as it requires python dependencies|| I will add packages for additional Linux Distributions if there's demand for it. If you're interested in another Linux distribution, please ping me. It's relatively easy to add new ones with the new packaging flow. diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index 840f174c..da79ba24 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -61,7 +61,7 @@ def make_msi(exe): # The name of your application. "q-text-as-data", # The version of your application. - "3.1.5", + "3.1.6", # The author/manufacturer of your application. 
"Harel Ben-Attia" ) diff --git a/setup.py b/setup.py index 86405b17..6d0fac7e 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup import setuptools -q_version = '3.1.5' +q_version = '3.1.6' with open("README.markdown", "r", encoding="utf-8") as fh: long_description = fh.read() From 50bb452205378da14acc94f656e7a7f834bbc78f Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 27 Nov 2021 02:33:42 +0200 Subject: [PATCH 091/111] docs --- mkdocs/docs/about.md | 2 ++ mkdocs/docs/index.md | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md index 37e3f005..60662ef5 100644 --- a/mkdocs/docs/about.md +++ b/mkdocs/docs/about.md @@ -9,5 +9,7 @@ ### Patreon [harelba](https://www.patreon.com/harelba) All the money received is donated to the [Center for the Prevention and Treatment of Domestic Violence](https://www.gov.il/he/departments/bureaus/molsa-almab-ramla) in my hometown - Ramla, Israel. +Become a Patron! + ### Chinese translation [jinzhencheng@outlook.com](mailto:jinzhencheng@outlook.com) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index ddec35e5..ad1d3086 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -3,7 +3,6 @@ [![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/) [![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) - ## Overview q's purpose is to bring SQL expressive power to the Linux command line by providing easy access to text as actual data. 
From 978209d4578f26d26018e26d671912e514e80b53 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 27 Nov 2021 10:44:04 +0200 Subject: [PATCH 092/111] site docs --- mkdocs/docs/index.md | 69 ++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index ad1d3086..2b4632fd 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -4,13 +4,46 @@ [![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/) ## Overview -q's purpose is to bring SQL expressive power to the Linux command line by providing easy access to text as actual data. +q's purpose is to bring SQL expressive power to the Linux command line by providing easy access to text as actual data, and allowing direct access to multi-file sqlite3 databases. + +```bash + q +``` q allows the following: * Performing SQL-like statements directly on tabular text data, auto-caching the data in order to accelerate additional querying on the same file + +```bash + # Simple query from a file, columns are named c1...cN + q "select c1,c5 from myfile.csv" + + # -d '|' sets the input delimiter, -H says there's a header + q -d , -H "select my_field from myfile.delimited-file-with-pipes" + + # -C readwrite writes a cache for the csv file + q -d , -H "select my_field from myfile.csv" -C readwrite + + # -C read tells q to use the cache + q -d , -H "select my_field from myfile.csv" -C read + + # Setting the default caching mode (`-C`) can be done by writing a `~/.qrc` file +``` + * Performing SQL statements directly on multi-file sqlite3 databases, without having to merge them or load them into memory +```bash + q "select * from mydatabase.sqlite:::my_table_name" + + or + + q "select * from mydatabase.sqlite" + + if the database file contains only one table + + # sqlite files are autodetected, no need for any special filename extension 
+``` + The following table shows the impact of using caching: | Rows | Columns | File Size | Query time without caching | Query time with caching | Speed Improvement | @@ -27,34 +60,9 @@ q treats ordinary files as database tables, and supports all SQL constructs, suc The new features - autocaching, direct querying of sqlite database and the use of `~/.qrc` file are described in detail in [here](https://github.com/harelba/q/blob/master/QSQL-NOTES.md). -## Basic Usage - -```bash - q - -Example Execution for a delimited file: - - q "select * from myfile.csv" - -Example Execution for an sqlite3 database: - - q "select * from mydatabase.sqlite:::my_table_name" - - or - - q "select * from mydatabase.sqlite" - - if the database file contains only one table - -Auto-caching of delimited files can be activated through `-C readwrite` -(writes new caches if needed) or `-C read` (only reads existing cache files) - -Setting the default caching mode (`-C`) can be done by -writing a `~/.qrc` file. See docs for more info. -``` - Download the tool using the links in the [installation](#installation) below and play with it. +### Encodings | | | |:--------------------------------------:|:-----------------------------------------------:| | 完全支持所有的字符编码 | すべての文字エンコーディングを完全にサポート | @@ -266,7 +274,7 @@ Table: /Users/harelben-attia/dev/harelba/q/some-data.csv.qsql `length_of_day_hours` - real ``` -### Usage +## Usage Query should be an SQL-like query which contains filenames instead of table names (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). Take a look at the [limitations](#limitations) section below for some rarely-used use cases which are not fully supported. 
@@ -439,6 +447,11 @@ It's possible to generate a default `.qrc` file by running `q --dump-defaults` a One valuable use-case for this could be setting the caching-mode to `read`. This will make q automatically use generated `.qsql` cache files if they exist. Whenever you want a cache file to be generated, just use `-C readwrite` and a `.qsql` file will be generated if it doesn't exist. +Here's the content of the `~/.qrc` file for enabling cache reads by default: +```bash +[options] +caching_mode=read +``` ## Getting Started Examples This section shows some more basic examples of simple SQL constructs. From e1f2a15bd2b6f5c6b54f061d3895f988bf14c28d Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 27 Nov 2021 10:49:37 +0200 Subject: [PATCH 093/111] fix GA site logic to match new download structure --- mkdocs/docs/js/google-analytics.js | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/mkdocs/docs/js/google-analytics.js b/mkdocs/docs/js/google-analytics.js index 4c0829d7..186332e4 100644 --- a/mkdocs/docs/js/google-analytics.js +++ b/mkdocs/docs/js/google-analytics.js @@ -1,6 +1,7 @@ // Monitor all download links in GA var dlCnt = 0; +var tocCnt = 0; function GAizeDownloadLink(a) { var url = a.href; @@ -8,15 +9,10 @@ function GAizeDownloadLink(a) { if (x != -1) { url = url.substr(0, x); } - var url_test = url.match(/^https?:\/\/.+(\/rpms\/.*\.rpm|\/deb\/.*\.deb|\/single-binary\/Darwin\/.*\/q|\/archive\/.*\.tar\.gz|\/archive\/.*\.zip|\/windows\/.*\.exe)$/i); + var url_test = url.match(/^http.*(archive\/|releases\/)(?.*)/); if (url_test) { - console.log("Converting download link to be GA aware: " + url); - if (url_test.length > 1) { - var event_action = url_test[1]; - } else { - var event_action = 'unknown_action'; - } - a.event_action = event_action; + a.event_action = url_test.groups.path; + console.log("Converting download link to be GA aware: " + url + " . 
download path is " + a.event_action); dlCnt = dlCnt + 1; a.onclick = function() { console.log("Sending GA event for link" + url); @@ -31,6 +27,7 @@ function GAizeDownloadLink(a) { } function GAizeTOCLink(l) { + tocCnt = tocCnt + 1; l.onclick = function() { url_test = l.href.match(/^https?:\/\/.+(#.*)$/i); toc_name = url_test[1]; @@ -55,5 +52,5 @@ window.onload = function() { for (i = 0; i < toc_links.length; i++) { GAizeTOCLink(toc_links[i]); } - console.log("Converted " + dlCnt + " links to be GA aware"); + console.log("Converted " + dlCnt + " download links and " + tocCnt + " TOC links to be GA aware"); } From 69eb1828f95bd0588b0cc6d510db4dccd6c8a26c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 27 Nov 2021 13:40:28 +0200 Subject: [PATCH 094/111] allow pyox to get injected python version, for brew ARM compat + site docs --- .github/workflows/q.rb.brew-formula-template | 3 +-- pyoxidizer.bzl | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template index ca53278c..dc10c78b 100644 --- a/.github/workflows/q.rb.brew-formula-template +++ b/.github/workflows/q.rb.brew-formula-template @@ -9,7 +9,6 @@ class Q < Formula sha256 "0f4656b19087332d5113dd38907b50d70c55a57b3e97f810b8090132412dc9fb" license "GPL-3.0-or-later" - revision 1 depends_on "pyoxidizer" => :build depends_on "python@3.8" => :build @@ -17,7 +16,7 @@ class Q < Formula depends_on xcode: ["12.4", :build] def install - system "pyoxidizer", "build", "--release" + system "pyoxidizer", "build", "--release", "--var", "PYTHON_VERSION", "3.9" bin.install "./build/x86_64-apple-darwin/release/install/q" system "ronn", "--roff", "--section=1", "doc/USAGE.markdown" diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index da79ba24..8a27c4b9 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -3,11 +3,13 @@ # https://pyoxidizer.readthedocs.io/en/stable/ for details of this # configuration file format. 
+PYTHON_VERSION = VARS.get("PYTHON_VERSION","3.8") + # Configuration files consist of functions which define build "targets." # This function creates a Python executable and installs it in a destination # directory. def make_exe(): - dist = default_python_distribution(python_version="3.8") + dist = default_python_distribution(python_version=PYTHON_VERSION) policy = dist.make_python_packaging_policy() policy.set_resource_handling_mode("classify") From 5458f9198d77b0a4e592ecb7eff62f43d5ac5070 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 30 Nov 2021 20:51:54 +0200 Subject: [PATCH 095/111] osx installation instructions to work with the new tap --- mkdocs/docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 2b4632fd..21c5a486 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -76,7 +76,7 @@ Download the tool using the links in the [installation](#installation) below and | Format | Instructions | Comments | :---|:---|:---| -|[OSX](https://github.com/harelba/q/releases/download/v3.1.6/macos-q)|`brew install` will install the previous `2.0.19` for now, until homebrew approves the new version. In the mean time, you can download the new version executable from the link, `chmod +x` it and then run. You might need to run it the first time from Finder using Right-Click -> Open, and then click the Open button. After the first time, it will run from the command line without any issues. |A man page is available, just run `man q`|| +|OSX|Run `brew install harelba/q/q` in order to install q (moved it to its own tap)|A man page is available, just run `man q`|| |[RPM Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. 
Just enter `man q`.| |[DEB Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| |[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| From c28d520102bb98aaa8dd9283e748eb56554d6039 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 30 Nov 2021 22:11:54 +0200 Subject: [PATCH 096/111] docs --- mkdocs/docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index 21c5a486..d766db2d 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -76,7 +76,7 @@ Download the tool using the links in the [installation](#installation) below and | Format | Instructions | Comments | :---|:---|:---| -|OSX|Run `brew install harelba/q/q` in order to install q (moved it to its own tap)|A man page is available, just run `man q`|| +|[OSX]((https://github.com/harelba/q/releases/download/v3.1.6/macos-q))|Run `brew install harelba/q/q` in order to install q (moved it to its own tap), or download the standalone executable directly from the link on the left|A man page is available, just run `man q`|| |[RPM Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. 
Just enter `man q`.| |[DEB Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| |[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| From bef6e37b46a7961bddbf9e7d956494779f3e900f Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Tue, 30 Nov 2021 22:13:09 +0200 Subject: [PATCH 097/111] docs --- mkdocs/docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index d766db2d..a3c55d97 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -76,7 +76,7 @@ Download the tool using the links in the [installation](#installation) below and | Format | Instructions | Comments | :---|:---|:---| -|[OSX]((https://github.com/harelba/q/releases/download/v3.1.6/macos-q))|Run `brew install harelba/q/q` in order to install q (moved it to its own tap), or download the standalone executable directly from the link on the left|A man page is available, just run `man q`|| +|[OSX](https://github.com/harelba/q/releases/download/v3.1.6/macos-q)|Run `brew install harelba/q/q` in order to install q (moved it to its own tap), or download the standalone executable directly from the link on the left|A man page is available, just run `man q`|| |[RPM Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.x86_64.rpm)| run `rpm -ivh ` or `rpm -U ` if you already have an older version of q.| A man page is available for this release. 
Just enter `man q`.| |[DEB Package](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6-1.x86_64.deb)| Run `sudo dpkg -i `|A man page is available for this release. Just enter `man q`. Some installations don't install the man page properly for some reason. I'll fix this soon| |[Windows Installer](https://github.com/harelba/q/releases/download/v3.1.6/q-text-as-data-3.1.6.msi)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new `cmd`/`bash` window after the installation is done.| From 8addcf51ab5efc95875ffa4c37fa947c7e85fd3f Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Thu, 13 Jan 2022 22:27:59 +0200 Subject: [PATCH 098/111] remove mac-packaging from workflow (q moved to its own tap harelba/homebrew-q) --- .github/workflows/build-and-package.yaml | 73 ++++-------------------- 1 file changed, 10 insertions(+), 63 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 6a64a520..18732271 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -4,7 +4,6 @@ on: push: tags: - "v*" - # Remove comment in order to pre-release on a PR, to validate packaging flow pull_request: branches: master paths-ignore: @@ -281,81 +280,30 @@ jobs: run: | set -e -x - find ./ -ls - chmod 755 ./macos-q Q_EXECUTABLE=`pwd`/macos-q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v - package-mac: + not-package-mac: # create-man is not needed, as it's generated inside the brew formula independently needs: [test-mac] - continue-on-error: true runs-on: macos-11 steps: - name: Checkout uses: actions/checkout@v2 - - name: Create homebrew formula and install it + - name: Not Packaging Mac run: | - set -x -e - set -o pipefail - - mkdir brew - - #export BRANCH_NAME=${{ github.event.pull_request.head.ref }} - 
export BRANCH_NAME=master - - # TODO temp, since template rendering action doesn't work in mac - cat .github/workflows/q.rb.brew-formula-template | sed 's/{{ .Q_VERSION }}/3.1.6/g' | sed "s/{{ .Q_BRANCH_NAME }}/${BRANCH_NAME}/g" > ./brew/q.rb + echo "homebrew mac cannot be packaged from the source code itself, due to the package build process of homebrew. See https://github.com/harelba/homebrew-q" - echo "Resulting formula:" - cat ./brew/q.rb - - brew install --display-times --formula --build-bottle --verbose ./brew/q.rb - brew test ./brew/q.rb - - - name: Create q bottle - run: | - brew bottle --force-core-tap --no-rebuild ./brew/q.rb - - name: Upload Executable - uses: actions/upload-artifact@v1.0.0 - with: - name: q--3.1.6_1.big_sur.bottle.tar.gz - path: ./q--3.1.6_1.big_sur.bottle.tar.gz - -# TODO auto-create PR to main homebrew-core -# git clone https://github.com/harelba/homebrew-core.git -# cd homebrew-core -# -# git checkout -b new-q-version - - test-mac-packaging: - needs: package-mac - continue-on-error: true + not-test-mac-packaging: + needs: not-package-mac runs-on: macos-11 steps: - name: Checkout uses: actions/checkout@v2 - - name: Download q bottle - uses: actions/download-artifact@v2 - with: - name: q--3.1.6_1.big_sur.bottle.tar.gz - - name: Test the created bottle + - name: Not Testing Mac Packaging run: | - set -x -e - set -o pipefail - - WD=$(pwd) - - pushd /usr/local/Cellar - tar xvfz ${WD}/q--3.1.6_1.big_sur.bottle.tar.gz - popd - - brew link q - - seq 1 100 | q -c 1 "select sum(c1),count(*) from -" -S test.sqlite - - echo "select sum(c1),count(*) from data_stream_stdin" | sqlite3 test.sqlite + echo "homebrew mac packaging cannot be tested here, due to the package build process of homebrew. 
See https://github.com/harelba/homebrew-q" build-windows: runs-on: windows-latest @@ -500,10 +448,9 @@ jobs: perform-prerelease: # We'd like artifacts to be uploaded regardless of tests succeeded or not, # this is why the dependency here is not on test-X-packaging jobs - needs: [package-linux-deb, package-linux-rpm, package-mac, package-windows] + needs: [package-linux-deb, package-linux-rpm, not-package-mac, package-windows] runs-on: ubuntu-latest - # TODO Push to master will now pre-release as well, until things stabilize - # if: ${{ github.event_name == 'pull_request' }} + if: ${{ github.event_name == 'pull_request' }} steps: - name: Download All Artifacts uses: actions/download-artifact@v2 @@ -525,7 +472,7 @@ jobs: artifacts/**/* perform-release: - needs: [test-mac-packaging, test-deb-packaging, test-rpm-packaging, test-windows-packaging] + needs: [not-test-mac-packaging, test-deb-packaging, test-rpm-packaging, test-windows-packaging] runs-on: ubuntu-latest if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} steps: From a8b671ffcea5b52195485fd78e8e547d2b2d652d Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 16 Jan 2022 08:25:18 +0200 Subject: [PATCH 099/111] update caching information in benchmark results page --- test/BENCHMARK.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/BENCHMARK.md b/test/BENCHMARK.md index 3a4d5732..2162e38c 100644 --- a/test/BENCHMARK.md +++ b/test/BENCHMARK.md @@ -2,6 +2,31 @@ NOTE: *Please don't use or publish this benchmark data yet. See below for details* +# Update +q now provides inherent automatic caching capabilities, writing the CSV/TSV file to a `.qsql` file that sits beside the original file. After the cache exists (created as part of an initial query on a file), q knows to use it behind the scenes without changing the query itself, speeding up performance significantly. 
+ +The following table shows the impact of using caching in q: + +| Rows | Columns | File Size | Query time without caching | Query time with caching | Speed Improvement | +|:---------:|:-------:|:---------:|:--------------------------:|:-----------------------:|:-----------------:| +| 5,000,000 | 100 | 4.8GB | 4 minutes, 47 seconds | 1.92 seconds | x149 | +| 1,000,000 | 100 | 983MB | 50.9 seconds | 0.461 seconds | x110 | +| 1,000,000 | 50 | 477MB | 27.1 seconds | 0.272 seconds | x99 | +| 100,000 | 100 | 99MB | 5.2 seconds | 0.141 seconds | x36 | +| 100,000 | 50 | 48MB | 2.7 seconds | 0.105 seconds | x25 | + +Effectively, `.qsql` files are just standard sqlite3 files, with an additional metadata table that is used for detecting changes in the original delimited file. This means that any tool that can read sqlite3 files can use these files directly. + +As a side-effect from this addition, q knows how to directly query multi-file sqlite3 databases, which means that the user can query any sqlite3 database, or the `.qsql` file, even when the original file doesn't exist anymore. For example: + +```bash +q "select a.*,b.* from my_file.csv.qsql a left join some-sqlite3-database:::some_table_name b on (a.id = b.id)" +``` + +The benchmark results below reflect the peformance without the caching, e.g. directly reading the delimited files, parsing them and performing the query. + +I'll update benchmark results later on to provide cached results as well. + # Overview This just a preliminary benchmark, originally created for validating performance optimizations and suggestions from users, and analyzing q's move to python3. After writing it, I thought it might be interesting to test its speed against textql and octosql as well. 
From ce8733e74b35eb560b9f6ec8ce0c5fab94560f39 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 16 Jan 2022 08:30:51 +0200 Subject: [PATCH 100/111] update benchmark doc --- test/BENCHMARK.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/BENCHMARK.md b/test/BENCHMARK.md index 2162e38c..c8ec866d 100644 --- a/test/BENCHMARK.md +++ b/test/BENCHMARK.md @@ -15,14 +15,17 @@ The following table shows the impact of using caching in q: | 100,000 | 100 | 99MB | 5.2 seconds | 0.141 seconds | x36 | | 100,000 | 50 | 48MB | 2.7 seconds | 0.105 seconds | x25 | -Effectively, `.qsql` files are just standard sqlite3 files, with an additional metadata table that is used for detecting changes in the original delimited file. This means that any tool that can read sqlite3 files can use these files directly. -As a side-effect from this addition, q knows how to directly query multi-file sqlite3 databases, which means that the user can query any sqlite3 database, or the `.qsql` file, even when the original file doesn't exist anymore. For example: +Effectively, `.qsql` files are just standard sqlite3 files, with an additional metadata table that is used for detecting changes in the original delimited file. This means that any tool that can read sqlite3 files can use these files directly. The tradeoff is of course the additional disk usage that the cache files take. + +A good side-effect to this addition, is that q now knows how to directly query multi-file sqlite3 databases. This means that the user can query any sqlite3 database file, or the `.qsql` file itself, even when the original file doesn't exist anymore. For example: ```bash q "select a.*,b.* from my_file.csv.qsql a left join some-sqlite3-database:::some_table_name b on (a.id = b.id)" ``` +NOTE: In the current version, caching is not enabled by default - Use `-C readwrite` to enable reading+writing cache files, or `-C read` to just read any existing cache files. 
A `~/.qrc` file can be added in order to make these options the default if you want. + The benchmark results below reflect the peformance without the caching, e.g. directly reading the delimited files, parsing them and performing the query. I'll update benchmark results later on to provide cached results as well. From 01622020909676ca2aa68ed7d9ea24f798031e9b Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 22 Jan 2022 16:09:23 +0200 Subject: [PATCH 101/111] Generalize Versioning (#288) Still need to test on master whether release by tag actually works --- .github/workflows/build-and-package.yaml | 96 ++++++++++++++------ .github/workflows/q.rb.brew-formula-template | 31 ------- pyoxidizer.bzl | 3 +- 3 files changed, 68 insertions(+), 62 deletions(-) delete mode 100644 .github/workflows/q.rb.brew-formula-template diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 18732271..f4de7666 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -10,8 +10,44 @@ on: - "*.md" - "*.markdown" - "mkdocs/**/*" + tags-ignore: + - "*" jobs: + version_info: + runs-on: ubuntu-18.04 + steps: + - name: Checkout + uses: actions/checkout@v2 + - id: vars + run: | + set -e -x + + echo "github event ref is ${{ github.ref }}" + + if [ "x${{ startsWith(github.ref, 'refs/tags/v') }}" == "xtrue" ] + then + echo "Trigger was a version tag - ${{ github.ref }}" + echo ::set-output name=q_version::${GITHUB_REF#refs/tags/v} + else + # For testing version propagation inside the PR + echo "Either branch of a non-version tag - setting version to 0.0.0" + echo ::set-output name=q_version::0.0.0 + fi + + outputs: + q_version: ${{ steps.vars.outputs.q_version }} + + check_version_info: + runs-on: ubuntu-18.04 + needs: version_info + steps: + - name: test q_version + run: | + set -e -x + + echo q_version is ${{ needs.version_info.outputs.q_version }} + create-man: runs-on: ubuntu-18.04 steps: @@ 
-106,7 +142,7 @@ jobs: Q_EXECUTABLE=`pwd`/linux-q Q_SKIP_EXECUTABLE_VALIDATION=true ./run-tests.sh -v package-linux-deb: - needs: [test-linux, create-man] + needs: [test-linux, create-man, version_info] runs-on: ubuntu-18.04 steps: - name: Checkout @@ -133,25 +169,27 @@ jobs: chmod 755 ./linux-q + export q_version=${{ needs.version_info.outputs.q_version }} + gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-3.1.6-1.x86_64.deb --version 3.1.6 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t deb --deb-use-file-permissions -p packages/linux/q-text-as-data-${q_version}-1.x86_64.deb --version ${q_version} ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload DEB Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.6-1.x86_64.deb - path: packages/linux/q-text-as-data-3.1.6-1.x86_64.deb + name: q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb + path: packages/linux/q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb test-deb-packaging: runs-on: ubuntu-18.04 - needs: package-linux-deb + needs: [package-linux-deb, version_info] steps: - name: Checkout uses: actions/checkout@v2 - name: Download DEB uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.6-1.x86_64.deb + name: q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb - name: Install Python for Testing uses: actions/setup-python@v2 with: @@ -163,10 +201,10 @@ jobs: pip3 install -r test-requirements.txt - name: Test DEB Package Installation - run: ./dist/test-using-deb.sh ./q-text-as-data-3.1.6-1.x86_64.deb + run: ./dist/test-using-deb.sh ./q-text-as-data-${{ needs.version_info.outputs.q_version }}-1.x86_64.deb package-linux-rpm: - needs: [test-linux, create-man] + needs: [test-linux, create-man, version_info] runs-on: ubuntu-18.04 steps: - name: Checkout @@ -189,31 +227,32 @@ jobs: mkdir -p packages/linux 
- find ./ -ls chmod 755 ./linux-q + export q_version=${{ needs.version_info.outputs.q_version }} + gem install fpm cp dist/fpm-config ~/.fpm - fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-3.1.6.x86_64.rpm --version 3.1.6 ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz + fpm -s dir -t rpm --rpm-use-file-permissions -p packages/linux/q-text-as-data-${q_version}.x86_64.rpm --version ${q_version} ./linux-q=/usr/bin/q USAGE.gz=/usr/share/man/man1/q.1.gz - name: Upload RPM Package uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.6.x86_64.rpm - path: packages/linux/q-text-as-data-3.1.6.x86_64.rpm + name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm + path: packages/linux/q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm test-rpm-packaging: runs-on: ubuntu-18.04 - needs: package-linux-rpm + needs: [package-linux-rpm, version_info] steps: - name: Checkout uses: actions/checkout@v2 - name: Download RPM uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.6.x86_64.rpm + name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm - name: Retest using RPM - run: ./dist/test-using-rpm.sh ./q-text-as-data-3.1.6.x86_64.rpm + run: ./dist/test-using-rpm.sh ./q-text-as-data-${{ needs.version_info.outputs.q_version }}.x86_64.rpm build-mac: runs-on: macos-11 @@ -307,6 +346,7 @@ jobs: build-windows: runs-on: windows-latest + needs: version_info steps: - name: Checkout uses: actions/checkout@v2 @@ -331,7 +371,7 @@ jobs: run: | set -e -x - pyoxidizer build --release + pyoxidizer build --release --var Q_VERSION ${{ needs.version_info.outputs.q_version }} export Q_EXECUTABLE=./build/x86_64-pc-windows-msvc/release/install/q chmod 755 $Q_EXECUTABLE @@ -374,7 +414,7 @@ jobs: seq 1 10000 | ./win-q.exe -c 1 "select sum(c1),count(*) from -" -S some-db.sqlite package-windows: - needs: [create-man, not-really-test-windows] + needs: [create-man, 
not-really-test-windows, version_info] runs-on: windows-latest steps: - name: Checkout @@ -400,26 +440,22 @@ jobs: run: | set -e -x - pyoxidizer build --release msi_installer - - find ./ -ls - - # TODO Windows versions do not support the -beta postfix + pyoxidizer build --release msi_installer --var Q_VERSION ${{ needs.version_info.outputs.q_version }} - export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-3.1.6.msi + export Q_MSI=./build/x86_64-pc-windows-msvc/release/msi_installer/q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi chmod 755 $Q_MSI mkdir -p packages/windows/ - cp $Q_MSI packages/windows/q-text-as-data-3.1.6.msi + cp $Q_MSI packages/windows/q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi - name: Upload Windows MSI uses: actions/upload-artifact@v1.0.0 with: - name: q-text-as-data-3.1.6.msi - path: packages/windows/q-text-as-data-3.1.6.msi + name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi + path: packages/windows/q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi test-windows-packaging: - needs: package-windows + needs: [package-windows, version_info] runs-on: windows-latest steps: - name: Checkout @@ -427,12 +463,12 @@ jobs: - name: Download Windows Package uses: actions/download-artifact@v2 with: - name: q-text-as-data-3.1.6.msi + name: q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi - name: Test Install of MSI continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-3.1.6.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/i q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi -l* msi-install.log /norestart /quiet" -PassThru -Wait $process.ExitCode gc msi-install.log @@ -441,7 +477,7 @@ jobs: continue-on-error: true shell: powershell run: | - $process = Start-Process msiexec.exe -ArgumentList "/u 
q-text-as-data-3.1.6.msi /norestart /quiet" -PassThru -Wait + $process = Start-Process msiexec.exe -ArgumentList "/u q-text-as-data-${{ needs.version_info.outputs.q_version }}.msi /norestart /quiet" -PassThru -Wait $process.ExitCode exit $process.ExitCode diff --git a/.github/workflows/q.rb.brew-formula-template b/.github/workflows/q.rb.brew-formula-template deleted file mode 100644 index dc10c78b..00000000 --- a/.github/workflows/q.rb.brew-formula-template +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -# Formula for q -class Q < Formula - desc "Run SQL directly on CSV or TSV files" - homepage "https://harelba.github.io/q/" - url "https://github.com/harelba/q/archive/v3.1.6.tar.gz" - - sha256 "0f4656b19087332d5113dd38907b50d70c55a57b3e97f810b8090132412dc9fb" - - license "GPL-3.0-or-later" - - depends_on "pyoxidizer" => :build - depends_on "python@3.8" => :build - depends_on "ronn" => :build - depends_on xcode: ["12.4", :build] - - def install - system "pyoxidizer", "build", "--release", "--var", "PYTHON_VERSION", "3.9" - bin.install "./build/x86_64-apple-darwin/release/install/q" - - system "ronn", "--roff", "--section=1", "doc/USAGE.markdown" - man1.install "doc/USAGE.1" => "q.1" - end - - test do - seq = (1..100).map(&:to_s).join("\n") - output = pipe_output("#{bin}/q -c 1 'select sum(c1) from -'", seq) - assert_equal "5050\n", output - end -end diff --git a/pyoxidizer.bzl b/pyoxidizer.bzl index 8a27c4b9..1f7fb135 100644 --- a/pyoxidizer.bzl +++ b/pyoxidizer.bzl @@ -4,6 +4,7 @@ # configuration file format. PYTHON_VERSION = VARS.get("PYTHON_VERSION","3.8") +Q_VERSION = VARS.get("Q_VERSION","0.0.1") # Configuration files consist of functions which define build "targets." # This function creates a Python executable and installs it in a destination @@ -63,7 +64,7 @@ def make_msi(exe): # The name of your application. "q-text-as-data", # The version of your application. - "3.1.6", + Q_VERSION, # The author/manufacturer of your application. 
"Harel Ben-Attia" ) From 877005570fe482fb4ed423d4fac986eed8659d27 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 22 Jan 2022 16:18:58 +0200 Subject: [PATCH 102/111] prerelease/release trigger --- .github/workflows/build-and-package.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index f4de7666..e7c223da 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -4,6 +4,7 @@ on: push: tags: - "v*" + branches: master pull_request: branches: master paths-ignore: @@ -29,10 +30,12 @@ jobs: then echo "Trigger was a version tag - ${{ github.ref }}" echo ::set-output name=q_version::${GITHUB_REF#refs/tags/v} + echo ::set-output name=is_release::true else # For testing version propagation inside the PR echo "Either branch of a non-version tag - setting version to 0.0.0" echo ::set-output name=q_version::0.0.0 + echo ::set-output name=is_release::false fi outputs: @@ -484,9 +487,9 @@ jobs: perform-prerelease: # We'd like artifacts to be uploaded regardless of tests succeeded or not, # this is why the dependency here is not on test-X-packaging jobs - needs: [package-linux-deb, package-linux-rpm, not-package-mac, package-windows] + needs: [package-linux-deb, package-linux-rpm, not-package-mac, package-windows, version_info] runs-on: ubuntu-latest - if: ${{ github.event_name == 'pull_request' }} + if: ${{ needs.version_info.outputs.is_release == 'false' }} steps: - name: Download All Artifacts uses: actions/download-artifact@v2 @@ -508,9 +511,9 @@ jobs: artifacts/**/* perform-release: - needs: [not-test-mac-packaging, test-deb-packaging, test-rpm-packaging, test-windows-packaging] + needs: [not-test-mac-packaging, test-deb-packaging, test-rpm-packaging, test-windows-packaging, version_info] runs-on: ubuntu-latest - if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} + if: ${{ 
needs.version_info.outputs.is_release == 'true' }} steps: - name: Download All Artifacts uses: actions/download-artifact@v2 From 2c9ba02159ce039b2612d407d7020d1c1db3227c Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 22 Jan 2022 16:40:03 +0200 Subject: [PATCH 103/111] github action stuff --- .github/workflows/build-and-package.yaml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index e7c223da..715236ca 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -484,12 +484,20 @@ jobs: $process.ExitCode exit $process.ExitCode + test_is_release: + needs: version_info + runs-on: ubuntu-latest + steps: + - name: test1 + run: | + echo ${{ toJson(needs.version_info) }} + perform-prerelease: # We'd like artifacts to be uploaded regardless of tests succeeded or not, # this is why the dependency here is not on test-X-packaging jobs needs: [package-linux-deb, package-linux-rpm, not-package-mac, package-windows, version_info] runs-on: ubuntu-latest - if: ${{ needs.version_info.outputs.is_release == 'false' }} + if: needs.version_info.outputs.is_release == 'false' steps: - name: Download All Artifacts uses: actions/download-artifact@v2 @@ -513,7 +521,7 @@ jobs: perform-release: needs: [not-test-mac-packaging, test-deb-packaging, test-rpm-packaging, test-windows-packaging, version_info] runs-on: ubuntu-latest - if: ${{ needs.version_info.outputs.is_release == 'true' }} + if: needs.version_info.outputs.is_release == 'true' steps: - name: Download All Artifacts uses: actions/download-artifact@v2 From 06a8b8916d8fea6f5beaf7fc8dd6f156ad098935 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 22 Jan 2022 16:42:55 +0200 Subject: [PATCH 104/111] github actions stuff --- .github/workflows/build-and-package.yaml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git 
a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 715236ca..6b770a83 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -49,7 +49,7 @@ jobs: run: | set -e -x - echo q_version is ${{ needs.version_info.outputs.q_version }} + echo "outputs: ${{ needs.version_info }}" create-man: runs-on: ubuntu-18.04 @@ -484,14 +484,6 @@ jobs: $process.ExitCode exit $process.ExitCode - test_is_release: - needs: version_info - runs-on: ubuntu-latest - steps: - - name: test1 - run: | - echo ${{ toJson(needs.version_info) }} - perform-prerelease: # We'd like artifacts to be uploaded regardless of tests succeeded or not, # this is why the dependency here is not on test-X-packaging jobs From d039712c2cbc7537b34d950db685b7ecf538f8a7 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 22 Jan 2022 16:44:39 +0200 Subject: [PATCH 105/111] github actions stuff --- .github/workflows/build-and-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 6b770a83..2e53f693 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -49,7 +49,7 @@ jobs: run: | set -e -x - echo "outputs: ${{ needs.version_info }}" + echo "outputs: ${{ toJson(needs.version_info) }}" create-man: runs-on: ubuntu-18.04 From 2f2d99eba1c23e081d6a359ac943b2aa5e62369f Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 22 Jan 2022 16:47:05 +0200 Subject: [PATCH 106/111] GA stuff --- .github/workflows/build-and-package.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 2e53f693..33682206 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -40,6 +40,7 @@ jobs: outputs: q_version: ${{ steps.vars.outputs.q_version }} + is_release: 
${{ steps.vars.outputs.is_release }} check_version_info: runs-on: ubuntu-18.04 From 0321d6d82529431a68ff899ada787bf45aaaac65 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 22 Jan 2022 17:55:47 +0200 Subject: [PATCH 107/111] Added filename parsing functions --- .github/workflows/build-and-package.yaml | 2 +- bin/q.py | 42 ++++++++++++++++++++++++ test/test_suite.py | 25 ++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-package.yaml b/.github/workflows/build-and-package.yaml index 33682206..37add713 100644 --- a/.github/workflows/build-and-package.yaml +++ b/.github/workflows/build-and-package.yaml @@ -22,7 +22,7 @@ jobs: uses: actions/checkout@v2 - id: vars run: | - set -e -x + set -x -e echo "github event ref is ${{ github.ref }}" diff --git a/bin/q.py b/bin/q.py index c6d282b8..653b8e51 100755 --- a/bin/q.py +++ b/bin/q.py @@ -147,6 +147,28 @@ def sqrt(data): def power(data,p): return data**p +def file_ext(data): + if data is None: + return None + + return os.path.splitext(data)[1] + +def file_folder(data): + if data is None: + return None + return os.path.split(data)[0] + +def file_basename(data): + if data is None: + return None + return os.path.split(data)[1] + +def file_basename_no_ext(data): + if data is None: + return None + + return os.path.split(os.path.splitext(data)[0])[-1] + def percentile(l, p): # TODO Alpha implementation, need to provide multiple interpolation methods, and add tests if not l: @@ -276,6 +298,26 @@ def __init__(self,func_type,name,usage,description,func_or_obj,param_count): "Raise expr1 to the power of expr2", power, 2), + UserFunctionDef(FunctionType.REGULAR, + "file_ext","file_ext() = ", + "Get the extension of a filename", + file_ext, + 1), + UserFunctionDef(FunctionType.REGULAR, + "file_folder","file_folder() = ", + "Get the folder part of a filename", + file_folder, + 1), + UserFunctionDef(FunctionType.REGULAR, + "file_basename","file_basename() = ", + "Get the 
basename of a filename, including extension if any", + file_basename, + 1), + UserFunctionDef(FunctionType.REGULAR, + "file_basename_no_ext","file_basename_no_ext() = ", + "Get the basename of a filename, without the extension if there is one", + file_basename_no_ext, + 1), UserFunctionDef(FunctionType.AGG, "percentile","percentile(,) = ", "Calculate the strict percentile of a set of a values.", diff --git a/test/test_suite.py b/test/test_suite.py index fa8f2ea5..aaa7476f 100755 --- a/test/test_suite.py +++ b/test/test_suite.py @@ -4332,6 +4332,31 @@ def test_power_function(self): self.assertEqual(o[3],six.b('32.0')) self.assertEqual(o[4],six.b('55.9016994375')) + def test_file_functions(self): + filenames = [ + "file1", + "file2.csv", + "/var/tmp/file3", + "/var/tmp/file4.gz", + "" + ] + data = "\n".join(filenames) + + cmd = 'echo "%s" | %s -c 1 -d , "select file_folder(c1),file_ext(c1),file_basename(c1),file_basename_no_ext(c1) from -"' % (data,Q_EXECUTABLE) + retcode, o, e = run_command(cmd) + + self.assertEqual(retcode,0) + self.assertEqual(len(o),5) + self.assertEqual(len(e),0) + self.assertEqual(o,[ + b',,file1,file1', + b',.csv,file2.csv,file2', + b'/var/tmp,,file3,file3', + b'/var/tmp,.gz,file4.gz,file4', + b',,,' + ]) + + def test_sha1_function(self): cmd = 'seq 1 4 | %s -c 1 -d , "select c1,sha1(c1) from -"' % Q_EXECUTABLE retcode, o, e = run_command(cmd) From c49d9fa30143c9c4effcfa38b373e506383e829c Mon Sep 17 00:00:00 2001 From: Marijan Smetko Date: Sat, 22 Jan 2022 20:10:52 +0100 Subject: [PATCH 108/111] Migrate from optparse to argparse (#291) --- bin/q.py | 90 +++++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/bin/q.py b/bin/q.py index 653b8e51..31d10c1a 100755 --- a/bin/q.py +++ b/bin/q.py @@ -43,7 +43,7 @@ import sys import sqlite3 import glob -from optparse import OptionParser,OptionGroup +from argparse import ArgumentParser import codecs import locale import time @@ -3205,7 
+3205,8 @@ def get_option_with_default(p, option_type, option, default): QRC_FILENAME_ENVVAR = 'QRC_FILENAME' def dump_default_values_as_qrc(parser,exclusions): - m = parser.get_default_values().__dict__ + m = parser.parse_args([]).__dict__ + m.pop('leftover') print("[options]",file=sys.stdout) for k in sorted(m.keys()): if k not in exclusions: @@ -3339,106 +3340,103 @@ def initialize_command_line_parser(p, qrc_filename): print("Incorrect value '%s' for option %s in .qrc file %s (option type is %s)" % ( e.actual_value, e.option, qrc_filename, e.option_type)) sys.exit(199) - parser = OptionParser(prog="q",usage=USAGE_TEXT) - parser.add_option("-v", "--version", dest="version", default=False, action="store_true", - help="Print version") - parser.add_option("-V", "--verbose", dest="verbose", default=default_verbose, action="store_true", + parser = ArgumentParser(prog="q",usage=USAGE_TEXT) + parser.add_argument("-v", "--version", action="store_true", help="Print version") + parser.add_argument("-V", "--verbose", default=default_verbose, action="store_true", help="Print debug info in case of problems") - parser.add_option("-S", "--save-db-to-disk", dest="save_db_to_disk_filename", default=default_save_db_to_disk, + parser.add_argument("-S", "--save-db-to-disk", dest="save_db_to_disk_filename", default=default_save_db_to_disk, help="Save database to an sqlite database file") - parser.add_option("-C", "--caching-mode", dest="caching_mode", default=default_caching_mode, + parser.add_argument("-C", "--caching-mode", default=default_caching_mode, help="Choose the autocaching mode (none/read/readwrite). Autocaches files to disk db so further queries will be faster. 
Caching is done to a side-file with the same name of the table, but with an added extension .qsql") - parser.add_option("", "--dump-defaults", dest="dump_defaults", default=False, action="store_true", + parser.add_argument("--dump-defaults", action="store_true", help="Dump all default values for parameters and exit. Can be used in order to make sure .qrc file content is being read properly.") - parser.add_option("", "--max-attached-sqlite-databases", dest="max_attached_sqlite_databases", default=default_max_attached_sqlite_databases,type="int", + parser.add_argument("--max-attached-sqlite-databases", default=default_max_attached_sqlite_databases,type=int, help="Set the maximum number of concurrently-attached sqlite dbs. This is a compile time definition of sqlite. q's performance will slow down once this limit is reached for a query, since it will perform table copies in order to avoid that limit.") # ----------------------------------------------- - input_data_option_group = OptionGroup(parser, "Input Data Options") - input_data_option_group.add_option("-H", "--skip-header", dest="skip_header", default=default_skip_header, + input_data_option_group = parser.add_argument_group("Input Data Options") + input_data_option_group.add_argument("-H", "--skip-header", default=default_skip_header, action="store_true", help="Skip header row. This has been changed from earlier version - Only one header row is supported, and the header row is used for column naming") - input_data_option_group.add_option("-d", "--delimiter", dest="delimiter", default=default_delimiter, + input_data_option_group.add_argument("-d", "--delimiter", default=default_delimiter, help="Field delimiter. 
If none specified, then space is used as the delimiter.") - input_data_option_group.add_option("-p", "--pipe-delimited", dest="pipe_delimited", default=default_pipe_delimited, + input_data_option_group.add_argument("-p", "--pipe-delimited", default=default_pipe_delimited, action="store_true", help="Same as -d '|'. Added for convenience and readability") - input_data_option_group.add_option("-t", "--tab-delimited", dest="tab_delimited", default=default_tab_delimited, + input_data_option_group.add_argument("-t", "--tab-delimited", default=default_tab_delimited, action="store_true", help="Same as -d . Just a shorthand for handling standard tab delimited file You can use $'\\t' if you want (this is how Linux expects to provide tabs in the command line") - input_data_option_group.add_option("-e", "--encoding", dest="encoding", default=default_encoding, + input_data_option_group.add_argument("-e", "--encoding", default=default_encoding, help="Input file encoding. Defaults to UTF-8. set to none for not setting any encoding - faster, but at your own risk...") - input_data_option_group.add_option("-z", "--gzipped", dest="gzipped", default=default_gzipped, action="store_true", + input_data_option_group.add_argument("-z", "--gzipped", default=default_gzipped, action="store_true", help="Data is gzipped. Useful for reading from stdin. For files, .gz means automatic gunzipping") - input_data_option_group.add_option("-A", "--analyze-only", dest="analyze_only", default=default_analyze_only, + input_data_option_group.add_argument("-A", "--analyze-only", default=default_analyze_only, action='store_true', help="Analyze sample input and provide information about data types") - input_data_option_group.add_option("-m", "--mode", dest="mode", default=default_mode, + input_data_option_group.add_argument("-m", "--mode", default=default_mode, help="Data parsing mode. fluffy, relaxed and strict. 
In strict mode, the -c column-count parameter must be supplied as well") - input_data_option_group.add_option("-c", "--column-count", dest="column_count", default=default_column_count, + input_data_option_group.add_argument("-c", "--column-count", default=default_column_count, help="Specific column count when using relaxed or strict mode") - input_data_option_group.add_option("-k", "--keep-leading-whitespace", dest="keep_leading_whitespace_in_values", + input_data_option_group.add_argument("-k", "--keep-leading-whitespace", dest="keep_leading_whitespace_in_values", default=default_keep_leading_whitespace_in_values, action="store_true", help="Keep leading whitespace in values. Default behavior strips leading whitespace off values, in order to provide out-of-the-box usability for simple use cases. If you need to preserve whitespace, use this flag.") - input_data_option_group.add_option("--disable-double-double-quoting", dest="disable_double_double_quoting", + input_data_option_group.add_argument("--disable-double-double-quoting", default=default_disable_double_double_quoting, action="store_false", help="Disable support for double double-quoting for escaping the double quote character. By default, you can use \"\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.") - input_data_option_group.add_option("--disable-escaped-double-quoting", dest="disable_escaped_double_quoting", + input_data_option_group.add_argument("--disable-escaped-double-quoting", default=default_disable_escaped_double_quoting, action="store_false", help="Disable support for escaped double-quoting for escaping the double quote character. By default, you can use \\\" inside double quoted fields to escape double quotes. 
Mainly for backward compatibility.") - input_data_option_group.add_option("--as-text", dest="disable_column_type_detection", + input_data_option_group.add_argument("--as-text", dest="disable_column_type_detection", default=default_disable_column_type_detection, action="store_true", help="Don't detect column types - All columns will be treated as text columns") - input_data_option_group.add_option("-w", "--input-quoting-mode", dest="input_quoting_mode", + input_data_option_group.add_argument("-w", "--input-quoting-mode", default=default_input_quoting_mode, help="Input quoting mode. Possible values are all, minimal and none. Note the slightly misleading parameter name, and see the matching -W parameter for output quoting.") - input_data_option_group.add_option("-M", "--max-column-length-limit", dest="max_column_length_limit", + input_data_option_group.add_argument("-M", "--max-column-length-limit", default=default_max_column_length_limit, help="Sets the maximum column length.") - input_data_option_group.add_option("-U", "--with-universal-newlines", dest="with_universal_newlines", + input_data_option_group.add_argument("-U", "--with-universal-newlines", default=default_with_universal_newlines, action="store_true", help="Expect universal newlines in the data. Limitation: -U works only with regular files for now, stdin or .gz files are not supported yet.") - parser.add_option_group(input_data_option_group) # ----------------------------------------------- - output_data_option_group = OptionGroup(parser, "Output Options") - output_data_option_group.add_option("-D", "--output-delimiter", dest="output_delimiter", + output_data_option_group = parser.add_argument_group("Output Options") + output_data_option_group.add_argument("-D", "--output-delimiter", default=default_output_delimiter, help="Field delimiter for output. 
If none specified, then the -d delimiter is used if present, or space if no delimiter is specified") - output_data_option_group.add_option("-P", "--pipe-delimited-output", dest="pipe_delimited_output", + output_data_option_group.add_argument("-P", "--pipe-delimited-output", default=default_pipe_delimited_output, action="store_true", help="Same as -D '|'. Added for convenience and readability.") - output_data_option_group.add_option("-T", "--tab-delimited-output", dest="tab_delimited_output", + output_data_option_group.add_argument("-T", "--tab-delimited-output", default=default_tab_delimited_output, action="store_true", help="Same as -D . Just a shorthand for outputting tab delimited output. You can use -D $'\\t' if you want.") - output_data_option_group.add_option("-O", "--output-header", dest="output_header", default=default_output_header, + output_data_option_group.add_argument("-O", "--output-header", default=default_output_header, action="store_true", help="Output header line. Output column-names are determined from the query itself. Use column aliases in order to set your column names in the query. For example, 'select name FirstName,value1/value2 MyCalculation from ...'. This can be used even if there was no header in the input.") - output_data_option_group.add_option("-b", "--beautify", dest="beautify", default=default_beautify, + output_data_option_group.add_argument("-b", "--beautify", default=default_beautify, action="store_true", help="Beautify output according to actual values. Might be slow...") - output_data_option_group.add_option("-f", "--formatting", dest="formatting", default=default_formatting, + output_data_option_group.add_argument("-f", "--formatting", default=default_formatting, help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 
1 for first SELECT column etc.") - output_data_option_group.add_option("-E", "--output-encoding", dest="output_encoding", + output_data_option_group.add_argument("-E", "--output-encoding", default=default_output_encoding, help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding") - output_data_option_group.add_option("-W", "--output-quoting-mode", dest="output_quoting_mode", + output_data_option_group.add_argument("-W", "--output-quoting-mode", default=default_output_quoting_mode, help="Output quoting mode. Possible values are all, minimal, nonnumeric and none. Note the slightly misleading parameter name, and see the matching -w parameter for input quoting.") - output_data_option_group.add_option("-L", "--list-user-functions", dest="list_user_functions", + output_data_option_group.add_argument("-L", "--list-user-functions", default=default_list_user_functions, action="store_true", help="List all user functions") - parser.add_option("", "--overwrite-qsql", dest="overwrite_qsql", default=default_overwrite_qsql, + parser.add_argument("--overwrite-qsql", default=default_overwrite_qsql, help="When used, qsql files (both caches and store-to-db) will be overwritten if they already exist. 
Use with care.") - parser.add_option_group(output_data_option_group) # ----------------------------------------------- - query_option_group = OptionGroup(parser, "Query Related Options") - query_option_group.add_option("-q", "--query-filename", dest="query_filename", default=default_query_filename, + query_option_group = parser.add_argument_group("Query Related Options") + query_option_group.add_argument("-q", "--query-filename", default=default_query_filename, help="Read query from the provided filename instead of the command line, possibly using the provided query encoding (using -Q).") - query_option_group.add_option("-Q", "--query-encoding", dest="query_encoding", default=default_query_encoding, + query_option_group.add_argument("-Q", "--query-encoding", default=default_query_encoding, help="query text encoding. Experimental. Please send your feedback on this") - parser.add_option_group(query_option_group) # ----------------------------------------------- - (options, args) = parser.parse_args() - return args, options, parser + parser.add_argument('leftover', nargs='*') + args = parser.parse_args() + return args.leftover, args, parser def parse_qrc_file(): From 4c835b9fa9311f110a09e8f2cddd20367b0e49f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=AD=E4=B9=9D=E9=BC=8E?= <109224573@qq.com> Date: Tue, 25 Jan 2022 01:44:04 +0800 Subject: [PATCH 109/111] Use https for links (#293) --- README.markdown | 6 +++--- bin/q.py | 4 ++-- doc/USAGE.markdown | 2 +- mkdocs/docs/index.md | 2 +- mkdocs/docs/index_cn.md | 2 +- mkdocs/mkdocs.yml | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.markdown b/README.markdown index 55cbe73e..4d5edd5a 100644 --- a/README.markdown +++ b/README.markdown @@ -20,7 +20,7 @@ The following table shows the impact of using caching: Notice that for the current version, caching is **not enabled** by default, since the caches take disk space. 
Use `-C readwrite` or `-C read` to enable it for a query, or add `caching_mode` to `.qrc` to set a new default. -q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. +q's web site is [https://harelba.github.io/q/](https://harelba.github.io/q/) or [https://q.textasdata.wiki](https://q.textasdata.wiki) It contains everything you need to download and use q immediately. ## Usage Examples @@ -36,12 +36,12 @@ $ ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC $ q "select count(*) from some_db.sqlite3:::albums a left join another_db.sqlite3:::tracks t on (a.album_id = t.album_id)" ``` -Detailed examples are in [here](http://harelba.github.io/q/#examples) +Detailed examples are in [here](https://harelba.github.io/q/#examples) ## Installation. **New Major Version `3.1.6` is out with a lot of significant additions.** -Instructions for all OSs are [here](http://harelba.github.io/q/#installation). +Instructions for all OSs are [here](https://harelba.github.io/q/#installation). The previous version `2.0.19` Can still be downloaded from [here](https://github.com/harelba/q/releases/tag/2.0.19) diff --git a/bin/q.py b/bin/q.py index 31d10c1a..2a2f6771 100755 --- a/bin/q.py +++ b/bin/q.py @@ -23,7 +23,7 @@ # # Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line. 
# -# Full Documentation and details in http://harelba.github.io/q/ +# Full Documentation and details in https://harelba.github.io/q/ # # Run with --help for command line details # @@ -2399,7 +2399,7 @@ def print_credentials(): print("q version %s" % q_version, file=sys.stderr) print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr) print("Copyright (C) 2012-2021 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr) - print("http://harelba.github.io/q/", file=sys.stderr) + print("https://harelba.github.io/q/", file=sys.stderr) print(file=sys.stderr) class QWarning(object): diff --git a/doc/USAGE.markdown b/doc/USAGE.markdown index a1a51d4d..f0750864 100644 --- a/doc/USAGE.markdown +++ b/doc/USAGE.markdown @@ -57,7 +57,7 @@ Any standard SQL expression, condition (both WHERE and HAVING), GROUP BY, ORDER JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs. -The SQL syntax itself is sqlite's syntax. For details look at http://www.sqlite.org/lang.html or search the net for examples. +The SQL syntax itself is sqlite's syntax. For details look at https://www.sqlite.org/lang.html or search the net for examples. **NOTE:** Full type detection is implemented, so there is no need for any casting or anything. diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index a3c55d97..f66fab68 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -303,7 +303,7 @@ Query/Input/Output encodings are fully supported (and q tries to provide out-of- JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs. -The SQL syntax itself is sqlite's syntax. For details look at http://www.sqlite.org/lang.html or search the net for examples. +The SQL syntax itself is sqlite's syntax. 
For details look at https://www.sqlite.org/lang.html or search the net for examples. NOTE: When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names. diff --git a/mkdocs/docs/index_cn.md b/mkdocs/docs/index_cn.md index d17f48e0..96cba778 100644 --- a/mkdocs/docs/index_cn.md +++ b/mkdocs/docs/index_cn.md @@ -82,7 +82,7 @@ q 的每一个参数都是由双引号包裹的一条完整的SQL语句。所有 在WHERE条件查询中,支持JOIN操作和子查询,但在FROM子句中并不支持。JOIN操作时,可以为文件起别名。 -SQL语法同sqlite的语法,详情见 http://www.sqlite.org/lang.html 或上网找一些示例。 +SQL语法同sqlite的语法,详情见 https://www.sqlite.org/lang.html 或上网找一些示例。 **注意**: diff --git a/mkdocs/mkdocs.yml b/mkdocs/mkdocs.yml index c21b5d29..473af88c 100644 --- a/mkdocs/mkdocs.yml +++ b/mkdocs/mkdocs.yml @@ -1,6 +1,6 @@ site_name: q - Text as Data -site_url: http://harelba.github.io/q/ -repo_url: http://github.com/harelba/q +site_url: https://harelba.github.io/q/ +repo_url: https://github.com/harelba/q edit_uri: "" site_description: Text as Data - q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files). 
site_author: Harel Ben-Attia From a1da26ce85936b700ccefd1e35c279f9d2b21ecb Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sat, 6 Aug 2022 08:54:36 +0300 Subject: [PATCH 110/111] Create FUNDING.yml --- .github/FUNDING.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..4d68e318 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,3 @@ +# These are supported funding model platforms + +github: harelba From 03e8b395055747a45f8c12480fd4ed95c2b4e906 Mon Sep 17 00:00:00 2001 From: Ivan Bessarabov Date: Thu, 21 Dec 2023 10:09:47 +0000 Subject: [PATCH 111/111] Fixing doc (#325) --- mkdocs/docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md index f66fab68..3d5601ce 100644 --- a/mkdocs/docs/index.md +++ b/mkdocs/docs/index.md @@ -19,7 +19,7 @@ q allows the following: q "select c1,c5 from myfile.csv" # -d '|' sets the input delimiter, -H says there's a header - q -d , -H "select my_field from myfile.delimited-file-with-pipes" + q -d '|' -H "select my_field from myfile.delimited-file-with-pipes" # -C readwrite writes a cache for the csv file q -d , -H "select my_field from myfile.csv" -C readwrite