Skip to content

Commit f87ee7e

Browse files
committed
Merge pull request #1530 from dhermes/happybase-table-helpers
Adding HappyBase table helpers.
2 parents 5d79d58 + a5399c8 commit f87ee7e

2 files changed

Lines changed: 292 additions & 0 deletions

File tree

gcloud/bigtable/happybase/table.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,17 @@
1919

2020
import six
2121

22+
from gcloud._helpers import _datetime_from_microseconds
23+
from gcloud._helpers import _microseconds_from_datetime
24+
from gcloud._helpers import _to_bytes
2225
from gcloud._helpers import _total_seconds
2326
from gcloud.bigtable.column_family import GCRuleIntersection
2427
from gcloud.bigtable.column_family import MaxAgeGCRule
2528
from gcloud.bigtable.column_family import MaxVersionsGCRule
2629
from gcloud.bigtable.happybase.batch import _WAL_SENTINEL
2730
from gcloud.bigtable.happybase.batch import Batch
2831
from gcloud.bigtable.table import Table as _LowLevelTable
32+
from gcloud.bigtable.row import TimestampRange
2933

3034

3135
# Unpacker for the struct format '>q' (big-endian signed 64-bit integer).
# As with any ``Struct.unpack``, the result is a tuple.
_UNPACK_I64 = struct.Struct('>q').unpack
@@ -563,3 +567,155 @@ def _gc_rule_to_dict(gc_rule):
563567
if key1 != key2:
564568
result = {key1: rule1[key1], key2: rule2[key2]}
565569
return result
570+
571+
572+
def _next_char(str_val, index):
    """Return the byte that follows the one found at ``index``.

    :type str_val: str
    :param str_val: Byte string holding the character to increment.

    :type index: int
    :param index: Position in ``str_val`` of the character to increment.

    :rtype: str
    :returns: A single-character byte string whose ordinal is one greater
              than the ordinal of the character at ``index``.
    """
    successor_ord = six.indexbytes(str_val, index) + 1
    successor = chr(successor_ord)
    # Encoding as Latin-1 turns the one-character text back into one byte.
    return _to_bytes(successor, encoding='latin-1')
587+
588+
589+
def _string_successor(str_val):
    """Compute the shortest byte string that sorts after ``str_val``.

    Ordering is plain byte-wise string comparison. The approach mirrors
    the implementation in ``gcloud-golang``: trailing ``0xFF`` bytes are
    discarded and the last remaining byte is incremented by one.

    When nothing is left after discarding the trailing ``0xFF`` bytes
    (the input was empty or consisted only of ``0xFF``), ``''`` is
    returned.

    :type str_val: str
    :param str_val: String to increment.

    :rtype: str
    :returns: The next string in lexical order after ``str_val``.
    """
    as_bytes = _to_bytes(str_val, encoding='latin-1')

    # A trailing 0xFF byte cannot be incremented, so it is dropped.
    trimmed = as_bytes.rstrip(b'\xff')
    if not trimmed:
        return b''

    last = len(trimmed) - 1
    return trimmed[:last] + _next_char(trimmed, last)
621+
622+
623+
def _convert_to_time_range(timestamp=None):
    """Build a timestamp range ending at an HBase / HappyBase timestamp.

    In HBase the timestamp argument is an exclusive upper bound. Cloud
    Bigtable end times are exclusive as well, so the value is used as the
    range end without adjustment.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). Intended to be used as the end of an HBase
                      time range, which is exclusive.

    :rtype: :class:`.TimestampRange`, :data:`NoneType <types.NoneType>`
    :returns: The timestamp range ending at ``timestamp``, or
              :data:`None` when no timestamp was given.
    """
    if timestamp is not None:
        # HappyBase works in milliseconds; the helper expects microseconds.
        end_micros = 1000 * timestamp
        return TimestampRange(end=_datetime_from_microseconds(end_micros))

    return None
644+
645+
646+
def _cells_to_pairs(cells, include_timestamp=False):
    """Translate a list of cells into the HappyBase representation.

    For example::

      >>> import datetime
      >>> from gcloud.bigtable.row_data import Cell
      >>> cell1 = Cell(b'val1', datetime.datetime.utcnow())
      >>> cell2 = Cell(b'val2', datetime.datetime.utcnow())
      >>> _cells_to_pairs([cell1, cell2])
      [b'val1', b'val2']
      >>> _cells_to_pairs([cell1, cell2], include_timestamp=True)
      [(b'val1', 1456361486255), (b'val2', 1456361491927)]

    :type cells: list
    :param cells: List of :class:`.Cell` returned from a read request.

    :type include_timestamp: bool
    :param include_timestamp: Flag to indicate if cell timestamps should be
                              included with the output.

    :rtype: list
    :returns: One entry per cell, in order. Without timestamps each entry
              is the cell value; with ``include_timestamp=True`` each entry
              is a ``(value, milliseconds)`` pair, where the second part
              is the cell timestamp in whole milliseconds.
    """
    if not include_timestamp:
        return [cell.value for cell in cells]

    # Cell timestamps carry microseconds; HappyBase reports milliseconds.
    return [
        (cell.value, _microseconds_from_datetime(cell.timestamp) // 1000)
        for cell in cells
    ]
681+
682+
683+
def _partial_row_to_dict(partial_row_data, include_timestamp=False):
    """Flatten a low-level row data object into a dictionary.

    Only the first cell of every column is kept. Callers are expected to
    read with a ``CellsColumnLimitFilter(1)`` filter, so that first cell
    is the latest value.

    For example::

      >>> import datetime
      >>> from gcloud.bigtable.row_data import Cell, PartialRowData
      >>> cell1 = Cell(b'val1', datetime.datetime.utcnow())
      >>> cell2 = Cell(b'val2', datetime.datetime.utcnow())
      >>> row_data = PartialRowData(b'row-key')
      >>> _partial_row_to_dict(row_data)
      {}
      >>> row_data._cells[u'fam1'] = {b'col1': [cell1], b'col2': [cell2]}
      >>> _partial_row_to_dict(row_data)
      {b'fam1:col2': b'val2', b'fam1:col1': b'val1'}
      >>> _partial_row_to_dict(row_data, include_timestamp=True)
      {b'fam1:col2': (b'val2', 1456361724480),
       b'fam1:col1': (b'val1', 1456361721135)}

    :type partial_row_data: :class:`.row_data.PartialRowData`
    :param partial_row_data: Row data consumed from a stream.

    :type include_timestamp: bool
    :param include_timestamp: Flag to indicate if cell timestamps should be
                              included with the output.

    :rtype: dict
    :returns: The row data converted to a dictionary.
    """
    pairs_by_column = (
        (column, _cells_to_pairs(cells, include_timestamp=include_timestamp))
        for column, cells in six.iteritems(partial_row_data.to_dict()))
    # Keep just the first converted cell of each column.
    return dict((column, pairs[0]) for column, pairs in pairs_by_column)

gcloud/bigtable/happybase/test_table.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,142 @@ def test_with_intersection_two_nested_rules(self):
443443
self.assertTrue(result is gc_rule)
444444

445445

446+
class Test__string_successor(unittest2.TestCase):
    """Unit tests for ``_string_successor``."""

    def _callFUT(self, *args, **kwargs):
        from gcloud.bigtable.happybase.table import _string_successor
        return _string_successor(*args, **kwargs)

    def test_with_alphanumeric(self):
        # Only the final byte changes when it is below 0xFF.
        cases = (
            (b'boa', b'bob'),
            (b'abc1', b'abc2'),
        )
        for value, expected in cases:
            self.assertEqual(self._callFUT(value), expected)

    def test_with_last_byte(self):
        # The trailing 0xFF is dropped and the byte before it is bumped.
        successor = self._callFUT(b'boa\xff')
        self.assertEqual(successor, b'bob')

    def test_with_empty_string(self):
        successor = self._callFUT(b'')
        self.assertEqual(successor, b'')

    def test_with_all_last_bytes(self):
        successor = self._callFUT(b'\xff\xff\xff')
        self.assertEqual(successor, b'')

    def test_with_unicode_input(self):
        # Text input yields a bytes result.
        successor = self._callFUT(u'boa')
        self.assertEqual(successor, b'bob')
467+
468+
469+
class Test__convert_to_time_range(unittest2.TestCase):
    """Unit tests for ``_convert_to_time_range``."""

    def _callFUT(self, timestamp=None):
        from gcloud.bigtable.happybase.table import _convert_to_time_range
        return _convert_to_time_range(timestamp=timestamp)

    def test_null(self):
        self.assertEqual(self._callFUT(timestamp=None), None)

    def test_invalid_type(self):
        # A bare object cannot be scaled from milliseconds to microseconds.
        self.assertRaises(TypeError, self._callFUT, timestamp=object())

    def test_success(self):
        from gcloud._helpers import _datetime_from_microseconds
        from gcloud.bigtable.row import TimestampRange

        millis = 1441928298571
        expected_end = _datetime_from_microseconds(1000 * millis)

        time_range = self._callFUT(timestamp=millis)

        self.assertTrue(isinstance(time_range, TimestampRange))
        self.assertEqual(time_range.start, None)
        self.assertEqual(time_range.end, expected_end)
495+
496+
497+
class Test__cells_to_pairs(unittest2.TestCase):
    """Unit tests for ``_cells_to_pairs``."""

    def _callFUT(self, *args, **kwargs):
        from gcloud.bigtable.happybase.table import _cells_to_pairs
        return _cells_to_pairs(*args, **kwargs)

    def test_without_timestamp(self):
        from gcloud.bigtable.row_data import Cell

        values = ['foo', 'bar']
        cells = [Cell(value=value, timestamp=None) for value in values]

        self.assertEqual(self._callFUT(cells), values)

    def test_with_timestamp(self):
        from gcloud._helpers import _datetime_from_microseconds
        from gcloud.bigtable.row_data import Cell

        # Each expected pair doubles as the recipe for its input cell.
        expected = [
            ('foo', 1221934570148),
            ('bar', 1221955575548),
        ]
        cells = [
            Cell(value=value,
                 timestamp=_datetime_from_microseconds(millis * 1000))
            for value, millis in expected
        ]

        pairs = self._callFUT(cells, include_timestamp=True)
        self.assertEqual(pairs, expected)
531+
532+
533+
class Test__partial_row_to_dict(unittest2.TestCase):
    """Unit tests for ``_partial_row_to_dict``."""

    def _callFUT(self, partial_row_data, include_timestamp=False):
        from gcloud.bigtable.happybase.table import _partial_row_to_dict
        return _partial_row_to_dict(partial_row_data,
                                    include_timestamp=include_timestamp)

    def test_without_timestamp(self):
        from gcloud.bigtable.row_data import Cell
        from gcloud.bigtable.row_data import PartialRowData

        first_value = b'hi-im-bytes'
        second_value = b'bi-im-hytes'
        row_data = PartialRowData(b'row-key')
        row_data._cells[u'fam1'] = {
            b'col1': [Cell(first_value, None)],
            b'col2': [Cell(second_value, None)],
        }

        expected_result = {
            b'fam1:col1': first_value,
            b'fam1:col2': second_value,
        }
        self.assertEqual(self._callFUT(row_data), expected_result)

    def test_with_timestamp(self):
        from gcloud._helpers import _datetime_from_microseconds
        from gcloud.bigtable.row_data import Cell
        from gcloud.bigtable.row_data import PartialRowData

        first_value = b'hi-im-bytes'
        first_millis = 1221934570148
        first_stamp = _datetime_from_microseconds(first_millis * 1000)

        second_value = b'bi-im-hytes'
        second_millis = 1331934880000
        second_stamp = _datetime_from_microseconds(second_millis * 1000)

        row_data = PartialRowData(b'row-key')
        row_data._cells[u'fam1'] = {
            b'col1': [Cell(first_value, first_stamp)],
            b'col2': [Cell(second_value, second_stamp)],
        }

        expected_result = {
            b'fam1:col1': (first_value, first_millis),
            b'fam1:col2': (second_value, second_millis),
        }
        result = self._callFUT(row_data, include_timestamp=True)
        self.assertEqual(result, expected_result)
580+
581+
446582
class _Connection(object):
447583

448584
def __init__(self, cluster):

0 commit comments

Comments
 (0)