From 1d01e543c8c10460ebaa4bea797f7057bd9908de Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 14:53:11 +0100 Subject: [PATCH 01/22] Add happybase.filter with an escape() function The new happybase.filter module will include helper routines to construct Thrift filter string. For now, only an escape() routine has been implemented. Includes test. --- happybase/filter.py | 18 ++++++++++++++++++ tests/test_filter.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 happybase/filter.py create mode 100644 tests/test_filter.py diff --git a/happybase/filter.py b/happybase/filter.py new file mode 100644 index 0000000..a2516fc --- /dev/null +++ b/happybase/filter.py @@ -0,0 +1,18 @@ +""" +Filter module. + +This module provides helper routines to construct Thrift filter strings. +""" + +def escape(s): + """Escape a byte string for use in a filter string + + :param str host: The byte string to escape + :return: Escaped string + :rtype: str + """ + + if not isinstance(s, bytes): + raise TypeError("Only byte strings can be escaped") + + return s.replace("'", "''") diff --git a/tests/test_filter.py b/tests/test_filter.py new file mode 100644 index 0000000..e8ff9e6 --- /dev/null +++ b/tests/test_filter.py @@ -0,0 +1,30 @@ +""" +HappyBase filter tests. 
+""" + +from __future__ import unicode_literals + +from nose.tools import assert_equal, assert_raises + +from happybase.filter import escape + + +def test_escape(): + + assert_raises(TypeError, escape, u'foo') + assert_raises(TypeError, escape, 42) + assert_raises(TypeError, escape, None) + + def check(original, expected): + actual = escape(original) + assert_equal(actual, expected) + + test_values = [ + (b'', b''), + (b'foo', b'foo'), + (b'\x03\x02\x01\x00', b'\x03\x02\x01\x00'), + (b"foo'ba''r", b"foo''ba''''r"), + ] + + for original, expected in test_values: + yield check, original, expected From 9025e85772d0305d0eb0516f312486d4b91cc8f8 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 18:12:37 +0100 Subject: [PATCH 02/22] Add helpers for constructing Filter strings Implement helpers for constructing filter strings. Added all built-in filters and support for serialising integers, strings, and comparison operators. Includes various tests. --- happybase/filter.py | 113 ++++++++++++++++++++++++++++++++++++++++++- tests/test_filter.py | 59 +++++++++++++++++++++- 2 files changed, 170 insertions(+), 2 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index a2516fc..cf69338 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -4,8 +4,33 @@ This module provides helper routines to construct Thrift filter strings. """ +# TODO: support AND, OR, WHILE, SKIP (with operator overloading?) 
+ +from __future__ import unicode_literals as _unicode_literals +from functools import partial as _partial + + +LESS = LT = object() +LESS_OR_EQUAL = LE = object() +EQUAL = EQ = object() +NOT_EQUAL = NE = object() +GREATER_OR_EQUAL = GE = object() +GREATER = GT = object() +NO_OP = object() + +_COMPARISON_OPERATOR_STRINGS = { + LESS: '<', + LESS_OR_EQUAL: '<=', + EQUAL: '=', + NOT_EQUAL: '!=', + GREATER_OR_EQUAL: '>=', + GREATER: '>', + NO_OP: '', +} + + def escape(s): - """Escape a byte string for use in a filter string + """Escape a byte string for use in a filter string. :param str host: The byte string to escape :return: Escaped string @@ -16,3 +41,89 @@ def escape(s): raise TypeError("Only byte strings can be escaped") return s.replace("'", "''") + + +class _Filter: + """Client-side Filter representation. + + This class does not have any filtering logic; it is only used to + build filter strings that the HBase Thrift server can parse and + apply. + """ + def __init__(self, name, *args): + + if isinstance(name, unicode): + name = name.encode('ascii') + + if not isinstance(name, bytes): + raise TypeError("Filter name must be a string") + + self.name = name + self.args = map(self._format_arg, args) + + def _format_arg(self, arg): + if isinstance(arg, int): + return bytes(arg) + + if arg in _COMPARISON_OPERATOR_STRINGS: + return _COMPARISON_OPERATOR_STRINGS[arg] + + if isinstance(arg, bytes): + # TODO: what to do with already escaped strings? + return "'%s'" % escape(arg) + + raise TypeError( + "Filter arguments must be integers, comparison operators " + "or byte strings; got %r" % arg) + + def __str__(self): + return b'%s(%s)' % (self.name, ', '.join(self.args)) + + +def make_filter(name): + """Define a new filter with the specified name. + + Use this function to specify custom filters that are not included by + default, such as custom filters you wrote yourself and made + available in the HBase server (or newly added filters that are not + yet in HappyBase). 
+ + The callable returned by this function can be used just like the + built-in filters. + + Example:: + + MyCustomFilter = make_filter(b'MyCustomFilter') + f = MyCustomFilter(1, b'foo') + table.scan(..., filter=f) + + :param str name: name of the filter + :return: new filter callable + :rtype: filter callable + """ + return _partial(_Filter, name) + + +# +# Built-in filters (taken from the Thrift docs) +# + +KeyOnlyFilter = make_filter('KeyOnlyFilter') +FirstKeyOnlyFilter = make_filter('FirstKeyOnlyFilter') +PrefixFilter = make_filter('PrefixFilter') +ColumnPrefixFilter = make_filter('ColumnPrefixFilter') +MultipleColumnPrefixFilter = make_filter('MultipleColumnPrefixFilter') +ColumnCountGetFilter = make_filter('ColumnCountGetFilter') +PageFilter = make_filter('PageFilter') +ColumnPaginationFilter = make_filter('ColumnPaginationFilter') +InclusiveStopFilter = make_filter('InclusiveStopFilter') +TimeStampsFilter = make_filter('TimeStampsFilter') +RowFilter = make_filter('RowFilter') +FamilyFilter = make_filter('FamilyFilter') +QualifierFilter = make_filter('QualifierFilter') +QualifierFilter = make_filter('QualifierFilter') +ValueFilter = make_filter('ValueFilter') +DependentColumnFilter = make_filter('DependentColumnFilter') +SingleColumnValueFilter = make_filter('SingleColumnValueFilter') +SingleColumnValueExcludeFilter = make_filter('SingleColumnValueExcludeFilter') +ColumnRangeFilter = make_filter('ColumnRangeFilter') diff --git a/tests/test_filter.py b/tests/test_filter.py index e8ff9e6..80a0c24 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -6,7 +6,8 @@ from nose.tools import assert_equal, assert_raises -from happybase.filter import escape +import happybase.filter as filter +from happybase.filter import escape, make_filter, QualifierFilter def test_escape(): @@ -28,3 +29,59 @@ def check(original, expected): for original, expected in test_values: yield check, original, expected + + +def test_serialization(): + + # Comparison operators + f = 
QualifierFilter( + filter.LESS, + filter.LESS_OR_EQUAL, + filter.EQUAL, + filter.NOT_EQUAL, + filter.GREATER_OR_EQUAL, + filter.GREATER, + ) + exp = b"QualifierFilter(<, <=, =, !=, >=, >)" + assert_equal(exp, bytes(f)) + + # Integers + f = QualifierFilter(12, 13, -1, 0) + exp = b"QualifierFilter(12, 13, -1, 0)" + assert_equal(exp, bytes(f)) + + # Strings + f = QualifierFilter(b'foo', b"foo'bar", b'bar') + exp = b"QualifierFilter('foo', 'foo''bar', 'bar')" + assert_equal(exp, bytes(f)) + + # Mixed args + assert_equal( + b"QualifierFilter(>=, 'foo', 12, 'bar')", + bytes(QualifierFilter(filter.GREATER_OR_EQUAL, b'foo', 12, b'bar')) + ) + + +def test_type_checking(): + assert_raises(TypeError, QualifierFilter, u'foo') + assert_raises(TypeError, QualifierFilter, 3.14) + assert_raises(TypeError, QualifierFilter, object()) + assert_raises(TypeError, QualifierFilter, None) + + +def test_custom_filter(): + + MyCustomFilter = make_filter('MyCustomFilter') + + assert_equal( + b"MyCustomFilter(1, =, 'foo''bar')", + bytes(MyCustomFilter(1, filter.EQUAL, b"foo'bar")) + ) + + with assert_raises(TypeError): + f = make_filter(None) + f(1, 2) + + with assert_raises(TypeError): + f = make_filter, (12) + f(1, 2) From b301276dec8887b4e580f47762962348e443db0a Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 18:16:11 +0100 Subject: [PATCH 03/22] Include happybase.filter in the test coverage report --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ed8626d..89befdf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ stop = 1 verbosity = 2 with-coverage = 1 cover-erase = 1 -cover-package=happybase.connection,happybase.table,happybase.batch,happybase.pool,happybase.util,tests +cover-package=happybase.connection,happybase.table,happybase.batch,happybase.pool,happybase.util,happybase.filter,tests cover-tests = 1 cover-html = 1 cover-html-dir = coverage/ From 375f01f9e266039c125e9b679765120a1222f092 Mon Sep 17 
00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 18:23:57 +0100 Subject: [PATCH 04/22] Add happybase.filter module to API docs --- doc/api.rst | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 2b1e3b7..87c2a94 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -29,6 +29,11 @@ The HappyBase API is organised as follows: The :py:class:`ConnectionPool` class implements a thread-safe connection pool that allows an application to (re)use multiple connections. +:py:mod:`~happybase.filter`: + The :py:mod:`happybase.filter` module provides various helper routines to + construct filter strings to be used with the `filter` argument to + :py:meth:`Table.scan()`. + Connection ========== @@ -56,4 +61,34 @@ Connection pool .. autoclass:: happybase.NoConnectionsAvailable +Scanner filters +=============== + +.. autofunction:: happybase.filter.escape + +.. autofunction:: happybase.filter.make_filter + + +The following filters are defined by default: + +.. class:: happybase.filter.KeyOnlyFilter +.. class:: happybase.filter.FirstKeyOnlyFilter +.. class:: happybase.filter.PrefixFilter +.. class:: happybase.filter.ColumnPrefixFilter +.. class:: happybase.filter.MultipleColumnPrefixFilter +.. class:: happybase.filter.ColumnCountGetFilter +.. class:: happybase.filter.PageFilter +.. class:: happybase.filter.ColumnPaginationFilter +.. class:: happybase.filter.InclusiveStopFilter +.. class:: happybase.filter.TimeStampsFilter +.. class:: happybase.filter.RowFilter +.. class:: happybase.filter.FamilyFilter +.. class:: happybase.filter.QualifierFilter +.. class:: happybase.filter.QualifierFilter +.. class:: happybase.filter.ValueFilter +.. class:: happybase.filter.DependentColumnFilter +.. class:: happybase.filter.SingleColumnValueFilter +.. class:: happybase.filter.SingleColumnValueExcludeFilter +.. class:: happybase.filter.ColumnRangeFilter + .. 
vim: set spell spelllang=en: From fb3b2e5f8dc1dd48c0f97eaec62cfa28de1824b0 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 18:24:16 +0100 Subject: [PATCH 05/22] Expand tutorial with section about advanced scanner filters --- doc/user.rst | 77 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/doc/user.rst b/doc/user.rst index 57964aa..6e3923a 100644 --- a/doc/user.rst +++ b/doc/user.rst @@ -274,15 +274,18 @@ starting with `abc`:: print key, data The scanner examples above only limit the results by row key using the -`row_start`, `row_stop`, and `row_prefix` arguments, but scanners can also -limit results to certain columns, column families, and timestamps, just like -:py:meth:`Table.row` and :py:meth:`Table.rows`. For advanced users, a filter -string can be passed as the `filter` argument. Additionally, the optional +`row_start`, `row_stop`, and `row_prefix` arguments, but scanners can also limit +results to certain columns, column families, and timestamps, just like +:py:meth:`Table.row` and :py:meth:`Table.rows`. Additionally, the optional `limit` argument defines how much data is at most retrieved, and the `batch_size` argument specifies how big the transferred chunks should be. The :py:meth:`Table.scan` API documentation provides more information on the supported scanner options. +Scanners support more advanced filtering techniques by applying filters at the +region servers. See the section on advanced filtering elsewhere in this tutorial +to learn how to use this feature using HappyBase. + Manipulating data ================= @@ -468,7 +471,6 @@ methods can be used to retrieve or set a counter value directly:: :py:meth:`~Table.counter_dec` instead! - Using the connection pool ========================= @@ -555,6 +557,71 @@ operations. This means that the application still has to handle connection errors. 
+Advanced scanner filters +======================== + +In addition to the scanner features described earlier, HBase can filter scanner +results by applying additional filters at the region servers (predicate +push-down). To use this advanced feature from HappyBase, you can provide a +filter string describing the server-side filters and pass it as the `filter` +argument to :py:meth:`Table.scan()`. Example:: + + scanner = table.scan( + row_start=b'aaa', + row_stop=b'eee', + filter=b'KeyOnlyFilter() AND FirstKeyOnlyFilter()', + ) + n_rows = 0 + for row, data in scanner: + n_rows += 1 + +See the HBase documentation for the supported filters and the supported +parameters. See the HBase Thrift documentation for more information about the +filter string syntax. + +Keep in mind that filter strings should be used in *addition* to other ways to +limit the returned scanner data, e.g. by using `row_start` or `columns`. Not +doing so results in horribly slow full table scans at the server. See the HBase +documentation for more information about how to properly use scanner filters. + +Dynamic filter strings +---------------------- + +For many use cases a literal filter string like the one in the example above +will suffice, but in some cases you might want to programmatically build filter +strings to pass to the Thrift server. This is where the +:py:mod:`happybase.filter` module comes into play. This module provides various +helper routines to build filter strings. For example, to construct a filter +string for a `QualifierFilter` you can use something like this:: + + from happybase.filter import QualifierFilter, LESS_OR_EQUAL + + qual = b'column1' + f = QualifierFilter(LESS_OR_EQUAL, qual) + scanner = table.scan(..., filter=f) + +Note that HappyBase does not include any filtering logic itself.
HappyBase does +not check the validity (names and arguments) of the generated filter string, but +only helps with serialising the filter names and properly escaping the arguments +passed to it. + +Using custom filters +-------------------- + +In case you have implemented a custom filter and loaded it in HBase, you can +easily add support for it in HappyBase:: + + from happybase.filter import make_filter, EQUAL + MyCustomFilter = make_filter('MyCustomFilter') + +You can now use the custom filter exactly like the filters provided by default. +If the filter accepts an integer, a comparison operator and a string, you can +use it as follows:: + + f = MyCustomFilter(1, EQUAL, 'foobar') + scanner = table.scan(..., filter=f) + + .. rubric:: Next steps The next step is to try it out for yourself! The :doc:`API documentation ` From ec4a5d9880d78da8679c78088c1e0105f291bc95 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 18:25:03 +0100 Subject: [PATCH 06/22] Update NEWS --- NEWS.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.rst b/NEWS.rst index 762acaa..f1b644c 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -15,6 +15,10 @@ Note: this version is not yet released! * Regenerated bundled Thrift code using Thrift 0.9.0 with the new-style classes flag (`issue #27 `_). +* The new :py:mod:`happybase.filter` module provides improved support for + specifying scanner filters that should be applied at the region server. See + the tutorial and API docs for more information. 
+ HappyBase 0.5 ------------- From 9ebb5d34251faaa5065b2efbfafdead476ee3e76 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 18:29:00 +0100 Subject: [PATCH 07/22] Use byte strings in happybase.filter.escape() --- happybase/filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/happybase/filter.py b/happybase/filter.py index cf69338..f7b8d55 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -40,7 +40,7 @@ def escape(s): if not isinstance(s, bytes): raise TypeError("Only byte strings can be escaped") - return s.replace("'", "''") + return s.replace(b"'", b"''") class _Filter: From 802bb6242232aa1c82fa8a99b4c89f362dd8a4f3 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 18:36:36 +0100 Subject: [PATCH 08/22] Add support for booleans as filter arguments --- happybase/filter.py | 8 ++++++-- tests/test_filter.py | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index f7b8d55..d604184 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -62,9 +62,13 @@ def __init__(self, name, *args): self.args = map(self._format_arg, args) def _format_arg(self, arg): + if isinstance(arg, bool): + return 'true' if arg else 'false' + if isinstance(arg, int): return bytes(arg) + if arg in _COMPARISON_OPERATOR_STRINGS: return _COMPARISON_OPERATOR_STRINGS[arg] @@ -73,8 +77,8 @@ def _format_arg(self, arg): return "'%s'" % escape(arg) raise TypeError( - "Filter arguments must be integers, comparison operators " - "or byte strings; got %r" % arg) + "Filter arguments must be booleans, integers, comparison " + "operators or byte strings; got %r" % arg) def __str__(self): return b'%s(%s)' % (self.name, ', '.join(self.args)) diff --git a/tests/test_filter.py b/tests/test_filter.py index 80a0c24..c75e553 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -45,6 +45,11 @@ def test_serialization(): exp = b"QualifierFilter(<, <=, =, !=, >=, 
>)" assert_equal(exp, bytes(f)) + # Booleans + f = QualifierFilter(True, False) + exp = b"QualifierFilter(true, false)" + assert_equal(exp, bytes(f)) + # Integers f = QualifierFilter(12, 13, -1, 0) exp = b"QualifierFilter(12, 13, -1, 0)" From 45718adc146c2006829a0dd252e0f47c7df3125a Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 23:46:22 +0100 Subject: [PATCH 09/22] Doc updates --- doc/user.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/user.rst b/doc/user.rst index 6e3923a..f52d277 100644 --- a/doc/user.rst +++ b/doc/user.rst @@ -582,7 +582,7 @@ filter string syntax. Keep in mind that filter strings should be used in *addition* to other ways to limit the returned scanner data, e.g. by using `row_start` or `columns`. Not doing so results in horribly slow full table scans at the server. See the HBase -documentation for more information about how to properly use scanner filters. +documentation for more information on properly using scanner filters. Dynamic filter strings ---------------------- @@ -592,19 +592,21 @@ will suffice, but in some cases you might want to programmatically build filter strings to pass to the Thrift server. This is where the :py:mod:`happybase.filter` module comes into play. This module provides various helper routines to build filter strings. For example, to construct a filter -string for a `QualifierFilter` you can use something like this:: +string for a ``QualifierFilter`` you can use something like this:: from happybase.filter import QualifierFilter, LESS_OR_EQUAL qual = b'column1' f = QualifierFilter(LESS_OR_EQUAL, qual) - scanner = table.scan(..., filter=f) + scanner = table.scan(row_prefix=b'...', filter=f) Note that HappyBase does not include any filtering logic itself. HappyBase does not check the validity (names and arguments) of the generated filter string, but only helps with serialising the filter names and properly escaping the arguments passed to it.
+TODO: it handles bool, int and str automatically + Using custom filters -------------------- @@ -619,7 +621,7 @@ If the filter accepts an integer, a comparison operator and a string, you can use it as follows:: f = MyCustomFilter(1, EQUAL, 'foobar') - scanner = table.scan(..., filter=f) + scanner = table.scan(row_prefix=b'...', filter=f) .. rubric:: Next steps From 8dac862503db18940a755e6e07c307f0f6332e76 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Sun, 27 Jan 2013 23:52:19 +0100 Subject: [PATCH 10/22] Add filter string helpers for AND and OR Added AND(...) and OR(...) callables, and implemented overloaded operators for & and | for Filter instances. --- happybase/filter.py | 32 ++++++++++++++++++++++++++++-- tests/test_filter.py | 46 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index d604184..dcc1943 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -43,7 +43,11 @@ def escape(s): return s.replace(b"'", b"''") -class _Filter: +class _Node(object): + pass + + +class _Filter(_Node): """Client-side Filter representation. 
This class does not have any filtering logic; it is only used to @@ -68,7 +72,6 @@ def _format_arg(self, arg): if isinstance(arg, int): return bytes(arg) - if arg in _COMPARISON_OPERATOR_STRINGS: return _COMPARISON_OPERATOR_STRINGS[arg] @@ -83,6 +86,31 @@ def _format_arg(self, arg): def __str__(self): return b'%s(%s)' % (self.name, ', '.join(self.args)) + def __and__(self, other): + return AND(self, other) + + def __or__(self, rhs): + return OR(self, rhs) + + +class _BinaryOperatorNode(_Node): + + def __init__(self, operator, lhs, rhs): + self.operator = operator + self.lhs = lhs + self.rhs = rhs + + def __str__(self): + return b'(%s %s %s)' % (self.lhs, self.operator, self.rhs) + + +def AND(lhs, rhs): + return _BinaryOperatorNode('AND', lhs, rhs) + + +def OR(lhs, rhs): + return _BinaryOperatorNode('OR', lhs, rhs) + def make_filter(name): """Define a new filter with the specified name. diff --git a/tests/test_filter.py b/tests/test_filter.py index c75e553..c7cf8c0 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -6,8 +6,19 @@ from nose.tools import assert_equal, assert_raises -import happybase.filter as filter -from happybase.filter import escape, make_filter, QualifierFilter +from happybase.filter import ( + AND, + EQUAL, + escape, + GREATER, + GREATER_OR_EQUAL, + LESS, + LESS_OR_EQUAL, + make_filter, + NOT_EQUAL, + OR, + QualifierFilter, +) def test_escape(): @@ -35,12 +46,12 @@ def test_serialization(): # Comparison operators f = QualifierFilter( - filter.LESS, - filter.LESS_OR_EQUAL, - filter.EQUAL, - filter.NOT_EQUAL, - filter.GREATER_OR_EQUAL, - filter.GREATER, + LESS, + LESS_OR_EQUAL, + EQUAL, + NOT_EQUAL, + GREATER_OR_EQUAL, + GREATER, ) exp = b"QualifierFilter(<, <=, =, !=, >=, >)" assert_equal(exp, bytes(f)) @@ -63,7 +74,7 @@ def test_serialization(): # Mixed args assert_equal( b"QualifierFilter(>=, 'foo', 12, 'bar')", - bytes(QualifierFilter(filter.GREATER_OR_EQUAL, b'foo', 12, b'bar')) + bytes(QualifierFilter(GREATER_OR_EQUAL, b'foo', 12, 
b'bar')) ) @@ -80,7 +91,7 @@ def test_custom_filter(): assert_equal( b"MyCustomFilter(1, =, 'foo''bar')", - bytes(MyCustomFilter(1, filter.EQUAL, b"foo'bar")) + bytes(MyCustomFilter(1, EQUAL, b"foo'bar")) ) with assert_raises(TypeError): @@ -90,3 +101,18 @@ def test_custom_filter(): with assert_raises(TypeError): f = make_filter, (12) f(1, 2) + + +def test_operators(): + + def check(expected, original): + actual = bytes(original) + assert_equal(actual, expected) + + f = b"(QualifierFilter('foo') AND QualifierFilter('bar'))" + check(f, AND(QualifierFilter(b'foo'), QualifierFilter(b'bar'))) + check(f, QualifierFilter(b'foo') & QualifierFilter(b'bar')) + + f = b"(QualifierFilter('foo') OR QualifierFilter('bar'))" + check(f, OR(QualifierFilter(b'foo'), QualifierFilter(b'bar'))) + check(f, QualifierFilter(b'foo') | QualifierFilter(b'bar')) From 37330b149e75731ecf64033a6b16d477ed8f170c Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Mon, 28 Jan 2013 00:02:04 +0100 Subject: [PATCH 11/22] Use filter with shorter name for the tests --- tests/test_filter.py | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_filter.py b/tests/test_filter.py index c7cf8c0..0853d08 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -17,7 +17,7 @@ make_filter, NOT_EQUAL, OR, - QualifierFilter, + ValueFilter, ) @@ -45,7 +45,7 @@ def check(original, expected): def test_serialization(): # Comparison operators - f = QualifierFilter( + f = ValueFilter( LESS, LESS_OR_EQUAL, EQUAL, @@ -53,36 +53,36 @@ def test_serialization(): GREATER_OR_EQUAL, GREATER, ) - exp = b"QualifierFilter(<, <=, =, !=, >=, >)" + exp = b"ValueFilter(<, <=, =, !=, >=, >)" assert_equal(exp, bytes(f)) # Booleans - f = QualifierFilter(True, False) - exp = b"QualifierFilter(true, false)" + f = ValueFilter(True, False) + exp = b"ValueFilter(true, false)" assert_equal(exp, bytes(f)) # Integers - f = QualifierFilter(12, 13, -1, 0) - exp = 
b"QualifierFilter(12, 13, -1, 0)" + f = ValueFilter(12, 13, -1, 0) + exp = b"ValueFilter(12, 13, -1, 0)" assert_equal(exp, bytes(f)) # Strings - f = QualifierFilter(b'foo', b"foo'bar", b'bar') - exp = b"QualifierFilter('foo', 'foo''bar', 'bar')" + f = ValueFilter(b'foo', b"foo'bar", b'bar') + exp = b"ValueFilter('foo', 'foo''bar', 'bar')" assert_equal(exp, bytes(f)) # Mixed args assert_equal( - b"QualifierFilter(>=, 'foo', 12, 'bar')", - bytes(QualifierFilter(GREATER_OR_EQUAL, b'foo', 12, b'bar')) + b"ValueFilter(>=, 'foo', 12, 'bar')", + bytes(ValueFilter(GREATER_OR_EQUAL, b'foo', 12, b'bar')) ) def test_type_checking(): - assert_raises(TypeError, QualifierFilter, u'foo') - assert_raises(TypeError, QualifierFilter, 3.14) - assert_raises(TypeError, QualifierFilter, object()) - assert_raises(TypeError, QualifierFilter, None) + assert_raises(TypeError, ValueFilter, u'foo') + assert_raises(TypeError, ValueFilter, 3.14) + assert_raises(TypeError, ValueFilter, object()) + assert_raises(TypeError, ValueFilter, None) def test_custom_filter(): @@ -103,16 +103,16 @@ def test_custom_filter(): f(1, 2) -def test_operators(): +def test_binary_operators(): def check(expected, original): actual = bytes(original) assert_equal(actual, expected) - f = b"(QualifierFilter('foo') AND QualifierFilter('bar'))" - check(f, AND(QualifierFilter(b'foo'), QualifierFilter(b'bar'))) - check(f, QualifierFilter(b'foo') & QualifierFilter(b'bar')) + f = b"(ValueFilter('foo') AND ValueFilter('bar'))" + check(f, AND(ValueFilter(b'foo'), ValueFilter(b'bar'))) + check(f, ValueFilter(b'foo') & ValueFilter(b'bar')) - f = b"(QualifierFilter('foo') OR QualifierFilter('bar'))" - check(f, OR(QualifierFilter(b'foo'), QualifierFilter(b'bar'))) - check(f, QualifierFilter(b'foo') | QualifierFilter(b'bar')) + f = b"(ValueFilter('foo') OR ValueFilter('bar'))" + check(f, OR(ValueFilter(b'foo'), ValueFilter(b'bar'))) + check(f, ValueFilter(b'foo') | ValueFilter(b'bar')) From 01fd06e1c6f89e98cf4f033cd7068867ac377d79 
Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Mon, 28 Jan 2013 00:04:55 +0100 Subject: [PATCH 12/22] Add filter string helpers for SKIP and WHILE --- happybase/filter.py | 22 +++++++++++++++++----- tests/test_filter.py | 15 +++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index dcc1943..a04e7ec 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -4,8 +4,6 @@ This module provides helper routines to construct Thrift filter strings. """ -# TODO: support AND, OR, WHILE, SKIP (with operator overloading?) - from __future__ import unicode_literals as _unicode_literals from functools import partial as _partial @@ -67,7 +65,7 @@ def __init__(self, name, *args): def _format_arg(self, arg): if isinstance(arg, bool): - return 'true' if arg else 'false' + return b'true' if arg else b'false' if isinstance(arg, int): return bytes(arg) @@ -93,6 +91,15 @@ def __or__(self, rhs): return OR(self, rhs) +class _UnaryOperatorNode(_Node): + def __init__(self, operator, value): + self.operator = operator + self.value = value + + def __str__(self): + return b'%s (%s)' % (self.operator, self.value) + + class _BinaryOperatorNode(_Node): def __init__(self, operator, lhs, rhs): @@ -103,13 +110,18 @@ def __init__(self, operator, lhs, rhs): def __str__(self): return b'(%s %s %s)' % (self.lhs, self.operator, self.rhs) +def SKIP(f): + return _UnaryOperatorNode(b'SKIP', f) + +def WHILE(f): + return _UnaryOperatorNode(b'WHILE', f) def AND(lhs, rhs): - return _BinaryOperatorNode('AND', lhs, rhs) + return _BinaryOperatorNode(b'AND', lhs, rhs) def OR(lhs, rhs): - return _BinaryOperatorNode('OR', lhs, rhs) + return _BinaryOperatorNode(b'OR', lhs, rhs) def make_filter(name): diff --git a/tests/test_filter.py b/tests/test_filter.py index 0853d08..2fe2183 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -17,7 +17,9 @@ make_filter, NOT_EQUAL, OR, + SKIP, ValueFilter, + WHILE, ) @@ -103,6 +105,19 @@ def 
test_custom_filter(): f(1, 2) +def test_unary_operators(): + + assert_equal( + b'SKIP (ValueFilter())', + bytes(SKIP(ValueFilter())) + ) + + assert_equal( + b'WHILE (ValueFilter())', + bytes(WHILE(ValueFilter())) + ) + + def test_binary_operators(): def check(expected, original): From b2e61867fb12f7828b8b7cb5d2e475622081b1bf Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Mon, 28 Jan 2013 00:12:19 +0100 Subject: [PATCH 13/22] Add TODO item about comparators in filters --- happybase/filter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/happybase/filter.py b/happybase/filter.py index a04e7ec..35fdd52 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -4,6 +4,8 @@ This module provides helper routines to construct Thrift filter strings. """ +# TODO: add support for comparators (regex, substring, and so on) + from __future__ import unicode_literals as _unicode_literals from functools import partial as _partial From efadeb3b75f311634a3b2567f348bb8def0ca894 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 20:52:48 +0100 Subject: [PATCH 14/22] PEP8 --- happybase/filter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/happybase/filter.py b/happybase/filter.py index 35fdd52..0d56121 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -112,12 +112,15 @@ def __init__(self, operator, lhs, rhs): def __str__(self): return b'(%s %s %s)' % (self.lhs, self.operator, self.rhs) + def SKIP(f): return _UnaryOperatorNode(b'SKIP', f) + def WHILE(f): return _UnaryOperatorNode(b'WHILE', f) + def AND(lhs, rhs): return _BinaryOperatorNode(b'AND', lhs, rhs) From baefa40bafc1397f12142aa33cbe0224ddd727f6 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 21:08:25 +0100 Subject: [PATCH 15/22] Simplify test code --- tests/test_filter.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tests/test_filter.py b/tests/test_filter.py index 2fe2183..edadcc6 100644 --- 
a/tests/test_filter.py +++ b/tests/test_filter.py @@ -44,7 +44,7 @@ def check(original, expected): yield check, original, expected -def test_serialization(): +def test_filter_serialization(): # Comparison operators f = ValueFilter( @@ -107,14 +107,16 @@ def test_custom_filter(): def test_unary_operators(): + F = make_filter('F') + assert_equal( - b'SKIP (ValueFilter())', - bytes(SKIP(ValueFilter())) + b'SKIP (F())', + bytes(SKIP(F())) ) assert_equal( - b'WHILE (ValueFilter())', - bytes(WHILE(ValueFilter())) + b'WHILE (F())', + bytes(WHILE(F())) ) @@ -124,10 +126,12 @@ def check(expected, original): actual = bytes(original) assert_equal(actual, expected) - f = b"(ValueFilter('foo') AND ValueFilter('bar'))" - check(f, AND(ValueFilter(b'foo'), ValueFilter(b'bar'))) - check(f, ValueFilter(b'foo') & ValueFilter(b'bar')) + F = make_filter('F') + + f = b"(F(1) AND F(2))" + check(f, AND(F(1), F(2))) + check(f, F(1) & F(2)) - f = b"(ValueFilter('foo') OR ValueFilter('bar'))" - check(f, OR(ValueFilter(b'foo'), ValueFilter(b'bar'))) - check(f, ValueFilter(b'foo') | ValueFilter(b'bar')) + f = b"(F(1) OR F(2))" + check(f, OR(F(1), F(2))) + check(f, F(1) | F(2)) From eecbd061ced51a6b14565889d4c5e2bd241f0dab Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 21:10:59 +0100 Subject: [PATCH 16/22] Simplify unary operator rendering --- happybase/filter.py | 11 ++++++++--- tests/test_filter.py | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index 0d56121..5543972 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -47,7 +47,7 @@ class _Node(object): pass -class _Filter(_Node): +class _FilterNode(_Node): """Client-side Filter representation. 
This class does not have any filtering logic; it is only used to @@ -95,11 +95,16 @@ def __or__(self, rhs): class _UnaryOperatorNode(_Node): def __init__(self, operator, value): + if not isinstance(value, _FilterNode): + raise TypeError( + "'SKIP' and 'WHILE' can only be applied to Filters; " + "got %r" % value) + self.operator = operator self.value = value def __str__(self): - return b'%s (%s)' % (self.operator, self.value) + return b'%s %s' % (self.operator, self.value) class _BinaryOperatorNode(_Node): @@ -150,7 +155,7 @@ def make_filter(name): :return: new filter callable :rtype: filter callable """ - return _partial(_Filter, name) + return _partial(_FilterNode, name) # diff --git a/tests/test_filter.py b/tests/test_filter.py index edadcc6..2977697 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -110,12 +110,12 @@ def test_unary_operators(): F = make_filter('F') assert_equal( - b'SKIP (F())', + b'SKIP F()', bytes(SKIP(F())) ) assert_equal( - b'WHILE (F())', + b'WHILE F()', bytes(WHILE(F())) ) From 732ebecbd7abefc6fd849a0dcc58b3f2dda2ae8f Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 21:15:27 +0100 Subject: [PATCH 17/22] Add classes for all filter operators --- happybase/filter.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index 5543972..cac15e8 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -43,6 +43,10 @@ def escape(s): return s.replace(b"'", b"''") +# +# Internal node classes +# + class _Node(object): pass @@ -94,23 +98,28 @@ def __or__(self, rhs): class _UnaryOperatorNode(_Node): - def __init__(self, operator, value): + def __init__(self, value): if not isinstance(value, _FilterNode): raise TypeError( "'SKIP' and 'WHILE' can only be applied to Filters; " "got %r" % value) - self.operator = operator self.value = value def __str__(self): return b'%s %s' % (self.operator, self.value) -class 
_BinaryOperatorNode(_Node): +class _SkipNode(_UnaryOperatorNode): + operator = 'SKIP' + - def __init__(self, operator, lhs, rhs): - self.operator = operator +class _WhileNode(_UnaryOperatorNode): + operator = 'WHILE' + + +class _BooleanOperatorNode(_Node): + def __init__(self, lhs, rhs): self.lhs = lhs self.rhs = rhs @@ -118,20 +127,32 @@ def __str__(self): return b'(%s %s %s)' % (self.lhs, self.operator, self.rhs) +class _AndNode(_BooleanOperatorNode): + operator = 'AND' + + +class _OrNode(_BooleanOperatorNode): + operator = 'OR' + + +# +# Public API for constructing nodes +# + def SKIP(f): - return _UnaryOperatorNode(b'SKIP', f) + return _SkipNode(f) def WHILE(f): - return _UnaryOperatorNode(b'WHILE', f) + return _WhileNode(f) def AND(lhs, rhs): - return _BinaryOperatorNode(b'AND', lhs, rhs) + return _AndNode(lhs, rhs) def OR(lhs, rhs): - return _BinaryOperatorNode(b'OR', lhs, rhs) + return _OrNode(lhs, rhs) def make_filter(name): From d9956fd1fce198ee59242b689a0ecdb749d5b49c Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 21:26:49 +0100 Subject: [PATCH 18/22] Make _format_arg() a function --- happybase/filter.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index cac15e8..d5e9b8f 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -43,6 +43,25 @@ def escape(s): return s.replace(b"'", b"''") +def _format_arg(arg): + if isinstance(arg, bool): + return b'true' if arg else b'false' + + if isinstance(arg, int): + return bytes(arg) + + if arg in _COMPARISON_OPERATOR_STRINGS: + return _COMPARISON_OPERATOR_STRINGS[arg] + + if isinstance(arg, bytes): + # TODO: what to do with already escaped strings? 
+ return "'%s'" % escape(arg) + + raise TypeError( + "Filter arguments must be booleans, integers, comparison " + "operators or byte strings; got %r" % arg) + + # # Internal node classes # @@ -67,25 +86,7 @@ def __init__(self, name, *args): raise TypeError("Filter name must be a string") self.name = name - self.args = map(self._format_arg, args) - - def _format_arg(self, arg): - if isinstance(arg, bool): - return b'true' if arg else b'false' - - if isinstance(arg, int): - return bytes(arg) - - if arg in _COMPARISON_OPERATOR_STRINGS: - return _COMPARISON_OPERATOR_STRINGS[arg] - - if isinstance(arg, bytes): - # TODO: what to do with already escaped strings? - return "'%s'" % escape(arg) - - raise TypeError( - "Filter arguments must be booleans, integers, comparison " - "operators or byte strings; got %r" % arg) + self.args = map(_format_arg, args) def __str__(self): return b'%s(%s)' % (self.name, ', '.join(self.args)) From 545e37ee4ff6124f0ca0ed1200d7545ef3b17088 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 21:29:02 +0100 Subject: [PATCH 19/22] Drop parentheses around boolean operators --- happybase/filter.py | 2 +- tests/test_filter.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index d5e9b8f..2e9be28 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -125,7 +125,7 @@ def __init__(self, lhs, rhs): self.rhs = rhs def __str__(self): - return b'(%s %s %s)' % (self.lhs, self.operator, self.rhs) + return b'%s %s %s' % (self.lhs, self.operator, self.rhs) class _AndNode(_BooleanOperatorNode): diff --git a/tests/test_filter.py b/tests/test_filter.py index 2977697..14ba1e8 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -120,7 +120,7 @@ def test_unary_operators(): ) -def test_binary_operators(): +def test_boolean_operators(): def check(expected, original): actual = bytes(original) @@ -128,10 +128,12 @@ def check(expected, original): F = 
make_filter('F') - f = b"(F(1) AND F(2))" + # Two arguments + + f = b'F(1) AND F(2)' check(f, AND(F(1), F(2))) check(f, F(1) & F(2)) - f = b"(F(1) OR F(2))" + f = b'F(1) OR F(2)' check(f, OR(F(1), F(2))) check(f, F(1) | F(2)) From 54bbb26b9d3b7640578057500bda82c0e0a39e37 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 21:32:38 +0100 Subject: [PATCH 20/22] Support >2 operands for boolean operators --- happybase/filter.py | 16 ++++++++-------- tests/test_filter.py | 22 ++++++++++++++++++++-- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index 2e9be28..84f0e47 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -120,12 +120,12 @@ class _WhileNode(_UnaryOperatorNode): class _BooleanOperatorNode(_Node): - def __init__(self, lhs, rhs): - self.lhs = lhs - self.rhs = rhs + def __init__(self, *operands): + self.operands = list(operands) def __str__(self): - return b'%s %s %s' % (self.lhs, self.operator, self.rhs) + glue = b' %s ' % self.operator + return glue.join(map(bytes, self.operands)) class _AndNode(_BooleanOperatorNode): @@ -148,12 +148,12 @@ def WHILE(f): return _WhileNode(f) -def AND(lhs, rhs): - return _AndNode(lhs, rhs) +def AND(*operands): + return _AndNode(*operands) -def OR(lhs, rhs): - return _OrNode(lhs, rhs) +def OR(*operands): + return _OrNode(*operands) def make_filter(name): diff --git a/tests/test_filter.py b/tests/test_filter.py index 14ba1e8..129cc70 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -128,8 +128,6 @@ def check(expected, original): F = make_filter('F') - # Two arguments - f = b'F(1) AND F(2)' check(f, AND(F(1), F(2))) check(f, F(1) & F(2)) @@ -137,3 +135,23 @@ def check(expected, original): f = b'F(1) OR F(2)' check(f, OR(F(1), F(2))) check(f, F(1) | F(2)) + + check( + b'F(1) AND F(2) AND F(3)', + AND(F(1), F(2), F(3)) + ) + + # check( + # b'F(1) AND F(2) AND F(3)', + # F(1) & F(2) & F(3) + # ) + + # check( + # b'F(1) AND F(2) OR 
F(3)', + # F(1) & F(2) | F(3) + # ) + + # check( + # b'F(1) AND (F(2) OR F(3))', + # F(1) & (F(2) | F(3)) + # ) From 2835e418ca7a7f8ae631fc21fa845708551033e5 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Tue, 29 Jan 2013 21:42:26 +0100 Subject: [PATCH 21/22] Work some more on boolean operator nodes --- happybase/filter.py | 27 +++++++++++++++++++-------- tests/test_filter.py | 29 +++++++++++++++-------------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/happybase/filter.py b/happybase/filter.py index 84f0e47..d81e80e 100644 --- a/happybase/filter.py +++ b/happybase/filter.py @@ -67,7 +67,11 @@ def _format_arg(arg): # class _Node(object): - pass + def __and__(self, other): + return AND(self, other) + + def __or__(self, rhs): + return OR(self, rhs) class _FilterNode(_Node): @@ -91,12 +95,6 @@ def __init__(self, name, *args): def __str__(self): return b'%s(%s)' % (self.name, ', '.join(self.args)) - def __and__(self, other): - return AND(self, other) - - def __or__(self, rhs): - return OR(self, rhs) - class _UnaryOperatorNode(_Node): def __init__(self, value): @@ -121,20 +119,33 @@ class _WhileNode(_UnaryOperatorNode): class _BooleanOperatorNode(_Node): def __init__(self, *operands): - self.operands = list(operands) + self.operands = operands def __str__(self): glue = b' %s ' % self.operator return glue.join(map(bytes, self.operands)) + def _extend(self, other): + if isinstance(other, self.__class__): + operands = self.operands + (other,) + return self.__class__(*operands) + else: + return self.__class__(self, other) + class _AndNode(_BooleanOperatorNode): operator = 'AND' + def __and__(self, other): + return self._extend(other) + class _OrNode(_BooleanOperatorNode): operator = 'OR' + def __or__(self, other): + return self._extend + # # Public API for constructing nodes diff --git a/tests/test_filter.py b/tests/test_filter.py index 129cc70..c3d4edb 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -141,17 +141,18 @@ def 
check(expected, original): AND(F(1), F(2), F(3)) ) - # check( - # b'F(1) AND F(2) AND F(3)', - # F(1) & F(2) & F(3) - # ) - - # check( - # b'F(1) AND F(2) OR F(3)', - # F(1) & F(2) | F(3) - # ) - - # check( - # b'F(1) AND (F(2) OR F(3))', - # F(1) & (F(2) | F(3)) - # ) + check( + b'F(1) AND F(2) AND F(3)', + F(1) & F(2) & F(3) + ) + + check( + b'F(1) AND F(2) OR F(3)', + F(1) & F(2) | F(3) + ) + + # FIXME: precedence stuff doesn't work correctly + check( + b'F(1) AND (F(2) OR F(3))', + F(1) & (F(2) | F(3)) + ) From 2fb7f8e4cffe0f8854f2945d3cdc114aeebff2d0 Mon Sep 17 00:00:00 2001 From: Wouter Bolsterlee Date: Fri, 7 Jun 2013 23:40:09 +0200 Subject: [PATCH 22/22] Support filter instances directly in Table.scan() --- happybase/table.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/happybase/table.py b/happybase/table.py index bb32ce6..77756b1 100644 --- a/happybase/table.py +++ b/happybase/table.py @@ -10,6 +10,7 @@ from .hbase.ttypes import TScan from .util import thrift_type_to_dict, str_increment from .batch import Batch +from .filter import _FilterNode logger = logging.getLogger(__name__) @@ -204,8 +205,8 @@ def cells(self, row, column, versions=None, timestamp=None, return map(make_cell, cells) def scan(self, row_start=None, row_stop=None, row_prefix=None, - columns=None, filter=None, timestamp=None, - include_timestamp=False, batch_size=1000, limit=None): + columns=None, timestamp=None, include_timestamp=False, + batch_size=1000, limit=None, filter=None): """Create a scanner for data in the table. This method returns an iterable that can be used for looping over the @@ -230,9 +231,6 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None, The `columns`, `timestamp` and `include_timestamp` arguments behave exactly the same as for :py:meth:`row`. - The `filter` argument may be a filter string that will be applied at - the server by the region servers. 
- If `limit` is given, at most `limit` results will be returned. The `batch_size` argument specifies how many results should be @@ -240,6 +238,12 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None, this to a low value (or even 1) if your data is large, since a low batch size results in added round-trips to the server. + The `filter` argument may be a filter string that will be + applied at the server by the region servers. If you need more + than a static filter string literal, use the helpers in the + :py:mod:`happybase.filter` module to construct filter strings + programmatically. + **Compatibility note:** The `filter` argument is only available when using HBase 0.92 (or up). In HBase 0.90 compatibility mode, specifying a `filter` raises an exception. @@ -274,6 +278,14 @@ def scan(self, row_start=None, row_stop=None, row_prefix=None, if row_start is None: row_start = '' + if filter is not None: + if isinstance(filter, _FilterNode): + filter = str(filter) + + if not isinstance(filter, str): + raise TypeError( + "'filter' must be a filter instance or a (byte) string") + if self.connection.compat == '0.90': # The scannerOpenWithScan() Thrift function is not # available, so work around it as much as possible with the